dns.tokenizer

52 53 """A DNS master file format token. 54 55 @ivar ttype: The token type 56 @type ttype: int 57 @ivar value: The token value 58 @type value: string 59 @ivar has_escape: Does the token value contain escapes? 60 @type has_escape: bool 61 """ 62

def __init__(self, ttype, value='', has_escape=False):
    """Create a token from its type, its text and its escape flag.

    @param ttype: The token type
    @type ttype: int
    @param value: The token value
    @type value: string
    @param has_escape: Does the token value contain escapes?
    @type has_escape: bool
    """
    self.ttype, self.value, self.has_escape = ttype, value, has_escape

76

def is_eof(self):
    """Tell whether this token's type is EOF.

    @rtype: bool
    """
    kind = self.ttype
    return kind == EOF

79

def is_eol(self):
    """Tell whether this token's type is EOL.

    @rtype: bool
    """
    kind = self.ttype
    return kind == EOL

82

def is_whitespace(self):
    """Tell whether this token's type is WHITESPACE.

    @rtype: bool
    """
    kind = self.ttype
    return kind == WHITESPACE

85

def is_identifier(self):
    """Tell whether this token's type is IDENTIFIER.

    @rtype: bool
    """
    kind = self.ttype
    return kind == IDENTIFIER

88

def is_quoted_string(self):
    """Tell whether this token's type is QUOTED_STRING.

    @rtype: bool
    """
    kind = self.ttype
    return kind == QUOTED_STRING

91

def is_comment(self):
    """Tell whether this token's type is COMMENT.

    @rtype: bool
    """
    kind = self.ttype
    return kind == COMMENT

94

def is_delimiter(self):
    """Tell whether this token's type is DELIMITER.

    @rtype: bool
    """
    kind = self.ttype
    return kind == DELIMITER

97

def is_eol_or_eof(self):
    """Tell whether this token's type is either EOL or EOF.

    @rtype: bool
    """
    if self.ttype == EOL:
        return True
    return self.ttype == EOF

100

def __eq__(self, other):
    """Compare two tokens by type and value.

    The has_escape flag does not take part in the comparison.  Anything
    that is not a Token compares unequal.

    @rtype: bool
    """
    if isinstance(other, Token):
        same_type = self.ttype == other.ttype
        return same_type and self.value == other.value
    return False

106

def __ne__(self, other):
    """Report whether two tokens differ in type or in value.

    The has_escape flag does not take part in the comparison.  Anything
    that is not a Token is considered different.

    @rtype: bool
    """
    if isinstance(other, Token):
        type_differs = self.ttype != other.ttype
        return type_differs or self.value != other.value
    return True

112

def __str__(self):
    """Render the token as its numeric type followed by its quoted value.

    @rtype: string
    """
    fields = (self.ttype, self.value)
    return '%d "%s"' % fields

115

def unescape(self):
    """Return a token whose value has its backslash escapes resolved.

    A backslash followed by three decimal digits is replaced by the
    character with that decimal code; a backslash followed by any other
    character is replaced by that character.  A token without escapes is
    returned as-is (the same object); otherwise a new Token of the same
    type is built.

    @rtype: Token object
    @raises dns.exception.UnexpectedEnd: the value ends inside an escape
    @raises dns.exception.SyntaxError: a decimal escape does not consist
    of three digits
    """
    if not self.has_escape:
        return self
    text = self.value
    end = len(text)
    pieces = []
    pos = 0
    while pos < end:
        ch = text[pos]
        pos += 1
        if ch == '\\':
            if pos >= end:
                raise dns.exception.UnexpectedEnd
            ch = text[pos]
            pos += 1
            if ch.isdigit():
                # Decimal escape: two more digits must follow.
                if pos >= end:
                    raise dns.exception.UnexpectedEnd
                second = text[pos]
                pos += 1
                if pos >= end:
                    raise dns.exception.UnexpectedEnd
                third = text[pos]
                pos += 1
                if not (second.isdigit() and third.isdigit()):
                    raise dns.exception.SyntaxError
                ch = chr(int(ch) * 100 + int(second) * 10 + int(third))
        pieces.append(ch)
    return Token(self.ttype, ''.join(pieces))

144 145 # compatibility for old-style tuple tokens 146

def __len__(self):
    """Return 2, so a token can stand in for an old-style two-item tuple.

    @rtype: int
    """
    tuple_size = 2
    return tuple_size

149

def __iter__(self):
    """Iterate over the token as if it were a (ttype, value) tuple.

    @rtype: iterator
    """
    as_pair = (self.ttype, self.value)
    return iter(as_pair)

152

def __getitem__(self, i):
    """Index the token as if it were a (ttype, value) tuple.

    @param i: 0 for the token type, 1 for the token value
    @type i: int
    @raises IndexError: i is neither 0 nor 1
    """
    if i == 0:
        return self.ttype
    if i == 1:
        return self.value
    raise IndexError

"""A DNS master file format tokenizer.

Tokens are returned as L{Token} objects.  For compatibility with older
code a token can also be used like a (type, value) tuple, where
I{type} is an int, and I{value} is a string.  The valid types are EOF,
EOL, WHITESPACE, IDENTIFIER, QUOTED_STRING, COMMENT, and DELIMITER.

@ivar file: The file to tokenize
@type file: file
@ivar ungotten_char: The most recently ungotten character, or None.
@type ungotten_char: string
@ivar ungotten_token: The most recently ungotten token, or None.
@type ungotten_token: Token object
@ivar multiline: The current multiline level.  This value is increased
by one every time a '(' delimiter is read, and decreased by one every time
a ')' delimiter is read.
@type multiline: int
@ivar quoting: This variable is true if the tokenizer is currently
reading a quoted string.
@type quoting: bool
@ivar eof: This variable is true if the tokenizer has encountered EOF.
@type eof: bool
@ivar delimiters: The current delimiter dictionary.
@type delimiters: dict
@ivar line_number: The current line number
@type line_number: int
@ivar filename: A filename that will be returned by the L{where} method.
@type filename: string
"""

def __init__(self, f=sys.stdin, filename=None):
    """Set up a tokenizer over a file or over the contents of a string.

    @param f: The file to tokenize.  The default is sys.stdin.
    This parameter may also be a string, in which case the tokenizer
    will take its input from the contents of the string.
    @type f: file or string
    @param filename: the name of the filename that the L{where} method
    will return.  When omitted, '<string>', '<stdin>' or '<file>' is
    used depending on what f is.
    @type filename: string
    """

    # Work out the input object and the name to report when the caller
    # did not supply one.
    if isinstance(f, text_type):
        f = StringIO(f)
        fallback_name = '<string>'
    elif isinstance(f, binary_type):
        f = StringIO(f.decode())
        fallback_name = '<string>'
    elif f is sys.stdin:
        fallback_name = '<stdin>'
    else:
        fallback_name = '<file>'
    if filename is None:
        filename = fallback_name

    self.file = f
    self.filename = filename
    self.line_number = 1
    self.delimiters = _DELIMITERS
    self.ungotten_char = None
    self.ungotten_token = None
    self.multiline = 0
    self.quoting = False
    self.eof = False

228

def _get_char(self):
    """Return the next input character, or '' once input is exhausted.

    An ungotten character, if present, is handed back first.  Reading a
    newline from the file advances line_number; reading '' sets eof.

    @rtype: string
    """

    pending = self.ungotten_char
    if pending is not None:
        self.ungotten_char = None
        return pending
    if self.eof:
        return ''
    ch = self.file.read(1)
    if ch == '':
        self.eof = True
    elif ch == '\n':
        self.line_number += 1
    return ch

247

def where(self):
    """Report the tokenizer's current position in the input.

    @rtype: (string, int) tuple.  The first item is the filename of
    the input, the second is the current line number.
    """

    name = self.filename
    line = self.line_number
    return (name, line)

256

def _unget_char(self, c):
    """Push a single character back onto the input.

    Only one character can be held at a time; pushing back a second
    one before the first has been re-read is an error.

    @param c: the character to unget
    @type c: string
    @raises UngetBufferFull: there is already an ungotten char
    """

    if self.ungotten_char is None:
        self.ungotten_char = c
        return
    raise UngetBufferFull

272

def skip_whitespace(self):
    """Discard leading whitespace and report how much was discarded.

    Spaces and tabs are always whitespace; newlines are whitespace only
    while the tokenizer is in multiline mode.  The first character that
    is not whitespace is ungotten so the next read sees it again.

    @rtype: int
    """

    count = 0
    while True:
        ch = self._get_char()
        if ch == ' ' or ch == '\t' or (ch == '\n' and self.multiline):
            count += 1
            continue
        self._unget_char(ch)
        return count

292

def get(self, want_leading=False, want_comment=False):
    """Get the next token.

    A previously ungotten token is considered first.  It is returned
    unless it is a WHITESPACE or COMMENT token that the caller did not
    ask for, in which case it is dropped and tokenizing continues with
    the input.

    @param want_leading: If True, return a WHITESPACE token if the
    first character read is whitespace.  The default is False.
    @type want_leading: bool
    @param want_comment: If True, return a COMMENT token if the
    first token read is a comment.  The default is False.
    @type want_comment: bool
    @rtype: Token object
    @raises dns.exception.UnexpectedEnd: input ended prematurely
    @raises dns.exception.SyntaxError: input was badly formed
    """

    if self.ungotten_token is not None:
        token = self.ungotten_token
        self.ungotten_token = None
        if token.is_whitespace():
            if want_leading:
                return token
        elif token.is_comment():
            if want_comment:
                return token
        else:
            return token
    skipped = self.skip_whitespace()
    if want_leading and skipped > 0:
        return Token(WHITESPACE, ' ')
    token = ''
    ttype = IDENTIFIER
    has_escape = False
    while True:
        c = self._get_char()
        if c == '' or c in self.delimiters:
            if c == '' and self.quoting:
                raise dns.exception.UnexpectedEnd
            if token == '' and ttype != QUOTED_STRING:
                # Nothing has been accumulated yet, so the delimiter
                # itself decides what happens next.
                if c == '(':
                    self.multiline += 1
                    self.skip_whitespace()
                    continue
                elif c == ')':
                    if self.multiline <= 0:
                        raise dns.exception.SyntaxError
                    self.multiline -= 1
                    self.skip_whitespace()
                    continue
                elif c == '"':
                    if not self.quoting:
                        self.quoting = True
                        self.delimiters = _QUOTING_DELIMITERS
                        ttype = QUOTED_STRING
                        continue
                    else:
                        self.quoting = False
                        self.delimiters = _DELIMITERS
                        self.skip_whitespace()
                        continue
                elif c == '\n':
                    return Token(EOL, '\n')
                elif c == ';':
                    # Collect the comment text up to, but excluding,
                    # the end of the line or the end of the input.
                    while True:
                        c = self._get_char()
                        if c == '\n' or c == '':
                            break
                        token += c
                    if want_comment:
                        # Leave the terminator for the next call.
                        self._unget_char(c)
                        return Token(COMMENT, token)
                    elif c == '':
                        if self.multiline:
                            raise dns.exception.SyntaxError(
                                'unbalanced parentheses')
                        return Token(EOF)
                    elif self.multiline:
                        # Inside parentheses the newline ending the
                        # comment is just whitespace; keep going.
                        self.skip_whitespace()
                        token = ''
                        continue
                    else:
                        return Token(EOL, '\n')
                else:
                    # This code exists in case we ever want a
                    # delimiter to be returned.  It never produces
                    # a token currently.
                    token = c
                    ttype = DELIMITER
            else:
                # The delimiter ends the token being built; leave it
                # for the next call.
                self._unget_char(c)
            break
        elif self.quoting:
            if c == '\\':
                c = self._get_char()
                if c == '':
                    raise dns.exception.UnexpectedEnd
                if c.isdigit():
                    c2 = self._get_char()
                    if c2 == '':
                        raise dns.exception.UnexpectedEnd
                    c3 = self._get_char()
                    # Check the character just read (c3), so that input
                    # ending inside a decimal escape is reported as
                    # UnexpectedEnd rather than as a SyntaxError.
                    if c3 == '':
                        raise dns.exception.UnexpectedEnd
                    if not (c2.isdigit() and c3.isdigit()):
                        raise dns.exception.SyntaxError
                    c = chr(int(c) * 100 + int(c2) * 10 + int(c3))
            elif c == '\n':
                raise dns.exception.SyntaxError('newline in quoted string')
        elif c == '\\':
            #
            # It's an escape.  Put it and the next character into
            # the token; it will be checked later for goodness.
            #
            token += c
            has_escape = True
            c = self._get_char()
            if c == '' or c == '\n':
                raise dns.exception.UnexpectedEnd
        token += c
    if token == '' and ttype != QUOTED_STRING:
        if self.multiline:
            raise dns.exception.SyntaxError('unbalanced parentheses')
        ttype = EOF
    return Token(ttype, token, has_escape)

415

def unget(self, token):
    """Push a token back so that the next L{get} sees it again.

    Only one token can be held at a time; pushing back a second one
    before the first has been re-read is an error.

    @param token: the token to unget
    @type token: Token object
    @raises UngetBufferFull: there is already an ungotten token
    """

    if self.ungotten_token is None:
        self.ungotten_token = token
        return
    raise UngetBufferFull

431

def next(self):
    """Produce the next token for iteration, stopping at EOF.

    @rtype: Token object
    @raises StopIteration: the next token is EOF
    """

    token = self.get()
    if not token.is_eof():
        return token
    raise StopIteration

# Python 3 spelling of the iterator protocol method.
__next__ = next

def __iter__(self):
    """Return the tokenizer itself; it is its own iterator."""
    iterator = self
    return iterator

446 447 # Helpers 448

def get_int(self):
    """Read the next token and convert it to an integer.

    The token must be an identifier made up of digits only.

    @raises dns.exception.SyntaxError: the token is not an identifier,
    or is not all digits
    @rtype: int
    """

    token = self.get().unescape()
    if not token.is_identifier():
        raise dns.exception.SyntaxError('expecting an identifier')
    text = token.value
    if text.isdigit():
        return int(text)
    raise dns.exception.SyntaxError('expecting an integer')

462

def get_uint8(self):
    """Read the next token as an integer in the range 0..255.

    @raises dns.exception.SyntaxError: the token is not an integer, or
    the integer is outside the unsigned 8-bit range
    @rtype: int
    """

    value = self.get_int()
    if 0 <= value <= 255:
        return value
    raise dns.exception.SyntaxError(
        '%d is not an unsigned 8-bit integer' % value)

476

def get_uint16(self):
    """Read the next token as an integer in the range 0..65535.

    @raises dns.exception.SyntaxError: the token is not an integer, or
    the integer is outside the unsigned 16-bit range
    @rtype: int
    """

    value = self.get_int()
    if 0 <= value <= 65535:
        return value
    raise dns.exception.SyntaxError(
        '%d is not an unsigned 16-bit integer' % value)

490

def get_uint32(self):
    """Read the next token and interpret it as a 32-bit unsigned
    integer.

    The token is parsed by L{get_int}, exactly as L{get_uint8} and
    L{get_uint16} do, and the result is then range checked.

    @raises dns.exception.SyntaxError: the token is not an integer, or
    the integer is outside the range 0..4294967295
    @rtype: int
    """

    value = self.get_int()
    # The largest unsigned 32-bit value is 2**32 - 1; 4294967296 itself
    # does not fit and must be rejected.
    if value < 0 or value > 4294967295:
        raise dns.exception.SyntaxError(
            '%d is not an unsigned 32-bit integer' % value)
    return value

509

def get_string(self, origin=None):
    """Read the next token and return its unescaped text.

    Both identifiers and quoted strings are accepted.  The origin
    parameter is accepted but not used.

    @raises dns.exception.SyntaxError: the token is neither an
    identifier nor a quoted string
    @rtype: string
    """

    token = self.get().unescape()
    if token.is_identifier() or token.is_quoted_string():
        return token.value
    raise dns.exception.SyntaxError('expecting a string')

521

def get_identifier(self, origin=None):
    """Read the next token, insisting that it is an identifier.

    The returned text is the unescaped token value.  The origin
    parameter is accepted but not used.

    @raises dns.exception.SyntaxError: the token is not an identifier
    @rtype: string
    """

    token = self.get().unescape()
    if token.is_identifier():
        return token.value
    raise dns.exception.SyntaxError('expecting an identifier')

533

def get_name(self, origin=None):
    """Read the next token and turn it into a DNS name.

    The raw token value (not passed through unescape) is handed to
    dns.name.from_text together with origin.

    @param origin: passed through to dns.name.from_text
    @raises dns.exception.SyntaxError: the token is not an identifier
    @rtype: dns.name.Name object"""

    token = self.get()
    if token.is_identifier():
        return dns.name.from_text(token.value, origin)
    raise dns.exception.SyntaxError('expecting an identifier')

544

def get_eol(self):
    """Read the next token, insisting that it is EOL or EOF.

    @raises dns.exception.SyntaxError: the token is neither EOL nor EOF
    @rtype: string
    """

    token = self.get()
    if token.is_eol_or_eof():
        return token.value
    found = (token.ttype, token.value)
    raise dns.exception.SyntaxError(
        'expected EOL or EOF, got %d "%s"' % found)

559

def get_ttl(self):
    """Read the next token and convert it with dns.ttl.from_text.

    @raises dns.exception.SyntaxError: the token is not an identifier
    @return: whatever dns.ttl.from_text returns for the token value
    """
    token = self.get().unescape()
    if token.is_identifier():
        return dns.ttl.from_text(token.value)
    raise dns.exception.SyntaxError('expecting an identifier')

Source Code for Module dns.tokenizer