1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 """Tokenize DNS master file format"""
19
20 from io import StringIO
21 import sys
22
23 import dns.exception
24 import dns.name
25 import dns.ttl
26 from ._compat import long, text_type, binary_type
27
# Characters that terminate an unquoted token.  The tokenizer tests
# membership with ``in``; every key maps to True.
_DELIMITERS = dict.fromkeys(' \t\n;()"', True)

# While reading a quoted string, only the double quote is a delimiter.
_QUOTING_DELIMITERS = dict.fromkeys('"', True)

# Token type codes, numbered consecutively from zero.
(EOF,
 EOL,
 WHITESPACE,
 IDENTIFIER,
 QUOTED_STRING,
 COMMENT,
 DELIMITER) = range(7)
46
47
49 """An attempt was made to unget a token when the unget buffer was full."""
50
51
53 """A DNS master file format token.
54
55 ttype: The token type
56 value: The token value
57 has_escape: Does the token value contain escapes?
58 """
59
def __init__(self, ttype, value='', has_escape=False):
    """Initialize a token instance.

    ttype: the token type code
    value: the token value; defaults to the empty string
    has_escape: whether the value contains escapes; defaults to False
    """

    self.ttype, self.value, self.has_escape = ttype, value, has_escape
66
def is_eof(self):
    """Return True if this token's type is EOF."""

    at_end = self.ttype == EOF
    return at_end
69
71 return self.ttype == EOL
72
75
78
81
84
87
def is_eol_or_eof(self):
    """Return True if this token's type is either EOL or EOF."""

    return self.ttype in (EOL, EOF)
90
92 if not isinstance(other, Token):
93 return False
94 return (self.ttype == other.ttype and
95 self.value == other.value)
96
98 if not isinstance(other, Token):
99 return True
100 return (self.ttype != other.ttype or
101 self.value != other.value)
102
104 return '%d "%s"' % (self.ttype, self.value)
105
134
135
136
139
141 return iter((self.ttype, self.value))
142
144 if i == 0:
145 return self.ttype
146 elif i == 1:
147 return self.value
148 else:
149 raise IndexError
150
151
153 """A DNS master file format tokenizer.
154
155 A token object is basically a (type, value) tuple. The valid
156 types are EOF, EOL, WHITESPACE, IDENTIFIER, QUOTED_STRING,
157 COMMENT, and DELIMITER.
158
159 file: The file to tokenize
160
161 ungotten_char: The most recently ungotten character, or None.
162
163 ungotten_token: The most recently ungotten token, or None.
164
165 multiline: The current multiline level. This value is increased
166 by one every time a '(' delimiter is read, and decreased by one every time
167 a ')' delimiter is read.
168
169 quoting: This variable is true if the tokenizer is currently
170 reading a quoted string.
171
172 eof: This variable is true if the tokenizer has encountered EOF.
173
174 delimiters: The current delimiter dictionary.
175
176 line_number: The current line number
177
178 filename: A filename that will be returned by the where() method.
179 """
180
def __init__(self, f=sys.stdin, filename=None):
    """Initialize a tokenizer instance.

    f: The input to tokenize.  The default is sys.stdin.  A text
    string (text_type) is wrapped in a StringIO so its contents become
    the input; a byte string (binary_type) is first converted with
    f.decode() and then wrapped the same way.  Anything else is used
    as the file object directly.

    filename: The name stored for the where() method to report.  When
    it is None a placeholder is chosen: '<string>' for string input,
    '<stdin>' when f is sys.stdin, and '<file>' otherwise.
    """

    if isinstance(f, text_type):
        source, placeholder = StringIO(f), '<string>'
    elif isinstance(f, binary_type):
        source, placeholder = StringIO(f.decode()), '<string>'
    else:
        source = f
        placeholder = '<stdin>' if f is sys.stdin else '<file>'

    self.file = source
    # One-slot pushback buffers for characters and tokens.
    self.ungotten_char = None
    self.ungotten_token = None
    # Parenthesis nesting depth and quoted-string state.
    self.multiline = 0
    self.quoting = False
    self.eof = False
    self.delimiters = _DELIMITERS
    self.line_number = 1
    self.filename = placeholder if filename is None else filename
215
def _get_char(self):
    """Read a character from input.

    A pending ungotten character is returned first and the pushback
    slot is cleared.  Otherwise one character is read from the file;
    the line counter is advanced on a newline.  Once the file yields
    the empty string the tokenizer is marked as being at EOF and every
    later read returns '' without touching the file again.
    """

    pending = self.ungotten_char
    if pending is not None:
        self.ungotten_char = None
        return pending
    if self.eof:
        return ''
    ch = self.file.read(1)
    if ch == '':
        self.eof = True
    elif ch == '\n':
        self.line_number += 1
    return ch
233
def where(self):
    """Report the tokenizer's current position in the input.

    Returns a (filename, line_number) tuple: the stored input name
    followed by the current line number.
    """

    location = (self.filename, self.line_number)
    return location
242
def _unget_char(self, c):
    """Push a character back so the next read returns it.

    Only one character can be held at a time; pushing a second one
    before the first has been re-read is an error.

    c: the character to unget

    Raises UngetBufferFull: there is already an ungotten char
    """

    if self.ungotten_char is None:
        self.ungotten_char = c
        return
    raise UngetBufferFull
257
def skip_whitespace(self):
    """Consume leading whitespace from the input.

    Spaces and tabs are always skipped.  A newline is skipped only
    while the tokenizer is in multiline mode.  The first character
    that is not skipped (including the empty string read at end of
    input) is ungotten so the next read sees it again.

    Returns the number of characters skipped.
    """

    count = 0
    while True:
        ch = self._get_char()
        if ch in (' ', '\t'):
            count += 1
            continue
        if ch == '\n' and self.multiline:
            count += 1
            continue
        self._unget_char(ch)
        return count
277
def get(self, want_leading=False, want_comment=False):
    """Get the next token.

    want_leading: If True, return a WHITESPACE token if the
    first character read is whitespace.  The default is False.

    want_comment: If True, return a COMMENT token if the
    first token read is a comment.  The default is False.

    Raises dns.exception.UnexpectedEnd: input ended prematurely
    (inside a quoted string, or part-way through an escape)

    Raises dns.exception.SyntaxError: input was badly formed

    Returns a Token.
    """

    if self.ungotten_token is not None:
        token = self.ungotten_token
        self.ungotten_token = None
        # An ungotten whitespace or comment token is only handed back
        # when the caller asked for that kind; otherwise it is dropped
        # and tokenizing continues from the input.
        if token.is_whitespace():
            if want_leading:
                return token
        elif token.is_comment():
            if want_comment:
                return token
        else:
            return token
    skipped = self.skip_whitespace()
    if want_leading and skipped > 0:
        return Token(WHITESPACE, ' ')
    token = ''
    ttype = IDENTIFIER
    has_escape = False
    while True:
        c = self._get_char()
        if c == '' or c in self.delimiters:
            if c == '' and self.quoting:
                raise dns.exception.UnexpectedEnd
            if token == '' and ttype != QUOTED_STRING:
                # Nothing has been accumulated yet, so the delimiter
                # itself decides what happens next.
                if c == '(':
                    self.multiline += 1
                    self.skip_whitespace()
                    continue
                elif c == ')':
                    if self.multiline <= 0:
                        raise dns.exception.SyntaxError
                    self.multiline -= 1
                    self.skip_whitespace()
                    continue
                elif c == '"':
                    if not self.quoting:
                        # Opening quote: from here on only '"' ends
                        # the token.
                        self.quoting = True
                        self.delimiters = _QUOTING_DELIMITERS
                        ttype = QUOTED_STRING
                        continue
                    else:
                        # Closing quote that was ungotten when the
                        # previous quoted string was returned.
                        self.quoting = False
                        self.delimiters = _DELIMITERS
                        self.skip_whitespace()
                        continue
                elif c == '\n':
                    return Token(EOL, '\n')
                elif c == ';':
                    # Collect the comment text up to, but not
                    # including, the newline or end of input.
                    while True:
                        c = self._get_char()
                        if c == '\n' or c == '':
                            break
                        token += c
                    if want_comment:
                        # Leave the terminator for the next call.
                        self._unget_char(c)
                        return Token(COMMENT, token)
                    elif c == '':
                        if self.multiline:
                            raise dns.exception.SyntaxError(
                                'unbalanced parentheses')
                        return Token(EOF)
                    elif self.multiline:
                        # Inside parentheses the comment and its
                        # newline are discarded.
                        self.skip_whitespace()
                        token = ''
                        continue
                    else:
                        return Token(EOL, '\n')
                else:
                    # Any other delimiter becomes a DELIMITER token.
                    token = c
                    ttype = DELIMITER
            else:
                # The delimiter ends the token being built; keep it
                # for the next call.
                self._unget_char(c)
            break
        elif self.quoting:
            if c == '\\':
                # Escapes inside quoted strings are resolved here.
                c = self._get_char()
                if c == '':
                    raise dns.exception.UnexpectedEnd
                if c.isdigit():
                    # \DDD: three decimal digits giving a character
                    # code.
                    c2 = self._get_char()
                    if c2 == '':
                        raise dns.exception.UnexpectedEnd
                    c3 = self._get_char()
                    # Test c3 here (not c) so that input ending after
                    # two digits is reported as UnexpectedEnd.
                    if c3 == '':
                        raise dns.exception.UnexpectedEnd
                    if not (c2.isdigit() and c3.isdigit()):
                        raise dns.exception.SyntaxError
                    c = chr(int(c) * 100 + int(c2) * 10 + int(c3))
            elif c == '\n':
                raise dns.exception.SyntaxError('newline in quoted string')
        elif c == '\\':
            # Outside quotes the backslash and the character after it
            # are both stored in the token, and the token is flagged
            # as containing an escape.
            token += c
            has_escape = True
            c = self._get_char()
            if c == '' or c == '\n':
                raise dns.exception.UnexpectedEnd
        token += c
    if token == '' and ttype != QUOTED_STRING:
        if self.multiline:
            raise dns.exception.SyntaxError('unbalanced parentheses')
        ttype = EOF
    return Token(ttype, token, has_escape)
402
404 """Unget a token.
405
406 The unget buffer for tokens is only one token large; it is
407 an error to try to unget a token when the unget buffer is not
408 empty.
409
410 token: the token to unget
411
412 Raises UngetBufferFull: there is already an ungotten token
413 """
414
415 if self.ungotten_token is not None:
416 raise UngetBufferFull
417 self.ungotten_token = token
418
def next(self):
    """Return the next token for iteration.

    Raises StopIteration when the token obtained from get() is EOF.

    Returns a Token.
    """

    token = self.get()
    if not token.is_eof():
        return token
    raise StopIteration
429
430 __next__ = next
431
434
435
436
def get_int(self, base=10):
    """Read the next token and interpret it as an integer.

    base: the radix passed to int() when converting the token text.

    Raises dns.exception.SyntaxError if the token is not an identifier,
    is not made up of digits, or cannot be converted in the given base.

    Returns an int.
    """

    token = self.get().unescape()
    if not token.is_identifier():
        raise dns.exception.SyntaxError('expecting an identifier')
    if not token.value.isdigit():
        raise dns.exception.SyntaxError('expecting an integer')
    try:
        return int(token.value, base)
    except ValueError:
        # isdigit() accepts text that int() rejects, e.g. '9' in base 8;
        # report that as a syntax error like every other bad integer.
        raise dns.exception.SyntaxError('expecting an integer')
451
453 """Read the next token and interpret it as an 8-bit unsigned
454 integer.
455
456 Raises dns.exception.SyntaxError if not an 8-bit unsigned integer.
457
458 Returns an int.
459 """
460
461 value = self.get_int()
462 if value < 0 or value > 255:
463 raise dns.exception.SyntaxError(
464 '%d is not an unsigned 8-bit integer' % value)
465 return value
466
468 """Read the next token and interpret it as a 16-bit unsigned
469 integer.
470
471 Raises dns.exception.SyntaxError if not a 16-bit unsigned integer.
472
473 Returns an int.
474 """
475
476 value = self.get_int(base=base)
477 if value < 0 or value > 65535:
478 if base == 8:
479 raise dns.exception.SyntaxError(
480 '%o is not an octal unsigned 16-bit integer' % value)
481 else:
482 raise dns.exception.SyntaxError(
483 '%d is not an unsigned 16-bit integer' % value)
484 return value
485
505
518
520 """Read the next token, which should be an identifier.
521
522 Raises dns.exception.SyntaxError if not an identifier.
523
524 Returns a string.
525 """
526
527 token = self.get().unescape()
528 if not token.is_identifier():
529 raise dns.exception.SyntaxError('expecting an identifier')
530 return token.value
531
533 """Read the next token and interpret it as a DNS name.
534
535 Raises dns.exception.SyntaxError if not a name.
536
537 Returns a dns.name.Name.
538 """
539
540 token = self.get()
541 if not token.is_identifier():
542 raise dns.exception.SyntaxError('expecting an identifier')
543 return dns.name.from_text(token.value, origin)
544
546 """Read the next token and raise an exception if it isn't EOL or
547 EOF.
548
549 Returns a string.
550 """
551
552 token = self.get()
553 if not token.is_eol_or_eof():
554 raise dns.exception.SyntaxError(
555 'expected EOL or EOF, got %d "%s"' % (token.ttype,
556 token.value))
557 return token.value
558
560 """Read the next token and interpret it as a DNS TTL.
561
562 Raises dns.exception.SyntaxError or dns.ttl.BadTTL if not an
563 identifier or badly formed.
564
565 Returns an int.
566 """
567
568 token = self.get().unescape()
569 if not token.is_identifier():
570 raise dns.exception.SyntaxError('expecting an identifier')
571 return dns.ttl.from_text(token.value)
572