1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """Tokenize DNS master file format"""
17
18 from io import StringIO
19 import sys
20
21 import dns.exception
22 import dns.name
23 import dns.ttl
24 from ._compat import long, text_type, binary_type
25
# Characters that terminate an unquoted token.  The dictionary is only
# used for membership tests ("c in delimiters"); every value is True.
_DELIMITERS = dict.fromkeys(' \t\n;()"', True)

# While a quoted string is being read, only a double quote terminates it.
_QUOTING_DELIMITERS = dict.fromkeys('"', True)
36
# Token type codes.  They are consecutive integers starting at zero.
(EOF, EOL, WHITESPACE, IDENTIFIER,
 QUOTED_STRING, COMMENT, DELIMITER) = range(7)
44
45
47
48 """An attempt was made to unget a token when the unget buffer was full."""
49
50
52
53 """A DNS master file format token.
54
55 @ivar ttype: The token type
56 @type ttype: int
57 @ivar value: The token value
58 @type value: string
59 @ivar has_escape: Does the token value contain escapes?
60 @type has_escape: bool
61 """
62
def __init__(self, ttype, value='', has_escape=False):
    """Build a token from its type, its text and its escape flag.

    @param ttype: The token type
    @type ttype: int
    @param value: The token value; the empty string when omitted
    @type value: string
    @param has_escape: Does the token value contain escapes?  False
    when omitted.
    @type has_escape: bool
    """
    # The three arguments are stored unchanged as instance attributes.
    self.ttype, self.value, self.has_escape = ttype, value, has_escape
76
78 return self.ttype == EOF
79
81 return self.ttype == EOL
82
85
88
91
94
97
99 return self.ttype == EOL or self.ttype == EOF
100
102 if not isinstance(other, Token):
103 return False
104 return (self.ttype == other.ttype and
105 self.value == other.value)
106
108 if not isinstance(other, Token):
109 return True
110 return (self.ttype != other.ttype or
111 self.value != other.value)
112
114 return '%d "%s"' % (self.ttype, self.value)
115
144
145
146
149
151 return iter((self.ttype, self.value))
152
154 if i == 0:
155 return self.ttype
156 elif i == 1:
157 return self.value
158 else:
159 raise IndexError
160
161
163
164 """A DNS master file format tokenizer.
165
166 A token is a (type, value) tuple, where I{type} is an int, and
167 I{value} is a string. The valid types are EOF, EOL, WHITESPACE,
168 IDENTIFIER, QUOTED_STRING, COMMENT, and DELIMITER.
169
170 @ivar file: The file to tokenize
171 @type file: file
172 @ivar ungotten_char: The most recently ungotten character, or None.
173 @type ungotten_char: string
174 @ivar ungotten_token: The most recently ungotten token, or None.
175 @type ungotten_token: (int, string) token tuple
176 @ivar multiline: The current multiline level. This value is increased
177 by one every time a '(' delimiter is read, and decreased by one every time
178 a ')' delimiter is read.
179 @type multiline: int
180 @ivar quoting: This variable is true if the tokenizer is currently
181 reading a quoted string.
182 @type quoting: bool
183 @ivar eof: This variable is true if the tokenizer has encountered EOF.
184 @type eof: bool
185 @ivar delimiters: The current delimiter dictionary.
186 @type delimiters: dict
187 @ivar line_number: The current line number
188 @type line_number: int
189 @ivar filename: A filename that will be returned by the L{where} method.
190 @type filename: string
191 """
192
def __init__(self, f=sys.stdin, filename=None):
    """Set up a tokenizer over a file object or an in-memory string.

    @param f: The file to tokenize.  The default is sys.stdin.
    This parameter may also be a text or binary string, in which case
    the tokenizer reads from the contents of the string (a binary
    string is decoded with decode() first).
    @type f: file or string
    @param filename: the name that the L{where} method will report.
    When omitted it is '<string>' for string input, '<stdin>' when f is
    sys.stdin and '<file>' for any other object.
    @type filename: string
    """

    if isinstance(f, (text_type, binary_type)):
        # String input: wrap it so that it can be read like a file.
        text = f if isinstance(f, text_type) else f.decode()
        f = StringIO(text)
        default_filename = '<string>'
    elif f is sys.stdin:
        default_filename = '<stdin>'
    else:
        default_filename = '<file>'
    if filename is None:
        filename = default_filename

    self.file = f
    self.filename = filename
    self.line_number = 1
    # Both push-back slots start out empty.
    self.ungotten_char = None
    self.ungotten_token = None
    # Lexer state: parenthesis depth, quoted-string flag, EOF flag and
    # the delimiter table currently in force.
    self.multiline = 0
    self.quoting = False
    self.eof = False
    self.delimiters = _DELIMITERS
228
230 """Read a character from input.
231 @rtype: string
232 """
233
234 if self.ungotten_char is None:
235 if self.eof:
236 c = ''
237 else:
238 c = self.file.read(1)
239 if c == '':
240 self.eof = True
241 elif c == '\n':
242 self.line_number += 1
243 else:
244 c = self.ungotten_char
245 self.ungotten_char = None
246 return c
247
249 """Return the current location in the input.
250
251 @rtype: (string, int) tuple. The first item is the filename of
252 the input, the second is the current line number.
253 """
254
255 return (self.filename, self.line_number)
256
258 """Unget a character.
259
260 The unget buffer for characters is only one character large; it is
261 an error to try to unget a character when the unget buffer is not
262 empty.
263
264 @param c: the character to unget
265 @type c: string
266 @raises UngetBufferFull: there is already an ungotten char
267 """
268
269 if self.ungotten_char is not None:
270 raise UngetBufferFull
271 self.ungotten_char = c
272
274 """Consume input until a non-whitespace character is encountered.
275
276 The non-whitespace character is then ungotten, and the number of
277 whitespace characters consumed is returned.
278
279 If the tokenizer is in multiline mode, then newlines are whitespace.
280
281 @rtype: int
282 """
283
284 skipped = 0
285 while True:
286 c = self._get_char()
287 if c != ' ' and c != '\t':
288 if (c != '\n') or not self.multiline:
289 self._unget_char(c)
290 return skipped
291 skipped += 1
292
def get(self, want_leading=False, want_comment=False):
    """Get the next token.

    A previously ungotten token is returned first, unless it is a
    whitespace or comment token that the caller did not ask for; in that
    case it is dropped and a fresh token is read from the input.

    @param want_leading: If True, return a WHITESPACE token if the
    first character read is whitespace.  The default is False.
    @type want_leading: bool
    @param want_comment: If True, return a COMMENT token if the
    first token read is a comment.  The default is False.
    @type want_comment: bool
    @rtype: Token object
    @raises dns.exception.UnexpectedEnd: input ended prematurely (inside
    a quoted string or in the middle of an escape sequence)
    @raises dns.exception.SyntaxError: input was badly formed (unbalanced
    parentheses, a newline in a quoted string, or a malformed \\DDD
    escape)
    """

    if self.ungotten_token is not None:
        token = self.ungotten_token
        self.ungotten_token = None
        # Unwanted whitespace/comment tokens are discarded and we fall
        # through to read the next token from the input.
        if token.is_whitespace():
            if want_leading:
                return token
        elif token.is_comment():
            if want_comment:
                return token
        else:
            return token
    skipped = self.skip_whitespace()
    if want_leading and skipped > 0:
        return Token(WHITESPACE, ' ')
    token = ''
    ttype = IDENTIFIER
    has_escape = False
    while True:
        c = self._get_char()
        if c == '' or c in self.delimiters:
            if c == '' and self.quoting:
                raise dns.exception.UnexpectedEnd
            if token == '' and ttype != QUOTED_STRING:
                # Nothing collected yet, so the delimiter itself decides
                # what happens next.
                if c == '(':
                    self.multiline += 1
                    self.skip_whitespace()
                    continue
                elif c == ')':
                    if self.multiline <= 0:
                        raise dns.exception.SyntaxError
                    self.multiline -= 1
                    self.skip_whitespace()
                    continue
                elif c == '"':
                    if not self.quoting:
                        # Opening quote: only '"' ends the token now.
                        self.quoting = True
                        self.delimiters = _QUOTING_DELIMITERS
                        ttype = QUOTED_STRING
                        continue
                    else:
                        # Closing quote: back to the normal delimiters.
                        self.quoting = False
                        self.delimiters = _DELIMITERS
                        self.skip_whitespace()
                        continue
                elif c == '\n':
                    return Token(EOL, '\n')
                elif c == ';':
                    # A comment runs to the end of the line or of the
                    # input; its text (without the ';') is collected.
                    while True:
                        c = self._get_char()
                        if c == '\n' or c == '':
                            break
                        token += c
                    if want_comment:
                        # Push the terminator back so the following
                        # call reports the EOL/EOF.
                        self._unget_char(c)
                        return Token(COMMENT, token)
                    elif c == '':
                        if self.multiline:
                            raise dns.exception.SyntaxError(
                                'unbalanced parentheses')
                        return Token(EOF)
                    elif self.multiline:
                        # Inside parentheses the newline that ended the
                        # comment is just whitespace.
                        self.skip_whitespace()
                        token = ''
                        continue
                    else:
                        return Token(EOL, '\n')
                else:
                    # Any other delimiter, including end of input
                    # (c == ''), becomes the pending token text.  An
                    # empty result is turned into EOF after the loop.
                    token = c
                    ttype = DELIMITER
            else:
                # The delimiter ends the token collected so far; leave
                # it in the input for the next call.
                self._unget_char(c)
            break
        elif self.quoting:
            if c == '\\':
                c = self._get_char()
                if c == '':
                    raise dns.exception.UnexpectedEnd
                if c.isdigit():
                    # \DDD: three decimal digits giving a character code.
                    c2 = self._get_char()
                    if c2 == '':
                        raise dns.exception.UnexpectedEnd
                    c3 = self._get_char()
                    # Check the character just read (c3), so that input
                    # ending after two digits is reported as an
                    # unexpected end rather than a generic syntax error.
                    if c3 == '':
                        raise dns.exception.UnexpectedEnd
                    if not (c2.isdigit() and c3.isdigit()):
                        raise dns.exception.SyntaxError
                    c = chr(int(c) * 100 + int(c2) * 10 + int(c3))
            elif c == '\n':
                raise dns.exception.SyntaxError('newline in quoted string')
        elif c == '\\':
            # An escape outside a quoted string: keep the backslash and
            # the following character in the token text and record that
            # the token contains an escape.
            token += c
            has_escape = True
            c = self._get_char()
            if c == '' or c == '\n':
                raise dns.exception.UnexpectedEnd
        token += c
    if token == '' and ttype != QUOTED_STRING:
        if self.multiline:
            raise dns.exception.SyntaxError('unbalanced parentheses')
        ttype = EOF
    return Token(ttype, token, has_escape)
415
417 """Unget a token.
418
419 The unget buffer for tokens is only one token large; it is
420 an error to try to unget a token when the unget buffer is not
421 empty.
422
423 @param token: the token to unget
424 @type token: Token object
425 @raises UngetBufferFull: there is already an ungotten token
426 """
427
428 if self.ungotten_token is not None:
429 raise UngetBufferFull
430 self.ungotten_token = token
431
433 """Return the next item in an iteration.
434 @rtype: (int, string)
435 """
436
437 token = self.get()
438 if token.is_eof():
439 raise StopIteration
440 return token
441
442 __next__ = next
443
446
447
448
462
464 """Read the next token and interpret it as an 8-bit unsigned
465 integer.
466
467 @raises dns.exception.SyntaxError:
468 @rtype: int
469 """
470
471 value = self.get_int()
472 if value < 0 or value > 255:
473 raise dns.exception.SyntaxError(
474 '%d is not an unsigned 8-bit integer' % value)
475 return value
476
478 """Read the next token and interpret it as a 16-bit unsigned
479 integer.
480
481 @raises dns.exception.SyntaxError:
482 @rtype: int
483 """
484
485 value = self.get_int()
486 if value < 0 or value > 65535:
487 raise dns.exception.SyntaxError(
488 '%d is not an unsigned 16-bit integer' % value)
489 return value
490
509
521
523 """Read the next token and raise an exception if it is not an identifier.
524
525 @raises dns.exception.SyntaxError:
526 @rtype: string
527 """
528
529 token = self.get().unescape()
530 if not token.is_identifier():
531 raise dns.exception.SyntaxError('expecting an identifier')
532 return token.value
533
544
546 """Read the next token and raise an exception if it isn't EOL or
547 EOF.
548
549 @raises dns.exception.SyntaxError:
550 @rtype: string
551 """
552
553 token = self.get()
554 if not token.is_eol_or_eof():
555 raise dns.exception.SyntaxError(
556 'expected EOL or EOF, got %d "%s"' % (token.ttype,
557 token.value))
558 return token.value
559
565