1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """Tokenize DNS master file format"""
17
18 import cStringIO
19 import sys
20
21 import dns.exception
22 import dns.name
23 import dns.ttl
24
# Characters that end an identifier while the tokenizer is not inside a
# quoted string.  Only membership is ever tested; the values are unused.
_DELIMITERS = dict.fromkeys(' \t\n;()"', True)

# Delimiter table swapped in while a quoted string is being read: only
# the closing quote ends the token.
_QUOTING_DELIMITERS = dict.fromkeys('"', True)

# Token type codes; these are the first element of every token tuple.
(EOF,
 EOL,
 WHITESPACE,
 IDENTIFIER,
 QUOTED_STRING,
 COMMENT,
 DELIMITER) = range(7)
43
# NOTE(review): the class header line is missing from the listing this
# was reviewed from.  The class name comes from the `raise
# UngetBufferFull` statements; the base class is assumed -- confirm it
# against the original file.
class UngetBufferFull(dns.exception.DNSException):
    """Raised when an attempt is made to unget a token when the unget
    buffer is full."""
48
50 """A DNS master file format tokenizer.
51
52 A token is a (type, value) tuple, where I{type} is an int, and
53 I{value} is a string. The valid types are EOF, EOL, WHITESPACE,
54 IDENTIFIER, QUOTED_STRING, COMMENT, and DELIMITER.
55
56 @ivar file: The file to tokenize
57 @type file: file
58 @ivar ungotten_char: The most recently ungotten character, or None.
59 @type ungotten_char: string
60 @ivar ungotten_token: The most recently ungotten token, or None.
61 @type ungotten_token: (int, string) token tuple
62 @ivar multiline: The current multiline level. This value is increased
63 by one every time a '(' delimiter is read, and decreased by one every time
64 a ')' delimiter is read.
65 @type multiline: int
66 @ivar quoting: This variable is true if the tokenizer is currently
67 reading a quoted string.
68 @type quoting: bool
69 @ivar eof: This variable is true if the tokenizer has encountered EOF.
70 @type eof: bool
71 @ivar delimiters: The current delimiter dictionary.
72 @type delimiters: dict
73 @ivar line_number: The current line number
74 @type line_number: int
75 @ivar filename: A filename that will be returned by the L{where} method.
76 @type filename: string
77 """
78
def __init__(self, f=sys.stdin, filename=None):
    """Initialize a tokenizer instance.

    @param f: The input to tokenize; sys.stdin when omitted.  A string
    may be passed instead of a file, in which case the string's
    contents are tokenized.
    @type f: file or string
    @param filename: The name reported by the L{where} method.  When
    omitted it becomes '<string>' for string input, '<stdin>' when
    I{f} is sys.stdin, and '<file>' for any other file object.
    @type filename: string
    """

    from_text = isinstance(f, str)
    if from_text:
        # Wrap the text so the rest of the class only ever sees a
        # file-like object.
        f = cStringIO.StringIO(f)

    if filename is None:
        if from_text:
            filename = '<string>'
        elif f is sys.stdin:
            filename = '<stdin>'
        else:
            filename = '<file>'

    self.file = f
    self.filename = filename
    self.line_number = 1
    self.eof = False
    # One-slot pushback buffers for characters and tokens.
    self.ungotten_char = None
    self.ungotten_token = None
    # Parenthesis nesting depth and quoted-string state.
    self.multiline = 0
    self.quoting = False
    self.delimiters = _DELIMITERS
110
def _get_char(self):
    """Return the next input character.

    A character that was pushed back is returned first.  Once the end
    of input has been seen, every further call returns ''.

    @rtype: string
    """

    pushed_back = self.ungotten_char
    if pushed_back is not None:
        self.ungotten_char = None
        return pushed_back

    if self.eof:
        return ''

    ch = self.file.read(1)
    if ch == '':
        self.eof = True
    elif ch == '\n':
        # Lines are counted only when a newline is read from the file,
        # so a newline that is pushed back and re-read counts once.
        self.line_number += 1
    return ch
129
def where(self):
    """Report the tokenizer's current position in the input.

    @rtype: (string, int) tuple; the input's filename followed by the
    current line number.
    """

    location = (self.filename, self.line_number)
    return location
138
def _unget_char(self, c):
    """Push a character back onto the input.

    Only a single character can be held; pushing back a second one
    before the first has been re-read is an error.

    @param c: the character to unget
    @type c: string
    @raises UngetBufferFull: there is already an ungotten char
    """

    if self.ungotten_char is not None:
        raise UngetBufferFull
    self.ungotten_char = c
154
def skip_whitespace(self):
    """Discard whitespace and report how many characters were dropped.

    Spaces and tabs are always whitespace; a newline counts as
    whitespace only while the tokenizer is in multiline mode.  The
    first character that is not whitespace (which may be the empty
    end-of-input marker) is pushed back before returning.

    @rtype: int
    """

    count = 0
    while True:
        ch = self._get_char()
        blank = ch in (' ', '\t') or (ch == '\n' and self.multiline)
        if not blank:
            self._unget_char(ch)
            return count
        count += 1
174
def get(self, want_leading=False, want_comment=False):
    """Get the next token.

    A token that was ungotten is returned first, unless it is a
    WHITESPACE or COMMENT token the caller did not ask for; such a
    token is dropped and a fresh token is read from the input.

    @param want_leading: If True, return a WHITESPACE token if the
    first character read is whitespace.  The default is False.
    @type want_leading: bool
    @param want_comment: If True, return a COMMENT token if the
    first token read is a comment.  The default is False.
    @type want_comment: bool
    @rtype: (int, string) tuple
    @raises dns.exception.UnexpectedEnd: input ended prematurely
    @raises dns.exception.SyntaxError: input was badly formed
    """

    if self.ungotten_token is not None:
        token = self.ungotten_token
        self.ungotten_token = None
        if token[0] == WHITESPACE:
            if want_leading:
                return token
        elif token[0] == COMMENT:
            if want_comment:
                return token
        else:
            return token
    skipped = self.skip_whitespace()
    if want_leading and skipped > 0:
        return (WHITESPACE, ' ')
    token = ''
    ttype = IDENTIFIER
    while True:
        c = self._get_char()
        if c == '' or c in self.delimiters:
            if c == '' and self.quoting:
                raise dns.exception.UnexpectedEnd
            if token == '' and ttype != QUOTED_STRING:
                # Nothing has been accumulated yet, so the delimiter
                # itself decides what happens next.
                if c == '(':
                    self.multiline += 1
                    self.skip_whitespace()
                    continue
                elif c == ')':
                    if self.multiline <= 0:
                        raise dns.exception.SyntaxError
                    self.multiline -= 1
                    self.skip_whitespace()
                    continue
                elif c == '"':
                    if not self.quoting:
                        # Opening quote: from here on only a closing
                        # quote ends the token.
                        self.quoting = True
                        self.delimiters = _QUOTING_DELIMITERS
                        ttype = QUOTED_STRING
                        continue
                    else:
                        # Closing quote left over from the quoted
                        # string returned by the previous call.
                        self.quoting = False
                        self.delimiters = _DELIMITERS
                        self.skip_whitespace()
                        continue
                elif c == '\n':
                    return (EOL, '\n')
                elif c == ';':
                    # Collect the comment text up to, but not
                    # including, the end of the line or of the input.
                    while True:
                        c = self._get_char()
                        if c == '\n' or c == '':
                            break
                        token += c
                    if want_comment:
                        # Push the terminator back so it is read again
                        # by the next call.
                        self._unget_char(c)
                        return (COMMENT, token)
                    elif c == '':
                        if self.multiline:
                            raise dns.exception.SyntaxError(
                                'unbalanced parentheses')
                        return (EOF, '')
                    elif self.multiline:
                        # Inside parentheses the newline that ended
                        # the comment is just whitespace.
                        self.skip_whitespace()
                        token = ''
                        continue
                    else:
                        return (EOL, '\n')
                else:
                    # Any other delimiter becomes a DELIMITER token.
                    token = c
                    ttype = DELIMITER
            else:
                # The delimiter ends the token being built; leave it
                # for the next call.
                self._unget_char(c)
            break
        elif self.quoting:
            if c == '\\':
                c = self._get_char()
                if c == '':
                    raise dns.exception.UnexpectedEnd
                if c.isdigit():
                    # \DDD: three decimal digits naming one character.
                    c2 = self._get_char()
                    if c2 == '':
                        raise dns.exception.UnexpectedEnd
                    c3 = self._get_char()
                    # Fixed: this used to re-test c, so input ending
                    # after two digits was reported as a syntax error
                    # instead of an unexpected end.
                    if c3 == '':
                        raise dns.exception.UnexpectedEnd
                    if not (c2.isdigit() and c3.isdigit()):
                        raise dns.exception.SyntaxError
                    code = int(c) * 100 + int(c2) * 10 + int(c3)
                    if code > 255:
                        # Reject the value here rather than letting
                        # chr() fail with a ValueError.
                        raise dns.exception.SyntaxError(
                            'escaped octet value out of range')
                    c = chr(code)
            elif c == '\n':
                raise dns.exception.SyntaxError('newline in quoted string')
        elif c == '\\':
            # A backslash followed by a delimiter yields the delimiter
            # as an ordinary character; otherwise the backslash is
            # kept and the following character is read normally.
            c = self._get_char()
            if c == '' or c not in self.delimiters:
                self._unget_char(c)
                c = '\\'
        token += c
    if token == '' and ttype != QUOTED_STRING:
        if self.multiline:
            raise dns.exception.SyntaxError('unbalanced parentheses')
        ttype = EOF
    return (ttype, token)
295
# NOTE(review): the `def` line is missing from the listing this was
# reviewed from; the name `unget` is assumed -- confirm against the
# original file.
def unget(self, token):
    """Push a token back so that it can be read again.

    Only a single token can be held; pushing back a second one while
    the first is still pending is an error.

    @param token: the token to unget
    @type token: (int, string) token tuple
    @raises UngetBufferFull: there is already an ungotten token
    """

    if self.ungotten_token is not None:
        raise UngetBufferFull
    self.ungotten_token = token
311
# NOTE(review): the `def` line is missing from the listing this was
# reviewed from; the name `next` (the Python 2 iterator method) is
# assumed -- confirm against the original file.
def next(self):
    """Return the next token, ending the iteration at EOF.

    @rtype: (int, string)
    @raises StopIteration: the next token is an EOF token
    """

    item = self.get()
    if item[0] != EOF:
        return item
    raise StopIteration
321
324
325
326
def get_int(self):
    """Read the next token and return it as an integer.

    @raises dns.exception.SyntaxError: the token is not an identifier,
    or it contains anything other than digits
    @rtype: int
    """

    kind, text = self.get()
    if kind != IDENTIFIER:
        raise dns.exception.SyntaxError('expecting an identifier')
    if text.isdigit():
        return int(text)
    raise dns.exception.SyntaxError('expecting an integer')
340
# NOTE(review): the `def` line is missing from the listing this was
# reviewed from; the name `get_uint8` is assumed -- confirm against
# the original file.
def get_uint8(self):
    """Read the next token and return it as an 8-bit unsigned integer.

    @raises dns.exception.SyntaxError: the token is not an integer, or
    its value is outside 0..255
    @rtype: int
    """

    number = self.get_int()
    if not 0 <= number <= 255:
        raise dns.exception.SyntaxError(
            '%d is not an unsigned 8-bit integer' % number)
    return number
354
# NOTE(review): the `def` line is missing from the listing this was
# reviewed from; the name `get_uint16` is assumed -- confirm against
# the original file.
def get_uint16(self):
    """Read the next token and return it as a 16-bit unsigned integer.

    @raises dns.exception.SyntaxError: the token is not an integer, or
    its value is outside 0..65535
    @rtype: int
    """

    number = self.get_int()
    if not 0 <= number <= 65535:
        raise dns.exception.SyntaxError(
            '%d is not an unsigned 16-bit integer' % number)
    return number
368
# NOTE(review): the `def` line is missing from the listing this was
# reviewed from; the name `get_uint32` is assumed -- confirm against
# the original file.
def get_uint32(self):
    """Read the next token and interpret it as a 32-bit unsigned
    integer.

    @raises dns.exception.SyntaxError: the token is not an integer, or
    its value is greater than 4294967295
    @rtype: int
    """

    # Parsing is delegated to get_int(), which performs the same
    # identifier and digit checks this method used to repeat inline.
    value = self.get_int()
    # Fixed off-by-one: the largest 32-bit unsigned value is 2**32 - 1;
    # the previous bound of 4294967296 let 2**32 through.
    if value < 0 or value > 4294967295:
        raise dns.exception.SyntaxError(
            '%d is not an unsigned 32-bit integer' % value)
    return value
387
# NOTE(review): the `def` line is missing from the listing this was
# reviewed from; the name `get_string` is assumed -- confirm against
# the original file.
def get_string(self):
    """Read the next token and return its text as a string.

    @raises dns.exception.SyntaxError: the token is neither an
    identifier nor a quoted string
    @rtype: string
    """

    kind, text = self.get()
    if kind not in (IDENTIFIER, QUOTED_STRING):
        raise dns.exception.SyntaxError('expecting a string')
    return text
399
# NOTE(review): the `def` line is missing from the listing this was
# reviewed from; the name `get_name` and the `origin=None` default are
# assumed -- confirm against the original file.
def get_name(self, origin=None):
    """Read the next token and convert it to a DNS name.

    @param origin: passed through unchanged as the second argument of
    dns.name.from_text
    @raises dns.exception.SyntaxError: the token is not an identifier
    @rtype: dns.name.Name object"""

    kind, text = self.get()
    if kind != IDENTIFIER:
        raise dns.exception.SyntaxError('expecting an identifier')
    return dns.name.from_text(text, origin)
410
# NOTE(review): the `def` line is missing from the listing this was
# reviewed from; the name `get_eol` is assumed -- confirm against the
# original file.
def get_eol(self):
    """Read the next token and require it to be EOL or EOF.

    @raises dns.exception.SyntaxError: the token is of any other type
    @rtype: string
    """

    kind, text = self.get()
    if kind not in (EOL, EOF):
        raise dns.exception.SyntaxError(
            'expected EOL or EOF, got %d "%s"' % (kind, text))
    return text
424
430