1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16  """Tokenize DNS master file format""" 
 17   
 18  from io import StringIO 
 19  import sys 
 20   
 21  import dns.exception 
 22  import dns.name 
 23  import dns.ttl 
 24  from ._compat import long, text_type, binary_type 
 25   
 26  _DELIMITERS = { 
 27      ' ': True, 
 28      '\t': True, 
 29      '\n': True, 
 30      ';': True, 
 31      '(': True, 
 32      ')': True, 
 33      '"': True} 
 34   
 35  _QUOTING_DELIMITERS = {'"': True} 
 36   
 37  EOF = 0 
 38  EOL = 1 
 39  WHITESPACE = 2 
 40  IDENTIFIER = 3 
 41  QUOTED_STRING = 4 
 42  COMMENT = 5 
 43  DELIMITER = 6 
 44   
 45   
 47   
 48      """An attempt was made to unget a token when the unget buffer was full.""" 
  49   
 50   
 52   
 53      """A DNS master file format token. 
 54   
 55      @ivar ttype: The token type 
 56      @type ttype: int 
 57      @ivar value: The token value 
 58      @type value: string 
 59      @ivar has_escape: Does the token value contain escapes? 
 60      @type has_escape: bool 
 61      """ 
 62   
 63 -    def __init__(self, ttype, value='', has_escape=False): 
  64          """Initialize a token instance. 
 65   
 66          @param ttype: The token type 
 67          @type ttype: int 
 68          @param value: The token value 
 69          @type value: string 
 70          @param has_escape: Does the token value contain escapes? 
 71          @type has_escape: bool 
 72          """ 
 73          self.ttype = ttype 
 74          self.value = value 
 75          self.has_escape = has_escape 
  76   
 78          return self.ttype == EOF 
  79   
 81          return self.ttype == EOL 
  82   
 85   
 88   
 91   
 94   
 97   
 99          return self.ttype == EOL or self.ttype == EOF 
 100   
102          if not isinstance(other, Token): 
103              return False 
104          return (self.ttype == other.ttype and 
105                  self.value == other.value) 
 106   
108          if not isinstance(other, Token): 
109              return True 
110          return (self.ttype != other.ttype or 
111                  self.value != other.value) 
 112   
114          return '%d "%s"' % (self.ttype, self.value) 
 115   
144   
145       
146   
149   
151          return iter((self.ttype, self.value)) 
 152   
154          if i == 0: 
155              return self.ttype 
156          elif i == 1: 
157              return self.value 
158          else: 
159              raise IndexError 
  160   
161   
163   
164      """A DNS master file format tokenizer. 
165   
166      A token is a (type, value) tuple, where I{type} is an int, and 
167      I{value} is a string.  The valid types are EOF, EOL, WHITESPACE, 
168      IDENTIFIER, QUOTED_STRING, COMMENT, and DELIMITER. 
169   
170      @ivar file: The file to tokenize 
171      @type file: file 
172      @ivar ungotten_char: The most recently ungotten character, or None. 
173      @type ungotten_char: string 
174      @ivar ungotten_token: The most recently ungotten token, or None. 
175      @type ungotten_token: (int, string) token tuple 
176      @ivar multiline: The current multiline level.  This value is increased 
177      by one every time a '(' delimiter is read, and decreased by one every time 
178      a ')' delimiter is read. 
179      @type multiline: int 
180      @ivar quoting: This variable is true if the tokenizer is currently 
181      reading a quoted string. 
182      @type quoting: bool 
183      @ivar eof: This variable is true if the tokenizer has encountered EOF. 
184      @type eof: bool 
185      @ivar delimiters: The current delimiter dictionary. 
186      @type delimiters: dict 
187      @ivar line_number: The current line number 
188      @type line_number: int 
189      @ivar filename: A filename that will be returned by the L{where} method. 
190      @type filename: string 
191      """ 
192   
193 -    def __init__(self, f=sys.stdin, filename=None): 
 194          """Initialize a tokenizer instance. 
195   
196          @param f: The file to tokenize.  The default is sys.stdin. 
197          This parameter may also be a string, in which case the tokenizer 
198          will take its input from the contents of the string. 
199          @type f: file or string 
200          @param filename: the name of the filename that the L{where} method 
201          will return. 
202          @type filename: string 
203          """ 
204   
205          if isinstance(f, text_type): 
206              f = StringIO(f) 
207              if filename is None: 
208                  filename = '<string>' 
209          elif isinstance(f, binary_type): 
210              f = StringIO(f.decode()) 
211              if filename is None: 
212                  filename = '<string>' 
213          else: 
214              if filename is None: 
215                  if f is sys.stdin: 
216                      filename = '<stdin>' 
217                  else: 
218                      filename = '<file>' 
219          self.file = f 
220          self.ungotten_char = None 
221          self.ungotten_token = None 
222          self.multiline = 0 
223          self.quoting = False 
224          self.eof = False 
225          self.delimiters = _DELIMITERS 
226          self.line_number = 1 
227          self.filename = filename 
 228   
230          """Read a character from input. 
231          @rtype: string 
232          """ 
233   
234          if self.ungotten_char is None: 
235              if self.eof: 
236                  c = '' 
237              else: 
238                  c = self.file.read(1) 
239                  if c == '': 
240                      self.eof = True 
241                  elif c == '\n': 
242                      self.line_number += 1 
243          else: 
244              c = self.ungotten_char 
245              self.ungotten_char = None 
246          return c 
 247   
249          """Return the current location in the input. 
250   
251          @rtype: (string, int) tuple.  The first item is the filename of 
252          the input, the second is the current line number. 
253          """ 
254   
255          return (self.filename, self.line_number) 
 256   
258          """Unget a character. 
259   
260          The unget buffer for characters is only one character large; it is 
261          an error to try to unget a character when the unget buffer is not 
262          empty. 
263   
264          @param c: the character to unget 
265          @type c: string 
266          @raises UngetBufferFull: there is already an ungotten char 
267          """ 
268   
269          if self.ungotten_char is not None: 
270              raise UngetBufferFull 
271          self.ungotten_char = c 
 272   
274          """Consume input until a non-whitespace character is encountered. 
275   
276          The non-whitespace character is then ungotten, and the number of 
277          whitespace characters consumed is returned. 
278   
279          If the tokenizer is in multiline mode, then newlines are whitespace. 
280   
281          @rtype: int 
282          """ 
283   
284          skipped = 0 
285          while True: 
286              c = self._get_char() 
287              if c != ' ' and c != '\t': 
288                  if (c != '\n') or not self.multiline: 
289                      self._unget_char(c) 
290                      return skipped 
291              skipped += 1 
 292   
293 -    def get(self, want_leading=False, want_comment=False): 
 294          """Get the next token. 
295   
296          @param want_leading: If True, return a WHITESPACE token if the 
297          first character read is whitespace.  The default is False. 
298          @type want_leading: bool 
299          @param want_comment: If True, return a COMMENT token if the 
300          first token read is a comment.  The default is False. 
301          @type want_comment: bool 
302          @rtype: Token object 
303          @raises dns.exception.UnexpectedEnd: input ended prematurely 
304          @raises dns.exception.SyntaxError: input was badly formed 
305          """ 
306   
307          if self.ungotten_token is not None: 
308              token = self.ungotten_token 
309              self.ungotten_token = None 
310              if token.is_whitespace(): 
311                  if want_leading: 
312                      return token 
313              elif token.is_comment(): 
314                  if want_comment: 
315                      return token 
316              else: 
317                  return token 
318          skipped = self.skip_whitespace() 
319          if want_leading and skipped > 0: 
320              return Token(WHITESPACE, ' ') 
321          token = '' 
322          ttype = IDENTIFIER 
323          has_escape = False 
324          while True: 
325              c = self._get_char() 
326              if c == '' or c in self.delimiters: 
327                  if c == '' and self.quoting: 
328                      raise dns.exception.UnexpectedEnd 
329                  if token == '' and ttype != QUOTED_STRING: 
330                      if c == '(': 
331                          self.multiline += 1 
332                          self.skip_whitespace() 
333                          continue 
334                      elif c == ')': 
335                          if self.multiline <= 0: 
336                              raise dns.exception.SyntaxError 
337                          self.multiline -= 1 
338                          self.skip_whitespace() 
339                          continue 
340                      elif c == '"': 
341                          if not self.quoting: 
342                              self.quoting = True 
343                              self.delimiters = _QUOTING_DELIMITERS 
344                              ttype = QUOTED_STRING 
345                              continue 
346                          else: 
347                              self.quoting = False 
348                              self.delimiters = _DELIMITERS 
349                              self.skip_whitespace() 
350                              continue 
351                      elif c == '\n': 
352                          return Token(EOL, '\n') 
353                      elif c == ';': 
354                          while 1: 
355                              c = self._get_char() 
356                              if c == '\n' or c == '': 
357                                  break 
358                              token += c 
359                          if want_comment: 
360                              self._unget_char(c) 
361                              return Token(COMMENT, token) 
362                          elif c == '': 
363                              if self.multiline: 
364                                  raise dns.exception.SyntaxError( 
365                                      'unbalanced parentheses') 
366                              return Token(EOF) 
367                          elif self.multiline: 
368                              self.skip_whitespace() 
369                              token = '' 
370                              continue 
371                          else: 
372                              return Token(EOL, '\n') 
373                      else: 
374                           
375                           
376                           
377                          token = c 
378                          ttype = DELIMITER 
379                  else: 
380                      self._unget_char(c) 
381                  break 
382              elif self.quoting: 
383                  if c == '\\': 
384                      c = self._get_char() 
385                      if c == '': 
386                          raise dns.exception.UnexpectedEnd 
387                      if c.isdigit(): 
388                          c2 = self._get_char() 
389                          if c2 == '': 
390                              raise dns.exception.UnexpectedEnd 
391                          c3 = self._get_char() 
392                          if c == '': 
393                              raise dns.exception.UnexpectedEnd 
394                          if not (c2.isdigit() and c3.isdigit()): 
395                              raise dns.exception.SyntaxError 
396                          c = chr(int(c) * 100 + int(c2) * 10 + int(c3)) 
397                  elif c == '\n': 
398                      raise dns.exception.SyntaxError('newline in quoted string') 
399              elif c == '\\': 
400                   
401                   
402                   
403                   
404                  token += c 
405                  has_escape = True 
406                  c = self._get_char() 
407                  if c == '' or c == '\n': 
408                      raise dns.exception.UnexpectedEnd 
409              token += c 
410          if token == '' and ttype != QUOTED_STRING: 
411              if self.multiline: 
412                  raise dns.exception.SyntaxError('unbalanced parentheses') 
413              ttype = EOF 
414          return Token(ttype, token, has_escape) 
 415   
417          """Unget a token. 
418   
419          The unget buffer for tokens is only one token large; it is 
420          an error to try to unget a token when the unget buffer is not 
421          empty. 
422   
423          @param token: the token to unget 
424          @type token: Token object 
425          @raises UngetBufferFull: there is already an ungotten token 
426          """ 
427   
428          if self.ungotten_token is not None: 
429              raise UngetBufferFull 
430          self.ungotten_token = token 
 431   
433          """Return the next item in an iteration. 
434          @rtype: (int, string) 
435          """ 
436   
437          token = self.get() 
438          if token.is_eof(): 
439              raise StopIteration 
440          return token 
 441   
442      __next__ = next 
443   
446   
447       
448   
462   
464          """Read the next token and interpret it as an 8-bit unsigned 
465          integer. 
466   
467          @raises dns.exception.SyntaxError: 
468          @rtype: int 
469          """ 
470   
471          value = self.get_int() 
472          if value < 0 or value > 255: 
473              raise dns.exception.SyntaxError( 
474                  '%d is not an unsigned 8-bit integer' % value) 
475          return value 
 476   
478          """Read the next token and interpret it as a 16-bit unsigned 
479          integer. 
480   
481          @raises dns.exception.SyntaxError: 
482          @rtype: int 
483          """ 
484   
485          value = self.get_int() 
486          if value < 0 or value > 65535: 
487              raise dns.exception.SyntaxError( 
488                  '%d is not an unsigned 16-bit integer' % value) 
489          return value 
 490   
509   
521   
523          """Read the next token and raise an exception if it is not an identifier. 
524   
525          @raises dns.exception.SyntaxError: 
526          @rtype: string 
527          """ 
528   
529          token = self.get().unescape() 
530          if not token.is_identifier(): 
531              raise dns.exception.SyntaxError('expecting an identifier') 
532          return token.value 
 533   
544   
546          """Read the next token and raise an exception if it isn't EOL or 
547          EOF. 
548   
549          @raises dns.exception.SyntaxError: 
550          @rtype: string 
551          """ 
552   
553          token = self.get() 
554          if not token.is_eol_or_eof(): 
555              raise dns.exception.SyntaxError( 
556                  'expected EOL or EOF, got %d "%s"' % (token.ttype, 
557                                                        token.value)) 
558          return token.value 
 559   
 565