| 1 | n/a | # Module 'panelparser' |
|---|
| 2 | n/a | # |
|---|
| 3 | n/a | # Parse S-expressions output by the Panel Editor |
|---|
| 4 | n/a | # (which is written in Scheme so it can't help writing S-expressions). |
|---|
| 5 | n/a | # |
|---|
| 6 | n/a | # See notes at end of file. |
|---|
| 7 | n/a | from warnings import warnpy3k |
|---|
| 8 | n/a | warnpy3k("the panelparser module has been removed in Python 3.0", stacklevel=2) |
|---|
| 9 | n/a | del warnpy3k |
|---|
| 10 | n/a | |
|---|
| 11 | n/a | |
|---|
# Characters that separate tokens without being tokens themselves.
# '\r' is included so that input with DOS line endings does not produce
# a stray '\r' token.
whitespace = ' \t\r\n'

# Characters that always form a one-character token (outside strings).
operators = '()\''

# Every character that ends a bare (unquoted) token.
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
# - Whitespace separates tokens and is dropped.
# - A semicolon starts a comment that runs up to the next newline
#   (or to the end of s when there is none).
# - A double quote starts a string token.  The token keeps its quotes
#   and any backslashes; a backslash makes the scanner skip over the
#   character that follows it.  An unterminated string extends to the
#   end of s.
# - Each of ( ) ' is a token by itself.
# - Anything else is a bare token that runs up to the next separator.
#
def tokenize_string(s):
    tokens = []
    n = len(s)
    pos = 0
    while pos < n:
        c = s[pos]
        if c in whitespace:
            pos += 1
        elif c == ';':
            # Skip the comment; resume after the newline if there is one.
            newline = s.find('\n', pos)
            if newline < 0:
                break
            pos = newline + 1
        elif c == '"':
            end = pos + 1
            while end < n:
                c = s[end]
                end += 1
                if c == '"':
                    break
                if c == '\\':
                    # Step over the escaped character so that an
                    # escaped quote does not close the string.
                    end += 1
            # end may be n+1 after a trailing backslash; slicing clamps it.
            tokens.append(s[pos:end])
            pos = end
        elif c in operators:
            tokens.append(c)
            pos += 1
        else:
            end = pos + 1
            while end < n and s[end] not in separators:
                end += 1
            tokens.append(s[pos:end])
            pos = end
    return tokens
|---|
| 50 | n/a | |
|---|
| 51 | n/a | |
|---|
# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
# fp only needs a readline() method.  Lines are read until readline()
# returns an empty (false) value, and each line is passed to
# tokenize_string().  The per-line results are collected with
# list.extend() so the accumulated list is not re-copied for every line.
#
def tokenize_file(fp):
    tokens = []
    while 1:
        line = fp.readline()
        if not line:
            break
        tokens.extend(tokenize_string(line))
    return tokens
|---|
| 62 | n/a | |
|---|
| 63 | n/a | |
|---|
# Exception raised by parse_expr.
#
# This is a real exception class: string exceptions can no longer be
# raised, so a plain string here would turn every syntax error into a
# TypeError.  The lowercase name is kept so that existing
# "except syntax_error" clauses keep working.
#
class syntax_error(Exception):
    pass


# Parse one parenthesized expression starting at tokens[pos].
# Return a pair (expr, next_pos) where next_pos is the index of the
# first token after the matching ')'.
# May raise syntax_error.
#
def _parse_expr_at(tokens, pos):
    if pos >= len(tokens) or tokens[pos] != '(':
        raise syntax_error('expected "("')
    pos += 1
    expr = []
    while 1:
        if pos >= len(tokens):
            raise syntax_error('missing ")"')
        token = tokens[pos]
        if token == ')':
            return expr, pos + 1
        if token == '(':
            # Nested list: recurse, then continue after its ')'.
            subexpr, pos = _parse_expr_at(tokens, pos)
            expr.append(subexpr)
        else:
            expr.append(token)
            pos += 1


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the s-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error (if the first token is not "(" or a ")" is
# missing).
#
# The scan walks the token list by index and slices it only once at the
# end, instead of copying the remaining tokens after every token.
#
def parse_expr(tokens):
    expr, pos = _parse_expr_at(tokens, 0)
    return expr, tokens[pos:]
|---|
| 92 | n/a | |
|---|
| 93 | n/a | |
|---|
# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
# The whole file is tokenized first; parse_expr() is then applied
# repeatedly, each call consuming one top-level expression, until no
# tokens remain.  Errors raised by parse_expr() are not caught here.
#
def parse_file(fp):
    remaining = tokenize_file(fp)
    parsed = []
    while remaining:
        toplevel, remaining = parse_expr(remaining)
        parsed.append(toplevel)
    return parsed
|---|
| 104 | n/a | |
|---|
| 105 | n/a | |
|---|
| 106 | n/a | # EXAMPLE: |
|---|
| 107 | n/a | # |
|---|
| 108 | n/a | # The input |
|---|
| 109 | n/a | # '(hip (hop hur-ray))' |
|---|
| 110 | n/a | # |
|---|
| 111 | n/a | # passed to tokenize_string() returns the token list |
|---|
| 112 | n/a | # ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')'] |
|---|
| 113 | n/a | # |
|---|
| 114 | n/a | # When this is passed to parse_expr() it returns the expression |
|---|
| 115 | n/a | # ['hip', ['hop', 'hur-ray']] |
|---|
| 116 | n/a | # plus an empty token list (because there are no tokens left). |
|---|
| 117 | n/a | # |
|---|
| 118 | n/a | # When a file containing the example is passed to parse_file() it returns |
|---|
| 119 | n/a | # a list whose only element is the output of parse_expr() above: |
|---|
| 120 | n/a | # [['hip', ['hop', 'hur-ray']]] |
|---|
| 121 | n/a | |
|---|
| 122 | n/a | |
|---|
| 123 | n/a | # TOKENIZING: |
|---|
| 124 | n/a | # |
|---|
| 125 | n/a | # Comments start with semicolon (;) and continue till the end of the line. |
|---|
| 126 | n/a | # |
|---|
| 127 | n/a | # Tokens are separated by whitespace, except the following characters |
|---|
| 128 | n/a | # always form a separate token (outside strings): |
|---|
| 129 | n/a | # ( ) ' |
|---|
| 130 | n/a | # Strings are enclosed in double quotes (") and backslash (\) is used |
|---|
| 131 | n/a | # as escape character in strings. |
|---|