Hmmm I do remember seeing something in
my book about creating a custom parser, but I'm not quite there yet. If you are familiar with python you could take a look at these examples from my book.
parser1.py:
Code:
# the parser (syntax analyser, evaluates during parse)
########################################################
class UndefinedError(Exception): pass
from scanner import Scanner, LexicalError, SyntaxError
class Parser:
def __init__(self, text=''):
self.lex = Scanner(text) # embed a scanner
self.vars = {'pi':3.14159} # add a variable
def parse(self, *text):
if text: # main entry-point
self.lex.newtext(text[0]) # reuse this parser?
try:
self.lex.scan() # get first token
self.Goal() # parse a sentence
except SyntaxError:
print 'Syntax Error at column:', self.lex.start
self.lex.showerror()
except LexicalError:
print 'Lexical Error at column:', self.lex.start
self.lex.showerror()
except UndefinedError, name:
print "'%s' is undefined at column:" % name, self.lex.start
self.lex.showerror()
def Goal(self):
if self.lex.token in ['num', 'var', '(']:
val = self.Expr()
self.lex.match('\0') # expression?
print val
elif self.lex.token == 'set': # set command?
self.Assign()
self.lex.match('\0')
else:
raise SyntaxError
def Assign(self):
self.lex.match('set')
var = self.lex.match('var')
val = self.Expr()
self.vars[var] = val # assign name in dict
def Expr(self):
left = self.Factor()
while 1:
if self.lex.token in ['\0', ')']:
return left
elif self.lex.token == '+':
self.lex.scan()
left = left + self.Factor()
elif self.lex.token == '-':
self.lex.scan()
left = left - self.Factor()
else:
raise SyntaxError
def Factor(self):
left = self.Term()
while 1:
if self.lex.token in ['+', '-', '\0', ')']:
return left
elif self.lex.token == '*':
self.lex.scan()
left = left * self.Term()
elif self.lex.token == '/':
self.lex.scan()
left = left / self.Term()
else:
raise SyntaxError
def Term(self):
if self.lex.token == 'num':
val = self.lex.match('num') # numbers
return val
elif self.lex.token == 'var':
if self.vars.has_key(self.lex.value):
val = self.vars[self.lex.value] # lookup name's value
self.lex.scan()
return val
else:
raise UndefinedError, self.lex.value
elif self.lex.token == '(':
self.lex.scan()
val = self.Expr() # sub-expression
self.lex.match(')')
return val
else:
raise SyntaxError
if __name__ == '__main__':
import testparser # self-test code
testparser.test(Parser, 'parser1') # test local Parser
parser2.py
Code:
TraceDefault = False
class UndefinedError(Exception): pass
from scanner import Scanner, SyntaxError, LexicalError
####################################################
# the interpreter (a smart objects tree)
####################################################
class TreeNode:
def validate(self, dict): # default error check
pass
def apply(self, dict): # default evaluator
pass
def trace(self, level): # default unparser
print '.'*level + '<empty>'
# ROOTS
class BinaryNode(TreeNode):
def __init__(self, left, right): # inherited methods
self.left, self.right = left, right # left/right branches
def validate(self, dict):
self.left.validate(dict) # recurse down branches
self.right.validate(dict)
def trace(self, level):
print '.'*level + '[' + self.label + ']'
self.left.trace(level+3)
self.right.trace(level+3)
class TimesNode(BinaryNode):
label = '*'
def apply(self, dict):
return self.left.apply(dict) * self.right.apply(dict)
class DivideNode(BinaryNode):
label = '/'
def apply(self, dict):
return self.left.apply(dict) / self.right.apply(dict)
class PlusNode(BinaryNode):
label = '+'
def apply(self, dict):
return self.left.apply(dict) + self.right.apply(dict)
class MinusNode(BinaryNode):
label = '-'
def apply(self, dict):
return self.left.apply(dict) - self.right.apply(dict)
# LEAVES
class NumNode(TreeNode):
def __init__(self, num):
self.num = num # already numeric
def apply(self, dict): # use default validate
return self.num
def trace(self, level):
print '.'*level + repr(self.num) # as code, was `self.num`
class VarNode(TreeNode):
def __init__(self, text, start):
self.name = text # variable name
self.column = start # column for errors
def validate(self, dict):
if not dict.has_key(self.name):
raise UndefinedError, (self.name, self.column)
def apply(self, dict):
return dict[self.name] # validate before apply
def assign(self, value, dict):
dict[self.name] = value # local extension
def trace(self, level):
print '.'*level + self.name
# COMPOSITES
class AssignNode(TreeNode):
def __init__(self, var, val):
self.var, self.val = var, val
def validate(self, dict):
self.val.validate(dict) # don't validate var
def apply(self, dict):
self.var.assign( self.val.apply(dict), dict )
def trace(self, level):
print '.'*level + 'set '
self.var.trace(level + 3)
self.val.trace(level + 3)
####################################################
# the parser (syntax analyser, tree builder)
####################################################
class Parser:
def __init__(self, text=''):
self.lex = Scanner(text) # make a scanner
self.vars = {'pi':3.14159} # add constants
self.traceme = TraceDefault
def parse(self, *text): # external interface
if text:
self.lex.newtext(text[0]) # reuse with new text
tree = self.analyse() # parse string
if tree:
if self.traceme: # dump parse-tree?
print; tree.trace(0)
if self.errorCheck(tree): # check names
self.interpret(tree) # evaluate tree
def analyse(self):
try:
self.lex.scan() # get first token
return self.Goal() # build a parse-tree
except SyntaxError:
print 'Syntax Error at column:', self.lex.start
self.lex.showerror()
except LexicalError:
print 'Lexical Error at column:', self.lex.start
self.lex.showerror()
def errorCheck(self, tree):
try:
tree.validate(self.vars) # error checker
return 'ok'
except UndefinedError, instance: # args is a tuple
varinfo = instance.args # instance is a sequence
print "'%s' is undefined at column: %d" % varinfo
self.lex.start = varinfo[1]
self.lex.showerror() # returns None
def interpret(self, tree):
result = tree.apply(self.vars) # tree evals itself
if result != None: # ignore 'set' result
print result
def Goal(self):
if self.lex.token in ['num', 'var', '(']:
tree = self.Expr()
self.lex.match('\0')
return tree
elif self.lex.token == 'set':
tree = self.Assign()
self.lex.match('\0')
return tree
else:
raise SyntaxError
def Assign(self):
self.lex.match('set')
vartree = VarNode(self.lex.value, self.lex.start)
self.lex.match('var')
valtree = self.Expr()
return AssignNode(vartree, valtree) # two subtrees
def Expr(self):
left = self.Factor() # left subtree
while 1:
if self.lex.token in ['\0', ')']:
return left
elif self.lex.token == '+':
self.lex.scan()
left = PlusNode(left, self.Factor()) # add root-node
elif self.lex.token == '-':
self.lex.scan()
left = MinusNode(left, self.Factor()) # grows up/right
else:
raise SyntaxError
def Factor(self):
left = self.Term()
while 1:
if self.lex.token in ['+', '-', '\0', ')']:
return left
elif self.lex.token == '*':
self.lex.scan()
left = TimesNode(left, self.Term())
elif self.lex.token == '/':
self.lex.scan()
left = DivideNode(left, self.Term())
else:
raise SyntaxError
def Term(self):
if self.lex.token == 'num':
leaf = NumNode(self.lex.match('num'))
return leaf
elif self.lex.token == 'var':
leaf = VarNode(self.lex.value, self.lex.start)
self.lex.scan()
return leaf
elif self.lex.token == '(':
self.lex.scan()
tree = self.Expr()
self.lex.match(')')
return tree
else:
raise SyntaxError
####################################################
# self-test code: use my parser, parser1's tester
####################################################
if __name__ == '__main__':
import testparser
testparser.test(Parser, 'parser2') # run with Parser class here
scanner.py:
Code:
####################################################
# the scanner (lexical analyser)
####################################################
import string
class SyntaxError(Exception): pass # local errors
class LexicalError(Exception): pass # used to be strings
class Scanner:
def __init__(self, text):
self.next = 0
self.text = text + '\0'
def newtext(self, text):
Scanner.__init__(self, text)
def showerror(self):
print '=> ', self.text
print '=> ', (' ' * self.start) + '^'
def match(self, token):
if self.token != token:
raise SyntaxError, [token]
else:
value = self.value
if self.token != '\0':
self.scan() # next token/value
return value # return prior value
def scan(self):
self.value = None
ix = self.next
while self.text[ix] in string.whitespace:
ix = ix+1
self.start = ix
if self.text[ix] in ['(', ')', '-', '+', '/', '*', '\0']:
self.token = self.text[ix]
ix = ix+1
elif self.text[ix] in string.digits:
str = ''
while self.text[ix] in string.digits:
str = str + self.text[ix]
ix = ix+1
if self.text[ix] == '.':
str = str + '.'
ix = ix+1
while self.text[ix] in string.digits:
str = str + self.text[ix]
ix = ix+1
self.token = 'num'
self.value = float(str)
else:
self.token = 'num'
self.value = long(str)
elif self.text[ix] in string.letters:
str = ''
while self.text[ix] in (string.digits + string.letters):
str = str + self.text[ix]
ix = ix+1
if str.lower() == 'set':
self.token = 'set'
else:
self.token = 'var'
self.value = str
else:
raise LexicalError
self.next = ix
testparser.py
Code:
####################################################
# parser test code
####################################################
def test(ParserClass, msg):
print msg, ParserClass
x = ParserClass('4 / 2 + 3') # allow different Parser's
x.parse()
x.parse('3 + 4 / 2') # like eval('3 + 4 / 2')...
x.parse('(3 + 4) / 2')
x.parse('4 / (2 + 3)')
x.parse('4.0 / (2 + 3)')
x.parse('4 / (2.0 + 3)')
x.parse('4.0 / 2 * 3')
x.parse('(4.0 / 2) * 3')
x.parse('4.0 / (2 * 3)')
x.parse('(((3))) + 1')
y = ParserClass()
y.parse('set a 4 / 2 + 1')
y.parse('a * 3')
y.parse('set b 12 / a')
y.parse('b')
z = ParserClass()
z.parse('set a 99')
z.parse('set a a + 1')
z.parse('a')
z = ParserClass()
z.parse('pi')
z.parse('2 * pi')
z.parse('1.234 + 2.1')
def interact(ParserClass): # command-line entry
print ParserClass
x = ParserClass()
while 1:
cmd = raw_input('Enter=> ')
if cmd == 'stop':
break
x.parse(cmd)
grammar.txt:
Code:
goal -> <expr> END [number, variable, ( ]
goal -> <assign> END [set]
assign -> 'set' <variable> <expr> [set]
expr -> <factor> <expr-tail> [number, variable, ( ]
expr-tail -> ^ [END, ) ]
expr-tail -> '+' <factor> <expr-tail> [+]
expr-tail -> '-' <factor> <expr-tail> [-]
factor -> <term> <factor-tail> [number, variable, ( ]
factor-tail -> ^ [+, -, END, ) ]
factor-tail -> '*' <term> <factor-tail> [*]
factor-tail -> '/' <term> <factor-tail> [/]
term -> <number> [number]
term -> <variable> [variable]
term -> '(' <expr> ')' [(]
tokens: (, ), num, var, -, +, /, *, set, end
__init__.py:
Sorry if I'm not helping, I'm not quite that advanced yet.
Anyone can
download these examples and they are open for anyone to share, the author only has a problem if a large part of the examples in the book are used to make money.