Posts: 123
Threads: 7
Joined: Jan 2010
Reputation:
0
Hi, I'm trying to build a special parser in Python.
All this parser does, is take source code in a scripting language of my invention, and parser it into another scripting language, called JASS.
This parser has to be able to parse only between two specific keywords, so it can remain compatible with straight JASS, and other scripting languages.
It also needs to be able to search for certain blocks inside of the source script, and if it finds them, merges them with the very first definition of this specific block.
And finally, I need to be able to move several function/method declarations around based on the current namepace they're in, and whether functions in that namespace call functions in another (so if the namespace A calls a function in the namespace B, A goes before B to make sure the function can be called).
Here's a small example of what the two languages look like:
Code: // before
library TestCode
integer myInt = 5
function myFunc()
BJDebugMsg("Hello World");
// after
globals
integer myInt
endglobals
function myFunc takes nothing returns nothing
call BJDebugMsg("Hello World")
endfunction
That's just the basics. This has a lot more features (preprocessing commands, OOP-implementation, etc)
Does anyone have any kind of information, or links to (well documented) tools that can help me?
Posts: 245
Threads: 33
Joined: Oct 2009
Reputation:
10
Hmmm I do remember seeing something in my book about creating a custom parser, but I'm not quite there yet. If you are familiar with python you could take a look at these examples from my book.
parser1.py:
Code: # the parser (syntax analyser, evaluates during parse)
########################################################
class UndefinedError(Exception): pass
from scanner import Scanner, LexicalError, SyntaxError
class Parser:
def __init__(self, text=''):
self.lex = Scanner(text) # embed a scanner
self.vars = {'pi':3.14159} # add a variable
def parse(self, *text):
if text: # main entry-point
self.lex.newtext(text[0]) # reuse this parser?
try:
self.lex.scan() # get first token
self.Goal() # parse a sentence
except SyntaxError:
print 'Syntax Error at column:', self.lex.start
self.lex.showerror()
except LexicalError:
print 'Lexical Error at column:', self.lex.start
self.lex.showerror()
except UndefinedError, name:
print "'%s' is undefined at column:" % name, self.lex.start
self.lex.showerror()
def Goal(self):
if self.lex.token in ['num', 'var', '(']:
val = self.Expr()
self.lex.match('\0') # expression?
print val
elif self.lex.token == 'set': # set command?
self.Assign()
self.lex.match('\0')
else:
raise SyntaxError
def Assign(self):
self.lex.match('set')
var = self.lex.match('var')
val = self.Expr()
self.vars[var] = val # assign name in dict
def Expr(self):
left = self.Factor()
while 1:
if self.lex.token in ['\0', ')']:
return left
elif self.lex.token == '+':
self.lex.scan()
left = left + self.Factor()
elif self.lex.token == '-':
self.lex.scan()
left = left - self.Factor()
else:
raise SyntaxError
def Factor(self):
left = self.Term()
while 1:
if self.lex.token in ['+', '-', '\0', ')']:
return left
elif self.lex.token == '*':
self.lex.scan()
left = left * self.Term()
elif self.lex.token == '/':
self.lex.scan()
left = left / self.Term()
else:
raise SyntaxError
def Term(self):
if self.lex.token == 'num':
val = self.lex.match('num') # numbers
return val
elif self.lex.token == 'var':
if self.vars.has_key(self.lex.value):
val = self.vars[self.lex.value] # lookup name's value
self.lex.scan()
return val
else:
raise UndefinedError, self.lex.value
elif self.lex.token == '(':
self.lex.scan()
val = self.Expr() # sub-expression
self.lex.match(')')
return val
else:
raise SyntaxError
if __name__ == '__main__':
import testparser # self-test code
testparser.test(Parser, 'parser1') # test local Parser
parser2.py
Code: TraceDefault = False
class UndefinedError(Exception): pass
from scanner import Scanner, SyntaxError, LexicalError
####################################################
# the interpreter (a smart objects tree)
####################################################
class TreeNode:
def validate(self, dict): # default error check
pass
def apply(self, dict): # default evaluator
pass
def trace(self, level): # default unparser
print '.'*level + '<empty>'
# ROOTS
class BinaryNode(TreeNode):
def __init__(self, left, right): # inherited methods
self.left, self.right = left, right # left/right branches
def validate(self, dict):
self.left.validate(dict) # recurse down branches
self.right.validate(dict)
def trace(self, level):
print '.'*level + '[' + self.label + ']'
self.left.trace(level+3)
self.right.trace(level+3)
class TimesNode(BinaryNode):
label = '*'
def apply(self, dict):
return self.left.apply(dict) * self.right.apply(dict)
class DivideNode(BinaryNode):
label = '/'
def apply(self, dict):
return self.left.apply(dict) / self.right.apply(dict)
class PlusNode(BinaryNode):
label = '+'
def apply(self, dict):
return self.left.apply(dict) + self.right.apply(dict)
class MinusNode(BinaryNode):
label = '-'
def apply(self, dict):
return self.left.apply(dict) - self.right.apply(dict)
# LEAVES
class NumNode(TreeNode):
def __init__(self, num):
self.num = num # already numeric
def apply(self, dict): # use default validate
return self.num
def trace(self, level):
print '.'*level + repr(self.num) # as code, was `self.num`
class VarNode(TreeNode):
def __init__(self, text, start):
self.name = text # variable name
self.column = start # column for errors
def validate(self, dict):
if not dict.has_key(self.name):
raise UndefinedError, (self.name, self.column)
def apply(self, dict):
return dict[self.name] # validate before apply
def assign(self, value, dict):
dict[self.name] = value # local extension
def trace(self, level):
print '.'*level + self.name
# COMPOSITES
class AssignNode(TreeNode):
def __init__(self, var, val):
self.var, self.val = var, val
def validate(self, dict):
self.val.validate(dict) # don't validate var
def apply(self, dict):
self.var.assign( self.val.apply(dict), dict )
def trace(self, level):
print '.'*level + 'set '
self.var.trace(level + 3)
self.val.trace(level + 3)
####################################################
# the parser (syntax analyser, tree builder)
####################################################
class Parser:
def __init__(self, text=''):
self.lex = Scanner(text) # make a scanner
self.vars = {'pi':3.14159} # add constants
self.traceme = TraceDefault
def parse(self, *text): # external interface
if text:
self.lex.newtext(text[0]) # reuse with new text
tree = self.analyse() # parse string
if tree:
if self.traceme: # dump parse-tree?
print; tree.trace(0)
if self.errorCheck(tree): # check names
self.interpret(tree) # evaluate tree
def analyse(self):
try:
self.lex.scan() # get first token
return self.Goal() # build a parse-tree
except SyntaxError:
print 'Syntax Error at column:', self.lex.start
self.lex.showerror()
except LexicalError:
print 'Lexical Error at column:', self.lex.start
self.lex.showerror()
def errorCheck(self, tree):
try:
tree.validate(self.vars) # error checker
return 'ok'
except UndefinedError, instance: # args is a tuple
varinfo = instance.args # instance is a sequence
print "'%s' is undefined at column: %d" % varinfo
self.lex.start = varinfo[1]
self.lex.showerror() # returns None
def interpret(self, tree):
result = tree.apply(self.vars) # tree evals itself
if result != None: # ignore 'set' result
print result
def Goal(self):
if self.lex.token in ['num', 'var', '(']:
tree = self.Expr()
self.lex.match('\0')
return tree
elif self.lex.token == 'set':
tree = self.Assign()
self.lex.match('\0')
return tree
else:
raise SyntaxError
def Assign(self):
self.lex.match('set')
vartree = VarNode(self.lex.value, self.lex.start)
self.lex.match('var')
valtree = self.Expr()
return AssignNode(vartree, valtree) # two subtrees
def Expr(self):
left = self.Factor() # left subtree
while 1:
if self.lex.token in ['\0', ')']:
return left
elif self.lex.token == '+':
self.lex.scan()
left = PlusNode(left, self.Factor()) # add root-node
elif self.lex.token == '-':
self.lex.scan()
left = MinusNode(left, self.Factor()) # grows up/right
else:
raise SyntaxError
def Factor(self):
left = self.Term()
while 1:
if self.lex.token in ['+', '-', '\0', ')']:
return left
elif self.lex.token == '*':
self.lex.scan()
left = TimesNode(left, self.Term())
elif self.lex.token == '/':
self.lex.scan()
left = DivideNode(left, self.Term())
else:
raise SyntaxError
def Term(self):
if self.lex.token == 'num':
leaf = NumNode(self.lex.match('num'))
return leaf
elif self.lex.token == 'var':
leaf = VarNode(self.lex.value, self.lex.start)
self.lex.scan()
return leaf
elif self.lex.token == '(':
self.lex.scan()
tree = self.Expr()
self.lex.match(')')
return tree
else:
raise SyntaxError
####################################################
# self-test code: use my parser, parser1's tester
####################################################
if __name__ == '__main__':
import testparser
testparser.test(Parser, 'parser2') # run with Parser class here
scanner.py:
Code: ####################################################
# the scanner (lexical analyser)
####################################################
import string
class SyntaxError(Exception): pass # local errors
class LexicalError(Exception): pass # used to be strings
class Scanner:
def __init__(self, text):
self.next = 0
self.text = text + '\0'
def newtext(self, text):
Scanner.__init__(self, text)
def showerror(self):
print '=> ', self.text
print '=> ', (' ' * self.start) + '^'
def match(self, token):
if self.token != token:
raise SyntaxError, [token]
else:
value = self.value
if self.token != '\0':
self.scan() # next token/value
return value # return prior value
def scan(self):
self.value = None
ix = self.next
while self.text[ix] in string.whitespace:
ix = ix+1
self.start = ix
if self.text[ix] in ['(', ')', '-', '+', '/', '*', '\0']:
self.token = self.text[ix]
ix = ix+1
elif self.text[ix] in string.digits:
str = ''
while self.text[ix] in string.digits:
str = str + self.text[ix]
ix = ix+1
if self.text[ix] == '.':
str = str + '.'
ix = ix+1
while self.text[ix] in string.digits:
str = str + self.text[ix]
ix = ix+1
self.token = 'num'
self.value = float(str)
else:
self.token = 'num'
self.value = long(str)
elif self.text[ix] in string.letters:
str = ''
while self.text[ix] in (string.digits + string.letters):
str = str + self.text[ix]
ix = ix+1
if str.lower() == 'set':
self.token = 'set'
else:
self.token = 'var'
self.value = str
else:
raise LexicalError
self.next = ix
testparser.py
Code: ####################################################
# parser test code
####################################################
def test(ParserClass, msg):
print msg, ParserClass
x = ParserClass('4 / 2 + 3') # allow different Parser's
x.parse()
x.parse('3 + 4 / 2') # like eval('3 + 4 / 2')...
x.parse('(3 + 4) / 2')
x.parse('4 / (2 + 3)')
x.parse('4.0 / (2 + 3)')
x.parse('4 / (2.0 + 3)')
x.parse('4.0 / 2 * 3')
x.parse('(4.0 / 2) * 3')
x.parse('4.0 / (2 * 3)')
x.parse('(((3))) + 1')
y = ParserClass()
y.parse('set a 4 / 2 + 1')
y.parse('a * 3')
y.parse('set b 12 / a')
y.parse('b')
z = ParserClass()
z.parse('set a 99')
z.parse('set a a + 1')
z.parse('a')
z = ParserClass()
z.parse('pi')
z.parse('2 * pi')
z.parse('1.234 + 2.1')
def interact(ParserClass): # command-line entry
print ParserClass
x = ParserClass()
while 1:
cmd = raw_input('Enter=> ')
if cmd == 'stop':
break
x.parse(cmd)
grammar.txt:
Code: goal -> <expr> END [number, variable, ( ]
goal -> <assign> END [set]
assign -> 'set' <variable> <expr> [set]
expr -> <factor> <expr-tail> [number, variable, ( ]
expr-tail -> ^ [END, ) ]
expr-tail -> '+' <factor> <expr-tail> [+]
expr-tail -> '-' <factor> <expr-tail> [-]
factor -> <term> <factor-tail> [number, variable, ( ]
factor-tail -> ^ [+, -, END, ) ]
factor-tail -> '*' <term> <factor-tail> [*]
factor-tail -> '/' <term> <factor-tail> [/]
term -> <number> [number]
term -> <variable> [variable]
term -> '(' <expr> ')' [(]
tokens: (, ), num, var, -, +, /, *, set, end
__init__.py:
Sorry if I'm not helping, I'm not quite that advanced yet.
Anyone can download these examples and they are open for anyone to share, the author only has a problem if a large part of the examples in the book are used to make money.
Posts: 123
Threads: 7
Joined: Jan 2010
Reputation:
0
Alright, thanks. I'll have a look at those now.
|