Compiler design for beginners : Top down parser with backtracking (python source code)

import sys

inpPos = 0 #input string pos
op = 0

testStr = ["cad", "cdd","cabd","cbab","caad","c", "ca", "cabb",'xyza', 'cabb','babd','bd', 'd' , 'b' , 'c']

newLine = '\n'
gramEndChar = '$'

print ( "Enter string like" )
print ( testStr)
print( newLine )

print ( "string to check" )
inputStr = input()

print( ["Input String => ", inputStr] )
print( "Grammar End Character is => " + gramEndChar )
#print( testResult[pos] )
print ( newLine )
#dprint inputStr

A = [['a', 'b'], ['a']]
#A = ['a', 'b']

S = [['c', A, 'd', gramEndChar]] #this should be used as G, start sym
inputStr += gramEndChar
S.append(gramEndChar)
#G = [S, A, 'e'] #not his

pG = [] # previous grammar to backtrack
pgp = [] # previous grammar pos
pip = [] #previous input pos
gramG = [] # current grammar

def dprint(value):
# print( value ) #uncomment to see debug
return

def gprint(value):
print(value)
return

curGrmSymblPos = 0
gramG = S
inputStrvalid = False # input String Valid

reachedEndAtCurGram = False
curInpCharAndGramSymMatch = False
endPosOfInput = False

while( True ): #search for grammar
dprint ("< previous>")
dprint( pG)
dprint(pgp)
dprint(["Input => ",inputStr," => ", pip ])
dprint ("</previous>")
dprint

if( isinstance(gramG[curGrmSymblPos],list) ): #maybe #and list is not finished
dprint ([ 'Checking Inside Grammar => ', gramG , "at pos =>", curGrmSymblPos ])

pG.append(gramG) #move inside list, for current grammar
pgp.append(curGrmSymblPos)
pip.append(inpPos)

#curGrmSymblPos+=1
gramG = gramG[curGrmSymblPos] #get new current Grammar

curGrmSymblPos = 0 # reset grammar pos for new grammar
else:
dprint (['CURRENT Grammar ', gramG])
#dprint (gramG[curGrmSymblPos] . str(curGrmSymblPos))
dprint( ["Symbol at curGrmSymblPos => ", str(curGrmSymblPos), gramG[curGrmSymblPos] ])
dprint( ["Current Input Symbol => ", inputStr[inpPos], "at inpPos => ", inpPos])

gprint( "GrmSymbl => " + ' => ' + gramG[curGrmSymblPos] )
gprint( "Input Symbol => " + inputStr[inpPos] + " => at pos => " + str(inpPos) )
# print( newLine )
#dprint ("curGrmSymblPos" + str(curGrmSymblPos)) #maybe ##and list is not finished

if(inputStr[inpPos] == gramG[curGrmSymblPos]):
print ("Grammar and inpPos = " + str(inpPos) + " MATCH" )
gprint ( newLine )
curInpCharAndGramSymMatch = True
inpPos+=1
else:
print ("Grammar and inpPos = " + str(inpPos) + "Don't MATCH" )
gprint ( newLine )

if( gramG[curGrmSymblPos] == gramEndChar ):
dprint (" Reached end of Grammar, break")
break
else:
print (["Grammer not reached end => ", gramG[curGrmSymblPos], gramEndChar ] )

curInpCharAndGramSymMatch = False
#[ ['a', 'b'], ['a']
# ab first letter a not match
# #try to check for next a
if(pG): # only if pG is not empty stack
gramG = pG.pop()
curGrmSymblPos = pgp.pop() + 1
inpPos = pip.pop()

if( curGrmSymblPos == len(gramG) ):
dprint("current Grammar finished,so again backtracking")
gramG = pG.pop()
curGrmSymblPos = pgp.pop() + 1
inpPos = pip.pop()

dprint("<backtracking>")
dprint([ " to Grammar => ", gramG, "<= at pos", curGrmSymblPos])
dprint( ["Current Input Symbol => ", inputStr[inpPos], "at inpPos => ", inpPos])
dprint("</backtracking>")
dprint("CONTINUE")
dprint( newLine)
continue #below steps pop further, avoiding them
#inpPos = pip.pop()

#gprint ( newLine ) #above continue won't allow to print newLine

if(inpPos == len(inputStr) or inputStr[inpPos] == gramEndChar ):
dprint (" Reached End of Input Symbol, stop: inpPos => " + str(inpPos) )
endPosOfInput = True
break

curGrmSymblPos+=1 #to check for other symbols in grammar
if( curGrmSymblPos == len(gramG) and len(pG) != 0): #reached end of gramG,backtrack
reachedEndAtCurGram = True
else:
reachedEndAtCurGram = False

#may contain other grammar
#eg gram1, [ ['a', 'b'], ['a'] ] <--gram2** , gram3
#if ab is checked, (reached end)
# also need to check for 2nd 'a'
if( reachedEndAtCurGram ):
#so don't pop a/c to above comment
if( curInpCharAndGramSymMatch ):

#[ [['c', [['a', 'b'], ['a']], 'd'], 'e'] ]
# prev gram has only one list as above
# don't remove it
if( len(pG) > 1):
dprint("-- Reached End, curInput Char And Gram Sym Match--")
dprint("<pop> i.e. Removing Grammar, previous grammar pos")
dprint( [ pG,pgp ])

pG.pop()
pgp.pop() #ab is matched, don't need to match for 2nd 'a', remove gram2**

if(pG):
dprint("\t removing another top Grammar <<and>> set as curret grammar")
dprint("\t removing pgp, set as current sym pos = pgpPos + 1")
dprint( [ pG,pgp ])

gramG = pG.pop() #now get another Grammer saved
curGrmSymblPos = pgp.pop() + 1 # move to next gram pos, pos of gram2** + 1
#pip.pop()
else:
gramG = pG.pop()
curGrmSymblPos = pgp.pop() + 1
inpPos = pip.pop()

#reached [ ['a', 'b'], ['a'] ]
#2nd part i.e. 'a' list after 'ab'
# # now new pos don't have grammar to check
# # #i.e. curGrmSymblPos == 2 don't exist
# # # # need to check to next grammar
# # # # # gram1, [ ['a', 'b'], ['a'] ] <--gram2 , gram3
# # # # # # gram3 should be checked
# # # # # # gram2 should be removed
if( curGrmSymblPos == len(gramG) ):
dprint("current Grammar finished,so again backtracking")
gramG = pG.pop()
curGrmSymblPos = pgp.pop() + 1
inpPos = pip.pop()
#dprint("backtracking to " + str(curGrmSymblPos))
#dprint (gramG)
gprint('backtracking')

dprint("<backtracking>")
dprint([ " to Grammar => ", gramG, "<= at pos", curGrmSymblPos])
dprint( ["Current Input Symbol => ", inputStr[inpPos], "at inpPos => ", inpPos])
dprint("</backtracking>")
dprint

dprint('\n')
dprint

if(not gramG):
break

if( curGrmSymblPos < len(gramG)):
if( gramG[curGrmSymblPos] == gramEndChar and len(pG) == 0 ):
dprint ('Reached end of Grammar')
break
else:
curGrmSymblPos = 0

if( endPosOfInput == True ):
if ( gramG[curGrmSymblPos] == gramEndChar or gramG[curGrmSymblPos + 1] == gramEndChar ):
inputStrvalid = True
# if( endPosOfInput == True ):

if( inputStrvalid == False):
print("Input String is invalid")
else:
print("Input String is valid")

Compiler design for beginners

Tuesday, January 26, 2016

Top down parser with backtracking (python source code)

No comments:

Post a Comment