Browse Source

init

pull/2/head
Tom 6 years ago
commit
168e50ce87
8 changed files with 362 additions and 0 deletions
  1. +2
    -0
      .gitignore
  2. +5
    -0
      lab/testGrammar.grm
  3. +209
    -0
      src/grammarLexer.py
  4. +70
    -0
      src/grammarParser.py
  5. +32
    -0
      src/stateMachine.py
  6. +16
    -0
      src/testLexer.py
  7. +13
    -0
      src/testParser.py
  8. +15
    -0
      src/token.py

+ 2
- 0
.gitignore View File

@ -0,0 +1,2 @@
__pycache__
*.pyc

+ 5
- 0
lab/testGrammar.grm View File

@ -0,0 +1,5 @@
$FOO -> /foo
$FOO -> /foo$FOO
$FOO -> $BAR
$BAR -> /bar
$BAR -> $BAR

+ 209
- 0
src/grammarLexer.py View File

@ -0,0 +1,209 @@
#!/usr/bin/env python
from enum import Enum
from stateMachine import StateMachine
from collections import deque
from token import Token
# Token categories the grammar lexer can attach to a lexeme.
# Members are numbered from 1 in the order they are listed.
T_TOKEN = Enum(
    "T_TOKEN",
    ["NON_TERMINAL", "ARROW", "TERMINAL", "ERROR"],
)
class GrammarLexer:
    """Split one line of a grammar file into tokens.

    A line is scanned character by character with a small state machine:

    * ``$`` starts a non-terminal, ``/`` starts a terminal; both are followed
      by ASCII letters and digits. The leading sigil is stripped from the
      stored lexeme.
    * ``-`` starts an arrow, which may be continued by ``>``.
    * A space ends the current token and returns to the start state.
    * Any other input moves the machine to the error state, which has no
      outgoing transitions; the rest of the line is then ignored.

    The tokens of the most recently processed line are kept in a queue that
    can be consumed with ``nextToken`` / ``hasTokensLeft`` or fetched as a
    whole with ``getParsedLine``.
    """

    __STATE = Enum("STATE", "START NON_TERMINAL ARROW TERMINAL ERROR")

    # Characters allowed after the "$" or "/" sigil of a symbol.
    __SYMBOL_CHARS = (
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "0123456789"
    )

    def __init__(self):
        """Create a lexer with its own state machine and token queue."""
        state = self.__STATE

        # All mutable state lives on the instance so that several lexers can
        # be used side by side without sharing a token queue.
        self.__T_TOKEN_STATE_MAP = {
            state.NON_TERMINAL: T_TOKEN.NON_TERMINAL,
            state.TERMINAL: T_TOKEN.TERMINAL,
            state.ARROW: T_TOKEN.ARROW,
            state.ERROR: T_TOKEN.ERROR,
        }
        self.__line = deque()
        self.__stateMachine = StateMachine()

        machine = self.__stateMachine
        machine.setStartState(state.START)
        machine.setErrorState(state.ERROR)

        # Start state: dispatch on the first character of a token.
        machine.addTransition(state.START, "$", state.NON_TERMINAL)
        machine.addTransition(state.START, "/", state.TERMINAL)
        machine.addTransition(state.START, "-", state.ARROW)
        machine.addTransition(state.START, " ", state.START)

        # Symbol names: letters and digits keep the machine in place.
        for symbolState in (state.NON_TERMINAL, state.TERMINAL):
            for char in self.__SYMBOL_CHARS:
                machine.addTransition(symbolState, char, symbolState)
            machine.addTransition(symbolState, " ", state.START)

        # A symbol may be followed directly by a symbol of the other kind,
        # e.g. "/foo$FOO".
        machine.addTransition(state.NON_TERMINAL, "/", state.TERMINAL)
        machine.addTransition(state.TERMINAL, "$", state.NON_TERMINAL)

        machine.addTransition(state.ARROW, ">", state.ARROW)
        machine.addTransition(state.ARROW, " ", state.START)

    def processLine(self, line):
        """Tokenise ``line`` and replace the token queue with the result.

        A token is emitted whenever the state machine leaves the state the
        token was collected in. A token that is still pending when the input
        ends (for example the last line of a file that has no trailing
        newline) is emitted as well. Input collected in the error state is
        never emitted.
        """
        self.__stateMachine.reset()
        self.__line.clear()

        prevState = self.__stateMachine.getCurrentState()
        lexemeBuffer = ""
        for char in line:
            currState = self.__stateMachine.processChar(char)
            if currState != prevState and prevState != self.__STATE.START:
                self.__emit(prevState, lexemeBuffer)
                lexemeBuffer = ""
            # Spaces only separate tokens; they never belong to a lexeme.
            if char != " ":
                lexemeBuffer += char
            prevState = currState

        # Flush a token that was not terminated by a following character.
        # START has nothing pending, and ERROR is a sink whose content is
        # dropped exactly as it is inside the loop.
        unfinished = prevState not in (self.__STATE.START, self.__STATE.ERROR)
        if lexemeBuffer and unfinished:
            self.__emit(prevState, lexemeBuffer)

    def __emit(self, state, lexeme):
        """Append the token collected in ``state`` to the queue."""
        tToken = self.__T_TOKEN_STATE_MAP.get(state)
        if tToken in (T_TOKEN.TERMINAL, T_TOKEN.NON_TERMINAL):
            # Drop the leading "$" or "/" sigil.
            lexeme = lexeme[1:]
        self.__line.append(Token(tToken, lexeme))

    def nextToken(self):
        """Remove and return the next token; raises IndexError when empty."""
        return self.__line.popleft()

    def hasTokensLeft(self):
        """Return True while the token queue is not empty."""
        return bool(self.__line)

    def getParsedLine(self):
        """Return the internal token deque itself (not a copy)."""
        return self.__line

+ 70
- 0
src/grammarParser.py View File

@ -0,0 +1,70 @@
#!/usr/bin/env python
from grammarLexer import GrammarLexer
from grammarLexer import T_TOKEN
class GrammarParser:
    """Read grammar rules of the form ``$LEFT -> symbols`` into a dictionary.

    The grammar maps the lexeme of each left-hand non-terminal to a list of
    alternatives; every alternative is the list of tokens found on the
    right-hand side of one rule.
    """

    def __init__(self):
        """Create a parser with its own lexer and an empty grammar."""
        # Kept on the instance so that independent parsers do not
        # accumulate rules into one shared dictionary.
        self.__lxr = GrammarLexer()
        self.__grammar = {}

    def parse(self, grammarFile):
        """Add every valid rule found in ``grammarFile`` to the grammar.

        ``grammarFile`` is an open text file (or any iterable of lines).
        Lines that do not pass ``__lineIsValid`` are skipped silently. The
        file is not closed here; that is left to the caller.
        """
        for line in grammarFile:
            self.__lxr.processLine(line)
            tokenLine = self.__lxr.getParsedLine()
            if not self.__lineIsValid(tokenLine):
                continue

            leftSide = tokenLine.popleft().getLexeme()
            tokenLine.popleft()  # discard the arrow
            rightSide = list(tokenLine)
            tokenLine.clear()  # leave the lexer's queue drained

            if self.__isNewNonTerm(leftSide):
                self.__grammar[leftSide] = []
            self.__grammar[leftSide].append(rightSide)
        print(self.__grammar)

    def __lineIsValid(self, tokenLine):
        """Return True for ``NON_TERMINAL ARROW <at least one token>``."""
        if len(tokenLine) < 3:
            return False
        if tokenLine[0].getClass() != T_TOKEN.NON_TERMINAL:
            return False
        return tokenLine[1].getClass() == T_TOKEN.ARROW

    def __isNewNonTerm(self, token):
        """Return True when no rule has been stored for ``token`` yet."""
        return self.__grammar.get(token) is None

    def printGrammar(self):
        """Print one line per non-terminal listing all its alternatives."""
        for left, right in self.__grammar.items():
            self.__printRow(left, right)

    def __printRow(self, left, right):
        """Print ``left -> alt1 | alt2 | ...``.

        Each token is rendered as ``CLASS(lexeme)``; the tokens of one
        alternative are written back to back without a separator.
        """
        alternatives = (
            "".join(
                str(token.getClass()) + "(" + token.getLexeme() + ")"
                for token in alternative
            )
            for alternative in right
        )
        print(left + " -> " + " | ".join(alternatives))

+ 32
- 0
src/stateMachine.py View File

@ -0,0 +1,32 @@
#!/usr/bin/env python
class StateMachine:
    """Deterministic state machine driven one input character at a time.

    Transitions are stored as ``(state, inputChar) -> followState``. When no
    transition matches, the machine moves to the configured error state
    (``None`` if none was set).
    """

    def __init__(self):
        """Create an empty machine with no states or transitions."""
        # Per-instance table: a class-level dict would be shared by every
        # machine in the process.
        self.__transitions = {}
        self.__currentState = None
        self.__errorState = None
        self.__startState = None

    def addTransition(self, state, inputChar, followState):
        """Register that ``inputChar`` read in ``state`` leads to ``followState``.

        A later registration for the same ``(state, inputChar)`` pair
        replaces the earlier one.
        """
        self.__transitions[(state, inputChar)] = followState

    def getCurrentState(self):
        """Return the state the machine is currently in."""
        return self.__currentState

    def processChar(self, inputChar):
        """Consume ``inputChar`` and return the resulting state.

        Falls back to the error state when no transition is registered for
        the current state and ``inputChar``.
        """
        nextState = self.__transitions.get((self.__currentState, inputChar))
        self.__currentState = (
            nextState if nextState is not None else self.__errorState
        )
        return self.__currentState

    def setErrorState(self, errorState):
        """Set the state entered when no transition matches."""
        self.__errorState = errorState

    def setStartState(self, startState):
        """Set the start state and position the machine on it."""
        self.__startState = startState
        self.__currentState = startState

    def reset(self):
        """Return the machine to its start state."""
        self.__currentState = self.__startState

+ 16
- 0
src/testLexer.py View File

@ -0,0 +1,16 @@
#!/usr/bin/env python
from grammarLexer import GrammarLexer
from grammarLexer import T_TOKEN
# Manual smoke test: tokenise one grammar rule typed by the user and print
# the class and lexeme of every token found.
lxr = GrammarLexer()
inp = input("please enter a grammar rule (e.g. $FOO -> /foo): ")
# Appending a space guarantees the last token is terminated and emitted.
lxr.processLine(inp + " ")
while lxr.hasTokensLeft():
    token = lxr.nextToken()
    print(token.getClass(), ":", token.getLexeme())

+ 13
- 0
src/testParser.py View File

@ -0,0 +1,13 @@
#!/usr/bin/env python
from grammarParser import GrammarParser
# Manual smoke test: parse the grammar file named by the user and print the
# resulting rules.
prs = GrammarParser()
inp = input("grammar file: ")
# The context manager closes the file even if parsing raises.
with open(inp, "r") as grammarFile:
    prs.parse(grammarFile)
prs.printGrammar()

+ 15
- 0
src/token.py View File

@ -0,0 +1,15 @@
#!/usr/bin/env python
class Token:
    """A lexeme together with the token class assigned to it by a lexer."""

    def __init__(self, tClass, lexeme):
        """Store the token class ``tClass`` and the matched text ``lexeme``."""
        self.__tClass = tClass
        self.__lexeme = lexeme

    def __repr__(self):
        """Return a readable form so printed token containers are legible."""
        return f"Token({self.__tClass!r}, {self.__lexeme!r})"

    def getClass(self):
        """Return the token class given at construction."""
        return self.__tClass

    def getLexeme(self):
        """Return the lexeme given at construction."""
        return self.__lexeme

Loading…
Cancel
Save