请生成LR(0)语法分析,可以输入压缩了的上下文无关文法,符号串以#结束,可以输出相应的LR(0)分析表,然后输出识别符号串的过程,并输出符号串是否是所输入文法的句子,含主函数,用Python实现
时间: 2024-03-18 09:38:47 浏览: 63
好的,下面是实现的代码,使用了Python的ply库来实现LR(0)语法分析器:
```python
import ply.yacc as yacc
import ply.lex as lex
# 定义文法规则
grammar = """
S : E
E : E '+' T
| T
T : T '*' F
| F
F : '(' E ')'
| 'id'
"""
# 定义压缩的文法字符串
compressed_grammar = 'S:E;E:E+T|T;T:T*F|F;F:(E)|id'
# 定义词法分析器的词汇表
tokens = ['ID', 'PLUS', 'TIMES', 'LPAREN', 'RPAREN']
# 定义词法分析器的正则表达式规则
t_PLUS = r'\+'
t_TIMES = r'\*'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_ignore = ' \t\n'
# ID的规则为字母或下划线开头,后面可以跟字母、下划线或数字
def t_ID(t):
r'[a-zA-Z_][a-zA-Z0-9_]*'
return t
# 定义错误处理函数
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# 构建词法分析器
lexer = lex.lex()
# 解析压缩的文法字符串,生成LR分析表
def build_parser(grammar):
# 构建语法分析器
parser = yacc.yacc(method='SLR')
# 解析文法规则,生成语法树
grammar_tree = parser.parse(grammar)
# 生成LR(0)分析表
lr_table = parser.lr_table
return lr_table, grammar_tree
# 解析压缩的文法字符串,生成LR分析表和语法树
lr_table, grammar_tree = build_parser(compressed_grammar)
# 定义LR分析器的状态类
class LRState:
def __init__(self, state_num, items):
self.state_num = state_num
self.items = items
def __hash__(self):
return hash(str(self.items))
def __eq__(self, other):
return str(self.items) == str(other.items)
def __str__(self):
return f"I{self.state_num}: {str(self.items)}"
# 定义LR分析器的项类
class LRItem:
def __init__(self, production, dot_pos):
self.production = production
self.dot_pos = dot_pos
def __eq__(self, other):
return self.production == other.production and self.dot_pos == other.dot_pos
def __str__(self):
prod = self.production.copy()
prod.insert(self.dot_pos, ".")
return f"{prod}"
# 定义LR分析器的项集类
class LRItemSet:
def __init__(self, items):
self.items = items
def __iter__(self):
return iter(self.items)
def __len__(self):
return len(self.items)
def __hash__(self):
return hash(str(self.items))
def __eq__(self, other):
return str(self.items) == str(other.items)
def __str__(self):
return f"{str(item)}" for item in self.items
# 定义LR分析器的文法类
class LRGrammar:
def __init__(self, productions):
self.productions = productions
def __str__(self):
return "\n".join(str(p) for p in self.productions)
# 获取文法的终结符和非终结符集合
def get_symbols(self):
nonterminals = set()
terminals = set()
for production in self.productions:
nonterminals.add(production[0])
for symbol in production[1]:
if symbol.islower():
terminals.add(symbol)
return nonterminals, terminals
# 获取文法的开始符号
def get_start_symbol(self):
return self.productions[0][0]
# 获取某个符号的FIRST集合
def get_first(self, symbol, nonterminals, terminals):
first = set()
if symbol in terminals:
first.add(symbol)
elif symbol in nonterminals:
for production in self.productions:
if production[0] == symbol:
if len(production[1]) == 0:
first.add("")
elif production[1][0] in terminals:
first.add(production[1][0])
else:
first.update(self.get_first(production[1][0], nonterminals, terminals))
i = 1
while "" in first and i < len(production[1]):
first.remove("")
if production[1][i] in terminals:
first.add(production[1][i])
break
else:
first.update(self.get_first(production[1][i], nonterminals, terminals))
i += 1
if "" in first:
first.remove("")
first.add("")
return first
# 获取某个符号串的FIRST集合
def get_first_set(self, symbol_str, nonterminals, terminals):
first_set = set()
i = 0
while i < len(symbol_str):
if symbol_str[i] in terminals:
first_set.add(symbol_str[i])
break
elif symbol_str[i] in nonterminals:
first = self.get_first(symbol_str[i], nonterminals, terminals)
if "" not in first:
first_set.update(first)
break
else:
first_set.update(first - {""})
i += 1
else:
break
if i == len(symbol_str):
first_set.add("")
return first_set
# 获取某个符号的FOLLOW集合
def get_follow(self, symbol, nonterminals, terminals, first_sets, follow_sets):
follow_set = set()
if symbol == self.get_start_symbol():
follow_set.add("#")
for production in self.productions:
for i in range(len(production[1])):
if production[1][i] == symbol:
if i == len(production[1]) - 1:
if symbol != production[0]:
if production[0] in follow_sets:
follow_set.update(follow_sets[production[0]])
else:
follow_set.update(self.get_follow(production[0], nonterminals, terminals, first_sets, follow_sets))
else:
first = self.get_first_set(production[1][i+1:], nonterminals, terminals)
if "" in first:
if symbol != production[0]:
if production[0] in follow_sets:
follow_set.update(follow_sets[production[0]])
else:
follow_set.update(self.get_follow(production[0], nonterminals, terminals, first_sets, follow_sets))
follow_set.update(first - {""})
else:
follow_set.update(first)
return follow_set
# 获取所有符号的FOLLOW集合
def get_follow_sets(self, nonterminals, terminals, first_sets):
follow_sets = {}
for nonterminal in nonterminals:
follow_sets[nonterminal] = set()
follow_sets[self.get_start_symbol()].add("#")
while True:
follow_sets_new = follow_sets.copy()
for production in self.productions:
for i in range(len(production[1])):
if production[1][i] in nonterminals:
first = self.get_first_set(production[1][i+1:], nonterminals, terminals)
follow = follow_sets[production[1][i]]
if "" in first:
follow_sets_new[production[1][i]].update(follow)
follow_sets_new[production[1][i]].update(first - {""})
if follow_sets_new == follow_sets:
break
follow_sets = follow_sets_new
return follow_sets
# 获取某个状态的闭包
def get_closure(self, item_set):
closure = item_set.copy()
while True:
closure_new = closure.copy()
for item in closure:
if item.dot_pos < len(item.production[1]) and item.production[1][item.dot_pos] in nonterminals:
for production in self.productions:
if production[0] == item.production[1][item.dot_pos]:
closure_new.add(LRItem(production, 0))
if closure_new == closure:
break
closure = closure_new
return closure
# 获取某个状态的GOTO集合
def get_goto(self, item_set, symbol):
goto = set()
for item in item_set:
if item.dot_pos < len(item.production[1]) and item.production[1][item.dot_pos] == symbol:
goto.add(LRItem(item.production, item.dot_pos+1))
return self.get_closure(goto)
# 获取所有状态和GOTO集合
def get_states(self):
states = []
start_item = LRItem(self.productions[0], 0)
start_state = LRState(0, self.get_closure({start_item}))
states.append(start_state)
i = 0
while i < len(states):
state = states[i]
i += 1
for symbol in all_symbols:
goto = self.get_goto(state.items, symbol)
if len(goto) > 0 and goto not in [s.items for s in states]:
new_state = LRState(len(states), goto)
states.append(new_state)
lr_table[new_state.state_num] = {}
for s in goto:
if s.dot_pos == len(s.production[1]):
if s.production == self.productions[0]:
lr_table[state.state_num][s.production[0]] = ("acc", None)
else:
for j in range(len(self.productions)):
if self.productions[j] == s.production:
lr_table[state.state_num][s.production[0]] = ("r", j)
break
elif s.production[1][s.dot_pos] in terminals:
next_state = self.get_goto(goto, s.production[1][s.dot_pos])
for j in range(len(states)):
if states[j].items == next_state:
lr_table[state.state_num][s.production[1][s.dot_pos]] = ("s", j)
break
elif s.production[1][s.dot_pos] in nonterminals:
next_state = self.get_goto(goto, s.production[1][s.dot_pos])
for j in range(len(states)):
if states[j].items == next_state:
lr_table[state.state_num][s.production[1][s.dot_pos]] = ("g", j)
break
return states
# 解析输入的符号串,输出语法分析过程和是否是该文法的句子
def parse(self, input_str):
input_str += "#"
stack = [0]
input_pos = 0
output = []
while True:
state = stack[-1]
symbol = input_str[input_pos]
if symbol not in lr_table[state]:
output.append("Error")
break
action = lr_table[state][symbol]
if action[0] == "s":
stack.append(symbol)
stack.append(action[1])
input_pos += 1
output.append(f"Shift {symbol} and go to state {action[1]}")
elif action[0] == "r":
production = self.productions[action[1]]
for i in range(2 * len(production[1])):
stack.pop()
state = stack[-1]
stack.append(production[0])
stack.append(lr_table[state][production[0]][1])
output.append(f"Reduce using {production[0]} -> {' '.join(production[1])}")
elif action[0] == "g":
stack.append(symbol)
stack.append(action[1])
output.append(f"Go to state {action[1]}")
elif action[0] == "acc":
output.append("Accept")
break
return output, output[-1] == "Accept"
# 解压缩文法字符串,生成文法规则列表
productions = []
for rule in compressed_grammar.split(";"):
head, body = rule.split(":")
productions.append((head, body.split("|")))
# 创建文法实例
grammar = LRGrammar(productions)
# 获取文法的终结符和非终结符集合
nonterminals, terminals = grammar.get_symbols()
all_symbols = nonterminals | terminals
# 获取所有符号的FIRST集合和FOLLOW集合
first_sets = {}
for symbol in all_symbols:
first_sets[symbol] = grammar.get_first(symbol, nonterminals, terminals)
follow_sets = grammar.get_follow_sets(nonterminals, terminals, first_sets)
# 获取所有状态和GOTO集合
states = grammar.get_states()
# 测试语法分析器
input_str = "id + id * id"
output, is_valid = grammar.parse(input_str)
print(f"Input string: {input_str}")
print("".join(f"{s}\n" for s in output))
print(f"Is valid: {is_valid}")
阅读全文