构造LR(1)分析程序,利用它进行语法分析,判断给出的符号串是否为该文法识别的句子,代码
时间: 2024-03-25 22:40:51 浏览: 17
构造LR(1)分析程序的步骤如下:
1. 构造文法的所有LR(1)项集
2. 构造LR(1)分析表,包括ACTION和GOTO两个部分
3. 利用构造的分析表对输入的符号串进行分析
下面是一个示例代码,假设我们要判断如下文法是否可以识别给出的符号串:
```
S -> E
E -> E + T | T
T -> T * F | F
F -> ( E ) | id
```
符号串为:`( id + id ) * id`
LR(1)分析程序代码如下:
```python
# 定义文法的所有产生式
productions = [
('S', 'E'),
('E', 'E', '+', 'T'),
('E', 'T'),
('T', 'T', '*', 'F'),
('T', 'F'),
('F', '(', 'E', ')'),
('F', 'id')
]
# 定义终结符和非终结符
terminals = ['+', '*', '(', ')', 'id', '$']
non_terminals = ['S', 'E', 'T', 'F']
# 定义LR(1)项
class LR1Item:
def __init__(self, production, dot_index, lookahead):
self.production = production
self.dot_index = dot_index
self.lookahead = lookahead
def __eq__(self, other):
return self.production == other.production and self.dot_index == other.dot_index and self.lookahead == other.lookahead
def __hash__(self):
return hash((self.production, self.dot_index, self.lookahead))
def __str__(self):
prod_str = ' '.join(self.production)
prod_str = prod_str[:self.dot_index] + '.' + prod_str[self.dot_index:]
return f"[{prod_str}, {self.lookahead}]"
# 构造LR(1)项集
def closure_lr1(items):
closure = set(items)
while True:
new_items = set()
for item in closure:
if item.dot_index < len(item.production) and item.production[item.dot_index] in non_terminals:
for prod in productions:
if prod[0] == item.production[item.dot_index]:
lookahead = item.lookahead
if item.dot_index + 1 < len(item.production):
lookahead = first(item.production[item.dot_index + 1:], item.lookahead)
new_item = LR1Item(prod, 0, lookahead)
if new_item not in closure:
new_items.add(new_item)
if not new_items:
break
closure |= new_items
return closure
# 计算一个符号串的FIRST集
def first(symbols, lookahead):
first_set = set()
if not symbols:
first_set.add(lookahead)
elif symbols[0] in terminals:
first_set.add(symbols[0])
elif symbols[0] in non_terminals:
first_set |= first_table[symbols[0]]
if 'eps' in first_set:
first_set.remove('eps')
first_set |= first(symbols[1:], lookahead)
return first_set
# 计算一个LR(1)项集的GOTO集
def goto_lr1(items, symbol):
new_items = set()
for item in items:
if item.dot_index < len(item.production) and item.production[item.dot_index] == symbol:
new_items.add(LR1Item(item.production, item.dot_index + 1, item.lookahead))
return closure_lr1(new_items)
# 构造文法的FIRST集
first_table = {}
for terminal in terminals:
first_table[terminal] = set([terminal])
for non_terminal in non_terminals:
first_table[non_terminal] = set()
while True:
updated = False
for production in productions:
non_terminal = production[0]
symbols = production[1:]
old_first_set = set(first_table[non_terminal])
first_set = first(symbols, 'eps')
first_table[non_terminal] |= first_set
if old_first_set != first_table[non_terminal]:
updated = True
if not updated:
break
# 构造LR(1)项集族
start_item = LR1Item(productions[0], 0, '$')
start_set = closure_lr1(set([start_item]))
item_sets = [start_set]
while True:
new_sets = []
for item_set in item_sets:
for symbol in terminals + non_terminals:
goto_set = goto_lr1(item_set, symbol)
if goto_set and goto_set not in item_sets + new_sets:
new_sets.append(goto_set)
if not new_sets:
break
item_sets += new_sets
# 构造LR(1)分析表
action_table = {}
goto_table = {}
for i, item_set in enumerate(item_sets):
for item in item_set:
if item.dot_index == len(item.production):
if item.production[0] == 'S':
action_table[(i, '$')] = 'accept'
else:
for j, production in enumerate(productions):
if item.production == production and item.lookahead in follow_table[production[0]]:
action_table[(i, item.lookahead)] = ('reduce', j)
elif item.production[item.dot_index] in terminals:
goto_set = goto_lr1(item_set, item.production[item.dot_index])
if goto_set in item_sets:
action_table[(i, item.production[item.dot_index])] = ('shift', item_sets.index(goto_set))
elif item.production[item.dot_index] in non_terminals:
goto_set = goto_lr1(item_set, item.production[item.dot_index])
if goto_set in item_sets:
goto_table[(i, item.production[item.dot_index])] = item_sets.index(goto_set)
# 构造文法的FOLLOW集
follow_table = {}
for non_terminal in non_terminals:
follow_table[non_terminal] = set()
follow_table[productions[0][0]] = set(['$'])
while True:
updated = False
for production in productions:
for i in range(len(production) - 1):
if production[i] in non_terminals:
old_follow_set = set(follow_table[production[i]])
if production[i + 1] in terminals:
follow_table[production[i]] |= set([production[i + 1]])
elif production[i + 1] in non_terminals:
follow_table[production[i]] |= first_table[production[i + 1]]
if 'eps' in follow_table[production[i]]:
follow_table[production[i]].remove('eps')
follow_table[production[i]] |= follow_table[production[0]]
if old_follow_set != follow_table[production[i]]:
updated = True
if production[-1] in non_terminals:
old_follow_set = set(follow_table[production[-1]])
follow_table[production[-1]] |= follow_table[production[0]]
if old_follow_set != follow_table[production[-1]]:
updated = True
if not updated:
break
# 对输入符号串进行分析
stack = [0]
input_str = "( id + id ) * id $"
input_index = 0
while True:
state = stack[-1]
lookahead = input_str[input_index]
if (state, lookahead) in action_table:
action = action_table[(state, lookahead)]
if action == 'accept':
print("该符号串是该文法可以识别的句子!")
break
elif action[0] == 'shift':
stack.append(lookahead)
stack.append(action[1])
input_index += 1
elif action[0] == 'reduce':
production_index = action[1]
production = productions[production_index]
for i in range(2 * len(production)):
stack.pop()
new_state = stack[-1]
stack.append(production[0])
stack.append(goto_table[(new_state, production[0])])
else:
raise Exception("ERROR")
else:
raise Exception("ERROR")
```
上述代码中,我们首先构造了LR(1)项集族,然后利用它构造了LR(1)分析表,最后利用LR(1)分析表对输入的符号串进行了分析。在分析过程中,我们使用了一个栈来模拟LR分析器的行为,不断地从栈中弹出符号并将新的符号压入栈中,直到遇到接受状态或者出现语法错误。