用c++实现LR(1)分析法
时间: 2023-08-24 12:29:27 浏览: 59
LR(1)分析法是一种自底向上的语法分析方法,用于分析上下文无关文法。下面是用C++实现LR(1)分析法的基本步骤:
1. 构造LR(1)分析表:通过扫描文法的所有产生式,确定每个符号的Follow集合,并根据这些信息构造LR(1)分析表。
2. 实现LR(1)分析器:根据构造出的LR(1)分析表,实现LR(1)分析器,用于对输入的源代码进行分析。
3. 实现语法树构造:在分析源代码的过程中,构造出语法树,用于进一步处理源代码。
下面是一个简单的示例代码:
```
#include <iostream>
#include <cstring>
#include <vector>
#include <stack>
#include <map>
using namespace std;
//定义产生式
struct production{
char left; //产生式左部
string right; //产生式右部
};
//定义LR(1)项
struct item{
production p; //项对应的产生式
int dot; //点的位置
set<char> lookahead; //展望符集
};
//定义状态
struct state{
vector<item> items; //状态中的项
map<char, int> gotos; //goto表
map<char, int> actions; //action表
};
//定义LR(1)分析表
vector<state> lr1_table;
//定义符号集合
set<char> symbols;
//定义产生式集合
vector<production> productions;
//定义终结符集合
set<char> terminals;
//定义非终结符集合
set<char> non_terminals;
//定义Follow集合
map<char, set<char>> follow;
//定义语法树节点
struct syntax_tree_node{
char symbol; //节点对应的符号
vector<syntax_tree_node*> children; //子节点
};
//获取项的核心
set<item> get_core(set<item> items){
set<item> core;
for(auto it: items){
item i = {it.p, it.dot, {}};
core.insert(i);
}
return core;
}
//获取项的闭包
set<item> get_closure(set<item> items){
set<item> closure = items;
while(true){
set<item> new_items;
for(auto it: closure){
if(it.dot < it.p.right.length()){
char next_symbol = it.p.right[it.dot];
if(non_terminals.count(next_symbol)){
//对于非终结符,将其所有产生式的项加入闭包
for(auto p: productions){
if(p.left == next_symbol){
item new_item = {p, 0, {}};
new_items.insert(new_item);
}
}
}
}
}
new_items = get_core(new_items);
if(new_items.size() == 0){
break;
}
for(auto it: new_items){
if(!closure.count(it)){
closure.insert(it);
}
}
}
return closure;
}
//获取项集的闭包
set<item> get_closure(set<item> items, char symbol){
set<item> closure;
for(auto it: items){
if(it.dot < it.p.right.length() && it.p.right[it.dot] == symbol){
item new_item = {it.p, it.dot + 1, {symbol}};
closure.insert(new_item);
}
}
return get_closure(closure);
}
//构造LR(1)分析表
void build_lr1_table(){
//初始化
state start_state;
item start_item = {productions[0], 0, {'$'}};
start_state.items.insert(start_item);
start_state.items = get_closure(start_state.items);
lr1_table.push_back(start_state);
while(true){
bool has_new_state = false;
for(int i = 0; i < lr1_table.size(); i++){
state s = lr1_table[i];
for(auto it: symbols){
set<item> next_items;
for(auto it2: s.items){
if(it2.dot < it2.p.right.length() && it2.p.right[it2.dot] == it){
item new_item = {it2.p, it2.dot + 1, it2.lookahead};
next_items.insert(new_item);
}
}
if(next_items.size() > 0){
set<item> closure = get_closure(next_items);
int next_state_id = -1;
for(int j = 0; j < lr1_table.size(); j++){
if(get_core(lr1_table[j].items) == get_core(closure)){
next_state_id = j;
break;
}
}
if(next_state_id == -1){
has_new_state = true;
state next_state;
next_state.items = closure;
lr1_table.push_back(next_state);
next_state_id = lr1_table.size() - 1;
}
if(non_terminals.count(it)){
s.gotos[it] = next_state_id;
}else{
for(auto it2: closure){
if(it2.dot == it2.p.right.length()){
if(s.actions.find(it) != s.actions.end()){
//冲突处理
cout << "conflict: state " << i << ", symbol " << it << endl;
exit(1);
}
s.actions[it] = it2.p.left;
if(it2.lookahead.count('$') == 0){
for(auto it3: it2.lookahead){
s.actions[it3] = it2.p.left;
}
}
}
}
}
}
}
lr1_table[i] = s;
}
if(!has_new_state){
break;
}
}
}
//LR(1)分析
syntax_tree_node* lr1_parse(string input){
//初始化
stack<int> states;
stack<char> symbols;
syntax_tree_node* root = new syntax_tree_node;
root->symbol = '$';
states.push(0);
symbols.push('$');
int pos = 0;
while(true){
int state = states.top();
char symbol = input[pos];
if(lr1_table[state].actions.find(symbol) != lr1_table[state].actions.end()){
//移进
int next_state = lr1_table[state].actions[symbol];
if(next_state == -1){
//出错
return nullptr;
}
syntax_tree_node* node = new syntax_tree_node;
node->symbol = symbol;
root->children.push_back(node);
states.push(next_state);
symbols.push(symbol);
pos++;
}else if(lr1_table[state].gotos.find(symbol) != lr1_table[state].gotos.end()){
//归约
int next_state = lr1_table[state].gotos[symbol];
if(next_state == -1){
//出错
return nullptr;
}
production p;
for(auto it: productions){
if(it.left == lr1_table[state].actions[symbols.top()]){
p = it;
break;
}
}
syntax_tree_node* node = new syntax_tree_node;
node->symbol = p.left;
for(int i = 0; i < p.right.length(); i++){
states.pop();
symbols.pop();
node->children.insert(node->children.begin(), root->children.back());
root->children.pop_back();
}
root->children.push_back(node);
states.push(next_state);
symbols.push(p.left);
}else{
//出错
return nullptr;
}
if(symbols.top() == '$'){
break;
}
}
return root;
}
int main(){
//初始化产生式集合和符号集合
productions.push_back({'S', "E"});
productions.push_back({'E', "E+T"});
productions.push_back({'E', "T"});
productions.push_back({'T', "T*F"});
productions.push_back({'T', "F"});
productions.push_back({'F', "(E)"});
productions.push_back({'F', "i"});
for(auto p: productions){
for(char c: p.right){
symbols.insert(c);
}
}
symbols.insert('$');
non_terminals.insert('S');
non_terminals.insert('E');
non_terminals.insert('T');
non_terminals.insert('F');
for(auto p: productions){
for(int i = 0; i < p.right.length(); i++){
if(non_terminals.count(p.right[i])){
follow[p.right[i]] = {};
}
}
}
follow['E'] = {'$', ')', '+'};
follow['T'] = {'$', ')', '+', '*'};
follow['F'] = {'$', ')', '+', '*'};
terminals = symbols;
for(char c: non_terminals){
terminals.erase(c);
}
//构造LR(1)分析表
build_lr1_table();
//LR(1)分析
string input = "i+i*i$";
syntax_tree_node* root = lr1_parse(input);
if(root == nullptr){
cout << "parse error" << endl;
}else{
cout << "parse success" << endl;
}
return 0;
}
```
以上代码实现了一个简单的LR(1)分析器,可以对输入的算术表达式进行分析,并构造出语法树。需要注意的是,这只是一个简单的示例代码,实际应用中还需要考虑更多的情况,例如错误处理、冲突处理等。