用c++实现LR(1)分析法
时间: 2023-08-19 07:05:55 浏览: 164
LR(1)分析法是一种自底向上的语法分析算法,它可以对一定范围内的上下文无关文法进行分析。下面是用C++实现LR(1)分析法的基本步骤:
1. 定义文法,将其转换为LR(1)自动机
2. 构建LR(1)自动机的状态转换表
3. 读入输入串,依次进行状态转换
4. 如果最终状态为接受状态,说明输入串符合文法,否则不符合
下面是具体实现:
```c++
#include <iostream>
#include <vector>
#include <stack>
#include <map>
#include <string>
using namespace std;
// 定义文法符号类型
enum SymbolType {
TERMINAL, // 终结符
NON_TERMINAL // 非终结符
};
// 定义文法符号
struct Symbol {
SymbolType type; // 符号类型
string name; // 符号名称
};
// 定义产生式
struct Production {
Symbol left; // 产生式左部
vector<Symbol> right; // 产生式右部
};
// 定义LR(1)自动机状态
struct LR1State {
int id; // 状态ID
map<Symbol, int> gotoTable; // GOTO表
map<Symbol, int> actionTable; // ACTION表
};
// 定义LR(1)自动机
class LR1Automaton {
public:
// 添加产生式
void addProduction(const Production& p);
// 构建自动机
void build();
// 打印状态转换表
void printTable();
// 分析输入串
bool analyze(const string& input);
private:
// 获取文法符号
Symbol getSymbol(const string& name) const;
// 获取状态ID
int getStateId(const vector<LR1State>& states, const LR1State& state);
// 获取闭包
vector<Production> closure(const vector<Production>& i);
// 计算LR(1)自动机状态转换
LR1State gotoState(const LR1State& state, const Symbol& symbol);
// 构建LR(1)自动机状态转换表
void buildTable();
// 获取符号的文法产生式
vector<Production> getProductions(const Symbol& symbol) const;
// 获取符号的FIRST集
vector<Symbol> getFirst(const Symbol& symbol) const;
// 获取符号串的FIRST集
vector<Symbol> getFirst(const vector<Symbol>& symbols) const;
// 判断符号串是否包含ε
bool containsEpsilon(const vector<Symbol>& symbols) const;
// 判断符号是否为ε
bool isEpsilon(const Symbol& symbol) const;
// 判断符号是否为终结符
bool isTerminal(const Symbol& symbol) const;
// 判断符号是否为非终结符
bool isNonTerminal(const Symbol& symbol) const;
// 判断符号是否为同步符
bool isSync(const Symbol& symbol) const;
// 获取符号的字符串表示
string getSymbolString(const Symbol& symbol) const;
vector<Production> productions; // 产生式集合
vector<LR1State> states; // 自动机状态集合
map<int, Symbol> id2symbol; // 状态ID到符号的映射
map<Symbol, int> symbol2id; // 符号到状态ID的映射
map<pair<int, Symbol>, int> actionTable;// ACTION表
map<pair<int, Symbol>, int> gotoTable; // GOTO表
};
// 添加产生式
void LR1Automaton::addProduction(const Production& p) {
productions.push_back(p);
}
// 构建自动机
void LR1Automaton::build() {
// 初始化状态
int stateId = 0;
vector<Production> startProductions = getProductions(getSymbol("S'"));
vector<Production> startClosure = closure(startProductions);
LR1State startState = { stateId++, {}, {} };
states.push_back(startState);
for (const auto& p : startClosure) {
if (p.right.empty()) { // 形如S'->S·的产生式
startState.actionTable[getSymbol("$")] = 0; // 接受
}
else {
Symbol nextSymbol = p.right.front();
if (isTerminal(nextSymbol)) { // 移进
startState.actionTable[nextSymbol] = stateId;
}
else { // GOTO
startState.gotoTable[nextSymbol] = stateId;
}
}
}
// 构建状态集合
while (true) {
bool added = false;
for (int i = 0; i < states.size(); i++) {
const auto& state = states[i];
for (const auto& p : productions) {
if (p.left.name == id2symbol[state.id].name) {
for (int j = 0; j <= p.right.size(); j++) {
Symbol X = j < p.right.size() ? p.right[j] : getSymbol("ε");
LR1State nextState = gotoState(state, X);
if (!nextState.actionTable.empty() || !nextState.gotoTable.empty()) {
int nextStateId = getStateId(states, nextState);
if (nextStateId == -1) {
nextState.id = stateId++;
states.push_back(nextState);
added = true;
nextStateId = nextState.id;
}
if (isTerminal(X)) { // 移进
state.actionTable[X] = nextStateId;
}
else { // GOTO
state.gotoTable[X] = nextStateId;
}
}
else { // 形如A->α·的产生式
for (const auto& actionSymbol : getFirst(X)) {
if (!isEpsilon(actionSymbol)) {
int actionStateId = -1;
auto key = make_pair(state.id, actionSymbol);
if (actionSymbol.type == TERMINAL && actionTable.count(key) == 0) {
actionStateId = -2; // 错误
}
else {
actionStateId = actionTable[key];
}
if (actionStateId >= 0) { // 移进/规约冲突
cout << "移进/规约冲突:" << id2symbol[actionSymbol].name << endl;
}
else if (actionStateId == -1) { // 规约
int productionId = -1;
for (int k = 0; k < productions.size(); k++) {
if (productions[k].left.name == id2symbol[state.id].name && productions[k].right == p.right) {
productionId = k;
break;
}
}
if (productionId == -1) {
cout << "内部错误" << endl;
return;
}
actionTable[key] = -1 - productionId;
}
}
}
}
}
}
}
}
if (!added) {
break;
}
}
// 构建状态转换表
buildTable();
}
// 打印状态转换表
void LR1Automaton::printTable() {
cout << "ACTION表:" << endl;
cout << " ";
for (const auto& p : id2symbol) {
if (isTerminal(p.second)) {
cout << getSymbolString(p.second);
for (int i = 0; i < 5 - getSymbolString(p.second).size(); i++) {
cout << " ";
}
}
}
cout << "$" << endl;
for (int i = 0; i < states.size(); i++) {
cout << i << " ";
for (const auto& p : id2symbol) {
if (isTerminal(p.second)) {
auto key = make_pair(i, p.second);
if (actionTable.count(key) != 0) {
int value = actionTable[key];
if (value >= 0) {
cout << "s" << value;
}
else {
cout << "r" << -1 - value;
}
}
else {
cout << " ";
}
cout << " ";
}
}
auto key = make_pair(i, getSymbol("$"));
if (actionTable.count(key) != 0) {
int value = actionTable[key];
if (value == 0) {
cout << "acc";
}
else {
cout << " ";
}
}
cout << endl;
}
cout << endl;
cout << "GOTO表:" << endl;
cout << " ";
for (const auto& p : id2symbol) {
if (isNonTerminal(p.second)) {
cout << getSymbolString(p.second);
for (int i = 0; i < 5 - getSymbolString(p.second).size(); i++) {
cout << " ";
}
}
}
cout << endl;
for (int i = 0; i < states.size(); i++) {
cout << i << " ";
for (const auto& p : id2symbol) {
if (isNonTerminal(p.second)) {
auto key = make_pair(i, p.second);
if (gotoTable.count(key) != 0) {
cout << gotoTable[key] << " ";
}
else {
cout << " ";
}
}
}
cout << endl;
}
cout << endl;
}
// 分析输入串
bool LR1Automaton::analyze(const string& input) {
stack<int> stateStack;
stack<Symbol> symbolStack;
stateStack.push(0);
symbolStack.push(getSymbol("$"));
int index = 0;
while (!stateStack.empty()) {
int stateId = stateStack.top();
Symbol symbol = getSymbol(string(1, input[index]));
auto key = make_pair(stateId, symbol);
if (isTerminal(symbol)) { // 移进操作
if (actionTable.count(key) == 0) {
return false; // 错误
}
int nextStateId = actionTable[key];
if (nextStateId < 0) { // 规约操作
int productionId = -1 - nextStateId;
const auto& production = productions[productionId];
for (int i = 0; i < production.right.size(); i++) {
stateStack.pop();
symbolStack.pop();
}
Symbol leftSymbol = production.left;
symbolStack.push(leftSymbol);
auto gotoKey = make_pair(stateStack.top(), leftSymbol);
if (gotoTable.count(gotoKey) == 0) {
return false; // 错误
}
int newStateId = gotoTable[gotoKey];
stateStack.push(newStateId);
}
else { // 移进操作
stateStack.push(nextStateId);
symbolStack.push(symbol);
index++;
}
}
else { // GOTO操作
if (gotoTable.count(key) == 0) {
return false; // 错误
}
int nextStateId = gotoTable[key];
stateStack.push(nextStateId);
symbolStack.push(symbol);
}
}
return true;
}
// 获取文法符号
Symbol LR1Automaton::getSymbol(const string& name) const {
if (symbol2id.count({ TERMINAL, name }) == 1) {
return { TERMINAL, name };
}
if (symbol2id.count({ NON_TERMINAL, name }) == 1) {
return { NON_TERMINAL, name };
}
Symbol symbol = { NON_TERMINAL, name };
symbol2id[symbol] = symbol2id.size();
id2symbol[symbol2id[symbol]] = symbol;
return symbol;
}
// 获取状态ID
int LR1Automaton::getStateId(const vector<LR1State>& states, const LR1State& state) {
for (int i = 0; i < states.size(); i++) {
if (states[i].gotoTable == state.gotoTable && states[i].actionTable == state.actionTable) {
return i;
}
}
return -1;
}
// 获取闭包
vector<Production> LR1Automaton::closure(const vector<Production>& i) {
vector<Production> j = i;
while (true) {
bool added = false;
for (int k = 0; k < j.size(); k++) {
const auto& p = j[k];
Symbol B = p.right.empty() ? getSymbol("ε") : p.right.front();
if (isNonTerminal(B)) {
vector<Symbol> beta(p.right.begin() + 1, p.right.end());
beta.push_back(getSymbol("ε"));
vector<Symbol> A = getFirst(beta);
for (const auto& a : A) {
Production q = { B, beta };
if (find(j.begin(), j.end(), q) == j.end()) {
j.push_back(q);
added = true;
}
}
}
}
if (!added) {
break;
}
}
return j;
}
// 计算LR(1)自动机状态转换
LR1State LR1Automaton::gotoState(const LR1State& state, const Symbol& symbol) {
LR1State nextState = { -1, {}, {} };
for (const auto& p : state.actionTable) {
if (p.first.name == symbol.name && p.second != 0) {
cout << "移进/规约冲突:" << id2symbol[symbol].name << endl;
return nextState; // 错误
}
}
for (const auto& p : state.gotoTable) {
if (p.first.name == symbol.name) {
nextState.gotoTable = gotoState(states[p.second], getSymbol("ε")).actionTable;
}
}
vector<Production> i;
for (const auto& p : state.actionTable) {
if (p.second < 0) { // 规约操作
const auto& production = productions[-1 - p.second];
if (production.right.front().name == symbol.name) {
i.push_back({ production.left, {}, production.right.begin() + 1, production.right.end() });
}
}
}
nextState.id = getStateId(states, { -1, {}, {} });
nextState.actionTable.clear();
nextState.gotoTable.clear();
vector<Production> j = closure(i);
for (const auto& p : j) {
if (p.right.empty()) { // 形如S->·的产生式
nextState.actionTable[getSymbol("$")] = 0; // 接受
}
else {
Symbol nextSymbol = p.right.front();
if (isTerminal(nextSymbol)) { // 移进
nextState.actionTable[nextSymbol] = nextState.id;
}
else { // GOTO
nextState.gotoTable[nextSymbol] = nextState.id;
}
}
}
return nextState;
}
// 构建LR(1)自动机状态转换表
void LR1Automaton::buildTable() {
for (int i = 0; i < states.size(); i++) {
const auto& state = states[i];
for (const auto& p : state.actionTable) {
auto key = make_pair(i, p.first);
actionTable[key] = p.second;
}
for (const auto& p : state.gotoTable) {
auto key = make_pair(i, p.first);
gotoTable[key] = p.second;
}
}
}
// 获取符号的文法产生式
vector<Production> LR1Automaton::getProductions(const Symbol& symbol) const {
vector<Production> result;
for (const auto& p : productions) {
if (p.left.name == symbol.name) {
result.push_back(p);
}
}
return result;
}
// 获取符号的FIRST集
vector<Symbol> LR1Automaton::getFirst(const Symbol& symbol) const {
vector<Symbol> result;
if (isTerminal(symbol)) {
result.push_back(symbol);
}
else {
for (const auto& p : productions) {
if (p.left.name == symbol.name) {
vector<Symbol> beta(p.right.begin(), p.right.end());
vector<Symbol> betaFirst = getFirst(beta);
result.insert(result.end(), betaFirst.begin(), betaFirst.end());
}
}
}
return result;
}
// 获取符号串的FIRST集
vector<Symbol> LR1Automaton::getFirst(const vector<Symbol>& symbols) const {
vector<Symbol> result;
for (int i = 0; i < symbols.size(); i++) {
const auto& symbol = symbols[i];
vector<Symbol> first = getFirst(symbol);
result.insert(result.end(), first.begin(), first.end());
if (!containsEpsilon(first)) {
break;
}
if (i == symbols.size() - 1) {
result.push_back(getSymbol("ε"));
}
}
return result;
}
// 判断符号串是否包含ε
bool LR1Automaton::containsEpsilon(const vector<Symbol>& symbols) const {
for (const auto& symbol : symbols) {
if (!isEpsilon(symbol)) {
return false;
}
}
return true;
}
// 判断符号是否为ε
bool LR1Automaton::isEpsilon(const Symbol& symbol) const {
return symbol.name == "ε";
阅读全文