帮我用Java写一个LR语法分析器
时间: 2023-08-28 07:28:03 浏览: 161
好的,LR语法分析器是一种自下而上的语法分析器,我将为您提供一个简单的Java实现。
1. 首先,我们需要定义文法规则。假设我们的文法规则如下:
S -> E
E -> E + T | T
T -> T * F | F
F -> ( E ) | id
2. 接下来,我们需要构建LR分析表。这里我们使用LR(1)分析方法,需要构建状态机、求出每个项目的闭包和GOTO函数,并填充分析表。代码如下:
```
class LRAutomaton {
private List<LRState> states = new ArrayList<>();
private Map<String, Integer> terminals = new HashMap<>();
private Map<String, Integer> nonTerminals = new HashMap<>();
private int[][] actionTable;
private int[][] gotoTable;
public LRAutomaton() {
// 初始化终结符和非终结符表
terminals.put("+", 0);
terminals.put("*", 1);
terminals.put("(", 2);
terminals.put(")", 3);
terminals.put("id", 4);
nonTerminals.put("S", 0);
nonTerminals.put("E", 1);
nonTerminals.put("T", 2);
nonTerminals.put("F", 3);
// 构建状态机
LRState startState = new LRState();
LRItem startItem = new LRItem(new LRProduction("S", "E"), 0, "$");
startState.add(startItem);
states.add(startState);
boolean changed = true;
while (changed) {
changed = false;
for (int i = 0; i < states.size(); i++) {
LRState s = states.get(i);
for (String symbol : getAllSymbols(s)) {
LRState t = gotoState(s, symbol);
if (t != null && !states.contains(t)) {
states.add(t);
changed = true;
}
}
}
}
// 填充分析表
actionTable = new int[states.size()][terminals.size()];
gotoTable = new int[states.size()][nonTerminals.size()];
for (int i = 0; i < states.size(); i++) {
LRState s = states.get(i);
for (int j = 0; j < terminals.size(); j++) {
String symbol = getSymbol(j, true); // 获取终结符
LRState t = gotoState(s, symbol);
if (t != null) {
if (symbol.equals("$")) {
actionTable[i][j] = 2; // 接受状态
} else {
actionTable[i][j] = 1; // 移进状态
}
actionTable[i][j] = (actionTable[i][j] << 16) | states.indexOf(t);
}
}
for (int j = 0; j < nonTerminals.size(); j++) {
String symbol = getSymbol(j, false); // 获取非终结符
LRState t = gotoState(s, symbol);
if (t != null) {
gotoTable[i][j] = states.indexOf(t);
}
}
for (LRItem item : s.getItems()) {
if (item.isReduceItem()) {
LRProduction p = item.getProduction();
int k = nonTerminals.get(p.getLeft());
for (int j = 0; j < terminals.size(); j++) {
String symbol = getSymbol(j, true); // 获取终结符
if (p.getRight().contains(symbol)) {
continue;
}
int action = (3 << 16) | item.getIndex(); // 规约状态
actionTable[i][j] = action;
}
for (int j = 0; j < nonTerminals.size(); j++) {
int gotoState = gotoTable[i][j];
if (gotoState > 0) {
continue;
}
String symbol = getSymbol(j, false); // 获取非终结符
if (p.getLeft().equals(symbol)) {
gotoTable[i][j] = item.getIndex();
}
}
}
}
}
}
private Set<String> getAllSymbols(LRState state) {
Set<String> symbols = new HashSet<>();
for (LRItem item : state.getItems()) {
List<String> rest = item.getRest();
if (rest.size() > 0) {
symbols.add(rest.get(0));
}
}
return symbols;
}
private LRState gotoState(LRState state, String symbol) {
LRState result = new LRState();
for (LRItem item : state.getItems()) {
List<String> rest = item.getRest();
if (rest.size() > 0 && rest.get(0).equals(symbol)) {
result.add(new LRItem(item.getProduction(), item.getIndex() + 1, item.getLookahead()));
}
}
if (result.getItems().size() == 0) {
return null;
}
result.closure();
return result;
}
private String getSymbol(int index, boolean isTerminal) {
if (isTerminal) {
for (Map.Entry<String, Integer> entry : terminals.entrySet()) {
if (entry.getValue() == index) {
return entry.getKey();
}
}
} else {
for (Map.Entry<String, Integer> entry : nonTerminals.entrySet()) {
if (entry.getValue() == index) {
return entry.getKey();
}
}
}
return null;
}
public int parse(List<String> input) {
Stack<Integer> stateStack = new Stack<>();
Stack<String> symbolStack = new Stack<>();
stateStack.push(0);
symbolStack.push("$");
int index = 0;
while (true) {
int state = stateStack.peek();
String symbol = input.get(index);
int symbolIndex = -1;
if (terminals.containsKey(symbol)) {
symbolIndex = terminals.get(symbol);
} else {
symbolIndex = nonTerminals.get(symbol);
}
int action = actionTable[state][symbolIndex];
int type = action >> 16;
int value = action & 0xFFFF;
if (type == 1) { // 移进状态
stateStack.push(value);
symbolStack.push(symbol);
index++;
} else if (type == 2) { // 接受状态
return 0;
} else if (type == 3) { // 规约状态
LRProduction p = states.get(state).getItems().get(value).getProduction();
for (int i = 0; i < p.getRight().size(); i++) {
stateStack.pop();
symbolStack.pop();
}
int prevState = stateStack.peek();
int gotoState = gotoTable[prevState][nonTerminals.get(p.getLeft())];
stateStack.push(gotoState);
symbolStack.push(p.getLeft());
} else { // 错误状态
return -1;
}
}
}
}
class LRState {
private List<LRItem> items = new ArrayList<>();
public void add(LRItem item) {
items.add(item);
}
public List<LRItem> getItems() {
return items;
}
public void closure() {
boolean changed = true;
while (changed) {
changed = false;
for (int i = 0; i < items.size(); i++) {
LRItem item = items.get(i);
String symbol = item.getRest().get(0);
if (symbol != null && !LRAutomaton.isTerminal(symbol)) {
for (LRProduction p : LRAutomaton.getProductions(symbol)) {
boolean exists = false;
for (LRItem newItem : items) {
if (newItem.getProduction().equals(p) && newItem.getLookahead().equals(item.getLookahead())) {
exists = true;
break;
}
}
if (!exists) {
items.add(new LRItem(p, 0, item.getLookahead()));
changed = true;
}
}
}
}
}
}
}
class LRItem {
private LRProduction production;
private int index;
private String lookahead;
public LRItem(LRProduction production, int index, String lookahead) {
this.production = production;
this.index = index;
this.lookahead = lookahead;
}
public LRProduction getProduction() {
return production;
}
public int getIndex() {
return index;
}
public String getLookahead() {
return lookahead;
}
public List<String> getRest() {
return production.getRight().subList(index, production.getRight().size());
}
public boolean isReduceItem() {
return index == production.getRight().size();
}
}
class LRProduction {
private String left;
private List<String> right;
public LRProduction(String left, String... right) {
this.left = left;
this.right = Arrays.asList(right);
}
public String getLeft() {
return left;
}
public List<String> getRight() {
return right;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof LRProduction) {
LRProduction p = (LRProduction)obj;
return left.equals(p.left) && right.equals(p.right);
}
return false;
}
@Override
public int hashCode() {
return left.hashCode() ^ right.hashCode();
}
}
class LRParser {
private LRAutomaton automaton;
public LRParser() {
automaton = new LRAutomaton();
}
public boolean parse(String input) {
List<String> tokens = tokenize(input);
tokens.add("$");
return automaton.parse(tokens) == 0;
}
private List<String> tokenize(String input) {
List<String> tokens = new ArrayList<>();
String[] parts = input.split("\\s+");
for (String part : parts) {
if (part.equals("+") || part.equals("*") || part.equals("(") || part.equals(")")) {
tokens.add(part);
} else {
tokens.add("id");
}
}
return tokens;
}
}
public class LRParserTest {
public static void main(String[] args) {
LRParser parser = new LRParser();
System.out.println(parser.parse("id + id * id")); // true
System.out.println(parser.parse("id + * id")); // false
}
}
```
3. 最后,我们可以使用LRParser类来解析输入字符串。例如,LRParserTest类中的代码:
```
LRParser parser = new LRParser();
System.out.println(parser.parse("id + id * id")); // true
System.out.println(parser.parse("id + * id")); // false
```
将输出true和false,分别表示输入字符串符合文法规则和不符合文法规则。
希望这个简单的Java实现可以帮助您理解LR语法分析器的工作原理。
阅读全文