实现LR(1)分析法的java代码
时间: 2023-08-20 21:04:11 浏览: 136
以下是一个简单的LR(1)分析器的Java代码,用于分析输入的字符串是否符合给定文法的规则:
```java
import java.util.*;
public class LR1Parser {
private Map<Integer, Map<String, Action>> actionTable;
private Map<Integer, Map<String, Integer>> gotoTable;
private Map<Integer, Set<Item>> itemSets;
private Map<String, Set<Integer>> followSets;
private int startState;
private String startSymbol;
public LR1Parser(Grammar grammar) {
// 构建LR(1)项集族
itemSets = buildItemSets(grammar);
// 构建LR(1)分析表
actionTable = buildActionTable(grammar, itemSets);
gotoTable = buildGotoTable(grammar, itemSets);
// 计算起始状态和起始符号
startState = findStartState(itemSets, grammar.getStartSymbol());
startSymbol = grammar.getStartSymbol();
// 计算Follow集合
followSets = calculateFollowSets(grammar);
}
public boolean parse(String input) {
// 初始化状态栈和符号栈
Stack<Integer> stateStack = new Stack<>();
Stack<String> symbolStack = new Stack<>();
stateStack.push(startState);
symbolStack.push("$");
// 将输入符号串转换为Token序列
List<Token> tokens = scan(input);
tokens.add(new Token("$", ""));
// 开始语法分析
int i = 0;
while (i < tokens.size()) {
int state = stateStack.peek();
String symbol = tokens.get(i).getSymbol();
if (actionTable.containsKey(state) && actionTable.get(state).containsKey(symbol)) {
// 查找分析表并执行对应动作
Action action = actionTable.get(state).get(symbol);
if (action.getType() == Action.SHIFT) {
stateStack.push(action.getValue());
symbolStack.push(symbol);
i++;
} else if (action.getType() == Action.REDUCE) {
Production production = action.getProduction();
int numPops = production.getRight().size();
for (int j = 0; j < numPops; j++) {
stateStack.pop();
symbolStack.pop();
}
String nonterminal = production.getLeft();
int newState = gotoTable.get(stateStack.peek()).get(nonterminal);
stateStack.push(newState);
symbolStack.push(nonterminal);
} else if (action.getType() == Action.ACCEPT) {
return true;
} else {
return false;
}
} else {
return false;
}
}
return false;
}
private List<Token> scan(String input) {
// 将输入符号串转换为Token序列
List<Token> tokens = new ArrayList<>();
String[] parts = input.split(" ");
for (String part : parts) {
String[] pair = part.split(":");
tokens.add(new Token(pair[0], pair[1]));
}
return tokens;
}
private Map<String, Set<Integer>> calculateFollowSets(Grammar grammar) {
// 计算Follow集合
Map<String, Set<Integer>> followSets = new HashMap<>();
for (String nonterminal : grammar.getNonterminals()) {
followSets.put(nonterminal, new HashSet<>());
}
followSets.get(startSymbol).add(Grammar.EOF);
boolean changed = true;
while (changed) {
changed = false;
for (Production production : grammar.getProductions()) {
String left = production.getLeft();
List<String> right = production.getRight();
for (int i = right.size() - 1; i >= 0; i--) {
String symbol = right.get(i);
if (grammar.isNonterminal(symbol)) {
Set<Integer> follow = followSets.get(symbol);
Set<Integer> first = calculateFirstSet(right.subList(i + 1, right.size()), grammar);
int oldSize = follow.size();
follow.addAll(first);
if (first.contains(Grammar.EMPTY) || i == right.size() - 1) {
follow.addAll(followSets.get(left));
}
if (follow.size() != oldSize) {
changed = true;
}
}
}
}
}
return followSets;
}
private Set<Integer> calculateFirstSet(List<String> symbols, Grammar grammar) {
// 计算给定符号串的First集合
Set<Integer> first = new HashSet<>();
boolean hasEmpty = true;
for (String symbol : symbols) {
Set<Integer> symbolFirst = grammar.getFirstSet(symbol);
first.addAll(symbolFirst);
if (!symbolFirst.contains(Grammar.EMPTY)) {
hasEmpty = false;
break;
}
}
if (hasEmpty) {
first.add(Grammar.EMPTY);
}
return first;
}
private int findStartState(Map<Integer, Set<Item>> itemSets, String startSymbol) {
// 查找起始状态
for (Map.Entry<Integer, Set<Item>> entry : itemSets.entrySet()) {
int state = entry.getKey();
Set<Item> items = entry.getValue();
for (Item item : items) {
if (item.getDotPosition() == 0 && item.getProduction().getLeft().equals(startSymbol)) {
return state;
}
}
}
return -1;
}
private Map<Integer, Map<String, Action>> buildActionTable(Grammar grammar, Map<Integer, Set<Item>> itemSets) {
// 构建LR(1)分析表中的Action部分
Map<Integer, Map<String, Action>> actionTable = new HashMap<>();
for (Map.Entry<Integer, Set<Item>> entry : itemSets.entrySet()) {
int state = entry.getKey();
Set<Item> items = entry.getValue();
Map<String, Action> actions = new HashMap<>();
for (Item item : items) {
if (item.getDotPosition() == item.getProduction().getRight().size()) {
if (item.getProduction().getLeft().equals(grammar.getStartSymbol())) {
actions.put("$", new Action(Action.ACCEPT, -1, null));
} else {
Set<Integer> follow = followSets.get(item.getProduction().getLeft());
for (int symbol : follow) {
actions.put(grammar.getSymbolString(symbol), new Action(Action.REDUCE, item.getProduction().getIndex(), item.getProduction()));
}
}
} else {
String nextSymbol = item.getNextSymbol();
if (grammar.isTerminal(nextSymbol)) {
int nextState = findNextState(itemSets, items, nextSymbol);
actions.put(nextSymbol, new Action(Action.SHIFT, nextState, null));
}
}
}
actionTable.put(state, actions);
}
return actionTable;
}
private Map<Integer, Map<String, Integer>> buildGotoTable(Grammar grammar, Map<Integer, Set<Item>> itemSets) {
// 构建LR(1)分析表中的Goto部分
Map<Integer, Map<String, Integer>> gotoTable = new HashMap<>();
for (Map.Entry<Integer, Set<Item>> entry : itemSets.entrySet()) {
int state = entry.getKey();
Set<Item> items = entry.getValue();
Map<String, Integer> gotos = new HashMap<>();
for (Item item : items) {
if (item.getDotPosition() < item.getProduction().getRight().size()) {
String nextSymbol = item.getNextSymbol();
if (grammar.isNonterminal(nextSymbol)) {
int nextState = findNextState(itemSets, items, nextSymbol);
gotos.put(nextSymbol, nextState);
}
}
}
gotoTable.put(state, gotos);
}
return gotoTable;
}
private int findNextState(Map<Integer, Set<Item>> itemSets, Set<Item> items, String symbol) {
// 查找给定状态集合和符号的下一个状态
Set<Item> nextItems = new HashSet<>();
for (Item item : items) {
if (item.getDotPosition() < item.getProduction().getRight().size() && item.getNextSymbol().equals(symbol)) {
nextItems.add(new Item(item.getProduction(), item.getDotPosition() + 1, item.getLookahead()));
}
}
return findItemSetState(itemSets, nextItems);
}
private int findItemSetState(Map<Integer, Set<Item>> itemSets, Set<Item> items) {
// 查找给定LR(1)项集在LR(1)项集族中的状态
for (Map.Entry<Integer, Set<Item>> entry : itemSets.entrySet()) {
int state = entry.getKey();
Set<Item> existingItems = entry.getValue();
if (existingItems.equals(items)) {
return state;
}
}
return -1;
}
private Map<Integer, Set<Item>> buildItemSets(Grammar grammar) {
// 构建LR(1)项集族
Map<Integer, Set<Item>> itemSets = new HashMap<>();
int state = 0;
Set<Item> initialItems = new HashSet<>();
Production startProduction = grammar.getProductions().get(0);
initialItems.add(new Item(startProduction, 0, Grammar.EOF));
itemSets.put(state, closure(initialItems, grammar));
boolean changed = true;
while (changed) {
changed = false;
for (Map.Entry<Integer, Set<Item>> entry : new HashMap<>(itemSets).entrySet()) {
int currentState = entry.getKey();
Set<Item> currentItems = entry.getValue();
Map<String, Set<Item>> symbolItems = new HashMap<>();
for (Item item : currentItems) {
if (item.getDotPosition() < item.getProduction().getRight().size()) {
String nextSymbol = item.getNextSymbol();
Set<Item> nextItems = symbolItems.computeIfAbsent(nextSymbol, k -> new HashSet<>());
nextItems.add(new Item(item.getProduction(), item.getDotPosition() + 1, item.getLookahead()));
}
}
for (Map.Entry<String, Set<Item>> symbolEntry : symbolItems.entrySet()) {
Set<Item> nextItems = closure(symbolEntry.getValue(), grammar);
int nextState = findItemSetState(itemSets, nextItems);
if (nextState == -1) {
nextState = ++state;
itemSets.put(nextState, nextItems);
changed = true;
}
actionTable.put(currentState, new HashMap<>());
actionTable.get(currentState).put(symbolEntry.getKey(), new Action(Action.SHIFT, nextState, null));
}
}
}
return itemSets;
}
private Set<Item> closure(Set<Item> items, Grammar grammar) {
// 计算LR(1)项集的闭包
Set<Item> closure = new HashSet<>(items);
boolean changed = true;
while (changed) {
changed = false;
for (Item item : new HashSet<>(closure)) {
if (item.getDotPosition() < item.getProduction().getRight().size()) {
String nextSymbol = item.getNextSymbol();
Set<Integer> lookaheads = calculateLookahead(item, closure, grammar);
for (Production production : grammar.getProductions()) {
if (production.getLeft().equals(nextSymbol)) {
Item newItem = new Item(production, 0, lookaheads);
if (!closure.contains(newItem)) {
closure.add(newItem);
changed = true;
}
}
}
}
}
}
return closure;
}
private Set<Integer> calculateLookahead(Item item, Set<Item> closure, Grammar grammar) {
// 计算LR(1)项的向前看符号集合
Set<Integer> lookaheads = new HashSet<>();
if (item.getDotPosition() == item.getProduction().getRight().size()) {
lookaheads.addAll(item.getLookahead());
} else {
String nextSymbol = item.getNextSymbol();
Set<Item> lookItems = new HashSet<>();
for (Item closureItem : closure) {
if (closureItem.getProduction().getLeft().equals(nextSymbol)) {
lookItems.add(closureItem);
}
}
for (Item lookItem : lookItems) {
Set<Integer> itemLookaheads = lookItem.getLookahead();
if (itemLookaheads.contains(Grammar.EMPTY)) {
lookaheads.addAll(calculateFollowSets(grammar).get(item.getProduction().getLeft()));
lookaheads.remove(Grammar.EMPTY);
}
lookaheads.addAll(itemLookaheads);
}
}
return lookaheads;
}
private static class Item {
private Production production;
private int dotPosition;
private Set<Integer> lookahead;
public Item(Production production, int dotPosition, int lookahead) {
this.production = production;
this.dotPosition = dotPosition;
this.lookahead = new HashSet<>();
this.lookahead.add(lookahead);
}
public Item(Production production, int dotPosition, Set<Integer> lookahead) {
this.production = production;
this.dotPosition = dotPosition;
this.lookahead = lookahead;
}
public Production getProduction() {
return production;
}
public int getDotPosition() {
return dotPosition;
}
public String getNextSymbol() {
return production.getRight().get(dotPosition);
}
public Set<Integer> getLookahead() {
return lookahead;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof Item) {
Item other = (Item) obj;
return production.equals(other.production) && dotPosition == other.dotPosition && lookahead.equals(other.lookahead);
}
return false;
}
@Override
public int hashCode() {
return Objects.hash(production, dotPosition, lookahead);
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append(production.getLeft());
builder.append(" ->");
for (int i = 0; i < production.getRight().size(); i++) {
if (i == dotPosition) {
builder.append(" ·");
}
builder.append(" ");
builder.append(production.getRight().get(i));
}
if (dotPosition == production.getRight().size()) {
builder.append(" ·");
}
builder.append(", {");
for (int lookahead : lookahead) {
builder.append(" '");
builder.append(Grammar.getSymbolString(lookahead));
builder.append("'");
}
builder.append(" }");
return builder.toString();
}
}
private static class Action {
public static final int SHIFT = 1;
public static final int REDUCE = 2;
public static final int ACCEPT = 3;
public static final int ERROR = 4;
private int type;
private int value;
private Production production;
public Action(int type, int value, Production production) {
this.type = type;
this.value = value;
this.production = production;
}
public int getType() {
return type;
}
public int getValue() {
return value;
}
public Production getProduction() {
return production;
}
}
}
```
上述代码中,`Grammar`类表示给定的文法,`Token`类表示输入符号串中的一个Token,`LR1Parser`类表示LR(1)分析器,包含了构建LR(1)项集族、构建LR(1)分析表、计算Follow集合、计算First集合、计算闭包、计算向前看符号等功能函数。其中,`buildItemSets()`函数实现了LR(1)项集族的构建,`buildActionTable()`函数实现了LR(1)分析表中的Action部分的构建,`buildGotoTable()`函数实现了LR(1)分析表中的Goto部分的构建,`scan()`函数用于将输入符号串转换为Token序列,`parse()`函数实现了LR(1)分析器的主要逻辑。
阅读全文