java 手写词法分析器
时间: 2023-12-03 14:40:50 浏览: 31
以下是一个用Java编写的手写词法分析器的示例代码:
```java
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
/**
 * A hand-written lexer for simple arithmetic expressions read from a file.
 *
 * <p>The input is scanned one character at a time. Whitespace (including line
 * breaks) is skipped, each of {@code + - * / ( )} becomes an
 * {@link Token.Type#OPERATOR} token, and every maximal run of the ASCII digits
 * {@code 0-9} becomes a single {@link Token.Type#NUMBER} token. Any other
 * character aborts the scan with a {@link RuntimeException}.
 *
 * <p>The file is opened by the constructor and released either when
 * {@link #tokenize()} returns (normally or exceptionally) or when
 * {@link #close()} is called, whichever happens first.
 */
public class Lexer implements AutoCloseable {
    /** Value returned by {@link BufferedReader#read()} once the input is exhausted. */
    private static final int EOF = -1;
    private static final String OPERATORS = "+-*/()";
    private static final String DIGITS = "0123456789";

    private final BufferedReader reader;

    /**
     * Look-ahead character, kept as the raw {@code int} from {@code read()} so
     * that end of input ({@code -1}) can never be confused with the real
     * character U+FFFF, which is what {@code (char) -1} evaluates to.
     */
    private int currentChar;

    /**
     * Opens the given file and reads its first character as look-ahead.
     *
     * @param fileName path of the file to tokenize
     * @throws IOException if the file cannot be opened or the first read fails
     */
    public Lexer(String fileName) throws IOException {
        reader = new BufferedReader(new FileReader(fileName));
        try {
            currentChar = reader.read();
        } catch (IOException e) {
            // The caller never receives this instance, so nobody else can close the file.
            try {
                reader.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Scans the remaining input and returns its tokens in source order.
     *
     * <p>The underlying file is closed before this method returns. Calling it
     * again after a complete scan yields an empty list.
     *
     * @return the tokens found, possibly empty
     * @throws IOException if reading from the file fails
     * @throws RuntimeException if a character is neither whitespace, an operator nor a digit
     */
    public List<Token> tokenize() throws IOException {
        List<Token> tokens = new ArrayList<>();
        // try-with-resources closes the file on every exit path and keeps a close
        // failure from masking an exception thrown by the scan itself.
        try (BufferedReader in = reader) {
            while (currentChar != EOF) {
                char c = (char) currentChar;
                if (Character.isWhitespace(c)) {
                    // Line breaks are whitespace too, so they need no separate case.
                    consume();
                } else if (OPERATORS.indexOf(c) != -1) {
                    tokens.add(new Token(Token.Type.OPERATOR, Character.toString(c)));
                    consume();
                } else if (isDigit(currentChar)) {
                    tokens.add(new Token(Token.Type.NUMBER, readNumber()));
                } else {
                    throw new RuntimeException("Invalid character: " + c);
                }
            }
        }
        return tokens;
    }

    /**
     * Releases the underlying file. Safe to call more than once and after
     * {@link #tokenize()}.
     *
     * @throws IOException if closing the file fails
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }

    /** Collects the run of digits starting at the look-ahead character. */
    private String readNumber() throws IOException {
        StringBuilder digits = new StringBuilder();
        while (isDigit(currentChar)) {
            digits.append((char) currentChar);
            consume();
        }
        return digits.toString();
    }

    /** Tells whether {@code c} is one of the ASCII digits; end of input is not. */
    private static boolean isDigit(int c) {
        return c != EOF && DIGITS.indexOf(c) != -1;
    }

    /** Advances the look-ahead to the next character, or to {@link #EOF}. */
    private void consume() throws IOException {
        currentChar = reader.read();
    }
}
class Token {
enum Type {
OPERATOR, NUMBER
}
private Type type;
private String value;
public Token(Type type, String value) {
this.type = type;
this.value = value;
}
public Type getType() {
return type;
}
public String getValue() {
return value;
}
}
```
这个词法分析器可以读取一个文件并将其分解为令牌。它产生两种类型的令牌:运算符(`+ - * / ( )`)和数字(连续的十进制数字);换行符和其他空白字符会被直接跳过,读到文件结束符时停止扫描,遇到其他任何字符则抛出异常。它用一个 `StringBuilder` 作为缓冲区来累积当前正在解析的数字,并用一个列表按出现顺序保存识别出的令牌。