针对Java的词法分析输出token序列
时间: 2024-01-07 15:05:25 浏览: 45
要对Java源码做词法分析并输出token序列,可以编写一个词法分析器(lexer)。下面是一个简化的Java词法分析器示例,它会把输入的Java代码切分成一个个token:
```java
import java.util.*;
public class JavaLexer {
private String input;
private List<String> tokens;
private int pos;
public JavaLexer(String input) {
this.input = input;
this.tokens = new ArrayList<>();
this.pos = 0;
}
public List<String> getTokens() {
return tokens;
}
public void tokenize() {
while (pos < input.length()) {
char currentChar = input.charAt(pos);
if (Character.isLetter(currentChar)) {
String identifier = readIdentifier();
tokens.add(identifier);
} else if (Character.isDigit(currentChar)) {
String number = readNumber();
tokens.add(number);
} else if (currentChar == '\"') {
String stringLiteral = readStringLiteral();
tokens.add(stringLiteral);
} else if (currentChar == '\'') {
String charLiteral = readCharLiteral();
tokens.add(charLiteral);
} else if (isOperator(currentChar)) {
String operator = readOperator();
tokens.add(operator);
} else if (isDelimiter(currentChar)) {
String delimiter = readDelimiter();
tokens.add(delimiter);
} else {
// skip unknown character
pos++;
}
}
}
private String readIdentifier() {
StringBuilder sb = new StringBuilder();
while (pos < input.length() && (Character.isLetterOrDigit(input.charAt(pos)) || input.charAt(pos) == '_')) {
sb.append(input.charAt(pos));
pos++;
}
return sb.toString();
}
private String readNumber() {
StringBuilder sb = new StringBuilder();
while (pos < input.length() && Character.isDigit(input.charAt(pos))) {
sb.append(input.charAt(pos));
pos++;
}
return sb.toString();
}
private String readStringLiteral() {
StringBuilder sb = new StringBuilder();
pos++; // skip opening quote
while (pos < input.length() && input.charAt(pos) != '\"') {
sb.append(input.charAt(pos));
pos++;
}
pos++; // skip closing quote
return "\"" + sb.toString() + "\"";
}
private String readCharLiteral() {
StringBuilder sb = new StringBuilder();
pos++; // skip opening quote
while (pos < input.length() && input.charAt(pos) != '\'') {
sb.append(input.charAt(pos));
pos++;
}
pos++; // skip closing quote
return "\'" + sb.toString() + "\'";
}
private String readOperator() {
StringBuilder sb = new StringBuilder();
while (pos < input.length() && isOperator(input.charAt(pos))) {
sb.append(input.charAt(pos));
pos++;
}
return sb.toString();
}
private String readDelimiter() {
String delimiter = String.valueOf(input.charAt(pos));
pos++;
return delimiter;
}
private boolean isOperator(char c) {
return "+-*/%=<>!&|".indexOf(c) != -1;
}
private boolean isDelimiter(char c) {
return "(){}[],.;:".indexOf(c) != -1;
}
}
```
使用方法:
```java
// Assemble the sample source one line per argument; String.join inserts the '\n' separators.
String input = String.join("\n",
        "public class HelloWorld {",
        " public static void main(String[] args) {",
        " System.out.println(\"Hello, world!\");",
        " }",
        "}");

// Run the lexer over the sample, then print the token list it collected.
JavaLexer lexer = new JavaLexer(input);
lexer.tokenize();
System.out.println(lexer.getTokens());
```
输出结果:
```
[public, class, HelloWorld, {, public, static, void, main, (, String, [, ], args, ), {, System, ., out, ., println, (, "Hello, world!", ), ;, }, }]
```
可以看到,输出是一个按源码顺序排列的token序列。本例中出现了关键字、标识符、字符串字面量和分隔符;空白字符不会产生token。需要注意的是,该词法分析器不区分关键字与普通标识符,每个token只保存其文本,不带类型信息。