使用Java实现PL/0语言的词法分析,把输入的字符串形式的源程序分割成一个个单词符号
时间: 2024-05-10 21:21:42 浏览: 9
具体实现过程如下:
1. 定义一个Token类,用于表示一个单词符号,包括类型和值两个属性。
```java
/**
 * A single lexical unit: a token category paired with the text it carries.
 */
public class Token {
    /** Category of this token. */
    public TokenType type;

    /** Text associated with this token. */
    public String value;

    /**
     * Creates a token.
     *
     * @param type  category of the token
     * @param value text associated with the token
     */
    public Token(TokenType type, String value) {
        this.type = type;
        this.value = value;
    }

    /**
     * Renders the token as {@code Token{type=<type>, value='<value>'}}.
     *
     * @return the printable form of this token
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Token{");
        text.append("type=").append(type);
        text.append(", value='").append(value).append('\'');
        text.append('}');
        return text.toString();
    }
}
```
2. 定义一个枚举类型TokenType,包括PL/0语言中的各种单词符号类型。
```java
/**
 * Categories of lexical units produced by {@code Lexer} for PL/0 source text.
 */
public enum TokenType {
    // Keywords
    VAR, CONST, PROCEDURE, BEGIN, END, IF, THEN, WHILE, DO, CALL, ODD,
    // Operators
    PLUS, MINUS, TIMES, SLASH, EQL, NEQ, LSS, LEQ, GTR, GEQ, ASSIGN,
    // Identifiers, numbers and punctuation; PERIOD is appended last so the
    // ordinals of the pre-existing constants do not change.
    IDENTIFIER, NUMBER, SEMICOLON, COMMA, LPAREN, RPAREN, PERIOD
}
```
3. 实现词法分析器Lexer,它将输入的源程序字符串分割成一个个单词符号。
```java
/**
 * Splits a PL/0 source string into tokens, one per call to {@link #getNextToken()}.
 */
public class Lexer {
    private final String input;
    private int position;

    /**
     * Creates a lexer positioned at the start of {@code input}.
     *
     * @param input the source text to tokenize
     */
    public Lexer(String input) {
        this.input = input;
        this.position = 0;
    }

    /**
     * Returns the next token, skipping any whitespace in front of it.
     *
     * @return the next token, or {@code null} once the input is exhausted
     * @throws RuntimeException if the next character cannot start a token; the
     *         position is left on the offending character
     */
    public Token getNextToken() {
        // Skip whitespace with a loop rather than recursion so that long runs of
        // blanks or newlines cannot overflow the call stack.
        skipWhitespace();
        if (position >= input.length()) {
            return null;
        }
        char currentChar = input.charAt(position);
        if (Character.isLetter(currentChar)) {
            return readWord();
        }
        if (Character.isDigit(currentChar)) {
            return readNumber();
        }
        return readSymbol(currentChar);
    }

    /** Advances past consecutive whitespace characters. */
    private void skipWhitespace() {
        while (position < input.length() && Character.isWhitespace(input.charAt(position))) {
            position++;
        }
    }

    /** Reads a run of letters/digits and classifies it as a keyword or an identifier. */
    private Token readWord() {
        int start = position;
        while (position < input.length() && Character.isLetterOrDigit(input.charAt(position))) {
            position++;
        }
        // One substring call instead of appending character by character.
        String word = input.substring(start, position);
        return new Token(classifyWord(word), word);
    }

    /** Maps a word to its keyword type; anything that is not a keyword is an identifier. */
    private static TokenType classifyWord(String word) {
        switch (word) {
            case "var":
                return TokenType.VAR;
            case "const":
                return TokenType.CONST;
            case "procedure":
                return TokenType.PROCEDURE;
            case "begin":
                return TokenType.BEGIN;
            case "end":
                return TokenType.END;
            case "if":
                return TokenType.IF;
            case "then":
                return TokenType.THEN;
            case "while":
                return TokenType.WHILE;
            case "do":
                return TokenType.DO;
            case "call":
                return TokenType.CALL;
            case "odd":
                return TokenType.ODD;
            default:
                return TokenType.IDENTIFIER;
        }
    }

    /** Reads a run of digits as a NUMBER token. */
    private Token readNumber() {
        int start = position;
        while (position < input.length() && Character.isDigit(input.charAt(position))) {
            position++;
        }
        return new Token(TokenType.NUMBER, input.substring(start, position));
    }

    /** Reads an operator or punctuation token that starts with {@code currentChar}. */
    private Token readSymbol(char currentChar) {
        switch (currentChar) {
            case '+':
                return take(TokenType.PLUS, "+");
            case '-':
                return take(TokenType.MINUS, "-");
            case '*':
                return take(TokenType.TIMES, "*");
            case '/':
                return take(TokenType.SLASH, "/");
            case '=':
                return take(TokenType.EQL, "=");
            case '#':
                return take(TokenType.NEQ, "#");
            case '<':
                if (nextIsEquals()) {
                    return take(TokenType.LEQ, "<=");
                }
                return take(TokenType.LSS, "<");
            case '>':
                if (nextIsEquals()) {
                    return take(TokenType.GEQ, ">=");
                }
                return take(TokenType.GTR, ">");
            case ':':
                if (nextIsEquals()) {
                    return take(TokenType.ASSIGN, ":=");
                }
                // A lone ':' is not a token; fall through to the error below
                // without consuming it, like every other invalid character.
                break;
            case ';':
                return take(TokenType.SEMICOLON, ";");
            case ',':
                return take(TokenType.COMMA, ",");
            case '(':
                return take(TokenType.LPAREN, "(");
            case ')':
                return take(TokenType.RPAREN, ")");
            case '.':
                // Without this case a source ending in "end." cannot be tokenized.
                return take(TokenType.PERIOD, ".");
            default:
                break;
        }
        throw new RuntimeException("Invalid character: " + currentChar);
    }

    /** Reports whether the character after the current one is {@code '='}. */
    private boolean nextIsEquals() {
        return position + 1 < input.length() && input.charAt(position + 1) == '=';
    }

    /** Consumes {@code text.length()} characters and wraps {@code text} in a token. */
    private Token take(TokenType type, String text) {
        position += text.length();
        return new Token(type, text);
    }
}
```
4. 在主函数中调用词法分析器Lexer,输出分割后的单词符号。
```java
/**
 * Demo driver: feeds a small sample program to {@code Lexer} and prints every
 * token it returns, one per line.
 */
public class Main {
    /**
     * Tokenizes the built-in sample source and writes each token to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String source = "const a = 1;\n" +
                "var b;\n" +
                "procedure f;\n" +
                "begin\n" +
                " b := a + 2;\n" +
                "end;\n" +
                "begin\n" +
                " f();\n" +
                "end.";
        Lexer scanner = new Lexer(source);
        // getNextToken() returning null marks the end of the token stream.
        for (Token current = scanner.getNextToken(); current != null; current = scanner.getNextToken()) {
            System.out.println(current);
        }
    }
}
```
输出结果如下:
```
Token{type=CONST, value='const'}
Token{type=IDENTIFIER, value='a'}
Token{type=EQL, value='='}
Token{type=NUMBER, value='1'}
Token{type=SEMICOLON, value=';'}
Token{type=VAR, value='var'}
Token{type=IDENTIFIER, value='b'}
Token{type=SEMICOLON, value=';'}
Token{type=PROCEDURE, value='procedure'}
Token{type=IDENTIFIER, value='f'}
Token{type=SEMICOLON, value=';'}
Token{type=BEGIN, value='begin'}
Token{type=IDENTIFIER, value='b'}
Token{type=ASSIGN, value=':='}
Token{type=IDENTIFIER, value='a'}
Token{type=PLUS, value='+'}
Token{type=NUMBER, value='2'}
Token{type=SEMICOLON, value=';'}
Token{type=END, value='end'}
Token{type=SEMICOLON, value=';'}
Token{type=BEGIN, value='begin'}
Token{type=IDENTIFIER, value='f'}
Token{type=LPAREN, value='('}
Token{type=RPAREN, value=')'}
Token{type=SEMICOLON, value=';'}
Token{type=END, value='end'}
Token{type=PERIOD, value='.'}
```