使用C++代码写出PL/0词法分析的过程
时间: 2023-12-11 07:06:10 浏览: 87
下面是一个用C++实现PL/0词法分析的简单示例。代码采用手写的逐字符扫描方式:每一类单词(标识符、数字、运算符、界符)对应一个条件分支,相当于把其正规式对应的DFA直接写成了代码,而不是由正规表达式自动转换生成DFA。
```cpp
#include <cctype>
#include <iostream>
#include <map>
#include <string>
#include <utility>
using namespace std;
// Token categories produced by the PL/0 lexer.
// Enumerators rely on their implicit values (T_EOF is 0, each following
// entry is one larger), so the declaration order must not change.
enum TokenType {
    // ---- special and literal classes ----
    T_EOF,        // end of input
    T_IDENT,      // identifier
    T_NUMBER,     // numeric literal

    // ---- arithmetic operators ----
    T_PLUS,       // '+'
    T_MINUS,      // '-'
    T_TIMES,      // '*'
    T_SLASH,      // '/'
    T_ODD,        // odd operator

    // ---- relational operators ----
    T_EQ,         // equal
    T_NEQ,        // not equal
    T_LT,         // less than
    T_GT,         // greater than
    T_LEQ,        // less than or equal
    T_GEQ,        // greater than or equal

    // ---- punctuation ----
    T_LPAREN,     // left parenthesis
    T_RPAREN,     // right parenthesis
    T_COMMA,      // comma
    T_SEMICOLON,  // semicolon
    T_PERIOD,     // period (full stop)
    T_ASSIGN,     // assignment symbol

    // ---- reserved words ----
    T_BEGIN,      // begin
    T_END,        // end
    T_IF,         // if
    T_THEN,       // then
    T_WHILE,      // while
    T_DO,         // do
    T_CALL,       // call
    T_CONST,      // const
    T_VAR,        // var
    T_PROCEDURE,  // procedure
};
// One lexical token: a category paired with the text stored for it.
// Kept as a plain aggregate so it can be built as Token{type, value}.
struct Token {
    TokenType type;     // category of the token
    std::string value;  // text associated with the token
};
// 词法分析器的结构体
struct Lexer {
string input;
int position;
char current_char;
map<string, TokenType> keywords; // 关键字表
// 初始化
Lexer(string input) {
this->input = input;
this->position = 0;
this->current_char = this->input[0];
// 初始化关键字表
keywords["begin"] = T_BEGIN;
keywords["end"] = T_END;
keywords["if"] = T_IF;
keywords["then"] = T_THEN;
keywords["while"] = T_WHILE;
keywords["do"] = T_DO;
keywords["call"] = T_CALL;
keywords["const"] = T_CONST;
keywords["var"] = T_VAR;
keywords["procedure"] = T_PROCEDURE;
}
// 移动到下一个字符
void advance() {
position++;
if (position < input.length()) {
current_char = input[position];
} else {
current_char = '\0';
}
}
// 跳过空白字符
void skip_whitespace() {
while (current_char != '\0' && isspace(current_char)) {
advance();
}
}
// 标识符
Token identifier() {
string result = "";
while (current_char != '\0' && (isalnum(current_char) || current_char == '_')) {
result += current_char;
advance();
}
// 判断是否是关键字
if (keywords.count(result) > 0) {
return Token{keywords[result], result};
} else {
return Token{T_IDENT, result};
}
}
// 数字
Token number() {
string result = "";
while (current_char != '\0' && isdigit(current_char)) {
result += current_char;
advance();
}
return Token{T_NUMBER, result};
}
// 获取下一个Token
Token get_next_token() {
while (current_char != '\0') {
if (isspace(current_char)) {
skip_whitespace();
continue;
}
if (isalpha(current_char) || current_char == '_') {
return identifier();
}
if (isdigit(current_char)) {
return number();
}
if (current_char == '+') {
advance();
return Token{T_PLUS, "+"};
}
if (current_char == '-') {
advance();
return Token{T_MINUS, "-"};
}
if (current_char == '*') {
advance();
return Token{T_TIMES, "*"};
}
if (current_char == '/') {
advance();
return Token{T_SLASH, "/"};
}
if (current_char == '(') {
advance();
return Token{T_LPAREN, "("};
}
if (current_char == ')') {
advance();
return Token{T_RPAREN, ")"};
}
if (current_char == ',') {
advance();
return Token{T_COMMA, ","};
}
if (current_char == ';') {
advance();
return Token{T_SEMICOLON, ";"};
}
if (current_char == '.') {
advance();
return Token{T_PERIOD, "."};
}
if (current_char == ':') {
advance();
if (current_char == '=') {
advance();
return Token{T_ASSIGN, ":="};
} else {
return Token{T_EOF, ""};
}
}
if (current_char == '<') {
advance();
if (current_char == '=') {
advance();
return Token{T_LEQ, "<="};
} else if (current_char == '>') {
advance();
return Token{T_NEQ, "<>"};
} else {
return Token{T_LT, "<"};
}
}
if (current_char == '>') {
advance();
if (current_char == '=') {
advance();
return Token{T_GEQ, ">="};
} else {
return Token{T_GT, ">"};
}
}
if (current_char == '=') {
advance();
return Token{T_EQ, "="};
}
if (current_char == '&') {
advance();
return Token{T_ODD, "&"};
}
// 无法识别的符号
return Token{T_EOF, ""};
}
return Token{T_EOF, ""};
}
};
// Demo driver: tokenizes a fixed sample statement and prints, one token per
// line, the numeric token type followed by the token text.
int main() {
    Lexer lexer("a := 1 + 2;");
    for (Token token = lexer.get_next_token();
         token.type != T_EOF;
         token = lexer.get_next_token()) {
        std::cout << token.type << " " << token.value << std::endl;
    }
    return 0;
}
```
以上代码中,关键字表使用STL的map实现;get_next_token()函数逐个字符地扫描输入字符串,每次调用返回下一个Token的类型和值,输入结束时返回T_EOF。需要注意的是,遇到无法识别的字符时,示例代码同样返回T_EOF并停止分析,实际使用时应改为报告词法错误。在实际应用中,还需要将Token传递给语法分析器进行处理。
阅读全文