第1关:使用C/C++语言编写PL/0编译程序的语法分析程序
时间: 2023-11-25 11:07:19 浏览: 392
PL/0编译程序的语法分析程序可以使用自顶向下的递归下降分析法来实现。以下是一个简单的C++实现,假设已经有了词法分析器返回的token序列:
```c++
#include <iostream>
#include <vector>
#include <string>
using namespace std;
enum class TokenType {
UNKNOWN,
IDENT,
NUMBER,
RESERVED_WORD,
SYMBOL
};
struct Token {
TokenType type;
string value;
};
class Parser {
public:
Parser(vector<Token> tokens) : tokens(tokens) {}
bool parse() {
position = 0;
token = tokens[position];
program();
return true;
}
private:
vector<Token> tokens;
size_t position;
Token token;
void error(const string& message) {
cerr << "Error: " << message << endl;
exit(1);
}
void match(TokenType expectedType) {
if (token.type == expectedType) {
position++;
if (position < tokens.size()) {
token = tokens[position];
}
} else {
error("Unexpected token: " + token.value);
}
}
void program() {
block();
match(TokenType::SYMBOL);
}
void block() {
if (token.type == TokenType::RESERVED_WORD && token.value == "const") {
constDeclaration();
}
if (token.type == TokenType::RESERVED_WORD && token.value == "var") {
varDeclaration();
}
while (token.type == TokenType::RESERVED_WORD && (token.value == "procedure" || token.value == "function")) {
procedureDeclaration();
}
statement();
}
void constDeclaration() {
match(TokenType::RESERVED_WORD);
match(TokenType::IDENT);
match(TokenType::SYMBOL);
match(TokenType::NUMBER);
while (token.type == TokenType::SYMBOL && token.value == ",") {
match(TokenType::SYMBOL);
match(TokenType::IDENT);
match(TokenType::SYMBOL);
match(TokenType::NUMBER);
}
match(TokenType::SYMBOL);
}
void varDeclaration() {
match(TokenType::RESERVED_WORD);
match(TokenType::IDENT);
while (token.type == TokenType::SYMBOL && token.value == ",") {
match(TokenType::SYMBOL);
match(TokenType::IDENT);
}
match(TokenType::SYMBOL);
}
void procedureDeclaration() {
if (token.type == TokenType::RESERVED_WORD && token.value == "procedure") {
match(TokenType::RESERVED_WORD);
match(TokenType::IDENT);
match(TokenType::SYMBOL);
if (token.type == TokenType::IDENT) {
while (token.type == TokenType::IDENT) {
match(TokenType::IDENT);
match(TokenType::SYMBOL);
match(TokenType::IDENT);
}
}
block();
match(TokenType::SYMBOL);
} else if (token.type == TokenType::RESERVED_WORD && token.value == "function") {
match(TokenType::RESERVED_WORD);
match(TokenType::IDENT);
match(TokenType::SYMBOL);
if (token.type == TokenType::IDENT) {
while (token.type == TokenType::IDENT) {
match(TokenType::IDENT);
match(TokenType::SYMBOL);
match(TokenType::IDENT);
}
}
block();
match(TokenType::SYMBOL);
}
}
void statement() {
if (token.type == TokenType::IDENT) {
match(TokenType::IDENT);
if (token.type == TokenType::SYMBOL && token.value == ":=") {
match(TokenType::SYMBOL);
expression();
}
} else if (token.type == TokenType::RESERVED_WORD && token.value == "if") {
match(TokenType::RESERVED_WORD);
condition();
match(TokenType::RESERVED_WORD);
statement();
if (token.type == TokenType::RESERVED_WORD && token.value == "else") {
match(TokenType::RESERVED_WORD);
statement();
}
} else if (token.type == TokenType::RESERVED_WORD && token.value == "while") {
match(TokenType::RESERVED_WORD);
condition();
match(TokenType::RESERVED_WORD);
statement();
} else if (token.type == TokenType::RESERVED_WORD && token.value == "begin") {
match(TokenType::RESERVED_WORD);
statement();
while (token.type == TokenType::SYMBOL && token.value == ";") {
match(TokenType::SYMBOL);
statement();
}
match(TokenType::RESERVED_WORD);
}
}
void condition() {
expression();
if (token.type == TokenType::SYMBOL && (token.value == "=" || token.value == "<>" || token.value == "<"
|| token.value == ">" || token.value == "<=" || token.value == ">=")) {
match(TokenType::SYMBOL);
expression();
}
}
void expression() {
if (token.type == TokenType::SYMBOL && (token.value == "+" || token.value == "-")) {
match(TokenType::SYMBOL);
}
term();
while (token.type == TokenType::SYMBOL && (token.value == "+" || token.value == "-")) {
match(TokenType::SYMBOL);
term();
}
}
void term() {
factor();
while (token.type == TokenType::SYMBOL && (token.value == "*" || token.value == "/")) {
match(TokenType::SYMBOL);
factor();
}
}
void factor() {
if (token.type == TokenType::IDENT) {
match(TokenType::IDENT);
} else if (token.type == TokenType::NUMBER) {
match(TokenType::NUMBER);
} else if (token.type == TokenType::SYMBOL && token.value == "(") {
match(TokenType::SYMBOL);
expression();
match(TokenType::SYMBOL);
}
}
};
int main() {
vector<Token> tokens = {
{ TokenType::RESERVED_WORD, "const" },
{ TokenType::IDENT, "a" },
{ TokenType::SYMBOL, "=" },
{ TokenType::NUMBER, "1" },
{ TokenType::SYMBOL, "," },
{ TokenType::IDENT, "b" },
{ TokenType::SYMBOL, "=" },
{ TokenType::NUMBER, "2" },
{ TokenType::SYMBOL, ";" },
{ TokenType::RESERVED_WORD, "var" },
{ TokenType::IDENT, "x" },
{ TokenType::SYMBOL, "," },
{ TokenType::IDENT, "y" },
{ TokenType::SYMBOL, ";" },
{ TokenType::RESERVED_WORD, "procedure" },
{ TokenType::IDENT, "test" },
{ TokenType::SYMBOL, ";" },
{ TokenType::IDENT, "begin" },
{ TokenType::IDENT, "x" },
{ TokenType::SYMBOL, ":=" },
{ TokenType::IDENT, "a" },
{ TokenType::SYMBOL, "+" },
{ TokenType::IDENT, "b" },
{ TokenType::SYMBOL, ";" },
{ TokenType::IDENT, "if" },
{ TokenType::IDENT, "x" },
{ TokenType::SYMBOL, "=" },
{ TokenType::NUMBER, "3" },
{ TokenType::RESERVED_WORD, "then" },
{ TokenType::IDENT, "y" },
{ TokenType::SYMBOL, ":=" },
{ TokenType::NUMBER, "4" },
{ TokenType::RESERVED_WORD, "else" },
{ TokenType::IDENT, "y" },
{ TokenType::SYMBOL, ":=" },
{ TokenType::NUMBER, "5" },
{ TokenType::RESERVED_WORD, ";" },
{ TokenType::RESERVED_WORD, "end" },
{ TokenType::SYMBOL, ";" },
{ TokenType::RESERVED_WORD, "end" },
{ TokenType::SYMBOL, "." }
};
Parser parser(tokens);
parser.parse();
return 0;
}
```
这个实现中,每个非终结符都对应了一个函数,函数内部实现了对应的语法规则。从 `program()` 开始,依次递归调用其他函数,直到最后匹配到 `.` 结束符。在每个函数中,首先根据当前token的类型进行判断,如果不符合规则就报错,如果符合规则就调用 `match()` 函数将指针后移。`match()` 函数会检查当前token是否符合期望的类型,如果符合就将指针后移,如果不符合就报错。
阅读全文