使用C/C++语言编写PL/0编译程序的语法分析代码实现
时间: 2023-12-29 10:02:30 浏览: 104
PL/0是一种简单的编程语言,它的语法分析可以使用递归下降法来实现。下面是一个简单的PL/0语法分析程序的C++代码实现:
```
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cctype>
using namespace std;
const int MAX_SYMBOL_TABLE_SIZE = 1000;
const int MAX_CODE_LENGTH = 1000;
const int MAX_IDENT_LENGTH = 20;
enum {
nulsym = 0, identsym, numbersym, plussym, minussym,
multsym, slashsym, oddsym, eqlsym, neqsym, lessym,
leqsym, gtrsym, geqsym, lparentsym, rparentsym,
commasym, semicolonsym, periodsym, becomessym, beginsym,
endsym, ifsym, thensym, whilesym, dosym, callsym,
constsym, varsym, procsym, writesym, readsym
};
typedef struct {
int kind;
char name[MAX_IDENT_LENGTH + 1];
int val;
int level;
int addr;
} symbol;
typedef struct {
int op;
int l;
int m;
} instruction;
int symbol_table_size = 0;
symbol symbol_table[MAX_SYMBOL_TABLE_SIZE];
instruction code[MAX_CODE_LENGTH];
int code_length = 0;
char input_buffer[1000];
int input_buffer_pos = 0;
int current_token;
char current_ident[MAX_IDENT_LENGTH + 1];
int current_number;
void error(const char* message) {
cerr << message << endl;
exit(EXIT_FAILURE);
}
void get_char() {
if (input_buffer[input_buffer_pos] == 0) {
fgets(input_buffer, sizeof(input_buffer), stdin);
input_buffer_pos = 0;
}
}
void get_token() {
while (isspace(input_buffer[input_buffer_pos])) {
input_buffer_pos++;
}
if (isalpha(input_buffer[input_buffer_pos])) {
int i = 0;
while (isalnum(input_buffer[input_buffer_pos])) {
current_ident[i++] = input_buffer[input_buffer_pos++];
}
current_ident[i] = 0;
current_token = identsym;
} else if (isdigit(input_buffer[input_buffer_pos])) {
current_number = 0;
while (isdigit(input_buffer[input_buffer_pos])) {
current_number = current_number * 10 + input_buffer[input_buffer_pos++] - '0';
}
current_token = numbersym;
} else {
switch (input_buffer[input_buffer_pos]) {
case '+':
current_token = plussym;
break;
case '-':
current_token = minussym;
break;
case '*':
current_token = multsym;
break;
case '/':
current_token = slashsym;
break;
case '=':
current_token = eqlsym;
break;
case '<':
if (input_buffer[input_buffer_pos + 1] == '=') {
input_buffer_pos++;
current_token = leqsym;
} else {
current_token = lessym;
}
break;
case '>':
if (input_buffer[input_buffer_pos + 1] == '=') {
input_buffer_pos++;
current_token = geqsym;
} else {
current_token = gtrsym;
}
break;
case '(':
current_token = lparentsym;
break;
case ')':
current_token = rparentsym;
break;
case ',':
current_token = commasym;
break;
case ';':
current_token = semicolonsym;
break;
case '.':
current_token = periodsym;
break;
default:
error("Invalid character");
}
input_buffer_pos++;
}
}
void enter_symbol(int kind, const char* name, int val, int level, int addr) {
if (symbol_table_size >= MAX_SYMBOL_TABLE_SIZE) {
error("Symbol table overflow");
}
symbol_table[symbol_table_size].kind = kind;
strncpy(symbol_table[symbol_table_size].name, name, sizeof(symbol_table[symbol_table_size].name));
symbol_table[symbol_table_size].val = val;
symbol_table[symbol_table_size].level = level;
symbol_table[symbol_table_size].addr = addr;
symbol_table_size++;
}
int find_symbol(const char* name) {
for (int i = symbol_table_size - 1; i >= 0; i--) {
if (strcmp(symbol_table[i].name, name) == 0) {
return i;
}
}
return -1;
}
void emit(int op, int l, int m) {
if (code_length >= MAX_CODE_LENGTH) {
error("Code overflow");
}
code[code_length].op = op;
code[code_length].l = l;
code[code_length].m = m;
code_length++;
}
void factor(int level) {
if (current_token == identsym) {
int symbol_index = find_symbol(current_ident);
if (symbol_index < 0) {
error("Undefined identifier");
}
switch (symbol_table[symbol_index].kind) {
case constsym:
emit(1, 0, symbol_table[symbol_index].val);
break;
case varsym:
emit(3, level - symbol_table[symbol_index].level, symbol_table[symbol_index].addr);
break;
case procsym:
error("Identifier cannot be a procedure");
}
get_token();
} else if (current_token == numbersym) {
emit(1, 0, current_number);
get_token();
} else if (current_token == lparentsym) {
get_token();
expression(level);
if (current_token != rparentsym) {
error("Missing right parenthesis");
}
get_token();
} else {
error("Invalid factor");
}
}
void term(int level) {
factor(level);
while (current_token == multsym || current_token == slashsym) {
int token = current_token;
get_token();
factor(level);
if (token == multsym) {
emit(2, 0, 4);
} else {
emit(2, 0, 5);
}
}
}
void expression(int level) {
if (current_token == plussym || current_token == minussym) {
int token = current_token;
get_token();
term(level);
if (token == minussym) {
emit(2, 0, 1);
}
} else {
term(level);
}
while (current_token == plussym || current_token == minussym) {
int token = current_token;
get_token();
term(level);
if (token == plussym) {
emit(2, 0, 2);
} else {
emit(2, 0, 3);
}
}
}
void condition(int level) {
if (current_token == oddsym) {
get_token();
expression(level);
emit(2, 0, 6);
} else {
expression(level);
if (current_token == eqlsym || current_token == neqsym || current_token == lessym ||
current_token == leqsym || current_token == gtrsym || current_token == geqsym) {
int token = current_token;
get_token();
expression(level);
switch (token) {
case eqlsym:
emit(2, 0, 8);
break;
case neqsym:
emit(2, 0, 9);
break;
case lessym:
emit(2, 0, 10);
break;
case leqsym:
emit(2, 0, 11);
break;
case gtrsym:
emit(2, 0, 12);
break;
case geqsym:
emit(2, 0, 13);
break;
}
} else {
error("Invalid condition");
}
}
}
void statement(int level) {
if (current_token == identsym) {
int symbol_index = find_symbol(current_ident);
if (symbol_index < 0) {
error("Undefined identifier");
}
if (symbol_table[symbol_index].kind != varsym) {
error("Identifier cannot be a constant or a procedure");
}
get_token();
if (current_token != becomessym) {
error("Missing assignment operator");
}
get_token();
expression(level);
emit(4, level - symbol_table[symbol_index].level, symbol_table[symbol_index].addr);
} else if (current_token == callsym) {
get_token();
if (current_token != identsym) {
error("Missing procedure identifier");
}
int symbol_index = find_symbol(current_ident);
if (symbol_index < 0) {
error("Undefined procedure identifier");
}
if (symbol_table[symbol_index].kind != procsym) {
error("Identifier cannot be a constant or a variable");
}
emit(5, level - symbol_table[symbol_index].level, symbol_table[symbol_index].addr);
get_token();
} else if (current_token == beginsym) {
get_token();
statement(level);
while (current_token == semicolonsym) {
get_token();
statement(level);
}
if (current_token != endsym) {
error("Missing end");
}
get_token();
} else if (current_token == ifsym) {
get_token();
condition(level);
int jmp_instr_index = code_length;
emit(8, 0, 0);
if (current_token != thensym) {
error("Missing then");
}
get_token();
statement(level);
code[jmp_instr_index].m = code_length;
} else if (current_token == whilesym) {
int instr_index = code_length;
get_token();
condition(level);
int jmp_instr_index = code_length;
emit(8, 0, 0);
if (current_token != dosym) {
error("Missing do");
}
get_token();
statement(level);
emit(7, 0, instr_index);
code[jmp_instr_index].m = code_length;
} else if (current_token == readsym) {
get_token();
if (current_token != identsym) {
error("Missing identifier");
}
int symbol_index = find_symbol(current_ident);
if (symbol_index < 0) {
error("Undefined identifier");
}
if (symbol_table[symbol_index].kind != varsym) {
error("Identifier cannot be a constant or a procedure");
}
emit(9, 0, 2);
emit(4, level - symbol_table[symbol_index].level, symbol_table[symbol_index].addr);
get_token();
} else if (current_token == writesym) {
get_token();
expression(level);
emit(9, 0, 1);
}
}
void block(int level) {
int num_vars = 0;
while (current_token == constsym) {
get_token();
if (current_token != identsym) {
error("Missing identifier");
}
char ident[MAX_IDENT_LENGTH + 1];
strncpy(ident, current_ident, sizeof(ident));
get_token();
if (current_token != eqlsym) {
error("Missing equal sign");
}
get_token();
if (current_token != numbersym) {
error("Missing number");
}
enter_symbol(constsym, ident, current_number, level, 0);
num_vars++;
get_token();
}
while (current_token == varsym) {
get_token();
if (current_token != identsym) {
error("Missing identifier");
}
enter_symbol(varsym, current_ident, 0, level, num_vars + 3);
num_vars++;
get_token();
while (current_token == commasym) {
get_token();
if (current_token != identsym) {
error("Missing identifier");
}
enter_symbol(varsym, current_ident, 0, level, num_vars + 3);
num_vars++;
get_token();
}
if (current_token != semicolonsym) {
error("Missing semicolon");
}
get_token();
}
while (current_token == procsym) {
get_token();
if (current_token != identsym) {
error("Missing procedure identifier");
}
enter_symbol(procsym, current_ident, 0, level, code_length);
get_token();
if (current_token != semicolonsym) {
error("Missing semicolon");
}
get_token();
block(level + 1);
if (current_token != semicolonsym) {
error("Missing semicolon");
}
get_token();
}
emit(6, 0, num_vars + 3);
statement(level);
}
int main() {
fgets(input_buffer, sizeof(input_buffer), stdin);
get_char();
get_token();
block(0);
emit(9, 0, 3);
for (int i = 0; i < code_length; i++) {
printf("%d %d %d\n", code[i].op, code[i].l, code[i].m);
}
return 0;
}
```
该程序将从标准输入读取PL/0源代码,并将生成的目标代码写入标准输出。
阅读全文