自己编程实现对C语言的词法分析程序,加深对词法分析原理的理解。1、C语言的单词符号分为关键字、标识符、数字、运算符、界符五类,自己设置各类单词符号对应的种别码。
时间: 2023-07-10 11:26:02 浏览: 119
好的,下面是一份简单的C语言词法分析程序示例,使用了C++语言编写,并设置了对应的种别码:
```cpp
#include <cctype>
#include <iostream>
#include <regex>
#include <string>
#include <unordered_map>
using namespace std;
// Category codes assigned to each class of lexical unit.
// ERROR marks a lexeme that belongs to none of the five classes.
enum Token {
    ERROR = -1,   // unrecognized lexeme
    KEYWORD = 1,  // reserved word; the classes below continue the sequence
    IDENTIFIER,   // 2
    NUMBER,       // 3
    OPERATOR,     // 4
    DELIMITER     // 5
};
// Reserved words recognized by the lexer; every entry maps to the KEYWORD
// category code. Looked up by exact spelling.
const std::unordered_map<std::string, int> keywords = {
    {"auto", KEYWORD},     {"break", KEYWORD},    {"case", KEYWORD},
    {"char", KEYWORD},     {"const", KEYWORD},    {"continue", KEYWORD},
    {"default", KEYWORD},  {"do", KEYWORD},       {"double", KEYWORD},
    {"else", KEYWORD},     {"enum", KEYWORD},     {"extern", KEYWORD},
    {"float", KEYWORD},    {"for", KEYWORD},      {"goto", KEYWORD},
    {"if", KEYWORD},       {"int", KEYWORD},      {"long", KEYWORD},
    {"register", KEYWORD}, {"return", KEYWORD},   {"short", KEYWORD},
    {"signed", KEYWORD},   {"sizeof", KEYWORD},   {"static", KEYWORD},
    {"struct", KEYWORD},   {"switch", KEYWORD},   {"typedef", KEYWORD},
    {"union", KEYWORD},    {"unsigned", KEYWORD}, {"void", KEYWORD},
    {"volatile", KEYWORD}, {"while", KEYWORD}
};
// 定义运算符和对应的种别码
const unordered_map<string, int> operators = {
{"+", OPERATOR},
{"-", OPERATOR},
{"*", OPERATOR},
{"/", OPERATOR},
{"%", OPERATOR},
{"++", OPERATOR},
{"--", OPERATOR},
{"==", OPERATOR},
{"!=", OPERATOR},
{"<", OPERATOR},
{">", OPERATOR},
{"<=", OPERATOR},
{">=", OPERATOR},
{"&&", OPERATOR},
{"||", OPERATOR},
{"!", OPERATOR},
{"&", OPERATOR},
{"|", OPERATOR},
{"^", OPERATOR},
{"~", OPERATOR},
{"<<", OPERATOR},
{">>", OPERATOR},
{"=", OPERATOR},
{"+=", OPERATOR},
{"-=", OPERATOR},
{"*=", OPERATOR},
{"/=", OPERATOR},
{"%=", OPERATOR},
{"&=", OPERATOR},
{"|=", OPERATOR},
{"^=", OPERATOR},
{"<<=", OPERATOR},
{">>=", OPERATOR}
};
// Punctuation recognized by the lexer; every entry maps to the DELIMITER
// category code. All spellings are a single character.
const std::unordered_map<std::string, int> delimiters = {
    {"(", DELIMITER}, {")", DELIMITER},
    {"[", DELIMITER}, {"]", DELIMITER},
    {"{", DELIMITER}, {"}", DELIMITER},
    {",", DELIMITER}, {";", DELIMITER},
    {":", DELIMITER}, {"?", DELIMITER},
    {"#", DELIMITER}, {"\\", DELIMITER}
};
// Returns true when `str` is a non-empty sequence consisting only of the
// decimal digits '0'..'9' (an unsigned integer literal). Signs, decimal
// points, exponents and hex/octal prefixes are not accepted.
//
// Implemented as a plain character scan so that no std::regex has to be
// compiled on every call.
bool isNumber(const std::string& str) {
    if (str.empty()) {
        return false;
    }
    for (const char ch : str) {
        if (ch < '0' || ch > '9') {
            return false;
        }
    }
    return true;
}
// Returns true when `str` has the shape of an identifier: a first
// character that is an ASCII letter or underscore, followed by any number
// of ASCII letters, digits or underscores.
//
// This checks shape only; reserved words such as "int" also satisfy it.
//
// Implemented as a plain character scan so that no std::regex has to be
// compiled on every call.
bool isIdentifier(const std::string& str) {
    if (str.empty()) {
        return false;
    }

    const auto isLetterOrUnderscore = [](char ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
    };
    const auto isDigit = [](char ch) { return ch >= '0' && ch <= '9'; };

    if (!isLetterOrUnderscore(str.front())) {
        return false;
    }
    for (const char ch : str) {
        if (!isLetterOrUnderscore(ch) && !isDigit(ch)) {
            return false;
        }
    }
    return true;
}
// 分析单词
int analyzeToken(const string& word) {
if (keywords.count(word)) {
return keywords.at(word);
} else if (isNumber(word)) {
return NUMBER;
} else if (isIdentifier(word)) {
return IDENTIFIER;
} else if (operators.count(word)) {
return operators.at(word);
} else if (delimiters.count(word)) {
return delimiters.at(word);
} else {
return ERROR;
}
}
int main() {
string code = "int main() { int a = 1; int b = 2; int c = a + b; return c; }";
string word = "";
int token = 0;
int index = 0;
int lineNumber = 1;
while (index < code.size()) {
char c = code[index];
if (c == '\n') {
lineNumber++;
} else if (isspace(c)) {
// 忽略空格、制表符等空白字符
} else {
word += c;
auto nextToken = analyzeToken(word);
if (nextToken == ERROR) {
// 如果分析出错,输出错误信息
cout << "Error: invalid token \"" << word << "\" at line " << lineNumber << endl;
return 0;
} else if (nextToken != OPERATOR) {
// 如果遇到运算符,先输出前面的单词
if (token != 0) {
cout << "Token " << token << ": " << word.substr(0, word.size() - 1) << endl;
}
token = nextToken;
word = "";
}
}
index++;
}
// 输出最后一个单词
if (token != 0) {
cout << "Token " << token << ": " << word << endl;
} else {
auto finalToken = analyzeToken(word);
cout << "Token " << finalToken << ": " << word << endl;
}
return 0;
}
```
在上面的代码中,我们通过定义三个unordered_map来存储关键字、运算符和界符,并分别设置了对应的种别码。在analyzeToken函数中,我们根据输入的单词判断其种别,并返回对应的种别码。
在主函数中,我们依次扫描源代码中的字符,跳过空白字符,并调用analyzeToken函数判断单词的种别;analyzeToken内部依次查找关键字表、调用isNumber和isIdentifier、再查找运算符表和界符表。如果analyzeToken返回ERROR,则输出错误信息并结束程序;否则输出对应的种别码和单词内容。
需要注意的是,在遇到运算符时,我们先输出前面的单词,并将当前单词的种别码保存下来,以便后续输出。这是因为运算符可能会包含多个字符,如"++"和">=";如果不先输出前面的单词,会导致输出结果出错。
最后,我们输出最后一个单词,并结束程序的执行。
阅读全文