基于C++的词法分析器
时间: 2023-08-18 11:03:53 浏览: 35
在C++中实现词法分析器可以使用正则表达式,也可以使用确定性有限自动机(DFA)。下面是一个按照DFA思路手写的代码示例:
```cpp
#include <cctype>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
// Category assigned to each token produced by the lexer.
// The enum is unscoped, so a value streams to an ostream as its integer code.
enum TokenType {
    INT        = 0,  // integer literal
    FLOAT      = 1,  // literal containing a decimal point
    IDENTIFIER = 2,  // name that is not a keyword
    OPERATOR   = 3,  // arithmetic / comparison / logical operator
    DELIMITER  = 4,  // bracket, comma or semicolon
    KEYWORD    = 5,  // reserved word
    ERROR      = 6   // character the lexer does not recognise
};
// One lexical unit: its category plus the exact text taken from the input.
// Kept as a plain aggregate so it can be built with `{type, value}`.
struct Token {
    TokenType   type;   // category of the lexeme
    std::string value;  // characters that make up the lexeme
};
class Lexer {
public:
Lexer(string input) : input(input) {}
vector<Token> tokenize() {
vector<Token> tokens;
int i = 0;
while (i < input.length()) {
if (input[i] == ' ' || input[i] == '\n' || input[i] == '\r' || input[i] == '\t') {
// Skip whitespace characters
i++;
} else if (isdigit(input[i])) {
// Parse integer or float
string value = "";
bool is_float = false;
while (i < input.length() && (isdigit(input[i]) || input[i] == '.')) {
if (input[i] == '.') {
if (is_float) {
break;
} else {
is_float = true;
}
}
value += input[i];
i++;
}
tokens.push_back({is_float ? FLOAT : INT, value});
} else if (isalpha(input[i]) || input[i] == '_') {
// Parse identifier or keyword
string value = "";
while (i < input.length() && (isalnum(input[i]) || input[i] == '_')) {
value += input[i];
i++;
}
if (keywords.count(value)) {
tokens.push_back({KEYWORD, value});
} else {
tokens.push_back({IDENTIFIER, value});
}
} else if (operators.count(input[i]) || delimiters.count(input[i])) {
// Parse operator or delimiter
string value = "";
value += input[i];
i++;
if (i < input.length() && operators.count(value + input[i])) {
value += input[i];
i++;
}
tokens.push_back({OPERATOR, value});
} else {
// Unknown token
string value = "";
value += input[i];
tokens.push_back({ERROR, value});
i++;
}
}
return tokens;
}
private:
string input;
map<string, TokenType> keywords = {
{"if", KEYWORD},
{"else", KEYWORD},
{"while", KEYWORD},
{"for", KEYWORD},
{"int", KEYWORD},
{"float", KEYWORD},
{"void", KEYWORD},
{"return", KEYWORD}
};
map<string, TokenType> operators = {
{"+", OPERATOR},
{"-", OPERATOR},
{"*", OPERATOR},
{"/", OPERATOR},
{"=", OPERATOR},
{"<", OPERATOR},
{">", OPERATOR},
{"==", OPERATOR},
{"!=", OPERATOR},
{"<=", OPERATOR},
{">=", OPERATOR},
{"&&", OPERATOR},
{"||", OPERATOR}
};
map<char, TokenType> delimiters = {
{'(', DELIMITER},
{')', DELIMITER},
{'{', DELIMITER},
{'}', DELIMITER},
{'[', DELIMITER},
{']', DELIMITER},
{',', DELIMITER},
{';', DELIMITER}
};
};
int main() {
string input = "int main() {\n\tfloat x = 3.14;\n\tfor (int i = 0; i < 10; i++) {\n\t\tcout << i * x << endl;\n\t}\n\treturn 0;\n}";
Lexer lexer(input);
vector<Token> tokens = lexer.tokenize();
for (Token token : tokens) {
cout << token.type << " " << token.value << endl;
}
return 0;
}
```
在这个例子中,我们按照DFA的思路逐个字符读取输入并识别令牌。令牌类型共有七种:整数、浮点数、标识符、运算符、分隔符、关键字,以及表示无法识别字符的错误类型。我们使用C++ STL中的映射(map)来存储关键字、运算符和分隔符,并使用枚举类型来表示令牌类型。识别令牌时,我们在循环中根据当前字符判断它符合哪一种模式:如果符合某种模式,就继续读取后续字符并构造对应的令牌;如果不符合任何模式,就把该字符构造成一个错误令牌,然后继续读取输入。最后,我们返回所有构造出的令牌。