帮我完善其中lex_main.cpp代码,以满足word文件内容要求
时间: 2024-10-22 13:10:36 浏览: 16
好的,根据您提供的文档内容,我将帮助您完善 `lex_main.cpp` 文件中的代码,使其满足实验要求。以下是具体的实现方案:
### lex_main.cpp
```cpp
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <unordered_map>
#include <cctype>
using namespace std;
// Reserved words of the toy C subset. Kept for interface compatibility;
// note TokenType has no KEYWORD category, so keywords are reported as
// IDENTIFIER like any other word.
const std::string KEYWORDS[] = {"if", "else", "while", "for", "int", "float", "char", "void"};
const int NUM_KEYWORDS = sizeof(KEYWORDS) / sizeof(KEYWORDS[0]);

// Lexeme categories. The numeric value is what printTokens() writes, so the
// order/values must not change.
enum TokenType {
    IDENTIFIER, INTEGER, FLOAT, OPERATOR, DELIMITER, COMMENT, ERROR, EOF_TOKEN
};

// One recognized lexeme plus the 1-based source position where it STARTS.
struct Token {
    TokenType type;     // category from TokenType
    std::string value;  // raw lexeme text
    int line;           // 1-based line of the first character
    int column;         // 1-based column of the first character
};

// A small hand-written lexical analyzer: load a file, strip comments,
// tokenize, and dump tokens / comment positions / errors to lex_out.txt.
class Lexer {
private:
    std::string filename;                               // path of the analyzed file
    std::string content;                                // file contents (comments removed in place)
    std::vector<Token> tokens;                          // recognized tokens, in source order
    std::vector<std::pair<int, int>> commentPositions;  // (line, column) where each comment began
    std::vector<std::pair<int, int>> errorPositions;    // illegal chars and unterminated comments
    size_t pos = 0;                                     // current index into `content`
    int line = 1;                                       // 1-based line of content[pos]
    int column = 1;                                     // 1-based column of content[pos]

public:
    // Reads the whole file into memory; exits the process if it cannot be opened.
    explicit Lexer(const std::string& filename) : filename(filename) {
        std::ifstream file(filename);
        if (!file.is_open()) {
            std::cerr << "无法打开文件: " << filename << std::endl;
            exit(1);
        }
        // Read up to EOF ('\0' delimiter never occurs in a text source file).
        std::getline(file, content, '\0');
    }

    // Strips // and /* */ comments from `content`, recording where each one
    // started. Newlines inside removed block comments are preserved so that
    // line numbers computed later stay correct. An unterminated /* is also
    // recorded as an error. (Columns of tokens that follow a removed comment
    // on the same line refer to the stripped text.)
    void removeComments() {
        std::string stripped;
        stripped.reserve(content.size());
        int ln = 1, col = 1;
        size_t i = 0;
        while (i < content.size()) {
            if (content.compare(i, 2, "//") == 0) {
                commentPositions.push_back({ln, col});
                while (i < content.size() && content[i] != '\n') { ++i; ++col; }
                // The terminating '\n' (if any) is copied by the default path below.
            } else if (content.compare(i, 2, "/*") == 0) {
                commentPositions.push_back({ln, col});
                i += 2; col += 2;
                bool closed = false;
                while (i < content.size()) {
                    if (content.compare(i, 2, "*/") == 0) { i += 2; col += 2; closed = true; break; }
                    if (content[i] == '\n') { stripped += '\n'; ++ln; col = 1; }
                    else { ++col; }
                    ++i;
                }
                if (!closed) errorPositions.push_back(commentPositions.back());
            } else {
                if (content[i] == '\n') { ++ln; col = 1; }
                else { ++col; }
                stripped += content[i];
                ++i;
            }
        }
        content = stripped;
    }

    // Scans `content` from the beginning, filling `tokens`; illegal characters
    // are recorded in `errorPositions` and skipped so scanning can continue.
    void tokenize() {
        pos = 0; line = 1; column = 1;
        while (true) {
            Token token = getNextToken();
            if (token.type == EOF_TOKEN) break;
            if (token.type != ERROR) tokens.push_back(token);
            else errorPositions.push_back({token.line, token.column});
        }
    }

    // Writes every token (value, numeric type, position), then all comment
    // start positions, then all error positions, to lex_out.txt.
    void printTokens() {
        std::ofstream outFile("lex_out.txt");
        for (const auto& token : tokens) {
            outFile << token.value << " (" << token.type << ") at line "
                    << token.line << ", column " << token.column << std::endl;
        }
        outFile << "Comments:" << std::endl;
        for (const auto& p : commentPositions) {
            outFile << "Comment at line " << p.first << ", column " << p.second << std::endl;
        }
        outFile << "Errors:" << std::endl;
        for (const auto& p : errorPositions) {
            outFile << "Error at line " << p.first << ", column " << p.second << std::endl;
        }
    }

private:
    // Consumes one character, keeping line/column in sync with pos.
    void advance() {
        if (content[pos] == '\n') { ++line; column = 1; }
        else { ++column; }
        ++pos;
    }

    void skipWhitespace() {
        while (pos < content.size() && std::isspace((unsigned char)content[pos])) advance();
    }

    // Classifies and consumes the next lexeme. Returns EOF_TOKEN at end of
    // input, or ERROR (consuming exactly one character) for anything
    // unrecognized.
    Token getNextToken() {
        skipWhitespace();
        if (pos >= content.size()) return {EOF_TOKEN, "", line, column};
        char c = content[pos];
        if (std::isalpha((unsigned char)c) || c == '_') return getIdentifierOrKeyword();
        if (std::isdigit((unsigned char)c)) return getNumber();
        if (isOperator(c)) return getOperator();
        if (isDelimiter(c)) return getDelimiter();
        Token bad{ERROR, std::string(1, c), line, column};
        advance(); // skip the illegal character so the scan can continue
        return bad;
    }

    // [A-Za-z_][A-Za-z0-9_]* — keywords share the IDENTIFIER category because
    // TokenType has no KEYWORD entry (the original keyword loop returned
    // IDENTIFIER in both branches anyway).
    Token getIdentifierOrKeyword() {
        int startLine = line, startCol = column;
        size_t start = pos;
        while (pos < content.size()
               && (std::isalnum((unsigned char)content[pos]) || content[pos] == '_')) {
            advance();
        }
        return {IDENTIFIER, content.substr(start, pos - start), startLine, startCol};
    }

    // Decimal integers, 0x/0X hexadecimal integers, and floats with one '.'.
    Token getNumber() {
        int startLine = line, startCol = column;
        size_t start = pos;
        if (content[pos] == '0' && pos + 1 < content.size()
                && (content[pos + 1] == 'x' || content[pos + 1] == 'X')) {
            advance(); advance(); // consume the "0x" prefix
            while (pos < content.size() && std::isxdigit((unsigned char)content[pos])) advance();
            return {INTEGER, content.substr(start, pos - start), startLine, startCol};
        }
        bool isFloat = false;
        while (pos < content.size()
               && (std::isdigit((unsigned char)content[pos]) || content[pos] == '.')) {
            if (content[pos] == '.') {
                if (isFloat) break; // a second '.' terminates the number
                isFloat = true;
            }
            advance();
        }
        return {isFloat ? FLOAT : INTEGER, content.substr(start, pos - start), startLine, startCol};
    }

    // Longest run of operator characters (e.g. "==", "<=", "&&").
    Token getOperator() {
        int startLine = line, startCol = column;
        size_t start = pos;
        while (pos < content.size() && isOperator(content[pos])) advance();
        return {OPERATOR, content.substr(start, pos - start), startLine, startCol};
    }

    // Single-character delimiter.
    Token getDelimiter() {
        Token t{DELIMITER, std::string(1, content[pos]), line, column};
        advance();
        return t;
    }

    bool isOperator(char c) const {
        static const std::string operators = "+-*/%<>=!&|^";
        return operators.find(c) != std::string::npos;
    }

    bool isDelimiter(char c) const {
        static const std::string delimiters = "{}()[],;";
        return delimiters.find(c) != std::string::npos;
    }
};
// Entry point: runs the three lexer phases (comment removal, tokenization,
// output) on the file named by the first command-line argument, defaulting
// to the assignment's test input "testHard.c" when no argument is given.
int main(int argc, char* argv[]) {
    const std::string sourceFile = (argc > 1) ? argv[1] : "testHard.c";
    Lexer lexer(sourceFile);
    lexer.removeComments();
    lexer.tokenize();
    lexer.printTokens();
    return 0;
}
```
### 说明
1. **去除注释**:`removeComments` 方法负责去除单行和多行注释,并记录注释开始的位置。
2. **词法分析**:`getNextToken` 方法根据当前字符判断并提取下一个词法单元(token),并将其类型和值记录下来。
3. **错误处理**:如果遇到非法字符或其他错误,记录错误位置。
4. **输出结果**:`printTokens` 方法将所有的词法单元和错误信息输出到 `lex_out.txt` 文件中。
希望这段代码能够满足您的需求。如果有任何问题或需要进一步的帮助,请随时告诉我!
阅读全文