You are to write a lexical analyzer/scanner for the language decaf in java. Goals: 1. The input of the scanner is a source code file and the output of the scanner is a stream of tokens. 2. Your scanner should go for longest possible match i.e. a string ‘:=’is to be identified as ‘ass-symbol’ and not as ‘:’and ‘=’. 3. Token is represented as (Kind, Value). We use the following symbols to denote different kinds of tokens KEY denotes reserved words SYM denotes special symbols ID denotes identifiers NUM denotes numeric constants STR denotes string constants 4. Check lexical errors: giving meaning error messages and the lines where errors occur. The kinds of lexical errors are: - Illegal character, that is, scanner may recognize a character that is not in the alphabet of TINY+ - The right bracket of a STRING is lost - The right delimiter of a comment is lost
时间: 2023-12-03 14:43:01 浏览: 201
DFA.rar_NFA DFA_fa_java DFA_lexical nfa to dfa_nfa to dfa.cpp
Sure, I can help you with that! Here's a sample code for a lexical analyzer/scanner for the decaf language in Java:
```
import java.io.*;
import java.util.*;
public class Scanner {
private BufferedReader br;
private char currentChar;
private int currentLineNum;
private String currentTokenValue;
// reserved words
private static final Map<String, Token.Kind> reservedWords = new HashMap<>();
static {
reservedWords.put("class", Token.Kind.CLASS);
reservedWords.put("extends", Token.Kind.EXTENDS);
reservedWords.put("static", Token.Kind.STATIC);
reservedWords.put("void", Token.Kind.VOID);
reservedWords.put("main", Token.Kind.MAIN);
reservedWords.put("public", Token.Kind.PUBLIC);
reservedWords.put("private", Token.Kind.PRIVATE);
reservedWords.put("int", Token.Kind.INT);
reservedWords.put("boolean", Token.Kind.BOOLEAN);
reservedWords.put("if", Token.Kind.IF);
reservedWords.put("else", Token.Kind.ELSE);
reservedWords.put("while", Token.Kind.WHILE);
reservedWords.put("return", Token.Kind.RETURN);
reservedWords.put("true", Token.Kind.TRUE);
reservedWords.put("false", Token.Kind.FALSE);
reservedWords.put("this", Token.Kind.THIS);
reservedWords.put("new", Token.Kind.NEW);
}
// special symbols
private static final Map<String, Token.Kind> specialSymbols = new HashMap<>();
static {
specialSymbols.put("(", Token.Kind.LPAREN);
specialSymbols.put(")", Token.Kind.RPAREN);
specialSymbols.put("{", Token.Kind.LBRACE);
specialSymbols.put("}", Token.Kind.RBRACE);
specialSymbols.put("[", Token.Kind.LBRACKET);
specialSymbols.put("]", Token.Kind.RBRACKET);
specialSymbols.put(".", Token.Kind.DOT);
specialSymbols.put(",", Token.Kind.COMMA);
specialSymbols.put(";", Token.Kind.SEMICOLON);
specialSymbols.put("=", Token.Kind.ASSIGN);
specialSymbols.put("!", Token.Kind.NOT);
specialSymbols.put("&", Token.Kind.AND);
specialSymbols.put("|", Token.Kind.OR);
specialSymbols.put("<", Token.Kind.LT);
specialSymbols.put(">", Token.Kind.GT);
specialSymbols.put("==", Token.Kind.EQUAL);
specialSymbols.put("!=", Token.Kind.NOTEQUAL);
specialSymbols.put("<=", Token.Kind.LE);
specialSymbols.put(">=", Token.Kind.GE);
specialSymbols.put("+", Token.Kind.ADD);
specialSymbols.put("-", Token.Kind.SUB);
specialSymbols.put("*", Token.Kind.MUL);
specialSymbols.put("/", Token.Kind.DIV);
specialSymbols.put("%", Token.Kind.MOD);
}
public Scanner(String filename) throws IOException {
br = new BufferedReader(new FileReader(filename));
currentLineNum = 1;
currentChar = (char) br.read();
}
private void getNextChar() throws IOException {
if (currentChar == '\n') {
currentLineNum++;
}
currentChar = (char) br.read();
}
private boolean isWhitespace(char c) {
return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
private boolean isDigit(char c) {
return c >= '0' && c <= '9';
}
private boolean isLetter(char c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
private boolean isLegalCharacter(char c) {
return isWhitespace(c) || isDigit(c) || isLetter(c) || specialSymbols.containsKey(Character.toString(c));
}
private void skipWhitespace() throws IOException {
while (isWhitespace(currentChar)) {
getNextChar();
}
}
private Token scanIdentifierOrKeyword() throws IOException {
StringBuilder sb = new StringBuilder();
while (isLetter(currentChar) || isDigit(currentChar) || currentChar == '_') {
sb.append(currentChar);
getNextChar();
}
String tokenValue = sb.toString();
Token.Kind kind = reservedWords.getOrDefault(tokenValue, Token.Kind.ID);
return new Token(kind, tokenValue, currentLineNum);
}
private Token scanNumber() throws IOException {
StringBuilder sb = new StringBuilder();
while (isDigit(currentChar)) {
sb.append(currentChar);
getNextChar();
}
String tokenValue = sb.toString();
return new Token(Token.Kind.NUM, tokenValue, currentLineNum);
}
private Token scanString() throws IOException {
StringBuilder sb = new StringBuilder();
getNextChar();
while (currentChar != '"') {
if (currentChar == '\n' || currentChar == -1) {
throw new LexicalException("Unterminated string", currentLineNum);
}
sb.append(currentChar);
getNextChar();
}
getNextChar();
return new Token(Token.Kind.STR, sb.toString(), currentLineNum);
}
private Token scanComment() throws IOException {
getNextChar();
getNextChar();
while (!(currentChar == '*' && br.read() == '/')) {
if (currentChar == '\n') {
currentLineNum++;
}
getNextChar();
}
getNextChar();
return getNextToken();
}
private Token scanSpecialSymbol() throws IOException {
StringBuilder sb = new StringBuilder();
while (specialSymbols.containsKey(sb.toString() + currentChar)) {
sb.append(currentChar);
getNextChar();
}
String tokenValue = sb.toString();
Token.Kind kind = specialSymbols.get(tokenValue);
if (kind == null) {
throw new LexicalException("Illegal character: " + tokenValue, currentLineNum);
}
return new Token(kind, tokenValue, currentLineNum);
}
public Token getNextToken() throws IOException {
while (isWhitespace(currentChar)) {
skipWhitespace();
}
if (currentChar == -1) {
return new Token(Token.Kind.EOF, "", currentLineNum);
}
if (!isLegalCharacter(currentChar)) {
throw new LexicalException("Illegal character: " + currentChar, currentLineNum);
}
if (currentChar == '/') {
getNextChar();
if (currentChar == '/') {
while (currentChar != '\n' && currentChar != -1) {
getNextChar();
}
return getNextToken();
} else if (currentChar == '*') {
return scanComment();
} else {
return new Token(Token.Kind.DIV, "/", currentLineNum);
}
}
if (isLetter(currentChar)) {
return scanIdentifierOrKeyword();
}
if (isDigit(currentChar)) {
return scanNumber();
}
if (currentChar == '"') {
return scanString();
}
if (specialSymbols.containsKey(Character.toString(currentChar))) {
return scanSpecialSymbol();
}
throw new LexicalException("Illegal character: " + currentChar, currentLineNum);
}
public static class LexicalException extends RuntimeException {
private int lineNum;
public LexicalException(String message, int lineNum) {
super(message);
this.lineNum = lineNum;
}
public int getLineNum() {
return lineNum;
}
}
}
```
This code reads in a source code file and returns a stream of tokens. It uses a map to store the reserved words and special symbols, and uses a series of `if` statements to determine what kind of token it is dealing with. It also handles lexical errors by throwing a `LexicalException` with a meaningful error message and the line number where the error occurred.
阅读全文