Example code: implementing a simple lexical analyzer in Java

Author：Eve Cole Update Time：2025-06-08 14:48:02

First, let’s take a look at the code segment we want to analyze as follows:

The output result is as follows:

Output result (a).PNG

Output result (b).PNG

Output result (c).PNG

Each token is printed with a pair in parentheses: (token category code, token position number)

The code is as follows:

 package Yue.LexicalAnalyzer;

import java.io.IOException;

/*
 * Main program: builds a Lexer, prints the token sequence, then prints the
 * symbol table.
 */
public class Main {

    /*
     * Entry point. The command-line arguments are not used.
     * Any IOException raised by the lexer's output routines is propagated.
     */
    public static void main(String[] args) throws IOException {
        final Lexer analyzer = new Lexer();
        // Tokens must be printed first: scanning is what fills the symbol table.
        analyzer.printToken();
        analyzer.printSymbolsTable();
    }
}

 package Yue.LexicalAnalyzer;import java.io.*;import java.util.*;/* * Lexical analysis and output */public class Lexer { /*Record line number*/ public static int line = 1; /*Storage the latest read characters*/ char character = ' '; /*Reserve words*/ Hashtable<String, KeyWord> keywords = new Hashtable<String, KeyWord>(); /*Token sequence*/ private ArrayList<Token> tokens = new ArrayList<Token>(); /*Symbol table*/ private ArrayList<Symbol> symtable = new ArrayList<Symbol>(); /*Read file variable*/ BufferedReader reader = null; /*Save whether the end of the file is currently read*/ private Boolean isEnd = false; /*Whether the end of the file is read*/ public Boolean getReaderState() { return this.isEnd; } /*Print tokens sequence*/ public void printToken() throws IOException { FileWriter writer = new FileWriter("E://lex.txt"); System.out.println("Lexical analysis results are as follows: "); System.out.print("Du Yue-2015220201031/r/n/n"); writer.write("Du Yue-2015220201031/r/n/r/n"); while (getReaderState() == false) { Token tok = scan(); String str = "line " + tok.line + "/t(" + tok.tag + "," + tok.pos + ")/t/t" + tok.name + ": " + tok.toString() + "/r/n"; writer.write(str); System.out.print(str); } writer.flush(); } /*Print symbol table*/ public void printSymbolsTable() throws IOException { FileWriter writer = new FileWriter("E://symtab1.txt"); System.out.print("/r/n/r/n symbol table/r/n"); System.out.print("number/t line number/t name/r/n"); writer.write("symbol table/r/n"); writer.write("number" + "/t line number" + "/t name/r/n"); Iterator<Symbol> e = symtable.iterator(); while (e.hasNext()) { Symbol symbol = e.next(); String desc = symbol.pos + "/t" + symbol.line + "/t" + symbol.toString(); System.out.print(desc + "/r/n"); writer.write(desc + "/r/n"); } writer.flush(); } /*Print error*/ public void printError(Token tok) throws IOException{ FileWriter writer = new FileWriter("E://error.txt"); System.out.print("/r/n/r/n Error lexicography is as follows: /r/n"); 
writer.write("Error lexicography is as follows: /r/n"); String str = "line " + tok.line + "/t(" + tok.tag + "," + tok.pos + ")/t/t" + tok.name + ": " + tok.toString() + "/r/n"; writer.write(str); } /*Add reserved words*/ void reserve(KeyWord w) { keywords.put(w.lexme, w); } public Lexer() { /*Initialize the read file variable*/ try { reader = new BufferedReader(new FileReader("E://Input.txt")); } catch (IOException e) { System.out.print(e); } /*Add reserved words*/ this.reserve(KeyWord.begin); this.reserve(KeyWord.end); this.reserve(KeyWord.integer); this.reserve(KeyWord.function); this.reserve(KeyWord.read); this.reserve(KeyWord.write); this.reserve(KeyWord.aIf); this.reserve(KeyWord.aThen); this.reserve(KeyWord.aElse); } /*Read by character*/ public void readch() throws IOException { character = (char) reader.read(); if ((int) character == 0xffff) { this.isEnd = true; } } /*Judge whether it matches*/ public Boolean readch(char ch) throws IOException { readch(); if (this.character != ch) { return false; } this.character = ' '; return true; } /*Recognition of numbers*/ public Boolean isDigit() throws IOException { if (Character.isDigit(character)) { int value = 0; while (Character.isDigit(character)) { value = 10 * value + Character.digit(character, 10); readch(); } Num n = new Num(value); n.line = line; tokens.add(n); return true; } else return false; } /*Recognition of reserved words and identifiers*/ public Boolean isLetter() throws IOException { if (Character.isLetter(character)) { StringBuffer sb = new StringBuffer(); /*First get the entire split*/ while (Character.isLetterOrDigit(character)) { sb.append(character); readch(); } /*Judge whether it is a reserved word or an identifier*/ String s = sb.toString(); KeyWord w = keywords.get(s); /*If it is a reserved word, w should not be empty*/ if (w != null) { w.line = line; tokens.add(w); } else { /* Otherwise it is an identifier, here there are additional statements that record the identifier number*/ Symbol sy = 
new Symbol(s); Symbol mark = sy; // Used to mark the existing identifier Boolean isRepeat = false; sy.line = line; for (Symbol i : symtable) { if (sy.toString().equals(i.toString())) { mark = i; isRepeat = true; } } if (!isRepeat) { sy.pos = symtable.size() + 1; symtable.add(sy); } else if (isRepeat) { sy.pos = mark.pos; } tokens.add(sy); } return true; } else return false; } /*Symbol recognition*/ public Boolean isSign() throws IOException { switch (character) { case '#': readch(); AllEnd.allEnd.line = line; tokens.add(AllEnd.allEnd); return true; case '/r': if (readch('/n')) { readch(); LineEnd.lineEnd.line = line; tokens.add(LineEnd.lineEnd); line++; return true; } case '(': readch(); Delimiter.lpar.line = line; tokens.add(Delimiter.lpar); return true; case ')': readch(); Delimiter.rpar.line = line; tokens.add(Delimiter.rpar); return true; case ';': readch(); Delimiter.sem.line = line; tokens.add(Delimiter.sem); return true; case '+': readch(); CalcWord.add.line = line; tokens.add(CalcWord.add); return true; case '-': readch(); CalcWord.sub.line = line; tokens.add(CalcWord.sub); return true; case '*': readch(); CalcWord.mul.line = line; tokens.add(CalcWord.mul); return true; case '/': readch(); CalcWord.div.line = line; tokens.add(CalcWord.div); return true; case ':': if (readch('=')) { readch(); CalcWord.assign.line = line; tokens.add(CalcWord.assign); return true; } break; case '>': if (readch('=')) { readch(); CalcWord.ge.line = line; tokens.add(CalcWord.ge); return true; } break; case '<': if (readch('=')) { readch(); CalcWord.le.line = line; tokens.add(CalcWord.le); return true; } break; case '!': if (readch('=')) { readch(); CalcWord.ne.line = line; tokens.add(CalcWord.ne); return true; } break; } return false; } /*The following starts to split keywords, identifiers and other information*/ public Token scan() throws IOException { Token tok; while (character == ' ') readch(); if (isDigit() || isSign() || isLetter()) { tok = tokens.get(tokens.size() - 1); } 
else { tok = new Token(character); printError(tok); } return tok; }}

 package Yue.LexicalAnalyzer;/* * Token parent class */public class Token { public final int tag; public int line = 1; public String name = ""; public int pos = 0; public Token(int t) { this.tag = t; } public String toString() { return "" + (char) tag; }}

 package Yue.LexicalAnalyzer;/* * Word category assignment*/public class Tag { public final static int BEGIN = 1, //Reserved word END = 2, //Reserved word INTEGER = 3, //Reserved word FUNCTION = 4, //Reserved word READ = 5, //Reserved word WRITE = 6, //Reserved word IF = 7, //Reserved word THEN = 8, //Reserved word ELSE = 9, //Reserved word SYMBOL = 11, //Identifier CONSTANT = 12, //Constant ADD = 13, //Operator "+" SUB = 14, //Operator "-" MUL = 15, //Operator "*" DIV = 16, //Operator "/" LE = 18, //Operator "<=" GE = 19, //Operator">=" NE = 20, //Operator"!=" ASSIGN = 23, //Operator":=" LPAR = 24, //Operator"(" RPAR = 25, //Operator")" SEM = 26, //Operator";" LINE_END = 27, //Operator ALL_END = 28; //Operator "#"}

 package Yue.LexicalAnalyzer;/** * Reserved words*/public class KeyWord extends Token { public String lexme = ""; public KeyWord(String s, int t) { super(t); this.lexme = s; this.name = "Reserved words"; } public String toString() { return this.lexme; } public static final KeyWord begin = new KeyWord("begin", Tag.BEGIN), end = new KeyWord("end", Tag.END), integer = new KeyWord("integer", Tag.INTEGER), function = new KeyWord("function", Tag.FUNCTION), read = new KeyWord("read", Tag.READ), write = new KeyWord("write", Tag.WRITE), aIf = new KeyWord("if", Tag.IF), aThen = new KeyWord("then", Tag.THEN), aElse = new KeyWord("else", Tag.ELSE);}

 package Yue.LexicalAnalyzer;/* * Identifier*/public class Symbol extends Token { public String lexme = ""; public Symbol(String s) { super(Tag.SYMBOL); this.lexme = s; this.name = "Identifier"; } public String toString() { return this.lexme; }}

 package Yue.LexicalAnalyzer;/** * operator*/public class CalcWord extends Token { public String lexme = ""; public CalcWord(String s, int t) { super(t); this.lexme = s; this.name = "operator"; } public String toString() { return this.lexme; } public static final CalcWord add = new CalcWord("+", Tag.ADD), sub = new CalcWord("-", Tag.SUB), mul = new CalcWord("*", Tag.MUL), div = new CalcWord("/", Tag.DIV), le = new CalcWord("<=", Tag.LE), ge = new CalcWord(">=", Tag.GE), ne = new CalcWord("!=", Tag.NE), assign = new CalcWord(":=", Tag.ASSIGN);}

 package Yue.LexicalAnalyzer;/** * bounding symbol*/public class Delimiter extends Token { public String lexme = ""; public Delimiter(String s, int t) { super(t); this.lexme = s; this.name = " bounding symbol"; } public String toString() { return this.lexme; } public static final Delimiter lpar = new Delimiter("(", Tag.LPAR), rpar = new Delimiter(")", Tag.RPAR), sem = new Delimiter(";", Tag.SEM);}

 package Yue.LexicalAnalyzer;/* * Constant*/public class Num extends Token { public final int value; public Num(int v) { super(Tag.CONSTANT); this.value = v; this.name = "constant"; } public String toString() { return "" + value; }}

 package Yue.LexicalAnalyzer;/** * end of line characters*/public class LineEnd extends Token { public String lexme = ""; public LineEnd(String s) { super(Tag.LINE_END); this.lexme = s; this.name = "end of line characters"; } public String toString() { return this.lexme; } public static final LineEnd lineEnd = new LineEnd("/r/n");}

 package Yue.LexicalAnalyzer;/** * Ending character*/public class AllEnd extends Token { public String lexme = ""; public AllEnd(String s) { super(Tag.ALL_END); this.lexme = s; this.name = "end character"; } public String toString() { return this.lexme; } public static final AllEnd allEnd = new AllEnd("#");}

Summary

That is the entire content of this article. I hope it will be of some help to your study or work. If you have any questions, you can leave a message to discuss them.