Lexical Analyzer Generator

　　This program generates a random regular expression in the form of a binary tree, and then repeatedly checks whether each input string matches that regex (enter "$" to end the input):

　　(1) Use McNaughton-Yamada-Thompson (MYT) Construction to generate the NFA of the given regex;

　　(2) Use the Subset Construction to generate the DFA from the given NFA;

　　(3) Simulate the DFA to check whether an input string matches the regex.

  1 import java.util.*;
  2 import java.io.*;
  3
  4 class State {
  5     public boolean ac;
  6     public Edge next;
  7 }
  8
  /**
   * A labelled transition between two automaton states, plus the shared
   * character classes that serve as edge labels and regex node types.
   *
   * The label sets are compared by reference (==) elsewhere in the program,
   * so each one must remain a distinct object; in particular the three empty
   * sentinel sets must not be replaced by a shared empty-set instance.
   */
  class Edge {
      /** The characters '0'..'9'. */
      public static Set<Character> digit;
      /** The characters 'a'..'z' and 'A'..'Z'. */
      public static Set<Character> letter;
      /** Binary operator characters. Note that '-' is also a member of uniop. */
      public static Set<Character> binop;
      /** Unary operator characters. Note that '-' is also a member of binop. */
      public static Set<Character> uniop;
      /** Empty sentinel marking an alternation node. */
      public static Set<Character> or;
      /** Empty sentinel marking a "plus" node (built as a concatenation by NFA). */
      public static Set<Character> plus;
      /** Empty sentinel marking a Kleene-star node. */
      public static Set<Character> star;

      static {
          digit = new HashSet<Character>();
          for (char c = '0'; c <= '9'; c++) {
              digit.add(c);
          }
          letter = new HashSet<Character>();
          for (int i = 0; i < 26; i++) {
              letter.add((char) ('a' + i));
              letter.add((char) ('A' + i));
          }
          binop = new HashSet<Character>();
          for (char c : "|^&+-*/%<>=".toCharArray()) {
              binop.add(c);
          }
          uniop = new HashSet<Character>();
          for (char c : "~!-".toCharArray()) {
              uniop.add(c);
          }
          // or, plus and star are just sentinels: they stay empty and are only
          // ever distinguished by object identity.
          or = new HashSet<Character>();
          plus = new HashSet<Character>();
          star = new HashSet<Character>();
      }

      /** Label of this edge; null denotes an epsilon transition. */
      public Set<Character> data;
      /** Next edge leaving the same source state; null at the end of the list. */
      public Edge next;
      /** Index of the target state in the owning automaton's state list. */
      public int end;
  }
 53
 54 class Node {
 55     public Set<Character> data;
 56     public Node left, right;
 57
 58     public Node() {
 59         int rand = (new Random()).nextInt(16);
 60         if (rand<2) {
 61             data = Edge.digit;
 62         } else if (rand<5) {
 63             data = Edge.letter;
 64         } else if (rand<7) {
 65             data = Edge.binop;
 66         } else if (rand<8) {
 67             data = Edge.uniop;
 68         } else if (rand<11) {
 69             data = Edge.or;
 70             left = new Node();
 71             right = new Node();
 72         } else if (rand<14) {
 73             data = Edge.plus;
 74             left = new Node();
 75             right = new Node();
 76         } else {
 77             data = Edge.star;
 78             left = new Node();
 79         }
 80     }
 81     public String toString() {
 82         if (data==Edge.digit) {
 83             return "DIGIT";
 84         } else if (data==Edge.letter) {
 85             return "LETTER";
 86         } else if (data==Edge.uniop) {
 87             return "UNIOP";
 88         } else if (data==Edge.binop) {
 89             return "BINOP";
 90         }
 91         String leftStr="", rightStr="";
 92         if (left!=null) {
 93             leftStr = "("+left.toString()+")";
 94         }
 95         if (right!=null) {
 96             rightStr = "("+right.toString()+")";
 97         }
 98         if (data==Edge.or) {
 99             return leftStr+"|"+rightStr;
100         } else if (data==Edge.star) {
101             return leftStr+"*";
102         } else {
103             return leftStr+rightStr;
104         }
105     }
106 }
107
108 class NFA {
109     private List<State> state;
110
111     public NFA(Node root) {
112         state = new LinkedList<State>();
113         state.add(new State());    // start
114         state.add(new State());    // final
115         state.get(1).ac = true;
116         preOrder(root,0,1);
117     }
118     private void preOrder(Node sub,int from,int to) {
119         // MYT Construction: regex -> NFA
120         if (sub.data==Edge.or) {
121             preOrder(sub.left,from,to);
122             preOrder(sub.right,from,to);
123         } else if (sub.data==Edge.plus) {
124             int mid = insertState();
125             preOrder(sub.left,from,mid);
126             preOrder(sub.right,mid,to);
127         } else if (sub.data==Edge.star) {
128             int a = insertState();
129             int b = insertState();
130             insertEdge(from,null,a);
131             insertEdge(b,null,a);
132             insertEdge(b,null,to);
133             insertEdge(from,null,to);
134             preOrder(sub.left,a,b);
135         } else {
136             insertEdge(from,sub.data,to);
137         }
138     }
139     private int insertState() {
140         // Insert a new state and return its index:
141         state.add(new State());
142         return state.size()-1;
143     }
144     private void insertEdge(int idx,Set<Character> data,int end) {
145         // Insert an Edge: NTran[idx,end] = data
146         Edge e = new Edge();
147         e.data = data;
148         e.next = state.get(idx).next;
149         e.end = end;
150         state.get(idx).next = e;
151     }
152     public List<State> getStates() {
153         return state;
154     }
155 }
156
157 class DFA {
158     private List<State> dState;
159     private List<State> nState;
160     private List<Set<State>> nSet;
161     private Queue<Integer> q;
162     private List<Boolean> vis;
163
164     public DFA(Node root) {
165         nState = (new NFA(root)).getStates();
166         dState = new LinkedList<State>();
167         nSet = new LinkedList<Set<State>>();
168         q = new LinkedList<Integer>();
169         vis = new LinkedList<Boolean>();
170         bfs();
171     }
172     private void bfs() {
173         // Create the first dState:
174         dState.add(new State());
175         nSet.add(new HashSet<State>());
176         addClosure(nState.get(0),nSet.get(0));
177         // Breadth-First Search:
178         q.add(new Integer(0));
179         vis.add(new Boolean(false));
180         while (!q.isEmpty()) {
181             int idx = q.poll().intValue();
182             if (!vis.get(idx)) {
183                 vis.set(idx, new Boolean(true));
184                 addTrans(idx,Edge.digit);
185                 addTrans(idx,Edge.letter);
186                 addTrans(idx,Edge.binop);
187                 addTrans(idx,Edge.uniop);
188             }
189         }
190     }
191     private void addTrans(int idx,Set<Character> data) {
192         // Add to set all nStates that can be accessed from start through data:
193         Set<State> set = new HashSet<State>();
194         for (State nstate:nSet.get(idx)) {
195             Edge itr = nstate.next;
196             while (itr!=null) {
197                 if (itr.data==data) {
198                     addClosure(nState.get(itr.end),set);
199                 }
200                 itr = itr.next;
201             }
202         }
203         if (!set.isEmpty()) {
204             for (int i=0;i<nSet.size();i++) {
205                 // If set exists in nSet, only insert an edge:
206                 if (nSet.get(i).equals(set)) {
207                     insertEdge(idx,data,i);
208                     return;
209                 }
210             }
211             insertEdge(idx,data,dState.size());
212             dState.add(new State());
213             nSet.add(set);
214             for (State itr:set) {
215                 // Mark on accepting states:
216                 if (itr.ac) {
217                     dState.get(dState.size()-1).ac = true;
218                     break;
219                 }
220             }
221             q.add(new Integer(dState.size()-1));
222             vis.add(new Boolean(false));
223         }
224     }
225     private void addClosure(State start,Set<State> set) {
226         // Add the Epsilon-Closure of nState.get(start) to set:
227         set.add(start);
228         Edge itr = start.next;
229         while (itr!=null) {
230             if (itr.data==null && !set.contains(nState.get(itr.end))) {
231                 addClosure(nState.get(itr.end),set);
232             }
233             itr = itr.next;
234         }
235     }
236     private void insertEdge(int idx,Set<Character> data,int end) {
237         // Insert an Edge: DTran[idx,end] = data
238         Edge e = new Edge();
239         e.data = data;
240         e.next = dState.get(idx).next;
241         e.end = end;
242         dState.get(idx).next = e;
243     }
244     public boolean test(int pos,String expr,int i) {
245         // Test expr.char(i) at dState.get(pos):
246         if (i==expr.length()) {
247             return dState.get(pos).ac;
248         }
249         Edge itr = dState.get(pos).next;
250         while (itr!=null) {
251             if (itr.data.contains(new Character(expr.charAt(i)))) {
252                 return test(itr.end,expr,i+1);
253             }
254             itr = itr.next;
255         }
256         return false;
257     }
258 }
259
260 class Regex {
261     private Node root;
262     private DFA dfa;
263
264     public Regex() {
265         root = new Node();
266         dfa = new DFA(root);
267     }
268     public String toString() {
269         return root.toString();
270     }
271     public boolean test(String expr) {
272         return dfa.test(0,expr,0);
273     }
274 }
275
276 public class LexicalAnalyzer {
277
278     public static void main(String[] args) {
279         Regex regex = new Regex();
280         System.out.println("Regex:\t"+regex);
281         Scanner in = new Scanner(System.in);
282         System.out.print("Test Expressions:\n\t");
283         String expr = in.nextLine();
284         while (!expr.equals("$")) {
285             if (regex.test(expr)) {
286                 System.out.print("\tACCEPTED\n\t");
287             } else {
288                 System.out.print("\tWRONG\n\t");
289             }
290             expr = in.nextLine();
291         }
292         in.close();
293     }
294 }

时间： 2024-10-07 13:34:48

Lexical Analyzer Generator的相关文章

Stanford CS1 Compilers PA2J notes

[题记·碎碎念] 斯坦福这门Compilers原本放在Coursera上,当年错过档期很是可惜,后来发现他们自己的MOOC站放了一个self paced版本,真是普大喜奔,趁着放假有空学学. 这门课的分量可以很轻,亦可很重.self-paced版本如果你只是看看视频,做做quiz,那么很简单,根据课程介绍,一个星期3小时就够了:而如果你要做programming assignments,老师给“有经验的”程序员预估6-10个小时/周的工作量.其实Coursera版写的是8-10 hours/we

Ubuntu 安装软件和centos 对比命令

之前都是使用Redhat 或者Centos 等rpm的linux ,开始使用ubuntu 很不习惯 1. 安装命令Centos : yum install httpd Ubuntu: sudo apt-get install apache2 或者sudo aptitude install apache2 yum list yum search

基于PLY的STL文件基本信息统计方法

1.摘要 STL文件是快速成型设备中常用的文件格式,随着3D打印技术的发展,STL格式文件的应用日益广泛.Python语言具有丰富和强大的类库,其语言简洁而清晰,因而Python语言越来越受欢迎.PLY是具名的LEX.YACC的Python版本,本文介绍了利用PLY模块对STL文件进行基本信息统计. KeyWord:PLY.语法分析.词法分析.STL 2.简介计算机语言是计算机软件的系统的基础,经过几十年的发展,实现了从低级语言到高级语言的转变.原始的机器语言,到低级的汇编语言,它们虽然可以运

Erlang 设计原则 process port io

Erlang原理 (转载自ITEYE cryolite博客 ps:精彩)by Robert Virding This is a description of some of the basic properties and features of Erlang and an attempt to describe the rationale behind them. Erlang grew as we better understood the original problem we were

C# 词法分析器（一）词法分析介绍

系列导航 (一)词法分析介绍 (二)输入缓冲和代码定位 (三)正则表达式 (四)构造 NFA (五)转换 DFA (六)构造词法分析器 (七)总结虽然文章的标题是词法分析,但首先还是要从编译原理说开来.编译原理应该很多人都听说过,虽然不一定会有多么了解. 简单的说,编译原理就是研究如何进行编译——也就如何从代码(*.cs 文件)转换为计算机可以执行的程序(*.exe 文件).当然也有些语言如 JavaScript 是解释执行的,它的代码是直接被执行的,不需要生成可执行程序. 编译过程是很复杂的

cs262 Programming Languages（2）Lexical Analysis

这一讲重要的内容从13-Specifying Tokens开始.但是一开始就出现了这么个东西: def t_RANGLES(token) r'>' return token 前面完全没提怎么来的,看着有点迷糊,特别是r'>'这个,这是什么语法,于是第一次就放弃了.后来知道是在用PLY这个库,也看到文档中是这么说的: When a function is used, the regular expression rule is specified in the function document

Lexical Analysis & State Diagrams

Assignment 2: Lexical Analysis & State DiagramsLexical Analysis (Overview):Lexical analysis is the process of converting a sequence of characters from source program into a sequence of tokens. A lexical analyzer is a pattern matcher for character str

centeros-ns3/Redhead

Installation From Nsnam Jump to: navigation, search Main Page - Current Development - Developer FAQ - Tools - Related Projects - Project Ideas - Summer Projects Installation - Troubleshooting - User FAQ - HOWTOs - Samples - Models - Education - Con

语法创建示例之——CREATE PROCEDURE

1.背景知识 1) flex: The Fast Lexical Analyzer 2) Bison: A general-purpose parser generator 3) C语言前二者请自行查阅文档吧,中文资料也很多,OSChina上搜索就可以看到它们的介绍 2.过程首先,有些语言是区分函数与过程的,有些没有,但它们其实没有本质区别,还是一个东西.对于PG来讲,过程就是RETURNS VOID函数,因此CREATE PROCEDURE其实也就是创建一个同义词.我们来看CREATE F