This program generates a random Regular Expression in the form of Binary Tree, and repeatedly checks whether an input string match that Regex ("$" as the end of input):
(1) Use McNaughton-Yamada-Thompson (MYT) Construction to generate the NFA of the given regex;
(2) Use the Subset Construction to generate the DFA from the given NFA;
(3) Implement the DFA to check whether an input string match the regex.
1 import java.util.*; 2 import java.io.*; 3 4 class State { 5 public boolean ac; 6 public Edge next; 7 } 8 9 class Edge { 10 public static Set<Character> digit; 11 public static Set<Character> letter; 12 public static Set<Character> binop; 13 public static Set<Character> uniop; 14 public static Set<Character> or; 15 public static Set<Character> plus; 16 public static Set<Character> star; 17 static { 18 digit = new HashSet<Character>(); 19 for (int i=0;i<10;i++) { 20 digit.add(new Character((char)(‘0‘+i))); 21 } 22 letter = new HashSet<Character>(); 23 for (int i=0;i<26;i++) { 24 letter.add(new Character((char)(‘a‘+i))); 25 letter.add(new Character((char)(‘A‘+i))); 26 } 27 binop = new HashSet<Character>(); 28 binop.add(new Character(‘|‘)); 29 binop.add(new Character(‘^‘)); 30 binop.add(new Character(‘&‘)); 31 binop.add(new Character(‘+‘)); 32 binop.add(new Character(‘-‘)); 33 binop.add(new Character(‘*‘)); 34 binop.add(new Character(‘/‘)); 35 binop.add(new Character(‘%‘)); 36 binop.add(new Character(‘<‘)); 37 binop.add(new Character(‘>‘)); 38 binop.add(new Character(‘=‘)); 39 uniop = new HashSet<Character>(); 40 uniop.add(new Character(‘~‘)); 41 uniop.add(new Character(‘!‘)); 42 uniop.add(new Character(‘-‘)); 43 // or, plus and star are just sentinels 44 or = new HashSet<Character>(); 45 plus = new HashSet<Character>(); 46 star = new HashSet<Character>(); 47 } 48 49 public Set<Character> data; 50 public Edge next; 51 public int end; 52 } 53 54 class Node { 55 public Set<Character> data; 56 public Node left, right; 57 58 public Node() { 59 int rand = (new Random()).nextInt(16); 60 if (rand<2) { 61 data = Edge.digit; 62 } else if (rand<5) { 63 data = Edge.letter; 64 } else if (rand<7) { 65 data = Edge.binop; 66 } else if (rand<8) { 67 data = Edge.uniop; 68 } else if (rand<11) { 69 data = Edge.or; 70 left = new Node(); 71 right = new Node(); 72 } else if (rand<14) { 73 data = Edge.plus; 74 left = new Node(); 75 right = new Node(); 76 } else { 77 data = Edge.star; 78 left = new Node(); 79 } 80 } 81 public String toString() { 82 if (data==Edge.digit) { 83 return "DIGIT"; 84 } else if (data==Edge.letter) { 85 return "LETTER"; 86 } else if (data==Edge.uniop) { 87 return "UNIOP"; 88 } else if (data==Edge.binop) { 89 return "BINOP"; 90 } 91 String leftStr="", rightStr=""; 92 if (left!=null) { 93 leftStr = "("+left.toString()+")"; 94 } 95 if (right!=null) { 96 rightStr = "("+right.toString()+")"; 97 } 98 if (data==Edge.or) { 99 return leftStr+"|"+rightStr; 100 } else if (data==Edge.star) { 101 return leftStr+"*"; 102 } else { 103 return leftStr+rightStr; 104 } 105 } 106 } 107 108 class NFA { 109 private List<State> state; 110 111 public NFA(Node root) { 112 state = new LinkedList<State>(); 113 state.add(new State()); // start 114 state.add(new State()); // final 115 state.get(1).ac = true; 116 preOrder(root,0,1); 117 } 118 private void preOrder(Node sub,int from,int to) { 119 // MYT Construction: regex -> NFA 120 if (sub.data==Edge.or) { 121 preOrder(sub.left,from,to); 122 preOrder(sub.right,from,to); 123 } else if (sub.data==Edge.plus) { 124 int mid = insertState(); 125 preOrder(sub.left,from,mid); 126 preOrder(sub.right,mid,to); 127 } else if (sub.data==Edge.star) { 128 int a = insertState(); 129 int b = insertState(); 130 insertEdge(from,null,a); 131 insertEdge(b,null,a); 132 insertEdge(b,null,to); 133 insertEdge(from,null,to); 134 preOrder(sub.left,a,b); 135 } else { 136 insertEdge(from,sub.data,to); 137 } 138 } 139 private int insertState() { 140 // Insert a new state and return its index: 141 state.add(new State()); 142 return state.size()-1; 143 } 144 private void insertEdge(int idx,Set<Character> data,int end) { 145 // Insert an Edge: NTran[idx,end] = data 146 Edge e = new Edge(); 147 e.data = data; 148 e.next = state.get(idx).next; 149 e.end = end; 150 state.get(idx).next = e; 151 } 152 public List<State> getStates() { 153 return state; 154 } 155 } 156 157 class DFA { 158 private List<State> dState; 159 private List<State> nState; 160 private List<Set<State>> nSet; 161 private Queue<Integer> q; 162 private List<Boolean> vis; 163 164 public DFA(Node root) { 165 nState = (new NFA(root)).getStates(); 166 dState = new LinkedList<State>(); 167 nSet = new LinkedList<Set<State>>(); 168 q = new LinkedList<Integer>(); 169 vis = new LinkedList<Boolean>(); 170 bfs(); 171 } 172 private void bfs() { 173 // Create the first dState: 174 dState.add(new State()); 175 nSet.add(new HashSet<State>()); 176 addClosure(nState.get(0),nSet.get(0)); 177 // Breadth-First Search: 178 q.add(new Integer(0)); 179 vis.add(new Boolean(false)); 180 while (!q.isEmpty()) { 181 int idx = q.poll().intValue(); 182 if (!vis.get(idx)) { 183 vis.set(idx, new Boolean(true)); 184 addTrans(idx,Edge.digit); 185 addTrans(idx,Edge.letter); 186 addTrans(idx,Edge.binop); 187 addTrans(idx,Edge.uniop); 188 } 189 } 190 } 191 private void addTrans(int idx,Set<Character> data) { 192 // Add to set all nStates that can be accessed from start through data: 193 Set<State> set = new HashSet<State>(); 194 for (State nstate:nSet.get(idx)) { 195 Edge itr = nstate.next; 196 while (itr!=null) { 197 if (itr.data==data) { 198 addClosure(nState.get(itr.end),set); 199 } 200 itr = itr.next; 201 } 202 } 203 if (!set.isEmpty()) { 204 for (int i=0;i<nSet.size();i++) { 205 // If set exists in nSet, only insert an edge: 206 if (nSet.get(i).equals(set)) { 207 insertEdge(idx,data,i); 208 return; 209 } 210 } 211 insertEdge(idx,data,dState.size()); 212 dState.add(new State()); 213 nSet.add(set); 214 for (State itr:set) { 215 // Mark on accepting states: 216 if (itr.ac) { 217 dState.get(dState.size()-1).ac = true; 218 break; 219 } 220 } 221 q.add(new Integer(dState.size()-1)); 222 vis.add(new Boolean(false)); 223 } 224 } 225 private void addClosure(State start,Set<State> set) { 226 // Add the Epsilon-Closure of nState.get(start) to set: 227 set.add(start); 228 Edge itr = start.next; 229 while (itr!=null) { 230 if (itr.data==null && !set.contains(nState.get(itr.end))) { 231 addClosure(nState.get(itr.end),set); 232 } 233 itr = itr.next; 234 } 235 } 236 private void insertEdge(int idx,Set<Character> data,int end) { 237 // Insert an Edge: DTran[idx,end] = data 238 Edge e = new Edge(); 239 e.data = data; 240 e.next = dState.get(idx).next; 241 e.end = end; 242 dState.get(idx).next = e; 243 } 244 public boolean test(int pos,String expr,int i) { 245 // Test expr.char(i) at dState.get(pos): 246 if (i==expr.length()) { 247 return dState.get(pos).ac; 248 } 249 Edge itr = dState.get(pos).next; 250 while (itr!=null) { 251 if (itr.data.contains(new Character(expr.charAt(i)))) { 252 return test(itr.end,expr,i+1); 253 } 254 itr = itr.next; 255 } 256 return false; 257 } 258 } 259 260 class Regex { 261 private Node root; 262 private DFA dfa; 263 264 public Regex() { 265 root = new Node(); 266 dfa = new DFA(root); 267 } 268 public String toString() { 269 return root.toString(); 270 } 271 public boolean test(String expr) { 272 return dfa.test(0,expr,0); 273 } 274 } 275 276 public class LexicalAnalyzer { 277 278 public static void main(String[] args) { 279 Regex regex = new Regex(); 280 System.out.println("Regex:\t"+regex); 281 Scanner in = new Scanner(System.in); 282 System.out.print("Test Expressions:\n\t"); 283 String expr = in.nextLine(); 284 while (!expr.equals("$")) { 285 if (regex.test(expr)) { 286 System.out.print("\tACCEPTED\n\t"); 287 } else { 288 System.out.print("\tWRONG\n\t"); 289 } 290 expr = in.nextLine(); 291 } 292 in.close(); 293 } 294 }
时间: 2024-10-07 13:34:48