重写了上次的代码
1.将node分为三种,voidchar,char,manychars,分别表示空node,单字符node,多字符node(针对自定义的\w,\n,\a);
2.顺序建树;
3.空节点的父子节点为非空节点,非空节点的父子节点为空节点;
4.空节点有多个子节点,非空节点只有一个子节点,根节点为空节点;
5.每次receive一个正则表达式,就在根节点建一子树;
6.转确定有穷自动机时,每次只需沿子节点前进两个节点(未实现)。
这样就清晰多了。
// main.cpp #include <iostream> #include <map> #include <vector> #include <string> #include <cstdlib> using namespace std; #define DEBUG const int MAXMATCHSIZE = 5; const char *word = "\\w"; const char *number = "\\n"; const char *alnum = "\\a"; const char *allchar = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; struct node { int type; vector<node*> voidnode; // type == 0 char schar; // type == 1 vector<char> mchar; // type == 2 node *next; // type >= 1 int number; node(int t = 0):next(NULL), type(t), voidnode(), mchar(), schar(0) { static int count = 0; number = count++; #ifdef DEBUG cout << "new node,count=" << count << ",type=" << type << endl; #endif } }; class parser { private: node *head; map< string, vector<node*> > endnode; node* addnode(const string& name,node *cur, char c, bool pack_flag, bool end_flag);// a letter node* addnode(const string& name,node *cur, string, bool pack_flag, bool end_flag);// or string public: parser(); void receive(string ,string); }; int main() { parser p; p.receive("begin[\\w]end","first"); p.receive("123[word]456","second"); return 0; } // ensure that cur --> voidchar node node* parser::addnode(const string& name,node *cur, char c, bool pack_flag, bool end_flag) { //cout <<c<<endl; node *p = new node(1); p->schar = c; p->next = new node; cur->voidnode.push_back(p); if (pack_flag) p->next->voidnode.push_back(p); if (end_flag) endnode[name].push_back(cur->next); return p->next; } node* parser::addnode(const string& name,node *cur, string s, bool pack_flag, bool end_flag) { //cout<<s<<endl; node *p = new node(2); p->next = new node; if (s[0] == ‘\\‘) { int begin = 0, end = 0; if (s[1] == word[1])begin = 10, end = 10 + 26 + 26; else if (s[1] == alnum[1])end = 36 + 26; else end = 10; p->mchar = vector<char>(allchar + begin, allchar + end); } else { p->mchar = vector<char>(s.begin(),s.end()); } cur->voidnode.push_back(p); if (pack_flag) p->next->voidnode.push_back(cur); if (end_flag) endnode[name].push_back(cur->next); return p->next; } parser::parser():head(NULL) { head = new node(); } void parser::receive(string s, string name) { if (s.empty()) return; node *cur = head; bool pack_flag = false; int len = s.size(); for (int i = 0; i < len; ) { if ( s[i] == ‘*‘ ) pack_flag = true, i++; else if (s[i] == ‘[‘) { int j; for (j = i + 1; j < len && s[j] != ‘]‘; j++); cur = addnode(name,cur ,s.substr(i + 1 , j - i - 1) , pack_flag, j == len - 1); pack_flag = false; i = j + 1; } else if ( isalnum(s[i]) ) { cur = addnode(name,cur, s[i], pack_flag, i == 0); pack_flag = false; i++; } } }
时间: 2024-10-24 12:02:30