以前写过两版算术表达式解析代码,但都是基于栈结构或者树模型的,并不是通用的算法。其实算术表达式解析是最基本的词法分析算法,直到我看了《自制编程语言》里面介绍的递归下降分析法,才明白这种问题的终极解决方案是使用词法分析和语法分析。
// Header describing lexical tokens: token.h
//
// Declares the token kinds produced by the lexer, the Token record itself,
// and the two lexer entry points (set_line / get_token).
#ifndef TOKEN_H_VERSION_20140930
#define TOKEN_H_VERSION_20140930

// Kind of a lexical token. BAD_TOKEN is the value get_token stores before
// it has recognized anything.
typedef enum TokenKind{
    BAD_TOKEN,
    NUMBER_TOKEN,
    ADD_OPERATOR_TOKEN,
    SUB_OPERATOR_TOKEN,
    MUL_OPERATOR_TOKEN,
    DIV_OPERATOR_TOKEN,
    LEFT_PAREN_TOKEN,
    RIGHT_PAREN_TOKEN,
    END_OF_LINE_TOKEN,
} TokenKind;

// Capacity of Token::str in bytes, including the terminating '\0'.
#define MAX_TOKEN_SIZE (100)

// One token: its kind, its numeric value (meaningful for NUMBER_TOKEN),
// and the source text it was built from.
// The typedefs keep the bare names Token / TokenKind usable from C as well
// as from C++.
typedef struct Token{
    TokenKind kind;
    double value;
    char str[MAX_TOKEN_SIZE];
} Token;

// Selects the line to tokenize and resets the read position to its start.
void set_line(char* line);
// Reads the next token of the current line into *token.
void get_token(Token* token);

#endif
//词法分析器或者叫单词收割机 lexianalizer.cpp 1 #include "stdafx.h" 2 #include "token.h" 3 #include "stdio.h" 4 #include "stdlib.h" 5 #include "ctype.h" 6 7 static char* st_line; 8 static int st_line_pos; 9 10 typedef enum{ 11 INITIAL_STATUS, 12 IN_INT_PART_STATUS, 13 DOT_STATUS, 14 IN_FRAC_PART_STATUS, 15 }LexerStatus; 16 17 //只是为了获取一个token,里面的循环是忽略空格和读取数字用的 18 void get_token(Token* token) 19 { 20 int out_pos = 0; 21 LexerStatus status = INITIAL_STATUS; 22 char current_char; 23 24 token->kind = BAD_TOKEN; 25 while(st_line[st_line_pos] != ‘\0‘) 26 { 27 current_char = st_line[st_line_pos]; 28 //读取数字 29 if((status == IN_INT_PART_STATUS || status == IN_FRAC_PART_STATUS) 30 && !isdigit(current_char) && current_char != ‘.‘) 31 { 32 token->kind = NUMBER_TOKEN; 33 sscanf_s(token->str, "%lf", &token->value); 34 return; 35 } 36 //忽略空格 37 if(isspace(current_char)) 38 { 39 if(current_char == ‘\n‘) 40 { 41 token->kind = END_OF_LINE_TOKEN; 42 return; 43 } 44 st_line_pos++; 45 continue; 46 } 47 //token太长错误 48 if(out_pos >= MAX_TOKEN_SIZE-1){ 49 fprintf(stderr, "token too long\n"); 50 exit(1); 51 } 52 //token收下这个char 53 token->str[out_pos] = st_line[st_line_pos]; 54 st_line_pos++; 55 out_pos++; 56 token->str[out_pos] = ‘\0‘; 57 58 //根据这个char判断token的kind 59 if(current_char == ‘+‘){ 60 token->kind = ADD_OPERATOR_TOKEN; 61 return; 62 } 63 if(current_char == ‘-‘){ 64 token->kind = SUB_OPERATOR_TOKEN; 65 return; 66 } 67 if(current_char == ‘*‘){ 68 token->kind = MUL_OPERATOR_TOKEN; 69 return; 70 } 71 if(current_char == ‘/‘){ 72 token->kind = DIV_OPERATOR_TOKEN; 73 return; 74 } 75 if(current_char == ‘(‘){ 76 token->kind = LEFT_PAREN_TOKEN; 77 return; 78 } 79 if(current_char == ‘)‘) 80 { 81 token->kind = RIGHT_PAREN_TOKEN; 82 return; 83 } 84 if(isdigit(current_char)){ 85 if(status == INITIAL_STATUS) 86 { 87 status = IN_INT_PART_STATUS; 88 }else if(status == DOT_STATUS) 89 { 90 status = IN_FRAC_PART_STATUS; 91 } 92 }else if(current_char == ‘.‘) 93 { 94 if(status == IN_INT_PART_STATUS){ 95 status = 
DOT_STATUS; 96 }else{ 97 fprintf(stderr, "syntax error\n"); 98 exit(1); 99 } 100 }else{ 101 fprintf(stderr, "bad character(%c)\n", current_char); 102 exit(1); 103 } 104 } 105 } 106 107 108 void set_line(char* line) 109 { 110 st_line = line; 111 st_line_pos = 0; 112 } 113 114 void parse_line(char* buf) 115 { 116 Token token; 117 set_line(buf); 118 for(;;) 119 { 120 get_token(&token); 121 if(token.kind == END_OF_LINE_TOKEN){ 122 break; 123 }else{ 124 printf("kind %d, str %s\n", token.kind, token.str); 125 } 126 } 127 }
//递归向下的语法分析器 parser.cpp 1 #include "stdafx.h" 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include "token.h" 5 6 #define LINE_BUFF_SIZE (1024) 7 8 static Token st_look_ahead_token; 9 static int st_look_ahead_token_exists; 10 11 //可以重复利用已经get的token(get_token本来就只能一直向下读) 12 static void my_get_token(Token* token) 13 { 14 if(st_look_ahead_token_exists) 15 { 16 *token = st_look_ahead_token; 17 st_look_ahead_token_exists=0; 18 }else{ 19 get_token(token); 20 } 21 } 22 23 //归还预读的token放到全局变量 24 static void unget_token(Token* token) 25 { 26 st_look_ahead_token = *token; 27 st_look_ahead_token_exists = 1; 28 } 29 double parse_expression(); 30 //解析一元表达式,token必须是数字 31 static double parse_primary_expressioin() 32 { 33 double value; 34 Token token; 35 int minus_flag = 0; 36 37 my_get_token(&token); 38 if(token.kind == SUB_OPERATOR_TOKEN) 39 { 40 minus_flag = 1; 41 } 42 if(token.kind == NUMBER_TOKEN) 43 { 44 value = token.value; 45 }else if(token.kind == LEFT_PAREN_TOKEN) 46 { 47 value = parse_expression(); 48 my_get_token(&token); 49 if(token.kind != RIGHT_PAREN_TOKEN) 50 { 51 fprintf(stderr, "missing ‘)‘ error\n"); 52 exit(1); 53 } 54 }else{ 55 unget_token(&token); 56 fprintf(stderr, "syntax error\n"); 57 exit(1); 58 return 0.0; 59 60 } 61 if(minus_flag == 1) 62 { 63 value = -value; 64 } 65 return value; 66 67 } 68 69 //解析乘除表达式,可以连续的乘除 70 static double parse_term() 71 { 72 double v1,v2; 73 Token token; 74 75 v1 = parse_primary_expressioin(); 76 for(;;) 77 { 78 my_get_token(&token); 79 if(token.kind != MUL_OPERATOR_TOKEN && token.kind != DIV_OPERATOR_TOKEN) 80 { 81 unget_token(&token); 82 break; 83 } 84 v2 = parse_primary_expressioin(); 85 if(token.kind == MUL_OPERATOR_TOKEN) 86 { 87 v1 *= v2; 88 }else if(token.kind == DIV_OPERATOR_TOKEN) 89 { 90 v1 /= v2; 91 } 92 } 93 return v1; 94 } 95 96 //解析加减表达式,可以连续加减 97 double parse_expression() 98 { 99 double v1,v2; 100 Token token; 101 102 v1=parse_term(); 103 for(;;) 104 { 105 my_get_token(&token); 106 if(token.kind != 
ADD_OPERATOR_TOKEN && token.kind != SUB_OPERATOR_TOKEN) 107 { 108 unget_token(&token); 109 break; 110 } 111 112 v2 = parse_term(); 113 if(token.kind == ADD_OPERATOR_TOKEN) 114 { 115 v1 += v2; 116 }else if(token.kind == SUB_OPERATOR_TOKEN) 117 { 118 v1 -= v2; 119 } 120 121 } 122 return v1; 123 } 124 125 double parse_line() 126 { 127 double value; 128 st_look_ahead_token_exists = 0; 129 value = parse_expression(); 130 return value; 131 } 132 133 134 int _tmain(int argc, _TCHAR* argv[]) 135 { 136 char line[LINE_BUFF_SIZE]; 137 double value; 138 139 printf("this is a LL(1) calc parser,version 1.0, copyright @ jwk.\n>"); 140 while (fgets(line, LINE_BUFF_SIZE, stdin) != NULL) 141 { 142 set_line(line); 143 value = parse_line(); 144 printf("%lf\n>", value); 145 } 146 return 0; 147 }
这么经典的算法,有空的时候我得画个图好好演示一下。
时间: 2024-10-11 00:41:25