// lexical_analyzer.cpp : Defines the entry point for the console application.
//
// Reads a source text from a file, splits it into lexemes (delimiters,
// integer constants, string constants, variables, labels, functions and
// keywords) and prints the resulting list.
//
// NOTE(review): the header names were missing from the #include directives in
// the reviewed text. The set below is what this file needs; re-add any
// project-specific header (for example a precompiled header) the build
// requires.

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include <iostream>
#include <string>

// Lexeme categories.
enum Type_lexem {
    spacer,    // delimiter
    intconst,  // integer constant
    strconst,  // string constant
    variable,  // variable name
    label,     // label
    function,  // function name
    keyword    // keyword
};

// States of the scanning automaton.
enum status {
    H,  // initial state (single-character delimiters)
    N,  // scanning an integer
    I,  // scanning an identifier (variable, function or label name)
    K,  // scanning a keyword
    A,  // reading the assignment sign :=
    S,  // scanning a string constant
    X   // placeholder: nothing recognisable
};

// Outcome marker carried by every lexeme.
enum propriety {
    rightl,    // a valid lexeme; more may follow
    rightend,  // valid end of the lexeme list
    wrongend   // scanning stopped because of an error
};

// One node of the singly linked lexeme list.
//
// A Lexeme owns the character array `s` (allocated with new[]) and releases it
// in its destructor. `next` is a non-owning link; the list as a whole is freed
// by Lexic_analyzer.
struct Lexeme {
    Type_lexem ind;    // lexeme category
    char *s;           // text of the lexeme (owned, may be NULL)
    short int numstr;  // line number the lexeme was recorded on
    propriety sign;    // end-of-list / error marker

    // Starts out as an "error" lexeme with no text, so a lexeme that is never
    // filled in cannot be mistaken for a valid one.
    Lexeme() : ind(spacer), s(NULL), numstr(0), sign(wrongend), next(NULL) { }

    Lexeme *next;      // next lexeme in the list (not owned)

    // Deep-copies the text; the copy is detached from any list.
    Lexeme(const Lexeme& x)
        : ind(x.ind), s(clone(x.s)), numstr(x.numstr), sign(x.sign), next(NULL) { }

    // Deep-copies the text; the list link of the target is left untouched.
    Lexeme& operator=(const Lexeme& x)
    {
        if (this != &x) {
            char *copy = clone(x.s);  // allocate first so a throw leaves *this intact
            delete[] s;
            s = copy;
            ind = x.ind;
            numstr = x.numstr;
            sign = x.sign;
        }
        return *this;
    }

    ~Lexeme() { delete[] s; }

private:
    // Returns a new[]-allocated copy of `src`, or NULL when `src` is NULL.
    static char *clone(const char *src)
    {
        if (src == NULL)
            return NULL;
        char *copy = new char[strlen(src) + 1];
        strcpy(copy, src);
        return copy;
    }
};

// Lexical analyzer reading characters from a C stream.
//
// The stream is borrowed: it must stay open while list_Lexem() runs and is
// never closed by this class. The lexeme list built by list_Lexem() is owned
// by the analyzer and freed in its destructor.
class Lexic_analyzer {
    Lexeme *list;    // head of the lexeme list, NULL when empty
    FILE *fp;        // input stream (not owned)
    short int nums;  // current line number, starting at 1

    Lexeme* GetNextLexem();
    status analis(int c);
    void processing(status t, Lexeme *a, int c);
    int f(status t, int c);
    void g(Lexeme *a, const char *s);
    int spacerr(status t, int c);
    void clear();

    // Not copyable: a copy would free the same list twice.
    Lexic_analyzer(const Lexic_analyzer&);
    Lexic_analyzer& operator=(const Lexic_analyzer&);

public:
    Lexic_analyzer(FILE *fp_ext)
    {
        fp = fp_ext;
        list = NULL;
        nums = 1;
    }

    ~Lexic_analyzer() { clear(); }

    bool list_Lexem();
    void printlist();
};

// Frees every node of the lexeme list and leaves the list empty.
void Lexic_analyzer::clear()
{
    while (list != NULL) {
        Lexeme *following = list->next;
        delete list;
        list = following;
    }
}

// Stores a copy of `s` in lexeme `a`, stamps it with the current line number
// and marks it as a valid lexeme.
void Lexic_analyzer::g(Lexeme *a, const char *s)
{
    char *copy = new char[strlen(s) + 1];
    strcpy(copy, s);
    delete[] a->s;  // release any text stored earlier
    a->s = copy;
    a->sign = rightl;
    a->numstr = nums;
}

// Returns 1 when `c` is an acceptable terminator for a token scanned in state
// `t`, otherwise 0.
//
// Numbers, identifiers and keywords must be followed by a delimiter character
// or by white space; a string constant is terminated by a double quote.
int Lexic_analyzer::spacerr(status t, int c)
{
    if (t == N || t == I || t == K) {
        const bool delimiter =
            c == '+' || c == '-' || c == '*' || c == '/' || c == '%' ||
            c == ':' || c == '(' || c == ')' || c == '|' || c == '[' ||
            c == ']' || c == '<' || c == '>' || c == '=' || c == ';';
        const bool blank = c == ' ' || c == '\t' || c == 13 || c == '\n';
        if (delimiter || blank)
            return 1;
    }
    if (t == S && c == '"')
        return 1;
    return 0;
}

// Returns 1 when `c` continues a token that is being scanned in state `t`,
// otherwise 0.
int Lexic_analyzer::f(status t, int c)
{
    const bool letter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');

    switch (t) {
    case N:
        return isdigit(c) ? 1 : 0;
    case I:
        return (isdigit(c) || letter || c == '_') ? 1 : 0;
    case K:
        return (letter || c == '_') ? 1 : 0;
    case S:
        return (c != '"') ? 1 : 0;
    default:
        return 0;
    }
}

// Chooses the automaton state from the first character of a token.
// Returns X for characters that cannot start any token (including EOF).
status Lexic_analyzer::analis(int c)
{
    if (isdigit(c))
        return N;
    if (c == '?' || c == '@' || c == '$')
        return I;
    if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
        return K;
    if (c == ':')
        return A;
    if (c == '"')
        return S;
    if (c == '+' || c == '-' || c == '*' || c == '/' || c == '(' ||
        c == ')' || c == '|' || c == '[' || c == ']' || c == '%' ||
        c == '<' || c == '>' || c == '=' || c == ';')
        return H;
    return X;
}

// Scans one multi-character token in state `t` (N, I, K or S) whose first
// character `c` has already been read, and fills in lexeme `a`.
//
// On success `a` holds the token text, its category and sign == rightl.
// On failure (a token followed by a character that is not a valid terminator,
// or a string constant that is not closed before end of input) `a` is left
// with sign == wrongend and no text.
void Lexic_analyzer::processing(status t, Lexeme *a, int c)
{
    std::string text;  // grows as needed, so long tokens cannot overflow

    a->sign = wrongend;

    if (t == S) {
        // `c` is the opening quote. Collect everything up to the closing
        // quote; an immediately following quote yields an empty constant.
        for (;;) {
            c = getc(fp);
            if (c == EOF)
                return;  // unterminated string constant
            if (!f(S, c))
                break;   // closing quote, consumed and not stored
            text += static_cast<char>(c);
        }
        g(a, text.c_str());
        a->ind = strconst;
        return;
    }

    do {
        text += static_cast<char>(c);
        c = getc(fp);
    } while (c != EOF && f(t, c));

    if (c != EOF) {
        // The terminator belongs to the next token, so hand it back.
        ungetc(c, fp);
        if (!spacerr(t, c))
            return;  // e.g. a letter glued to a number
    }
    // End of input terminates the last token cleanly.

    g(a, text.c_str());
    switch (t) {
    case N:
        a->ind = intconst;
        break;
    case K:
        a->ind = keyword;
        break;
    case I:
        // The prefix character decides the kind of identifier.
        switch (text[0]) {
        case '?': a->ind = function; break;
        case '@': a->ind = label;    break;
        case '$': a->ind = variable; break;
        }
        break;
    default:
        break;
    }
}

// Reads the next lexeme from the input stream.
//
// Always returns a newly allocated Lexeme that the caller must delete:
//   sign == rightl   - a valid lexeme was read;
//   sign == rightend - end of input was reached, the lexeme has no text;
//   sign == wrongend - the input could not be tokenised at this point.
Lexeme* Lexic_analyzer::GetNextLexem()
{
    Lexeme *a = new Lexeme;  // sign == wrongend, s == NULL, next == NULL

    // Skip white space, counting lines on the way.
    int c;
    while ((c = getc(fp)) != EOF) {
        if (c == '\n') {
            ++nums;
            continue;
        }
        if (c == 13) {
            // Count a CR LF pair as one line break, not two.
            ++nums;
            const int following = getc(fp);
            if (following != '\n' && following != EOF)
                ungetc(following, fp);
            continue;
        }
        if (c != ' ' && c != '\t')
            break;
    }

    if (c == EOF) {
        a->sign = rightend;
        return a;
    }

    const status kind = analis(c);
    switch (kind) {
    case N:
    case I:
    case K:
    case S:
        processing(kind, a, c);
        break;

    case A: {
        // ':' is only valid as the first half of the assignment sign.
        const int following = getc(fp);
        if (following == '=') {
            g(a, ":=");
            a->ind = spacer;
        } else if (following != EOF) {
            ungetc(following, fp);  // lone ':' is an error; sign stays wrongend
        }
        break;
    }

    case H: {
        const char one[2] = { static_cast<char>(c), '\0' };
        g(a, one);
        a->ind = spacer;
        break;
    }

    case X:
        break;  // unrecognised character; sign stays wrongend
    }

    return a;
}

// Tokenises the whole input and stores the lexemes in the internal list,
// replacing any list built by an earlier call.
//
// Returns true when end of input was reached without errors (an empty input
// gives an empty list), false when the stream is NULL or a lexical error was
// found. The lexemes read before an error stay in the list; the last list
// element carries sign == rightend on success and wrongend on failure.
bool Lexic_analyzer::list_Lexem()
{
    clear();
    if (fp == NULL)
        return false;

    Lexeme *tail = NULL;
    for (;;) {
        Lexeme *lexeme = GetNextLexem();

        if (lexeme->sign == rightl) {
            if (tail == NULL)
                list = lexeme;
            else
                tail->next = lexeme;
            tail = lexeme;
            continue;
        }

        // Terminator (end of input or error): it carries no text, so it is
        // not linked into the list.
        const bool ok = (lexeme->sign == rightend);
        delete lexeme;
        if (tail != NULL)
            tail->sign = ok ? rightend : wrongend;
        return ok;
    }
}

// Prints every lexeme of the list to standard output: its ordinal number,
// category, line number and text.
void Lexic_analyzer::printlist()
{
    int number = 1;
    for (const Lexeme *l = list; l != NULL; l = l->next, ++number) {
        std::cout << "LexemeNum " << number << ": ";
        std::cout << "identifier: ";
        switch (l->ind) {
        case spacer:   std::cout << "spacer";   break;
        case intconst: std::cout << "intconst"; break;
        case strconst: std::cout << "strconst"; break;
        case variable: std::cout << "variable"; break;
        case label:    std::cout << "label";    break;
        case function: std::cout << "function"; break;
        case keyword:  std::cout << "keyword";  break;
        }
        std::cout << "\n num string: " << l->numstr << " ";
        // Streaming a NULL char* is undefined behaviour, so guard it.
        std::cout << " Lexeme: " << (l->s != NULL ? l->s : "") << "\n\n";
    }
}

// Entry point. Tokenises the file named by the first command-line argument,
// or "c:\robo.txt" when no argument is given, and prints the lexeme list.
// Returns 1 when the file cannot be opened, otherwise 0.
int main(int argc, char *argv[])
{
    const char *path = (argc > 1) ? argv[1] : "c:\\robo.txt";

    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        std::cerr << " Error: cannot open " << path << "\n";
        return 1;
    }

    {
        Lexic_analyzer L(fp);
        if (L.list_Lexem())
            L.printlist();
        else
            std::cout << " Error \n";
    }

    fclose(fp);
    return 0;
}