main.c #include <stdio.h> #include <stdlib.h> #include <ctype.h> #include "lex.h"
/* Scanner interface; the definitions live in lex.c. */
extern int get_tok();    /* returns the code (TK_*) of the next token */
extern int yylval;       /* numeric value of the latest TK_NUM */
extern int yylen;        /* number of characters of the latest TK_ID */
extern char yytext[];    /* characters of the latest TK_ID */
extern FILE *fin;        /* stream the scanner reads its input from */
main(int argc, char *argv[]) { int n, i; char *pc;
if (argc != 2) { fprintf(stderr, "usage: %s <input>\n", argv[0]); exit(1) ; } if ((fin = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "can't open %s\n", argv[1]); exit(1) ; } n = get_tok(); while (n != TK_EOF) { switch(n) { case TK_ID: printf("%s", "TK_ID: "); pc = yytext; for (i=0; i<yylen; i++) printf("%c", *pc++); printf("\n"); break; case TK_NUM: printf("%s", "TK_NUM: "); printf("%d\n", yylval); break; case TK_IF: printf("%s\n", "TK_IF"); break; case TK_ELSE: printf("%s\n", "TK_ELSE"); break; case TK_ASSIGN: printf("%s\n", "TK_ASSIGN"); break; case TK_EQ: printf("%s\n", "TK_EQ"); break; case TK_NE: printf("%s\n", "TK_NE"); break; case TK_GT: printf("%s\n", "TK_GT"); break; case TK_GE: printf("%s\n", "TK_GE"); break; case TK_LT: printf("%s\n", "TK_LT"); break; case TK_LE: printf("%s\n", "TK_LE"); break; case TK_LPAR: printf("%s\n", "TK_LPAR"); break; case TK_RPAR: printf("%s\n", "TK_RPAR"); break; case TK_SCOLON: printf("%s\n", "TK_SCOLON"); break; } n = get_tok(); } printf("%s\n", "TK_EOF"); fclose(fin); } |
/* lex.h - token codes returned by get_tok(). */
#ifndef LEX_H
#define LEX_H

/* Codes start at 256, above every value an unsigned char can hold. */
#define TK_ID      256  /* identifier */
#define TK_NUM     257  /* number */
#define TK_IF      258  /* if */
#define TK_ELSE    259  /* else */
#define TK_ASSIGN  260  /* =  */
#define TK_EQ      261  /* == */
#define TK_NE      262  /* != */
#define TK_GT      263  /* >  */
#define TK_GE      264  /* >= */
#define TK_LT      265  /* <  */
#define TK_LE      266  /* <= */
#define TK_LPAR    267  /* (  */
#define TK_RPAR    268  /* )  */
#define TK_SCOLON  269  /* ;  */
#define TK_EOF     270  /* end of input */

#endif /* LEX_H */
lex.c #include <stdio.h> #include <stdlib.h> #include <ctype.h> #include "lex.h"
/* Size of one half of the input buffer, and the byte that terminates it. */
#define BufSize  1024
#define Sentinel '\0'

/* Whitespace characters skipped between tokens. */
#define Space   ' '
#define Tab     '\t'
#define Newline '\n'

#define MAX_MSG 30  /* maximum size of an error message */
#define MAX_BUF 80  /* maximum size of an identifier */

/*
 * Input buffer: two halves of BufSize bytes plus one trailing sentinel.
 * fillbuf() reads file data into the second half only.
 */
static unsigned char buf[2*BufSize + 1];
static unsigned char *lexeme_beginning, *forward;

/* Position used in error messages. */
static int line = 1, column = 0;

/* Step the read pointer back by one character. */
#define retract1 (forward--)

/*
 * Yield the next character.  On reaching a sentinel, `forward` is first
 * moved just past the start of the second half and fillbuf() then
 * supplies the character (0 when no more input could be read).
 */
#define NextChar() ((*forward) ? *forward++ : ((forward = buf + BufSize + 1), fillbuf()))

/* Advance the start-of-lexeme pointer by one character. */
#define Add (lexeme_beginning++)

/* Non-zero until get_tok() has set up the buffer pointers. */
static int IsFirst = 1;

int yylval, yylen;
char yytext[MAX_BUF];

void yyerror(char *msg);
char err_msg[MAX_MSG];  /* error message buffer */
/* Input stream read by fillbuf(); the driver opens it before scanning. */
FILE *fin;

/*
 * First-character dispatch table: next_state[c] is the scanner state
 * entered when a token starts with byte c.
 *
 *    0  no token starts with this byte (reported as invalid)
 *    1  'i'          2  'e'          3  any other letter
 *    4  digit        5  '='          8  '!'
 *    9  '>'         12  '<'         15  '('
 *   16  ')'         17  ';'         18  end of input (NUL and 0x1A)
 *   19  '/'
 *
 * The table is sized for every unsigned char value; entries 128..255
 * are zero-initialized, so those bytes are invalid instead of indexing
 * past the end of the array.
 */
static const int next_state[256] = {
    18,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /*   0 ~  14 */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 18,  0,  0,  0,  /*  15 ~  29 */
     0,  0,  0,  8,  0,  0,  0,  0,  0,  0, 15, 16,  0,  0,  0,  /*  30 ~  44 */
     0,  0, 19,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  0, 17,  /*  45 ~  59 */
    12,  5,  9,  0,  0,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  /*  60 ~  74 */
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  /*  75 ~  89 */
     3,  0,  0,  0,  0,  0,  0,  3,  3,  3,  3,  2,  3,  3,  3,  /*  90 ~ 104 */
     1,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  /* 105 ~ 119 */
     3,  3,  3,  0,  0,  0,  0,  0                               /* 120 ~ 127 */
};
/*
 * Refills the second half of the buffer from `fin`.
 *
 * Called by NextChar() after it has already moved `forward` to
 * buf + BufSize + 1, so `forward - 1` is the first byte of the second
 * half.  The bytes from lexeme_beginning up to the end of the buffer are
 * first copied BufSize bytes lower so the pending lexeme survives the
 * refill, and lexeme_beginning is moved down with them.
 *
 * Returns the first byte of the refilled half; this is the sentinel (0)
 * when fread() delivered nothing.
 */
static unsigned char fillbuf(void)
{
    unsigned char *src, *dst;
    size_t nread;

    /*
     * The copy below writes to lexeme_beginning - BufSize, which is inside
     * buf only while the lexeme starts in the second half.  A lexeme that
     * starts earlier is longer than one buffer half and cannot be kept.
     */
    if (lexeme_beginning < buf + BufSize) {
        fprintf(stderr, "Buffer overflow error\n");
        exit(EXIT_FAILURE);
    }

    /* move the rest of the token to the other buffer half */
    src = lexeme_beginning;
    dst = lexeme_beginning - BufSize;
    while (dst < forward - 1)
        *dst++ = *src++;

    lexeme_beginning -= BufSize;

    /* a short read leaves the sentinel directly after the last data byte */
    nread = fread(buf + BufSize, 1, BufSize, fin);
    buf[BufSize + nread] = Sentinel;

    return buf[BufSize];
}
/* gets the next token from input stream */ int get_tok() { register unsigned char c, *pc; register int state = 0; register int i;
/* check whether initial or not */ if (IsFirst) { buf[2*BufSize] = Sentinel; forward = buf + 2*BufSize; IsFirst = 0; }
lexeme_beginning = forward; yylen = 0; /* until pattern match, repeat */ while(1) { switch(state) { case 0: c = NextChar(); yylval = 0; column++; if(c == Space) Add; else if (c == Tab) { column += 3; Add; } else if (c == Newline) { line++; column = 0; Add; } else if( next_state[c] == 0 ) { sprintf(err_msg, "Invalid char : 0x%x", c); yyerror(err_msg); Add; }else state = next_state[c]; break; case 1: c = NextChar(); column++; if( c == 'f') // if { state = 21, Add; break; } else // letter { c = 'i', retract1, column--, state = 3; break; } case 2: c = NextChar(); column++; // else if(c == 'l') { c = NextChar(); column++; Add; if(c == 's') { c = NextChar(); column++; Add; if( c == 'e') return TK_ELSE; else { state = 6; Add; break; } } else { state = 6; Add; break; } } else { state = 6; Add; break; } case 3: yytext[yylen++] = c; column++; Add; // letter while(c = NextChar()) { if(isalnum(c)) yytext[yylen++] = c, column++, Add; else { yytext[yylen] = Sentinel; retract1; column--; return TK_ID; } } case 4: yytext[yylen++] = c; column++; Add; // digit while(c = NextChar()) { if(isdigit(c)) { yytext[yylen++] = c; column++; Add; } else { pc = Sentinel; retract1; column--; yylval = atoi(yytext); return TK_NUM; } } case 5: c = NextChar(); column++; Add; // = if(c == '=') // == state = 7; else { retract1; column--; state = 6; } break; case 6: return TK_ASSIGN; // = case 7: return TK_EQ; // == case 8: // ! 
if(NextChar() == '=') return TK_NE; // != else { sprintf(err_msg, "Invalid character : 0x%x", c); yyerror(err_msg); c = NextChar(); state = next_state[c]; } break; case 9: c = NextChar(); column++; // > if(c == Space) state = 10; // > else if (c == '=') state = 11; // >= else { retract1; column--; } break; case 10: return TK_GT; // > case 11: return TK_GE; // >= case 12 : c = NextChar(); column++; // < if(state == Space) state = 13; // < else if (state == '=') state = 14; // <= else { retract1; column--; } break; case 13: return TK_LT; // < case 14: return TK_LE; // <= case 15: return TK_LPAR; // ( case 16: return TK_RPAR; // ) case 17: return TK_SCOLON; // ; case 18: if(c == 0) // EOF return TK_EOF; case 19: c = NextChar(); column++; // / if(c == '*') // /* { while(c = NextChar()) { Add; if(c == '*') // /* * { state = 20; break; } } break; } else if (c == '/') { Add; return; } case 20: c = NextChar(); column++; if(c == '/') // /* */ { Add; state = 19; return; } case 21: return TK_IF; /* FILL IN THIS BLANK */
} /* end of state */ } /* end of while */ } /* end of function get_tok */
/*
 * Writes a scanner diagnostic to stderr: the current line and column
 * counters followed by the text in msg.
 */
void yyerror(char *msg)
{
    FILE *out = stderr;

    fprintf(out, "line:%3d, column:%3d %s\n", line, column, msg);
}
'대학교 > 4.컴파일러' 카테고리의 다른 글
[03] lex.l (0) | 2019.04.14 |
[01] wc (0) | 2019.03.20 |