본문 바로가기
대학교/4.컴파일러

[02] lex.c

by Jcoder 2019. 4. 14.

lex.c
0.01MB
lex.h
0.00MB
main.c
0.00MB

 

main.c

main.c

#include <stdio.h> 

#include <stdlib.h> 

#include <ctype.h> 

#include "lex.h" 

 

/* Scanner entry point defined in lex.c; returns one of the TK_* codes from lex.h. */
extern int get_tok(); 

extern int yylval,       /* integer value of the most recent TK_NUM */ 

           yylen;        /* number of characters stored in yytext for a TK_ID */ 

extern char yytext[];    /* characters of the most recent TK_ID lexeme */ 

 

/* Input stream: opened by main() and read by the scanner's buffer refill. */
extern FILE *fin; 

 

main(int argc, char *argv[]) 

   int n, i; 

   char *pc; 

 

   if (argc != 2) {

      fprintf(stderr, "usage: %s <input>\n", argv[0]);

      exit(1) ;

   }

   if ((fin = fopen(argv[1], "r")) == NULL) {

      fprintf(stderr, "can't open %s\n", argv[1]);

      exit(1) ;

   }

   n = get_tok();

   while (n != TK_EOF) {

      switch(n) {

         case TK_ID: printf("%s""TK_ID: ");

            pc = yytext;

            for (i=0; i<yylen; i++)

               printf("%c"*pc++);

            printf("\n");

            break;

         case TK_NUM: printf("%s""TK_NUM: ");

            printf("%d\n", yylval);

            break;

         case TK_IF:     printf("%s\n""TK_IF"); break;         

         case TK_ELSE:   printf("%s\n""TK_ELSE"); break;

         case TK_ASSIGN: printf("%s\n""TK_ASSIGN"); break;

         case TK_EQ:     printf("%s\n""TK_EQ"); break;

         case TK_NE:     printf("%s\n""TK_NE"); break;

         case TK_GT:     printf("%s\n""TK_GT"); break;

         case TK_GE:     printf("%s\n""TK_GE"); break;

         case TK_LT:     printf("%s\n""TK_LT"); break;

         case TK_LE:     printf("%s\n""TK_LE"); break;

         case TK_LPAR:   printf("%s\n""TK_LPAR"); break;

         case TK_RPAR:   printf("%s\n""TK_RPAR"); break;

         case TK_SCOLON: printf("%s\n""TK_SCOLON"); break;

      }

      n = get_tok();

   }

   printf("%s\n""TK_EOF");

   fclose(fin);

}

lex.h

lex.h

/*
 * Token codes returned by get_tok().
 * NOTE(review): the codes start at 256, presumably to stay clear of
 * single-character values -- confirm before renumbering.
 */
#define TK_ID           256   /* identifier; text in yytext, length in yylen */

#define TK_NUM          257   /* integer literal; value in yylval */

#define TK_IF           258   /* keyword "if" */

#define TK_ELSE         259   /* keyword "else" */

#define TK_ASSIGN       260   /* "="  */

#define TK_EQ           261   /* "==" */

#define TK_NE           262   /* "!=" */

#define TK_GT           263   /* ">"  */

#define TK_GE           264   /* ">=" */

#define TK_LT           265   /* "<"  */

#define TK_LE           266   /* "<=" */

#define TK_LPAR         267   /* "("  */

#define TK_RPAR         268   /* ")"  */

#define TK_SCOLON       269   /* ";"  */

#define TK_EOF          270   /* end of input */

lex.c

lex.c

#include <stdio.h>

#include <stdlib.h>

#include <ctype.h>

#include "lex.h"

 

/* Size of one half of the scanner's input buffer, in bytes. */
#define BufSize  1024

/* Byte value that marks the end of the valid data in the buffer. */
#define Sentinel '\0'



/* Whitespace characters that get_tok() skips in its start state. */
#define Space    ' '

#define Tab      '\t'

#define Newline  '\n'



#define MAX_MSG  30   /* maximum size of an error message */

#define MAX_BUF  80   /* maximum size of an identifier */



/*
 * Input buffer: two halves of BufSize bytes plus one trailing slot.
 * fillbuf() reads file data into the second half (starting at buf + BufSize)
 * and copies the unfinished lexeme down into the first half.
 */
static unsigned char buf[2*BufSize + 1];

/* lexeme_beginning: start of the lexeme being scanned; forward: next byte to read. */
static unsigned char *lexeme_beginning, *forward;



/* Position counters reported by yyerror(). */
static int line = 1, column = 0;



/* Steps the read position back by one byte. */
#define retract1 forward--

/*
 * Yields the next input byte. When the byte under `forward` is the sentinel,
 * `forward` is first moved to just past the start of the second half and
 * fillbuf() is called; its return value is the byte delivered.
 */
#define NextChar() ((*forward)? *forward++:((forward=buf+BufSize+1),fillbuf()))

/* Moves the start of the current lexeme forward by one byte. */
#define Add lexeme_beginning++

/* Non-zero until the first call of get_tok() has set up the buffer pointers. */

static int IsFirst = 1;



/* Token attributes published to the caller: numeric value and lexeme length. */
int yylval, yylen;

/* Text of the current lexeme (at most MAX_BUF bytes). */
char yytext[MAX_BUF];



void yyerror();

char err_msg[MAX_MSG];      /* error message buffer */



/* Input stream read by fillbuf(); opened by main(). */
FILE *fin;

/*
 * Start-state dispatch table: next_state[c] is the scanner state entered
 * when `c` is the first character of a token, or 0 when `c` cannot start a
 * token.  State numbers match the cases handled in get_tok():
 *    1 'i'         2 'e'         3 other letter   4 digit
 *    5 '='         8 '!'         9 '>'           12 '<'
 *   15 '('        16 ')'        17 ';'           18 end of input (0x00, 0x1A)
 *   19 '/'
 *
 * The table is indexed by an unsigned char, so it is sized for all 256 byte
 * values; entries for bytes 128..255 are implicitly 0 (invalid).
 */
static int next_state[256] = {
    18,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /*   0 ~  14 */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 18,  0,  0,  0,   /*  15 ~  29 */
     0,  0,  0,  8,  0,  0,  0,  0,  0,  0, 15, 16,  0,  0,  0,   /*  30 ~  44 */
     0,  0, 19,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  0, 17,   /*  45 ~  59 */
    12,  5,  9,  0,  0,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,   /*  60 ~  74 */
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,   /*  75 ~  89 */
     3,  0,  0,  0,  0,  0,  0,  3,  3,  3,  3,  2,  3,  3,  3,   /*  90 ~ 104 */
     1,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,   /* 105 ~ 119 */
     3,  3,  3,  0,  0,  0,  0,  0                                /* 120 ~ 127 */
};

 

static unsigned char fillbuf()

{

   register unsigned char *i, *j;

 

   /* when two buffers overflow, issue error messsage and abort program */

   if (lexeme_beginning == buf) {

      printf("Buffer overflow error\n");

      exit(0);

   }

 

   /* move the rest of token to the other buffer */

   i = lexeme_beginning;

   j = lexeme_beginning - BufSize;

 

   while (j < forward-1)

      *(j++= *(i++) ; 

 

   lexeme_beginning -= BufSize;

 

   buf[BufSize + fread(buf+BufSize, 1, BufSize, fin)] = Sentinel;

 

   return buf[BufSize];        /* end of buffer */

}

 

/* gets the next token from input stream */

int get_tok()

{

   register unsigned char c, *pc;

   register int state = 0;

   register int i;

 

   /* check whether initial or not */

   if (IsFirst) {

      buf[2*BufSize] = Sentinel;

      forward = buf + 2*BufSize;

      IsFirst = 0;

   }

 

   lexeme_beginning = forward;

    yylen = 0;

   /* until pattern match, repeat */

   while(1) {

      switch(state) {

            case 0: c = NextChar(); yylval = 0;

                      column++;

                if(c == Space)

                    Add;

                else if (c == Tab)

                {

                    column += 3; Add;

                }

                else if (c == Newline)

                {

                    line++; column = 0; Add;

                }

                else if( next_state[c] == 0 )

                {

                    sprintf(err_msg, "Invalid char : 0x%x", c);

                    yyerror(err_msg);

                    Add;

                }else 

                    state = next_state[c];

                    break;

            case 1: c = NextChar(); column++;

                if( c == 'f')    //    if

                {

                    state = 21, Add; 

                    break;

                }

                else    // letter

                {

                    c = 'i', retract1, column--, state = 3;

                    break;

                }

            case 2: c = NextChar(); column++// else

                      if(c == 'l')

                      {

                          c = NextChar(); column++; Add;

                          if(c == 's')

                          {

                              c = NextChar(); column++; Add;

                              if( c == 'e')

                                  return TK_ELSE;

                              else

                              {

                                  state = 6; Add; break;

                              }

                          }

                          else

                          {

                              state = 6; Add; break;

                          }

                      }

                      else

                      {

                          state = 6; Add; break;

                      }

            case 3: yytext[yylen++= c; column++; Add; // letter

                      while(c = NextChar())

                      {

                            if(isalnum(c))

                                yytext[yylen++= c, column++, Add;

                            else

                            {

                                yytext[yylen] = Sentinel; retract1; column--;

                                return TK_ID;

                            }

                      }

            case 4: yytext[yylen++= c; column++; Add; // digit

                      while(c = NextChar())

                      {

                            if(isdigit(c))

                            {

                                yytext[yylen++= c; column++; Add;

                            }

                            else

                            {

                                pc = Sentinel; retract1; column--;

                                yylval = atoi(yytext);

                                return TK_NUM;

                            }

                      }

            case 5: c = NextChar(); column++; Add; // =

                      if(c == '='// ==

                          state = 7;

                      else

                      {

                          retract1; column--; state = 6;

                      }

                      break;

            case 6return TK_ASSIGN; // = 

            case 7return TK_EQ; // ==

            case 8:  // !

                        if(NextChar() == '=')

                            return TK_NE; // !=

                        else

                        {

                            sprintf(err_msg, "Invalid character : 0x%x", c);

                            yyerror(err_msg);

                            c = NextChar();

                            state = next_state[c];

                        }

                        break;

            case 9: c = NextChar(); column++// >

                      if(c == Space) 

                          state = 10// >

                      else if (c == '=')

                          state = 11// >=

                      else

                      {

                          retract1; column--;

                      }

                      break;

            case 10return TK_GT; // >

            case 11return TK_GE; // >=

            case 12 : c = NextChar(); column++// <

                      if(state == Space) 

                          state = 13// <

                      else if (state == '=')

                          state = 14// <=

                      else

                      {

                          retract1; column--;

                      }

                      break;

            case 13return TK_LT; // <

            case 14return TK_LE; // <=

            case 15return TK_LPAR; // (

            case 16return TK_RPAR; // )

            case 17return TK_SCOLON; // ;

            case 18if(c == 0// EOF

                            return TK_EOF;

            case 19: c = NextChar(); column++// /

                        if(c == '*'// /*

                        {

                            while(c = NextChar())

                            {

                                Add;

                                if(c == '*'// /* *

                                {

                                    state = 20break;

                                }

                            }

                            break;

                        }

                        else if (c == '/')

                        {

                            Add;

                            return;

                        }

            case 20: c = NextChar(); column++;

                        if(c == '/'// /* */

                        {

                            Add; state = 19return;

                        }

            case 21return TK_IF;

/* FILL IN THIS BLANK */

 

      }   /* end of state */

   }      /* end of while */

}         /* end of function get_tok */

 

/* prints the current line, column error message */

void yyerror(char *msg)

{

   fprintf(stderr, "line:%3d, column:%3d %s\n", line, column, msg); 

}

'대학교 > 4.컴파일러' 카테고리의 다른 글

[03] lex.l  (0) 2019.04.14
[01] wc  (0) 2019.03.20