/*
 * Saturday, October 20, 2007
 *
 * Lexical Analyser Project Source Code.
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Token codes.
 *
 * Each code is also used as the index of the token's printable name in
 * out_token[], so the numbering has to stay dense and in the same order as
 * that array.  The values are written in plain decimal on purpose: a leading
 * zero makes a C integer constant octal, and 08 / 09 are not valid octal.
 */
#define INT 1
#define CHAR 2
#define FLOAT 3
#define LONG 4
#define DOUBLE 5
#define STRUCT 6
#define PRINTF 7
#define SCANF 8
#define FOR 9
#define DO 10
#define WHILE 11
#define IF 12
#define ELSE 13
#define CONTINUE 14
#define BREAK 15
#define DEFAULT 16
#define CASE 17
#define GOTO 18
#define SWITCH 19
#define OB 20
#define CB 21
#define LP 22
#define RP 23
#define EQUALTO 24
#define LT 25
#define GT 26
#define PLUS 27
#define MINUS 28
#define MUL 29
#define DIV 30
#define COLON 31
#define SCOLON 32
#define TERNARY 33
#define COMMA 34
#define PLINE 35
#define CARAT 36
#define DIGIT 37
#define ID 38
#define OSB 39
#define CSB 40
#define HSH 41
#define PER 42
#define MAIN 43
#define VOID 44
#define NOTEQU 45
#define AND 46
#define BSH 47
#define DQ 48
#define SQ 49
#define SPACE 50
#define TAB 51

/* Capacity of ref_tab[]; out_token[] is sized NUM_ENTRIES + 1. */
#define NUM_ENTRIES 52

/* Running count of tokens reported so far (incremented by output()). */
int token_no=0;

/* One row of the lexeme lookup table: the spelling and its token code. */
struct table
{
char name[25];
int token;
};

/*
 * Lexeme -> token code lookup table consulted by search().
 *
 * Fewer than NUM_ENTRIES rows are listed; the remaining rows are
 * zero-initialised (empty name, token 0).
 */
struct table ref_tab[NUM_ENTRIES] = {

{"int", INT},
{"char", CHAR},
{"float", FLOAT},
{"long", LONG},
{"double", DOUBLE},
{"struct", STRUCT},
{"printf", PRINTF},
{"scanf", SCANF},
{"for", FOR},
{"do", DO},
{"while", WHILE},
{"if", IF},
{"else", ELSE},
{"continue", CONTINUE},
{"break", BREAK},
{"default", DEFAULT},
{"case", CASE},
{"goto", GOTO},
{"switch", SWITCH},
{"{", OB},
{"}", CB},
{"(", LP},
{")", RP},
{"=", EQUALTO},
{"<", LT},
{">", GT},
{"+", PLUS},
{"-", MINUS},
{"*", MUL},
{"/", DIV},
{":", COLON},
{";", SCOLON},
{"?", TERNARY},
{",", COMMA},
{"|", PLINE},
{"^", CARAT},
{"[", OSB},
{"]", CSB},
{"#", HSH},
{"%", PER},
{"main", MAIN},
{"void", VOID},
{"!=", NOTEQU},
{"&", AND},
{"\\",BSH},
{"\"",DQ},
{"'", SQ},
};

/*
 * Printable name of every token, indexed by token code (see output()).
 * Slot 0 is a blank placeholder; slots past the last listed name are
 * zero-initialised, i.e. empty strings.
 */
char out_token[NUM_ENTRIES+1][40]=
{
    /*  0      */ " ",
    /*  1 -  9 */ "INT", "CHAR", "FLOAT", "LONG", "DOUBLE",
                  "STRUCT", "PRINTF", "SCANF", "FOR",
    /* 10 - 19 */ "DO", "WHILE", "IF", "ELSE", "CONTINUE",
                  "BREAK", "DEFAULT", "CASE", "GOTO", "SWITCH",
    /* 20 - 29 */ "OB", "CB", "LP", "RP", "EQUALTO",
                  "LT", "GT", "PLUS", "MINUS", "MUL",
    /* 30 - 39 */ "DIV", "COLON", "SCOLON", "TERNARY", "COMMA",
                  "PLINE", "CARAT", "DIGIT", "ID", "OSB",
    /* 40 - 49 */ "CSB", "HSH", "PER", "MAIN", "VOID",
                  "NOTEQU", "AND", "BSH", "DQ", "SQ"
};



int search(char lexeme[])
{
int i;

for ( i = 0; i < NUM_ENTRIES; i++)
{
if (strcmp(lexeme,ref_tab[i].name) == 0)
return ref_tab[i].token;
}

return ID;
}

/*
 * Print one report row for a token: its name from out_token[], its numeric
 * code, and its 1-based position in the token stream.
 *
 * num: token code; used directly as the index into out_token[].
 */
void output(int num)
{
    const char *label = out_token[num];

    /* Bump the global counter first so the row shows this token's ordinal. */
    token_no += 1;
    printf("\n%s\t\t\t%d\t%d\n", label, num, token_no);
}

/*
 * Map a single punctuation character to its token code.
 * Returns 0 (not a token code) when ch is not one of the one-character
 * tokens.  '/' and '!' are absent because they need one character of
 * lookahead and are handled by lexical() itself.
 */
static int punct_token(char ch)
{
    switch (ch) {
    case '{':  return OB;
    case '}':  return CB;
    case '(':  return LP;
    case ')':  return RP;
    case '=':  return EQUALTO;
    case '<':  return LT;
    case '>':  return GT;
    case '+':  return PLUS;
    case '-':  return MINUS;
    case '*':  return MUL;
    case ':':  return COLON;
    case ';':  return SCOLON;
    case '?':  return TERNARY;
    case '\"': return DQ;
    case '\'': return SQ;
    case ',':  return COMMA;
    case '|':  return PLINE;
    case '^':  return CARAT;
    case '[':  return OSB;
    case ']':  return CSB;
    case '#':  return HSH;
    case '%':  return PER;
    case '&':  return AND;
    case '\\': return BSH;
    default:   return 0;
    }
}

/*
 * Tokenise a buffer of source text and report every token through output().
 *
 * store:     the characters to scan (need not be NUL-terminated).
 * store_len: number of valid characters in store; nothing at or beyond this
 *            index is read.
 *
 * Behaviour:
 *  - spaces and tabs are dropped with a "... REMOVED" message;
 *  - each newline prints a "Line=N" banner for the line that follows it;
 *  - block and line comments are skipped and reported as removed; a block
 *    comment ends only at a full star-slash pair, and one that never closes
 *    is reported as unterminated;
 *  - "!=" is a single NOTEQU token;
 *  - identifiers are [A-Za-z_][A-Za-z0-9_]* and are classified by search();
 *  - a run of decimal digits is one DIGIT token;
 *  - any other character is reported as "Invalid symbol".
 */
void lexical(char store[], int store_len)
{
    /* Longer than every name in ref_tab, so a truncated lexeme can never be
       mistaken for a keyword. */
    enum { LEXEME_CAP = 64 };

    char lexeme[LEXEME_CAP];
    int line = 2;   /* the caller has already printed the banner for line 1 */
    int i = 0;

    while (i < store_len) {
        const char ch = store[i];
        /* <ctype.h> functions require a value representable as unsigned char. */
        const unsigned char uch = (unsigned char)ch;

        if (ch == ' ') {
            i++;
            printf("SPACE REMOVED\n");
        } else if (ch == '\t') {
            i++;
            printf("TAB REMOVED\n");
        } else if (ch == '\n') {
            i++;
            printf("Line=%d\n", line++);
            printf("____\n");
        } else if (ch == '/') {
            const char next = (i + 1 < store_len) ? store[i + 1] : '\0';

            if (next == '*') {
                int closed = 0;

                i += 2;     /* step over the opening marker */
                while (i < store_len) {
                    if (store[i] == '*' && i + 1 < store_len
                        && store[i + 1] == '/') {
                        i += 2;
                        closed = 1;
                        break;
                    }
                    /* Keep the line counter in step across multi-line
                       comments, even though no banner is printed here. */
                    if (store[i] == '\n') {
                        line++;
                    }
                    i++;
                }
                if (closed) {
                    printf("\nComments removed\n");
                } else {
                    printf("\nUnterminated comment\n");
                }
            } else if (next == '/') {
                i += 2;
                /* Stop at the newline without consuming it so the normal
                   newline branch prints the line banner. */
                while (i < store_len && store[i] != '\n') {
                    i++;
                }
                printf("\nComments removed\n");
            } else {
                i++;
                output(DIV);
            }
        } else if (ch == '!') {
            /* "!=" is one token.  There is no separate token code for a lone
               '!', so it is still reported as NOTEQU. */
            if (i + 1 < store_len && store[i + 1] == '=') {
                i += 2;
            } else {
                i++;
            }
            output(NOTEQU);
        } else if (isalpha(uch) || ch == '_') {
            int len = 0;

            while (i < store_len
                   && (isalnum((unsigned char)store[i]) || store[i] == '_')) {
                /* Consume the whole identifier but store only what fits. */
                if (len < LEXEME_CAP - 1) {
                    lexeme[len++] = store[i];
                }
                i++;
            }
            lexeme[len] = '\0';
            output(search(lexeme));
        } else if (isdigit(uch)) {
            while (i < store_len && isdigit((unsigned char)store[i])) {
                i++;
            }
            output(DIGIT);
        } else {
            const int tok = punct_token(ch);

            i++;
            if (tok != 0) {
                output(tok);
            } else {
                printf("Invalid symbol\n");
            }
        }
    }
}

/*
 * Program entry point.
 *
 * Prompts for a file name, reads that file into memory, prints the report
 * header and the banner for line 1, then hands the text to lexical().
 *
 * Returns EXIT_SUCCESS after a completed scan, or EXIT_FAILURE when no file
 * name could be read, the file cannot be opened, or the file does not fit
 * in the in-memory buffer.
 */
int main(void)
{
    enum { STORE_CAP = 4000, FILE_NAME_CAP = 256 };

    FILE *fp1;                  /* Source file pointer */
    int ch;                     /* int, not char, so EOF stays distinguishable */
    char store[STORE_CAP];      /* whole source text */
    int store_len = 0;
    char file[FILE_NAME_CAP];

    printf("Enter file name with .c as the extension\n");
    /* The field width must stay at FILE_NAME_CAP - 1 to leave room for the
       terminating NUL. */
    if (scanf("%255s", file) != 1)
    {
        printf("No file name was entered\n");
        return EXIT_FAILURE;
    }

    fp1 = fopen(file, "r");
    if (fp1 == NULL)
    {
        printf("The source file can not be opened for reading\n");
        return EXIT_FAILURE;
    }

    /* read till end of file is encountered, refusing input that would
       overrun the buffer */
    while ((ch = getc(fp1)) != EOF)
    {
        if (store_len == STORE_CAP)
        {
            printf("The source file is larger than %d bytes\n", STORE_CAP);
            fclose(fp1);
            return EXIT_FAILURE;
        }
        store[store_len++] = (char)ch;
    }

    /* close the input file */
    fclose(fp1);

    printf("SYMBOL\t\t\tVALUE\tNUMBER\n");
    printf("______\t\t\t_____\t______\n\n");
    printf("Line=1\n");
    printf("____\n");

    lexical(store, store_len);
    return EXIT_SUCCESS;
}