简体   繁体   中英

Lex program to count number of lines, characters, digits, and key words

I have been working on developing a lex scanner; however, when I feed it my input file, it produces the wrong output. Here is my source code:

%{
/* Prologue: the text between the %{ and %} markers is copied verbatim into
   the generated scanner. */
#include <stdio.h>

/* Counters updated by the rule actions below and printed by main(). */
/* Incremented once per newline matched by the `\n` rule. */
int NumberOfLines=0;
/* Running character total; every non-comment rule action adds to it. */
int NumberOfChar=0;
/* Incremented by the {DIGIT} rule. */
int NumberOfIntegers=0;
/* Incremented by the if|else|while|return rule. */
int KWCount=0;
/* Incremented by the comment-handling rules. */
int NumberOfComments=0;
%}

    /* Name definitions. DIGIT ends in `*`, so on its own it can also match
       the empty string. */
DIGIT   [0-9]*
ID  [a-z][a-z0-9]*
    /* COMMENT is declared as an exclusive start condition (%x). */
%x COMMENT
%option noyywrap
%%

    /* A comment opener at the start of a line, preceded only by tabs,
       switches to the COMMENT state.  A comment that opens and closes on the
       same line is counted by the second rule. */
^[\t]*"/*" {BEGIN COMMENT;}
^[\t]*"/*".*"*/"[\t]*\n {NumberOfComments++;}

    /* Rules active only in COMMENT: the comment closer returns to the
       initial state (BEGIN 0); the other rules bump NumberOfComments. */
<COMMENT>"*/"[\t]*\n {BEGIN 0; NumberOfComments++;}
<COMMENT>"*/" {BEGIN 0;}
<COMMENT>\n {NumberOfComments++;}
<COMMENT>.\n {NumberOfComments++;}

    /* Catch-all rules for a newline and for any other single character.
       NOTE(review): each action increments NumberOfChar and then also adds
       strlen(yytext).  These rules are listed before the token rules below;
       flex picks the longest match and, on a tie, the earliest rule, so any
       one-character match is handled here. */
\n {NumberOfLines++, NumberOfChar++; NumberOfChar +=strlen(yytext);}
. {NumberOfChar++; NumberOfChar +=strlen(yytext);}



    /* Integer literal. */
{DIGIT}     {NumberOfIntegers++; NumberOfChar +=strlen(yytext); }


    /* Number containing a decimal point; echoed with its atof() value. */
{DIGIT}+"."{DIGIT}* {
    printf("A flot: %s (%g) \n", yytext, atof(yytext));
    NumberOfChar +=strlen(yytext); 
    }

    /* Keywords; listed before {ID} so they win when both match equally. */
if|else|while|return    {
    printf("A keyword: %s\n", yytext); KWCount++;
    NumberOfChar +=strlen(yytext); 
    }

    /* Identifier: a lower-case letter followed by lower-case letters or
       digits. */
{ID}        {
    printf("An identifier: %s\n", yytext);
    NumberOfChar +=strlen(yytext); 
    }
    /* Brace-delimited text that stays on one line. */
"{"[^}\n]*"}"   {
    /* eat up one-line comments */
    NumberOfChar +=strlen(yytext);
    }

%%
/*
 * Entry point: scan the file named by the first command-line argument, or
 * standard input when no argument is given, then print the counters.
 *
 * Returns 0 on success and 1 when the input file cannot be opened.
 */
int main(int argc, char **argv){
    int opened_file = 0;

    ++argv, --argc; /* skip over program name */
    if (argc > 0) {
        yyin = fopen(argv[0], "r");
        if (yyin == NULL) {
            /* With a NULL yyin a flex scanner silently reads stdin instead,
               so report the failure rather than scanning the wrong input. */
            perror(argv[0]);
            return 1;
        }
        opened_file = 1;
    } else {
        yyin = stdin;
    }

    yylex();

    printf("Character count: %d\n", NumberOfChar);
    printf("Number count: %d\n", NumberOfIntegers);
    printf("Keyword count: %d\n", KWCount);
    printf("Line count: %d\n", NumberOfLines);
    printf("Comment count: %d\n", NumberOfComments);

    if (opened_file)
        fclose(yyin);
    return 0;
}

Whenever I run my input file with the source, it gives me the wrong output. For instance, the output of the file should be:

Output:

Number of Keywords: 3

Number of Characters: 196

Number of Lines: 17

Number of Digits: 3

However the output it is currently producing is:

Output:

Number of keywords: 0

Number of Characters: 3

Number of Lines: 7

Number of Digits: 0   

I suspect it has to do with my regular expressions. Any help would be appreciated, as I am still learning regex!

Here is my input file contents:

/*comment 1*/
/*comment
  comment 2 
  */
  /*comment 3*
   */if this is a line
{comment 4}
int i = 789; 
int j = 689;
if i == 172 then
 {comment 5}
else
{comment 6}
{comment 7}
/*8 comments
 *
 */ 
end

Here's some mostly working code, closely based on your code.

%{
/* Prologue: the text between the %{ and %} markers is copied verbatim into
   the generated scanner. */
#include <stdio.h>

/* Counters updated by the rule actions below and printed by main(). */
/* Incremented once per newline matched by the `\n` rule. */
int NumberOfLines=0;
/* Incremented by the `\n` rule and by the single-character `.` rule. */
int NumberOfChar=0;
/* Incremented by the {DIGIT} rule. */
int NumberOfIntegers=0;
/* Incremented by the if|else|while|return rule. */
int KWCount = 0;
/* Incremented by the {ID} rule. */
int IDCount = 0;
/* Raw count: every rule adds the length of the text it matched. */
int RCCount = 0;
/* Total length of the brace-delimited matches. */
int OCCount = 0;
/* Number of characters handled by the `.` rule. */
int DTCount = 0;
/* Incremented by the decimal-point number rule. */
int FLCount = 0;
%}

%option noyywrap
%option noinput
%option nounput

    /* Name definitions. DIGIT ends in `*`, so on its own it can also match
       the empty string. */
DIGIT   [0-9]*
ID  [a-z][a-z0-9]*

%%

    /* Catch-all rules for a newline and for any other single character; the
       `.` action also echoes the character it matched.
       NOTE(review): these rules are listed before the token rules below;
       flex picks the longest match and, on a tie, the earliest rule, so any
       one-character match (including a single digit or letter) lands here. */
\n {NumberOfLines++; NumberOfChar++; RCCount += strlen(yytext); }
. {NumberOfChar++; DTCount++; RCCount++; printf(" '%c'", yytext[0]); }

    /* Integer literal. */
{DIGIT}     {NumberOfIntegers++; RCCount += strlen(yytext); }

    /* Number containing a decimal point; echoed with its atof() value. */
{DIGIT}+"."{DIGIT}* {
    printf("\nA float: %s (%g) \n", yytext, atof(yytext)); 
    RCCount += strlen(yytext);
    FLCount++;
    }

    /* Keywords; listed before {ID} so they win when both match equally. */
if|else|while|return    {
    printf("\nA keyword: %s\n", yytext); 
    KWCount++;
    RCCount += strlen(yytext);
    }

    /* Identifier: a lower-case letter followed by lower-case letters or
       digits. */
{ID}        {
    printf("\nAn identifier: %s\n", yytext); 
    IDCount++;
    RCCount += strlen(yytext);
    }
    /* Brace-delimited text that stays on one line; its length is added to
       both the raw count and the "other" count. */
"{"[^}\n]*"}"   {
    RCCount += strlen(yytext);
    OCCount += strlen(yytext);
    }

%%
/*
 * Entry point: scan the file named by the first command-line argument, or
 * standard input when no argument is given, then print every counter.
 *
 * Returns 0 on success and 1 when the input file cannot be opened.
 */
int main(int argc, char **argv){
    int opened_file = 0;

    ++argv, --argc; /* skip over program name */
    if (argc > 0) {
        yyin = fopen(argv[0], "r");
        if (yyin == NULL) {
            /* With a NULL yyin a flex scanner silently reads stdin instead,
               so report the failure rather than scanning the wrong input. */
            perror(argv[0]);
            return 1;
        }
        opened_file = 1;
    } else {
        yyin = stdin;
    }

    yylex();

    printf("Character count: %d\n", NumberOfChar);
    printf("Number count:    %d\n", NumberOfIntegers);
    printf("Keyword count:   %d\n", KWCount);
    printf("Line count:      %d\n", NumberOfLines);
    printf("ID count:        %d\n", IDCount);
    printf("Dot count:       %d\n", DTCount);
    printf("Raw count:       %d\n", RCCount);
    printf("Float count:     %d\n", FLCount);
    printf("Other count:     %d\n", OCCount);
    printf("\n");

    if (opened_file)
        fclose(yyin);
    return 0;
}

When run on the data file:

/*commEnt 1*/
/*COMMENT
  commEnt 2 
  */
  /*commEnt 3*
   */if this is a linE
{commEnt 4}
int i = 789; 
int j = 689;
if i == 172 thEn
 {commEnt 5}
ElsE
{commEnt 6}
{commEnt 7}
float 12.34
/*8 commEnts
 *
 else
 return
 while
 the
 going
 is
 good
 */ 
end

I get the output:

 '/' '*'
An identifier: comm
 'E'
An identifier: nt
 ' ' '1' '*' '/' '/' '*' 'C' 'O' 'M' 'M' 'E' 'N' 'T' ' ' ' '
An identifier: comm
 'E'
An identifier: nt
 ' ' '2' ' ' ' ' ' ' '*' '/' ' ' ' ' '/' '*'
An identifier: comm
 'E'
An identifier: nt
 ' ' '3' '*' ' ' ' ' ' ' '*' '/'
A keyword: if
 ' '
An identifier: this
 ' '
An identifier: is
 ' ' 'a' ' '
An identifier: lin
 'E'
An identifier: int
 ' ' 'i' ' ' '=' ' ' ';' ' '
An identifier: int
 ' ' 'j' ' ' '=' ' ' ';'
A keyword: if
 ' ' 'i' ' ' '=' '=' ' ' ' '
An identifier: th
 'E' 'n' ' ' 'E'
An identifier: ls
 'E'
An identifier: float
 ' '
A float: 12.34 (12.34) 
 '/' '*' '8' ' '
An identifier: comm
 'E'
An identifier: nts
 ' ' '*' ' '
A keyword: else
 ' '
A keyword: return
 ' '
A keyword: while
 ' '
An identifier: the
 ' '
An identifier: going
 ' '
An identifier: is
 ' '
An identifier: good
 ' ' '*' '/' ' '
An identifier: end
Character count: 115
Number count:    3
Keyword count:   5
Line count:      26
ID count:        21
Dot count:       89
Raw count:       258
Float count:     1
Other count:     44

The output from wc is:

$ wc data.2
      26      49     258 data.2
$

The 'raw count' of characters matches the character count from wc; the line count matches too. The number of integers, floats, keywords and identifiers all looks correct, given that upper case letters are counted in the 'dot characters'. You can work out whether there are other problems; I think that the count of integers is wrong, but I'm not sure why.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM