简体   繁体   中英

Reading words separately from file

I'm trying to make a program that scans a file containing words line by line and removes words that are spelled the same if you read them backwards (palindromes)

This is the program.c file:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "header.h"

/*
 * Entry point: copies the file named by argv[1] to the file named by argv[2]
 * one line at a time, passing each line through function1() before it is
 * written.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on a usage error, when either
 * file cannot be opened, or when reading or writing fails.
 */
int main(int argc, char **argv)
{
    enum { LINE_CAPACITY = 255 };           /* size of the line and word buffers */
    char StringFromFile[LINE_CAPACITY];
    char word[LINE_CAPACITY] = "";          /* zero-filled so it is always a valid string */
    FILE *data;
    FILE *result;
    int status = EXIT_SUCCESS;

    if (argc != 3)
    {
        fprintf(stderr, "Wrong parameters\n");
        return EXIT_FAILURE;
    }

    data = fopen(argv[1], "r");
    if (data == NULL)
    {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    result = fopen(argv[2], "w");
    if (result == NULL)
    {
        perror(argv[2]);
        fclose(data);
        return EXIT_FAILURE;
    }

    while (fgets(StringFromFile, LINE_CAPACITY, data) != NULL)
    {
        function1(StringFromFile, word);
        if (fputs(StringFromFile, result) == EOF)
        {
            status = EXIT_FAILURE;
            break;
        }
    }

    /* fgets() returns NULL for both end-of-file and a read error. */
    if (ferror(data))
    {
        status = EXIT_FAILURE;
    }

    fclose(data);
    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fclose(result) != 0)
    {
        status = EXIT_FAILURE;
    }

    return status;
}

This is the header.h file:

/* Include guard: the tested and the defined macro must be the same name. */
#ifndef HEADER_H_INCLUDED
#define HEADER_H_INCLUDED

/*
 * Processes one line held in StringFromFile in place, looking for
 * space-separated palindrome words to remove. `word` is caller-provided
 * scratch storage for the word currently being examined.
 */
void function1(char *StringFromFile, char *word);

/*
 * Helper used by function1 to shift the tail of StringFromFile left when a
 * word is removed. *index is the position of the word, StringLength the
 * length of the string and WordLength the length of the word.
 */
void moving(char *StringFromFile, int *index, int StringLength, int WordLength);

#endif

This is the function file:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "header.h"

/* Returns nonzero when c separates words (blank, tab or line ending). */
static int is_word_separator(char c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}

/* Returns nonzero when the len characters at s read the same both ways. */
static int is_palindrome_span(const char *s, int len)
{
    int lo;
    int hi;

    if (len <= 0)
    {
        return 0;
    }
    for (lo = 0, hi = len - 1; lo < hi; lo++, hi--)
    {
        if (s[lo] != s[hi])
        {
            return 0;   /* first mismatch settles it */
        }
    }
    return 1;
}

/*
 * Deletes WordLength characters from StringFromFile starting at *index by
 * shifting the rest of the string (terminator included) left over them.
 *
 * StringFromFile  NUL-terminated string, modified in place.
 * index           position of the first character to delete; *index is left
 *                 unchanged and afterwards addresses the first character
 *                 that followed the deleted span.
 * StringLength    strlen(StringFromFile) before the call.
 * WordLength      number of characters to delete; clamped to the characters
 *                 available after *index.
 *
 * Out-of-range arguments (NULL pointers, negative index, index past the end,
 * non-positive WordLength) leave the string untouched.
 */
void moving(char *StringFromFile, int *index, int StringLength, int WordLength)
{
    int from;

    if (StringFromFile == NULL || index == NULL)
    {
        return;
    }
    from = *index;
    if (from < 0 || from > StringLength || WordLength <= 0)
    {
        return;
    }
    if (WordLength > StringLength - from)
    {
        WordLength = StringLength - from;
    }
    /* Regions overlap, so memmove; the +1 carries the terminating NUL along. */
    memmove(StringFromFile + from,
            StringFromFile + from + WordLength,
            (size_t)(StringLength - from - WordLength) + 1);
}

/*
 * Removes every palindrome word from the line in StringFromFile, in place.
 *
 * Words are maximal runs of characters that are not blanks, tabs or line
 * endings. When a palindrome is removed, one adjacent space goes with it
 * (the preceding one if present, otherwise the following one) so that no
 * doubled or dangling space is left behind; the line ending is kept.
 *
 * StringFromFile  NUL-terminated line, modified in place.
 * word            optional scratch buffer (may be NULL). When given it must
 *                 hold at least strlen(StringFromFile) + 1 bytes and receives
 *                 a NUL-terminated copy of each word as it is examined.
 */
void function1(char *StringFromFile, char *word)
{
    int StringLength;
    int i = 0;

    if (StringFromFile == NULL)
    {
        return;
    }
    StringLength = (int)strlen(StringFromFile);

    while (i < StringLength)
    {
        int start;
        int WordLength;

        if (is_word_separator(StringFromFile[i]))
        {
            i++;
            continue;
        }

        /* Find the end of the word without ever running past the string. */
        start = i;
        while (i < StringLength && !is_word_separator(StringFromFile[i]))
        {
            i++;
        }
        WordLength = i - start;

        if (word != NULL)
        {
            memcpy(word, StringFromFile + start, (size_t)WordLength);
            word[WordLength] = '\0';
        }

        if (!is_palindrome_span(StringFromFile + start, WordLength))
        {
            continue;
        }

        /* Take one neighbouring space along with the word. */
        if (start > 0 && StringFromFile[start - 1] == ' ')
        {
            start--;
            WordLength++;
        }
        else if (StringFromFile[i] == ' ')  /* i <= StringLength: worst case reads the NUL */
        {
            WordLength++;
        }

        moving(StringFromFile, &start, StringLength, WordLength);
        StringLength -= WordLength;
        i = start;  /* resume at the character that slid into the gap */
    }
}

It doesn't read each word correctly, though.

This is the data file:

abcba rttt plllp
aaaaaaaaaaaa
ababa
abbbba
kede

These are the separate words the program reads:

abcba
rttta
plllp
aaaaaaaaaaaa
ababa
abbbba
kede

This is the result file:

abcba rtttp



kede

It works fine if there is only one word in a single line, but it messes up when there are multiple words. Any help is appreciated.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "header.h"

#define MAX 255

/*
 * Counts the words on one line.
 *
 * A word is a maximal run of characters other than space and tab. Scanning
 * stops at the first '\0' or '\n', or after MAX characters, whichever comes
 * first. Leading, trailing and repeated separators do not affect the count,
 * and a line holding a single word yields 1.
 *
 * str  line to scan; NULL is treated as an empty line.
 *
 * Returns the number of words found (0 for an empty or blank line).
 */
int Find_Number_Words_in_Line( char str[MAX] )
{
   int count = 0;
   int in_word = 0;   /* nonzero while the scan is inside a word */
   int j;

   if ( str == NULL )
      return 0;

   for ( j = 0; j < MAX; j++ )
   {
      const char c = str[j];

      if (( c == '\0' ) || ( c == '\n' ))
         break;

      if (( c == ' ' ) || ( c == '\t' ))
      {
         in_word = 0;
      }
      else if ( !in_word )
      {
         /* count a word once, on its first character */
         in_word = 1;
         count++;
      }
   }

   return count;
}

void Extract_Word_From_Line_Based_on_Position( char line[MAX], char word[MAX], const int position )
{
   char *ptr;

   /* move pointer down line[], counting past the number of spaces specified by position */
   /* then copy the next word from line[] into word[] */
}


/*
 * Reports whether a NUL-terminated string reads the same in both directions.
 *
 * str  string to test. Declared as a plain pointer, which is the same
 *      parameter type as an array parameter of char.
 *
 * Returns 1 when str is a palindrome (a single character counts), and 0
 * otherwise. NULL and the empty string yield 0, since neither is a word.
 */
int Is_Palindrome ( char *str )
{
   size_t left;
   size_t right;

   if (( str == NULL ) || ( str[0] == '\0' ))
      return 0;

   left = 0;
   right = strlen( str ) - 1;

   /* compare from both ends toward the middle; first mismatch decides */
   while ( left < right )
   {
      if ( str[left] != str[right] )
         return 0;

      left++;
      right--;
   }

   return 1;
}


/*
 * Entry point: reads the file named by argv[1] line by line and writes every
 * word for which Is_Palindrome() reports true to the file named by argv[2],
 * one word per output line.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on a usage error, when either
 * file cannot be opened, or when reading or writing fails.
 */
int main(int argc, char **argv)
{
   FILE *data_file;
   FILE *result_file;
   char line_from_data_file[MAX];
   char word[MAX] = "";
   int status = EXIT_SUCCESS;
   int j, n;

   /* validate argc before touching argv[1] / argv[2] */
   if (argc != 3)
   {
      fprintf(stderr, "Wrong parameters\n");
      return EXIT_FAILURE;
   }

   data_file = fopen(argv[1], "r");
   if (data_file == NULL)
   {
      perror(argv[1]);
      return EXIT_FAILURE;
   }

   result_file = fopen(argv[2], "w");
   if (result_file == NULL)
   {
      perror(argv[2]);
      fclose(data_file);
      return EXIT_FAILURE;
   }

   /*
      Loop on the return value of fgets rather than on feof(): feof() only
      becomes true after a read has already failed, and a final line without
      a trailing newline would otherwise be dropped.
   */
   while ( fgets( line_from_data_file, MAX, data_file ) != NULL )
   {
      /* fgets returns a whole line, so visit each of its words in turn */
      n = Find_Number_Words_in_Line( line_from_data_file );
      for ( j = 0; j < n; j++ )
      {
         /* j, not n: ask for the j-th word of the line */
         Extract_Word_From_Line_Based_on_Position( line_from_data_file, word, j );
         if ( Is_Palindrome( word ) )
         {
            /* one palindrome per line in the result file */
            if (( fputs( word, result_file ) == EOF ) ||
                ( fputc( '\n', result_file ) == EOF ))
            {
               status = EXIT_FAILURE;
            }
         }
      }
   }

   if ( ferror( data_file ) )
      status = EXIT_FAILURE;

   fclose( data_file );

   /* fclose flushes buffered output, so a failure here means lost data */
   if ( fclose( result_file ) != 0 )
      status = EXIT_FAILURE;

   return status;
}

To follow up from the comments, you may be overthinking the problem a bit. To check whether each word in each line of a file is a palindrome, you have a 2 part problem. (1) reading each line ( fgets is fine), and (2) breaking each line into individual words (tokens) so that you can test whether each token is a palindrome.

When reading each line with fgets , a simple while loop conditioned on the return of fgets will do. eg, with a buffer buf of sufficient size ( MAXC chars), and FILE * stream fp open for reading, you can do:

while (fgets (buf, MAXC, fp)) { /* read each line */
    ...                         /* process line */
}

(You can test that the length of the line read into buf is less than MAXC chars to ensure you read the complete line; if it is not, any unread chars will be placed in buf on the next loop iteration. This check, and how you want to handle it, is left to you.)

Once you have your line read, you can either use a simple pair of pointers (start and end pointers) to work your way through buf, or you can use strtok and let it return a pointer to the beginning of each word in the line based on the set of delimiters you pass to it. For example, to split a line into words, you probably want to use delimiters like " \t\n.,:;!?" to ensure you get words alone and not words with punctuation (e.g., in the line "sit here.", you want "sit" and "here", not "here.").

Using strtok is straightforward. On the first call, you pass the name of the buffer holding the string to be tokenized and a pointer to the string containing the delimiters (e.g., strtok (buf, delims) above); then, for each subsequent call (until the end of the line is reached), you pass NULL in place of the buffer (e.g., strtok (NULL, delims)). You can either call it once and then loop until NULL is returned, or you can do it all in a single for loop, given that for allows setting an initial condition as part of the statement. For example, using separate calls:

char *delims = " \t\n.,:;";     /* delimiters */
char *p = strtok (buf, delims); /* first call to strtok */

while (p) {                     /* loop until no tokens remain */
    ... /* check for palindrome */
    p = strtok (NULL, delims);  /* all subsequent calls */
}

Or you can simply make the initial call and all subsequent calls in a for loop:

/* same thing in a single 'for' statement */
for (p = strtok (buf, delims); p; p = strtok (NULL, delims)) {
    ... /* check for palindrome */
}

Now you are at the point where you need to check for palindromes. That is a fairly easy process. Find the length of the token, then, either using string indexes or simply using a pointer to the first and last character, work from the ends to the middle of each token, making sure the characters match. On the first mismatch, you know the token is not a palindrome. I find a start and end pointer just as easy as manipulating string indexes, e.g., with the token in s:

/*
 * Checks whether the NUL-terminated string s is a palindrome.
 *
 * Returns s when the string reads the same in both directions (the empty
 * string and single characters qualify), and NULL when it does not or when
 * s itself is NULL.
 */
char *ispalindrome (char *s)    /* function to check palindrome */
{
    size_t len;
    char *p, *ep;               /* start pointer, end pointer */

    if (!s)
        return NULL;

    len = strlen (s);
    if (len < 2)                /* nothing to compare; also avoids forming s - 1 */
        return s;

    for (p = s, ep = s + len - 1; p < ep; p++, ep--)  /* work from end to middle */
        if (*p != *ep)          /* if chars !=, not palindrome */
            return NULL;

    return s;
}

If you put all the pieces together, you can do something like the following:

#include <stdio.h>
#include <string.h>

enum { MAXC = 256 };    /* max chars for line buffer */

char *ispalindrome (char *s);

/*
 * Entry point: reads lines from the file named by argv[1] (or from stdin when
 * no argument is given), prints each line with its index, then prints every
 * token of the line together with whether ispalindrome() accepts it.
 *
 * Returns 0 on success and 1 when the input file cannot be opened.
 */
int main (int argc, char **argv) {

    char line[MAXC] = "";               /* holds one input line */
    char *separators = " \t\n.,:;";     /* characters that split tokens */
    unsigned lineno = 0;                /* index of the line being shown */
    FILE *in = stdin;                   /* default input when no file is named */

    if (argc > 1) {
        in = fopen (argv[1], "r");
    }

    if (in == NULL) {   /* the named file could not be opened */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    while (fgets (line, MAXC, in) != NULL) {
        char *tok;

        /* print the line before strtok starts writing terminators into it */
        printf ("\n line[%2u]:  %s\n tokens:\n", lineno++, line);

        tok = strtok (line, separators);
        while (tok != NULL) {
            if (ispalindrome (tok)) {
                printf ("  %-16s  - palindrome\n", tok);
            }
            else {
                printf ("  %-16s  - not palindrome\n", tok);
            }
            tok = strtok (NULL, separators);
        }
    }

    if (in != stdin) {
        fclose (in);
    }

    return 0;
}

/*
 * Checks whether the NUL-terminated string s is a palindrome.
 *
 * Returns s when the string reads the same in both directions (the empty
 * string and single characters qualify), and NULL when it does not or when
 * s itself is NULL.
 */
char *ispalindrome (char *s)    /* function to check palindrome */
{
    size_t len;
    char *p, *ep;               /* ptr & end-ptr */

    if (!s)
        return NULL;

    len = strlen (s);
    if (len < 2)                /* nothing to compare; also avoids forming s - 1 */
        return s;

    for (p = s, ep = s + len - 1; p < ep; p++, ep--)  /* work from end to middle */
        if (*p != *ep)          /* if chars !=, not palindrome */
            return NULL;

    return s;
}

Example Input

$ cat dat/palins.txt
abcba rttt plllp
aaaaaaaaaaaa
ababa
abbbba
kede

Example Use/Output

$ ./bin/palindrome <dat/palins.txt

 line[ 0]:  abcba rttt plllp

 tokens:
  abcba             - palindrome
  rttt              - not palindrome
  plllp             - palindrome

 line[ 1]:  aaaaaaaaaaaa

 tokens:
  aaaaaaaaaaaa      - palindrome

 line[ 2]:  ababa

 tokens:
  ababa             - palindrome

 line[ 3]:  abbbba

 tokens:
  abbbba            - palindrome

 line[ 4]:  kede

 tokens:
  kede              - not palindrome

Look things over and think about what is taking place. As mentioned above, you should validate that each call to fgets has read a complete line; that is left to you (with this input file, of course, every line fits). If you have any questions, let me know and I'll be happy to help further.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM