简体   繁体   English

字符串中最长的单词

[英]Longest word in string

How can I find the length of the longest word in a const char string without using an auxiliary string?如何在不使用辅助字符串的情况下找到 const char 字符串中最长单词的长度?

#include <stdio.h>

// Return the length of the longest space-delimited word in str.
//
// str: NUL-terminated string; only the ' ' character separates words.
// Returns 0 for an empty string or a string made only of spaces.
//
// The string is scanned exactly once and no auxiliary buffer is used.
int longest(const char *str) {
    int best = 0;      // longest word length seen so far
    int current = 0;   // length of the word currently being scanned

    for (; *str != '\0'; str++) {
        if (*str == ' ') {
            // A separator closes the current word: record it if it is a new maximum.
            if (current > best) {
                best = current;
            }
            current = 0;
        } else {
            current++;
        }
    }

    // The last word is terminated by the NUL, not by a space, so check it here.
    if (current > best) {
        best = current;
    }

    return best;
}

// Demo driver: prints the length of the longest word in a sample sentence.
// main must return int in a hosted C program; `void main()` is non-standard.
int main(void) {
    const char str[] = "Longest word in string";
    printf("%d", longest(str));
    return 0;
}

This prints 4 as the length of the longest word.这将打印 4 作为最长单词的长度。 Could you help me fix this?你能帮我解决这个问题吗?

You can find the longest word in linear time.您可以在线性时间内找到最长的单词。 This is surely optimal since any algorithm has to process every character at least once.这肯定是最优的,因为任何算法都必须至少处理每个字符一次。

The basic idea is to loop over every character of the string, keeping track of the position of the last space (or any other word separator).基本思想是遍历字符串的每个字符,跟踪最后一个空格(或任何其他单词分隔符)的位置。 Every time the current character is a space, we update the answer.每次当前字符是空格时,我们都会更新答案。

Note that we need to add an extra check at the end for the case where the longest word is the last in the string (and thus is not necessarily followed by a space).请注意,我们需要在末尾添加一个额外的检查,以防最长的单词是字符串中的最后一个(因此不一定后跟空格)。

Here's a simple implementation:这是一个简单的实现:

#include <stdio.h>

/*
 * Length of the longest run of non-space characters in str.
 *
 * Only ' ' is treated as a separator. The string is walked once; the final
 * word is handled after the loop because it is terminated by the NUL rather
 * than by a space.
 */
size_t longest(const char *str) {
  size_t best = 0;
  size_t word_start = 0;  /* index of the first character after the last space */
  size_t pos = 0;

  while (str[pos] != '\0') {
    if (str[pos] == ' ') {
      size_t len = pos - word_start;
      if (len > best) {
        best = len;
      }
      word_start = pos + 1;
    }
    pos++;
  }

  /* Account for a trailing word that is not followed by a space. */
  size_t tail = pos - word_start;
  return tail > best ? tail : best;
}

/* Demo driver. main must return int in a hosted C program. */
int main(void) {
  printf("%zu\n", longest("Longest word in string")); // 7
  return 0;
}

Simply walk the string once.只需遍历字符串一次。

After walking past white spaces, note the beginning of a word.走过空格后,注意单词的开头。 After walking through a word, note its length and compare to the current longest length.遍历一个单词后,记下它的长度并与当前最长的长度进行比较。 Do this until a null character detected.这样做直到检测到 null 字符。

Use size_t, rather than int, to handle long strings.使用 size_t 而不是 int 来处理长字符串。

Access the characters via an unsigned char * to use isspace() correctly.通过 unsigned char * 访问字符以正确使用 isspace()。

Example:例子:

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Return the length of the longest whitespace-delimited word in str.
// Whitespace is whatever isspace() reports. As a debugging aid, the first
// 30 characters of str are echoed to stdout inside angle brackets.
size_t longest1(const char *str) {
  // Debug trace of the input (truncated to 30 characters).
  printf("<%.30s> ", str);

  // isspace() requires values in the unsigned char range, so read the
  // string through an unsigned char pointer.
  const unsigned char *p = (const unsigned char*) str;
  size_t best = 0;

  for (;;) {
    // Skip the separator run in front of the next word.
    while (isspace(*p)) {
      p++;
    }
    if (*p == '\0') {
      break;
    }

    // p is at the first character of a word; advance to its end.
    const unsigned char *word = p;
    while (*p != '\0' && !isspace(*p)) {
      p++;
    }

    size_t len = (size_t) (p - word);
    if (best < len) {
      best = len;
    }
  }
  return best;
}

Tests测试

// Test driver for longest1(): small edge cases first, then one huge
// single-word string to show that lengths beyond INT_MAX are handled.
int main(void) {
  printf("%zu\n", longest1("Longest word in string"));
  printf("%zu\n", longest1(""));
  printf("%zu\n", longest1(" "));
  printf("%zu\n", longest1("  "));
  printf("%zu\n", longest1("a"));
  printf("%zu\n", longest1(" b"));
  printf("%zu\n", longest1("c "));
  printf("%zu\n", longest1("dd e"));
  printf("%zu\n", longest1(" ff g"));
  printf("%zu\n", longest1("hh i "));
  printf("%zu\n", longest1("j kk"));
  printf("%zu\n", longest1(" l mm"));
  printf("%zu\n", longest1("n oo "));

  // Buffer size is computed in unsigned arithmetic (INT_MAX * 2u) so it
  // does not overflow int. The stress test is skipped if allocation fails.
  const size_t big = INT_MAX * 2u;
  char *buf = malloc(big);
  if (buf) {
    memset(buf, 'x', big - 1);
    buf[big - 1] = '\0';
    printf("%zu\n", longest1(buf));
    free(buf);
  }
  return 0;
}

Output

<Longest word in string> 7
<> 0
< > 0
<  > 0
<a> 1
< b> 1
<c > 1
<dd e> 2
< ff g> 2
<hh i > 2
<j kk> 2
< l mm> 2
<n oo > 2
<xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx> 4294967293

I came up with two closely related algorithms: one that only reports the length of the longest word but not where it is located, and another that reports both.我提出了两种密切相关的算法:一种只报告最长单词的长度但不报告它所在的位置,另一种则两者都报告。 The bookkeeping tracks the longest word found so far and the current word — in the first case, just the length;簿记跟踪到目前为止找到的最长单词和当前单词——在第一种情况下,只是长度; in the second, both the length and the starting point.在第二种情况下,同时跟踪长度和起点。

Length — no position长度——没有 position

#include <ctype.h>
#include <stdio.h>

/*
 * Return the length of the longest whitespace-delimited word in str.
 *
 * str: NUL-terminated string; words are separated by any character for
 *      which isspace() is true.
 * Returns 0 when str is empty or contains only whitespace.
 */
static size_t longest(const char *str)
{
    size_t max_len = 0;   /* longest word seen so far */
    size_t cur_len = 0;   /* length of the word being scanned */
    for (; *str != '\0'; str++)
    {
        /*
         * The <ctype.h> functions are only defined for EOF and values
         * representable as unsigned char; a plain char may be negative,
         * so convert before the call.
         */
        if (!isspace((unsigned char)*str))
            cur_len++;
        else
        {
            if (cur_len > max_len)
                max_len = cur_len;
            cur_len = 0;
        }
    }
    /* The final word ends at the NUL, not at whitespace. */
    if (cur_len > max_len)
        max_len = cur_len;
    return max_len;
}

/* Run longest() over a table of sample strings and print each result. */
int main(void)
{
    static const char *const samples[] =
    {
        "a",
        "a bc",
        "ab c",
        "a b c",
        "a2345678 b234567 c23456789",
        "a234  b234    c234   ",
        "   a234  b2345  c23456  ",
        "",
        "      ",
        "   xxxxxxxxx1xxxxxxxxx2xxxxxxxxx3xxxxxxxxx4xxxxxxxxx5xxxxxxxxx6   ",
    };
    const size_t sample_count = sizeof samples / sizeof samples[0];

    for (size_t k = 0; k < sample_count; k++)
    {
        const char *text = samples[k];
        size_t len = longest(text);
        printf("LW = %3zu: [%s]\n", len, text);
    }

    return 0;
}

Output:

LW =   1: [a]
LW =   2: [a bc]
LW =   2: [ab c]
LW =   1: [a b c]
LW =   9: [a2345678 b234567 c23456789]
LW =   4: [a234  b234    c234   ]
LW =   6: [   a234  b2345  c23456  ]
LW =   0: []
LW =   0: [      ]
LW =  60: [   xxxxxxxxx1xxxxxxxxx2xxxxxxxxx3xxxxxxxxx4xxxxxxxxx5xxxxxxxxx6   ]

Length and position长度和position

#include <ctype.h>
#include <stdio.h>

/*
 * Find the longest whitespace-delimited word in str.
 *
 * str:  NUL-terminated string; words are separated by any character for
 *       which isspace() is true.
 * word: out-parameter that receives a pointer into str at the first
 *       character of the longest word. The word is NOT NUL-terminated at
 *       its end; use the returned length to bound it. If str contains no
 *       word, *word is set to the terminating NUL of str.
 *
 * Returns the length of the longest word (0 if there is none). When several
 * words share the maximum length, the first one is reported.
 */
static size_t longest_word(const char *str, char const **word)
{
    size_t max_len = 0;
    size_t cur_len = 0;
    const char *max_word = NULL;   /* start of the longest word so far */
    const char *cur_word = NULL;   /* start of the current word, NULL between words */
    for (; *str != '\0'; str++)
    {
        /*
         * The <ctype.h> functions are only defined for EOF and values
         * representable as unsigned char; a plain char may be negative,
         * so convert before the call.
         */
        if (!isspace((unsigned char)*str))
        {
            if (cur_word == NULL)
                cur_word = str;
            cur_len++;
        }
        else
        {
            if (cur_len > max_len)
            {
                max_len = cur_len;
                max_word = cur_word;
            }
            cur_len = 0;
            cur_word = NULL;
        }
    }
    /* The final word ends at the NUL, not at whitespace. */
    if (cur_len > max_len)
    {
        max_len = cur_len;
        max_word = cur_word;
    }
    else if (max_word == NULL)
        max_word = str;   /* no word at all: point at the terminating NUL */
    *word = max_word;
    return max_len;
}

/*
 * Run longest_word() over a table of sample strings and print, for each,
 * the length of the longest word, the word itself and the full input.
 */
int main(void)
{
    static const char *const samples[] =
    {
        "a",
        "a bc",
        "ab c",
        "a b c",
        "a2345678 b234567 c234567890",
        "a234  b234    c234   ",
        "   a234  b2345  c23456  ",
        "",
        "      ",
        " x xxxx-xxxx1xxxx-xxxx2yyyy-yyyy3yyyy-yyyy4  x   ",
        "   xxxxxxxxx1xxxxxxxxx2xxxxxxxxx3xxxxxxxxx4xxxxxxxxx5xxxxxxxxx6   ",
    };
    const size_t sample_count = sizeof samples / sizeof samples[0];

    for (size_t k = 0; k < sample_count; k++)
    {
        const char *text = samples[k];
        const char *found;
        size_t found_len = longest_word(text, &found);
        /* The word is not NUL-terminated, so print it with an explicit precision. */
        printf("LW = %2zu: [%.*s] in [%s]\n", found_len, (int)found_len, found, text);
    }

    return 0;
}

Output:

LW =  1: [a] in [a]
LW =  2: [bc] in [a bc]
LW =  2: [ab] in [ab c]
LW =  1: [a] in [a b c]
LW = 10: [c234567890] in [a2345678 b234567 c234567890]
LW =  4: [a234] in [a234  b234    c234   ]
LW =  6: [c23456] in [   a234  b2345  c23456  ]
LW =  0: [] in []
LW =  0: [] in [      ]
LW = 40: [xxxx-xxxx1xxxx-xxxx2yyyy-yyyy3yyyy-yyyy4] in [ x xxxx-xxxx1xxxx-xxxx2yyyy-yyyy3yyyy-yyyy4  x   ]
LW = 60: [xxxxxxxxx1xxxxxxxxx2xxxxxxxxx3xxxxxxxxx4xxxxxxxxx5xxxxxxxxx6] in [   xxxxxxxxx1xxxxxxxxx2xxxxxxxxx3xxxxxxxxx4xxxxxxxxx5xxxxxxxxx6   ]

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM