簡體   English   中英

搜索整個數組以查找字符序列的C程序

[英]C-program that searches through an array to find a sequence of characters

我只是C編程的初學者。 請在以下問題中幫助我。

問題:一個程序搜索包含給定字符序列的給定數組。 這些字符被限制為字母A,G,T或C。序列中的最后一個字符被設置為代碼0,以便易於檢測結尾。

我找不出自己哪里做錯了,但是程序一直出錯。

/*A program that searches through a given array that contains a sequence of characters. These characters are restricted 
to be the letters A, G, T, or C. The last character in the sequence is set to be the code 0, so that the end is easily
detected. That array should be declared and initialized.*/

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
void input_sequence(int length,char input[]);
void search(char C[],char DNA[],int length);

/*
 * Repeatedly asks the user for the length of a search sequence and for the
 * sequence itself, validates the sequence, and searches the built-in DNA
 * array for it. The program ends when the entered length is zero or less,
 * or when no length can be read at all.
 */
int main(void) {
    //Given array; the trailing '\0' marks the end of the sequence
    char DNA[] = {'A', 'G', 'C', 'G', 'G', 'G', 'A', 'C', 'C', 'G', 'T', 'C', 
          'C', 'C', 'G', 'A', 'C', 'A', 'T', 'T', 'G', 'A', 'T', 'G', 
          'A', 'A', 'G', 'G', 'G', 'T', 'C', 'A', 'T', 'A', 'G', 'A', 
          'C', 'C', 'C', 'A', 'A', 'T', 'A', 'C', 'G', 'C', 'C', 'A', 
          'C', 'C', 'A', 'C', 'C', 'C', 'C', 'A', 'A', 'G', 'T', 'T', 
          'T', 'T', 'C', 'C', 'T', 'G', 'T', 'G', 'T', 'C', 'T', 'T', 
          'C', 'C', 'A', 'T', 'T', 'G', 'A', 'G', 'T', 'A', 'G', 'A', 
          'T', 'T', 'G', 'A', 'C', 'A', 'C', 'T', 'C', 'C', 'C', 'A', 
          'G', 'A', 'T', 'G', '\0'};
    int length,i;
    /*Program should repeatedly ask the user for two things: the length of a search sequence,
    and the search sequence itself*/
    /*The program should terminate when the length of the input sequence is zero or less*/
    do{
        printf("Enter length of DNA sequence to match: ");
        //Treat unreadable input (EOF or non-numeric text) as a request to quit;
        //otherwise length would keep its previous value and the loop would never end
        if(scanf("%d",&length)!=1)
            length=0;
        //input sequence length has to be >0
        if(length>0){
            //Search sequence array; declared only after length is known to be
            //positive because a variable-length array needs a size greater than zero
            char input[length];
            bool valid=true;
            input_sequence(length,input);
            /*The elements of the search sequence may take on one of five characters: A,G,T,C and *. The
            meaning of the ‘*’ character is that it matches all four nucleotides: A,G,T and C.*/
            for(i=0; i<length; i++){
                if(input[i]!='A'&&input[i]!='G'&&input[i]!='T'&&input[i]!='C'&&input[i]!='*'){
                    printf("Erroneous character input ’%c’ exiting\n",input[i]);
                    valid=false;
                    break;
                }
            }
            //Only search with a sequence that passed validation
            if(valid){
                search(input,DNA,length);
            }
        }
    }
    while(length>0);
    printf("Goodbye");

    return (EXIT_SUCCESS);
}

/*
 * Searches DNA for the first occurrence of the search sequence C and prints
 * the index of the element where that occurrence starts. Nothing is printed
 * when the sequence does not occur.
 *
 * C      - search sequence of `length` characters (does not need a 0
 *          terminator); a '*' matches any single character of DNA
 * DNA    - sequence to search in, terminated by the code 0
 * length - number of characters in C; a value <= 0 never matches
 */
void search(char C[],char DNA[],int length){
    int start,offset;
    if(length<=0)
        return;
    //Try every possible starting element of DNA in turn
    for(start=0; DNA[start]!='\0'; start++){
        for(offset=0; offset<length; offset++){
            char current=DNA[start+offset];
            //Fewer than length characters remain, so no later start can match either
            if(current=='\0')
                return;
            if(C[offset]!='*' && C[offset]!=current)
                break;
        }
        //The inner loop ran to completion only if every character matched
        if(offset==length){
            printf("Match of search sequence found at element %d\n",start);
            return;
        }
    }
}

/*
 * Prompts for a search sequence and reads `length` non-whitespace characters
 * from standard input into input[0..length-1].
 *
 * If the input ends before `length` characters were read, the remaining
 * elements are set to 0 so that the caller never inspects indeterminate
 * values.
 */
void input_sequence(int length,char input[]){
    int i;
    printf("Enter %d characters (one of AGTC*) as a search sequence: ",length);
    for(i=0; i<length; i++){
        //The leading space in the format skips blanks and newlines between characters
        if(scanf(" %c", &input[i])!=1)
            break;
    }
    //Reached only with i<length when reading stopped early (EOF or read error)
    for(; i<length; i++){
        input[i]='\0';
    }
}

下面是使用GNU C庫regexp的示例:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>

/*
 * Prints every non-overlapping match of a POSIX basic regular expression in
 * DNA, one per line, as the matched text followed by its [start:end) byte
 * offsets within DNA, and finishes with "No more matches.".
 *
 * regexp_str - pattern passed to regcomp()
 * DNA        - 0-terminated string to search
 * length     - unused; kept so the signature matches the non-regex version
 */
void search(const char *regexp_str, const char *DNA, int length)
{
    const char *p = DNA;
    regmatch_t m[1]; /* only the whole-match offsets are needed */
    regex_t regex;
    int eflags = 0;
    (void)length;

    if(regcomp(&regex, regexp_str, 0) != 0) {
        printf("Could not compile regex: %s\n", regexp_str);
        return;
    }

    /* based on http://www.lemoda.net/c/unix-regex/ */
    while(regexec(&regex, p, 1, m, eflags) == 0) {
        /* regexec reports offsets relative to p; convert them to offsets in DNA */
        int start = (int)(m[0].rm_so + (p - DNA));
        int finish = (int)(m[0].rm_eo + (p - DNA));
        /* regoff_t is not int on every platform, so cast before handing it to printf */
        printf("'%.*s' (bytes %d:%d)\n",
                (int)(m[0].rm_eo - m[0].rm_so), p + m[0].rm_so,
                start, finish);
        if(m[0].rm_eo > 0) {
            p += m[0].rm_eo;
        } else {
            /* Empty match at the current position: step one character forward,
               otherwise the same empty match would be reported forever */
            if(*p == '\0') {
                break;
            }
            p++;
        }
        /* p no longer points at the beginning of the text, so '^' must not match there */
        eflags = REG_NOTBOL;
    }
    printf ("No more matches.\n");
    /* Release the compiled pattern on every path that compiled it */
    regfree(&regex);
}

/*
 * Interactive driver: repeatedly reads the length of a search sequence and
 * the sequence itself (characters A, G, T, C or '*'), turns each '*' into the
 * regular-expression wildcard '.', and prints all matches in the built-in DNA
 * string. Ends when the length is zero or less, when it is too large for the
 * input buffer, or when standard input is exhausted.
 */
int main(void) {
    const char *DNA = "AGCGGGACCGTCCCGACATTGATGAAGGGTCATAGACCCA"
                      "ATACGCCACCACCCCAAGTTTTCCTGTGTCTTCCATTGAG"
                      "TAGATTGACACTCCCAGATG";
    const char *validInputs = "AGTC*";
    while(1) {
        char input[256];
        /* fgets stores at most sizeof(input)-1 characters and one of them is
           the newline, so this is the longest sequence a line can carry */
        const long max_length = (long)sizeof(input) - 2;
        long requested;
        int length;
        int valid = 0;

        printf("Enter length of DNA sequence to match: ");
        if(fgets(input, sizeof(input), stdin) == NULL) {//EOF or read error
            break;
        }
        /* Keep the full long result so an oversized number is rejected
           instead of being truncated into a small int */
        requested = strtol(input, NULL, 10);
        if(requested <= 0) {//input sequence length has to be >0
            break;
        } else if(requested > max_length) {
            printf("ERROR: Too big length=%ld, max supported length=%ld\n",
                   requested, max_length);
            break;
        }
        length = (int)requested;

        /* Ask again until the first `length` characters are all valid */
        while(!valid) {
            printf("Enter %d characters (one of AGTC*) as a search sequence: ",length);
            if(fgets(input, sizeof(input), stdin) == NULL) {//EOF or read error
                break;
            }

            valid = 1;
            for(int i = 0; i < length; i++) {
                /* strchr also finds the terminating 0 of validInputs, so a line
                   that ends early without a newline has to be rejected explicitly */
                if(input[i] == '\0' || strchr(validInputs, input[i]) == NULL) {
                  printf("Erroneous character input '%c' in '%s'\n", input[i], input);
                  valid = 0;
                  break;
                }
            }
        }
        if(!valid) {//input ended before a valid sequence was entered
            break;
        }
        /* Cut off the newline and anything typed beyond `length` characters */
        input[length] = 0;
        //substitute '*' on '.' for using in regexp
        for(char *ptr = strchr(input, '*'); ptr != NULL; ptr = strchr(ptr, '*')) {
            *ptr = '.';
        }
        printf("search for: %s\n", input);
        search(input, DNA, length);
    }
    printf("Goodbye\n");
    return (EXIT_SUCCESS);
}

另外,也可以改用C++11的 std::regex (僅需要更改search() ):

#include <cstdio>
#include <iterator>
#include <regex>
#include <string>

/// @brief Prints the number of non-overlapping matches of a regular
///        expression in DNA, followed by each match and its position.
/// @param C    Pattern, compiled with the default std::regex grammar.
/// @param DNA  0-terminated text to search.
/// The third (unnamed) parameter is unused; it keeps the signature compatible
/// with the other versions of search().
void search(const char *C, const char *DNA, int )
{
    std::regex regex;
    try {
        regex.assign(C);
    } catch(const std::regex_error &) {
        // Report a bad pattern instead of letting the exception end the program
        printf("Could not compile regex: %s\n", C);
        return;
    }
    const std::string str(DNA);
    const auto words_begin = std::sregex_iterator(str.begin(), str.end(), regex);
    const auto words_end = std::sregex_iterator();
    // std::distance and position() return ptrdiff_t-sized values, which must
    // not be passed to %d; convert explicitly to match the format specifier
    printf("Found %lld matches:\n",
           static_cast<long long>(std::distance(words_begin, words_end)));
    for(auto it = words_begin; it != words_end; ++it) {
        const std::smatch &match = *it;
        printf(" match: %s, pos=%lld\n", match.str().c_str(),
               static_cast<long long>(match.position()));
    }
}

在您的主要功能中,此行是一個問題:

search(input[],DNA[],length);

參數1和2(input[]和DNA[])不正確。 這種寫法只用於聲明和初始化數組。 在函數調用中傳遞數組時,除非您想取該數組中的某個特定元素,否則應省略方括號。 嘗試將其重寫為:

search(input, DNA, length);

另外,在do while循環的結尾,您缺少結尾花括號。

基本思想是掃描數組並逐個比較字符,直到目標序列的所有字符都匹配為止。 要實現您的方法,可以使用兩個指針,一個指針最初指向DNA數組的開頭,另一個指針指向您的目標數組的開頭。 然后,您比較兩個指針所指的字符,如果字符匹配,則將兩個指針都向前移動一步。 如果匹配失敗,則將目標數組指針重置為第一個字符,並將DNA指針移動到本次比較起點的下一個字符。 重復此過程,直到目標數組的所有字符都匹配。 您還可以看一下一種非常有效的算法,即Boyer-Moore字符串搜索算法。

如果您不想自己實現算法,則有一個簡單的標准庫函數strstr()。 您將兩個以'\0'結尾的數組傳遞給它,它將返回第一次出現的位置。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM