简体   繁体   中英

Socket Programming — recv() cannot get all data

I am learning socket programming in C language, and this is an incomprehensible problem I encountered during my study.

Today I am trying to send an HTTP request to my test server, which hosts an Apache example website, and then receive the response from it. Here is part of my receive code.

// Receive loop from the question, kept exactly as posted.
// recv_size accumulates the byte counts returned by read();
// response_size is the capacity of recv_buff.
unsigned long recv_size = 0;
unsigned long response_size = 4096;
int ret = 0;
char *recv_buff = (char *)malloc(response_size);

// The only exit from this loop is read() returning 0 (end of stream).
while (1)
{
    // ret = recv(socket, recv_buff, response_size, MSG_WAITALL); // cannot get all data
    // NOTE: every call reads into the start of recv_buff with the full
    // response_size, so each call overwrites what the previous call stored.
    ret = read(socket, recv_buff, response_size); // same effect as the above
    // NOTE: ret is added to the total before the error check below, so a
    // negative return value is folded into recv_size first.
    recv_size += ret;
    if (ret < 0)
        error(strerror(errno));
    else if (ret == 0)
        break; // all data recved
}

The expected result, from my test with Burp Suite, is this.

[Image: Burp Suite result]

But what I received with the C language program was incomplete data.

[Image: C program result]

I spent a whole night searching for the reason, but I still have not found a solution. Whether I make the buffer extremely large or try any other method, the complete data is never received.

[Image: Wireshark capture]

The traffic monitored from wireshark is ok, but my program still cannot receive the complete data. What is the problem?

If you know why, please let me know. THX. (o゜▽゜)o☆

UPDATE

The while loop executes twice: the first time the value of ret is 3343, and the second time it is 0, so the loop stops there.

数据库

You can get a short read on a socket.

But, your code to handle that has a few issues.

You're allocating a buffer of size response_size. You always read that full amount, instead of reducing the amount to read by what you've already read on prior loop iterations.

This can cause you to read past the end of the buffer causing UB (undefined behavior).

Your termination condition is if (ret == 0). This can fail if another packet arrives "early": you'll never see a ret of 0, because the partial data from the next packet will make it non-zero.

Here's the corrected code:

#if 0
unsigned long recv_size = 0;
#endif

unsigned long response_size = 4096;
int ret = 0;
char *recv_buff = (char *) malloc(response_size);

#if 1
unsigned long remaining_size = response_size;
unsigned long offset = 0;
#endif

for (;  remaining_size > 0;  remaining_size -= ret, offset += ret) {
    ret = read(socket, &recv_buff[offset], remaining_size);
    if (ret < 0)
        error(strerror(errno));
}

UPDATE:

The above code corrects some of the issues. But for a variable-length source (such as HTTP), we don't know at the outset how much to read.

So, we have to parse the headers and look for the "Content-Length" field. This will tell us how much to read.

So, we'd like to have line-oriented input for the headers, or else manage our own buffer.

Assuming we can parse that value, we have to wait for the empty line to denote the start of the payload. And, then we can loop on that exact amount.

Here's some code that attempts the header parsing and saving of the payload. I've coded it, but not compiled it. So, you can take it as pseudo code:

// Pseudo code: read the HTTP headers line by line, then save the payload.
//
// recv_buff holds received data that has not been consumed yet; recv_size is
// the number of valid bytes in it. One extra byte is allocated so the data
// can always be NUL terminated for strstr().
unsigned long recv_size = 0;
unsigned long response_size = 4096;
char *recv_buff = malloc(response_size + 1);

// line oriented header buffer
char *endl = NULL;
unsigned long linelen;
char linebuf[1000];

// result of the most recent read()/write() -- signed, so that a -1 error
// return can actually be detected
long ret = 0;

// read headers
while (1) {
    // fill up a chunk of data until it holds a complete line
    while (1) {
        recv_buff[recv_size] = 0;

        // do we have a line end? (checked after every read, including the
        // one that fills the buffer)
        endl = strstr(recv_buff,"\r\n");
        if (endl != NULL)
            break;

        // buffer is full and still holds no complete line
        if (recv_size >= response_size)
            break;

        ret = read(socket, &recv_buff[recv_size], response_size - recv_size);
        if (ret < 0)
            error(strerror(errno));
        if (ret == 0)
            break;

        recv_size += ret;
    }

    // error -- no line end (connection closed early or line too long)
    if (endl == NULL)
        error("no line end in header data");

    // copy header to work buffer
    linelen = endl - recv_buff;
    if (linelen >= sizeof(linebuf))
        error("header line too long");
    memcpy(linebuf,recv_buff,linelen);
    linebuf[linelen] = 0;

    // remove header (and its "\r\n") from receive buffer -- source and
    // destination overlap, so this must be memmove, not memcpy
    recv_size -= linelen + 2;
    if (recv_size > 0)
        memmove(recv_buff,&recv_buff[linelen + 2],recv_size);

    // stop on end of headers (an empty line, i.e. back to back "\r\n")
    if (linelen == 0)
        break;

    // parse line work buffer for keywords ... (e.g.)
    content_length = ...;
}

// save payload to file
while (content_length > 0) {
    // write out prior payload amount (never more than the body length)
    if (recv_size > 0) {
        if (recv_size > content_length)
            recv_size = content_length;
        write(file_fd,recv_buff,recv_size);
        content_length -= recv_size;
        recv_size = 0;
        continue;
    }

    ret = read(socket,recv_buff,response_size);
    if (ret < 0)
        error(strerror(errno));
    if (ret == 0)
        break;

    recv_size = ret;
}

UPDATE #2:

Yeah, it is hard to make the pseudo code run, and the returned values are all garbled.

Okay, here is a soup-to-nuts working version that I've tested against my own http server.

I had to create my own routines for the parts you didn't post (e.g. connect, etc.).

At the core, there might have been a minor tweak to the buffer slide code [it was sliding by an extra 2 bytes in one place], but otherwise it is pretty close to my previous version.

// htprcv/htprcv.c -- HTTP receiver

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <error.h>

#include <netdb.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

// raw byte type (used for hex dumping and for indexing opt_d)
typedef unsigned char byte;

// HTPSLIDE -- drop _rmlen bytes from the front of the receive buffer
// NOTE: expands to an assignment that uses the caller's local variables
// recv_buff and recv_size by name
#define HTPSLIDE(_rmlen) \
    recv_size = htpslide(recv_buff,recv_size,_rmlen)

// _dbgprt -- unconditional printf-style output to stderr
// (uses the GNU named variadic macro form "_fmt...")
#define _dbgprt(_fmt...) \
    fprintf(stderr,_fmt)

// dbgprt / dbgexec -- print a message / evaluate an expression only when the
// debug level _lvl is enabled (see dbgok). Unless DEBUG or _USE_ZPRT_ is
// nonzero at compile time, both expand to an empty statement and their
// arguments are never evaluated.
#if DEBUG || _USE_ZPRT_
#define dbgprt(_lvl,_fmt...) \
    do { \
        if (dbgok(_lvl)) \
            _dbgprt(_fmt); \
    } while (0)
#define dbgexec(_lvl,_expr) \
    do { \
        if (dbgok(_lvl)) \
            _expr; \
    } while (0)
#else
#define dbgprt(_lvl,_fmt...) \
    do { \
    } while (0)
#define dbgexec(_lvl,_expr) \
    do { \
    } while (0)
#endif

// dbgok -- test whether a debug level is enabled
// _lvl is a bare token (e.g. S, H, R, W): it is stringified and its first
// character is used as the index into opt_d
#define dbgok(_lvl) \
    opt_d[(byte) #_lvl[0]]
// per-letter debug flags, set by the -d option in main
byte opt_d[256];

// output file name from the -o option (NULL if the option was not given)
char *opt_o;

// number of bytes shown on each row of a hex dump
#define HEXMAX      16

// htpconn -- do connect to server
//
// Resolves hostname (IPv4 only) and opens a TCP connection to port portno,
// trying each address returned by the resolver until one connects.
//
// hostname -- server name or dotted address (NULL means "localhost")
// portno -- TCP port number
//
// RETURNS: connected socket descriptor (the caller closes it)
// Every failure is fatal: error(1,...) prints a diagnostic and exits.
int
htpconn(const char *hostname,unsigned short portno)
{
    struct addrinfo hints;
    struct addrinfo *res = NULL;
    struct addrinfo *ai;
    char portstr[20];
    int sockfd = -1;
    int sverr = 0;
    int ret;

    if (hostname == NULL)
        hostname = "localhost";

    // NOTE: no AI_PASSIVE here -- that flag is for sockets that will be
    // bound as a server, and with a NULL node name it yields the wildcard
    // address instead of the remote host
    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_INET;          // ipv4
    hints.ai_socktype = SOCK_STREAM;    // tcp

    snprintf(portstr, sizeof portstr, "%u", (unsigned) portno);

    // getaddrinfo reports failure through its return value, not errno
    ret = getaddrinfo(hostname, portstr, &hints, &res);
    if (ret != 0)
        error(1,0,"htpconn: getaddrinfo -- %s: %s\n",
            hostname,gai_strerror(ret));

    // do the actual connection -- first address that accepts wins
    for (ai = res;  ai != NULL;  ai = ai->ai_next) {
        sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (sockfd < 0) {
            sverr = errno;
            continue;
        }

        if (connect(sockfd, ai->ai_addr, ai->ai_addrlen) == 0)
            break;

        // remember why this address failed before close() can change errno
        sverr = errno;
        close(sockfd);
        sockfd = -1;
    }

    freeaddrinfo(res);

    if (sockfd < 0)
        error(1,sverr,"htpconn: connect -- %s:%s\n",hostname,portstr);

    return sockfd;
}

// htpslide -- slide buffer (strip out processed data)
//
// Discards the first slidelen bytes of recv_buff by moving the remaining
// bytes down to the start of the buffer.
//
// recv_buff -- buffer to compact
// recv_size -- number of valid bytes currently in recv_buff
// slidelen -- number of leading bytes to discard; values larger than
//             recv_size are clamped to it, values below zero discard nothing
//
// RETURNS: number of valid bytes left in recv_buff
size_t
htpslide(char *recv_buff,size_t recv_size,int slidelen)
{
    size_t rmlen;
    size_t new_size;

    // do the clamping in size_t so that no signed/unsigned conversion can
    // turn a negative or oversized count into a bogus length
    rmlen = (slidelen > 0) ? (size_t) slidelen : 0;
    if (rmlen > recv_size)
        rmlen = recv_size;

    new_size = recv_size - rmlen;

    dbgprt(S,"htpslide: slidelen=%d recv_size=%zu new_size=%zu\n",
        (int) rmlen,recv_size,new_size);

    // source and destination overlap whenever new_size > rmlen, which
    // memcpy does not allow -- memmove handles overlap correctly
    memmove(&recv_buff[0],&recv_buff[rmlen],new_size);

    return new_size;
}

// _htphex -- dump a line in hex
//
// Prints one row of a hex dump to stderr: the offset, HEXMAX hex columns
// (grouped four bytes at a time) and the same bytes as printable ASCII.
// Columns beyond xlen are padded with blanks.
//
// off -- offset shown at the start of the row
// vp -- first byte of the row
// xlen -- number of valid bytes at vp (at most HEXMAX are shown)
void
_htphex(unsigned long off,const void *vp,size_t xlen)
{
    const byte *bp = vp;
    size_t idx;
    int chr;
    char hexbuf[200];
    char alfbuf[200];

    char *hexptr = hexbuf;
    char *alfptr = alfbuf;

    for (idx = 0;  idx < HEXMAX;  ++idx) {
        if ((idx % 4) == 0)
            *hexptr++ = ' ';

        if (idx < xlen) {
            // fetch the byte only after the bounds check -- on a short
            // final row the bytes past xlen do not belong to the caller
            chr = bp[idx];
            hexptr += sprintf(hexptr,"%2.2X",(unsigned) chr);
            if ((chr < 0x20) || (chr > 0x7E))
                chr = '.';
        }
        else {
            hexptr += sprintf(hexptr,"  ");
            chr = ' ';
        }

        *alfptr++ = chr;
    }

    *hexptr = 0;
    *alfptr = 0;

    _dbgprt("  %8.8lX: %s  *%s*\n",off,hexbuf,alfbuf);
}

// htphex -- dump a buffer in hex
//
// Writes buflen bytes starting at buf as a sequence of rows of at most
// HEXMAX bytes each, one _htphex call per row. When reason is non-NULL a
// heading line naming the buffer, its length and the reason is printed first.
void
htphex(const char *buf,size_t buflen,const char *reason)
{
    size_t done;
    size_t chunk;

    if (reason != NULL) {
        _dbgprt("htphex: DUMP buf=%p buflen=%zu (from %s)\n",
            buf,buflen,reason);
    }

    done = 0;
    while (done < buflen) {
        // a row is HEXMAX bytes, except possibly the last one
        chunk = buflen - done;
        if (chunk > HEXMAX)
            chunk = HEXMAX;

        _htphex(done,&buf[done],chunk);

        done += chunk;
    }
}

// htpsym -- get symbol/value
//
// Splits a line of the form "foo-bar: baz" at its first ':'.
//
// linebuf -- line to split; it is modified (the ':' is overwritten with NUL)
// sym -- receives the text before the ':'
// val -- receives the text after the ':' with leading blanks/tabs skipped
//
// sym and val are filled with strcpy, so each must be large enough to hold
// any piece of linebuf.
//
// RETURNS: 1 if the line contained a ':', 0 if not (sym/val left untouched)
int
htpsym(char *linebuf,char *sym,char *val)
{
    char *colon;
    char *rhs;

    dbgprt(H,"htpsym: PARAM linebuf='%s'\n",linebuf);

    colon = strchr(linebuf,':');
    if (colon == NULL)
        return 0;

    // terminate the name in place, then copy it out
    *colon = 0;
    strcpy(sym,linebuf);

    // the value starts after the colon and any blanks or tabs
    rhs = colon + 1;
    while ((*rhs == ' ') || (*rhs == '\t'))
        ++rhs;
    strcpy(val,rhs);

    dbgprt(H,"htpsym: SYMBOL sym='%s' val='%s'\n",sym,val);

    return 1;
}

// htprcv -- receive server response
//
// Reads an HTTP response from sockfd. The status line and every header line
// are consumed one "\r\n" terminated line at a time; the header section ends
// at the first empty line. Only the Content-Length header is examined. Up to
// Content-Length bytes of body are then copied to fdout.
//
// sockfd -- connected socket to read the response from
// fdout -- descriptor that receives the body
//
// Read/write failures and malformed headers are fatal (error(1,...) exits).
// If the server closes the connection before the whole body has arrived,
// the function returns after writing what was received.
void
htprcv(int sockfd,int fdout)
{
    size_t recv_size = 0;
    size_t response_size = 4096;
    char *recv_buff = malloc(response_size + 1);

    // line oriented header buffer
    char *endl = NULL;
    size_t linelen;
    char linebuf[1000];

    // header name/value work buffers -- htpsym copies pieces of linebuf into
    // them with no length limit, so each must be as large as linebuf
    char sym[sizeof(linebuf)];
    char val[sizeof(linebuf)];

    ssize_t ret = 0;
    off_t content_length = 0;

    if (recv_buff == NULL)
        error(1,errno,"htprcv: malloc\n");

    // read headers
    while (1) {
        // fill up a chunk of data until it holds a complete line
        while (1) {
            recv_buff[recv_size] = 0;

            // do we have a line end? (rechecked after every read, so a read
            // that fills the whole buffer is still scanned)
            endl = strstr(recv_buff,"\r\n");
            if (endl != NULL)
                break;

            // buffer is full and still holds no complete line
            if (recv_size >= response_size)
                break;

            // read a chunk of data
            ret = read(sockfd,&recv_buff[recv_size],response_size - recv_size);
            if (ret < 0)
                error(1,errno,"htprcv: read header\n");
            if (ret == 0)
                break;

            recv_size += ret;
            dbgprt(R,"htprcv: READ ret=%zd\n",ret);
            dbgexec(R,htphex(recv_buff,recv_size,"htprcv/READ"));
        }

        // error -- no line end (early close or oversized line)
        if (endl == NULL)
            error(1,0,"htprcv: no endl\n");

        // copy header to work buffer
        linelen = endl - recv_buff;
        if (linelen >= sizeof(linebuf))
            error(1,0,"htprcv: header line too long\n");
        memcpy(linebuf,recv_buff,linelen);
        linebuf[linelen] = 0;

        // remove header (and its "\r\n") from receive buffer
        HTPSLIDE(linelen + 2);

        // stop on end of headers: the empty line is consumed like any other
        // line, so this works even when it arrives in a later read, and
        // every real header has already been parsed by the time we get here
        if (linelen == 0)
            break;

        // parse line work buffer for keywords ...
        if (! htpsym(linebuf,sym,val))
            continue;

        if (strcasecmp(sym,"Content-Length") == 0) {
            char *endp;
            long long len;

            errno = 0;
            len = strtoll(val,&endp,10);
            if ((endp == val) || (errno == ERANGE) || (len < 0))
                error(1,0,"htprcv: bad Content-Length -- '%s'\n",val);

            content_length = len;
            continue;
        }
    }

    // save payload to file
    while (content_length > 0) {
        // write out prior payload amount
        if (recv_size > 0) {
            size_t wlen = recv_size;
            size_t woff = 0;

            // never write more than the declared body length
            if ((off_t) wlen > content_length)
                wlen = content_length;

            dbgexec(W,htphex(recv_buff,wlen,"htprcv/WRITE"));

            // write may accept only part of the data -- loop until all of
            // it has gone out
            while (woff < wlen) {
                ret = write(fdout,&recv_buff[woff],wlen - woff);
                if (ret < 0)
                    error(1,errno,"htprcv: write body\n");
                if (ret == 0)
                    error(1,0,"htprcv: write body made no progress\n");
                woff += ret;
            }

            content_length -= (off_t) wlen;
            recv_size = 0;
            continue;
        }

        // read in new chunk of payload (no more than is still expected)
        size_t rdlen = response_size;
        if ((off_t) rdlen > content_length)
            rdlen = content_length;

        ret = read(sockfd,recv_buff,rdlen);
        if (ret < 0)
            error(1,errno,"htprcv: read body\n");
        if (ret == 0)
            break;

        recv_size = ret;
    }

    free(recv_buff);
}

// htpget -- do initial dialog
//
// Builds an HTTP/1.1 GET request and writes all of it to sockfd.
//
// sockfd -- connected socket to send the request on
// hostname -- value for the Host header (NULL means "localhost")
// file -- request path (NULL means "/")
//
// A request that does not fit the local buffer, or a write failure, is
// fatal (error(1,...) exits).
void
htpget(int sockfd,const char *hostname,const char *file)
{
    char buf[1024];
    int len;
    ssize_t resid;
    ssize_t xlen;
    size_t off;

    if (file == NULL)
        file = "/";
    if (hostname == NULL)
        hostname = "localhost";

    // format the whole request in one bounded call so an overlong path or
    // host name is detected instead of overrunning buf
    len = snprintf(buf,sizeof(buf),
        "GET %s HTTP/1.1\r\n"
        "Host: %s\r\n"
        "User-Agent: %s\r\n"
        "Accept: */*\r\n"
        "\r\n",
        file,hostname,"htprcv");
    if ((len < 0) || ((size_t) len >= sizeof(buf)))
        error(1,0,"htpget: request too long\n");

    // write may send only part of the request -- continue from where the
    // previous write stopped, not from the start of the buffer
    resid = len;
    off = 0;
    for (;  resid > 0;  resid -= xlen, off += xlen) {
        xlen = write(sockfd,&buf[off],resid);
        if (xlen < 0)
            error(1,errno,"htpget: write error\n");
    }
}

// main -- main program
//
// usage: htprcv [-d[LETTERS]] [-oFILE] [host[:port]]
//
//   -d[LETTERS] -- enable the debug levels named by LETTERS (default "SHRW")
//   -oFILE -- write the response body to FILE instead of stdout
//   host[:port] -- server to contact (default "localhost:80")
//
// Connects to the server, sends "GET /" and saves the response body.
int
main(int argc,char **argv)
{
    char *cp;
    char *portstr;
    char *endp;
    long portval;
    unsigned short portno;
    int sockfd;
    int filefd;
    char url[1000];

    --argc;
    ++argv;

    setlinebuf(stderr);

    for (;  argc > 0;  --argc, ++argv) {
        cp = *argv;
        if (*cp != '-')
            break;

        // cp[1] is the option letter, the option's value follows it
        switch (cp[1]) {
        case 'd':  // debug options
            cp += 2;
            if (*cp == 0)
                cp = "SHRW";
            for (;  *cp != 0;  ++cp)
                opt_d[(byte) *cp] = 1;
            break;

        case 'o':  // output file
            opt_o = cp + 2;
            break;

        default:  // unknown options are ignored
            break;
        }
    }

    // get the remote host:port
    if (argc > 0) {
        // the argument comes from the command line -- refuse one that does
        // not fit instead of overrunning url
        if (strlen(*argv) >= sizeof(url))
            error(1,0,"main: host:port argument too long\n");
        strcpy(url,*argv++);
        --argc;
    }
    else
        strcpy(url,"localhost:80");

    // get remote port number
    portstr = strchr(url,':');
    if (portstr != NULL)
        *portstr++ = 0;
    else
        portstr = "80";

    portval = strtol(portstr,&endp,10);
    if ((endp == portstr) || (*endp != 0) ||
        (portval < 1) || (portval > 65535))
        error(1,0,"main: bad port -- '%s'\n",portstr);
    portno = (unsigned short) portval;

    // open the output file (or send to stdout)
    filefd = 1;
    if (opt_o != NULL) {
        // O_TRUNC: a shorter response must not leave stale bytes from an
        // earlier run at the end of the file
        filefd = open(opt_o,O_WRONLY | O_CREAT | O_TRUNC,0644);
        if (filefd < 0) {
            // keep going with stdout, but say so
            error(0,errno,"main: open -- %s (using stdout)\n",opt_o);
            filefd = 1;
        }
    }

    // establish connection
    sockfd = htpconn(url,portno);

    // send the file request (Host header names the server we connected to)
    htpget(sockfd,url,"/");

    // receive the server response
    htprcv(sockfd,filefd);

    close(sockfd);
    if (filefd != 1)
        close(filefd);

    return 0;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM