简体   繁体   中英

Have a very long buffer but only use the last 1GB bytes of data.

I need to write an application in C/C++ on Linux that receives a stream of bytes from a socket and processes them. The total could be close to 1 TB. If I had an unlimited amount of memory, I would just keep it all in memory, so my application could easily process the data. It's much easier to do many things on a flat memory space, such as memmem(), memcmp() ... With a circular buffer, the application has to be extra smart to be aware of the circular buffer.

I have about 8 GB of memory, but luckily, due to locality, my application never needs to go back more than 1 GB from the latest data it received. Is there a way to have a 1 TB buffer, with only the latest 1 GB of data mapped to physical memory? If so, how do I do it?

Any ideas? Thanks.

Here's an example. It sets up a full terabyte mapping, but initially inaccessible ( PROT_NONE ). You, the programmer, maintain a window that can only extend and move upwards in memory. The example program uses a one and a half gigabyte window, advancing it in steps of 1,023,739,137 bytes (the mapping_use() makes sure the available pages cover at least the desired region), and does actually modify every page in every window, just to be sure.

#define _GNU_SOURCE
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>

/* Descriptor for one sliding-window memory mapping.
 * All three pointers refer to the same reserved address range.
*/
typedef struct mapping {
    unsigned char   *head;  /* Start of currently accessible region */
    unsigned char   *tail;  /* End of currently accessible region */
    unsigned char   *ends;  /* End of region */
    size_t           page;  /* Page size of this mapping */
} mapping;

/* Release a mapping.
 * Unmaps everything from the current head up to the end of the region
 * and resets the descriptor to its empty state. A NULL descriptor, or
 * one with nothing left to unmap, is left untouched.
*/
void mapping_free(mapping *const m)
{
    if (!m || m->ends <= m->head)
        return;

    munmap(m->head, (size_t)(m->ends - m->head));

    m->page = 0;
    m->ends = NULL;
    m->tail = NULL;
    m->head = NULL;
}

/* Move the accessible part up in memory, to [from..to).
 *
 * 'from' is rounded down and 'to' is rounded up to a page boundary, and
 * both are clamped to the mapping. Pages below the new start are unmapped
 * for good; the window can only grow or move upwards, never back.
 *
 * Returns 0 on success. On failure returns a nonzero errno code, which is
 * also left in errno: EINVAL for a NULL or already discarded mapping,
 * otherwise whatever munmap(), mprotect() or mmap() reported.
*/
int mapping_use(mapping *const m, void *const from, void *const to)
{
    if (m && m->ends > m->head) {
        unsigned char *const head = ((unsigned char *)from <= m->head) ? m->head :
                                    ((unsigned char *)from >= m->ends) ? m->ends :
                                    m->head + m->page * (size_t)(((size_t)((unsigned char *)from - m->head)) / m->page);
        unsigned char *const tail = ((unsigned char *)to <= head) ? head :
                                    ((unsigned char *)to >= m->ends) ? m->ends :
                                    m->head + m->page * (size_t)(((size_t)((unsigned char *)to - m->head) + m->page - 1) / m->page);

        if (head > m->head) {
            if (munmap(m->head, (size_t)(head - m->head)) == -1)
                return errno;
            m->head = head;

            /* The window may have jumped past the old accessible end.
             * Everything below head is gone now, so the accessible region
             * is empty and starts at head; without this, the code below
             * would try to re-enable the range that was just unmapped. */
            if (m->tail < head)
                m->tail = head;
        }

        if (tail > m->tail) {
#ifdef USE_MPROTECT
            if (mprotect(m->tail, (size_t)(tail - m->tail), PROT_READ | PROT_WRITE) == -1)
                return errno;
#else
            void *result;
            do {
                result = mmap(m->tail, (size_t)(tail - m->tail), PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_FIXED | MAP_PRIVATE | MAP_NORESERVE, -1, (off_t)0);
            } while (result == MAP_FAILED && errno == EINTR);
            if (result == MAP_FAILED)
                return errno; /* Keep the real cause instead of forcing ENOMEM. */
#endif
            m->tail = tail;
        }

        return 0;
    }
    return errno = EINVAL;
}

/* Set up a new mapping of at least 'size' bytes.
 *
 * The size is rounded up to a whole number of pages and reserved as one
 * anonymous, private PROT_NONE mapping, so no part of it is accessible
 * yet (head == tail).
 *
 * Returns 0 (and sets errno to 0) on success. Otherwise returns a nonzero
 * errno code, also left in errno: EINVAL for a NULL descriptor or zero
 * size, ENOTSUP if the page size is unusable, or the error from mmap().
*/
int mapping_create(mapping *const m, const size_t size)
{
    long   pagesize;
    size_t page, excess, length;
    void  *addr;

    if (m == NULL || size == 0)
        return errno = EINVAL;

    /* Leave the descriptor empty in case of an early failure. */
    m->page = 0;
    m->ends = NULL;
    m->tail = NULL;
    m->head = NULL;

    /* Obtain default page size; it must be positive and fit in size_t. */
    pagesize = sysconf(_SC_PAGESIZE);
    page = (size_t)pagesize;
    if (pagesize < 1L || (long)page != pagesize)
        return errno = ENOTSUP;

    /* Round size up to next multiple of page. */
    excess = size % page;
    length = excess ? size + page - excess : size;

    /* Reserve the address range, retrying if interrupted. */
    for (;;) {
        errno = ENOTSUP;
        addr = mmap(NULL, length, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, (off_t)0);
        if (addr != MAP_FAILED || errno != EINTR)
            break;
    }
    if (addr == MAP_FAILED)
        return errno;

    /* Success: nothing is accessible yet, so head and tail coincide. */
    m->page = page;
    m->ends = (unsigned char *)addr + length;
    m->tail = addr;
    m->head = addr;

    errno = 0;
    return 0;
}

/* Modify a buffer sparsely so that its pages really get touched.
 *
 * Increments the first byte, the last byte, and one byte at every
 * 2048-byte offset strictly inside the buffer (2048, 4096, ... up to
 * (size / 2048 - 1) * 2048). Does nothing for a NULL pointer or zero size.
*/
static void memtouch(void *const ptr, const size_t size)
{
    if (ptr && size > 0) {
        unsigned char *const mem = (unsigned char *)ptr;
        const size_t         step = 2048;
        /* Number of whole steps in the buffer. Counting upwards from 1
         * avoids computing 'size / step - 1', which wraps around to a
         * huge value for buffers shorter than one step. */
        const size_t         count = size / step;
        size_t               k;

        mem[0]++;
        mem[size-1]++;

        for (k = 1; k < count; k++)
            mem[k * step]++;
    }
}

/* Demo driver.
 * Reserves a 1024^4-byte mapping, then slides a 1,500,000,000-byte
 * window upwards through it in steps of 1,023,739,137 bytes, touching
 * the memory of every window. Progress is printed to standard output.
*/
int main(void)
{
    const size_t   size = (size_t)1024 * (size_t)1024 * (size_t)1024 * (size_t)1024;
    const size_t   need = (size_t)1500000000UL;
    const size_t   step = (size_t)1023739137UL;
    const size_t   last = size - need;  /* Highest offset a full window fits at */
    mapping        map;
    unsigned char *origin;
    size_t         offset = 0;

    if (mapping_create(&map, size) != 0) {
        fprintf(stderr, "Cannot create a %zu-byte mapping: %m.\n", size);
        return EXIT_FAILURE;
    }

    printf("Have a %zu-byte mapping at %p to %p.\n", size, (void *)map.head, (void *)map.ends);
    fflush(stdout);

    /* Remember the original start; map.head moves as the window advances. */
    origin = map.head;

    while (offset <= last) {
        unsigned char *const lo = origin + offset;
        unsigned char *const hi = origin + offset + need;

        printf("Requesting %p to %p .. ", (void *)lo, (void *)hi);
        fflush(stdout);

        if (mapping_use(&map, lo, hi) != 0) {
            printf("Failed (%m).\n");
            fflush(stdout);
            return EXIT_FAILURE;
        }

        printf("received %p to %p.\n", (void *)map.head, (void *)map.tail);
        fflush(stdout);

        /* Write into the window so the pages are actually used. */
        memtouch(lo, need);

        offset += step;
    }

    mapping_free(&map);

    return EXIT_SUCCESS;
}

The approach is twofold. First, an inaccessible ( PROT_NONE ) mapping is created to reserve the necessary contiguous virtual address space. If we omitted this step, a malloc() call or similar could acquire pages within this range, which would defeat the entire purpose: having a single, contiguous, terabyte-long mapping.

Second, when the accessible window extends into the region, either mprotect() (if USE_MPROTECT is defined), or mmap() is used to make the required pages accessible. Pages no longer needed are completely unmapped.

Compile and run using

gcc -Wall -Wextra -std=c99 example.c -o example
time ./example

or, to use mmap() only once and mprotect() to move the window,

gcc -DUSE_MPROTECT=1 -Wall -Wextra -std=c99 example.c -o example
time ./example

Note that you probably don't want to run the test if you don't have at least 4GB of physical RAM.

On this particular machine (i5-4200U laptop with 4GB of RAM, 3.13.0-62-generic kernel on Ubuntu x86_64), quick testing didn't show any kind of performance difference between mprotect() and mmap() , in execution speed or resident set size.

If anyone bothers to compile and run the above, and finds that one of them has a repeatable benefit/drawback (resident set size or time used), I'd very much like to know about it. Please also mention the kernel and CPU you used.

I'm not sure which details I should expand on, since this is pretty straightforward, really, and the Linux man pages project man 2 mmap and man 2 mprotect pages are quite descriptive. If you have any questions on this approach or program, I'd be happy to try and elaborate.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM