繁体   English   中英

为什么我的非阻塞原始 sockets 程序运行如此缓慢?

[英]Why is my non blocking raw sockets program running so slowly?

我有一个程序,它使用 PF_PACKET 原始套接字向一组 web 服务器发送 TCP SYN 数据包。 该程序读取一个文件,文件的每一行是一个 web 服务器的 IPv4 地址。 这个程序是我尝试以高性能方式连接大量服务器的第一步。 然而,尽管程序使用了非阻塞套接字,目前它每秒只能发送大约 10 个数据包,而它本应快上几个数量级。 有人知道它为什么运行得这么慢吗?

我在下面包含了完整的代码清单。 警告 - 代码很长。 这是因为获取网关路由器的 IP 和 MAC 地址需要大量的代码。 好消息是您可以在 main 之前跳过所有功能,因为它们只是完成获取路由器的 IP 和 MAC 地址以及本地 IP 地址的必要工作。 无论如何,这是代码:

#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <netinet/tcp.h>    //Provides declarations for tcp header
#include <netinet/ip.h> //Provides declarations for ip header
#include <netinet/ether.h>
#include <ifaddrs.h>
#include <asm/types.h>
#include <linux/if_ether.h>
//#include <linux/if_arp.h>
#include <arpa/inet.h>  //htons etc
#include <time.h>
#include <linux/rtnetlink.h>
#include <sys/resource.h>

/* EtherType value that identifies an ARP frame (compared after ntohs). */
#define PROTO_ARP 0x0806
/* Bytes in an Ethernet II header; the ARP payload starts at this offset. */
#define ETH2_HEADER_LEN 14
/* ARP hardware type written into requests. */
#define HW_TYPE 1
/* Length of a MAC address in bytes. */
#define MAC_LENGTH 6
/* Length of an IPv4 address in bytes. */
#define IPV4_LENGTH 4
/* ARP opcodes (host byte order). */
#define ARP_REQUEST 0x01
#define ARP_REPLY 0x02
/* Size in bytes of the buffers used to send and receive ARP frames. */
#define BUF_SIZE 60
#define MAX_CONNECTIONS 10000

/*
 * printf-style log helpers that append a newline.
 * Each one is wrapped in do { } while (0) so that it expands to exactly one
 * statement and therefore stays correct inside an unbraced if/else.
 */
#define debug(...) do { printf(__VA_ARGS__); printf("\n"); } while (0)
#define info(...) do { printf(__VA_ARGS__); printf("\n"); } while (0)
#define warn(...) do { printf(__VA_ARGS__); printf("\n"); } while (0)
#define err(...) do { printf(__VA_ARGS__); printf("\n"); } while (0)

/* Name of the interface to transmit on; main points this at ifname. */
static char * str_devname= NULL;
/* Value main passes to the PACKET_LOSS socket option. */
static int mode_loss     = 0;
/* Not used by any live code in this file. */
static int c_packet_sz   = 150;
/* Used by main as both the TX ring block size and frame size, in bytes. */
static int c_buffer_sz   = 1024*8;
/* Used by main as both the TX ring block count and frame count. */
static int c_buffer_nb   = 1024;
/* SO_SNDBUF value main requests when non-zero; 0 skips the setsockopt. */
static int c_sndbuf_sz   = 0;
/* The next four are not referenced by any code in this file. */
static int c_send_mask   = 127;
static int c_error       = 0;
static int c_mtu         = 0;
static int mode_thread   = 0;

/* Packet socket main transmits on. */
volatile int fd_socket;
/* Offset from a ring frame's header to its packet data; set by main. */
volatile int data_offset = 0;
/* Start of the mmap()ed TX ring. */
volatile struct tpacket_hdr * ps_header_start;
/* Never assigned in this file, so it stays NULL. */
volatile struct sockaddr_ll *ps_sockaddr = NULL;
/* Not referenced by any code in this file. */
volatile int shutdown_flag = 0;
/* Set to 1 by main once the input file has no more addresses. */
int done = 0;
/* Ring geometry main hands to the PACKET_TX_RING socket option. */
struct tpacket_req s_packet_req;
/* Receive buffer that read_arp fills with one frame. */
unsigned char buffer[BUF_SIZE];
/* ARP payload inside buffer, just past the Ethernet header; main takes the gateway MAC from here. */
struct arp_header *arp_resp = (struct arp_header *) (buffer + ETH2_HEADER_LEN);
/* Interface name copied by print_route from a route that has both a gateway and an output interface. */
char ifname[512];
/* Gateway address, as text, copied by print_route from a route that has a gateway. */
char ip[512];

/*
 * 96 bit (12 byte) pseudo header that is prepended to the TCP header when
 * the TCP checksum is calculated. It is never transmitted itself.
 */
struct pseudo_header
{
    u_int32_t source_address;   /* source IPv4 address, as returned by inet_addr */
    u_int32_t dest_address;     /* destination IPv4 address, as returned by inet_addr */
    u_int8_t placeholder;       /* main sets this to 0 */
    u_int8_t protocol;          /* main sets this to IPPROTO_TCP */
    u_int16_t tcp_length;       /* TCP header length, stored with htons */
};


/*
 * ARP packet body as laid out directly after the Ethernet header.
 * The 16-bit fields are written with htons and read with ntohs.
 */
struct arp_header {
    unsigned short hardware_type;           /* send_arp writes HW_TYPE */
    unsigned short protocol_type;           /* send_arp writes ETH_P_IP */
    unsigned char hardware_len;             /* send_arp writes MAC_LENGTH */
    unsigned char protocol_len;             /* send_arp writes IPV4_LENGTH */
    unsigned short opcode;                  /* ARP_REQUEST or ARP_REPLY */
    unsigned char sender_mac[MAC_LENGTH];   /* MAC address of the sender */
    unsigned char sender_ip[IPV4_LENGTH];   /* IPv4 address of the sender */
    unsigned char target_mac[MAC_LENGTH];   /* zeroed by send_arp in a request */
    unsigned char target_ip[IPV4_LENGTH];   /* IPv4 address being resolved */
};

/*
 * Receives one message from fd with recvmsg, retrying while the call is
 * interrupted (EINTR) or would block (EAGAIN).
 *
 * fd:    socket to read from
 * msg:   message header describing where the data goes
 * flags: flags handed to recvmsg unchanged
 *
 * Returns the number of bytes reported by recvmsg (> 0), the negated errno
 * of a failed recvmsg, or -ENODATA when the peer has closed the socket.
 */
int rtnl_receive(int fd, struct msghdr *msg, int flags)
{
    int len;

    do {
        len = recvmsg(fd, msg, flags);
    } while (len < 0 && (errno == EINTR || errno == EAGAIN));

    if (len < 0) {
        /* perror may itself modify errno, so capture it first. */
        int saved_errno = errno;

        perror("Netlink receive failed");
        return -saved_errno;
    }

    if (len == 0) {
        /* EOF is not an errno condition, so perror would print a stale error. */
        fprintf(stderr, "EOF on netlink\n");
        return -ENODATA;
    }

    return len;
}

/*
 * Reads one complete message from fd into a freshly allocated buffer.
 *
 * The message is first peeked with an empty iovec and MSG_TRUNC; the length
 * that comes back is used as the allocation size for the real read.
 *
 * fd:     socket to read from
 * msg:    message header; its first iovec is overwritten by this function
 * answer: on success receives the malloc()ed buffer, which the caller frees
 *
 * Returns the number of bytes read, or a negative value on failure
 * (-ENOMEM when the allocation fails, otherwise what rtnl_receive returned).
 */
static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
{
    struct iovec *vec = msg->msg_iov;
    char *storage;
    int pending;
    int got;

    vec->iov_base = NULL;
    vec->iov_len = 0;

    pending = rtnl_receive(fd, msg, MSG_PEEK | MSG_TRUNC);
    if (pending < 0)
        return pending;

    storage = malloc(pending);
    if (storage == NULL) {
        perror("malloc failed");
        return -ENOMEM;
    }

    vec->iov_base = storage;
    vec->iov_len = pending;

    got = rtnl_receive(fd, msg, 0);
    if (got < 0) {
        free(storage);
        return got;
    }

    *answer = storage;
    return got;
}

/*
 * Indexes a run of route attributes by type.
 *
 * tb:  table with max + 1 entries; every entry is cleared first
 * max: highest attribute type to record
 * rta: first attribute of the run
 * len: number of bytes in the run
 *
 * After the call tb[t] points at the last attribute of type t that was seen,
 * or is NULL when there was none. Attributes whose type exceeds max are skipped.
 */
void parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
    int slot;

    for (slot = 0; slot <= max; slot++)
        tb[slot] = NULL;

    for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
        if (rta->rta_type <= max)
            tb[rta->rta_type] = rta;
    }
}

/*
 * Returns the routing table id of a route message: the value carried by the
 * RTA_TABLE attribute when tb has one, otherwise the rtm_table header field.
 */
static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb)
{
    if (tb[RTA_TABLE] != NULL)
        return *(__u32 *)RTA_DATA(tb[RTA_TABLE]);

    return r->rtm_table;
}

/*
 * Prints one route from a netlink route message and remembers the gateway.
 *
 * Only IPv4 routes of the main table are handled. A route with a destination
 * is printed only when its prefix length is 24 or 16. When the route has a
 * gateway, its address is copied into the global ip; when it also has an
 * output interface whose name can be resolved, that name is copied into the
 * global ifname.
 *
 * nl_header_answer: netlink message holding a struct rtmsg and its attributes
 */
void print_route(struct nlmsghdr* nl_header_answer)
{
    struct rtmsg* r = NLMSG_DATA(nl_header_answer);
    int len = nl_header_answer->nlmsg_len;
    struct rtattr* tb[RTA_MAX+1];
    char buf[256];
    char if_nam_buf[IF_NAMESIZE];
    const char *text;
    const char *dev = NULL;
    int table;

    len -= NLMSG_LENGTH(sizeof(*r));

    if (len < 0) {
        /* No errno is involved here, so perror would report a stale error. */
        fprintf(stderr, "Wrong message length\n");
        return;
    }

    parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len);

    table = rtm_get_table(r, tb);

    /* Skip the route unless it is BOTH IPv4 and in the main table. */
    if (r->rtm_family != AF_INET || table != RT_TABLE_MAIN) {
        return;
    }

    if (tb[RTA_DST]) {
        if ((r->rtm_dst_len != 24) && (r->rtm_dst_len != 16)) {
            return;
        }

        text = inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_DST]), buf, sizeof(buf));
        printf("%s/%u ", text ? text : "?", r->rtm_dst_len);
    } else if (r->rtm_dst_len) {
        printf("0/%u ", r->rtm_dst_len);
    } else {
        printf("default ");
    }

    if (tb[RTA_GATEWAY]) {
        /* inet_ntop returns NULL on failure; never pass that to printf/strcpy. */
        text = inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_GATEWAY]), buf, sizeof(buf));
        if (text) {
            printf("via %s", text);
            snprintf(ip, sizeof(ip), "%s", text);
        }
    }

    if (tb[RTA_OIF]) {
        int ifidx = *(__u32 *)RTA_DATA(tb[RTA_OIF]);

        /* if_indextoname returns NULL when the index has no interface. */
        dev = if_indextoname(ifidx, if_nam_buf);
        if (dev) {
            printf(" dev %s", dev);
        }
    }

    if (tb[RTA_GATEWAY] && dev) {
        snprintf(ifname, sizeof(ifname), "%s", dev);
    }

    if (tb[RTA_SRC]) {
        text = inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_SRC]), buf, sizeof(buf));
        if (text) {
            /* Leading space keeps this apart from the preceding field. */
            printf(" src %s", text);
        }
    }

    printf("\n");
}

/*
 * Creates a NETLINK_ROUTE socket and binds it with this process's pid as the
 * netlink port id.
 *
 * Returns the socket descriptor, or -1 after printing the reason on failure.
 */
int open_netlink()
{
    struct sockaddr_nl local;
    int fd;

    fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (fd < 0) {
        perror("Failed to open netlink socket");
        return -1;
    }

    memset(&local, 0, sizeof(local));
    local.nl_pid = getpid();
    local.nl_family = AF_NETLINK;

    if (bind(fd, (struct sockaddr *)&local, sizeof(local)) >= 0)
        return fd;

    perror("Failed to bind to netlink socket");
    close(fd);
    return -1;
}

/*
 * Sends an RTM_GETROUTE dump request for IPv4 routes on a netlink socket.
 *
 * sock: socket to send the request on
 *
 * Returns what send returns: the number of bytes sent, or -1 on failure.
 */
int do_route_dump_requst(int sock)
{
    struct {
        struct nlmsghdr nlh;
        struct rtmsg rtm;
    } nl_request;

    /*
     * Clear the whole request first: every field that is not set below
     * (port id, prefix lengths, table, flags, ...) must reach the kernel
     * as zero rather than as whatever happened to be on the stack.
     */
    memset(&nl_request, 0, sizeof(nl_request));

    nl_request.nlh.nlmsg_type = RTM_GETROUTE;
    nl_request.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
    nl_request.nlh.nlmsg_len = sizeof(nl_request);
    nl_request.nlh.nlmsg_seq = time(NULL);
    nl_request.rtm.rtm_family = AF_INET;

    return send(sock, &nl_request, sizeof(nl_request), 0);
}

/*
 * Reads one datagram of the route dump from sock and passes every route
 * message in it to print_route.
 *
 * sock: netlink socket on which a dump request has been sent
 *
 * Returns the number of bytes received, or a negative value when receiving
 * failed, the sender was not the kernel, the dump was interrupted, or
 * netlink reported an error.
 *
 * NOTE(review): only a single datagram is read. If the dump spans several
 * datagrams the later ones are left unread -- confirm this is acceptable.
 */
int get_route_dump_response(int sock)
{
    struct sockaddr_nl nladdr;
    struct iovec iov;
    struct msghdr msg = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };
    char *buf = NULL;
    struct nlmsghdr *h;
    int msglen;
    int status;

    memset(&nladdr, 0, sizeof(nladdr));

    status = rtnl_recvmsg(sock, &msg, &buf);
    if (status < 0) {
        /* Nothing was allocated, so there is nothing to parse or free. */
        return status;
    }

    printf("Main routing table IPv4\n");

    /*
     * Only messages whose sender port id is 0 are processed. The sender is a
     * property of the whole datagram, so it is checked once up front.
     */
    if (nladdr.nl_pid != 0) {
        fprintf(stderr, "Ignoring netlink message from port %u\n",
                (unsigned)nladdr.nl_pid);
        free(buf);
        return -1;
    }

    h = (struct nlmsghdr *)buf;
    msglen = status;

    while (NLMSG_OK(h, msglen)) {
        if (h->nlmsg_flags & NLM_F_DUMP_INTR) {
            fprintf(stderr, "Dump was interrupted\n");
            free(buf);
            return -1;
        }

        if (h->nlmsg_type == NLMSG_ERROR) {
            /* Stop here: the buffer must not be used or freed again. */
            fprintf(stderr, "netlink reported error\n");
            free(buf);
            return -1;
        }

        if (h->nlmsg_type == NLMSG_DONE) {
            /* End-of-dump marker; it carries no route. */
            break;
        }

        print_route(h);

        h = NLMSG_NEXT(h, msglen);
    }

    free(buf);

    return status;
}


/*
 * Extracts the IPv4 address stored in a struct sockaddr.
 *
 * addr: address to read; its family must be AF_INET
 * ip:   receives sin_addr.s_addr unchanged (network byte order)
 *
 * Returns 0 on success, or 1 after logging when addr is not AF_INET.
 */
int int_ip4(struct sockaddr *addr, uint32_t *ip)
{
    const struct sockaddr_in *in4;

    if (addr->sa_family != AF_INET) {
        err("Not AF_INET");
        return 1;
    }

    in4 = (const struct sockaddr_in *) addr;
    *ip = in4->sin_addr.s_addr;
    return 0;
}

/*
 * Writes the IPv4 address held in addr to out as dotted-decimal text.
 *
 * addr: address to format; its family must be AF_INET
 * out:  destination buffer; the caller must make it large enough for the
 *       text and its terminating NUL, since no length is checked here
 *
 * Returns 0 on success, -1 when addr is not AF_INET, and -2 when inet_ntoa
 * yields no string.
 */
int format_ip4(struct sockaddr *addr, char *out)
{
    const char *text;

    if (addr->sa_family != AF_INET)
        return -1;

    text = inet_ntoa(((struct sockaddr_in *) addr)->sin_addr);
    if (text == NULL)
        return -2;

    strcpy(out, text);
    return 0;
}

/*
 * Writes interface IPv4 address as network byte order to ip.
 * Returns 0 on success.
 */
int get_if_ip4(int fd, const char *ifname, uint32_t *ip) {
    int err = -1;
    struct ifreq ifr;
    memset(&ifr, 0, sizeof(struct ifreq));
    if (strlen(ifname) > (IFNAMSIZ - 1)) {
        err("Too long interface name");
        goto out;
    }

    strcpy(ifr.ifr_name, ifname);
    if (ioctl(fd, SIOCGIFADDR, &ifr) == -1) {
        perror("SIOCGIFADDR");
        goto out;
    }

    if (int_ip4(&ifr.ifr_addr, ip)) {
        goto out;
    }
    err = 0;
out:
    return err;
}

/*
 * Sends an ARP who-has request to dst_ip
 * on interface ifindex, using source mac src_mac and source ip src_ip.
 */
int send_arp(int fd, int ifindex, const unsigned char *src_mac, uint32_t src_ip, uint32_t dst_ip)
{
    int err = -1;
    unsigned char buffer[BUF_SIZE];
    memset(buffer, 0, sizeof(buffer));

    struct sockaddr_ll socket_address;
    socket_address.sll_family = AF_PACKET;
    socket_address.sll_protocol = htons(ETH_P_ARP);
    socket_address.sll_ifindex = ifindex;
    socket_address.sll_hatype = htons(ARPHRD_ETHER);
    socket_address.sll_pkttype = (PACKET_BROADCAST);
    socket_address.sll_halen = MAC_LENGTH;
    socket_address.sll_addr[6] = 0x00;
    socket_address.sll_addr[7] = 0x00;

    struct ethhdr *send_req = (struct ethhdr *) buffer;
    struct arp_header *arp_req = (struct arp_header *) (buffer + ETH2_HEADER_LEN);
    int index;
    ssize_t ret, length = 0;

    //Broadcast
    memset(send_req->h_dest, 0xff, MAC_LENGTH);

    //Target MAC zero
    memset(arp_req->target_mac, 0x00, MAC_LENGTH);

    //Set source mac to our MAC address
    memcpy(send_req->h_source, src_mac, MAC_LENGTH);
    memcpy(arp_req->sender_mac, src_mac, MAC_LENGTH);
    memcpy(socket_address.sll_addr, src_mac, MAC_LENGTH);

    /* Setting protocol of the packet */
    send_req->h_proto = htons(ETH_P_ARP);

    /* Creating ARP request */
    arp_req->hardware_type = htons(HW_TYPE);
    arp_req->protocol_type = htons(ETH_P_IP);
    arp_req->hardware_len = MAC_LENGTH;
    arp_req->protocol_len = IPV4_LENGTH;
    arp_req->opcode = htons(ARP_REQUEST);

    debug("Copy IP address to arp_req");
    memcpy(arp_req->sender_ip, &src_ip, sizeof(uint32_t));
    memcpy(arp_req->target_ip, &dst_ip, sizeof(uint32_t));

    ret = sendto(fd, buffer, 42, 0, (struct sockaddr *) &socket_address, sizeof(socket_address));
    if (ret == -1) {
        perror("sendto():");
        goto out;
    }
    err = 0;
out:
    return err;
}

/*
 * Gets interface information by name.
 *
 * ifname:  interface name, at most IFNAMSIZ - 1 characters
 * ip:      receives the interface's IPv4 address (network byte order)
 * mac:     receives MAC_LENGTH bytes of hardware address
 * ifindex: receives the interface index
 *
 * A temporary packet socket is opened for the ioctls and closed again before
 * returning. Returns 0 on success and -1 on any failure.
 */
int get_if_info(const char *ifname, uint32_t *ip, char *mac, int *ifindex)
{
    struct ifreq ifr;
    int err = -1;
    int sd;

    debug("get_if_info for %s", ifname);

    /* socket() signals failure with -1 only; descriptor 0 is a valid result. */
    sd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ARP));
    if (sd < 0) {
        perror("socket()");
        return -1;
    }

    if (strlen(ifname) > (IFNAMSIZ - 1)) {
        printf("Too long interface name, MAX=%i\n", IFNAMSIZ - 1);
        goto out;
    }

    /* Start from a cleared request so no stack garbage reaches the kernel. */
    memset(&ifr, 0, sizeof(ifr));
    strcpy(ifr.ifr_name, ifname);

    //Get interface index using name
    if (ioctl(sd, SIOCGIFINDEX, &ifr) == -1) {
        perror("SIOCGIFINDEX");
        goto out;
    }
    *ifindex = ifr.ifr_ifindex;
    printf("interface index is %d\n", *ifindex);

    //Get MAC address of the interface
    if (ioctl(sd, SIOCGIFHWADDR, &ifr) == -1) {
        perror("SIOCGIFHWADDR");
        goto out;
    }

    //Copy mac address to output
    memcpy(mac, ifr.ifr_hwaddr.sa_data, MAC_LENGTH);

    if (get_if_ip4(sd, ifname, ip)) {
        goto out;
    }
    debug("get_if_info OK");

    err = 0;
out:
    debug("Clean up temporary socket");
    close(sd);
    return err;
}

/*
 * Creates a raw packet socket for ARP traffic and binds it to one interface.
 *
 * ifindex: index of the interface to bind to
 * fd:      receives the socket descriptor on success and -1 on failure
 *
 * Returns 0 on success and -1 on failure. On failure nothing is left open.
 */
int bind_arp(int ifindex, int *fd)
{
    struct sockaddr_ll sll;

    debug("bind_arp: ifindex=%i", ifindex);

    // Submit request for a raw socket descriptor.
    /* socket() signals failure with -1 only; descriptor 0 is a valid result. */
    *fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ARP));
    if (*fd < 0) {
        perror("socket()");
        return -1;
    }

    debug("Binding to ifindex %i", ifindex);
    memset(&sll, 0, sizeof(struct sockaddr_ll));
    sll.sll_family = AF_PACKET;
    sll.sll_ifindex = ifindex;
    if (bind(*fd, (struct sockaddr*) &sll, sizeof(struct sockaddr_ll)) < 0) {
        perror("bind");
        debug("Cleanup socket");
        close(*fd);
        /* Do not hand a closed descriptor back to the caller. */
        *fd = -1;
        return -1;
    }

    return 0;
}

/*
 * Reads a single ARP reply from fd.
 * Return 0 on success.
 */
int read_arp(int fd)
{
    debug("read_arp");
    int ret = -1;
    ssize_t length = recvfrom(fd, buffer, BUF_SIZE, 0, NULL, NULL);
    int index;
    if (length == -1) {
        perror("recvfrom()");
        goto out;
    }
    struct ethhdr *rcv_resp = (struct ethhdr *) buffer;
    if (ntohs(rcv_resp->h_proto) != PROTO_ARP) {
        debug("Not an ARP packet");
        goto out;
    }
    if (ntohs(arp_resp->opcode) != ARP_REPLY) {
        debug("Not an ARP reply");
        goto out;
    }
    debug("received ARP len=%ld", length);
    struct in_addr sender_a;
    memset(&sender_a, 0, sizeof(struct in_addr));
    memcpy(&sender_a.s_addr, arp_resp->sender_ip, sizeof(uint32_t));
    debug("Sender IP: %s", inet_ntoa(sender_a));

    debug("Sender MAC: %02X:%02X:%02X:%02X:%02X:%02X",
          arp_resp->sender_mac[0],
          arp_resp->sender_mac[1],
          arp_resp->sender_mac[2],
          arp_resp->sender_mac[3],
          arp_resp->sender_mac[4],
          arp_resp->sender_mac[5]);

    ret = 0;

out:
    return ret;
}

/*
 * Sends an ARP who-has request on interface <ifname> for IPv4 address <ip>
 * and waits until read_arp reports a reply.
 *
 * ifname: name of the interface to send on
 * ip:     dotted-decimal IPv4 address to resolve
 *
 * Returns 0 on success, 1 when ip is not a usable address, and -1 when the
 * interface lookup, the socket setup or the send failed.
 *
 * NOTE(review): the wait has no timeout, so this never returns if no ARP
 * reply arrives.
 */
int test_arping(const char *ifname, const char *ip) {
    int ret = -1;
    int arp_fd = -1;    /* -1 means "nothing to close" at out: */
    uint32_t src = 0;
    int ifindex = 0;
    char mac[MAC_LENGTH];
    uint32_t dst = inet_addr(ip);

    if (dst == 0 || dst == 0xffffffff) {
        printf("Invalid destination IP\n");
        return 1;
    }

    if (get_if_info(ifname, &src, mac, &ifindex)) {
        err("get_if_info failed, interface %s not found or no IP set?", ifname);
        goto out;
    }

    if (bind_arp(ifindex, &arp_fd)) {
        err("Failed to bind_arp()");
        /* bind_arp closes its socket itself when it fails. */
        arp_fd = -1;
        goto out;
    }

    if (send_arp(arp_fd, ifindex, (const unsigned char *) mac, src, dst)) {
        err("Failed to send_arp");
        goto out;
    }

    while (read_arp(arp_fd) != 0) {
        /* keep reading until an ARP reply shows up */
    }
    info("Got reply, break out");

    ret = 0;
out:
    if (arp_fd >= 0) {
        close(arp_fd);
    }
    return ret;
}

/*
 * Computes the 16-bit one's-complement Internet checksum of a buffer.
 *
 * buf:  data to sum; no particular alignment is required
 * size: number of bytes in buf; exactly this many bytes are read
 *
 * The data is summed in 8-byte words with end-around carry, the 4/2/1-byte
 * remainder is added, and the total is folded down to 16 bits and inverted.
 * An odd trailing byte is treated as if it were followed by a zero byte.
 *
 * Returns the checksum as a native 16-bit value whose in-memory bytes are
 * already in the order the header field needs, so the result can be stored
 * into the checksum field directly.
 */
unsigned short checksum2(const char *buf, unsigned size)
{
    uint64_t sum = 0;
    uint32_t lo, hi;
    uint16_t lo16, hi16;

    /* Main loop - 8 bytes at a time. memcpy keeps unaligned input legal. */
    while (size >= sizeof(uint64_t)) {
        uint64_t word;

        memcpy(&word, buf, sizeof(word));
        sum += word;
        if (sum < word) sum++;
        buf += sizeof(word);
        size -= sizeof(word);
    }

    /* Handle tail less than 8 bytes long: size is now 0..7 */
    if (size & 4) {
        uint32_t word;

        memcpy(&word, buf, sizeof(word));
        sum += word;
        if (sum < word) sum++;
        buf += sizeof(word);
    }

    if (size & 2) {
        uint16_t word;

        memcpy(&word, buf, sizeof(word));
        sum += word;
        if (sum < word) sum++;
        buf += sizeof(word);
    }

    /* Test the low bit only: a plain "if (size)" would read past the end. */
    if (size & 1) {
        unsigned char padded[2] = { (unsigned char) *buf, 0 };
        uint16_t word;

        memcpy(&word, padded, sizeof(word));
        sum += word;
        if (sum < word) sum++;
    }

    /* Fold down to 16 bits */
    lo = (uint32_t) sum;
    hi = (uint32_t) (sum >> 32);
    lo += hi;
    if (lo < hi) lo++;

    lo16 = (uint16_t) lo;
    hi16 = (uint16_t) (lo >> 16);
    lo16 = (uint16_t) (lo16 + hi16);
    if (lo16 < hi16) lo16++;

    return (unsigned short) ~lo16;
}

int main( int argc, char ** argv )
{
    uint32_t size;
    size_t len;
    struct sockaddr_ll my_addr, peer_addr;
    int i_ifindex;
    int ec;
    struct ifreq s_ifr; /* points to one interface returned from ioctl */
    int tmp;
    FILE * fp;
    char server[254];
    int count = 0;
    int first_time = 1;
    int z;
    int first_mmap = 1;
    
    #define HWADDR_len 6
    #define IP_len 4
    int s,s2,i;
    struct ifreq ifr,ifr2;
    int ret = -1;
    struct rlimit lim;
    
    if (argc != 2) {
        printf("Usage: %s <INPUT_FILE>\n", argv[0]);
        return 1;
    }

    getrlimit(RLIMIT_NOFILE, &lim);
    printf("Soft: %d Hard: %d\n", (int)lim.rlim_cur, (int)lim.rlim_max);
    lim.rlim_cur = lim.rlim_max;
    
    
    if (setrlimit(RLIMIT_NOFILE, &lim) == -1) {
        printf("rlimit failed\n");
        return -1;
    }
    getrlimit(RLIMIT_NOFILE, &lim);
    printf("New Soft: %d New Hard: %d\n", (int)lim.rlim_cur, (int)lim.rlim_max);
    
    int nl_sock = open_netlink();

    if (do_route_dump_requst(nl_sock) < 0) {
        perror("Failed to perfom request");
        close(nl_sock);
        return -1;
    }

    get_route_dump_response(nl_sock);

    close (nl_sock);
    
    test_arping(ifname, ip);

    
    s = socket(AF_INET, SOCK_DGRAM, 0);
    s2 = socket(AF_INET, SOCK_DGRAM, 0);
    strcpy(ifr.ifr_name, ifname);
    strcpy(ifr2.ifr_name, ifname);
    ioctl(s, SIOCGIFHWADDR, &ifr);
    ioctl(s2, SIOCGIFADDR, &ifr2);
    struct sockaddr_in* ipaddr = (struct sockaddr_in*)&ifr2.ifr_addr;
    close(s);

    fp = fopen(argv[1], "r");
    if (!fp)
        exit(EXIT_FAILURE);


    while (!done)
    {   
        fd_socket = socket(PF_PACKET, SOCK_RAW|SOCK_NONBLOCK, htons(ETH_P_ALL));
        if(fd_socket == -1)
        {
            perror("socket");
            return EXIT_FAILURE;
        }
    
        /* clear structure */
        memset(&my_addr, 0, sizeof(struct sockaddr_ll));
        my_addr.sll_family = PF_PACKET;
        my_addr.sll_protocol = htons(ETH_P_ALL);
    
        str_devname = ifname;
        //strcpy (str_devname, ifname);
        
        /* initialize interface struct */
        strncpy (s_ifr.ifr_name, str_devname, sizeof(s_ifr.ifr_name));
    
        /* Get the broad cast address */
        ec = ioctl(fd_socket, SIOCGIFINDEX, &s_ifr);
        if(ec == -1)
        {
            perror("iotcl");
            return EXIT_FAILURE;
        }
    
        /* update with interface index */
        i_ifindex = s_ifr.ifr_ifindex;
    
        s_ifr.ifr_mtu = 7200;
        /* update the mtu through ioctl */
        ec = ioctl(fd_socket, SIOCSIFMTU, &s_ifr);
        if(ec == -1)
        {
            perror("iotcl");
            return EXIT_FAILURE;
        }
    
        /* set sockaddr info */
        memset(&my_addr, 0, sizeof(struct sockaddr_ll));
        my_addr.sll_family = AF_PACKET;
        my_addr.sll_protocol = ETH_P_ALL;
        my_addr.sll_ifindex = i_ifindex;
    
        /* bind port */
        if (bind(fd_socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll)) == -1)
        {
            perror("bind");
            return EXIT_FAILURE;
        }
    
        /* prepare Tx ring request */
        s_packet_req.tp_block_size = c_buffer_sz;
        s_packet_req.tp_frame_size = c_buffer_sz;
        s_packet_req.tp_block_nr = c_buffer_nb;
        s_packet_req.tp_frame_nr = c_buffer_nb;
    
        /* calculate memory to mmap in the kernel */
        size = s_packet_req.tp_block_size * s_packet_req.tp_block_nr;
    
        /* set packet loss option */
        tmp = mode_loss;
        if (setsockopt(fd_socket, SOL_PACKET, PACKET_LOSS, (char *)&tmp, sizeof(tmp))<0)
        {
            perror("setsockopt: PACKET_LOSS");
            return EXIT_FAILURE;
        }
        
        /* send TX ring request */
        if (setsockopt(fd_socket, SOL_PACKET, PACKET_TX_RING, (char *)&s_packet_req, sizeof(s_packet_req))<0)
        {
            perror("setsockopt: PACKET_TX_RING");
            return EXIT_FAILURE;
        }
    
        /* change send buffer size */
        if(c_sndbuf_sz) {
            printf("send buff size = %d\n", c_sndbuf_sz);
            if (setsockopt(fd_socket, SOL_SOCKET, SO_SNDBUF, &c_sndbuf_sz, sizeof(c_sndbuf_sz))< 0)
            {
                perror("getsockopt: SO_SNDBUF");
                return EXIT_FAILURE;
            }
        }
    
        /* get data offset */
        data_offset = TPACKET_HDRLEN - sizeof(struct sockaddr_ll);
    
        /* mmap Tx ring buffers memory */
        ps_header_start = mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd_socket, 0);
        if (ps_header_start == (void*)-1)
        {
            perror("mmap");
            return EXIT_FAILURE;
        }
    
    
        int i,j;
        int i_index = 0;
        char * data;
        int first_loop = 1;
        struct tpacket_hdr * ps_header;
        int ec_send = 0;
 
        int i_index_start = i_index;
        
        ps_header = ((struct tpacket_hdr *)((void *)ps_header_start + (c_buffer_sz*i_index)));
        data = ((void*) ps_header) + data_offset;
        //Datagram to represent the packet
        char datagram[4096] , source_ip[32] , *pseudogram;
    
        //zero out the packet buffer
        memset (datagram, 0, 4096);
    
        //Ethernet header
        struct ether_header *eh = (struct ether_header *) datagram;
        
        //IP header
        struct iphdr *iph = (struct iphdr *) (datagram + sizeof (struct ether_header));
    
        //TCP header
        struct tcphdr *tcph = (struct tcphdr *) (datagram + sizeof (struct ether_header) + sizeof (struct ip));
        struct sockaddr_in sin;
        struct pseudo_header psh;
    
        //some address resolution
        strcpy(source_ip , inet_ntoa(ipaddr->sin_addr));
        sin.sin_family = AF_INET;
        sin.sin_port = htons(80);
        if (fscanf(fp, "%253s", server) == 1)
            sin.sin_addr.s_addr = inet_addr (server);   
        else
        {
            done = 1;
            break;
        }
                        
        //Fill in the Ethernet Header
        eh->ether_dhost[0] = arp_resp->sender_mac[0];
        eh->ether_dhost[1] = arp_resp->sender_mac[1];
        eh->ether_dhost[2] = arp_resp->sender_mac[2];
        eh->ether_dhost[3] = arp_resp->sender_mac[3];
        eh->ether_dhost[4] = arp_resp->sender_mac[4];
        eh->ether_dhost[5] = arp_resp->sender_mac[5];

        memcpy(eh->ether_shost, ifr.ifr_hwaddr.sa_data, HWADDR_len);
        eh->ether_type = htons(0x0800);
                        
        //Fill in the IP Header
        iph->ihl = 5;
        iph->version = 4;
        iph->tos = 0;
        iph->tot_len = htons(sizeof (struct iphdr) + sizeof (struct tcphdr));
        iph->id = htons (54321);    //Id of this packet
        iph->frag_off = 0;
        iph->ttl = 255;
        iph->protocol = IPPROTO_TCP;
        iph->check = 0;     //Set to 0 before calculating checksum
        iph->saddr = inet_addr ( source_ip );
        iph->daddr = sin.sin_addr.s_addr;
    
        //Ip checksum
        iph->check = checksum2 (datagram + sizeof (struct ether_header), sizeof (struct iphdr));
    
        //TCP Header
        tcph->source = htons (1234);
        tcph->dest = htons (80);
        tcph->seq = 0;
        tcph->ack_seq = 0;
        tcph->doff = 5; //tcp header size
        tcph->fin=0;
        tcph->syn=1;
        tcph->rst=0;
        tcph->psh=0;
        tcph->ack=0;
        tcph->urg=0;
        tcph->window = htons (5840);    // maximum allowed window size 
        tcph->check = 0;    //leave checksum 0 now, filled later by pseudo header
        tcph->urg_ptr = 0;

        //Now the TCP checksum
        psh.source_address = inet_addr( source_ip );
        psh.dest_address = sin.sin_addr.s_addr;
        psh.placeholder = 0;
        psh.protocol = IPPROTO_TCP;
        psh.tcp_length = htons(sizeof(struct tcphdr));
    
        int psize = sizeof(struct pseudo_header) + sizeof(struct tcphdr);
        pseudogram = malloc(psize);
    
        memcpy(pseudogram , (char*) &psh , sizeof (struct pseudo_header));
        memcpy(pseudogram + sizeof(struct pseudo_header) , tcph , sizeof(struct tcphdr));
    
        tcph->check = checksum2(pseudogram , psize);
                        
        memcpy(data, datagram, (sizeof(struct ether_header) + sizeof(struct iphdr) + sizeof(struct tcphdr)));
        free(pseudogram);
        len = sizeof(struct ether_header) + sizeof(struct iphdr) + sizeof(struct tcphdr);
        
        i_index ++;
        if(i_index >= c_buffer_nb)
        {
            i_index = 0;
            first_loop = 0;
        }
 
        /* update packet len */
        //ps_header->tp_len = c_packet_sz;
        ps_header->tp_len = len;
        /* set header flag to USER (trigs xmit)*/
        ps_header->tp_status = TP_STATUS_SEND_REQUEST;
        
        //int ec_send;
        static int total=0;
        //int blocking = 1;
        
        /* send all buffers with TP_STATUS_SEND_REQUEST */
        /* Wait end of transfer */
        //ec_send = sendto(fd_socket,NULL,0,(blocking? 0 : MSG_DONTWAIT),(struct sockaddr *) ps_sockaddr,sizeof(struct sockaddr_ll));
        ec_send = sendto(fd_socket,NULL,len,MSG_DONTWAIT,(struct sockaddr *) ps_sockaddr,sizeof(struct sockaddr_ll));
        
        if(ec_send < 0) {
            perror("sendto");
        }
        else if ( ec_send == 0 ) {
            /* nothing to do => schedule : useful if no SMP */
            printf("Sleeping\n");
            usleep(0);
        }
        else {
            total += ec_send/(len);
            printf("send %d packets (+%d bytes)\n",total, ec_send);
            fflush(0);
        }
        //ps_header_start = mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd_socket, 0);
        if (munmap(ps_header_start, size) == -1)
        {
            perror("munmap");
            exit(EXIT_FAILURE);
        }       
    
        close(fd_socket);
    }
    return 1;
}

下面是发送 5,000 多个数据包期间 strace -c 的输出:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 48.11    3.962165         395     10012           setsockopt
 16.69    1.374748         274      5014           mmap
 14.85    1.222565         244      5007           munmap
 10.91    0.898695         179      5016           close
  3.15    0.259055          25     10022           ioctl
  2.04    0.167613          33      5016           socket
  1.70    0.139623          27      5008           sendto
  1.41    0.116430          23      5025           write
  1.14    0.093826          18      5008           bind
  0.01    0.000505          26        19           read
  0.00    0.000000           0         4           mprotect
  0.00    0.000000           0         3           brk
  0.00    0.000000           0         4           pread64
  0.00    0.000000           0         3         1 access
  0.00    0.000000           0         1           getpid
  0.00    0.000000           0         1           recvfrom
  0.00    0.000000           0         2           recvmsg
  0.00    0.000000           0         1           execve
  0.00    0.000000           0         2         1 arch_prctl
  0.00    0.000000           0         1           set_tid_address
  0.00    0.000000           0         3           openat
  0.00    0.000000           0         4           newfstatat
  0.00    0.000000           0         1           set_robust_list
  0.00    0.000000           0         4           prlimit64
  0.00    0.000000           0         1           getrandom
------ ----------- ----------- --------- --------- ----------------
100.00    8.235225         149     55182         2 total

如果我对代码的理解没有错,您在为每个 IP 地址重复执行大量本不需要重做的工作。 每次执行主循环,您都会:

  • 创建一个新的数据包套接字
  • 绑定它
  • 设置 tx 数据包环形缓冲区
  • 映射它
  • 发送单个数据包
  • 取消映射
  • 关闭套接字

这是您导致系统为一个数据包做的大量工作。

您应该在开始时只创建一个数据包套接字,设置一次 tx 缓冲区和 mmap,并使其保持打开状态,直到程序完成。 您可以通过接口发送任意数量的数据包,而无需关闭/重新打开。

这就是为什么耗时最多的系统调用是 setsockopt、mmap、munmap 等。这些操作在内核中的开销都很大。

此外,PACKET_TX_RING 的意义在于:您可以设置一个大缓冲区,在缓冲区内一个接一个地构建数据包,而无需为每个数据包执行一次 send 系统调用。 通过设置数据包头的 tp_status 字段,您告诉内核该帧已准备好发送。 然后,您将指针在环形缓冲区内推进到下一个可用槽位,并构建下一个数据包。 当没有更多数据包要构建时(或者缓冲区的可用空间已被填满,即已经绕回到最旧的、仍在发送中的那一帧),再执行一次 send/sendto 调用,通知内核去检查缓冲区并(开始)发送所有这些数据包。

然后,您可以开始构建更多数据包(注意确保 kernel 仍在使用它们——通过tp_status字段)。

话虽如此,如果这是我自己的项目,我会大幅简化——至少第一版如此:创建一个数据包套接字,将其绑定到接口,一次构建一个数据包,然后每帧调用一次 send(即不使用 PACKET_TX_RING)。 只有当性能要求严格到必须发送得更快时,我才会费心去设置和使用环形缓冲区。 我怀疑您并不需要它。 去掉那些多余的 setsockopt、mmap 调用之后,速度应该会快很多。

最后,非阻塞套接字仅在您等待时有其他事情要做时才有用。 在这种情况下,如果您将套接字设置为非阻塞,并且由于调用会阻塞而无法发送数据包,则send调用将失败,并且如果您对此不做任何事情(将数据包排队某处,稍后重试,比如说),数据包将丢失。 在这个程序中,我看不到使用非阻塞套接字的任何好处。 如果套接字阻塞,那是因为设备传输队列已满。 在那之后,您继续生产要发送的数据包没有意义,您也将无法发送这些数据包。 在那一点上阻塞要简单得多,直到队列耗尽。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM