使用按位運算符在 C 中打包位

Question

假設我有一個帶有二進制值的 ASCII 字符的 unsigned char 數組：

00001100  00011100  00110100  00111000 00110100  00100100  00010010  00011100  00100100 00010000  00011011  00001110  00001010 00011101  00100101

我不需要每個字節的兩個最高有效位（即我只需要每個字節的 6 個最低有效位）：

--001100  --011100 --110100  --111000 --110100  --100100  --010010  --011100  --100100 --010000  --011011  --001110 --001010  --011101 --100101

我試圖通過將數組中下一個元素的最低有效位放在前一個元素的剩余位中，將這些位打包到一個無符號字符數組中。 最終結果應如下所示：

00001100  01000111  11100011  00110100  00101001  01110001  00100100  10110100  00111001  01001010  01010111  00000010

希望你能看到這個模式。

我目前正在嘗試使用嵌套的 for 循環來實現這一點，但無法獲得正確的結果：

/*
 * Attempt from the question: intended to pack the low 6 bits of every byte
 * of message into packed_bits and then print packed_bits. The question
 * states that it does not produce the expected result; the NOTE(review)
 * comments below point out what the code as written actually does.
 *
 * message: input bytes. Elements 0 .. MAX_UNPACK_SIZE - 1 are read, and
 *          message[i + 1] is read as well.
 *
 * MAX_PACK_SIZE and MAX_UNPACK_SIZE are not defined in this snippet.
 * NOTE(review): packed_bits has MAX_PACK_SIZE elements but is indexed with
 * i < MAX_UNPACK_SIZE, and message[i + 1] is read for the last i; both are
 * out of bounds unless the array sizes allow for it - confirm against the
 * definitions and the caller.
 */
void pack(unsigned char *message)
{
    // mask[b] has only bit b set (bit 0 is the least significant bit).
    unsigned char mask[] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};
    unsigned char packed_bits[MAX_PACK_SIZE];

    // NOTE(review): l is declared but never used.
    int i, j, k, l, m;
    // Carried over from one message byte to the next; also used as the bit
    // index through the assignment j = count below.
    int count = 0;

    // initialize packed bits
    for (k = 0; k < MAX_PACK_SIZE; k++)
        packed_bits[k] = 0;

    // loop through message array
    for (i = 0; i < MAX_UNPACK_SIZE; i++)
    {
        // loop through message[i]'s bits
        for (j = 0; j < 8; j++)
        {
            // NOTE(review): this overwrites the loop counter, so j is always
            // equal to count in the tests below.
            j = count;

            if (count == 6 && j == 6)
            {
                // NOTE(review): each term ANDs the same value with two
                // different single-bit masks (mask[0] with mask[6], mask[1]
                // with mask[7]), which is always 0, so nothing is added to
                // packed_bits[i] here.
                packed_bits[i] += (message[i + 1] & mask[0]) & mask[j + 0];
                packed_bits[i] += (message[i + 1] & mask[1]) & mask[j + 1];
                count = 2;
                break;
            }
            // NOTE(review): unreachable - j was just set to count, so
            // count == 6 implies j == 6 and the branch above is taken.
            else if (count == 6 && j == 4)
            {
                packed_bits[i] += (message[i + 1] & mask[0]) & mask[j + 0];
                packed_bits[i] += (message[i + 1] & mask[1]) & mask[j + 1];
                packed_bits[i] += (message[i + 1] & mask[2]) & mask[j + 2];
                packed_bits[i] += (message[i + 1] & mask[3]) & mask[j + 3];
                count = 4;
                break;
            }
            // NOTE(review): unreachable for the same reason as above.
            else if (count == 6 && j == 2)
            {
                packed_bits[i] += (message[i + 1] & mask[0]) & mask[j + 0];
                packed_bits[i] += (message[i + 1] & mask[1]) & mask[j + 1];
                packed_bits[i] += (message[i + 1] & mask[2]) & mask[j + 2];
                packed_bits[i] += (message[i + 1] & mask[3]) & mask[j + 3];
                packed_bits[i] += (message[i + 1] & mask[4]) & mask[j + 4];
                packed_bits[i] += (message[i + 1] & mask[5]) & mask[j + 5];
                count = 0;
                ++i;
                break;
            }
            else
            {
                // Adds bit j of message[i], unshifted, to packed_bits[i].
                packed_bits[i] += message[i] & mask[j];
                count++;
            }
        }
    }

    // Print every element of packed_bits as a decimal number.
    for (m = 0; m < MAX_PACK_SIZE; m++)
    {
        printf("packed_bits[%d]=%d\n", m, packed_bits[m]);
    }
}

Answer 1

對於 OP 的問題，只要做一些“手動”循環展開，所有的位操作技巧就會變得更容易理解。

將每個字節的 6 位緊密打包意味着

8 個字節將被打包為 6 個字節，並且
4 個字節將被打包為 3 個字節

在這兩種情況下都以全字節輸出結束。

因此，我編寫了一個循環，它總是一次打包 4 個字節。

輸入的結尾可能未對齊到 4。因此，對於結尾的最多 3 個字節，需要進行特殊處理。

我的 MCVE：

#include <stdio.h>

/**
 * Packs the low 6 bits of each input byte into a contiguous bit stream.
 *
 * Input byte k supplies stream bits 6k .. 6k+5 (least significant bit
 * first); output byte m holds stream bits 8m .. 8m+7. Bits of the last
 * output byte that no input byte supplies are zero.
 *
 * @param message     input bytes; the two high bits of each are ignored
 * @param len         number of input bytes
 * @param packed_bits output buffer; needs room for 3 bytes per complete
 *                    group of 4 input bytes plus 1 byte per leftover input
 *                    byte
 * @return number of bytes written to packed_bits
 */
size_t pack(const unsigned char *message, size_t len, unsigned char *packed_bits)
{
  size_t in = 0;  // index of the next unread input byte
  size_t out = 0; // index of the next unwritten output byte

  // bulk: every complete group of 4 input bytes fills exactly 3 output bytes
  for (size_t groups = len / 4; groups > 0; --groups) {
    const unsigned char *src = message + in;
    unsigned char *dst = packed_bits + out;
    dst[0] = (src[0] & 0x3f) | ((src[1] & 0x03) << 6);
    dst[1] = ((src[1] >> 2) & 0x0f) | ((src[2] & 0x0f) << 4);
    dst[2] = ((src[2] >> 4) & 0x03) | ((src[3] & 0x3f) << 2);
    in += 4;
    out += 3;
  }

  // end: 0 to 3 input bytes are left; each one tops up the previous output
  // byte (if any) and opens a new one with its remaining bits
  const size_t left = len - in;
  if (left >= 1) {
    packed_bits[out++] = message[in] & 0x3f;
  }
  if (left >= 2) {
    packed_bits[out - 1] |= (message[in + 1] & 0x03) << 6;
    packed_bits[out++] = (message[in + 1] >> 2) & 0x0f;
  }
  if (left >= 3) {
    packed_bits[out - 1] |= (message[in + 2] & 0x0f) << 4;
    packed_bits[out++] = (message[in + 2] >> 4) & 0x03;
  }
  return out;
}

void printBits(const unsigned char *buf, size_t len);

/* Demo: packs the sample bytes from the question with pack() and prints the
 * input and the packed result as bit patterns using printBits(). */
int main(void)
{
  // sample data
  unsigned char message[] = {
#if 0 // gcc extension
    0b00001100, 0b00011100, 0b00110100, 0b00111000,  0b00110100, 0b00100100, 0b00010010, 0b00011100,
    0b00100100, 0b00010000, 0b00011011, 0b00001110,  0b00001010, 0b00011101, 0b00100101
#else // the same in hex
    0x0c, 0x1c, 0x34, 0x38, 0x34, 0x24, 0x12, 0x1c, 0x24, 0x10, 0x1b, 0x0e, 0x0a, 0x1d, 0x25,
#endif // 0
  };
  const size_t len = sizeof message / sizeof *message;
  // output buffer
  // Sized with the sizeof expression itself rather than with len: len is a
  // const object, not a constant expression, so packed_bits[len] would be a
  // variable-length array, which C11 makes an optional feature.
  // One output byte per input byte is a bit over-pessimistic but definitely
  // sufficient ;-)
  unsigned char packed_bits[sizeof message / sizeof *message];
  // pack the sample
  const size_t len_packed = pack(message, len, packed_bits);
  // report
  printf("message[%zu]:\n", len);
  printBits(message, len);
  printf("packed to packed_bits[%zu]:\n", len_packed);
  printBits(packed_bits, len_packed);
  return 0;
}

/**
 * Prints len bytes of buf to stdout on one line: each byte as a space
 * followed by its 8 low bits, most significant bit first, then a newline.
 */
void printBits(const unsigned char *buf, size_t len)
{
  size_t pos = 0;
  while (pos < len) {
    const unsigned value = buf[pos++];
    char digit[8];
    // digit[0] is bit 7 (0x80) ... digit[7] is bit 0 (0x01)
    for (int k = 0; k < 8; ++k) {
      digit[k] = (value >> (7 - k)) & 1u ? '1' : '0';
    }
    printf(" %c%c%c%c%c%c%c%c",
      digit[0], digit[1], digit[2], digit[3],
      digit[4], digit[5], digit[6], digit[7]);
  }
  putchar('\n');
}

輸出：

message[15]:
 00001100 00011100 00110100 00111000 00110100 00100100 00010010 00011100 00100100 00010000 00011011 00001110 00001010 00011101 00100101
packed to packed_bits[12]:
 00001100 01000111 11100011 00110100 00101001 01110001 00100100 10110100 00111001 01001010 01010111 00000010

在coliru上進行現場演示

我將我的輸出與 OP 的預期輸出進行了比較，它們匹配。

OP和我的輸入：

00001100 00011100 00110100 00111000 00110100 00100100 00010010 00011100 00100100 00010000 00011011 00001110 00001010 00011101 00100101
00001100 00011100 00110100 00111000 00110100 00100100 00010010 00011100 00100100 00010000 00011011 00001110 00001010 00011101 00100101

完全相等。

OP 的預期輸出和我的實際輸出：

00001100 01000111 11100011 00110100 00101001 01110001 00100100 10110100 00111001 01001010 01010111 00000010
00001100 01000111 11100011 00110100 00101001 01110001 00100100 10110100 00111001 01001010 01010111 00000010

完全相等。

Answer 2

有多種方法可以做到這一點。 這是一個應該很容易理解的方法。 （它使用 GCC 擴展——二進制字面量——來提供示例數據。）

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>


//  Number of elements in the array a: the size of the whole array divided by
//  the size of its first element.  a must be an actual array, not a pointer.
#define NumberOf(a) (sizeof (a) / sizeof (a)[0])


//  Make a mask in which the n low bits are set.  n must be less than the
//  width of unsigned int.  The parameter is parenthesized so that arguments
//  containing low-precedence operators, such as Mask(a & b) or
//  Mask(c ? 6 : 8), expand as intended.
#define Mask(n) ((1u << (n)) - 1)


//  Divide x by y, rounding up to the next integer.
//  Computed as (x + y - 1) / y, so the result is the ceiling of x / y when
//  x is non-negative, y is positive and x + y - 1 does not overflow.
//  y is expanded twice: avoid arguments with side effects.
#define DivideUp(x, y)  (((x) + (y) - 1) / (y))


/*  Pack the low six bits of each of the NI bytes in In into Out as one
    continuous bit stream, least significant bits first, eight bits per
    element of Out.

    Out  receives the packed bytes.
    SO   is the number of elements available in Out.  If it is less than the
         (NI*6 + 7) / 8 bytes required, an error message is printed and the
         program exits with EXIT_FAILURE.
    In   holds the input bytes; the two high bits of each are ignored.
    NI   is the number of input bytes.

    Returns the number of elements of Out that were written.
*/
static size_t pack(unsigned char *Out, size_t SO,
    const unsigned char *In, size_t NI)
{
    //  Bytes needed for NI six-bit fields, rounded up to a whole byte.
    const size_t Needed = (NI*6 + 7) / 8;

    if (SO < Needed)
    {
        printf("Error, output buffer is %zu bytes but need %zu.\n",
            SO, Needed);
        exit(EXIT_FAILURE);
    }

    size_t Written = 0;     //  Output bytes produced so far.
    uint32_t Pending = 0;   //  Bits collected but not yet written.
    unsigned Filled = 0;    //  Number of valid bits in Pending.

    for (size_t Index = 0; Index != NI; ++Index)
    {
        //  Append the six payload bits above those already pending.
        Pending |= (In[Index] & 0x3Fu) << Filled;
        Filled += 6;

        //  Four inputs give 24 bits, which is exactly three output bytes.
        if (Filled == 24)
        {
            Out[Written++] = Pending;
            Out[Written++] = Pending >> 8;
            Out[Written++] = Pending >> 16;
            Pending = 0;
            Filled = 0;
        }
    }

    //  Flush an incomplete group: one byte for every started run of eight
    //  pending bits (6, 12 or 18 bits give 1, 2 or 3 bytes).
    for (unsigned Shift = 0; Shift < Filled; Shift += 8)
        Out[Written++] = Pending >> Shift;

    return Written;
}


/*  Self-test: pack the sample input from the question and compare the result
    with the output the question asks for.  Prints a message and exits with
    EXIT_FAILURE if the length or any byte differs; otherwise prints
    "No error found.".
*/
int main(void)
{
    //  Sample input (binary literals are a GCC extension).
    unsigned char In[] =
    {
        0b00001100,
        0b00011100,
        0b00110100,
        0b00111000,
        0b00110100,
        0b00100100,
        0b00010010,
        0b00011100,
        0b00100100,
        0b00010000,
        0b00011011,
        0b00001110,
        0b00001010,
        0b00011101,
        0b00100101,
    };

    //  Reference result: the packed bytes the question expects.
    unsigned char Expected[] =
    {
        0b00001100,
        0b01000111,
        0b11100011,
        0b00110100,
        0b00101001,
        0b01110001,
        0b00100100,
        0b10110100,
        0b00111001,
        0b01001010,
        0b01010111,
        0b00000010,
    };

    //  Actual result produced by pack, sized to match the reference.
    unsigned char Observed[NumberOf(Expected)];
    size_t N = pack(Observed, NumberOf(Observed), In, NumberOf(In));

    if (N != NumberOf(Expected))
    {
        printf("Error, expected %zu bytes but got %zu bytes.\n",
            NumberOf(Expected), N);
        exit(EXIT_FAILURE);
    }

    //  Report the first differing byte: reference value first, then the
    //  value pack produced, matching the wording of the message.
    for (size_t n = 0; n < N; ++n)
        if (Observed[n] != Expected[n])
        {
            printf(
"Error, expected byte %zu to be %#02hhx but observed %#02hhx.\n",
                n, Expected[n], Observed[n]);
            exit(EXIT_FAILURE);
        }

    printf("No error found.\n");
}

使用按位運算符在 C 中打包位

問題描述

2 個解決方案

解決方案1
0 2020-03-14 10:14:15

解決方案2
0 2020-03-14 12:36:05

使用按位運算符在 C 中打包位

問題描述

2 個解決方案

解決方案1 0 2020-03-14 10:14:15

解決方案2 0 2020-03-14 12:36:05

解決方案1
0 2020-03-14 10:14:15

解決方案2
0 2020-03-14 12:36:05