[英]Packing bits in C using bitwise operators
假設我有一個帶有二進制值的 ASCII 字符的 unsigned char 數組:
00001100 00011100 00110100 00111000 00110100 00100100 00010010 00011100 00100100 00010000 00011011 00001110 00001010 00011101 00100101
我不需要每個字節的兩個最高有效位(即我只需要每個字節的 6 個最低有效位):
--001100 --011100 --110100 --111000 --110100 --100100 --010010 --011100 --100100 --010000 --011011 --001110 --001010 --011101 --100101
我試圖通過將數組中下一個元素的最低有效位放在前一個元素的剩余位中,將這些位打包到一個無符號字符數組中。 最終結果應如下所示:
00001100 01000111 11100011 00110100 00101001 01110001 00100100 10110100 00111001 01001010 01010111 00000010
希望你能看到這個模式。
我目前正在嘗試使用嵌套的 for 循環來實現這一點,但無法獲得正確的結果:
// Attempts to pack the low 6 bits of each byte of `message` into a local
// buffer `packed_bits`, then prints every element of that buffer in decimal.
// Nothing is returned; the packed data only exists for the printout.
//
// NOTE(review): as written this does not yield a tight 6-bit packing; the
// inline notes below point at the specific reasons.
// NOTE(review): MAX_PACK_SIZE and MAX_UNPACK_SIZE are defined outside this
// block; their values are not visible here.
void pack(unsigned char *message)
{
// mask[b] has only bit b set (bit 0 is the least significant bit).
unsigned char mask[] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};
unsigned char packed_bits[MAX_PACK_SIZE];
// NOTE(review): `l` is never used in this function.
int i, j, k, l, m;
// Carried across iterations of the outer loop; j is reloaded from it below.
int count = 0;
// initialize packed bits
for (k = 0; k < MAX_PACK_SIZE; k++)
packed_bits[k] = 0;
// loop through message array
// NOTE(review): packed_bits is indexed with the input index i below, which
// runs up to MAX_UNPACK_SIZE - 1; that is out of bounds if MAX_UNPACK_SIZE
// is larger than MAX_PACK_SIZE - confirm against the macro definitions.
for (i = 0; i < MAX_UNPACK_SIZE; i++)
{
// loop through message[i]'s bits
for (j = 0; j < 8; j++)
{
// j is overwritten with count on every pass, so j == count always holds
// for the comparisons that follow.
j = count;
// Because j == count, this is the only `count == 6` branch that can run.
if (count == 6 && j == 6)
{
// mask[0] and mask[j + 0] (j == 6) are different single-bit masks, so the
// double AND is always 0 and nothing is added; likewise mask[1] / mask[7].
// NOTE(review): message[i + 1] is one element past index i; if message
// holds exactly MAX_UNPACK_SIZE elements and i is the last index, this
// reads past the end - confirm.
packed_bits[i] += (message[i + 1] & mask[0]) & mask[j + 0];
packed_bits[i] += (message[i + 1] & mask[1]) & mask[j + 1];
// The next outer iteration therefore starts at bit 2 instead of bit 0.
count = 2;
break;
}
// Unreachable: needs count == 6 and j == 4 at once, but j == count.
else if (count == 6 && j == 4)
{
packed_bits[i] += (message[i + 1] & mask[0]) & mask[j + 0];
packed_bits[i] += (message[i + 1] & mask[1]) & mask[j + 1];
packed_bits[i] += (message[i + 1] & mask[2]) & mask[j + 2];
packed_bits[i] += (message[i + 1] & mask[3]) & mask[j + 3];
count = 4;
break;
}
// Unreachable for the same reason (count == 6 and j == 2 cannot both hold).
else if (count == 6 && j == 2)
{
packed_bits[i] += (message[i + 1] & mask[0]) & mask[j + 0];
packed_bits[i] += (message[i + 1] & mask[1]) & mask[j + 1];
packed_bits[i] += (message[i + 1] & mask[2]) & mask[j + 2];
packed_bits[i] += (message[i + 1] & mask[3]) & mask[j + 3];
packed_bits[i] += (message[i + 1] & mask[4]) & mask[j + 4];
packed_bits[i] += (message[i + 1] & mask[5]) & mask[j + 5];
count = 0;
++i;
break;
}
else
{
// Adds bit j of message[i], kept at the same bit position, to packed_bits[i].
packed_bits[i] += message[i] & mask[j];
count++;
}
}
}
// Dump all MAX_PACK_SIZE output bytes as decimal numbers, one per line.
for (m = 0; m < MAX_PACK_SIZE; m++)
{
printf("packed_bits[%d]=%d\n", m, packed_bits[m]);
}
}
對於 OP 的問題,使用一些“手動”循環展開,所有的小技巧都會變得更容易掌握。
將每個字節的 6 位緊密打包意味着:每 4 個輸入字節(4 × 6 = 24 位)恰好填滿 3 個輸出字節(3 × 8 = 24 位),
也就是說,每處理完 4 個輸入字節,輸出都剛好以完整字節結束。
因此,我編寫了一個循環,它總是一次打包 4 個字節。
輸入的結尾可能未對齊到 4。因此,對於結尾的最多 3 個字節,需要進行特殊處理。
我的 MCVE:
#include <stdio.h>
/*
 * Pack the low 6 bits of every input byte tightly into the output buffer.
 *
 * Bits are filled starting at the least significant bit of each output
 * byte; the low bits of the next input value go into whatever high bits
 * are still free in the current output byte.
 *
 * message:     input bytes; the two top bits of each byte are ignored.
 * len:         number of input bytes.
 * packed_bits: output buffer; the caller must provide room for the
 *              returned number of bytes.
 *
 * Returns the number of bytes written to packed_bits.
 */
size_t pack(const unsigned char *message, size_t len, unsigned char *packed_bits)
{
    const size_t full_groups = len / 4; // 4 inputs (24 bits) -> 3 outputs
    const size_t leftover = len % 4;    // 0..3 trailing inputs
    const unsigned char *src = message;
    unsigned char *dst = packed_bits;

    // Bulk: every complete group of four 6-bit values fills three bytes.
    for (size_t g = 0; g < full_groups; ++g, src += 4, dst += 3) {
        const unsigned a = src[0] & 0x3fu;
        const unsigned b = src[1] & 0x3fu;
        const unsigned c = src[2] & 0x3fu;
        const unsigned d = src[3] & 0x3fu;

        // The conversion to unsigned char drops the bits that belong to
        // the following output byte.
        dst[0] = (unsigned char)(a | (b << 6));
        dst[1] = (unsigned char)((b >> 2) | (c << 4));
        dst[2] = (unsigned char)((c >> 4) | (d << 2));
    }

    // Tail: treat the missing inputs of the last partial group as zero and
    // emit only as many bytes as there are trailing inputs.
    unsigned tail[3] = {0, 0, 0};
    for (size_t k = 0; k < leftover; ++k) {
        tail[k] = src[k] & 0x3fu;
    }
    if (leftover >= 1) {
        dst[0] = (unsigned char)(tail[0] | (tail[1] << 6));
    }
    if (leftover >= 2) {
        dst[1] = (unsigned char)((tail[1] >> 2) | (tail[2] << 4));
    }
    if (leftover >= 3) {
        dst[2] = (unsigned char)(tail[2] >> 4);
    }

    return full_groups * 3 + leftover;
}
void printBits(const unsigned char *buf, size_t len);
/* Demo driver: packs the 15 sample bytes and prints input and result as bit strings. */
int main(void)
{
    // The sample bytes; only the hex variant is compiled.
    unsigned char sample[] = {
#if 0 // gcc extension
        0b00001100, 0b00011100, 0b00110100, 0b00111000, 0b00110100, 0b00100100, 0b00010010, 0b00011100,
        0b00100100, 0b00010000, 0b00011011, 0b00001110, 0b00001010, 0b00011101, 0b00100101
#else // the same in hex
        0x0c, 0x1c, 0x34, 0x38, 0x34, 0x24, 0x12, 0x1c, 0x24, 0x10, 0x1b, 0x0e, 0x0a, 0x1d, 0x25,
#endif // 0
    };
    const size_t sample_len = sizeof sample / sizeof sample[0];

    // One output byte per input byte is more than the packing needs, but
    // it is always enough.
    unsigned char packed[sample_len];
    const size_t packed_len = pack(sample, sample_len, packed);

    // Show the input first, then the packed result.
    printf("message[%zu]:\n", sample_len);
    printBits(sample, sample_len);
    printf("packed to packed_bits[%zu]:\n", packed_len);
    printBits(packed, packed_len);

    return 0;
}
/*
 * Print each of the len bytes in buf as a space-prefixed group of eight
 * '0'/'1' characters, most significant bit first, followed by a single
 * newline at the end of the line.
 */
void printBits(const unsigned char *buf, size_t len)
{
    for (size_t idx = 0; idx < len; ++idx) {
        const unsigned value = buf[idx];

        putchar(' ');
        // Walk a single-bit probe from bit 7 down to bit 0.
        for (unsigned probe = 0x80u; probe != 0; probe >>= 1) {
            putchar((value & probe) ? '1' : '0');
        }
    }
    putchar('\n');
}
輸出:
message[15]:
00001100 00011100 00110100 00111000 00110100 00100100 00010010 00011100 00100100 00010000 00011011 00001110 00001010 00011101 00100101
packed to packed_bits[12]:
00001100 01000111 11100011 00110100 00101001 01110001 00100100 10110100 00111001 01001010 01010111 00000010
我將我的輸出與 OP 的預期輸出進行了比較,它們匹配。
OP和我的輸入:
00001100 00011100 00110100 00111000 00110100 00100100 00010010 00011100 00100100 00010000 00011011 00001110 00001010 00011101 00100101
00001100 00011100 00110100 00111000 00110100 00100100 00010010 00011100 00100100 00010000 00011011 00001110 00001010 00011101 00100101
完全相等。
OP 的預期輸出和我的實際輸出:
00001100 01000111 11100011 00110100 00101001 01110001 00100100 10110100 00111001 01001010 01010111 00000010
00001100 01000111 11100011 00110100 00101001 01110001 00100100 10110100 00111001 01001010 01010111 00000010
完全相等。
有多種方法可以做到這一點。 下面是一種應該很容易理解的做法。 (它使用 GCC 擴展——二進制字面量——來提供示例數據。)
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
// Calculate the number of elements in an array (must be a real array, not
// a pointer).
#define NumberOf(a) (sizeof (a) / sizeof *(a))
// Make a mask in which the n low bits are set. The argument is
// parenthesized so that expressions with operators of lower precedence
// than << (e.g. `a | b`) are shifted as a whole. n must be smaller than
// the width of unsigned int.
#define Mask(n) ((1u << (n)) - 1)
// Divide x by y, rounding up to the next integer.
#define DivideUp(x, y) (((x) + (y) - 1) / (y))
/*  Tightly pack the low six bits of each of the NI bytes in In into Out,
    eight bits per output byte, filling every output byte from its least
    significant bit upward.

    SO is the capacity of Out in bytes. If it is smaller than the number
    of bytes required, an error message is printed and the program exits
    with EXIT_FAILURE.

    Returns the number of bytes written to Out.
*/
static size_t pack(unsigned char *Out, size_t SO,
                   const unsigned char *In, size_t NI)
{
    // Refuse to run if the result would not fit.
    const size_t Needed = DivideUp(NI*6, 8);
    if (SO < Needed) {
        printf("Error, output buffer is %zu bytes but need %zu.\n",
            SO, Needed);
        exit(EXIT_FAILURE);
    }

    size_t Written = 0;     // Output bytes produced so far.
    uint32_t Accum = 0;     // Collects up to four 6-bit values (24 bits).
    size_t Slot = 0;        // Position of the next value in Accum, 0..3.

    for (size_t i = 0; i < NI; ++i) {
        // Drop the value into its 6-bit slot.
        Accum |= (In[i] & Mask(6)) << (Slot * 6);

        // Four values make exactly three bytes: flush and start over.
        if (++Slot == 4) {
            Out[Written++] = (unsigned char)(Accum);
            Out[Written++] = (unsigned char)(Accum >> 8);
            Out[Written++] = (unsigned char)(Accum >> 16);
            Accum = 0;
            Slot = 0;
        }
    }

    // Slot now equals NI % 4; flush just enough bytes to hold the
    // remaining Slot*6 bits (none when Slot is 0).
    const size_t TailBytes = DivideUp(Slot*6, 8);
    for (size_t b = 0; b < TailBytes; ++b) {
        Out[Written++] = (unsigned char)(Accum >> (b * 8));
    }

    return Written;
}
/*  Self-check: pack the sample input and compare the result byte by byte
    with the packed sequence given in the question. Prints a diagnostic
    and exits with EXIT_FAILURE on the first mismatch.
*/
int main(void)
{
    // Sample input (binary literals are a GCC extension).
    unsigned char In[] =
    {
        0b00001100,
        0b00011100,
        0b00110100,
        0b00111000,
        0b00110100,
        0b00100100,
        0b00010010,
        0b00011100,
        0b00100100,
        0b00010000,
        0b00011011,
        0b00001110,
        0b00001010,
        0b00011101,
        0b00100101,
    };

    // Reference result: the packed bytes the output must match.
    unsigned char Expected[] =
    {
        0b00001100,
        0b01000111,
        0b11100011,
        0b00110100,
        0b00101001,
        0b01110001,
        0b00100100,
        0b10110100,
        0b00111001,
        0b01001010,
        0b01010111,
        0b00000010,
    };

    // Receives what pack() actually produces.
    unsigned char Observed[NumberOf(Expected)];

    size_t N = pack(Observed, NumberOf(Observed), In, NumberOf(In));

    if (N != NumberOf(Expected))
    {
        printf("Error, expected %zu bytes but got %zu bytes.\n",
            NumberOf(Expected), N);
        exit(EXIT_FAILURE);
    }

    for (size_t n = 0; n < N; ++n)
        if (Observed[n] != Expected[n])
        {
            // A literal "0x" plus %02hhx always prints two hex digits;
            // with %#02hhx the prefix consumed the whole field width.
            printf(
                "Error, expected byte %zu to be 0x%02hhx but observed 0x%02hhx.\n",
                n, Expected[n], Observed[n]);
            exit(EXIT_FAILURE);
        }

    printf("No error found.\n");
    return EXIT_SUCCESS;
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.