與字節序無關的 base64_encode/decode 函數

Question

我在谷歌上搜索我恰好需要的這兩個 C 函數，找到的最乾淨的實現是 http://fm4dd.com/programming/base64/base64_stringencode_c.htm 。但在我看來，其中下面這一小段代碼……

void decodeblock(unsigned char in[], char *clrstr) {
  /* scratch string: three decoded bytes followed by a terminator */
  unsigned char out[4];
  unsigned char *next = out;

  /* regroup the four 6-bit inputs into three 8-bit bytes */
  *next++ = (unsigned char) ((in[0] << 2) | (in[1] >> 4));
  *next++ = (unsigned char) ((in[1] << 4) | (in[2] >> 2));
  *next++ = (unsigned char) ((in[2] << 6) | in[3]);
  *next   = '\0';

  /* append the decoded bytes to the caller's string */
  strncat(clrstr, out, sizeof out);
}

……會依賴於字節序（對應的 encodeblock() 也是如此，您可以在上面的 URL 中看到）。但它在其他方面又好又乾淨，不像其他一些實現：有一個帶了三個自己的頭文件，另一個調用了它自己特殊的類似 malloc() 的函數，等等。有沒有人知道一個像這個版本一樣漂亮、小巧、乾淨（沒有頭文件、沒有依賴項等），但更不依賴於架構的版本？

編輯：我尋找這個的原因是，base64_encode() 會在作為 html 頁面一部分的 php 腳本中完成，並把編碼後的字符串傳遞給在遠端機器上執行的 cgi 程序。然後該 cgi 必須對它進行 base64_decode()。所以架構獨立性只是一層額外的保險，以防 cgi 運行在非 Intel 的大端機器上（Intel 是小端）。

根據下面的評論進行編輯，這是完整的代碼以及我所做的一些更改...

/* downloaded from...
   http://fm4dd.com/programming/base64/base64_stringencode_c.htm */
/* ------------------------------------------------------------------------ *
 * file:        base64_stringencode.c v1.0                                  *
 * purpose:     tests encoding/decoding strings with base64                 *
 * author:      02/23/2009 Frank4DD                                         *
 *                                                                          *
 * source:      http://base64.sourceforge.net/b64.c for encoding            *
 *              http://en.literateprograms.org/Base64_(C) for decoding      *
 * ------------------------------------------------------------------------ */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* ---- Base64 alphabet: the character at index v represents the 6-bit value v --- */
char b64[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"   /* values  0..25 */
  "abcdefghijklmnopqrstuvwxyz"   /* values 26..51 */
  "0123456789"                   /* values 52..61 */
  "+/";                          /* values 62..63 */

/* decodeblock - decode 4 '6-bit' values into 3 8-bit bytes and append them
 *
 * in     - four values, each expected to be in the range 0..63
 * clrstr - NUL-terminated destination string; the decoded bytes are
 *          appended to it, so the caller must guarantee room for three
 *          more bytes plus the terminator
 *
 * The bytes are appended with strncat(), which stops at the first zero
 * byte: decoded data that contains a NUL byte is cut short at that byte.
 * Every input byte is handled on its own with fixed shifts, so the result
 * does not depend on the byte order of the host.
 */
void decodeblock(unsigned char in[], char *clrstr) {
  unsigned char out[4];
  out[0] = (unsigned char) ((in[0] << 2) | (in[1] >> 4));
  out[1] = (unsigned char) ((in[1] << 4) | (in[2] >> 2));
  out[2] = (unsigned char) ((in[2] << 6) | in[3]);
  out[3] = '\0';  /* terminator, so the three bytes form a C string */
  /* strncat() expects const char *; convert explicitly instead of handing
     it an unsigned char pointer */
  strncat(clrstr, (const char *) out, sizeof(out) - 1);
  } /* --- end-of-function decodeblock() --- */

/* b64_append_group - decode one group of four values and append the result
 *
 * The group is first decoded into a small scratch string by decodeblock(),
 * so its exact length is known before anything is written to dst.
 *
 * in   - the four 6-bit values of the group
 * dst  - output buffer of cap bytes, NUL-terminated at dst[*used]
 * used - number of bytes already stored in dst; advanced on success
 * cap  - total size of dst in bytes
 *
 * Returns 1 if the bytes were appended, 0 if they would not fit.
 */
static int b64_append_group(unsigned char in[], char *dst, size_t *used, size_t cap) {
  char group[4] = "";   /* at most 3 decoded bytes plus terminator */
  size_t n;

  decodeblock(in, group);
  n = strlen(group);
  if (*used + n >= cap) return 0;      /* one byte is reserved for the terminator */
  memcpy(dst + *used, group, n + 1);   /* copies the terminator as well */
  *used += n;
  return 1;
  } /* --- end-of-function b64_append_group() --- */

/* base64_decode - decode a NUL-terminated base64 string
 *
 * b64src - base64 text.  Characters that are not part of the base64
 *          alphabet are skipped, and decoding stops at the first '='.
 *          A NULL pointer is treated like an empty string.
 *
 * Returns a pointer to a static buffer holding the decoded bytes as a
 * NUL-terminated string; the next call overwrites it, so copy the result
 * if it has to survive.  Output that does not fit into the buffer is
 * dropped instead of being written past its end.  A trailing group that
 * is not closed by '=' is discarded.  Because the result is a C string,
 * decoded NUL bytes cannot be represented and are lost.
 */
char *base64_decode(char *b64src /*, char *clrdst */) {
  static char clrdstbuff[8192];
         char *clrdst = clrdstbuff;
  /* zero-filled: a final padded group leaves some slots unset, and
     decodeblock() reads all four */
  unsigned char in[4] = {0, 0, 0, 0};
  size_t used = 0;   /* bytes stored in clrdst so far */
  size_t i;
  int phase = 0;     /* number of values collected for the current group */

  clrdst[0] = '\0';
  if (b64src == NULL) return ( clrdstbuff );

  for (i = 0; b64src[i] != '\0'; i++) {
    int c = (int) b64src[i];
    char *p;
    if (c == '=') {
      /* padding: flush whatever has been collected for the last group */
      if (phase != 0)
        (void) b64_append_group(in, clrdst, &used, sizeof(clrdstbuff));
      break; }
    p = strchr(b64, c);
    if (p == NULL) continue;   /* not a base64 character: ignore it */
    in[phase] = (unsigned char) (p - b64);
    phase = (phase + 1) % 4;
    if (phase == 0) {
      if (!b64_append_group(in, clrdst, &used, sizeof(clrdstbuff)))
        break;   /* buffer full: return what fits */
      in[0]=in[1]=in[2]=in[3]=0; }
    } /* --- end-of-for(i) --- */
  return ( clrdstbuff );
  } /* --- end-of-function base64_decode() --- */

/* encodeblock - encode up to 3 8-bit bytes as 4 base64 characters and append them
 *
 * in     - three input bytes; in[1] and in[2] are read even when len is
 *          smaller than 3, so the caller has to zero the unused entries
 * b64str - NUL-terminated destination string; exactly four characters are
 *          appended, so the caller must guarantee room for them plus the
 *          terminator
 * len    - number of valid bytes in in[]; with len 1 the last two output
 *          characters are '=', with len 2 the last one is '='
 */
void encodeblock( unsigned char in[], char b64str[], int len ) {
  /* plain char: the values come from the b64 table or are '=', and
     strncat() expects a const char * source */
  char out[5];
  out[0] = b64[ in[0] >> 2 ];
  out[1] = b64[ ((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4) ];
  out[2] = (len > 1) ? b64[ ((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6) ]
                     : '=';
  out[3] = (len > 2) ? b64[ in[2] & 0x3f ] : '=';
  out[4] = '\0';
  strncat(b64str, out, sizeof(out) - 1);
  } /* --- end-of-function encodeblock() --- */

/* base64_encode - base64 encode a NUL-terminated string, adding padding if needed
 *
 * clrstr - the text to encode, read up to its terminating NUL.  A NULL
 *          pointer is treated like an empty string.
 *
 * Returns a pointer to a static buffer holding the encoded text as a
 * NUL-terminated string; the next call overwrites it, so copy the result
 * if it has to survive.  Input whose encoding would not fit into the
 * buffer is truncated at a whole 4-character group instead of being
 * written past the end of the buffer.
 */
char *base64_encode(char *clrstr /*, char *b64dst */) {
  static char b64dstbuff[8192];
         char *b64dst = b64dstbuff;
  size_t used = 0;   /* characters written; b64dst[used] is the terminator */
  size_t j = 0;      /* read position in clrstr */

  b64dst[0] = '\0';
  if (clrstr == NULL) return ( b64dstbuff );

  while(clrstr[j]) {
    unsigned char in[3] = {0, 0, 0};   /* unused slots stay zero for encodeblock() */
    int len = 0;
    /* collect up to three input bytes, stopping at the terminator */
    while(len < 3 && clrstr[j]) {
      in[len++] = (unsigned char) clrstr[j++]; }
    /* each group yields four characters; stop unless they and the
       terminator still fit */
    if(used + 4 >= sizeof(b64dstbuff)) break;
    /* b64dst+used is an empty string, so encodeblock() appends right
       there without rescanning what was already written */
    encodeblock( in, b64dst + used, len );
    used += 4;
    } /* --- end-of-while(clrstr[j]) --- */
  return ( b64dstbuff );
  } /* --- end-of-function base64_encode() --- */

#ifdef TESTBASE64
/* test_copy - copy src into dst (dstsize bytes), truncating if necessary;
   dst is always NUL-terminated */
static void test_copy( char *dst, size_t dstsize, const char *src ) {
  dst[0] = '\0';
  strncat(dst, src, dstsize - 1);
  } /* --- end-of-function test_copy() --- */

/* main - test driver, compiled only when TESTBASE64 is defined
 *
 * The first argument selects the test when it starts with "test" followed
 * by a number; otherwise test 1 is run.
 *   test 1: encode argv[1] and argv[2] (or two built-in strings), then
 *           decode both results again and print every step
 *   test 2: decode argv[2] as base64 and print the result
 *   test 3: argv[2] = number of tests, argv[3] = random bytes per test,
 *           argv[4] = seed; encode and decode random strings and print
 *           every case whose round trip does not reproduce the input
 */
int main( int argc, char *argv[] ) {
  char *mysrc  = (argc>1? argv[1] : "My bonnie is over the ocean      ");
  char *mysrc2 = (argc>2? argv[2] : "My bonnie is over the sea        ");
  char myb64[2048]="", myb642[2048]="";
  char mydst[2048]="", mydst2[2048]="";
  int  testnum = 1;
  if ( strncmp(mysrc,"test",4) == 0 )
    testnum = atoi(mysrc+4);

  if ( testnum == 1 ) {
    test_copy(myb64,sizeof(myb64),base64_encode(mysrc));
    printf("The string [%s]\n\tencodes into base64 as: [%s]\n",mysrc,myb64);
    test_copy(myb642,sizeof(myb642),base64_encode(mysrc2));
    printf("The string [%s]\n\tencodes into base64 as: [%s]\n",mysrc2,myb642);
    printf("...\n");
    test_copy(mydst,sizeof(mydst),base64_decode(myb64));
    printf("The string [%s]\n\tdecodes from base64 as: [%s]\n",myb64,mydst);
    test_copy(mydst2,sizeof(mydst2),base64_decode(myb642));
    printf("The string [%s]\n\tdecodes from base64 as: [%s]\n",myb642,mydst2);
    } /* --- end-of-if(testnum==1) --- */

  if ( testnum == 2 ) {
    test_copy(mydst,sizeof(mydst),base64_decode(mysrc2)); /* input is b64 */
    printf("The string [%s]\n\tdecodes from base64 as: [%s]\n",mysrc2,mydst);
    } /* --- end-of-if(testnum==2) --- */

  if ( testnum == 3 ) {
    int itest, ntests = (argc>2?atoi(argv[2]):999);
    int ichar, nchars = (argc>3?atoi(argv[3]):128);
    unsigned int seed = (argc>4?atoi(argv[4]):987654321);
    char blanks[999] = "                           ";
    size_t npad = strlen(blanks);
    /* longest input whose encoding (4 characters per 3 bytes) fits myb64 */
    size_t maxsrc = ((sizeof(myb64) - 1) / 4) * 3;
    /* keep the random part inside the buffers whatever was requested */
    if ( nchars < 0 ) nchars = 0;
    if ( (size_t)nchars > maxsrc - npad ) nchars = (int)(maxsrc - npad);
    srand(seed);
    for ( itest=1; itest<=ntests; itest++ ) {
      /* random bytes 1..255: never NUL, so the input stays one C string */
      for ( ichar=0; ichar<nchars; ichar++ )
        mydst[ichar] = (char)(1+(rand()%255));
      mydst[nchars] = '\000';
      if ( npad > 0 ) strcat(mydst,blanks);
      test_copy(myb64,sizeof(myb64),base64_encode(mydst));
      test_copy(mydst2,sizeof(mydst2),base64_decode(myb64));
      if ( strcmp(mydst,mydst2) != 0 )
        printf("Test#%d:\n\t in=%s\n\tout=%s\n",itest,mydst,mydst2);
      } /* --- end-of-for(itest) --- */
    } /* --- end-of-if(testnum==3) --- */

  return 0;
  } /* --- end-of-function main() --- */
#endif

Answer 1

不，它不依賴於字節序。Base64 本身是一種 4 字節對 3 字節的編碼，並不關心數據在內存中的實際表示。但是，如果您要傳輸小端/大端數據，則必須在編碼之前和解碼之後對字節序進行歸一化。

該片段只是逐個獨立地處理每個字節。假如它把 4 個字節加載到 uint32_t 之類的類型中，通過位運算產生輸出，再將該輸出原樣複製到結果緩衝區，那它才會依賴於字節序。

然而，該代碼由於使用了 strncat 而存在危險的缺陷，並且無法處理嵌入的 NUL 字節。您應該改用類似下面的寫法：

/* Decode four 6-bit values into three bytes, store them at *clrstr and
   advance *clrstr past them.  No terminator is written. */
void decodeblock(unsigned char in[], unsigned char **clrstr) {
     unsigned char *dst = *clrstr;

     dst[0] = (unsigned char) ((in[0] << 2) | (in[1] >> 4));
     dst[1] = (unsigned char) ((in[1] << 4) | (in[2] >> 2));
     dst[2] = (unsigned char) ((in[2] << 6) | in[3]);

     *clrstr = dst + 3;
}

這樣就能處理嵌入的 NUL 字節。

Answer 2

在字節序和在不同字節序的平台上兼容的代碼方面......

首先要區分三種字節序：處理平台硬件的字節序、所傳輸數據的字節序，以及 base64 編碼/解碼過程本身的字節序。

base64 編碼的字節順序決定了我們是採用第一個字節的低 6 位還是高 6 位來形成第一個字符。看來 base64 使用的是後者，即大端格式。

無論在什麼平台上，編碼器和解碼器都必須相互匹配，因此您展示的使用固定位移的代碼已經可以在大端或小端平台上運行。您不會希望小端平台採用「小端式」的位移，即把第一個字節的低 6 位放入第一個編碼字符；那樣的話它將與其他平台不兼容。因此在這種情況下，您並不想要依賴於平台的代碼。

不過，對於數據本身，您可能需要轉換字節序；但這應當針對二進制數據進行，而不是作為 base64 編碼過程或編碼後文本的一部分。

與字節序無關的 base64_encode/decode 函數

問題描述

2 個解決方案

解決方案1
2 已采納 2017-06-26 08:17:39

解決方案2
1 2022-07-11 10:30:37

與字節序無關的 base64_encode/decode 函數

問題描述

2 個解決方案

解決方案1 2 已采納 2017-06-26 08:17:39

解決方案2 1 2022-07-11 10:30:37

解決方案1
2 已采納 2017-06-26 08:17:39

解決方案2
1 2022-07-11 10:30:37