[英]Set multiple values in a char array at once in C (reimplementing strcpy)
如何像Apple(或任何其他企業級的C庫)那樣按字(word)而不是按字節復制信息? ( 他們的memcpy
供參考 ,他們使用結構“word”來復制一組大小的信息。 他們的strcpy
只調用memcpy
)
當我做這樣的事情時( mystrcpy
逐字節復制):
char *src = "Hi";
char *dst = malloc(3); // or 99, just so the focus is method instead
mystrcpy(dst, src); // of safety or this very specific case
dst將依次變為: ? ? ? - > 'H' ? ? - > 'H' 'i' ? - > 'H' 'i' 0
為了澄清這個問題:要讓mystrcpy把dst從 ? ? ? 一步直接變成 'H' 'i' 0 ,最簡單的代碼是什麼?
我問,因為我很難理解Apple的源代碼,谷歌搜索沒有產生任何類似的東西。 如果有人可以簡化和解釋Apple的代碼,我們將不勝感激。
更新以包含當前的mystrcpy
:
/*
 * Byte-at-a-time string copy.
 *
 * Copies the characters of src, including the terminating NUL, into dst.
 * dst must be large enough to hold the result and must not overlap src.
 * Returns dst.
 */
char *mystrcpy(char *dst, const char *src)
{
    char *out = dst;

    /* Copy every character up to, but not including, the terminator. */
    for (; *src != '\0'; src++, out++) {
        *out = *src;
    }
    /* Terminate the destination explicitly. */
    *out = '\0';
    return dst;
}
通過盡可能復制寄存器大小的塊來實現strcpy()
的完全優化的實現通常是復雜且不可移植的。 特別是,它們往往是針對特定平台用匯編語言編寫的。 它們采用單遍(single-pass)設計,不會先調用strlen()
。
下面我將展示半可移植的C代碼,它應該適用於所有小端64位平台,例如x64、ARM64、Power 8,包括那些要求所有加載和存儲都自然對齊的平台。 基本策略是逐字節復制,直到源指針達到8字節對齊。 從那時起,源數據總是以對齊的8字節塊加載,而目標則按照「每次寫入都必須自然對齊」的要求(即大小為 $2^n$ 字節的對象必須在 $2^n$ 字節邊界上訪問),用所需的最少次數的寫入來寫出每個塊。
請注意,以8字節塊的形式讀取可能會讀到源字符串所分配內存之外的字節。 只要加載(load)是對齊的,這就是無害的,因為對齊的加載永遠不會跨越頁面邊界,因此永遠不會觸及屬於不同進程的內存。 因此,這在C的“as-if”規則下是允許的,即觀察不到與抽象機器語義的任何偏差。 但是,內存檢查工具通常會對此報錯,因為相對於所分配的對象而言,這種訪問越界了。
當加載源字符串的每個8字節塊時,執行快速檢查以查看它是否包含指示字符串結尾的零字節。 如果是這種情況,則以字節方式寫出最后一個塊,直到到達源字符串的末尾。 快速檢查空字節使用了由Alan Mycroft於1987年4月8日發布到新聞組 comp.lang.c
的技術。 他定義了以下內容來檢測32位字中的空字節,這可以簡單地擴展到64位操作數。
/*
 * Mycroft's test: evaluates to non-zero when at least one byte of the
 * 32-bit value x is zero. The argument is parenthesized here (the form
 * usually quoted leaves it bare) so that an expression such as `a | b`
 * is handled correctly. x is evaluated twice, so it must be free of
 * side effects.
 */
#define has_nullbyte_(x) (((x) - 0x01010101) & ~(x) & 0x80808080)
下面的代碼是為了合理清晰而編寫的,並未完全優化。 特別是對於可以展開的結束情況的字節處理仍然存在循環。 在現實生活中,傳遞給strcpy()
的字符串通常會出乎意料地短,因此以盡可能高的速度處理最終案例通常是必不可少的。 代碼使用了許多輔助函數,並依賴於編譯器優化來內聯這些函數,因此應該在高優化級別進行編譯。
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
// Constants for Mycroft's zero-byte test on 64-bit operands, followed by
// masks that select the low 8, 16 or 32 bits of a 64-bit value.
#define M0 0x0101010101010101ULL // Mycroft's first mask (LSBs)
#define M1 0x8080808080808080ULL // Mycroft's second mask (MSBs)
#define BM 0x00000000000000ffULL // mask for byte
#define HM 0x000000000000ffffULL // mask for half-word
#define WM 0x00000000ffffffffULL // mask for word
// Non-zero when at least one byte of the 64-bit value x is zero.
// x is expanded twice, so pass an expression without side effects.
#define HAS_NULLBYTE(x) (((x) - M0) & ~(x) & M1)
/* Write the single byte b to address a. */
void store_byte (uintptr_t a, uint8_t b)
{
    uint8_t *target = (uint8_t *)(void *)a;

    *target = b;
}
/* Write the 16-bit value b to address a; a must be 2-byte aligned. */
void store_half (uintptr_t a, uint16_t b)
{
    uint16_t *target;

    assert ((a % 2) == 0);
    target = (uint16_t *)(void *)a;
    *target = b;
}
/* Write the 32-bit value b to address a; a must be 4-byte aligned. */
void store_word (uintptr_t a, uint32_t b)
{
    uint32_t *target;

    assert ((a % 4) == 0);
    target = (uint32_t *)(void *)a;
    *target = b;
}
/* Write the 64-bit value b to address a; a must be 8-byte aligned. */
void store_dword (uintptr_t a, uint64_t b)
{
    uint64_t *target;

    assert ((a % 8) == 0);
    target = (uint64_t *)(void *)a;
    *target = b;
}
/* Read and return the single byte stored at address a. */
uint8_t load_byte (uintptr_t a)
{
    const uint8_t *source = (uint8_t *)(void *)a;

    return *source;
}
/* Read and return the 64-bit value at address a; a must be 8-byte aligned. */
uint64_t load_dword (uintptr_t a)
{
    const uint64_t *source;

    assert ((a % 8) == 0);
    source = (uint64_t *)(void *)a;
    return *source;
}
/*
 * Write the bytes of t one at a time, lowest-order byte first, to
 * consecutive addresses starting at d. Stops after a zero byte has been
 * written, so the terminating NUL is stored as well.
 */
void store_last_dword (uintptr_t d, uint64_t t)
{
    for (;;) {
        const uint8_t low = (uint8_t)(t & 0xff);

        store_byte (d, low);
        if (low == 0) {
            break;
        }
        d++;
        t >>= 8;
    }
}
/*
 * Copy loop for a destination address d that is 8-byte aligned.
 *
 * The source chunk is read from d + diff. Every chunk without a zero
 * byte is written with one 64-bit store; the first chunk containing a
 * zero byte is handed to store_last_dword for byte-wise output.
 */
void store_8 (uintptr_t d, uintptr_t diff)
{
    uint64_t chunk = load_dword (d + diff);

    while (!HAS_NULLBYTE (chunk)) {
        store_dword (d, chunk);
        d += 8;
        chunk = load_dword (d + diff);
    }
    store_last_dword (d, chunk);
}
/*
 * Copy loop that writes each 8-byte chunk as two 32-bit stores
 * (at d and d + 4).
 *
 * The source chunk is read from d + diff. The first chunk containing a
 * zero byte is handed to store_last_dword for byte-wise output.
 */
void store_44 (uintptr_t d, uintptr_t diff)
{
    uint64_t chunk = load_dword (d + diff);

    while (!HAS_NULLBYTE (chunk)) {
        store_word (d + 0, (uint32_t)(chunk & WM));          /* bits  0..31 */
        store_word (d + 4, (uint32_t)((chunk >> 32) & WM));  /* bits 32..63 */
        d += 8;
        chunk = load_dword (d + diff);
    }
    store_last_dword (d, chunk);
}
/*
 * Copy loop that writes each 8-byte chunk as a 16-bit, a 32-bit and a
 * 16-bit store (at d, d + 2 and d + 6).
 *
 * The source chunk is read from d + diff. The first chunk containing a
 * zero byte is handed to store_last_dword for byte-wise output.
 */
void store_242 (uintptr_t d, uintptr_t diff)
{
    uint64_t chunk = load_dword (d + diff);

    while (!HAS_NULLBYTE (chunk)) {
        store_half (d + 0, (uint16_t)(chunk & HM));          /* bits  0..15 */
        store_word (d + 2, (uint32_t)((chunk >> 16) & WM));  /* bits 16..47 */
        store_half (d + 6, (uint16_t)((chunk >> 48) & HM));  /* bits 48..63 */
        d += 8;
        chunk = load_dword (d + diff);
    }
    store_last_dword (d, chunk);
}
/*
 * Copy loop that writes each 8-byte chunk as an 8-bit, a 32-bit, a
 * 16-bit and an 8-bit store (at d, d + 1, d + 5 and d + 7).
 *
 * The source chunk is read from d + diff. The first chunk containing a
 * zero byte is handed to store_last_dword for byte-wise output.
 */
void store_1421 (uintptr_t d, uintptr_t diff)
{
    uint64_t chunk = load_dword (d + diff);

    while (!HAS_NULLBYTE (chunk)) {
        store_byte (d + 0, (uint8_t )(chunk & BM));          /* bits  0..7  */
        store_word (d + 1, (uint32_t)((chunk >> 8) & WM));   /* bits  8..39 */
        store_half (d + 5, (uint16_t)((chunk >> 40) & HM));  /* bits 40..55 */
        store_byte (d + 7, (uint8_t )((chunk >> 56) & BM));  /* bits 56..63 */
        d += 8;
        chunk = load_dword (d + diff);
    }
    store_last_dword (d, chunk);
}
/*
 * Copy loop that writes each 8-byte chunk as an 8-bit, a 16-bit, a
 * 32-bit and an 8-bit store (at d, d + 1, d + 3 and d + 7).
 *
 * The source chunk is read from d + diff. The first chunk containing a
 * zero byte is handed to store_last_dword for byte-wise output.
 */
void store_1241 (uintptr_t d, uintptr_t diff)
{
    uint64_t chunk = load_dword (d + diff);

    while (!HAS_NULLBYTE (chunk)) {
        store_byte (d + 0, (uint8_t )(chunk & BM));          /* bits  0..7  */
        store_half (d + 1, (uint16_t)((chunk >> 8) & HM));   /* bits  8..23 */
        store_word (d + 3, (uint32_t)((chunk >> 24) & WM));  /* bits 24..55 */
        store_byte (d + 7, (uint8_t )((chunk >> 56) & BM));  /* bits 56..63 */
        d += 8;
        chunk = load_dword (d + diff);
    }
    store_last_dword (d, chunk);
}
/*
 * Copy the NUL-terminated string src to dst and return dst.
 *
 * Bytes are copied singly until the source address is a multiple of 8.
 * After that the source is read in aligned 8-byte chunks, and the
 * destination is written by the store_* routine whose store sizes fit
 * the destination address modulo 8.
 */
char* my_strcpy (char *dst, const char* src)
{
    const uintptr_t src_addr = (uintptr_t)(void *)src;
    uintptr_t d = (uintptr_t)(void *)dst;
    /* source address == destination address + diff, for every byte */
    const uintptr_t diff = src_addr - d;
    /* number of bytes before the source reaches an 8-byte boundary */
    uintptr_t head = (8 - (src_addr & 7)) & 7;
    uint8_t last = 0xff;

    for (; head != 0 && last != 0; head--, d++) {
        last = load_byte (d + diff);
        store_byte (d, last);
    }

    /* The terminator was already copied during the byte-wise prefix. */
    if (last == 0) {
        return dst;
    }

    /* Source is now 8-byte aligned; pick stores by destination alignment. */
    switch (d & 7) {
    case 0:
        store_8 (d, diff);
        break;
    case 4:
        store_44 (d, diff);
        break;
    case 2:
    case 6:
        store_242 (d, diff);
        break;
    case 1:
    case 5:
        store_1241 (d, diff);
        break;
    case 3:
    case 7:
        store_1421 (d, diff);
        break;
    }
    return dst;
}
/*
 * Test driver: compares my_strcpy against strcpy for every combination of
 * source offset 0..7, destination offset 0..7 and terminator position
 * 0..sizeof(a)-1 within the reference string.
 *
 * Returns EXIT_SUCCESS when every comparison matches, EXIT_FAILURE when
 * an allocation fails or at least one mismatch is found.
 */
int main (void)
{
    const char a[] = "0123456789 the quick brown fox jumps over the lazy dog";
    const size_t buffer_len = sizeof(a) + 16;
    char *src = malloc (sizeof(a));
    char *res = malloc (buffer_len);
    char *ref = malloc (buffer_len);
    int failures = 0;

    if (src == NULL || res == NULL || ref == NULL) {
        fprintf (stderr, "out of memory\n");
        free (src);
        free (res);
        free (ref);
        return EXIT_FAILURE;
    }

    /* %p requires void * arguments; report the buffer actually copied from */
    printf ("src=%p res=%p ref=%p\n", (void *)src, (void *)res, (void *)ref);

    for (int srcofs = 0; srcofs < 8; srcofs++) {
        for (int dstofs = 0; dstofs < 8; dstofs++) {
            for (size_t len = 0; len < sizeof(a); len++) {
                /* fresh source with an extra terminator at position len */
                memcpy (src, a, sizeof(a));
                src[len] = 0;
                /* identical fill so that stray writes show up in memcmp */
                memset (res, 0xff, buffer_len);
                memset (ref, 0xff, buffer_len);
                my_strcpy (res + dstofs, src + srcofs);
                strcpy (ref + dstofs, src + srcofs);
                if (memcmp (res, ref, buffer_len) != 0) {
                    /* %zu matches size_t; arguments follow the label order */
                    printf ("error @ srcofs=%d dstofs=%d len=%zu\n",
                            srcofs, dstofs, len);
                    failures++;
                }
            }
        }
    }

    if (failures == 0) {
        printf ("Test passed\n");
    } else {
        printf ("Test FAILED (%d mismatches)\n", failures);
    }

    free (src);
    free (res);
    free (ref);
    return (failures == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}
在我需要一邊翻轉比特一邊復制大塊數據時,我遇到過類似的情況(所以我不能直接使用memcpy())。 基本計劃是盡可能以8字節的塊進行復制,然后在最后處理剩余的零散字節。 但是如果源或目標中的任何一個不是8字節對齊的,我們就必須小心。 這是一個簡化版本,對非對齊數據的處理還可以改進。
#include <stdio.h>
#include <stdint.h>
#include <string.h>
// Try to copy a string 8 bytes at a time
/*
 * Copy the NUL-terminated string src (terminator included) to dest,
 * eight bytes at a time where possible.
 *
 * When either pointer is not 8-byte aligned the work is delegated to
 * strcpy. Otherwise whole 8-byte chunks are copied first, followed by the
 * remaining bytes one at a time. Each step logs "Long copy" or
 * "Short copy" to stderr.
 *
 * dest must be large enough for the string and must not overlap src.
 */
void myStrcpy(char *dest, const char *src) {
    /* total number of bytes to copy, including the terminator */
    size_t n = 1 + strlen(src);

    // Check we're aligned
    if ((((uintptr_t) dest) % 8) || (((uintptr_t) src) % 8)) {
        strcpy(dest, src);
        return;
    }

    // Copy eight-byte chunks as far as possible.
    // The chunk goes through memcpy rather than a uint64_t pointer cast:
    // accessing char storage through a uint64_t lvalue violates the
    // effective-type (strict aliasing) rules.
    for (; n >= 8; n -= 8) {
        uint64_t chunk;

        fprintf(stderr, "Long copy\n");
        memcpy(&chunk, src, sizeof chunk);
        memcpy(dest, &chunk, sizeof chunk);
        src += sizeof chunk;
        dest += sizeof chunk;
    }

    // Now mop up any remaining bytes
    while (n-- > 0) {
        *dest++ = *src++;
        fprintf(stderr, "Short copy\n");
    }
}
/* Demo: copy a fixed string with myStrcpy, then print source and copy. */
int main(void) {
    char s[] ="3r78cfjkcu8cdecowfcjefj0fj6d4j0e89j6rgffjk34kk4kik3f--f?";
    char d[sizeof s];   /* same size as the source, terminator included */

    myStrcpy(d, s);

    puts(s);
    puts(d);
    return 0;
}
約翰
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.