![](/img/trans.png)
[英]Why is std::string implemented as basic_string of `char` and not `unsigned char`
[英]How to assign a std::string to std::basic_string<unsigned short int, TRAITS_CLASS>(Unicode2String) on Linux
我正在研究 Linux 系統,我認為標准 Linux std::string
支持 Unicode 和 ASCII 字符。 所以,我想在我的代碼中使用std::string
,但我從應用程序接收格式為std::basic_string<unsigned short int, TRAITS_CLASS>
的字符串(同時支持 Windows 和 Linux)。 TRAITS_CLASS
如下:
// Character-traits class for strings whose code units are `unsigned short`.
// It supplies the member typedefs and static operations that a
// std::basic_string<unsigned short, TRAITS_CLASS> looks up on its traits type.
class TRAITS_CLASS
{
public:
    using char_type = unsigned short;
    using int_type = unsigned short;
    using pos_type = size_t;
    using off_type = size_t;
    using state_type = int;

    // Stores `src` into `dest`.
    static inline void assign(unsigned short &dest, const unsigned short &src)
    {
        dest = src;
    }

    // True when both code units hold the same value.
    static inline bool eq(const unsigned short &left, const unsigned short &right)
    {
        return left == right;
    }

    // True when `left` orders strictly before `right`.
    static inline bool lt(const unsigned short &left, const unsigned short &right)
    {
        return left < right;
    }

    // Lexicographically compares the first `count` units of `p1` and `p2`.
    // Returns -1 / 1 at the first differing unit, or 0 when all of them match.
    static int compare(const unsigned short *p1, const unsigned short *p2, size_t count)
    {
        for (size_t i = 0; i < count; ++i)
        {
            if (p1[i] == p2[i])
            {
                continue;
            }
            return (p1[i] < p2[i]) ? -1 : 1;
        }
        return 0;
    }

    // Counts the code units in front of the first zero unit.
    static size_t length(const unsigned short *p)
    {
        const unsigned short *cursor = p;
        while (*cursor != 0)
        {
            ++cursor;
        }
        return static_cast<size_t>(cursor - p);
    }

    // Copies `count` units from `p2` to `p1` front to back; returns `p1`.
    static unsigned short* copy(unsigned short *p1, const unsigned short *p2, size_t count)
    {
        for (size_t i = 0; i < count; ++i)
        {
            p1[i] = p2[i];
        }
        return p1;
    }

    // Returns a pointer to the first unit equal to `value` among the first
    // `count` units of `p`, or a null pointer when there is none.
    static const unsigned short* find(const unsigned short *p, size_t count,
                                      const unsigned short &value)
    {
        const unsigned short *const stop = p + count;
        while (p != stop)
        {
            if (*p == value)
            {
                return p;
            }
            ++p;
        }
        return nullptr;
    }

    // Copies `count` units from `src` to `dest`; returns `dest`.
    // When `dest` starts inside [src, src + count) the copy runs back to
    // front so that source units are read before they are overwritten.
    static unsigned short* move(unsigned short *dest, const unsigned short *src, size_t count)
    {
        const bool dest_inside_source = (src < dest) && (dest < src + count);
        if (!dest_inside_source)
        {
            for (size_t i = 0; i < count; ++i)
            {
                dest[i] = src[i];
            }
            return dest;
        }

        for (size_t i = count; i > 0; --i)
        {
            dest[i - 1] = src[i - 1];
        }
        return dest;
    }

    // Fills the first `count` units of `dest` with `value`; returns `dest`.
    static unsigned short* assign(unsigned short *dest, size_t count, unsigned short value)
    {
        for (size_t i = 0; i < count; ++i)
        {
            dest[i] = value;
        }
        return dest;
    }

    // Converts an int_type value to the character type.
    static inline unsigned short to_char_type(const int_type &arg)
    {
        return static_cast<unsigned short>(arg);
    }

    // Converts a character to int_type.
    static inline int_type to_int_type(const unsigned short &value)
    {
        return static_cast<int_type>(value);
    }

    // True when both int_type values are equal.
    static inline bool eq_int_type(const int_type &left, const int_type &right)
    {
        return left == right;
    }

    // The end-of-file marker: the EOF macro converted to int_type.
    static inline int_type eof()
    {
        return static_cast<int_type>(EOF);
    }

    // Returns `value` unchanged unless it equals eof(), in which case 1.
    static inline int_type not_eof(const int_type &value)
    {
        if (value == eof())
        {
            return 1;
        }
        return value;
    }
};
如何將普通的std::string
分配給上述std::basic_string
模板? 例如:
// Usage the question would like to work: initialise the custom string type
// directly from a char16_t literal.
// NOTE(review): `TRAIT_ClASS` does not match the class name `TRAITS_CLASS`
// shown above — presumably a typo in the question; confirm.
basic_string<unsigned short int, TRAIT_ClASS> temp = u"string";
如果無法分配,我該如何使用上面的basic_string
模板?
我認為標准 Linux std::string 支持 Unicode 和 ASCII 字符
std::string
(又名std::basic_string<char>
)沒有 Unicode 或 ASCII 的概念,它只知道char
元素,僅此而已。 您可能是被這樣一個事實弄糊塗了:Linux 應用程序通常使用 UTF-8 字符串,而 UTF-8 可以存儲在std::string
(或者在 C++20 中最好存儲在std::u8string
,又名std::basic_string<char8_t>
)中。 但是,為std::string
的具體用法賦予這種編碼含義,是您的代碼的責任。
如何將普通的
std::string
分配給上述std::basic_string
模板?
您不能直接將std::string
分配給/從另一個std::basic_string<CharT>
,其中CharT
是與char
不同的字符類型。
假設數據是兼容的,您將不得不使用類型轉換來解決這個問題 - 在您的示例中並非如此! char
大小為 1 個字節,但unsigned short int
大小為 2 個字節。 因此,您的其他應用程序的basic_string
最有可能使用 UCS-2/UTF-16,您不能將其存儲在std::string
中(好吧,無論如何,不是您想要的方式),但您可以存儲在std::u16string
(又名std::basic_string<char16_t>
),或在 Windows 上的std::wstring
(又名std::basic_string<wchar_t>
)中,例如:
// The lines below are three independent examples (each redeclares `temp` /
// `str`), not one translation unit.
// NOTE(review): every cast here assumes char16_t and unsigned short have the
// same size and representation; reading through the casted pointer is not
// sanctioned by the strict-aliasing rules — confirm it is acceptable for the
// target toolchain.

// Example 1: build the custom string from a char16_t literal.
std::basic_string<unsigned short int, TRAITS_CLASS> temp =
reinterpret_cast<const unsigned short int*>(u"string");
// or:
// same literal, but with the length (6 code units) passed explicitly
std::basic_string<unsigned short int, TRAITS_CLASS> temp(
reinterpret_cast<const unsigned short int*>(u"string"),
6);
// Example 2: build the custom string from an existing std::u16string.
std::u16string str = u"string";
std::basic_string<unsigned short int, TRAITS_CLASS> temp =
reinterpret_cast<const unsigned short int*>(str.c_str());
// or:
// pointer + size form, which does not stop at the first zero code unit
std::basic_string<unsigned short int, TRAITS_CLASS> temp(
reinterpret_cast<const unsigned short int*>(str.c_str()),
str.size());
// Example 3: convert the custom string back into a std::u16string.
// (`...` is a placeholder for however `temp` is obtained.)
std::basic_string<unsigned short int, TRAITS_CLASS> temp = ...;
std::u16string str =
reinterpret_cast<const char16_t*>(temp.c_str());
// or:
// pointer + size form, which does not stop at the first zero code unit
std::u16string str(
reinterpret_cast<const char16_t*>(temp.c_str()),
temp.size());
如果您絕對需要在代碼中使用std::string
,那么您必須在UTF-8 (或您想要的任何其他char
兼容字符集)和其他應用程序的 16 位格式(假設 UCS-2/UTF-16)之間進行轉換,例如使用std::wstring_convert
或第三方 Unicode 庫,如 libiconv、ICU 等。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.