![](/img/trans.png)
[英]Why is std::string implemented as basic_string of `char` and not `unsigned char`
[英]How to assign a std::string to std::basic_string<unsigned short int, TRAITS_CLASS>(Unicode2String) on Linux
我正在 Linux 系统上进行开发,我认为标准 Linux std::string
支持 Unicode 和 ASCII 字符。 所以,我想在我的代码中使用std::string
,但我从应用程序接收到的字符串格式为std::basic_string<unsigned short int, TRAITS_CLASS>
(该应用程序同时支持 Windows 和 Linux)。 TRAITS_CLASS
如下:
/**
 * Character-traits policy for strings whose elements are `unsigned short`
 * values, intended for use as the traits argument of
 * std::basic_string<unsigned short, TRAITS_CLASS>.
 *
 * Every operation treats an element as an opaque unsigned integer: equality
 * and ordering are plain numeric comparisons and a zero element terminates a
 * C-style sequence.
 */
class TRAITS_CLASS
{
public:
    using char_type = unsigned short;
    using int_type = unsigned short;
    using pos_type = size_t;
    using off_type = size_t;
    using state_type = int;

    /// Stores `src` into `dest`.
    static inline void assign(unsigned short &dest, const unsigned short &src)
    {
        dest = src;
    }

    /// True when both elements hold the same value.
    static inline bool eq(const unsigned short &left, const unsigned short &right)
    {
        return left == right;
    }

    /// True when `left` is numerically smaller than `right`.
    static inline bool lt(const unsigned short &left, const unsigned short &right)
    {
        return left < right;
    }

    /**
     * Lexicographically compares the first `count` elements of two sequences.
     * @return -1 / 1 at the first differing element (smaller / greater in
     *         `p1`), or 0 when all `count` elements match.
     */
    static int compare(const unsigned short *p1, const unsigned short *p2, size_t count)
    {
        for (size_t i = 0; i != count; ++i)
        {
            if (p1[i] == p2[i])
            {
                continue;
            }
            return (p1[i] < p2[i]) ? -1 : 1;
        }
        return 0;
    }

    /// Number of elements that precede the first zero element of `p`.
    static size_t length(const unsigned short *p)
    {
        const unsigned short *cursor = p;
        while (*cursor != 0)
        {
            ++cursor;
        }
        return static_cast<size_t>(cursor - p);
    }

    /**
     * Copies `count` elements from `p2` to `p1`, front to back.
     * @return `p1`.
     */
    static unsigned short* copy(unsigned short *p1, const unsigned short *p2, size_t count)
    {
        for (size_t i = 0; i != count; ++i)
        {
            p1[i] = p2[i];
        }
        return p1;
    }

    /**
     * Searches the first `count` elements of `p` for `value`.
     * @return pointer to the first match, or a null pointer when absent.
     */
    static const unsigned short* find(const unsigned short *p, size_t count,
                                      const unsigned short &value)
    {
        for (size_t i = 0; i != count; ++i)
        {
            if (p[i] == value)
            {
                return p + i;
            }
        }
        return nullptr;
    }

    /**
     * Copies `count` elements from `src` to `dest`; the ranges may overlap.
     * @return `dest`.
     */
    static unsigned short* move(unsigned short *dest, const unsigned short *src, size_t count)
    {
        // When dest starts inside [src, src + count) a front-to-back copy would
        // overwrite source elements before reading them, so walk backwards.
        const bool dest_inside_src = (src < dest) && (dest < src + count);
        if (dest_inside_src)
        {
            size_t i = count;
            while (i != 0)
            {
                --i;
                dest[i] = src[i];
            }
        }
        else
        {
            for (size_t i = 0; i != count; ++i)
            {
                dest[i] = src[i];
            }
        }
        return dest;
    }

    /**
     * Fills the first `count` elements of `dest` with `value`.
     * @return `dest`.
     */
    static unsigned short* assign(unsigned short *dest, size_t count, unsigned short value)
    {
        for (size_t i = 0; i != count; ++i)
        {
            dest[i] = value;
        }
        return dest;
    }

    /// Converts an int_type value to the element type.
    static inline unsigned short to_char_type(const int_type &arg)
    {
        return static_cast<char_type>(arg);
    }

    /// Converts an element to int_type.
    static inline int_type to_int_type(const unsigned short &value)
    {
        return static_cast<int_type>(value);
    }

    /// True when both int_type values are equal.
    static inline bool eq_int_type(const int_type &left, const int_type &right)
    {
        return left == right;
    }

    /// The end-of-file marker: EOF narrowed to int_type
    /// (0xFFFF when EOF is -1 and unsigned short is 16 bits wide).
    static inline int_type eof()
    {
        return static_cast<int_type>(EOF);
    }

    /// Returns `value` unchanged unless it equals eof(), in which case 1 is returned.
    static inline int_type not_eof(const int_type &value)
    {
        if (value == eof())
        {
            return 1;
        }
        return value;
    }
};
如何将普通的std::string
分配给上述std::basic_string
模板? 例如:
// Usage the question asks for: initialize the custom string directly from a UTF-16 literal.
// NOTE(review): the traits name is spelled TRAIT_ClASS here, unlike the TRAITS_CLASS class shown earlier — presumably a typo; confirm.
basic_string<unsigned short int, TRAIT_ClASS> temp = u"string";
如果无法分配,我该如何使用上面的basic_string
模板?
我认为标准 Linux std::string 支持 Unicode 和 ASCII 字符
std::string
(又名std::basic_string<char>
)没有 Unicode 或 ASCII 的概念,它只知道char
元素,仅此而已。 您可能是被这样一个事实弄混了:Linux 应用程序通常使用 UTF-8 字符串,而 UTF-8 可以存储在std::string
中(或者在 C++20 中最好存储在std::u8string
,又名std::basic_string<char8_t>
中)。 但是,为std::string
的具体用法赋予这种编码含义,是您自己代码的责任。
如何将普通的
std::string
分配给上述std::basic_string
模板?
您不能直接将std::string
分配给/从另一个std::basic_string<CharT>
,其中CharT
是与char
不同的字符类型。
您必须使用类型转换来绕过这一限制,而且前提是数据兼容——而在您的示例中并非如此! char
大小为 1 个字节,但unsigned short int
大小为 2 个字节。 因此,您的其他应用程序的basic_string
最有可能使用 UCS-2/UTF-16,您不能将其存储在std::string
中(好吧,无论如何,不是您想要的方式),但您可以存储在std::u16string
(又名std::basic_string<char16_t>
),或在 Windows 上的std::wstring
(又名std::basic_string<wchar_t>
)中,例如:
// Example: build the custom-traits string from a UTF-16 literal.
// The cast views the char16_t literal as unsigned short elements.
// NOTE(review): assumes char16_t and unsigned short have the same size and representation — confirm for the target platforms.
std::basic_string<unsigned short int, TRAITS_CLASS> temp =
reinterpret_cast<const unsigned short int*>(u"string");
// or:
// Same construction with an explicit element count (6 = characters in "string"),
// so the terminator does not have to be searched for.
std::basic_string<unsigned short int, TRAITS_CLASS> temp(
reinterpret_cast<const unsigned short int*>(u"string"),
6);
// Example: convert an existing std::u16string into the custom-traits string.
std::u16string str = u"string";
// Pointer-only form: the new string's length is found by scanning for the first
// zero element, so content after an embedded zero in str would be dropped.
std::basic_string<unsigned short int, TRAITS_CLASS> temp =
reinterpret_cast<const unsigned short int*>(str.c_str());
// or:
// Pointer + length form: copies exactly str.size() elements.
std::basic_string<unsigned short int, TRAITS_CLASS> temp(
reinterpret_cast<const unsigned short int*>(str.c_str()),
str.size());
// Example: convert the custom-traits string back into a std::u16string.
// The "..." is a placeholder for however temp is obtained.
std::basic_string<unsigned short int, TRAITS_CLASS> temp = ...;
// Pointer-only form: the copy stops at the first zero element of temp.
std::u16string str =
reinterpret_cast<const char16_t*>(temp.c_str());
// or:
// Pointer + length form: copies exactly temp.size() elements.
std::u16string str(
reinterpret_cast<const char16_t*>(temp.c_str()),
temp.size());
如果您绝对需要在代码中使用std::string
,那么您必须在UTF-8 (或您想要的任何其他char
兼容字符集)和其他应用程序的 16 位格式(假设 UCS-2/UTF-16)之间进行转换,例如使用std::wstring_convert
或第三方 Unicode 库,如 libiconv、ICU 等。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.