[英]How to work with UTF-16 in python ctypes?
我有一個外國C庫,該庫在API中使用utf-16:作為函數參數,返回值和結構成員。
在Windows上使用ctypes.c_wchar_p可以,但是在OSX下ctypes的c_wchar使用的是UTF-32(UCS-4),我找不到支持utf-16的方法。
這是我的研究:
使用 _SimpleCData 子類重新定義 _check_retval_。from_param() 方法從未被調用(為什麼?):
func('str', b'W\x00B\x00\x00\x00') # passed without conversion
使用帶有 from_param() 方法的自定義類型。
這里是:
ustr = myutf16('hello')
func(ustr)
func('hello') # calls myutf16.from_param('hello')
您可以在 c_char_p 的子類中覆蓋 from_param,以將 unicode 字符串編碼為 UTF-16。您可以添加 _check_retval_ 方法,將 UTF-16 結果解碼為 unicode 字符串。對於結構字段,您可以使用描述符類來處理屬性的設置和獲取:將字段定義為 c_char_p 類型的私有 _name,並將描述符設置為公共的 name。例如:
import sys
import ctypes
# Python 3 has no separate ``unicode`` type; alias the name to ``str`` so
# the ``isinstance(obj, unicode)`` checks in this file work on both major
# versions.
if sys.version_info[0] > 2:
    unicode = str
def decode_utf16_from_address(address, byteorder='little',
                              c_char=ctypes.c_char):
    """Decode the NUL-terminated UTF-16 string stored at *address*.

    Memory is read two bytes at a time, starting at *address*, until a
    pair of zero bytes (the UTF-16 terminator) is found; the bytes read
    before the terminator are then decoded.

    Args:
        address: Integer memory address of the first code unit.  A falsy
            value (``None`` or ``0``, i.e. a NULL pointer) yields ``None``.
        byteorder: ``'little'`` to decode as UTF-16LE or ``'big'`` to
            decode as UTF-16BE.
        c_char: ctypes type used to read single bytes; defaults to
            ``ctypes.c_char``.

    Returns:
        The decoded text, or ``None`` when *address* is falsy.

    Raises:
        ValueError: If *byteorder* is neither ``'little'`` nor ``'big'``.
            This is checked first, so a bad argument is reported even
            when *address* is NULL.
    """
    if byteorder == 'little':
        codec = 'utf-16le'
    elif byteorder == 'big':
        codec = 'utf-16be'
    else:
        raise ValueError("byteorder must be either 'little' or 'big'")
    if not address:
        return None
    raw = bytearray()
    while True:
        # One UTF-16 code unit is two bytes; the terminator is the code
        # unit whose two bytes are both zero.
        unit = (c_char.from_address(address).value +
                c_char.from_address(address + 1).value)
        if unit == b'\x00\x00':
            break
        raw += unit
        address += 2
    return raw.decode(codec)
class c_utf16le_p(ctypes.c_char_p):
    """``c_char_p`` subclass whose value is exchanged as UTF-16LE text.

    Text assigned to ``value`` (or passed as a function argument through
    ``from_param``) is encoded as UTF-16LE; reading ``value`` decodes the
    pointed-to memory with ``decode_utf16_from_address``.
    """

    def __init__(self, value=None):
        """Create a NULL pointer, or point at the encoding of *value*."""
        super(c_utf16le_p, self).__init__()
        if value is not None:
            self.value = value

    @property
    def value(self,
              c_void_p=ctypes.c_void_p):
        """Decoded text the pointer refers to, or ``None`` when NULL."""
        # View this object's buffer as a void pointer to obtain the raw
        # address it holds.
        addr = c_void_p.from_buffer(self).value
        return decode_utf16_from_address(addr, 'little')

    @value.setter
    def value(self, value,
              c_char_p=ctypes.c_char_p):
        # ``None`` stores a NULL pointer, mirroring the getter, which
        # returns ``None`` for NULL.
        if value is not None:
            # c_char_p appends one NUL byte of its own; adding one more
            # here completes the two-byte UTF-16 terminator.
            value = value.encode('utf-16le') + b'\x00'
        c_char_p.value.__set__(self, value)

    @classmethod
    def from_param(cls, obj):
        """Encode text arguments as UTF-16LE; defer the rest to c_char_p."""
        if isinstance(obj, unicode):
            obj = obj.encode('utf-16le') + b'\x00'
        return super(c_utf16le_p, cls).from_param(obj)

    @classmethod
    def _check_retval_(cls, result):
        """Return the decoded ``value`` of a function result."""
        return result.value
class UTF16LEField(object):
    """Descriptor that exposes a ``c_char_p`` structure field as text.

    Place an instance on a ``ctypes.Structure`` subclass, passing the name
    of the underlying ``c_char_p`` field.  Assigning to the descriptor
    stores the UTF-16LE encoding of the text in that field; reading it
    decodes the memory the field points to.
    """

    def __init__(self, name):
        """Remember *name*, the attribute name of the underlying field."""
        self.name = name

    def __get__(self, obj, cls,
                c_void_p=ctypes.c_void_p,
                addressof=ctypes.addressof):
        """Return the decoded text, or ``None`` if the field is NULL.

        When accessed on the class rather than an instance (*obj* is
        ``None``) the descriptor itself is returned.
        """
        if obj is None:
            return self
        # Read the raw pointer stored in the field: going through the
        # field attribute itself would give a byte string truncated at the
        # first NUL byte.
        field_addr = addressof(obj) + getattr(cls, self.name).offset
        addr = c_void_p.from_address(field_addr).value
        return decode_utf16_from_address(addr, 'little')

    def __set__(self, obj, value):
        """Store *value* encoded as UTF-16LE; ``None`` stores NULL."""
        if value is not None:
            # The c_char_p field contributes one trailing NUL byte; the
            # extra one completes the two-byte UTF-16 terminator.
            value = value.encode('utf-16le') + b'\x00'
        setattr(obj, self.name, value)
例:
if __name__ == '__main__':
    # Demo 1: a structure whose private c_char_p field is exposed as text
    # through the UTF16LEField descriptor.
    class Test(ctypes.Structure):
        _fields_ = (
            ('x', ctypes.c_int),
            ('y', ctypes.c_void_p),
            ('_string', ctypes.c_char_p),
        )
        string = UTF16LEField('_string')

    print('test 1: structure field')
    sample = Test()
    sample.string = u'eggs and spam'
    print(sample.string)

    # Demo 2: a callback that both receives and returns a UTF-16LE string.
    print('test 2: parameter and result')
    kept_result = None
    prototype = ctypes.CFUNCTYPE(c_utf16le_p, c_utf16le_p)

    @prototype
    def append_eggs(string):
        global kept_result
        received = string.value
        print('parameter: %s' % received)
        # Callbacks leak memory unless they return a simple value such as
        # an integer address, so the new string is stored in a global and
        # only its address is returned.
        kept_result = c_utf16le_p(received + u' and eggs')
        return ctypes.c_void_p.from_buffer(kept_result).value

    outcome = append_eggs(u'spam')
    print('result: %s' % outcome)
輸出:
test 1: structure field
eggs and spam
test 2: parameter and result
parameter: spam
result: spam and eggs
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.