简体   繁体   中英

How to read a unicode string from a hdf-5 file with c#

I wrote a string to an HDF5 file with the help of the HDF.PInvoke library. I copied one of their unit tests to write a function that writes a Unicode string to a file. When I open the file in MATLAB and HDFView I see the correct string. It only fails when I try to read it back with the C# function I wrote.

/// <summary>
/// Writes <paramref name="str"/> as a scalar, fixed-length UTF-8 string dataset
/// called <paramref name="name"/> under <paramref name="groupId"/>.
/// </summary>
/// <param name="groupId">Identifier of the file or group that will contain the dataset.</param>
/// <param name="name">Name (path) of the dataset to create.</param>
/// <param name="str">Text to store; it is encoded to UTF-8 before writing.</param>
/// <returns>The status value returned by <c>H5D.write</c>.</returns>
public static int WriteUnicodeString(int groupId, string name, string str)
{
    byte[] wdata = Encoding.UTF8.GetBytes(str);

    var spaceId = H5S.create(H5S.class_t.SCALAR);

    // The string type is sized to the encoded byte count, not the character count.
    // NOTE(review): an empty string yields a zero-length type, which HDF5 presumably rejects — confirm.
    var dtype = H5T.create(H5T.class_t.STRING, new IntPtr(wdata.Length));
    H5T.set_cset(dtype, H5T.cset_t.UTF8);
    H5T.set_strpad(dtype, H5T.str_t.SPACEPAD);

    var datasetId = H5D.create(groupId, name, dtype, spaceId);

    // Pin the managed buffer so its address stays valid for the duration of the native call.
    GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned);
    try
    {
        return H5D.write(datasetId, dtype, H5S.ALL,
            H5S.ALL, H5P.DEFAULT, hnd.AddrOfPinnedObject());
    }
    finally
    {
        // Release the pin and every identifier even if the native call throws.
        hnd.Free();
        H5T.close(dtype);
        H5D.close(datasetId);
        H5S.close(spaceId);
    }
}

And the code that calls it to write the file:

// Create (or truncate) the file, write one UTF-8 string dataset at "/test", then close the file.
// fileId is assigned here but declared elsewhere (declaration not shown in this snippet).
string filename = "testUnicodeString.H5";
fileId = H5F.create(filename, H5F.ACC_TRUNC);
string test = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο";
Hdf5.WriteUnicodeString(fileId, "/test", test);
H5F.close(fileId);

Here's my attempt at writing a function that reads a unicode string:

    /// <summary>
    /// Attempts to read the string dataset <paramref name="name"/> under
    /// <paramref name="groupId"/> and decode its bytes as UTF-8.
    /// </summary>
    /// <param name="groupId">Identifier of the file or group that contains the dataset.</param>
    /// <param name="name">Name (path) of the dataset to open.</param>
    /// <returns>The UTF-8 decoding of the read buffer.</returns>
    public static string ReadUnicodeString(int groupId, string name)
    {
        // Memory datatype handed to H5D.read below: a variable-length (H5T.VARIABLE),
        // UTF-8, space-padded string type.
        // NOTE(review): WriteUnicodeString stores a fixed-length type sized to the byte
        // count, and the buffer passed to H5D.read is a flat byte array. With a
        // variable-length memory type HDF5 presumably expects an array of string
        // pointers instead, which would explain the all-zero buffer — confirm against
        // the H5Dread documentation.
        int datatype = H5T.create(H5T.class_t.STRING, H5T.VARIABLE);
        H5T.set_cset(datatype, H5T.cset_t.UTF8);
        H5T.set_strpad(datatype, H5T.str_t.SPACEPAD);

        var datasetId = H5D.open(groupId, name);
        // Datatype as stored in the file; only its size is used below.
        // NOTE(review): typeId is never passed to H5T.close in this method.
        var typeId = H5D.get_type(datasetId);

        // classId and order are queried but not used afterwards.
        var classId = H5T.get_class(typeId);
        var order = H5T.get_order(typeId);
        IntPtr size = H5T.get_size(typeId);
        int strLen = (int)size;

        int spaceId = H5D.get_space(datasetId);

        // Destination buffer sized from the stored datatype's size.
        byte[] wdata = new byte[strLen];

        //IntPtr ptr = new IntPtr();
        // Pin the buffer so its address stays fixed while the native read runs.
        // The return value of H5D.read is not checked.
        GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned);
        H5D.read(datasetId, datatype, H5S.ALL, H5S.ALL,
            H5P.DEFAULT, hnd.AddrOfPinnedObject());
        hnd.Free();

        //int len = 0;
        //while (Marshal.ReadByte(ptr, len) != 0) { ++len; }
        //byte[] name_buf = new byte[len];
        //Marshal.Copy(ptr, name_buf, 0, len);
        string s = Encoding.UTF8.GetString(wdata);

        H5S.close(spaceId);
        H5T.close(datatype);
        H5D.close(datasetId);
        return s;
    }
}

In the read method I get a wdata array of 103 bytes (that is correct), but the bytes all have a value of 0. What am I doing wrong?

I had a pull request from bendly with the answer to my question. Below is the code he wrote.

    /// <summary>
    /// Reads the string dataset <paramref name="name"/> under <paramref name="groupId"/>
    /// and decodes it as UTF-8. Both variable-length and fixed-length string
    /// datasets are handled; for a variable-length dataset with several elements
    /// only the first element is returned.
    /// </summary>
    /// <param name="groupId">Identifier of the file or group that contains the dataset.</param>
    /// <param name="name">Name (path) of the dataset to open.</param>
    /// <returns>
    /// The decoded string. An empty string is returned for a variable-length
    /// dataset that has no elements or whose first element is a null pointer.
    /// </returns>
    /// <exception cref="IOException">The dataset could not be opened or read.</exception>
    public static string ReadUnicodeString(hid_t groupId, string name)
    {
        var datasetId = H5D.open(groupId, name);
        if (datasetId < 0)
        {
            throw new IOException("Failed to open dataset");
        }

        try
        {
            var typeId = H5D.get_type(datasetId);
            try
            {
                return H5T.is_variable_str(typeId) > 0
                    ? ReadVariableLengthString(datasetId, typeId)
                    : ReadFixedLengthString(datasetId, typeId);
            }
            finally
            {
                // The datatype identifier is a separate resource from the dataset.
                H5T.close(typeId);
            }
        }
        finally
        {
            H5D.close(datasetId);
        }
    }

    // Reads a variable-length string dataset and returns its first element.
    private static string ReadVariableLengthString(hid_t datasetId, hid_t typeId)
    {
        var spaceId = H5D.get_space(datasetId);
        try
        {
            var count = H5S.get_simple_extent_npoints(spaceId);
            if (count < 0)
            {
                throw new IOException("Failed to read dataset");
            }

            if (count == 0)
            {
                return string.Empty;
            }

            // The read fills this array with one native string pointer per element.
            IntPtr[] rdata = new IntPtr[count];
            GCHandle hnd = GCHandle.Alloc(rdata, GCHandleType.Pinned);
            try
            {
                int result = H5D.read(datasetId, typeId, H5S.ALL, H5S.ALL,
                    H5P.DEFAULT, hnd.AddrOfPinnedObject());
                if (result < 0)
                {
                    throw new IOException("Failed to read dataset");
                }

                return rdata[0] != IntPtr.Zero
                    ? DecodeNullTerminatedUtf8(rdata[0])
                    : string.Empty;
            }
            finally
            {
                // Free every native string the library handed back, on success or
                // failure; slots that were never filled are still IntPtr.Zero.
                for (int i = 0; i < rdata.Length; ++i)
                {
                    if (rdata[i] != IntPtr.Zero)
                    {
                        H5.free_memory(rdata[i]);
                    }
                }

                hnd.Free();
            }
        }
        finally
        {
            H5S.close(spaceId);
        }
    }

    // Reads a fixed-length string dataset into an unmanaged buffer and decodes it.
    // NOTE(review): the buffer holds a single element of the stored type; this
    // assumes the dataset is scalar — confirm for multi-element datasets.
    private static string ReadFixedLengthString(hid_t datasetId, hid_t typeId)
    {
        int size = H5T.get_size(typeId).ToInt32();
        IntPtr iPtr = Marshal.AllocHGlobal(size);
        try
        {
            int result = H5D.read(datasetId, typeId, H5S.ALL, H5S.ALL,
                H5P.DEFAULT, iPtr);
            if (result < 0)
            {
                throw new IOException("Failed to read dataset");
            }

            var strDest = new byte[size];
            Marshal.Copy(iPtr, strDest, 0, size);

            // Strip trailing NUL bytes left in the fixed-size buffer.
            return Encoding.UTF8.GetString(strDest).TrimEnd((Char)0);
        }
        finally
        {
            Marshal.FreeHGlobal(iPtr);
        }
    }

    // Copies the bytes up to (not including) the first NUL at ptr and decodes them as UTF-8.
    private static string DecodeNullTerminatedUtf8(IntPtr ptr)
    {
        int length = 0;
        while (Marshal.ReadByte(ptr, length) != 0)
        {
            ++length;
        }

        byte[] buffer = new byte[length];
        Marshal.Copy(ptr, buffer, 0, buffer.Length);
        return Encoding.UTF8.GetString(buffer);
    }
}

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. If you have any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM