[英]C# StreamReader: detect the encoding of an XML file
在我的xml文件中,我有这样的数据:
<Data>
<Field>
<Name>BarcodeCapture_0</Name>
<Type>SimpleIndex</Type>
<DataType>DataMatrix</DataType>
<Value>DEA"¡CV°)Ñ võ Fƒ´ 20100410050</Value>
</Field>
</Data>
我使用了一个继承自StreamReader的类,并重写了它的读取方法,以过滤掉XML中不允许出现的字符(例如上面数据中的某个不可见字符,该字符在此处已丢失,显示为“?”)。
这是该类的代码:
/// <summary>
/// A <see cref="StreamReader"/> whose read methods silently skip characters
/// that are not legal in XML 1.0, so that downstream XML parsing does not
/// fail on them.
/// </summary>
/// <remarks>
/// NOTE(review): filtering is applied to each value returned by
/// <c>base.Read()</c> individually. The XML 1.0 test rejects the range
/// 0xD800-0xDFFF, so UTF-16 surrogate halves (and therefore characters
/// outside the Basic Multilingual Plane) appear to be dropped as well —
/// confirm this is acceptable for the data being read.
/// </remarks>
public class CustomStreamReader : StreamReader
{
    /// <summary>Value returned by the read methods at end of stream.</summary>
    private const int EOF = -1;

    /// <summary>Creates a filtering reader over an existing stream.</summary>
    public CustomStreamReader(Stream stream) : base(stream)
    {
    }

    /// <summary>Creates a filtering reader over the file at <paramref name="path"/>.</summary>
    public CustomStreamReader(string path) : base(path)
    {
    }

    /// <summary>
    /// Creates a filtering reader over the file at <paramref name="path"/>
    /// using the given character encoding.
    /// </summary>
    public CustomStreamReader(string path, Encoding encoding) : base(path, encoding)
    {
    }

    /// <summary>
    /// Get whether an integer represents a legal XML 1.0 or 1.1 character. See
    /// the specification at w3.org for these characters.
    /// </summary>
    /// <param name="xmlVersion">
    /// The version number as a string. Use "1.0" for XML 1.0 character
    /// validation, and use "1.1" for XML 1.1 character validation.
    /// </param>
    /// <param name="character">The character code to test.</param>
    /// <returns><c>true</c> if the character is allowed; otherwise <c>false</c>.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="xmlVersion"/> is neither "1.0" nor "1.1".
    /// </exception>
    public static bool IsLegalXmlChar(string xmlVersion, int character)
    {
        switch (xmlVersion)
        {
            case "1.1": // http://www.w3.org/TR/xml11/#charsets
            {
                return
                !(
                    character <= 0x8 ||
                    character == 0xB ||
                    character == 0xC ||
                    (character >= 0xE && character <= 0x1F) ||
                    (character >= 0x7F && character <= 0x84) ||
                    (character >= 0x86 && character <= 0x9F) ||
                    character > 0x10FFFF
                );
            }
            case "1.0": // http://www.w3.org/TR/REC-xml/#charsets
            {
                return
                (
                    character == 0x9 /* == '\t' == 9 */ ||
                    character == 0xA /* == '\n' == 10 */ ||
                    character == 0xD /* == '\r' == 13 */ ||
                    (character >= 0x20 && character <= 0xD7FF) ||
                    (character >= 0xE000 && character <= 0xFFFD) ||
                    (character >= 0x10000 && character <= 0x10FFFF)
                );
            }
            default:
            {
                // The format argument must be supplied; without it string.Format
                // throws FormatException and masks the intended exception.
                throw new ArgumentOutOfRangeException
                    ("xmlVersion", string.Format("'{0}' is not a valid XML version.", xmlVersion));
            }
        }
    }

    /// <summary>
    /// Get whether an integer represents a legal XML 1.0 character. See the
    /// specification at w3.org for these characters.
    /// </summary>
    /// <param name="character">The character code to test.</param>
    public static bool IsLegalXmlChar(int character)
    {
        return CustomStreamReader.IsLegalXmlChar("1.0", character);
    }

    /// <summary>
    /// Reads the next legal XML 1.0 character, discarding prohibited ones.
    /// </summary>
    /// <returns>The character read, or -1 at end of stream.</returns>
    public override int Read()
    {
        // Read each character, skipping over characters that XML has prohibited
        int nextCharacter;
        do
        {
            nextCharacter = base.Read();
            if (nextCharacter == EOF)
            {
                // End of stream: stop reading and report it to the caller
                break;
            }
        }
        // Skip the character if it's prohibited, and try the next
        while (!CustomStreamReader.IsLegalXmlChar(nextCharacter));
        return nextCharacter;
    }

    /// <summary>
    /// Returns the next legal XML 1.0 character without consuming it.
    /// Prohibited characters in front of it are consumed and discarded.
    /// </summary>
    /// <returns>The next legal character, or -1 if none is available.</returns>
    public override int Peek()
    {
        while (true)
        {
            int nextCharacter = base.Peek();

            // When the base reader reports "nothing to peek", return that as-is.
            // Calling base.Read() here could consume (and lose) a valid character
            // in cases where Peek returns -1 without being at end of stream.
            if (nextCharacter == EOF || CustomStreamReader.IsLegalXmlChar(nextCharacter))
            {
                return nextCharacter;
            }

            // Prohibited character: consume it and look at the one after
            base.Read();
        }
    }

    // The following methods are modeled on the methods in TextReader
    // (obtained by disassembling it in Reflector), routed through the
    // filtering Read()/Peek() overrides above.

    /// <summary>
    /// Reads up to <paramref name="count"/> legal characters into
    /// <paramref name="buffer"/> starting at <paramref name="index"/>.
    /// </summary>
    /// <returns>
    /// The number of characters stored; fewer than <paramref name="count"/>
    /// only when end of stream is reached.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="index"/> or <paramref name="count"/> is negative.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The buffer is too small for <paramref name="index"/> plus <paramref name="count"/>.
    /// </exception>
    public override int Read(char[] buffer, int index, int count)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException("buffer");
        }
        if (index < 0)
        {
            throw new ArgumentOutOfRangeException("index");
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count");
        }
        if ((buffer.Length - index) < count)
        {
            throw new ArgumentException();
        }

        // Test the bound before reading so that count == 0 neither consumes a
        // character nor writes past the requested range.
        int charsRead = 0;
        while (charsRead < count)
        {
            int next = this.Read();
            if (next == EOF)
            {
                break;
            }
            buffer[index + charsRead++] = (char)next;
        }
        return charsRead;
    }

    /// <summary>
    /// Reads legal characters into <paramref name="buffer"/> until
    /// <paramref name="count"/> have been stored or the stream ends.
    /// </summary>
    /// <returns>The total number of characters stored.</returns>
    public override int ReadBlock(char[] buffer, int index, int count)
    {
        int lastRead;
        int total = 0;
        do
        {
            lastRead = this.Read(buffer, index + total, count - total);
            total += lastRead;
        }
        while ((lastRead > 0) && (total < count));
        return total;
    }

    /// <summary>
    /// Reads one line of legal characters, terminated by CR, LF or CR LF.
    /// </summary>
    /// <returns>
    /// The line without its terminator, or <c>null</c> if the stream ended
    /// before any character was read.
    /// </returns>
    public override string ReadLine()
    {
        StringBuilder builder = new StringBuilder();
        while (true)
        {
            int next = this.Read();
            switch (next)
            {
                case EOF:
                    if (builder.Length > 0)
                    {
                        return builder.ToString();
                    }
                    return null;
                case 13:
                case 10:
                    // Treat CR LF as a single line terminator
                    if ((next == 13) && (this.Peek() == 10))
                    {
                        this.Read();
                    }
                    return builder.ToString();
            }
            builder.Append((char)next);
        }
    }

    /// <summary>
    /// Reads all remaining legal characters from the stream.
    /// </summary>
    /// <returns>The remaining content with prohibited characters removed.</returns>
    public override string ReadToEnd()
    {
        int charsRead;
        char[] buffer = new char[0x1000];
        StringBuilder builder = new StringBuilder(0x1000);
        while ((charsRead = this.Read(buffer, 0, buffer.Length)) != 0)
        {
            builder.Append(buffer, 0, charsRead);
        }
        return builder.ToString();
    }
}
XML反序列化部分的代码如下:
// Deserialize the scan transaction through the filtering reader so that
// characters prohibited in XML 1.0 are skipped before the serializer sees them.
ScanTransaction result;
// Dispose the reader (and the underlying file handle) once deserialization ends.
using (CustomStreamReader fStream_scanTransaction_XML = new CustomStreamReader(scanTransactionFilePath, Encoding.UTF8))
{
    XmlSerializer s = new XmlSerializer(typeof(ScanTransaction));
    result = (ScanTransaction)s.Deserialize(fStream_scanTransaction_XML);
}
问题是StreamReader无法检测到该字符(此处显示为“?”)的编码,因此没有删除这个字符,导致XML反序列化失败。
尝试:
// Read the file as UTF-8 text and hand it to the serializer through an
// XmlTextReader rather than a custom filtering StreamReader.
using (StreamReader fileReader = new StreamReader("XMLFile1.xml", Encoding.UTF8))
{
    using (XmlTextReader xmlReader = new XmlTextReader(fileReader))
    {
        XmlSerializer serializer = new XmlSerializer(typeof(ScanTransaction));
        object deserialized = serializer.Deserialize(xmlReader);
        ScanTransaction result = (ScanTransaction)deserialized;
    }
}
你甚至不需要一个“特殊的”StreamReader。XmlTextReader不检查非法字符(可以通过布尔属性Normalization来控制这一行为,但它默认为false,因此默认不会检查非法字符)。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.