[英]Reading HTML from Tab Delimited Text File Into String Using C#
我正在嘗試從制表符分隔的文本文件中讀取 HTML,用它生成 HTML 文件,然後再將其轉換為 PDF。讀取文本文件時,結果中出現了「'」以及其他一些奇怪的字符。這是我的代碼:
// Load every row of the tab-delimited export, decoding the bytes as Windows-1255.
var lines = System.IO.File.ReadAllLines(@"C:\temp\Laura.txt", Encoding.GetEncoding("Windows-1255"));

// Split each row on tabs and map the first three columns onto an Articles instance.
var csv = lines
    .Select(row => row.Split('\t'))
    .Select(fields => new Articles()
    {
        id = fields[0].Trim(),
        name = fields[1].Trim(),
        body = fields[2].Trim(),
        // Alternatives that were tried for the body column:
        //body = WebUtility.HtmlDecode(fields[2].Trim()),
        //body = HttpUtility.HtmlEncode(fields[2].Trim()),
        //body = WebUtility.HtmlEncode(fields[2].Trim()),
        //body = SecurityElement.Escape(fields[2].Trim()),
    })
    .ToList();

foreach (var article in csv)
{
    string id = article.name;
    string htmlName = article.name + ".html";
    string html = article.body;
    string htmlPath = @"c:\temp\" + htmlName;

    // An existing HTML file means this article is skipped entirely.
    if (File.Exists(htmlPath))
    {
        continue;
    }

    // Write the HTML body to disk so Word can open it.
    File.WriteAllText(htmlPath, html);

    var word = new Microsoft.Office.Interop.Word.Application();
    var htmlDocument = word.Documents.Open(htmlPath);

    // Arguments for the COM call are boxed as object.
    object missing = System.Reflection.Missing.Value;
    object background = true;
    object append = false;

    // The output file sits next to the HTML file, with a .pdf extension.
    string targetDirectory = Path.GetDirectoryName(htmlPath);
    string targetName = Path.GetFileNameWithoutExtension(htmlPath) + ".pdf";
    object pdfTarget = Path.Combine(targetDirectory, targetName);

    htmlDocument.PrintOut(
        background,   // Background
        append,       // Append
        ref missing,  // Range
        pdfTarget,    // OutputFileName
        ref missing,  // From
        ref missing,  // To
        ref missing,  // Item
        ref missing,  // Copies
        ref missing,  // Pages
        ref missing,  // PageType
        ref missing,  // PrintToFile
        ref missing,  // Collate
        ref missing,  // ActivePrinterMacGX
        ref missing,  // ManualDuplexPrint
        ref missing,  // PrintZoomColumn
        ref missing,  // PrintZoomRow
        ref missing,  // PrintZoomPaperWidth
        ref missing   // PrintZoomPaperHeight
    );
}
我已經嘗試了注釋掉的代碼,但是似乎沒有任何效果。
嘗試這個
// Read all rows of the tab-delimited file using the Windows-1255 code page.
var lines = System.IO.File.ReadAllLines(
    @"C:\temp\Laura.txt",
    Encoding.GetEncoding("Windows-1255"));

// Build one Articles object per row from its first three tab-separated columns.
var csv = (from line in lines
           let columns = line.Split('\t')
           select new Articles()
           {
               id = columns[0].Trim(),
               name = columns[1].Trim(),
               body = columns[2].Trim(),
           }).ToList();
試試改用 ExportAsFixedFormat 並指定 WdExportFormat.wdExportFormatPDF:
// Read 1.html, decoding it as Windows-1255.
// NOTE(review): `lines` is not used below, and the document opened is 2.html rather than
// 1.html. Presumably the decoded text is meant to be written to 2.html first; confirm.
var lines = System.IO.File.ReadAllText(@"1.html", Encoding.GetEncoding("Windows-1255"));
var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"2.html");

// The PDF is written next to the HTML file, same base name with a .pdf extension.
var OutputFileName = Path.Combine(
    Path.GetDirectoryName(path),
    Path.GetFileNameWithoutExtension(path) +
    ".pdf");

var app = new Microsoft.Office.Interop.Word.Application();
try
{
    // Second argument is ConfirmConversions: false suppresses the conversion dialog.
    var doc = app.Documents.Open(path, false);
    try
    {
        doc.ExportAsFixedFormat(OutputFileName, WdExportFormat.wdExportFormatPDF);
    }
    finally
    {
        // Close without saving so the HTML file is released and no save prompt appears.
        doc.Close(false);
    }
}
finally
{
    // Without Quit the automated WINWORD.EXE process keeps running after this code ends.
    app.Quit();
}
這是完整的代碼
/// <summary>
/// Reads the tab-delimited file C:\temp\Laura.txt (decoded as Windows-1255), writes the
/// body column of every row to c:\temp\&lt;name&gt;.html and exports that HTML file to a
/// PDF with the same base name through Word automation.
/// </summary>
/// <remarks>
/// Rows whose HTML file already exists are skipped. Blank lines are ignored. A non-blank
/// line with fewer than three tab-separated fields still throws
/// <see cref="IndexOutOfRangeException"/>.
/// </remarks>
static void connvert()
{
    var lines = File.ReadAllLines(
        @"C:\temp\Laura.txt",
        Encoding.GetEncoding("Windows-1255"));

    var csv = lines
        // Blank lines have no columns and would otherwise crash on parts[1].
        .Where(x => !string.IsNullOrWhiteSpace(x))
        .Select(x =>
        {
            var parts = x.Split('\t');
            return new Articles()
            {
                id = parts[0].Trim(),
                name = parts[1].Trim(),
                body = parts[2].Trim(),
            };
        })
        .ToList();

    // One Word instance is shared by all articles and started only when the first
    // conversion is actually needed.
    Application app = null;
    try
    {
        foreach (var item in csv)
        {
            string path = @"c:\temp\" + item.name + ".html";

            // An existing HTML file means this article was already handled.
            if (File.Exists(path))
            {
                continue;
            }

            // Encoding.Unicode writes a UTF-16 byte-order mark at the start of the file;
            // presumably Word uses it to pick the right encoding (confirm).
            // If the PDF still shows wrong characters, try writing with
            // Encoding.GetEncoding("Windows-1255") instead.
            File.WriteAllText(path, item.body, Encoding.Unicode);

            if (app == null)
            {
                app = new Application();
            }

            // Second argument is ConfirmConversions: false suppresses the conversion dialog.
            var doc = app.Documents.Open(path, false);
            try
            {
                var outputFileName = Path.Combine(
                    Path.GetDirectoryName(path),
                    Path.GetFileNameWithoutExtension(path) + ".pdf");

                doc.ExportAsFixedFormat(outputFileName, WdExportFormat.wdExportFormatPDF);
            }
            finally
            {
                // Close without saving so the HTML file is released before the next one.
                doc.Close(false);
            }
        }
    }
    finally
    {
        // Without Quit the automated WINWORD.EXE process keeps running after this method.
        if (app != null)
        {
            app.Quit();
        }
    }
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.