简体   繁体   中英

C# Convert pdf to txt

I warmly welcome... I have a question I'm trying to convert PDF to txt and I can not save a txt file ?? Someone please help me ??

using System;
using System.Text;
using System.Windows.Forms;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using System.IO;

namespace ZestawienieFaktur
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();

        }



        private void button1_Click(object sender, EventArgs e)
        {

            string[] filePaths = Directory.GetFiles(@"D:\\faktury\\", "*.pdf");

           foreach (string fp in filePaths)
            {
                ExtractTextFromPdf(fp);
            }

        }

        public static string ExtractTextFromPdf(string path)
        {
            using (PdfReader reader = new PdfReader(path))
            {
                StringBuilder text = new StringBuilder();

                for (int i = 1; i <= reader.NumberOfPages; i++)
                {
                    text.Append(PdfTextExtractor.GetTextFromPage(reader, i));
                }

                string lines = text.ToString();
                using (var file = new StreamWriter(@"D:\faktury\test1.txt"))
                {
                    file.WriteLine(lines);
                    file.Close();
                }


            }




        }

    }
}

In the folder I have a few pdf files with different names. And I want all converted to the format of txt. Big thx for answer...

You should remove the return keyword instead and just return void . The reason why it's not executing is because it stops executing the rest of the code after return . Change it to this:

public static void ExtractTextFromPdf(string path)
{
    using (PdfReader reader = new PdfReader(path))
    {
        StringBuilder text = new StringBuilder();

        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            text.Append(PdfTextExtractor.GetTextFromPage(reader, i));
        }
        string lines = "";
       using(var file = new StreamWriter(path2))
       {
          file.WriteLine(lines);
          file.Close();
       }      

    }
}

Hope it helps!

OK WORKS thx friends...

using System;
using System.Text;
using System.Windows.Forms;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using System.IO;

namespace ZestawienieFaktur
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();

        }



        private void button1_Click(object sender, EventArgs e)
        {

            string[] filePaths = Directory.GetFiles(@"D:\faktury\", "*.pdf");

           foreach (string fp in filePaths)
            {
                ExtractTextFromPdf(fp);
            }

        }

        public static string ExtractTextFromPdf(string path)
        {
            using (PdfReader reader = new PdfReader(path))
            {
                StringBuilder text = new StringBuilder();

                for (int i = 1; i <= reader.NumberOfPages; i++)
                {
                    text.Append(PdfTextExtractor.GetTextFromPage(reader, i));
                }

                string lines = text.ToString();
                using (var file = new StreamWriter(@"D:\faktury\test1.txt"))
                {
                    file.WriteLine(lines);
                    file.Close();
                }
                return lines; 
            }


        }




    }

    }

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM