繁体   English   中英

在 pdf 文件中搜索字符串,然后将信息复制到 word/excel/txt 文件中?

[英]Search for a string in a pdf file then copy the info into word/excel/txt file?

我正在尝试打开一个 pdf 文件并搜索一个字符串或子字符串以到达我需要的页面,然后将该页面上的信息(不是整个页面,只是其中的一部分)复制到一个 word 文件中(或者我可以将该信息存储在 txt 文件或 excel 中,然后获取它)。

我希望它足够清楚。 我是 VBA 新手,不知道该怎么做。 我在互联网上搜索并没有找到任何有用的东西。 我也使用 Adob​​e Reader DC。

此外,您需要安装 Adob​​e Acrobat 才能使用 VBA 扫描 PDF 文件。 我不知道它要多少钱,但它不是免费的。 如果您想要免费选项,请将所有 PDF 文件转换为 Word 文件,然后对这些文件进行扫描。

Sub ConvertToWord()
   Dim MyObj As Object, MySource As Object, file As Variant
   file = Dir("C:\Users\Excel\Desktop\test\" & "*.pdf") 'pdf path
   Do While (file <> "")
   ChangeFileOpenDirectory "C:\Users\Excel\Desktop\test\"
          Documents.Open FileName:=file, ConfirmConversions:=False, ReadOnly:= _
        False, AddToRecentFiles:=False, PasswordDocument:="", PasswordTemplate:= _
        "", Revert:=False, WritePasswordDocument:="", WritePasswordTemplate:="", _
        Format:=wdOpenFormatAuto, XMLTransform:=""
    ChangeFileOpenDirectory "C:\Users\Excel\Desktop\test\" 'path for saving word
    ActiveDocument.SaveAs2 FileName:=Replace(file, ".pdf", ".docx"), FileFormat:=wdFormatXMLDocument _
        , LockComments:=False, Password:="", AddToRecentFiles:=True, _
        WritePassword:="", ReadOnlyRecommended:=False, EmbedTrueTypeFonts:=False, _
         SaveNativePictureFormat:=False, SaveFormsData:=False, SaveAsAOCELetter:= _
        False, CompatibilityMode:=15
    ActiveDocument.Close
     file = Dir
   Loop
End Sub

然后,在 Excel 中运行下面的代码。

Sub OpenAndReadWordDoc()

Rows("2:1000000").Select
Range(Selection, Selection.End(xlDown)).Select
Selection.ClearContents
Range("A1").Select

    ' assumes that the previous procedure has been executed
    Dim oWordApp As Word.Application
    Dim oWordDoc As Word.Document
    Dim blnStart As Boolean
    Dim r As Long
    Dim sFolder As String
    Dim strFilePattern As String
    Dim strFileName As String
    Dim sFileName As String
    Dim ws As Worksheet
    Dim c As Long
    Dim n As Long
    Dim iCount As Long
    Dim strSearch As String

    '~~> Establish an Word application object
    On Error Resume Next
    Set oWordApp = GetObject(, "Word.Application")
    If Err Then
        Set oWordApp = CreateObject("Word.Application")
        ' We started Word for this macro
        blnStart = True
    End If
    On Error GoTo ErrHandler

    Set ws = ActiveSheet
    r = 1 ' startrow for the copied text from the Word document
    ' Last column
    n = ws.Range("A1").End(xlToRight).Column

    sFolder = "C:\Users\Excel\Desktop\test\"

    '~~> This is the extension you want to go in for
    strFilePattern = "*.doc*"
    '~~> Loop through the folder to get the word files
    strFileName = Dir(sFolder & strFilePattern)
    Do Until strFileName = ""
        sFileName = sFolder & strFileName

        '~~> Open the word doc
        Set oWordDoc = oWordApp.Documents.Open(sFileName)
        ' Increase row number
        r = r + 1
        ' Enter file name in column A
        ws.Cells(r, 1).Value = sFileName

        ActiveCell.Offset(1, 0).Select
        ActiveSheet.Hyperlinks.Add Anchor:=Sheets("Sheet1").Range("A" & r), Address:=sFileName, _
        SubAddress:="A" & r, TextToDisplay:=sFileName

        ' Loop through the columns
        For c = 2 To n
            If oWordDoc.Content.Find.Execute(FindText:=Trim(ws.Cells(1, c).Value), _
                    MatchWholeWord:=True, MatchCase:=False) Then

                    strSearch = ws.Cells(1, c).Value
                    iCount = 0

                    With ActiveDocument.Content.Find
                        .Text = strSearch
                        .Format = False
                        .Wrap = wdFindStop
                        Do While .Execute
                            iCount = iCount + 1
                        Loop
                    End With

            ws.Cells(r, c).Value = iCount
            End If
        Next c
        oWordDoc.Close SaveChanges:=False

        '~~> Find next file
        strFileName = Dir
    Loop

ExitHandler:
    On Error Resume Next
    ' close the Word application
    Set oWordDoc = Nothing
    If blnStart Then
        ' We started Word, so we close it
        oWordApp.Quit
    End If
    Set oWordApp = Nothing
    Exit Sub

ErrHandler:
    MsgBox Err.Description, vbExclamation
    Resume ExitHandler
End Sub

Function GetDirectory(path)
   GetDirectory = Left(path, InStrRev(path, "\"))
End Function

在此处输入图片说明

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM