![](/img/trans.png)
[英]Search a string in a PDF file is working from Word but not working from Excel
[英]Search for a string in a pdf file then copy the info into word/excel/txt file?
我正在尝试打开一个 pdf 文件并搜索一个字符串或子字符串以到达我需要的页面,然后将该页面上的信息(不是整个页面,只是其中的一部分)复制到一个 word 文件中(或者我可以将该信息存储在 txt 文件或 excel 中,然后获取它)。
我希望它足够清楚。 我是 VBA 新手,不知道该怎么做。 我在互联网上搜索并没有找到任何有用的东西。 我也使用 Adobe Reader DC。
此外,您需要安装 Adobe Acrobat 才能使用 VBA 扫描 PDF 文件。 我不知道它要多少钱,但它不是免费的。 如果您想要免费选项,请将所有 PDF 文件转换为 Word 文件,然后对这些文件进行扫描。
Sub ConvertToWord()
Dim MyObj As Object, MySource As Object, file As Variant
file = Dir("C:\Users\Excel\Desktop\test\" & "*.pdf") 'pdf path
Do While (file <> "")
ChangeFileOpenDirectory "C:\Users\Excel\Desktop\test\"
Documents.Open FileName:=file, ConfirmConversions:=False, ReadOnly:= _
False, AddToRecentFiles:=False, PasswordDocument:="", PasswordTemplate:= _
"", Revert:=False, WritePasswordDocument:="", WritePasswordTemplate:="", _
Format:=wdOpenFormatAuto, XMLTransform:=""
ChangeFileOpenDirectory "C:\Users\Excel\Desktop\test\" 'path for saving word
ActiveDocument.SaveAs2 FileName:=Replace(file, ".pdf", ".docx"), FileFormat:=wdFormatXMLDocument _
, LockComments:=False, Password:="", AddToRecentFiles:=True, _
WritePassword:="", ReadOnlyRecommended:=False, EmbedTrueTypeFonts:=False, _
SaveNativePictureFormat:=False, SaveFormsData:=False, SaveAsAOCELetter:= _
False, CompatibilityMode:=15
ActiveDocument.Close
file = Dir
Loop
End Sub
然后,在 Excel 中运行下面的代码。
Sub OpenAndReadWordDoc()
Rows("2:1000000").Select
Range(Selection, Selection.End(xlDown)).Select
Selection.ClearContents
Range("A1").Select
' assumes that the previous procedure has been executed
Dim oWordApp As Word.Application
Dim oWordDoc As Word.Document
Dim blnStart As Boolean
Dim r As Long
Dim sFolder As String
Dim strFilePattern As String
Dim strFileName As String
Dim sFileName As String
Dim ws As Worksheet
Dim c As Long
Dim n As Long
Dim iCount As Long
Dim strSearch As String
'~~> Establish an Word application object
On Error Resume Next
Set oWordApp = GetObject(, "Word.Application")
If Err Then
Set oWordApp = CreateObject("Word.Application")
' We started Word for this macro
blnStart = True
End If
On Error GoTo ErrHandler
Set ws = ActiveSheet
r = 1 ' startrow for the copied text from the Word document
' Last column
n = ws.Range("A1").End(xlToRight).Column
sFolder = "C:\Users\Excel\Desktop\test\"
'~~> This is the extension you want to go in for
strFilePattern = "*.doc*"
'~~> Loop through the folder to get the word files
strFileName = Dir(sFolder & strFilePattern)
Do Until strFileName = ""
sFileName = sFolder & strFileName
'~~> Open the word doc
Set oWordDoc = oWordApp.Documents.Open(sFileName)
' Increase row number
r = r + 1
' Enter file name in column A
ws.Cells(r, 1).Value = sFileName
ActiveCell.Offset(1, 0).Select
ActiveSheet.Hyperlinks.Add Anchor:=Sheets("Sheet1").Range("A" & r), Address:=sFileName, _
SubAddress:="A" & r, TextToDisplay:=sFileName
' Loop through the columns
For c = 2 To n
If oWordDoc.Content.Find.Execute(FindText:=Trim(ws.Cells(1, c).Value), _
MatchWholeWord:=True, MatchCase:=False) Then
strSearch = ws.Cells(1, c).Value
iCount = 0
With ActiveDocument.Content.Find
.Text = strSearch
.Format = False
.Wrap = wdFindStop
Do While .Execute
iCount = iCount + 1
Loop
End With
ws.Cells(r, c).Value = iCount
End If
Next c
oWordDoc.Close SaveChanges:=False
'~~> Find next file
strFileName = Dir
Loop
ExitHandler:
On Error Resume Next
' close the Word application
Set oWordDoc = Nothing
If blnStart Then
' We started Word, so we close it
oWordApp.Quit
End If
Set oWordApp = Nothing
Exit Sub
ErrHandler:
MsgBox Err.Description, vbExclamation
Resume ExitHandler
End Sub
Function GetDirectory(path)
GetDirectory = Left(path, InStrRev(path, "\"))
End Function
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.