I am creating a search engine that looks for a specific word in PDF files and, if it finds the word, moves the folder to a different folder.
This is what I have done so far!
import PyPDF2
import re
import os
import shutil
# Script: read a pattern from the user, scan the PDFs listed in src page by
# page for it, print each match, and copy the src directory tree to dest.
# The pattern is later handed to re.finditer, so it is treated as a regular expression.
pattern = input("Enter string pattern to search: ")
# Source and destination directories, written as relative paths.
src = 'Folder 1'
dest = 'Folder 2'
# Visit every entry name that os.listdir reports for src.
for file_name in os.listdir(src):
# NOTE(review): os.listdir yields bare entry names, and file_name is passed on
# without being joined to src, so it is looked up relative to the current
# working directory rather than inside src.
# NOTE(review): 'rb' looks like a file mode, but the second parameter of
# PdfFileReader is presumably `strict` - confirm against the PyPDF2 docs.
# NOTE(review): the name `object` shadows the Python builtin of the same name.
object = PyPDF2.PdfFileReader(file_name, 'rb')
numPages = object.getNumPages()
# Walk the pages by zero-based index and extract the text of each one.
for i in range(0, numPages):
pageObj = object.getPage(i)
text = pageObj.extractText()
# Print every regex match found in this page's text.
for match in re.finditer(pattern, text):
print(f'Page no: {i} | Match: {match}')
# Copies the whole src directory tree to dest, using shutil.copy for the files.
# NOTE(review): the indentation of this listing was lost, so which loop (if
# any) this statement belongs to cannot be determined from here.
destination = shutil.copytree(src, dest, copy_function = shutil.copy)
When I run it I get the error:
FileNotFoundError: [Errno 2] No such file or directory:
I think the error is in the following line, because it sees only the name of the file in the folder, not its path.
object = PyPDF2.PdfFileReader(file_name, 'rb')
How do I put the path in front of file_name?
from pathlib import Path # change
import PyPDF2
import re
import os
import shutil
def find_matches(pattern, page_texts):
    """Yield ``(page_index, match)`` for every regex hit in *page_texts*.

    Args:
        pattern: Regular expression (string or compiled pattern) to search for.
        page_texts: Iterable of page texts, in page order.

    Yields:
        Tuples of the zero-based page index and the ``re.Match`` object.

    Raises:
        re.error: If *pattern* is not a valid regular expression.
    """
    regex = re.compile(pattern)  # resolve the pattern once, not once per page
    for page_no, text in enumerate(page_texts):
        for match in regex.finditer(text):
            yield page_no, match


def iter_page_texts(pdf_path):
    """Yield the extracted text of each page of the PDF at *pdf_path*."""
    # The second positional parameter of PdfFileReader is `strict`, not a
    # file mode, so the stray 'rb' is dropped and only the path is passed.
    reader = PyPDF2.PdfFileReader(str(pdf_path))
    for page_no in range(reader.getNumPages()):
        yield reader.getPage(page_no).extractText()


def copy_matching_pdfs(pattern, src, dest):
    """Copy each PDF directly inside *src* that contains *pattern* into *dest*.

    Every match is printed with its zero-based page number. A file is copied
    at most once, however many pages or matches it has.

    Args:
        pattern: Regular expression to search for.
        src: Directory holding the PDFs to scan (not searched recursively).
        dest: Directory receiving the copies; created on the first match.

    Returns:
        List of destination paths, one per copied file.
    """
    src, dest = Path(src), Path(dest)
    regex = re.compile(pattern)
    copied = []
    for entry in sorted(src.iterdir()):
        # Sub-directories and non-PDF files would make the PDF reader fail.
        if not entry.is_file() or entry.suffix.lower() != '.pdf':
            continue
        found = False
        for page_no, match in find_matches(regex, iter_page_texts(entry)):
            print(f'Page no: {page_no} | Match: {match}')
            found = True
        if found:
            dest.mkdir(parents=True, exist_ok=True)
            # shutil.copy handles a single file; copytree only accepts
            # directories. Use shutil.move instead to relocate the PDF
            # rather than duplicate it.
            copied.append(shutil.copy(str(entry), str(dest / entry.name)))
    return copied


if __name__ == "__main__":
    pattern = input("Enter string pattern to search: ")
    basepath = Path('')  # absolute path to the parent of 'Folder 1' / 'Folder 2'
    src = basepath / 'Folder 1'
    dest = basepath / 'Folder 2'
    copy_matching_pdfs(pattern, src, dest)
The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.