I have a large number of PDF files split across multiple folders (in a tree structure) in my Google Drive. I'd like to retain the PDF files while also creating a copy of each PDF document in Google Docs format which has been OCRd. The Google Docs file needs to have the same name as the PDF file.
How do I do this?
As part of this, I tried to at least convert a single PDF file into a Google Doc with code, but ran into issues there as well.
/**
 * Upload the blob of one Drive file through the Advanced Drive Service with
 * OCR switched on, then log the link of the file that comes back.
 *
 * The source file is looked up by the placeholder ID '<ID>'. The uploaded
 * file reuses the blob's name as its title and the blob's content type as
 * its MIME type.
 */
function pdfToDoc() {
var sourceBlob = DriveApp.getFileById('<ID>').getBlob();
// Metadata sent along with the upload.
var metadata = {};
metadata.title = sourceBlob.getName();
metadata.mimeType = sourceBlob.getContentType();
// NOTE: the author reported this insert call failing with "Empty response".
var inserted = Drive.Files.insert(metadata, sourceBlob, { ocr: true });
Logger.log(inserted.alternateLink);
}
I followed this tutorial but made some changes because I'm using v3 of the Drive API. Here is the snippet:
// Example driver: fetch the blob of the Drive file with the placeholder ID 'FILE_ID'
// and log it.
var blob = DriveApp.getFileById('FILE_ID').getBlob();
Logger.log(blob)
// Pass the blob to pdfToText (defined further down in this file) together with an
// options object carrying `ocrLanguage`, then log the text it returns.
var text = pdfToText(blob, {ocrLanguage: "en"});
Logger.log(text);
/**
 * Convert a PDF (blob) to text using Drive's built-in OCR.
 *
 * The PDF is uploaded as a Google Doc named after the source PDF (without its
 * '.pdf' extension), the Doc's body text is read back, and a plain-text copy
 * is saved under the same name with a '.txt' extension. Both files are created
 * with an empty `parents` list, i.e. no explicit destination folder.
 *
 * @param {Blob} pdfFile  Blob holding the PDF. Its name (via getName()) drives
 *                        the names of the created files; "Sample Docs" is used
 *                        when the blob has no name.
 * @param {Object} [options]
 * @param {string} [options.ocrLanguage='en']  Language hint passed to Drive's OCR.
 * @returns {string} Text extracted from the OCR'd Google Doc.
 * @throws {Error} If `pdfFile` is missing, or if the probe call to the
 *                 Advanced Drive Service fails (original error kept on `cause`).
 */
function pdfToText ( pdfFile, options ) {
  if (!pdfFile) {
    throw new Error( "pdfToText: a PDF blob is required." );
  }
  // Work on a local view of the options so the caller's object is never mutated.
  var opts = options || {};
  var ocrLanguage = opts.ocrLanguage || 'en';

  // Ensure Advanced Drive Service is enabled
  try {
    Drive.Files.list();
  }
  catch (e) {
    var notEnabled = new Error( "Enable 'Drive API' in Resources - Advanced Google Services." );
    // Keep the underlying failure reachable instead of discarding it.
    notEnabled.cause = e;
    throw notEnabled;
  }

  // Derive file names from the source blob rather than a hard-coded constant.
  var pdfName = pdfFile.getName() || "Sample Docs";
  Logger.log(pdfName);
  // Anchor on the dot so only a real '.pdf' extension is touched.
  var docName = pdfName.replace(/\.pdf$/i, '');
  var textName = pdfName.replace(/\.pdf$/i, '.txt');
  Logger.log(docName);

  // Save PDF as GDOC; the Google Docs target type is what triggers conversion.
  var docResource = {
    name: docName,
    mimeType: MimeType.GOOGLE_DOCS,
    parents: []
  };
  var insertOpts = {
    'ocr': true,
    'ocrLanguage': ocrLanguage
  };
  var gdocFile = Drive.Files.create(docResource, pdfFile, insertOpts);

  // Get text from GDOC
  var text = DocumentApp.openById(gdocFile.id).getBody().getText();

  // Save the extracted text as a separate plain-text file.
  var textResource = {
    name: textName,
    mimeType: MimeType.PLAIN_TEXT,
    parents: []
  };
  var textBlob = Utilities.newBlob(text, MimeType.PLAIN_TEXT, textName);
  Drive.Files.create(textResource, textBlob);

  return text;
}
DriveApp cannot convert a PDF to Google Docs directly, so I used the Advanced Drive Service. Just follow this link on how to enable advanced services.
Hope this helps.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.