My problem is with Textract's asynchronous method start_document_analysis: it has an option for the type of analysis you want to perform, but when I try to use the "Queries" feature =>
FeatureTypes=[
'TABLES'|'FORMS'|'QUERIES',
],
you would have to pass another parameter with the queries list =>
QueriesConfig={
'Queries': [
{
'Text': 'string',
'Alias': 'string',
'Pages': [
'string',
]
},
]
}
Once I pass this parameter, boto3 throws an exception saying that QueriesConfig is not recognized as one of the accepted parameters. Has anyone used this feature with Python before?
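You can use it this way (if boto3 still rejects QueriesConfig as an unknown parameter, the installed boto3 is probably too old to include the Queries feature, so upgrade it first):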
def getJobResults(jobId):
    """Collect every result page of a Textract document-analysis job.

    Calls ``get_document_analysis`` repeatedly, following ``NextToken``
    until a response arrives without one.

    Args:
        jobId: Identifier returned by ``start_document_analysis``.

    Returns:
        A list of the raw ``get_document_analysis`` responses, in the
        order they were received.
    """
    textract = boto3.client('textract')
    collected = []
    # The first request carries only the job id; later requests add the
    # continuation token from the previous response.
    request = {'JobId': jobId}
    while True:
        page = textract.get_document_analysis(**request)
        collected.append(page)
        print("Resultset page recieved: {}".format(len(collected)))
        token = page.get('NextToken')
        if not token:
            return collected
        request['NextToken'] = token
def get_kv_map(s3BucketName, documentName, queries=None, poll_interval=3):
    """Run an asynchronous Textract QUERIES analysis on a document in S3.

    Starts ``start_document_analysis`` with ``FeatureTypes=['QUERIES']``,
    polls ``get_document_analysis`` until the job is no longer
    ``IN_PROGRESS``, then gathers all result pages with ``getJobResults``.

    Args:
        s3BucketName: Name of the S3 bucket that holds the document.
        documentName: Object key of the document inside that bucket.
        queries: Optional list of query dicts passed as
            ``QueriesConfig['Queries']`` (each with ``'Text'`` and
            optionally ``'Alias'`` / ``'Pages'``). When omitted, the single
            query this function originally hard-coded is used.
        poll_interval: Seconds to sleep between job-status checks.

    Returns:
        The list of ``get_document_analysis`` responses, one per result
        page, as produced by ``getJobResults``.

    Note:
        Polling stops on any status other than ``IN_PROGRESS`` and no
        check is made that the job succeeded, so callers should inspect
        ``JobStatus`` on the returned pages.
    """
    if queries is None:
        # Backward-compatible default: the query the original code sent.
        queries = [{"Text": "is 1. A. checkbox seleted"}]

    client = boto3.client('textract')
    started = client.start_document_analysis(
        DocumentLocation={
            'S3Object': {
                'Bucket': s3BucketName,
                'Name': documentName,
            },
        },
        FeatureTypes=['QUERIES'],
        QueriesConfig={'Queries': queries},
    )
    job_id = started['JobId']

    status = client.get_document_analysis(JobId=job_id)["JobStatus"]
    while status == "IN_PROGRESS":
        time.sleep(poll_interval)
        status = client.get_document_analysis(JobId=job_id)["JobStatus"]
        print("Job status2: {}".format(status))

    return getJobResults(job_id)
def query_extraction():
    """Run the Textract query analysis on the example document.

    Returns:
        Whatever ``get_kv_map`` returns for the fixed bucket and
        document names below.
    """
    return get_kv_map("bucket-name", "xyz.pdf")
# Run the extraction when this script executes; `data` holds whatever
# query_extraction() returns.
data = query_extraction()
Hope this solves your issue.
The technical post webpages of this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. If you have any questions, please contact: yoyou2525@163.com.