Hello all,
I am using Textract async StartDocumentAnalysis and GetDocumentAnalysis for detecting signatures. However, when I test the code with a PDF document the job status of GetDocumentAnalysis is always shown as In Progress.
Attached is my code for StartDocumentAnalysis and GetDocumentAnalysis:
def process_file(input_file):
try:
response = client.start_document_analysis(
DocumentLocation={
'S3Object': {
'Bucket': BUCKET_NAME,
'Name': input_file
}
},
FeatureTypes=['FORMS'],
NotificationChannel={
'SNSTopicArn': 'arn:aws:sns:us-east-2:634358717878:Detect_Sign',
'RoleArn': 'arn:aws:iam::634358717878:role/textract'
},
)
job_id = response["JobId"]
logger.info("Started analyzing the document for signatures %s on %s.", job_id, input_file)
except ClientError:
logger.exception("Couldn't analyze the document for signatures in %s.", input_file)
raise
else:
return job_id
def parse_json_file(job_id_value):
document_analysis_response = client.get_document_analysis(
JobId=job_id_value,
MaxResults=100
)
job_status = document_analysis_response['JobStatus']
while job_status != "SUCCEEDED":
if job_status == "SUCCEEDED":
signature_array = []
signature_detected = 0 # signature blanks with signatures
signature = 0 # total number of signatures in a document
blocks = document_analysis_response['Blocks']
for block in blocks:
if block['BlockType'] == 'SIGNATURE':
signature += 1
if block['Confidence'] != 0.0:
signature_detected += 1
signature_array.append(block)
logger.info("Analyzing document completed. %s JobId and %s JobStatus ", job_id_value, job_status)
logger.info("Total Number of signatures blanks: %s ", signature)
logger.info("Number of signatures detected: %s ", signature_detected)
break
else:
logger.info("Could not detect signatures. Job status is %s ", job_status)
job_status = document_analysis_response['JobStatus']