I am able to extract text from my multi-page PDF using Amazon Textract. Now I want to start a Human Loop review. I have already created a workflow and specified the condition there to trigger the Human Loop. Below is my code:
import os
import json
import time
import uuid
from urllib.parse import unquote_plus
import boto3
def lambda_handler(event, context):
    """Analyze an uploaded S3 document with Textract, then start an A2I human loop.

    Args:
        event: S3 event notification; only the first entry of ``Records`` is
            processed (its ``s3.bucket.name`` and ``s3.object.key``).
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` 200 once the human loop has been started, or
        ``statusCode`` 500 when the event carries no records or the Textract
        job reports ``FAILED``.

    Raises:
        KeyError: if the ``FLOW_ARN`` environment variable is not set or the
            record does not have the expected S3 structure.
    """
    # Guard first so an empty or record-less event never creates clients or
    # touches the environment.
    records = event.get("Records") if event else None
    if not records:
        return {"statusCode": 500, "body": json.dumps("Issue processing file!")}

    textract = boto3.client("textract")
    a2i = boto3.client("sagemaker-a2i-runtime")
    flow_arn = os.environ["FLOW_ARN"]

    s3_info = records[0]["s3"]
    bucketname = str(s3_info["bucket"]["name"])
    # Object keys arrive URL-encoded in S3 notifications (spaces become '+').
    filename = unquote_plus(str(s3_info["object"]["key"]))

    # Start asynchronous analysis of the whole (possibly multi-page) document.
    response = textract.start_document_analysis(
        DocumentLocation={
            "S3Object": {
                "Bucket": bucketname,
                "Name": filename,
            }
        },
        FeatureTypes=["FORMS"],
        ClientRequestToken=str(uuid.uuid4()),
    )
    job_id = response["JobId"]

    # Poll until the job leaves IN_PROGRESS. Checking for "not in progress"
    # (rather than listing terminal states) also ends the loop on
    # PARTIAL_SUCCESS, which previously made the loop spin until the Lambda
    # timed out.
    while True:
        response = textract.get_document_analysis(JobId=job_id)
        job_status = response["JobStatus"]
        if job_status != "IN_PROGRESS":
            break
        time.sleep(5)  # Wait 5 seconds between status checks.

    # The last poll response already holds the first page of results, so no
    # extra get_document_analysis call is needed. Further pages, if any, must
    # be fetched with the response's NextToken.
    print(json.dumps(response))

    if job_status == "FAILED":
        # Do not send a failed analysis to human review.
        return {"statusCode": 500, "body": json.dumps("Issue processing file!")}

    # NOTE(review): the JSON in InputContent must match what the flow
    # definition / worker task template expects. A ValidationException
    # ("Provided InputContent is not valid") from StartHumanLoop suggests a
    # mismatch with the flow definition -- confirm which task type the flow
    # was created with and which input keys its template reads.
    a2i.start_human_loop(
        HumanLoopName=uuid.uuid4().hex,
        FlowDefinitionArn=flow_arn,
        HumanLoopInput={
            "InputContent": json.dumps(
                {
                    "InitialValue": {
                        "Bucket": bucketname,
                        "DocumentPath": filename,
                    }
                }
            )
        },
        DataAttributes={
            "ContentClassifiers": [
                "FreeOfAdultContent",
            ]
        },
    )
    return {
        "statusCode": 200,
        "body": json.dumps("Document processed successfully!"),
    }
I was expecting it to start the human loop review, but it returns the following error:
[ERROR] ValidationException: An error occurred (ValidationException) when calling the StartHumanLoop operation: Provided InputContent is not valid. Please use valid InputContent JSON and try your request again.
Could someone please point out what I am doing wrong? I need to pass my PDF in the S3 bucket to HumanLoopInput.
Thanks