I want to build a Glue trigger that starts a Glue crawler after a Glue job has finished successfully.
I looked over the `CfnTrigger` documentation and wrote the code for it.
After running `cdk deploy` and letting the Glue job finish successfully, I found that the crawler never ran.
I don't know why it doesn't work.
The code below creates the Glue job:
# Command section of the job: a "glueetl" job running a Python 3 script that
# is read from the Glue assets bucket.
initial_load_command = aws_glue.CfnJob.JobCommandProperty(
    name="glueetl",
    python_version="3",
    script_location=f"s3://{glue_assets_bucket_name}/scripts/{initial_glue_job_script_file_name}",
)

# Allow at most one run of the job at any time.
initial_load_execution = aws_glue.CfnJob.ExecutionPropertyProperty(
    max_concurrent_runs=1,
)

# Initial-load ETL job: reads the S3 raw zone and creates the open-table-format
# table (see the description string below).
cfn_glue_job_for_initial_load = aws_glue.CfnJob(
    self,
    "initialGlueETLJob",
    name=initial_glue_job_name,
    description=f"This job loads the data from S3 Raw Zone and creates {open_table_format} table.",
    role=glue_job_role.role_arn,
    command=initial_load_command,
    default_arguments=glue_job_parameter,
    execution_property=initial_load_execution,
    glue_version="4.0",
    worker_type="G.8X",
    number_of_workers=10,
    max_retries=0,  # a failed run is not retried automatically
    timeout=2880,
)
The code below creates the Glue crawler:
# The crawler has a single Delta Lake target covering the entries of
# `delta_tables`; native Delta tables are requested and manifest writing is
# switched off.
delta_lake_crawler_targets = aws_glue.CfnCrawler.TargetsProperty(
    delta_targets=[
        aws_glue.CfnCrawler.DeltaTargetProperty(
            delta_tables=delta_tables,
            create_native_delta_table=True,
            write_manifest=False,
        )
    ],
)

# On schema changes the catalog is updated in place, and objects that have
# disappeared are deleted from the database.
delta_lake_schema_change_policy = aws_glue.CfnCrawler.SchemaChangePolicyProperty(
    update_behavior="UPDATE_IN_DATABASE",
    delete_behavior="DELETE_FROM_DATABASE",
)

# Crawler that registers the Delta Lake tables in `database`.  No schedule is
# configured here, so it has to be started explicitly or by a trigger.
delta_lake_cfn_glue_crawler = aws_glue.CfnCrawler(
    self,
    "glueCrawler",
    name=delta_lake_glue_crawler_name,
    description="Glue Crawler for Delta Lake Tables",
    role=glue_crawler_role.role_arn,
    database_name=database,
    targets=delta_lake_crawler_targets,
    schema_change_policy=delta_lake_schema_change_policy,
)
The code below creates the Glue trigger:
# Entry point of the trigger chain.
#
# AWS Glue only starts dependent jobs/crawlers when the job that completed was
# itself started by a trigger.  A run started directly from the console, CLI
# or SDK never satisfies the predicate of the CONDITIONAL trigger below, which
# is why the crawler never ran.  Start the initial load through this trigger
# instead, e.g.:
#
#     aws glue start-trigger --name <initial_glue_job_name>-start
#
# `start_on_creation` is intentionally not set: it is not supported for
# ON_DEMAND triggers.
initial_job_start_trigger = aws_glue.CfnTrigger(
    self,
    "initialJobStartTrigger",
    type="ON_DEMAND",
    name=f"{initial_glue_job_name}-start",
    description="Start the Initial Glue Job so that dependent triggers can fire",
    actions=[
        aws_glue.CfnTrigger.ActionProperty(
            # `.ref` resolves to the job name and makes CloudFormation create
            # the job before this trigger.
            job_name=cfn_glue_job_for_initial_load.ref,
        )
    ],
)

# Conditional trigger: start the Delta Lake crawler once the initial Glue job
# reaches the SUCCEEDED state.
#
# The job and the crawler are referenced through `.ref` rather than through
# their plain name strings.  Both resolve to the same names, but the reference
# adds an implicit CloudFormation dependency, so the trigger cannot be created
# (and activated via `start_on_creation`) before the resources it points at
# exist.
delta_lake_cfn_trigger_for_glue_crawler = aws_glue.CfnTrigger(
    self,
    "crawlerTrigger",
    type="CONDITIONAL",
    name=delta_lake_glue_trigger_name,
    description="Trigger Glue Crawler when Initial Glue Job is completed successfully",
    actions=[
        aws_glue.CfnTrigger.ActionProperty(
            crawler_name=delta_lake_cfn_glue_crawler.ref,
        )
    ],
    predicate=aws_glue.CfnTrigger.PredicateProperty(
        conditions=[
            aws_glue.CfnTrigger.ConditionProperty(
                job_name=cfn_glue_job_for_initial_load.ref,
                logical_operator="EQUALS",
                state="SUCCEEDED",
            )
        ],
        logical="ANY",
    ),
    # Activate the trigger as soon as it is created; otherwise it stays in the
    # CREATED state and never evaluates its predicate.
    start_on_creation=True,
)