cHello everyone,
I try according to the following instructions:
https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference-autoscale.html
to scale an asynchronous endpoint to 0 if no requests come in or to scale it up if there are any in the queue. I have created the policy and the cloud watch alarm, but the instance always remains at 1:
I always have to specify at least 1 instance when creating the asynchronous production variant. Perhaps the problem is related to this.
I would be very happy to receive information on this, as scaling to 0 is a very important Feature for us. I've created this script:
import boto3
import json
#https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference-autoscale.html
client = boto3.client('application-autoscaling')
endpoint = "websites-document-clf-endpoint-async"
resource_id=f'endpoint/{endpoint}/variant/async-variant'
resource_id = (resource_id)
response = client.register_scalable_target(
ServiceNamespace="sagemaker",
ResourceId=resource_id,
ScalableDimension="sagemaker:variant:DesiredInstanceCount",
MinCapacity=0,
MaxCapacity=5,
)
print(json.dumps(response, indent=4))
response = client.put_scaling_policy(
PolicyName="HasBacklogWithoutCapacity-ScalingPolicy",
ServiceNamespace="sagemaker", # The namespace of the service that provides the resource.
ResourceId=resource_id, # Endpoint name
ScalableDimension="sagemaker:variant:DesiredInstanceCount", # SageMaker supports only Instance Count
PolicyType="StepScaling", # 'StepScaling' or 'TargetTrackingScaling'
StepScalingPolicyConfiguration={
"AdjustmentType": "ChangeInCapacity", # Specifies whether the ScalingAdjustment value in the StepAdjustment property is an absolute number or a percentage of the current capacity.
"MetricAggregationType": "Average", # The aggregation type for the CloudWatch metrics.
"Cooldown": 300, # The amount of time, in seconds, to wait for a previous scaling activity to take effect.
"StepAdjustments": # A set of adjustments that enable you to scale based on the size of the alarm breach.
[
{
"MetricIntervalLowerBound": 0,
"ScalingAdjustment": 1
}
]
},
)
print(json.dumps(response, indent=4))
cw_client = boto3.client('cloudwatch')
cw_response = cw_client.put_metric_alarm(
AlarmName='HasBacklogWithoutCapacity-ScalingPolicy-Alarm',
MetricName='HasBacklogWithoutCapacity',
Namespace='AWS/SageMaker',
Statistic='Average',
EvaluationPeriods= 2,
DatapointsToAlarm= 2,
Threshold= 1,
ComparisonOperator='GreaterThanOrEqualToThreshold',
TreatMissingData='missing',
Dimensions=[
{ 'Name':'EndpointName', 'Value':endpoint },
],
Period= 60,
AlarmActions=[response['PolicyARN']]
)
print(json.dumps(cw_response, indent=4))
Thx for any kind of help!