import argparse
import json
import os

from emd.models.utils.serialize_utils import load_extra_params

# Post build script for ECS, it will deploy the VPC and ECS cluster.

# Root directory of the CloudFormation templates. It is a relative path, so it
# is resolved against the working directory the script is launched from.
CFN_ROOT_PATH = "../cfn"
# Seconds to sleep between two consecutive stack status polls.
WAIT_SECONDS = 10
def wait_for_stack_completion(client, stack_name):
    """Poll a CloudFormation stack until it leaves every ``*IN_PROGRESS`` status.

    Args:
        client: CloudFormation client object exposing ``describe_stacks``.
        stack_name: Name of the stack to poll.

    Raises:
        Exception: If the stack settles in any status other than
            ``CREATE_COMPLETE`` or ``UPDATE_COMPLETE``.
    """
    while True:
        response = client.describe_stacks(StackName=stack_name)
        stack_status = response["Stacks"][0]["StackStatus"]
        # Any status ending in IN_PROGRESS means an operation is still running.
        if not stack_status.endswith("IN_PROGRESS"):
            break
        print(
            f"Stack {stack_name} is currently {stack_status}. Waiting for completion..."
        )
        time.sleep(WAIT_SECONDS)

    if stack_status in ("CREATE_COMPLETE", "UPDATE_COMPLETE"):
        print(f"Stack {stack_name} deployment complete")
        return

    raise Exception(
        f"Post build stage failed. The stack {stack_name} is in an unexpected status: {stack_status}. Please visit the AWS CloudFormation Console to delete the stack."
    )
def get_stack_outputs(client, stack_name):
    """Return the ``Outputs`` list of a stack, or ``[]`` when it has none.

    Args:
        client: CloudFormation client object exposing ``describe_stacks``.
        stack_name: Name of the stack to describe.

    Returns:
        The ``Outputs`` entry of the first stack in the response, or an empty
        list when that entry is absent.
    """
    response = client.describe_stacks(StackName=stack_name)
    return response["Stacks"][0].get("Outputs", [])
def create_or_update_stack(client, stack_name, template_path, parameters=None):
    """Create the stack if it does not exist, otherwise update it in place.

    Any in-flight operation on the stack is awaited first. A stack that is
    already ``CREATE_COMPLETE`` / ``UPDATE_COMPLETE`` is updated from the
    template; a missing stack is created with termination protection enabled.

    Args:
        client: CloudFormation client object.
        stack_name: Name of the stack to create or update.
        template_path: Path of the template file whose content is sent as
            ``TemplateBody``.
        parameters: Optional list of CloudFormation parameter dicts
            (``ParameterKey`` / ``ParameterValue``). Defaults to no parameters.

    Raises:
        Exception: If the stack settles in an unexpected status, or if a
            CloudFormation request fails for a reason other than the stack
            being absent or already up to date.
    """
    # Avoid sharing one mutable default list across calls.
    parameters = [] if parameters is None else parameters
    # Remains None when describe_stacks itself fails, so the error path below
    # can always format its message.
    stack_status = None
    try:
        wait_for_stack_completion(client, stack_name)
        response = client.describe_stacks(StackName=stack_name)
        stack_status = response["Stacks"][0]["StackStatus"]

        if stack_status in ("CREATE_COMPLETE", "UPDATE_COMPLETE"):
            print(f"Stack {stack_name} already exists. Proceeding with update.")
            with open(template_path, "r") as template_file:
                template_body = template_file.read()

            client.update_stack(
                StackName=stack_name,
                TemplateBody=template_body,
                Capabilities=["CAPABILITY_NAMED_IAM"],
                Parameters=parameters,
            )

            print(f"Started update of stack {stack_name}")
            wait_for_stack_completion(client, stack_name)

    except client.exceptions.ClientError as e:
        error_text = str(e)
        if "does not exist" in error_text:
            print(f"Stack {stack_name} does not exist. Proceeding with creation.")
            with open(template_path, "r") as template_file:
                template_body = template_file.read()

            response = client.create_stack(
                StackName=stack_name,
                TemplateBody=template_body,
                Capabilities=["CAPABILITY_NAMED_IAM"],
                Parameters=parameters,
                EnableTerminationProtection=True,
            )

            stack_id = response["StackId"]
            print(f"Started deployment of stack {stack_name} with ID {stack_id}")
            wait_for_stack_completion(client, stack_name)
        elif "No updates are to be performed" in error_text:
            # update_stack rejects a no-op update with this error; the stack is
            # already in the desired state, so this is not a failure.
            print(f"Stack {stack_name} is already up to date. No update required.")
        else:
            # Keep the original error attached instead of blaming the stack
            # status, which may be healthy or unknown at this point.
            raise Exception(
                f"Post build stage failed. The CloudFormation request for stack {stack_name} "
                f"failed (last known status: {stack_status}): {e}"
            ) from e
def update_parameters_file(parameters_path, updates):
    """Merge ``updates`` into the ``Parameters`` mapping of a JSON file.

    The file is read, its ``Parameters`` dict is updated in place (existing
    keys are overwritten), and the whole document is written back with a
    4-space indent.

    Args:
        parameters_path: Path of the JSON file to rewrite.
        updates: Mapping of parameter names to their new values.

    Raises:
        KeyError: If the file has no top-level ``Parameters`` key.
    """
    with open(parameters_path, "r") as file:
        data = json.load(file)

    data["Parameters"].update(updates)

    with open(parameters_path, "w") as file:
        json.dump(data, file, indent=4)
def deploy_vpc_template(region):
    """Deploy the ``EMD-VPC`` stack and record its VPC outputs.

    The ``VPCID`` and ``Subnets`` stack outputs are written to
    ``parameters.json`` and returned.

    Args:
        region: AWS region name passed to the CloudFormation client.

    Returns:
        A ``(vpc_id, subnets)`` tuple; an element is ``None`` when the stack
        does not expose the corresponding output.
    """
    client = boto3.client("cloudformation", region_name=region)
    stack_name = "EMD-VPC"
    template_path = f"{CFN_ROOT_PATH}/vpc/template.yaml"
    create_or_update_stack(client, stack_name, template_path)

    # Index outputs by key; a later duplicate key overrides an earlier one.
    output_values = {
        output["OutputKey"]: output["OutputValue"]
        for output in get_stack_outputs(client, stack_name)
    }
    vpc_id = output_values.get("VPCID")
    subnets = output_values.get("Subnets")
    update_parameters_file("parameters.json", {"VPCID": vpc_id, "Subnets": subnets})
    return vpc_id, subnets
def deploy_ecs_cluster_template(region, vpc_id, subnets):
    """Deploy the ``EMD-ECS-Cluster`` stack and record all of its outputs.

    Every stack output is written to ``parameters.json`` under its output key.

    Args:
        region: AWS region name passed to the CloudFormation client.
        vpc_id: Value for the template's ``VPCID`` parameter.
        subnets: Value for the template's ``Subnets`` parameter.
    """
    client = boto3.client("cloudformation", region_name=region)
    stack_name = "EMD-ECS-Cluster"
    template_path = f"{CFN_ROOT_PATH}/ecs/cluster.yaml"
    create_or_update_stack(
        client,
        stack_name,
        template_path,
        [
            {
                "ParameterKey": "VPCID",
                "ParameterValue": vpc_id,
            },
            {
                "ParameterKey": "Subnets",
                "ParameterValue": subnets,
            },
        ],
    )

    outputs = get_stack_outputs(client, stack_name)
    # Write all outputs in one pass instead of rewriting the file per output;
    # the file is left untouched when the stack has no outputs.
    if outputs:
        update_parameters_file(
            "parameters.json",
            {output["OutputKey"]: output["OutputValue"] for output in outputs},
        )
def post_build():
    """Parse the command line and deploy the VPC and ECS cluster stacks.

    When ``extra_params["service_params"]`` has no ``vpc_id``, the VPC stack
    is deployed and its outputs are used. Otherwise the supplied ``vpc_id``
    and ``subnet_ids`` are used and written to ``parameters.json``. The ECS
    cluster stack is deployed in both cases.

    Only ``--region`` and ``--extra_params`` are read here; the remaining
    options are accepted but not used by this function.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--region", type=str, required=False)
    parser.add_argument("--model_id", type=str, required=False)
    parser.add_argument("--model_tag", type=str, required=False)
    parser.add_argument("--framework_type", type=str, required=False)
    parser.add_argument("--service_type", type=str, required=False)
    parser.add_argument("--backend_type", type=str, required=False)
    parser.add_argument("--model_s3_bucket", type=str, required=False)
    parser.add_argument("--instance_type", type=str, required=False)
    parser.add_argument(
        "--extra_params",
        type=load_extra_params,
        required=False,
        # Falls back to the "extra_params" environment variable, then to "{}".
        default=os.environ.get("extra_params", "{}"),
    )

    args = parser.parse_args()

    service_params = args.extra_params.get("service_params", {})

    if "vpc_id" not in service_params:
        vpc_id, subnets = deploy_vpc_template(args.region)
    else:
        vpc_id = service_params.get("vpc_id")
        # None when the caller supplied a vpc_id without subnet_ids.
        subnets = service_params.get("subnet_ids")
        update_parameters_file("parameters.json", {"VPCID": vpc_id, "Subnets": subnets})

    deploy_ecs_cluster_template(args.region, vpc_id, subnets)
# Run the post build stage only when executed as a script, not on import.
if __name__ == "__main__":
    post_build()