Skip to content

Commit b4ad1d3

Browse files
committed
merge
2 parents 8d37586 + 36a4997 commit b4ad1d3

File tree

9 files changed

+412
-159
lines changed

9 files changed

+412
-159
lines changed

poetry.lock

Lines changed: 144 additions & 40 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ python = "^3.9"
2222
typer = {extras = ["all"], version = "^0.13.0"}
2323
rich = "^13.9.4"
2424
boto3 = "^1.35.0"
25-
questionary = "^2.0.1"
25+
questionary = "^2.1.0"
2626
requests = "^2.26"
2727
pydantic = "^2.9.2"
2828
sagemaker = "^2.237.0"

src/emd/cfn/ecs/template.yaml

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ Parameters:
1818
Description: The listener to be used for the ECS Endpoint
1919
PublicLoadBalancerSecurityGroup:
2020
Type: String
21-
Default: "sg-0d059e1ba522921fe"
2221
Description: The security group to be used for the ECS Endpoint
22+
APIRouterSecurityGroup:
23+
Type: String
24+
Description: The security group to be used for the API Router
2325
LambdaDeploymentHelperArn:
2426
Type: String
2527
Description: The ARN of the Lambda function for capacity provider association
@@ -35,23 +37,19 @@ Parameters:
3537
Description: The instance type to be used for the ECS Endpoint
3638
ModelId:
3739
Type: String
38-
Default: "qwen2-5-7b"
3940
Description: The emd model ID to be used for the ECS Endpoint
4041
ModelTag:
4142
Type: String
4243
Default: "dev"
4344
Description: The model tag to be used for the ECS Endpoint
4445
FrameWorkType:
4546
Type: String
46-
Default: "awq"
4747
Description: The framework type to be used for the ECS Endpoint
4848
ServiceType:
4949
Type: String
50-
Default: "instruct"
5150
Description: The service type to be used for the ECS Endpoint
5251
EngineType:
5352
Type: String
54-
Default: "qwen2-5-72b"
5553
Description: The engine type to be used for the ECS Endpoint
5654
DesiredCapacity:
5755
Type: Number
@@ -171,6 +169,12 @@ Resources:
171169
Type: Custom::GetPriorityNumber
172170
Properties:
173171
ServiceToken: !Ref LambdaDeploymentHelperArn
172+
ConvertDnsName:
173+
Type: Custom::ConvertDnsName
174+
Properties:
175+
ServiceToken: !Ref LambdaDeploymentHelperArn
176+
ModelName: !Join ['', [!Ref ModelId, '/', !Ref ModelTag]]
177+
174178
ContainerHostSecurityGroup:
175179
Type: AWS::EC2::SecurityGroup
176180
Properties:
@@ -249,6 +253,8 @@ Resources:
249253
PortMappings:
250254
- ContainerPort: !Ref ContainerPort
251255
HostPort: !Ref ContainerPort
256+
Name: model-port
257+
AppProtocol: http
252258
- ContainerPort: 80
253259
HostPort: 80
254260
# Command: [--gpus, all]
@@ -261,11 +267,11 @@ Resources:
261267
LinuxParameters:
262268
sharedMemorySize: 1024
263269
healthCheck:
264-
command: ["CMD-SHELL","curl -f http://localhost:8080/health || exit 1"]
265-
interval: 120
266-
retries: 10
267-
timeout: 60
268-
startPeriod: 120
270+
command: ["CMD-SHELL","echo 'Server is running' || exit 0"]
271+
interval: 30
272+
retries: 3
273+
timeout: 5
274+
startPeriod: 10
269275
LogConfiguration:
270276
LogDriver: awslogs
271277
Options:
@@ -310,11 +316,31 @@ Resources:
310316
- ContainerName: !Sub '${AWS::StackName}'
311317
ContainerPort: !Ref ContainerPort
312318
TargetGroupArn: !Ref ServiceTargetGroup
319+
ServiceConnectConfiguration:
320+
Enabled: true
321+
Namespace: emd-service-connect-namespace
322+
Services:
323+
- PortName: model-port
324+
DiscoveryName: !GetAtt ConvertDnsName.DnsName
325+
ClientAliases:
326+
- Port: !Ref ContainerPort
327+
DnsName: !GetAtt ConvertDnsName.DnsName
313328
ServiceSecurityGroup:
314329
Type: 'AWS::EC2::SecurityGroup'
315330
Properties:
316331
GroupDescription: Security group for service
317332
VpcId: !Ref VPCID
333+
334+
ServiceSecurityGroupIngress:
335+
Type: AWS::EC2::SecurityGroupIngress
336+
Properties:
337+
GroupId: !GetAtt ServiceSecurityGroup.GroupId
338+
IpProtocol: tcp
339+
FromPort: !Ref ContainerPort
340+
ToPort: !Ref ContainerPort
341+
SourceSecurityGroupId: !Ref APIRouterSecurityGroup
342+
Description: Allow traffic from API router service to model service
343+
318344
ServiceTargetGroup:
319345
Type: 'AWS::ElasticLoadBalancingV2::TargetGroup'
320346
Properties:
@@ -360,10 +386,19 @@ Resources:
360386
- Type: forward
361387
TargetGroupArn: !Ref ServiceTargetGroup
362388

389+
ForceApiRouterDeployment:
390+
Type: Custom::ForceApiRouterDeployment
391+
DependsOn: Service
392+
Properties:
393+
ServiceToken: !Ref LambdaDeploymentHelperArn
394+
363395
Outputs:
364396
Model:
365-
Description: Model ID used to generate the response
397+
Description: Model ID used to generate the response.
366398
Value: !Join ['', [!Ref ModelId, '/', !Ref ModelTag]]
367-
PublicLoadBalancerDNSName:
368-
Description: The DNS name of the public load balancer. To use HTTPS, create an SSL certificate in AWS Certificate Manager and attach it to the load balancer.
369-
Value: !Join ['', ['http://', !Ref DNSName, '/', !Ref ModelId, '/', !Ref ModelTag]]
399+
BaseURL:
400+
Description: Please use this URL for the OpenAI-compatible model API, like /v1/chat/completions. To use HTTPS, create an SSL certificate in AWS Certificate Manager and attach it to the load balancer.
401+
Value: !Join ['', ['http://', !Ref DNSName]]
402+
ECSServiceConnect:
403+
Description: Service Connect allows for service-to-service communications with automatic discovery using short names and standard ports.
404+
Value: !Join ['', ['http://', !GetAtt ConvertDnsName.DnsName, ':', !Ref ContainerPort]]

src/emd/cfn/sagemaker_realtime/template.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ Parameters:
4242
Type: String
4343
Description: The name of the SageMaker Endpoint
4444
Default: "Auto-generate"
45+
DNSName:
46+
Type: String
47+
Description: The DNS name of the public load balancer. To use HTTPS, create an SSL certificate in AWS Certificate Manager and attach it to the load balancer.
4548

4649
Conditions:
4750
UseDefaultEndpointName: !Equals [!Ref SageMakerEndpointName, "Auto-generate"]
@@ -153,8 +156,11 @@ Resources:
153156

154157
Outputs:
155158
Model:
156-
Description: Model ID used to generate the response
159+
Description: Model ID used to generate the response.
157160
Value: !Join ['', [!Ref ModelId, '/', !Ref ModelTag]]
161+
BaseURL:
162+
Description: Please use this URL for the OpenAI-compatible model API, like /v1/chat/completions. To use HTTPS, create an SSL certificate in AWS Certificate Manager and attach it to the load balancer.
163+
Value: !Join ['', ['http://', !Ref DNSName]]
158164
SageMakerEndpointName:
159-
Description: The name of the SageMaker Endpoint
165+
Description: You can invoke the endpoint by using the SageMaker runtime API.
160166
Value: !GetAtt SageMakerEndpoint.EndpointName

src/emd/cfn/shared/ecs_cluster.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def deploy_vpc_template(region):
170170
return vpc_id, subnets
171171

172172

173-
def deploy_ecs_cluster_template(region, vpc_id, subnets, api_router_uri, use_spot):
173+
def deploy_ecs_cluster_template(region, vpc_id, subnets, use_spot):
174174
client = boto3.client("cloudformation", region_name=region)
175175
stack_name = "EMD-ECS-Cluster"
176176
template_path = f"{CFN_ROOT_PATH}/shared/ecs_cluster.yaml"
@@ -187,10 +187,6 @@ def deploy_ecs_cluster_template(region, vpc_id, subnets, api_router_uri, use_spo
187187
"ParameterKey": "Subnets",
188188
"ParameterValue": subnets,
189189
},
190-
{
191-
"ParameterKey": "APIRouterImageURI",
192-
"ParameterValue": api_router_uri,
193-
},
194190
{
195191
"ParameterKey": "UseSpot",
196192
"ParameterValue": "yes" if use_spot else "no",
@@ -222,10 +218,10 @@ def deploy_ecs_cluster(region, vpc_id=None, subnets=None, use_spot=False):
222218
update_parameters_file("parameters.json", {"VPCID": vpc_id, "Subnets": subnets})
223219

224220
# Build and push Fargate image to ECR as the OpenAI compatible API router
225-
api_router_uri = build_router_image(region)
221+
# api_router_uri = build_router_image(region)
226222

227223
# Deploy the ECS cluster
228-
deploy_ecs_cluster_template(region, vpc_id, subnets, api_router_uri, use_spot)
224+
deploy_ecs_cluster_template(region, vpc_id, subnets, use_spot)
229225

230226
if __name__ == "__main__":
231227
deploy_ecs_cluster("us-east-1")

0 commit comments

Comments
 (0)