Skip to content

fix: s5cmd is missing in local mode #180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/emd/commands/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def deploy(
region = get_current_region()

if region != LOCAL_REGION:
smart_bootstrap_manager.auto_bootstrap_if_needed(region)
smart_bootstrap_manager.auto_bootstrap_if_needed(region, skip_confirm)

if dockerfile_local_path:
response = sdk_deploy(
Expand Down
27 changes: 14 additions & 13 deletions src/emd/utils/smart_bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def show_version_mismatch_warning(self, current_version: str, deployed_version:
self.console.print() # Empty line for spacing


def auto_bootstrap_if_needed(self, region: str) -> bool:
def auto_bootstrap_if_needed(self, region: str, skip_confirm: bool = False) -> bool:
"""
Automatically run bootstrap if needed based on comprehensive infrastructure check
Returns: True if bootstrap was run, False otherwise
Expand All @@ -145,18 +145,19 @@ def auto_bootstrap_if_needed(self, region: str) -> bool:
# Infrastructure missing/incomplete OR version mismatch - ask for confirmation
self.show_bootstrap_notification(current_version, deployed_version)

# Ask for user confirmation
if deployed_version:
# Update scenario
confirm_msg = f"Update infrastructure from {deployed_version} to {current_version}?"
else:
# Initialize scenario
confirm_msg = f"Initialize EMD infrastructure for version {current_version}?"

if not typer.confirm(confirm_msg, default=False):
self.console.print("[yellow]Bootstrap cancelled. Infrastructure will not be updated.[/yellow]")
self.console.print("[red]Deployment cannot proceed without compatible infrastructure.[/red]")
raise typer.Exit(1)
# Ask for user confirmation unless skip_confirm is True
if not skip_confirm:
if deployed_version:
# Update scenario
confirm_msg = f"Update infrastructure from {deployed_version} to {current_version}?"
else:
# Initialize scenario
confirm_msg = f"Initialize EMD infrastructure for version {current_version}?"

if not typer.confirm(confirm_msg, default=False):
self.console.print("[yellow]Bootstrap cancelled. Infrastructure will not be updated.[/yellow]")
self.console.print("[red]Deployment cannot proceed without compatible infrastructure.[/red]")
raise typer.Exit(1)

# User confirmed - proceed with bootstrap
try:
Expand Down
46 changes: 39 additions & 7 deletions src/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import importlib
import json
import logging
import urllib.request
import zipfile
from concurrent.futures import as_completed,ProcessPoolExecutor

from utils.common import download_file_from_s3_by_s5cmd
Expand All @@ -24,6 +26,9 @@

logger = get_logger(__name__)

# Global constants
S5CMD_PATH = "./s5cmd"


def parse_args():
parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -241,19 +246,46 @@ def get_executable_model(args):
return execute_model

def download_s5cmd():
    """Download the s5cmd binary to ``S5CMD_PATH`` and return that path.

    The binary is fetched as a zip archive from a fixed us-east-1 S3 URL
    (always used, including for local deployment) and extracted into the
    current working directory. If ``S5CMD_PATH`` already exists, nothing is
    downloaded.

    Returns:
        The value of ``S5CMD_PATH``.

    Raises:
        RuntimeError: If the download, extraction, or permission change
            fails, or if the archive did not produce ``S5CMD_PATH``. The
            underlying error is attached as ``__cause__``.
    """
    # Reuse a binary left by a previous run instead of downloading again.
    if os.path.exists(S5CMD_PATH):
        return S5CMD_PATH

    s5cmd_url = "https://aws-gcr-solutions-us-east-1.s3.us-east-1.amazonaws.com/easy-model-deployer/pipeline/s5cmd.zip"
    zip_path = S5CMD_PATH + ".zip"

    try:
        try:
            # Download and extract
            urllib.request.urlretrieve(s5cmd_url, zip_path)
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(".")
        finally:
            # Remove the archive on failure as well as success, so a partial
            # or corrupt download is not left behind in the working directory.
            if os.path.exists(zip_path):
                os.remove(zip_path)

        if not os.path.exists(S5CMD_PATH):
            raise FileNotFoundError("Required component(s5cmd) installation failed")

        # Make s5cmd executable
        os.chmod(S5CMD_PATH, os.stat(S5CMD_PATH).st_mode | 0o755)
    except Exception as e:
        # Callers see a single RuntimeError; chaining keeps the real cause
        # (network error, bad zip, missing binary) in the traceback.
        raise RuntimeError("Required component(s5cmd) download failed") from e

    return S5CMD_PATH

if __name__ == "__main__":
t0 = time.time()
start_time = time.time()
args = parse_args()
if not (check_cn_region(args.region) or args.region == LOCAL_REGION):
download_s5cmd()

s5_cmd_path = "./s5cmd"
os.chmod(s5_cmd_path, os.stat(s5_cmd_path).st_mode | 0o100)
# Download s5cmd
download_s5cmd()

if not os.path.exists(S5CMD_PATH):
logger.error("Required component(s5cmd) not found")
sys.exit(1)

os.chmod(S5CMD_PATH, os.stat(S5CMD_PATH).st_mode | 0o100)
extra_params = args.extra_params
for k,v in extra_params.items():
setattr(args,k,v)
Expand Down