From 43a37f09b698514637fbcf3d1c0ceba2e8a07bd9 Mon Sep 17 00:00:00 2001 From: Kai Koehler Date: Fri, 12 Jul 2024 17:02:21 -0700 Subject: [PATCH 1/3] Prepare for deployment --- .github/workflows/ci.yaml | 26 +++++++++++ Dockerfile | 64 ++++++++++++++++++++++++++ scripts/docker-tag.sh | 14 ++++++ scripts/install-base-packages.sh | 33 +++++++++++++ scripts/install-dependency-packages.sh | 33 +++++++++++++ 5 files changed, 170 insertions(+) create mode 100644 Dockerfile create mode 100755 scripts/docker-tag.sh create mode 100755 scripts/install-base-packages.sh create mode 100755 scripts/install-dependency-packages.sh diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6aa7d45..6647fb7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -55,6 +55,32 @@ jobs: tox-envs: "py,coverage-report,typing" tox-requirements: "requirements/tox.txt" + build: + runs-on: ubuntu-latest + needs: [lint, test] + timeout-minutes: 10 + + # Only do Docker builds of tagged releases and pull requests from ticket + # branches. This will still trigger on pull requests from untrusted + # repositories whose branch names match our tickets/* branch convention, + # but in this case the build will fail with an error since the secret + # won't be set. + if: > + startsWith(github.ref, 'refs/tags/') + || startsWith(github.head_ref, 'tickets/') + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: lsst-sqre/build-and-push-to-ghcr@v1 + id: build + with: + image: ${{ github.repository }} + # TODO: set token + github_token: ${{ secrets.GITHUB_TOKEN }} + docs: runs-on: ubuntu-latest diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..f4e42f5 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,64 @@ +# This Dockerfile has three stages: +# +# base-image +# Updates the base Python image with security patches and common system +# packages. This image becomes the base of all other images. 
+# install-image +# Installs third-party dependencies (requirements/main.txt) and the +# application into a virtual environment. This virtual environment is +# ideal for copying across build stages. +# runtime-image +# - Copies the virtual environment into place. +# - Runs a non-root user. +# - Sets up the entrypoint and port. + +FROM python:3.12.4-slim-bookworm as base-image + +# Update system packages +COPY scripts/install-base-packages.sh . +RUN ./install-base-packages.sh && rm ./install-base-packages.sh + +FROM base-image AS install-image + +# Install system packages only needed for building dependencies. +COPY scripts/install-dependency-packages.sh . +RUN ./install-dependency-packages.sh + +# Create a Python virtual environment +ENV VIRTUAL_ENV=/opt/venv +RUN python -m venv $VIRTUAL_ENV + +# Make sure we use the virtualenv +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + +# Put the latest pip and setuptools in the virtualenv +RUN pip install --upgrade --no-cache-dir pip setuptools wheel + +# Install the app's Python runtime dependencies +COPY requirements/main.txt ./requirements.txt +RUN pip install --quiet --no-cache-dir -r requirements.txt + +# Install the application. +COPY . /workdir +WORKDIR /workdir +RUN pip install --no-cache-dir . + +FROM base-image AS runtime-image + +# Create a non-root user. +RUN useradd --create-home appuser + +# Copy the virtualenv. +COPY --from=install-image /opt/venv /opt/venv + +# Make sure we use the virtualenv. +ENV PATH="/opt/venv/bin:$PATH" + +# Switch to the non-root user. +USER appuser + +# Expose the port. +EXPOSE 8080 + +# Run the application. +CMD ["sasquatchbackpack", "usgs-earthquake-data", "-d", "10", "0"] \ No newline at end of file diff --git a/scripts/docker-tag.sh b/scripts/docker-tag.sh new file mode 100755 index 0000000..080a9c9 --- /dev/null +++ b/scripts/docker-tag.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Determine the tag for Docker images based on GitHub Actions environment +# variables. 
+ +set -eo pipefail + +if [ -n "$GITHUB_HEAD_REF" ]; then + # For pull requests + echo ${GITHUB_HEAD_REF} | sed -E 's,/,-,g' +else + # For push events + echo ${GITHUB_REF} | sed -E 's,refs/(heads|tags)/,,' | sed -E 's,/,-,g' +fi diff --git a/scripts/install-base-packages.sh b/scripts/install-base-packages.sh new file mode 100755 index 0000000..0292762 --- /dev/null +++ b/scripts/install-base-packages.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# This script updates packages in the base Docker image that's used by both +# the build and runtime images, and gives us a place to install additional +# system-level packages with apt-get. +# +# Based on the blog post: +# https://pythonspeed.com/articles/system-packages-docker/ + +# Bash "strict mode", to help catch problems and bugs in the shell +# script. Every bash script you write should include this. See +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ for details. +set -euo pipefail + +# Display each command as it's run. +set -x + +# Tell apt-get we're never going to be able to give manual feedback. +export DEBIAN_FRONTEND=noninteractive + +# Update the package listing, so we know what packages exist. +apt-get update + +# Install security updates. +apt-get -y upgrade + +# Install dependencies required at runtime. git is used to check out notebook +# repositories. git-lfs is required to check the Git LFS service. +apt-get -y install --no-install-recommends git git-lfs + +# Delete cached files we don't need anymore. +apt-get clean +rm -rf /var/lib/apt/lists/* diff --git a/scripts/install-dependency-packages.sh b/scripts/install-dependency-packages.sh new file mode 100755 index 0000000..fd82be3 --- /dev/null +++ b/scripts/install-dependency-packages.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# This script installs additional packages used by the dependency image but +# not needed by the runtime image, such as additional packages required to +# build Python dependencies. 
+# +# Since the base image wipes all the apt caches to clean up the image that +# will be reused by the runtime image, we unfortunately have to do another +# apt-get update here, which wastes some time and network. + +# Bash "strict mode", to help catch problems and bugs in the shell +# script. Every bash script you write should include this. See +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ for details. +set -euo pipefail + +# Display each command as it's run. +set -x + +# Tell apt-get we're never going to be able to give manual feedback. +export DEBIAN_FRONTEND=noninteractive + +# Update the package listing, so we know what packages exist. +apt-get update + +# Install various dependencies that may be required to install +# sasquatchbackpack: +# build-essential: sometimes needed to build Python modules +# libffi-dev: sometimes needed to build cffi, a cryptography dependency +apt-get -y install --no-install-recommends build-essential libffi-dev + +# Delete cached files we don't need anymore. 
+apt-get clean +rm -rf /var/lib/apt/lists/* From 5c55c71a20006a38547a80e391c407839e89b89b Mon Sep 17 00:00:00 2001 From: Kai Koehler Date: Thu, 18 Jul 2024 13:56:55 -0700 Subject: [PATCH 2/3] Add relative schema path --- src/sasquatchbackpack/sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sasquatchbackpack/sources.py b/src/sasquatchbackpack/sources.py index ff14252..b8b3ae3 100644 --- a/src/sasquatchbackpack/sources.py +++ b/src/sasquatchbackpack/sources.py @@ -57,7 +57,7 @@ class USGSConfig: radius: int coords: tuple[float, float] magnitude_bounds: tuple[int, int] - schema_file: str = "src/sasquatchbackpack/schemas/usgs.avsc" + schema_file: str = "./schemas/usgs.avsc" class USGSSource(DataSource): From 2ac2ec08e9d9afa5f167acdbdf99d9324b4c11c8 Mon Sep 17 00:00:00 2001 From: Kai Koehler Date: Thu, 18 Jul 2024 14:53:18 -0700 Subject: [PATCH 3/3] Add cron schema --- src/sasquatchbackpack/sources.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/sasquatchbackpack/sources.py b/src/sasquatchbackpack/sources.py index b8b3ae3..cd3a448 100644 --- a/src/sasquatchbackpack/sources.py +++ b/src/sasquatchbackpack/sources.py @@ -51,13 +51,19 @@ class USGSConfig: Directory path to the relevant source schema (src/sasquatchbackpack/schemas/schema_name_here.avsc), optional, defaults to src/sasquatchbackpack/schemas/usgs.avsc + cron_schema : `str`, optional + Path to the schema file used as a fallback when run from a cronjob. """ duration: timedelta radius: int coords: tuple[float, float] magnitude_bounds: tuple[int, int] - schema_file: str = "./schemas/usgs.avsc" + schema_file: str = "src/sasquatchbackpack/schemas/usgs.avsc" + cron_schema: str = ( + "/opt/venv/lib/python3.12/site-packages/" + "sasquatchbackpack/schemas/usgs.avsc" + ) class USGSSource(DataSource): @@ -88,8 +94,12 @@ def load_schema(self) -> str: """Query the USGS API using the current provided parameters, then update results. 
""" - with Path(self.config.schema_file).open("r") as file: - return file.read() + try: + with Path(self.config.schema_file).open("r") as file: + return file.read() + except FileNotFoundError: + with Path(self.config.cron_schema).open("r") as file: + return file.read() def get_records(self) -> list[dict]: """Call the USGS Comcat API and assembles records.