# -----------------------------------------------------------------------------------------------------------------
# Replication package for "Usage of Not"
#
# Description: Postgres SQL database containing a corpus of over 80 thousand JSON Schema documents,
#              collected from open source GitHub repositories.
# Version: 1.0
# License: Creative Commons Attribution 4.0 International License (https://creativecommons.org/licenses/by/4.0/)
# SPDX-License-Identifier: CC-BY-4.0
# Copyright: Copyright 2021 (c): Stefanie Scherzinger <stefanie.scherzinger@uni-passau.de>,
#            Copyright 2021 (c): Thomas Pilz <thomas.pilz@st.oth-regensburg.de>
# -----------------------------------------------------------------------------------------------------------------

# Start from long-term maintained base distribution.
# NOTE(review): Ubuntu 18.04 has left standard support; the tag is kept because the
# PostgreSQL apt source configured below targets "bionic". Bump both together.
FROM ubuntu:18.04

#--- IMAGE DETAILS ---
LABEL maintainers="Stefanie Scherzinger <stefanie.scherzinger@uni-passau.de>, Thomas Pilz <thomas.pilz@st.oth-regensburg.de>" \
      version="1.0" \
      description="Postgres SQL database containing a corpus of over 80thousand JSON Schema documents, collected from open source GitHub repositories." \
      license="Creative Commons Attribution 4.0 International License (https://creativecommons.org/licenses/by/4.0/)" \
      copyright="Copyright 2021 (c): Stefanie Scherzinger <stefanie.scherzinger@uni-passau.de>, Copyright 2021 (c): Thomas Pilz <thomas.pilz@st.oth-regensburg.de>" \
      spdx-license-identifier="CC-BY-4.0"

#--- ENVIRONMENT VARIABLES ---
# For details on environment variables refer to README.md
# Configure OS environment.
# NOTE(review): ROOT_PW and POSTGRES_PASSWORD are baked into the image and visible in
# `docker history`; presumably acceptable for a local replication setup - confirm.
ENV DEBIAN_FRONTEND=noninteractive \
    LANG="C.UTF-8" \
    LC_ALL="C.UTF-8" \
    ROOT_PW="password"
# Database user, password, database name and port used when configuring postgres below
ENV POSTGRES_USER="root" \
    POSTGRES_PASSWORD="password" \
    POSTGRES_DB="jsonschemacorpus" \
    POSTGRES_PORT=5432
# PostgreSQL major version
ENV PG_MAJOR_VERSION=12
# Name of SQL-Dump file (used in setup.sh)
ENV SQL_DUMP_FNAME="jsonschemacorpus_dump.sql"
# Name of directory where data etc. are stored
ENV WORKDIR=/json-schema-corpus
# Include postgres executables in path.
# Kept as a separate ENV: a variable can only be referenced by a later instruction
# than the one that defines it.
ENV PATH=${PATH}:/usr/lib/postgresql/${PG_MAJOR_VERSION}/bin

# 1. Make en_GB.UTF-8 locale so postgres will be utf-8 enabled by default
RUN set -ex; \
    apt-get update; \
    apt-get install -y --no-install-recommends locales; \
    rm -rf /var/lib/apt/lists/*; \
    localedef -i en_GB -c -f UTF-8 -A /usr/share/locale/locale.alias en_GB.UTF-8
ENV LANG=en_GB.utf8

# 2. Install utilities (apt-get rather than apt: apt has no stable CLI for scripts).
#    The package lists are removed in the same layer so they do not end up in the image.
RUN set -ex; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        gnupg2 \
        gzip \
        nano \
        vim \
        wget; \
    rm -rf /var/lib/apt/lists/*

# 3. Register the postgres apt repository and its pgp key.
#    The key is downloaded to a file first so that a failed download aborts the build
#    instead of being hidden behind a pipe.
# NOTE(review): verify that bionic-pgdg is still served from apt.postgresql.org;
# repositories for end-of-life releases may have been moved to an archive host.
RUN set -ex; \
    echo "deb http://apt.postgresql.org/pub/repos/apt bionic-pgdg main" > /etc/apt/sources.list.d/pgdg.list; \
    wget --quiet -O /tmp/ACCC4CF8.asc https://www.postgresql.org/media/keys/ACCC4CF8.asc; \
    apt-key add /tmp/ACCC4CF8.asc; \
    rm -f /tmp/ACCC4CF8.asc

# 4. Install postgres
RUN set -ex; \
    apt-get update; \
    apt-get -y install --no-install-recommends \
        postgresql-${PG_MAJOR_VERSION}; \
    rm -rf /var/lib/apt/lists/*

# 5. Configure postgres
USER postgres
RUN set -ex; \
    # Start postgres service
    /etc/init.d/postgresql start \
    # Enable trust authentication so no password is required
    && echo "host all all all trust" >> /etc/postgresql/${PG_MAJOR_VERSION}/main/pg_hba.conf \
    # Allow remote connections
    && echo "listen_addresses='*'" >> /etc/postgresql/${PG_MAJOR_VERSION}/main/postgresql.conf \
    # Change port (service will be restarted later)
    && sed -i "s/port = 5432/port = ${POSTGRES_PORT}/g" /etc/postgresql/${PG_MAJOR_VERSION}/main/postgresql.conf \
    # Create postgres superuser
    && psql -c "CREATE USER ${POSTGRES_USER} WITH SUPERUSER PASSWORD '${POSTGRES_PASSWORD}';" \
    # Create database owned by the created user (third argument is the database comment)
    && createdb -O ${POSTGRES_USER} ${POSTGRES_DB} "Corpus of over 80thousand JSON schema documents" \
    # Update search_path to include schema "jsonnegation" which will be created by restore
    && psql -c "ALTER DATABASE ${POSTGRES_DB} SET search_path = jsonnegation, \"\$user\", public;" \
    # Shutdown Postgres service again otherwise it will not exit properly
    && /etc/init.d/postgresql stop
USER root

# 6. Set the default STOPSIGNAL to SIGINT, which corresponds to what PostgreSQL
#    calls "Fast Shutdown mode" wherein new connections are disallowed and any
#    in-progress transactions are aborted, allowing PostgreSQL to stop cleanly and
#    flush tables to disk, which is the best compromise available to avoid data
#    corruption.
#    See https://www.postgresql.org/docs/12/server-shutdown.html for more details
#    about available PostgreSQL server shutdown signals.
STOPSIGNAL SIGINT

# 7. Working directory for required files (WORKDIR creates it if it does not exist)
WORKDIR ${WORKDIR}

# 8. Copy SQL dump
COPY ./${SQL_DUMP_FNAME}.gz ./${SQL_DUMP_FNAME}.gz

# 9. Copy init script
COPY ./scripts/init.sh ./scripts/init.sh

# 10. Restore data from SQL-dump
# Expected SHA-256 of the compressed dump; declared once so the check and the error
# message cannot drift apart. Can be overridden with --build-arg for a different dump.
ARG SQL_DUMP_SHA256="9609b35fffe654fc3379773492ee4fac1c519193f646c6fa50390eaefd08e4df"
RUN set -ex; \
    # Verify SQL-dump was downloaded correctly and stop build (exit code 1) if not.
    # printf is used for the message because `echo -e` is not portable to /bin/sh.
    if ! echo "${SQL_DUMP_SHA256}  ./${SQL_DUMP_FNAME}.gz" | sha256sum -c -; then \
        printf '\n\n\033[0;91mERROR\033[0m SQL dump SHA-256 hash invalid. Please make sure %s is downloaded properly from git lfs.\n' "${SQL_DUMP_FNAME}.gz" >&2; \
        printf '\033[0;91m>\033[0m Expected hash: %s\n' "${SQL_DUMP_SHA256}" >&2; \
        printf '\033[0;91m>\033[0m Expected filesize: 1475453498\n\n' >&2; \
        exit 1; \
    fi; \
    # Execute restore script
    ./scripts/init.sh

# 11. Copy SQL-scripts
COPY ./sql-queries ./sql-queries/

# 12. Expose postgres port
EXPOSE ${POSTGRES_PORT}

# 13. Copy rest of the scripts
COPY ./scripts/ ./scripts/

# 14. Postgres must be started as user postgres
USER postgres

# 15. Executed on "docker run": Start postgres server in foreground.
#     Exec form plus `exec` replaces the wrapping shell with the script, so the script
#     runs as PID 1 and receives the STOPSIGNAL directly.
#     NOTE(review): entrypoint.sh is not visible here; it presumably needs to `exec`
#     the server itself for SIGINT to reach postgres - confirm.
ENTRYPOINT ["/bin/sh", "-c", "exec ./scripts/entrypoint.sh"]