Skip to content

Commit 84e0870

Browse files
committed
"Usage of Not" Replication Package
0 parents  commit 84e0870

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+2398
-0
lines changed

.dockerignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
jsonschemacorpus_dump.sql
2+
readme-resources
3+
servers.json
4+
.git

.gitattributes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.gz filter=lfs diff=lfs merge=lfs -text
2+
*.tar filter=lfs diff=lfs merge=lfs -text

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*jsonschemacorpus_dump.sql

CITATION.cff

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# YAML 1.2
2+
---
3+
abstract: "Replication package for the empirical study on the usage of the operator \"not\" in JSON Schema."
4+
authors:
5+
-
6+
affiliation: "Sorbonne Université"
7+
family-names: Baazizi
8+
given-names: "Mohamed Amine"
9+
-
10+
affiliation: "Université Paris-Dauphine"
11+
family-names: Colazzo
12+
given-names: Dario
13+
-
14+
affiliation: "Università di Pisa"
15+
family-names: Ghelli
16+
given-names: Giorgio
17+
-
18+
affiliation: "Università di Pisa"
19+
family-names: Sartiani
20+
given-names: Carlo
21+
-
22+
affiliation: "Universität Passau"
23+
family-names: Scherzinger
24+
given-names: Stefanie
25+
cff-version: "1.1.0"
26+
doi: "10.5281/zenodo.5141378"
27+
license: "CC-BY-4.0"
28+
message: "If you use this software, please cite it using these metadata."
29+
repository-code: "https://github.com/sdbs-uni-p/usage-of-not-replication"
30+
title: "\"Usage of Not\" Replication Package"
31+
date-released: 2021-08-24
32+
version: "1.0"
33+
...

Dockerfile

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
# -----------------------------------------------------------------------------------------------------------------
2+
# Replication package for "Usage of Not"
3+
#
4+
# Description: Postgres SQL database containing a corpus of over 80thousand JSON Schema documents,
5+
# collected from open source GitHub repositories.
6+
# Version: 1.0
7+
# License: Creative Commons Attribution 4.0 International License (https://creativecommons.org/licenses/by/4.0/)
8+
# SPDX-License-Identifier: CC-BY-4.0
9+
# Copyright: Copyright 2021 (c): Stefanie Scherzinger <stefanie.scherzinger@uni-passau.de>,
10+
# Copyright 2021 (c): Thomas Pilz <thomas.pilz@st.oth-regensburg.de>
11+
# -----------------------------------------------------------------------------------------------------------------
12+
13+
# Start from long-term maintained base distribution
14+
FROM ubuntu:18.04
15+
16+
#--- IMAGE DETAILS ---
17+
LABEL maintainers="Stefanie Scherzinger <stefanie.scherzinger@uni-passau.de>, Thomas Pilz <thomas.pilz@st.oth-regensburg.de>"
18+
LABEL version="1.0"
19+
LABEL description="Postgres SQL database containing a corpus of over 80thousand JSON Schema documents,\
20+
collected from open source GitHub repositories."
21+
LABEL license="Creative Commons Attribution 4.0 International License (https://creativecommons.org/licenses/by/4.0/)"
22+
LABEL copyright="Copyright 2021 (c): Stefanie Scherzinger <stefanie.scherzinger@uni-passau.de>, Copyright 2021 (c): Thomas Pilz <thomas.pilz@st.oth-regensburg.de>"
23+
LABEL spdx-license-identifier="CC-BY-4.0"
24+
25+
#--- ENVIRONMENT VARIABLES ---
26+
# For details on environment variables refer to README.md
27+
# Configure OS environment
28+
# Build-time only: ARG keeps apt non-interactive for the RUN steps below without persisting the setting into the runtime environment
ARG DEBIAN_FRONTEND=noninteractive
29+
ENV LANG="C.UTF-8"
30+
ENV LC_ALL="C.UTF-8"
31+
ENV ROOT_PW="password"
32+
# Create user with password and a database
33+
ENV POSTGRES_USER="root"
34+
ENV POSTGRES_PASSWORD="password"
35+
ENV POSTGRES_DB="jsonschemacorpus"
36+
ENV POSTGRES_PORT=5432
37+
# PostgreSQL major version
38+
ENV PG_MAJOR_VERSION=12
39+
# Name of SQL-Dump file (used in setup.sh)
40+
ENV SQL_DUMP_FNAME="jsonschemacorpus_dump.sql"
41+
# Name of directory where data etc. are stored
42+
ENV WORKDIR=/json-schema-corpus
43+
# Include postgres executables in PATH
44+
ENV PATH=${PATH}:/usr/lib/postgresql/${PG_MAJOR_VERSION}/bin
45+
46+
# 1. Make en_GB.UTF-8 locale so postgres will be utf-8 enabled by default
47+
RUN apt-get update && apt-get install -y locales && rm -rf /var/lib/apt/lists/* \
48+
&& localedef -i en_GB -c -f UTF-8 -A /usr/share/locale/locale.alias en_GB.UTF-8
49+
# Select the en_GB UTF-8 locale generated above (key=value form; the space-separated ENV syntax is legacy)
# NOTE(review): LC_ALL="C.UTF-8" is set earlier in this file and takes precedence over LANG - confirm this is intended
ENV LANG=en_GB.utf8
50+
51+
# 2. Install utilities
52+
# Install helper tools: ca-certificates/gnupg2/wget are needed to fetch and import the repository key in the next step.
# apt-get is used instead of apt because apt's command-line interface is not guaranteed to be stable for scripts.
# The package lists are removed in the same layer so they do not end up in the image.
RUN set -ex; \
53+
apt-get update && apt-get install -y --no-install-recommends \
54+
ca-certificates \
55+
gnupg2 \
56+
gzip \
57+
nano \
58+
vim \
59+
wget \
&& rm -rf /var/lib/apt/lists/*
60+
61+
# 3. Add the PostgreSQL apt repository (bionic-pgdg) and import its signing key
62+
RUN set -x; \
63+
sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt bionic-pgdg main" > /etc/apt/sources.list.d/pgdg.list' \
64+
&& wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -
65+
66+
# 4. Install postgres
67+
RUN set -ex; \
68+
apt-get update \
69+
&& apt-get -y install --no-install-recommends \
70+
postgresql-${PG_MAJOR_VERSION} \
71+
&& rm -rf /var/lib/apt/lists/*
72+
73+
# 4b. Configure postgres
74+
USER postgres
75+
RUN set -ex; \
76+
# Start the postgres service so that psql and createdb below can connect during the build
77+
/etc/init.d/postgresql start \
78+
# Enable trust authentication for all hosts, databases and users so no password is required
79+
&& echo "host all all all trust" >> /etc/postgresql/${PG_MAJOR_VERSION}/main/pg_hba.conf \
80+
# Allow remote connections by listening on all addresses
81+
&& echo "listen_addresses='*'" >> /etc/postgresql/${PG_MAJOR_VERSION}/main/postgresql.conf \
82+
# Replace the default port 5432 with POSTGRES_PORT (service will be restarted later)
83+
&& sed -i "s/port = 5432/port = ${POSTGRES_PORT}/g" /etc/postgresql/${PG_MAJOR_VERSION}/main/postgresql.conf \
84+
# Create postgres superuser POSTGRES_USER with password POSTGRES_PASSWORD
85+
&& psql -c "CREATE USER ${POSTGRES_USER} WITH SUPERUSER PASSWORD '${POSTGRES_PASSWORD}';" \
86+
# Create database POSTGRES_DB owned by the created user; the quoted string is the database description
87+
&& createdb -O ${POSTGRES_USER} ${POSTGRES_DB} "Corpus of over 80thousand JSON schema documents" \
88+
# Update search_path to include schema "jsonnegation" which will be created by restore
89+
&& psql -c "ALTER DATABASE ${POSTGRES_DB} SET search_path = jsonnegation, \"\$user\", public;" \
90+
# Shut down the postgres service again, otherwise it will not exit properly
91+
&& /etc/init.d/postgresql stop
92+
USER root
93+
94+
95+
# 5. Set the default STOPSIGNAL to SIGINT, which corresponds to what PostgreSQL
96+
# calls "Fast Shutdown mode" wherein new connections are disallowed and any
97+
# in-progress transactions are aborted, allowing PostgreSQL to stop cleanly and
98+
# flush tables to disk, which is the best compromise available to avoid data
99+
# corruption.
100+
# See https://www.postgresql.org/docs/12/server-shutdown.html for more details
101+
# about available PostgreSQL server shutdown signals.
102+
STOPSIGNAL SIGINT
103+
104+
# 6. Make working directory for required files
105+
RUN mkdir -p ${WORKDIR}
106+
WORKDIR ${WORKDIR}
107+
108+
# 7. Copy SQL dump
109+
COPY ./${SQL_DUMP_FNAME}.gz ./${SQL_DUMP_FNAME}.gz
110+
111+
# 8. Copy init script
112+
COPY ./scripts/init.sh ./scripts/init.sh
113+
114+
# 9. Restore data from SQL-dump
115+
# Verify the SHA-256 checksum of the copied dump, abort the build with an error message on mismatch,
# then run the init script. printf is used for the message because RUN executes under /bin/sh,
# whose echo does not portably support -e (it would print a literal "-e").
RUN set -ex; \
116+
# Verify SQL-dump was downloaded correctly and stop build (exit code 1) if not
117+
echo "9609b35fffe654fc3379773492ee4fac1c519193f646c6fa50390eaefd08e4df ./${SQL_DUMP_FNAME}.gz" | sha256sum -c \
118+
# SQL-Dump invalid -> print a coloured error message and stop build
119+
|| { \
120+
printf '\n\n\033[0;91mERROR\033[0m SQL dump SHA-256 hash invalid. Please make sure %s is downloaded properly from git lfs. \
121+
\n\033[0;91m>\033[0m Expected hash: 9609b35fffe654fc3379773492ee4fac1c519193f646c6fa50390eaefd08e4df \
122+
\n\033[0;91m>\033[0m Expected filesize: 1475453498\n' "${SQL_DUMP_FNAME}.gz" && exit 1; }; \
123+
# Execute restore script
124+
./scripts/init.sh
125+
126+
# 10. Copy SQL-scripts
127+
COPY ./sql-queries ./sql-queries/
128+
129+
# 11. Expose postgres port
130+
EXPOSE ${POSTGRES_PORT}
131+
132+
# 12. Copy rest of the scripts
133+
COPY ./scripts/ ./scripts/
134+
135+
# 13. Postgres must be started as user postgres
136+
USER postgres
137+
138+
# 14. Executed on "docker run": Start postgres server in foreground
139+
# Exec form: the script is started directly instead of under `/bin/sh -c`, so the STOPSIGNAL
# configured above is delivered to the entrypoint script rather than to a wrapper shell.
# The path is relative to WORKDIR.
# NOTE(review): assumes entrypoint.sh is executable and begins with a shebang line - confirm
ENTRYPOINT ["./scripts/entrypoint.sh"]

0 commit comments

Comments
 (0)