Skip to content

Commit ee4260d

Browse files
authored
Merge pull request #42 from gestaogovbr/clean-libs
clean libs and add requirements files
2 parents 5bc4691 + 03e931e commit ee4260d

File tree

3 files changed

+168
-33
lines changed

3 files changed

+168
-33
lines changed

Dockerfile

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,6 @@
1-
FROM apache/airflow:2.7.1-python3.10
1+
# for dev: docker build -t ghcr.io/gestaogovbr/airflow2-docker:latest-dev --build-arg dev_build=true .
22

3-
ARG PYTHON_DEPS=" \
4-
ctds==1.12.0 \
5-
tqdm==4.60.0 \
6-
ijson==3.0.4 \
7-
pysmb==1.2.6 \
8-
xlrd==1.2.0 \
9-
pygsheets==2.0.5 \
10-
ipdb==0.13.3 \
11-
py-trello==0.17.1 \
12-
PyPDF2==1.26.0 \
13-
frictionless==5.11.1 \
14-
great-expectations==0.17.2 \
15-
unidecode==1.2.0 \
16-
odfpy==1.4.1 \
17-
openpyxl==3.0.7 \
18-
pytest==6.2.5 \
19-
ckanapi==4.6 \
20-
sharepy==1.3.0 \
21-
Office365-REST-Python-Client==2.3.14 \
22-
GeoAlchemy2==0.10.2 \
23-
acryl-datahub-airflow-plugin==0.10.4 \
24-
geopandas==0.12.2 \
25-
"
3+
FROM apache/airflow:2.7.3-python3.10
264

275
USER root
286
RUN apt-get update \
@@ -38,7 +16,7 @@ RUN apt-get update \
3816
&& curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add --no-tty - \
3917
&& curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list \
4018
&& apt-get update -yqq \
41-
&& ACCEPT_EULA=Y apt-get install -yqq msodbcsql17 \
19+
&& ACCEPT_EULA=Y apt-get install -yqq msodbcsql17 mssql-tools \
4220
&& sed -i 's,^\(MinProtocol[ ]*=\).*,\1'TLSv1.0',g' /etc/ssl/openssl.cnf \
4321
&& sed -i 's,^\(CipherString[ ]*=\).*,\1'DEFAULT@SECLEVEL=1',g' /etc/ssl/openssl.cnf \
4422
&& curl -O http://acraiz.icpbrasil.gov.br/credenciadas/CertificadosAC-ICP-Brasil/ACcompactado.zip \
@@ -63,24 +41,33 @@ RUN curl https://ssltools.digicert.com/chainTester/webservice/validatecerts/cert
6341

6442
USER airflow
6543

66-
RUN if [ -n "${PYTHON_DEPS}" ]; \
67-
then pip install --no-cache-dir --user ${PYTHON_DEPS}; \
68-
fi \
69-
&& mkdir /opt/airflow/export-data
44+
WORKDIR /opt/airflow
7045

71-
RUN pip install --no-cache-dir --user \
72-
apache-airflow[jdbc,microsoft.mssql,samba,google_auth,odbc,sentry] \
46+
COPY requirements-uninstall.txt .
47+
COPY requirements-cdata-dags.txt .
48+
49+
# Python dependency layer: uninstall the packages listed in requirements-uninstall.txt,
# then install the DAG requirements, the Ro-dou requirements and the Airflow extras/providers.
# apache-airflow is pinned to AIRFLOW_VERSION (expected to be exported by the apache/airflow
# base image — confirm) so resolving the extras cannot upgrade or downgrade Airflow itself.
# The spec is quoted so the shell never treats the [...] extras list as a glob pattern.
RUN pip uninstall -y -r requirements-uninstall.txt && \
50+
pip install --no-cache-dir --user -r requirements-cdata-dags.txt && \
51+
pip install --no-cache-dir --user -r \
52+
https://raw.githubusercontent.com/gestaogovbr/Ro-dou/main/requirements.txt && \
53+
pip install --no-cache-dir --user \
54+
"apache-airflow[jdbc,microsoft.mssql,samba,odbc,sentry]==${AIRFLOW_VERSION}" \
7355
apache-airflow-providers-docker \
7456
apache-airflow-providers-common-sql \
75-
apache-airflow-providers-telegram
57+
apache-airflow-providers-telegram \
58+
acryl-datahub-airflow-plugin==0.10.4
7659

7760
ARG dev_build="false"
7861
# When dev_build is "false", install the published FastETL provider; otherwise skip it and
# install only the requirements file from the FastETL repository.
# The echo message is quoted so its asterisks are never subject to pathname expansion.
RUN \
7962
if [[ "${dev_build}" == "false" ]] ; \
8063
then pip install --no-cache-dir --user apache-airflow-providers-fastetl; \
8164
else \
82-
echo ***apache-airflow-providers-fastetl not installed*** ; \
65+
echo "***apache-airflow-providers-fastetl not installed***" && \
66+
pip install --no-cache-dir --user -r https://raw.githubusercontent.com/gestaogovbr/FastETL/main/requirements.txt ; \
8367
fi
8468

8569
# Download the Thawte RSA CA 2018 certificate to /tmp/thawte.pem, retrying every second
# until the server answers HTTP 200.
# The URL is quoted: unquoted, the shell split it at each `&`, so curl ran in the background
# with only the certKey parameter and the remainder was parsed as variable assignments.
RUN while [[ "$(curl -s -o /tmp/thawte.pem -w '%{http_code}' 'https://ssltools.digicert.com/chainTester/webservice/validatecerts/certificate?certKey=issuer.intermediate.cert.98&fileName=Thawte%20RSA%20CA%202018&fileExtension=txt')" != "200" ]]; do sleep 1; done
8670
RUN cat /tmp/thawte.pem >> /home/airflow/.local/lib/python3.10/site-packages/certifi/cacert.pem
71+
# Put the mssql-tools binaries on PATH with ENV so every later layer and every container process sees them (`source ~/.bashrc` in a RUN step only affects that step's own shell).
72+
ENV PATH="${PATH}:/opt/mssql-tools/bin"
73+
# Remove the downloaded certificate archive and the copied requirements files from the working directory.
# NOTE(review): these files were written by earlier layers, so deleting them here tidies the
# filesystem but does not shrink the image — consider removing each one in the step that creates it.
RUN rm ACcompactado.zip requirements-cdata-dags.txt requirements-uninstall.txt

requirements-cdata-dags.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# airflow-dags
2+
ctds==1.12.0
3+
tqdm==4.60.0
4+
py-trello==0.17.1
5+
frictionless==5.11.1
6+
great-expectations==0.17.2
7+
openpyxl==3.0.7
8+
Office365-REST-Python-Client==2.3.14
9+
geopandas==0.12.2
10+
pandas==1.5.2,<2
11+
12+
# airflow-dags-delog
13+
# tqdm==4.60.0
14+
unidecode==1.2.0
15+
16+
# airflow-dags-detru
17+
18+
# airflow-commons
19+
# great-expectations==0.17.2

requirements-uninstall.txt

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
# from apache-airflow-providers-amazon
2+
apache-airflow-providers-amazon
3+
aiobotocore
4+
jsonpath-ng
5+
boto3
6+
mypy-boto3-appflow
7+
mypy-boto3-rds
8+
mypy-boto3-redshift-data
9+
mypy-boto3-s3
10+
redshift-connector
11+
scramp
12+
sqlalchemy-redshift
13+
watchtower
14+
15+
# from apache-airflow-providers-elasticsearch
16+
apache-airflow-providers-elasticsearch
17+
elasticsearch
18+
elastic-transport
19+
20+
# from apache-airflow-providers-sendgrid
21+
apache-airflow-providers-sendgrid
22+
sendgrid
23+
starkbank-ecdsa
24+
25+
# from apache-airflow-providers-snowflake
26+
apache-airflow-providers-snowflake
27+
snowflake-connector-python
28+
snowflake-sqlalchemy
29+
pycryptodomex
30+
tomlkit
31+
asn1crypto
32+
oscrypto
33+
34+
# from apache-airflow-providers-microsoft-azure
35+
apache-airflow-providers-microsoft-azure
36+
adal
37+
azure-batch
38+
azure-cosmos
39+
azure-datalake-store
40+
azure-identity
41+
azure-keyvault-secrets
42+
azure-kusto-data
43+
azure-mgmt-containerinstance
44+
azure-mgmt-cosmosdb
45+
azure-mgmt-datafactory
46+
azure-mgmt-datalake-store
47+
azure-mgmt-resource
48+
azure-servicebus
49+
azure-storage-blob
50+
azure-storage-common
51+
azure-storage-file
52+
azure-storage-file-datalake
53+
azure-synapse-spark
54+
azure-common
55+
azure-core
56+
msal-extensions
57+
msrest
58+
azure-mgmt-datalake-nspkg
59+
adal
60+
azure-mgmt-nspkg
61+
azure-mgmt-containerregistry
62+
azure-mgmt-core
63+
azure-mgmt-storage
64+
azure-nspkg
65+
azure-storage-file-share
66+
67+
# from apache-airflow-providers-google
68+
apache-airflow-providers-google
69+
gcloud-aio-auth
70+
gcloud-aio-bigquery
71+
gcloud-aio-storage
72+
google-ads
73+
google-api-core
74+
google-api-python-client
75+
google-cloud-aiplatform
76+
google-cloud-automl
77+
google-cloud-batch
78+
google-cloud-bigquery-datatransfer
79+
google-cloud-bigtable
80+
google-cloud-build
81+
google-cloud-compute
82+
google-cloud-container
83+
google-cloud-datacatalog
84+
google-cloud-dataflow-client
85+
google-cloud-dataform
86+
google-cloud-dataplex
87+
google-cloud-dataproc
88+
google-cloud-dataproc-metastore
89+
google-cloud-dlp
90+
google-cloud-kms
91+
google-cloud-language
92+
google-cloud-logging
93+
google-cloud-memcache
94+
google-cloud-monitoring
95+
google-cloud-orchestration-airflow
96+
google-cloud-os-login
97+
google-cloud-pubsub
98+
google-cloud-redis
99+
google-cloud-run
100+
google-cloud-secret-manager
101+
google-cloud-spanner
102+
google-cloud-speech
103+
google-cloud-storage
104+
google-cloud-storage-transfer
105+
google-cloud-tasks
106+
google-cloud-texttospeech
107+
google-cloud-translate
108+
google-cloud-videointelligence
109+
google-cloud-vision
110+
google-cloud-workflows
111+
grpcio-gcp
112+
json-merge-patch
113+
looker-sdk
114+
pandas-gbq
115+
proto-plus
116+
PyOpenSSL
117+
sqlalchemy-bigquery
118+
sqlalchemy-spanner
119+
google-auth-oauthlib
120+
google-cloud-appengine-logging
121+
google-cloud-audit-log
122+
google-cloud-bigquery
123+
google-cloud-bigquery-storage
124+
google-cloud-core
125+
google-cloud-resource-manager
126+
google-crc32c
127+
google-resumable-media
128+
grpc-google-iam-v1
129+
pydata-google-auth

0 commit comments

Comments
 (0)