Skip to content

Commit 8b6cb59

Browse files
authored
Merge pull request #89 from INCATools/additional-sources2
additional sources
2 parents d8a58bd + af957d1 commit 8b6cb59

File tree

11 files changed

+292
-28
lines changed

11 files changed

+292
-28
lines changed

ontologies.Makefile

Lines changed: 85 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,28 @@ db/oeo.owl: download/oeo.owl
229229
cp $< $@
230230

231231

232+
download/taxslim.owl: STAMP
233+
curl -L -s http://purl.obolibrary.org/obo/ncbitaxon/subsets/taxslim.obo > $@.tmp
234+
sha256sum -b $@.tmp > $@.sha256
235+
mv $@.tmp $@
236+
237+
.PRECIOUS: download/taxslim.owl
238+
239+
db/taxslim.owl: download/taxslim.owl
240+
robot convert -i $< -o $@
241+
242+
243+
download/goldterms.owl: STAMP
244+
curl -L -s https://raw.githubusercontent.com/cmungall/gold-ontology/main/gold.owl > $@.tmp
245+
sha256sum -b $@.tmp > $@.sha256
246+
mv $@.tmp $@
247+
248+
.PRECIOUS: download/goldterms.owl
249+
250+
db/goldterms.owl: download/goldterms.owl
251+
robot relax -i $< reason -o $@
252+
253+
232254
download/sdgio.owl: STAMP
233255
curl -L -s https://raw.githubusercontent.com/SDG-InterfaceOntology/sdgio/master/sdgio.owl > $@.tmp
234256
sha256sum -b $@.tmp > $@.sha256
@@ -240,6 +262,17 @@ db/sdgio.owl: download/sdgio.owl
240262
cp $< $@
241263

242264

265+
download/kin.owl: STAMP
266+
curl -L -s http://purl.org/ga4gh/kin.owl > $@.tmp
267+
sha256sum -b $@.tmp > $@.sha256
268+
mv $@.tmp $@
269+
270+
.PRECIOUS: download/kin.owl
271+
272+
db/kin.owl: download/kin.owl
273+
robot reason -i $< -o $@
274+
275+
243276
download/biovoices.owl: STAMP
244277
curl -L -s https://zenodo.org/record/5589773/files/ontology.owl?download=1 > $@.tmp
245278
sha256sum -b $@.tmp > $@.sha256
@@ -262,6 +295,17 @@ db/omop.owl: download/omop.owl
262295
cp $< $@
263296

264297

298+
download/comet.owl: STAMP
299+
curl -L -s https://raw.githubusercontent.com/linkml/linkml-common/main/project/owl/linkml_common.owl.ttl > $@.tmp
300+
sha256sum -b $@.tmp > $@.sha256
301+
mv $@.tmp $@
302+
303+
.PRECIOUS: download/comet.owl
304+
305+
db/comet.owl: download/comet.owl
306+
robot relax -i $< merge -o $@
307+
308+
265309
download/cco.owl: STAMP
266310
curl -L -s http://www.ontologyrepository.com/CommonCoreOntologies/Mid/AllCoreOntology > $@.tmp
267311
sha256sum -b $@.tmp > $@.sha256
@@ -428,7 +472,7 @@ db/mlo.owl: download/mlo.owl
428472

429473

430474
download/ito.owl: STAMP
431-
curl -L -s https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip > $@.zip.tmp && unzip -p $@.zip.tmp {ont.zip_extract_file} > $@.tmp && rm $@.zip.tmp
475+
curl -L -s https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip > $@.zip.tmp && unzip -p $@.zip.tmp ITO.owl > $@.tmp && rm $@.zip.tmp
432476
sha256sum -b $@.tmp > $@.sha256
433477
mv $@.tmp $@
434478

@@ -438,14 +482,25 @@ db/ito.owl: download/ito.owl
438482
cp $< $@
439483

440484

441-
download/reactome-Homo-sapiens.owl: STAMP
442-
curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp {ont.zip_extract_file} > $@.tmp && rm $@.zip.tmp
485+
download/reactome-hs.owl: STAMP
486+
curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp Homo_sapiens.owl > $@.tmp && rm $@.zip.tmp
487+
sha256sum -b $@.tmp > $@.sha256
488+
mv $@.tmp $@
489+
490+
.PRECIOUS: download/reactome-hs.owl
491+
492+
db/reactome-hs.owl: download/reactome-hs.owl
493+
cp $< $@
494+
495+
496+
download/reactome-mm.owl: STAMP
497+
curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp Mus_musculus.owl > $@.tmp && rm $@.zip.tmp
443498
sha256sum -b $@.tmp > $@.sha256
444499
mv $@.tmp $@
445500

446-
.PRECIOUS: download/reactome-Homo-sapiens.owl
501+
.PRECIOUS: download/reactome-mm.owl
447502

448-
db/reactome-Homo-sapiens.owl: download/reactome-Homo-sapiens.owl
503+
db/reactome-mm.owl: download/reactome-mm.owl
449504
cp $< $@
450505

451506

@@ -823,6 +878,28 @@ db/nando.owl: download/nando.owl
823878
cp $< $@
824879

825880

881+
download/ecso.owl: STAMP
882+
curl -L -s 'https://data.bioontology.org/ontologies/ECSO/submissions/64/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb' > $@.tmp
883+
sha256sum -b $@.tmp > $@.sha256
884+
mv $@.tmp $@
885+
886+
.PRECIOUS: download/ecso.owl
887+
888+
db/ecso.owl: download/ecso.owl
889+
cp $< $@
890+
891+
892+
download/enigma_context.owl: STAMP
893+
curl -L -s https://raw.githubusercontent.com/jmchandonia/CORAL/main/example/enigma/ontologies/context_measurement_ontology.obo > $@.tmp
894+
sha256sum -b $@.tmp > $@.sha256
895+
mv $@.tmp $@
896+
897+
.PRECIOUS: download/enigma_context.owl
898+
899+
db/enigma_context.owl: download/enigma_context.owl
900+
robot merge -i $< -o $@
901+
902+
826903
download/ontie.owl: STAMP
827904
curl -L -s https://ontology.iedb.org/file/ontie.owl > $@.tmp
828905
sha256sum -b $@.tmp > $@.sha256
@@ -857,14 +934,14 @@ db/nmdc_schema.owl: download/nmdc_schema.owl
857934

858935

859936
download/mixs.owl: STAMP
860-
curl -L -s https://raw.githubusercontent.com/microbiomedata/mixs-6-2-release-candidate/main/schema-derivatives/mixs_6_2_rc.owl.ttl > $@.tmp
937+
curl -L -s https://raw.githubusercontent.com/GenomicsStandardsConsortium/mixs/main/project/owl/mixs.owl.ttl > $@.tmp
861938
sha256sum -b $@.tmp > $@.sha256
862939
mv $@.tmp $@
863940

864941
.PRECIOUS: download/mixs.owl
865942

866943
db/mixs.owl: download/mixs.owl
867-
robot merge -i $< reason -o $@.tmp.owl && perl -npe 's@_6_2_rc@@g;s@-6-2-rc@@g' $@.tmp.owl > $@
944+
robot merge -i $< reason -o $@
868945

869946

870947
download/fibo.owl: STAMP
@@ -932,4 +1009,4 @@ download/%.owl: STAMP
9321009
db/%.owl: download/%.owl
9331010
robot merge -i $< -o $@
9341011

935-
EXTRA_ONTOLOGIES = upheno chiro ncit fma maxo foodon chebiplus msio modl phenio phenio_test comploinc bero aio reacto bcio icd10who ordo gard mondo-ingest oeo sdgio biovoices omop cco occo iof upa go go-lego go-amigo neo bao orcid cpont biolink biopax enanomapper mlo ito reactome-Homo-sapiens efo hcao hpinternational edam sweetAll lov schema-dot-org prov cellosaurus cosmo fhkb dbpendiaont uberoncm icd10cm co_324 ppeo interpro hgnc.genegroup hgnc sgd dictybase eccode uniprot rhea swisslipid drugbank drugcentral complexportal wikipathways drugmechdb rxnorm vccf ontobiotope nando ontie ecosim nmdc_schema mixs fibo bfo2020 bfo2020_core bfo2020_notime bfo2020_time
1012+
EXTRA_ONTOLOGIES = upheno chiro ncit fma maxo foodon chebiplus msio modl phenio phenio_test comploinc bero aio reacto bcio icd10who ordo gard mondo-ingest oeo taxslim goldterms sdgio kin biovoices omop comet cco occo iof upa go go-lego go-amigo neo bao orcid cpont biolink biopax enanomapper mlo ito reactome-hs reactome-mm efo hcao hpinternational edam sweetAll lov schema-dot-org prov cellosaurus cosmo fhkb dbpendiaont uberoncm icd10cm co_324 ppeo interpro hgnc.genegroup hgnc sgd dictybase eccode uniprot rhea swisslipid drugbank drugcentral complexportal wikipathways drugmechdb rxnorm vccf ontobiotope nando ecso enigma_context ontie ecosim nmdc_schema mixs fibo bfo2020 bfo2020_core bfo2020_notime bfo2020_time

src/semsql/builder/build.Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ PREFIX_YAML_PATH = $(PREFIX_DIR)/prefixes.yaml
6161
gzip -f $*-$(RGSUFFIX).tsv && \
6262
cat $(THIS_DIR)/indexes/*.sql | sqlite3 $@.tmp && \
6363
echo "ALTER TABLE statements ADD COLUMN graph TEXT;" | sqlite3 $@.tmp && \
64+
(test -d views && find views -maxdepth 1 -name '$(notdir $*)*.sql' -type f -print0 | xargs -0 -I{} sh -c 'sqlite3 $@.tmp< "$$1"' sh {} || echo no views ) && \
6465
mv $@.tmp $@
6566
.PRECIOUS: %.db
6667

src/semsql/builder/builder.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
import shutil
55
import subprocess
66
from pathlib import Path
7-
from typing import Optional, TextIO
7+
from typing import List, Optional, TextIO
88

99
import requests
1010
from linkml_runtime.loaders import yaml_loader
1111
from sqlalchemy import create_engine
1212
from sqlalchemy.orm import sessionmaker
1313

14-
from semsql.builder.registry import registry_schema
14+
from semsql.builder.registry import path_to_ontology_registry, registry_schema
1515
from semsql.builder.registry.registry_schema import (CompressionEnum, Makefile,
1616
MakefileRule, Ontology)
1717
from semsql.utils.makefile_utils import makefile_to_string
@@ -117,6 +117,27 @@ def connect(owl_file: str):
117117
return session
118118

119119

120+
def get_postprocessing_steps(
    ontology: str, db: str, registry_path: str = None
) -> List[str]:
    """
    Get the post-processing shell commands registered for an ontology.

    The registry is loaded from ``registry_path`` and the entry keyed by
    ``ontology`` is looked up. Each registered step is treated as a template
    in which ``{ont}`` and ``{db}`` are substituted, e.g.
    ``"sqlite3 {db} < views/reactome.sql"``.

    :param ontology: registry key of the ontology (e.g. ``reactome-mm``)
    :param db: path of the database file, substituted for ``{db}``
    :param registry_path: path to the registry YAML; when None the value of
        ``path_to_ontology_registry()`` is used
    :return: formatted commands in registry order; an empty list when the
        ontology is not in the registry or declares no steps
    """
    if registry_path is None:
        registry_path = path_to_ontology_registry()
    registry: registry_schema.Registry = yaml_loader.load(
        str(registry_path), target_class=registry_schema.Registry
    )
    entry = registry.ontologies.get(ontology)
    if entry is None:
        return []
    # NOTE(review): assumes the Ontology class exposes the
    # `post_processing_steps` slot used in ontologies.yaml -- confirm against
    # registry_schema. getattr keeps this a no-op if the slot is absent.
    templates = getattr(entry, "post_processing_steps", None) or []
    # Return command strings (not the Ontology record) so callers can pass
    # each element straight to a shell.
    return [template.format(ont=ontology, db=db) for template in templates]
139+
140+
120141
def compile_registry(registry_path: str, local_prefix_file: TextIO = None) -> str:
121142
"""
122143
Generate makefile content from registry
@@ -139,7 +160,7 @@ def compile_registry(registry_path: str, local_prefix_file: TextIO = None) -> st
139160
elif ont.zip_extract_file:
140161
command = (
141162
f"curl -L -s {ont.url} > $@.zip.tmp && "
142-
"unzip -p $@.zip.tmp {ont.zip_extract_file} "
163+
f"unzip -p $@.zip.tmp {ont.zip_extract_file} "
143164
"> $@.tmp && rm $@.zip.tmp"
144165
)
145166
elif ont.compression:
@@ -169,9 +190,8 @@ def compile_registry(registry_path: str, local_prefix_file: TextIO = None) -> st
169190
command = "robot merge -i $< -o $@"
170191
else:
171192
command = "cp $< $@"
172-
rule = MakefileRule(
173-
target=target, dependencies=dependencies, commands=[command]
174-
)
193+
commands = [command]
194+
rule = MakefileRule(target=target, dependencies=dependencies, commands=commands)
175195
makefile.rules.append(rule)
176196
if not ont.suppress:
177197
onts.append(ont.id)

src/semsql/builder/cli.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import subprocess
23
from itertools import chain, combinations
34

45
import click
@@ -62,6 +63,16 @@ def make(path, docker, **kwargs):
6263
else:
6364
docker_config = None
6465
builder.make(path, docker_config=docker_config, **kwargs)
66+
# If the target is a registry database (db/<ontology>.db), run any
# post-processing commands registered for that ontology.
import re

# Ontology ids may contain '-' and '.' (e.g. reactome-mm, hgnc.genegroup),
# which \w alone does not match. The dot before "db" is escaped and the whole
# path must match, so a path such as db/foo.db.tmp is not picked up.
matches = re.fullmatch(r"db/([\w.-]+)\.db", path)
if matches:
    ontology = matches.group(1)
    steps = builder.get_postprocessing_steps(ontology, path)
    for step in steps:
        print(f"RUNNING: {step}")
        # check=True: a failing step raises instead of being silently ignored
        subprocess.run(step, shell=True, check=True)
6576

6677

6778
@main.command()

src/semsql/builder/prefixes/prefixes.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,13 @@ OMIM,https://omim.org/entry/
7777
OMIMPS,https://www.omim.org/phenotypicSeries/PS
7878
CHR,http://purl.obolibrary.org/obo/CHR_
7979
OEO,http://openenergy-platform.org/ontology/oeo/OEO_
80+
GOLDTERMS,https://w3id.org/gold.path/
81+
GOLDVOCAB,https://w3id.org/gold.vocab/
8082
SDGIO,http://purl.unep.org/sdg/SDGIO_
83+
KIN,http://purl.org/ga4gh/kin.owl#KIN_
8184
ontorion,http://ontorion.com/namespace#
8285
omop,https://athena.ohdsi.org/search-terms/terms/
86+
comet,https://w3id.org/linkml-common/
8387
CCO,http://www.ontologyrepository.com/CommonCoreOntologies/
8488
OccO,http://purl.obolibrary.org/obo/OccO_
8589
IOFcore,https://spec.industrialontologies.org/ontology/
@@ -115,6 +119,7 @@ MESH,http://id.nlm.nih.gov/mesh/
115119
RXNORM,http://purl.bioontology.org/ontology/RXNORM/
116120
OBT,http://purl.obolibrary.org/obo/OBT_
117121
NANDO,http://nanbyodata.jp/ontology/NANDO_
122+
ECSO,http://purl.dataone.org/odo/ECSO_
118123
ONTIE,https://ontology.iedb.org/ontology/ONTIE_
119124
ECOSIM,http://purl.obolibrary.org/obo/ECOSIM_
120125
ECOSIMCONCEPT,http://purl.obolibrary.org/obo/ECOSIMCONCEPT_

src/semsql/builder/prefixes/prefixes_local.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,13 @@ OMIM,https://omim.org/entry/
1414
OMIMPS,https://www.omim.org/phenotypicSeries/PS
1515
CHR,http://purl.obolibrary.org/obo/CHR_
1616
OEO,http://openenergy-platform.org/ontology/oeo/OEO_
17+
GOLDTERMS,https://w3id.org/gold.path/
18+
GOLDVOCAB,https://w3id.org/gold.vocab/
1719
SDGIO,http://purl.unep.org/sdg/SDGIO_
20+
KIN,http://purl.org/ga4gh/kin.owl#KIN_
1821
ontorion,http://ontorion.com/namespace#
1922
omop,https://athena.ohdsi.org/search-terms/terms/
23+
comet,https://w3id.org/linkml-common/
2024
CCO,http://www.ontologyrepository.com/CommonCoreOntologies/
2125
OccO,http://purl.obolibrary.org/obo/OccO_
2226
IOFcore,https://spec.industrialontologies.org/ontology/
@@ -52,6 +56,7 @@ MESH,http://id.nlm.nih.gov/mesh/
5256
RXNORM,http://purl.bioontology.org/ontology/RXNORM/
5357
OBT,http://purl.obolibrary.org/obo/OBT_
5458
NANDO,http://nanbyodata.jp/ontology/NANDO_
59+
ECSO,http://purl.dataone.org/odo/ECSO_
5560
ONTIE,https://ontology.iedb.org/ontology/ONTIE_
5661
ECOSIM,http://purl.obolibrary.org/obo/ECOSIM_
5762
ECOSIMCONCEPT,http://purl.obolibrary.org/obo/ECOSIMCONCEPT_

src/semsql/builder/registry/ontologies.yaml

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,24 @@ ontologies:
9898
url: https://openenergyplatform.org/ontology/oeo/releases/oeo-full.owl
9999
prefixmap:
100100
OEO: http://openenergy-platform.org/ontology/oeo/OEO_
101+
taxslim:
102+
url: http://purl.obolibrary.org/obo/ncbitaxon/subsets/taxslim.obo
103+
build_command: "robot convert -i $< -o $@"
104+
goldterms:
105+
url: https://raw.githubusercontent.com/cmungall/gold-ontology/main/gold.owl
106+
build_command: "robot relax -i $< reason -o $@"
107+
prefixmap:
108+
GOLDTERMS: https://w3id.org/gold.path/
109+
GOLDVOCAB: https://w3id.org/gold.vocab/
101110
sdgio:
102111
url: https://raw.githubusercontent.com/SDG-InterfaceOntology/sdgio/master/sdgio.owl
103112
prefixmap:
104113
SDGIO: http://purl.unep.org/sdg/SDGIO_
114+
kin:
115+
url: http://purl.org/ga4gh/kin.owl
116+
build_command: "robot reason -i $< -o $@"
117+
prefixmap:
118+
KIN: "http://purl.org/ga4gh/kin.owl#KIN_"
105119
biovoices:
106120
url: https://zenodo.org/record/5589773/files/ontology.owl?download=1
107121
build_command: "robot relax -i $< merge -o $@"
@@ -110,6 +124,11 @@ ontologies:
110124
omop:
111125
prefixmap:
112126
omop: https://athena.ohdsi.org/search-terms/terms/
127+
comet:
128+
url: https://raw.githubusercontent.com/linkml/linkml-common/main/project/owl/linkml_common.owl.ttl
129+
build_command: "robot relax -i $< merge -o $@"
130+
prefixmap:
131+
comet: https://w3id.org/linkml-common/
113132
cco:
114133
url: http://www.ontologyrepository.com/CommonCoreOntologies/Mid/AllCoreOntology
115134
build_command: "robot merge -i $< relax -o $@"
@@ -166,9 +185,16 @@ ontologies:
166185
ito:
167186
url: https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip
168187
zip_extract_file: ITO.owl
169-
reactome-Homo-sapiens:
188+
reactome-hs:
170189
url: https://reactome.org/download/current/biopax.zip
171190
zip_extract_file: Homo_sapiens.owl
191+
#post_processing_steps:
192+
# - "sqlite3 {db} < views/reactome.sql"
193+
reactome-mm:
194+
url: https://reactome.org/download/current/biopax.zip
195+
zip_extract_file: Mus_musculus.owl
196+
post_processing_steps:
197+
- "sqlite3 {db} < views/reactome.sql"
172198
efo:
173199
url: http://www.ebi.ac.uk/efo/efo.owl
174200
has_imports: true
@@ -327,6 +353,17 @@ ontologies:
327353
url: "'https://data.bioontology.org/ontologies/NANDO/submissions/15/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb'"
328354
prefixmap:
329355
NANDO: http://nanbyodata.jp/ontology/NANDO_
356+
ecso:
357+
url: "'https://data.bioontology.org/ontologies/ECSO/submissions/64/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb'"
358+
prefixmap:
359+
ECSO: http://purl.dataone.org/odo/ECSO_
360+
enigma_context:
361+
url: https://raw.githubusercontent.com/jmchandonia/CORAL/main/example/enigma/ontologies/context_measurement_ontology.obo
362+
build_command: "robot merge -i $< -o $@"
363+
#meo:
364+
# url: "'https://data.bioontology.org/ontologies/MEO/submissions/9/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb'"
365+
# prefixmap:
366+
# MEO: http://purl.jp/bio/11/meo/MEO_
330367
ontie:
331368
url: https://ontology.iedb.org/file/ontie.owl
332369
prefixmap:
@@ -349,8 +386,8 @@ ontologies:
349386
linkml: https://w3id.org/linkml/
350387
mixs: https://w3id.org/mixs/
351388
mixs:
352-
url: https://raw.githubusercontent.com/microbiomedata/mixs-6-2-release-candidate/main/schema-derivatives/mixs_6_2_rc.owl.ttl
353-
build_command: "robot merge -i $< reason -o $@.tmp.owl && perl -npe 's@_6_2_rc@@g;s@-6-2-rc@@g' $@.tmp.owl > $@"
389+
url: https://raw.githubusercontent.com/GenomicsStandardsConsortium/mixs/main/project/owl/mixs.owl.ttl
390+
build_command: "robot merge -i $< reason -o $@"
354391
prefixmap:
355392
mixs: https://w3id.org/mixs/
356393
fibo:
@@ -363,6 +400,9 @@ ontologies:
363400
# BFO variant products
364401
bfo2020:
365402
url: http://purl.obolibrary.org/obo/bfo/2020/bfo.owl
403+
#post_processing_steps:
404+
# - "echo hello {db}"
405+
# - "echo goodbye {db}"
366406
bfo2020_core:
367407
url: http://purl.obolibrary.org/obo/bfo/2020/bfo-core.owl
368408
bfo2020_notime:

0 commit comments

Comments
 (0)