Skip to content

Commit bb35ddc

Browse files
Merge branch 'main' into convert-test-loaders-pydantic-to-pytest
2 parents c1c94ae + bbf4f8b commit bb35ddc

File tree

1 file changed

+168
-160
lines changed

1 file changed

+168
-160
lines changed
Lines changed: 168 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import json
22
import logging
33
import os
4-
import unittest
54
from decimal import Decimal
65

6+
import pytest
77
import yaml
88
from rdflib import Graph, Namespace
99

@@ -41,176 +41,184 @@
4141
WD = Namespace("http://www.wikidata.org/entity/")
4242

4343

44-
class LoadersDumpersTestCase(unittest.TestCase):
45-
def setUp(self):
46-
view = SchemaView(SCHEMA)
47-
container: Container
48-
container = yaml_loader.load(DATA, target_class=Container)
49-
self._check_objs(view, container)
50-
test_fn = OUT_TTL
51-
rdflib_dumper.dump(container, schemaview=view, to_file=test_fn, prefix_map=prefix_map)
52-
container = rdflib_loader.load(test_fn, target_class=Container, schemaview=view, prefix_map=prefix_map)
53-
self._check_objs(view, container)
54-
test_fn = OUT_JSON
55-
json_dumper.dump(container, to_file=test_fn)
56-
container = json_loader.load(test_fn, target_class=Container)
57-
self._check_objs(view, container)
58-
test_fn = OUT_YAML
59-
yaml_dumper.dump(container, to_file=test_fn)
60-
container = yaml_loader.load(test_fn, target_class=Container)
61-
self._check_objs(view, container)
62-
# TODO: use jsonpatch to compare files
63-
64-
def test_load_from_list(self):
65-
"""
66-
Tests the load_any loader method, which can be used to load directly to a list
67-
"""
68-
view = SchemaView(SCHEMA)
69-
with open(DATA, encoding="UTF-8") as stream:
70-
data = yaml.safe_load(stream)
71-
# persons = yaml_loader.load_source(data, target_class=Person)
72-
# container = Container(persons=persons)
73-
person_dicts = data["persons"]
74-
tuples = [(yaml_loader, yaml.dump(person_dicts)), (json_loader, json.dumps(person_dicts, default=str))]
75-
for loader, person_list_str in tuples:
76-
persons = loader.loads_any(person_list_str, target_class=Person)
77-
assert isinstance(persons, list)
78-
assert isinstance(persons[0], Person)
79-
[p1] = [p for p in persons if p.id == "P:001"]
80-
[p2] = [p for p in persons if p.id == "P:002"]
81-
self.assertEqual(p1.name, "fred bloggs")
82-
self.assertEqual(p2.name, "joe schmö")
83-
self.assertEqual(p1.age_in_years, 33)
84-
self.assertEqual(p1.gender.code.text, "cisgender man")
85-
self.assertEqual(p2.gender.code.text, "transgender man")
86-
87-
def test_encoding(self):
88-
"""
89-
This will reveal if generated yaml or json files are utf-8 encoded
90-
"""
91-
# pyyaml or json read non-ascii strings just fine no matter if the
92-
# file is ascii or utf-8 encoded. So we use Python's open function
93-
# to detect undesired ascii encoding. (linkml issue #634)
94-
with open(OUT_YAML, encoding="UTF-8") as f:
95-
[p2_name_line] = [l for l in f.readlines() if "joe schm" in l]
96-
self.assertIn("joe schmö", p2_name_line)
97-
98-
with open(OUT_JSON, encoding="UTF-8") as f:
99-
[p2_name_line] = [l for l in f.readlines() if "joe schm" in l]
100-
self.assertIn("joe schmö", p2_name_line)
101-
102-
def _check_objs(self, view: SchemaView, container: Container):
103-
persons = container.persons
104-
orgs = container.organizations.values()
44+
@pytest.fixture(scope="module")
def loader_dumper_setup():
    """Round-trip the example data through RDF, JSON and YAML, checking each stage.

    The data in ``DATA`` is loaded as a ``Container``. Then, for each format
    in turn, the current container is dumped to the matching output file,
    reloaded from that file and re-validated with ``_check_objs``. Each stage
    starts from the container reloaded by the previous stage.

    Returns:
        A dict holding the ``SchemaView`` under ``"view"`` and the container
        reloaded from the YAML output under ``"container"``.
    """
    schema_view = SchemaView(SCHEMA)
    loaded: Container = yaml_loader.load(DATA, target_class=Container)
    _check_objs(schema_view, loaded)

    # RDF: dump to OUT_TTL, reload, re-check
    rdflib_dumper.dump(loaded, schemaview=schema_view, to_file=OUT_TTL, prefix_map=prefix_map)
    loaded = rdflib_loader.load(
        OUT_TTL,
        target_class=Container,
        schemaview=schema_view,
        prefix_map=prefix_map,
    )
    _check_objs(schema_view, loaded)

    # JSON: dump to OUT_JSON, reload, re-check
    json_dumper.dump(loaded, to_file=OUT_JSON)
    loaded = json_loader.load(OUT_JSON, target_class=Container)
    _check_objs(schema_view, loaded)

    # YAML: dump to OUT_YAML, reload, re-check
    yaml_dumper.dump(loaded, to_file=OUT_YAML)
    loaded = yaml_loader.load(OUT_YAML, target_class=Container)
    _check_objs(schema_view, loaded)
    # TODO: use jsonpatch to compare files

    return {"view": schema_view, "container": loaded}
72+
73+
74+
def test_load_from_list(loader_dumper_setup):
75+
"""
76+
Tests the loads_any loader method, which can be used to load directly to a list
77+
"""
78+
view = SchemaView(SCHEMA)
79+
with open(DATA, encoding="UTF-8") as stream:
80+
data = yaml.safe_load(stream)
81+
person_dicts = data["persons"]
82+
tuples = [(yaml_loader, yaml.dump(person_dicts)), (json_loader, json.dumps(person_dicts, default=str))]
83+
for loader, person_list_str in tuples:
84+
persons = loader.loads_any(person_list_str, target_class=Person)
85+
assert isinstance(persons, list)
86+
assert isinstance(persons[0], Person)
10587
[p1] = [p for p in persons if p.id == "P:001"]
10688
[p2] = [p for p in persons if p.id == "P:002"]
107-
[o1] = [o for o in orgs if o.id == "ROR:1"]
108-
[o2] = [o for o in orgs if o.id == "ROR:2"]
109-
[o3] = [o for o in orgs if o.id == "ROR:3"]
110-
[o4] = [o for o in orgs if o.id == "ROR:4"]
111-
o1cats = [c.code.text for c in o1.categories]
112-
o2cats = [c.code.text for c in o2.categories]
113-
self.assertEqual(p1.name, "fred bloggs")
114-
self.assertEqual(p2.name, "joe schmö")
115-
self.assertEqual(p1.age_in_years, 33)
116-
self.assertEqual(p1.gender.code.text, "cisgender man")
117-
self.assertEqual(p2.gender.code.text, "transgender man")
118-
self.assertCountEqual(o1cats, ["non profit", "charity"])
119-
self.assertCountEqual(o2cats, ["shell company"])
120-
p2: Person
121-
emp = p2.has_employment_history[0]
122-
self.assertEqual(emp.started_at_time, "2019-01-01")
123-
self.assertEqual(emp.is_current, True)
124-
self.assertEqual(emp.employed_at, o1.id)
125-
frel = p2.has_familial_relationships[0]
126-
self.assertEqual(frel.related_to, p1.id)
127-
# TODO: check PV vs PVText
128-
self.assertEqual(str(frel.type), "SIBLING_OF")
129-
med = p2.has_medical_history[0]
130-
self.assertEqual(med.in_location, "GEO:1234")
131-
self.assertEqual(med.diagnosis.id, "CODE:D0001")
132-
self.assertEqual(med.diagnosis.name, "headache")
133-
self.assertEqual(med.diagnosis.code_system, "CODE:D")
134-
# Check decimal representation
135-
self.assertEqual(o1.score, Decimal(1))
136-
self.assertEqual(o2.score, Decimal("1.5"))
137-
self.assertEqual(o3.score, Decimal(1))
138-
self.assertEqual(o4.score, Decimal(1))
139-
self.assertEqual(o1.min_salary, Decimal("99999.00"))
140-
141-
def test_edge_cases(self):
142-
"""
143-
Tests various edge cases:
144-
145-
- unprocessed triples (triples that cannot be reached via root objects)
146-
- mismatch between expected range categories (Type vs Class) and value (Literal vs Node)
147-
- complex range expressions (e.g. modeling a range as being EITHER string OR object
148-
"""
149-
# schema with following characterics:
150-
# - reified triples
151-
# - object has a complex union range (experimental new feature)
152-
view = SchemaView(os.path.join(INPUT_DIR, "complex_range_example.yaml"))
153-
graph = Graph()
154-
taxon_prefix_map = {
155-
"NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_",
156-
"RO": "http://purl.obolibrary.org/obo/RO_",
157-
}
158-
# this graph has the following characteristics
159-
# - blank nodes to represent statements
160-
# - some triples not reachable from roots
161-
# - implicit schema with complex ranges (rdf:object has range of either node or literal)
162-
graph.parse(os.path.join(INPUT_DIR, "bacteria-taxon-class.ttl"), format="ttl")
163-
objs = rdflib_loader.from_rdf_graph(
89+
assert p1.name == "fred bloggs"
90+
assert p2.name == "joe schmö"
91+
assert p1.age_in_years == 33
92+
assert p1.gender.code.text == "cisgender man"
93+
assert p2.gender.code.text == "transgender man"
94+
95+
96+
def test_encoding(loader_dumper_setup):
    """
    Check that the generated YAML and JSON output files are UTF-8 encoded.

    The ``loader_dumper_setup`` fixture is requested only so that the output
    files exist before they are read here.
    """
    # pyyaml and json both read non-ascii strings correctly whether the file
    # is ascii or utf-8 encoded, so the raw text is read with Python's open()
    # to detect undesired ascii encoding. (linkml issue #634)
    for out_path in (OUT_YAML, OUT_JSON):
        with open(out_path, encoding="UTF-8") as stream:
            matching_lines = [line for line in stream if "joe schm" in line]
        # Exactly one line is expected to mention this person's name.
        [p2_name_line] = matching_lines
        assert "joe schmö" in p2_name_line
110+
111+
112+
def _check_objs(view: SchemaView, container: Container):
    """Assert that ``container`` holds the expected example persons and organizations.

    ``view`` is part of the signature but is not consulted by any check.

    Raises:
        AssertionError: if any checked attribute differs from its expected value.
        ValueError: if an expected id is not matched by exactly one object.
    """

    def only(candidates, identifier):
        # Single-element unpacking fails unless exactly one candidate matches.
        [match] = [candidate for candidate in candidates if candidate.id == identifier]
        return match

    people = container.persons
    organizations = container.organizations.values()
    p1 = only(people, "P:001")
    p2 = only(people, "P:002")
    o1 = only(organizations, "ROR:1")
    o2 = only(organizations, "ROR:2")
    o3 = only(organizations, "ROR:3")
    o4 = only(organizations, "ROR:4")
    o1_categories = [category.code.text for category in o1.categories]
    o2_categories = [category.code.text for category in o2.categories]

    # Basic person attributes
    assert p1.name == "fred bloggs"
    assert p2.name == "joe schmö"
    assert p1.age_in_years == 33
    assert p1.gender.code.text == "cisgender man"
    assert p2.gender.code.text == "transgender man"

    # Organization categories, compared without regard to order
    assert sorted(o1_categories) == ["charity", "non profit"]
    assert sorted(o2_categories) == ["shell company"]

    # First employment record of the second person
    employment = p2.has_employment_history[0]
    assert employment.started_at_time == "2019-01-01"
    assert employment.is_current == True  # noqa: E712 - compared by value on purpose
    assert employment.employed_at == o1.id

    # First familial relationship of the second person
    relationship = p2.has_familial_relationships[0]
    assert relationship.related_to == p1.id
    # TODO: check PV vs PVText
    assert str(relationship.type) == "SIBLING_OF"

    # First medical history entry of the second person
    medical_event = p2.has_medical_history[0]
    assert medical_event.in_location == "GEO:1234"
    diagnosis = medical_event.diagnosis
    assert diagnosis.id == "CODE:D0001"
    assert diagnosis.name == "headache"
    assert diagnosis.code_system == "CODE:D"

    # Check decimal representation
    expected_scores = (
        (o1, Decimal(1)),
        (o2, Decimal("1.5")),
        (o3, Decimal(1)),
        (o4, Decimal(1)),
    )
    for organization, expected_score in expected_scores:
        assert organization.score == expected_score
    assert o1.min_salary == Decimal("99999.00")
151+
152+
153+
def test_edge_cases(loader_dumper_setup):
154+
"""
155+
Tests various edge cases:
156+
157+
- unprocessed triples (triples that cannot be reached via root objects)
158+
- mismatch between expected range categories (Type vs Class) and value (Literal vs Node)
159+
- complex range expressions (e.g. modeling a range as being EITHER string OR object)
160+
"""
161+
# schema with the following characteristics:
162+
# - reified triples
163+
# - object has a complex union range (experimental new feature)
164+
view = SchemaView(os.path.join(INPUT_DIR, "complex_range_example.yaml"))
165+
graph = Graph()
166+
taxon_prefix_map = {
167+
"NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_",
168+
"RO": "http://purl.obolibrary.org/obo/RO_",
169+
}
170+
# this graph has the following characteristics
171+
# - blank nodes to represent statements
172+
# - some triples not reachable from roots
173+
# - implicit schema with complex ranges (rdf:object has range of either node or literal)
174+
graph.parse(os.path.join(INPUT_DIR, "bacteria-taxon-class.ttl"), format="ttl")
175+
objs = rdflib_loader.from_rdf_graph(
176+
graph,
177+
target_class=NodeObject,
178+
schemaview=view,
179+
cast_literals=False, ## strict
180+
allow_unprocessed_triples=True, ## known issue
181+
prefix_map=taxon_prefix_map,
182+
)
183+
[obj] = objs
184+
for x in obj.statements:
185+
assert x.subject is None
186+
assert x.predicate is not None
187+
assert x.object is not None
188+
logger.info(f" x={x}")
189+
# ranges that are objects are contracted
190+
assert Triple(subject=None, predicate="rdfs:subClassOf", object="owl:Thing") in obj.statements
191+
assert Triple(subject=None, predicate="rdfs:subClassOf", object="NCBITaxon:1") in obj.statements
192+
# string ranges
193+
assert Triple(subject=None, predicate="rdfs:label", object="Bacteria") in obj.statements
194+
with pytest.raises(ValueError):
195+
rdflib_loader.from_rdf_graph(
164196
graph,
165197
target_class=NodeObject,
166198
schemaview=view,
167-
cast_literals=False, ## strict
168-
allow_unprocessed_triples=True, ## known issue
199+
cast_literals=False,
200+
allow_unprocessed_triples=False,
169201
prefix_map=taxon_prefix_map,
170202
)
171-
[obj] = objs
172-
for x in obj.statements:
173-
assert x.subject is None
174-
assert x.predicate is not None
175-
assert x.object is not None
176-
logger.info(f" x={x}")
177-
# ranges that are objects are contracted
178-
assert Triple(subject=None, predicate="rdfs:subClassOf", object="owl:Thing") in obj.statements
179-
assert Triple(subject=None, predicate="rdfs:subClassOf", object="NCBITaxon:1") in obj.statements
180-
# string ranges
181-
assert Triple(subject=None, predicate="rdfs:label", object="Bacteria") in obj.statements
182-
with self.assertRaises(ValueError) as context:
183-
rdflib_loader.from_rdf_graph(
184-
graph,
185-
target_class=NodeObject,
186-
schemaview=view,
187-
cast_literals=False,
188-
allow_unprocessed_triples=False,
189-
prefix_map=taxon_prefix_map,
190-
)
191-
logger.error("Passed unexpectedly: there are known to be unreachable triples")
192-
# removing complex range, object has a range of string
193-
view.schema.slots["object"].exactly_one_of = []
194-
view.set_modified()
203+
logger.error("Passed unexpectedly: there are known to be unreachable triples")
204+
# removing complex range, object has a range of string
205+
view.schema.slots["object"].exactly_one_of = []
206+
view.set_modified()
207+
rdflib_loader.from_rdf_graph(
208+
graph,
209+
target_class=NodeObject,
210+
schemaview=view,
211+
cast_literals=True, ## required to pass
212+
allow_unprocessed_triples=True,
213+
prefix_map=taxon_prefix_map,
214+
)
215+
with pytest.raises(ValueError):
195216
rdflib_loader.from_rdf_graph(
196217
graph,
197218
target_class=NodeObject,
198219
schemaview=view,
199-
cast_literals=True, ## required to pass
220+
cast_literals=False,
200221
allow_unprocessed_triples=True,
201222
prefix_map=taxon_prefix_map,
202223
)
203-
with self.assertRaises(ValueError) as context:
204-
rdflib_loader.from_rdf_graph(
205-
graph,
206-
target_class=NodeObject,
207-
schemaview=view,
208-
cast_literals=False,
209-
allow_unprocessed_triples=True,
210-
prefix_map=taxon_prefix_map,
211-
)
212-
logger.error("Passed unexpectedly: rdf:object is known to have a mix of literals and nodes")
213-
214-
215-
if __name__ == "__main__":
216-
unittest.main()
224+
logger.error("Passed unexpectedly: rdf:object is known to have a mix of literals and nodes")

0 commit comments

Comments
 (0)