|
1 | 1 | import json
|
2 | 2 | import logging
|
3 | 3 | import os
|
4 |
| -import unittest |
5 | 4 | from decimal import Decimal
|
6 | 5 |
|
| 6 | +import pytest |
7 | 7 | import yaml
|
8 | 8 | from rdflib import Graph, Namespace
|
9 | 9 |
|
|
41 | 41 | WD = Namespace("http://www.wikidata.org/entity/")
|
42 | 42 |
|
43 | 43 |
|
44 |
| -class LoadersDumpersTestCase(unittest.TestCase): |
45 |
| - def setUp(self): |
46 |
| - view = SchemaView(SCHEMA) |
47 |
| - container: Container |
48 |
| - container = yaml_loader.load(DATA, target_class=Container) |
49 |
| - self._check_objs(view, container) |
50 |
| - test_fn = OUT_TTL |
51 |
| - rdflib_dumper.dump(container, schemaview=view, to_file=test_fn, prefix_map=prefix_map) |
52 |
| - container = rdflib_loader.load(test_fn, target_class=Container, schemaview=view, prefix_map=prefix_map) |
53 |
| - self._check_objs(view, container) |
54 |
| - test_fn = OUT_JSON |
55 |
| - json_dumper.dump(container, to_file=test_fn) |
56 |
| - container = json_loader.load(test_fn, target_class=Container) |
57 |
| - self._check_objs(view, container) |
58 |
| - test_fn = OUT_YAML |
59 |
| - yaml_dumper.dump(container, to_file=test_fn) |
60 |
| - container = yaml_loader.load(test_fn, target_class=Container) |
61 |
| - self._check_objs(view, container) |
62 |
| - # TODO: use jsonpatch to compare files |
63 |
| - |
64 |
| - def test_load_from_list(self): |
65 |
| - """ |
66 |
| - Tests the load_any loader method, which can be used to load directly to a list |
67 |
| - """ |
68 |
| - view = SchemaView(SCHEMA) |
69 |
| - with open(DATA, encoding="UTF-8") as stream: |
70 |
| - data = yaml.safe_load(stream) |
71 |
| - # persons = yaml_loader.load_source(data, target_class=Person) |
72 |
| - # container = Container(persons=persons) |
73 |
| - person_dicts = data["persons"] |
74 |
| - tuples = [(yaml_loader, yaml.dump(person_dicts)), (json_loader, json.dumps(person_dicts, default=str))] |
75 |
| - for loader, person_list_str in tuples: |
76 |
| - persons = loader.loads_any(person_list_str, target_class=Person) |
77 |
| - assert isinstance(persons, list) |
78 |
| - assert isinstance(persons[0], Person) |
79 |
| - [p1] = [p for p in persons if p.id == "P:001"] |
80 |
| - [p2] = [p for p in persons if p.id == "P:002"] |
81 |
| - self.assertEqual(p1.name, "fred bloggs") |
82 |
| - self.assertEqual(p2.name, "joe schmö") |
83 |
| - self.assertEqual(p1.age_in_years, 33) |
84 |
| - self.assertEqual(p1.gender.code.text, "cisgender man") |
85 |
| - self.assertEqual(p2.gender.code.text, "transgender man") |
86 |
| - |
87 |
| - def test_encoding(self): |
88 |
| - """ |
89 |
| - This will reveal if generated yaml or json files are utf-8 encoded |
90 |
| - """ |
91 |
| - # pyyaml or json read non-ascii strings just fine no matter if the |
92 |
| - # file is ascii or utf-8 encoded. So we use Python's open function |
93 |
| - # to detect undesired ascii encoding. (linkml issue #634) |
94 |
| - with open(OUT_YAML, encoding="UTF-8") as f: |
95 |
| - [p2_name_line] = [l for l in f.readlines() if "joe schm" in l] |
96 |
| - self.assertIn("joe schmö", p2_name_line) |
97 |
| - |
98 |
| - with open(OUT_JSON, encoding="UTF-8") as f: |
99 |
| - [p2_name_line] = [l for l in f.readlines() if "joe schm" in l] |
100 |
| - self.assertIn("joe schmö", p2_name_line) |
101 |
| - |
102 |
| - def _check_objs(self, view: SchemaView, container: Container): |
103 |
| - persons = container.persons |
104 |
| - orgs = container.organizations.values() |
| 44 | +@pytest.fixture(scope="module") |
| 45 | +def loader_dumper_setup(): |
| 46 | + """Set up loader/dumper test environment with round-trip testing.""" |
| 47 | + view = SchemaView(SCHEMA) |
| 48 | + container: Container |
| 49 | + container = yaml_loader.load(DATA, target_class=Container) |
| 50 | + _check_objs(view, container) |
| 51 | + |
| 52 | + # Test RDF round-trip |
| 53 | + test_fn = OUT_TTL |
| 54 | + rdflib_dumper.dump(container, schemaview=view, to_file=test_fn, prefix_map=prefix_map) |
| 55 | + container = rdflib_loader.load(test_fn, target_class=Container, schemaview=view, prefix_map=prefix_map) |
| 56 | + _check_objs(view, container) |
| 57 | + |
| 58 | + # Test JSON round-trip |
| 59 | + test_fn = OUT_JSON |
| 60 | + json_dumper.dump(container, to_file=test_fn) |
| 61 | + container = json_loader.load(test_fn, target_class=Container) |
| 62 | + _check_objs(view, container) |
| 63 | + |
| 64 | + # Test YAML round-trip |
| 65 | + test_fn = OUT_YAML |
| 66 | + yaml_dumper.dump(container, to_file=test_fn) |
| 67 | + container = yaml_loader.load(test_fn, target_class=Container) |
| 68 | + _check_objs(view, container) |
| 69 | + # TODO: use jsonpatch to compare files |
| 70 | + |
| 71 | + return {"view": view, "container": container} |
| 72 | + |
| 73 | + |
| 74 | +def test_load_from_list(loader_dumper_setup): |
| 75 | + """ |
| 76 | + Tests the load_any loader method, which can be used to load directly to a list |
| 77 | + """ |
| 78 | + view = SchemaView(SCHEMA) |
| 79 | + with open(DATA, encoding="UTF-8") as stream: |
| 80 | + data = yaml.safe_load(stream) |
| 81 | + person_dicts = data["persons"] |
| 82 | + tuples = [(yaml_loader, yaml.dump(person_dicts)), (json_loader, json.dumps(person_dicts, default=str))] |
| 83 | + for loader, person_list_str in tuples: |
| 84 | + persons = loader.loads_any(person_list_str, target_class=Person) |
| 85 | + assert isinstance(persons, list) |
| 86 | + assert isinstance(persons[0], Person) |
105 | 87 | [p1] = [p for p in persons if p.id == "P:001"]
|
106 | 88 | [p2] = [p for p in persons if p.id == "P:002"]
|
107 |
| - [o1] = [o for o in orgs if o.id == "ROR:1"] |
108 |
| - [o2] = [o for o in orgs if o.id == "ROR:2"] |
109 |
| - [o3] = [o for o in orgs if o.id == "ROR:3"] |
110 |
| - [o4] = [o for o in orgs if o.id == "ROR:4"] |
111 |
| - o1cats = [c.code.text for c in o1.categories] |
112 |
| - o2cats = [c.code.text for c in o2.categories] |
113 |
| - self.assertEqual(p1.name, "fred bloggs") |
114 |
| - self.assertEqual(p2.name, "joe schmö") |
115 |
| - self.assertEqual(p1.age_in_years, 33) |
116 |
| - self.assertEqual(p1.gender.code.text, "cisgender man") |
117 |
| - self.assertEqual(p2.gender.code.text, "transgender man") |
118 |
| - self.assertCountEqual(o1cats, ["non profit", "charity"]) |
119 |
| - self.assertCountEqual(o2cats, ["shell company"]) |
120 |
| - p2: Person |
121 |
| - emp = p2.has_employment_history[0] |
122 |
| - self.assertEqual(emp.started_at_time, "2019-01-01") |
123 |
| - self.assertEqual(emp.is_current, True) |
124 |
| - self.assertEqual(emp.employed_at, o1.id) |
125 |
| - frel = p2.has_familial_relationships[0] |
126 |
| - self.assertEqual(frel.related_to, p1.id) |
127 |
| - # TODO: check PV vs PVText |
128 |
| - self.assertEqual(str(frel.type), "SIBLING_OF") |
129 |
| - med = p2.has_medical_history[0] |
130 |
| - self.assertEqual(med.in_location, "GEO:1234") |
131 |
| - self.assertEqual(med.diagnosis.id, "CODE:D0001") |
132 |
| - self.assertEqual(med.diagnosis.name, "headache") |
133 |
| - self.assertEqual(med.diagnosis.code_system, "CODE:D") |
134 |
| - # Check decimal representation |
135 |
| - self.assertEqual(o1.score, Decimal(1)) |
136 |
| - self.assertEqual(o2.score, Decimal("1.5")) |
137 |
| - self.assertEqual(o3.score, Decimal(1)) |
138 |
| - self.assertEqual(o4.score, Decimal(1)) |
139 |
| - self.assertEqual(o1.min_salary, Decimal("99999.00")) |
140 |
| - |
141 |
| - def test_edge_cases(self): |
142 |
| - """ |
143 |
| - Tests various edge cases: |
144 |
| -
|
145 |
| - - unprocessed triples (triples that cannot be reached via root objects) |
146 |
| - - mismatch between expected range categories (Type vs Class) and value (Literal vs Node) |
147 |
| - - complex range expressions (e.g. modeling a range as being EITHER string OR object |
148 |
| - """ |
149 |
| - # schema with following characterics: |
150 |
| - # - reified triples |
151 |
| - # - object has a complex union range (experimental new feature) |
152 |
| - view = SchemaView(os.path.join(INPUT_DIR, "complex_range_example.yaml")) |
153 |
| - graph = Graph() |
154 |
| - taxon_prefix_map = { |
155 |
| - "NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_", |
156 |
| - "RO": "http://purl.obolibrary.org/obo/RO_", |
157 |
| - } |
158 |
| - # this graph has the following characteristics |
159 |
| - # - blank nodes to represent statements |
160 |
| - # - some triples not reachable from roots |
161 |
| - # - implicit schema with complex ranges (rdf:object has range of either node or literal) |
162 |
| - graph.parse(os.path.join(INPUT_DIR, "bacteria-taxon-class.ttl"), format="ttl") |
163 |
| - objs = rdflib_loader.from_rdf_graph( |
| 89 | + assert p1.name == "fred bloggs" |
| 90 | + assert p2.name == "joe schmö" |
| 91 | + assert p1.age_in_years == 33 |
| 92 | + assert p1.gender.code.text == "cisgender man" |
| 93 | + assert p2.gender.code.text == "transgender man" |
| 94 | + |
| 95 | + |
| 96 | +def test_encoding(loader_dumper_setup): |
| 97 | + """ |
| 98 | + This will reveal if generated yaml or json files are utf-8 encoded |
| 99 | + """ |
| 100 | + # pyyaml or json read non-ascii strings just fine no matter if the |
| 101 | + # file is ascii or utf-8 encoded. So we use Python's open function |
| 102 | + # to detect undesired ascii encoding. (linkml issue #634) |
| 103 | + with open(OUT_YAML, encoding="UTF-8") as f: |
| 104 | + [p2_name_line] = [l for l in f.readlines() if "joe schm" in l] |
| 105 | + assert "joe schmö" in p2_name_line |
| 106 | + |
| 107 | + with open(OUT_JSON, encoding="UTF-8") as f: |
| 108 | + [p2_name_line] = [l for l in f.readlines() if "joe schm" in l] |
| 109 | + assert "joe schmö" in p2_name_line |
| 110 | + |
| 111 | + |
| 112 | +def _check_objs(view: SchemaView, container: Container): |
| 113 | + """Helper function to check container objects.""" |
| 114 | + persons = container.persons |
| 115 | + orgs = container.organizations.values() |
| 116 | + [p1] = [p for p in persons if p.id == "P:001"] |
| 117 | + [p2] = [p for p in persons if p.id == "P:002"] |
| 118 | + [o1] = [o for o in orgs if o.id == "ROR:1"] |
| 119 | + [o2] = [o for o in orgs if o.id == "ROR:2"] |
| 120 | + [o3] = [o for o in orgs if o.id == "ROR:3"] |
| 121 | + [o4] = [o for o in orgs if o.id == "ROR:4"] |
| 122 | + o1cats = [c.code.text for c in o1.categories] |
| 123 | + o2cats = [c.code.text for c in o2.categories] |
| 124 | + assert p1.name == "fred bloggs" |
| 125 | + assert p2.name == "joe schmö" |
| 126 | + assert p1.age_in_years == 33 |
| 127 | + assert p1.gender.code.text == "cisgender man" |
| 128 | + assert p2.gender.code.text == "transgender man" |
| 129 | + assert sorted(o1cats) == sorted(["non profit", "charity"]) |
| 130 | + assert sorted(o2cats) == sorted(["shell company"]) |
| 131 | + p2: Person |
| 132 | + emp = p2.has_employment_history[0] |
| 133 | + assert emp.started_at_time == "2019-01-01" |
| 134 | + assert emp.is_current == True |
| 135 | + assert emp.employed_at == o1.id |
| 136 | + frel = p2.has_familial_relationships[0] |
| 137 | + assert frel.related_to == p1.id |
| 138 | + # TODO: check PV vs PVText |
| 139 | + assert str(frel.type) == "SIBLING_OF" |
| 140 | + med = p2.has_medical_history[0] |
| 141 | + assert med.in_location == "GEO:1234" |
| 142 | + assert med.diagnosis.id == "CODE:D0001" |
| 143 | + assert med.diagnosis.name == "headache" |
| 144 | + assert med.diagnosis.code_system == "CODE:D" |
| 145 | + # Check decimal representation |
| 146 | + assert o1.score == Decimal(1) |
| 147 | + assert o2.score == Decimal("1.5") |
| 148 | + assert o3.score == Decimal(1) |
| 149 | + assert o4.score == Decimal(1) |
| 150 | + assert o1.min_salary == Decimal("99999.00") |
| 151 | + |
| 152 | + |
| 153 | +def test_edge_cases(loader_dumper_setup): |
| 154 | + """ |
| 155 | + Tests various edge cases: |
| 156 | +
|
| 157 | + - unprocessed triples (triples that cannot be reached via root objects) |
| 158 | + - mismatch between expected range categories (Type vs Class) and value (Literal vs Node) |
| 159 | + - complex range expressions (e.g. modeling a range as being EITHER string OR object |
| 160 | + """ |
| 161 | + # schema with following characterics: |
| 162 | + # - reified triples |
| 163 | + # - object has a complex union range (experimental new feature) |
| 164 | + view = SchemaView(os.path.join(INPUT_DIR, "complex_range_example.yaml")) |
| 165 | + graph = Graph() |
| 166 | + taxon_prefix_map = { |
| 167 | + "NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_", |
| 168 | + "RO": "http://purl.obolibrary.org/obo/RO_", |
| 169 | + } |
| 170 | + # this graph has the following characteristics |
| 171 | + # - blank nodes to represent statements |
| 172 | + # - some triples not reachable from roots |
| 173 | + # - implicit schema with complex ranges (rdf:object has range of either node or literal) |
| 174 | + graph.parse(os.path.join(INPUT_DIR, "bacteria-taxon-class.ttl"), format="ttl") |
| 175 | + objs = rdflib_loader.from_rdf_graph( |
| 176 | + graph, |
| 177 | + target_class=NodeObject, |
| 178 | + schemaview=view, |
| 179 | + cast_literals=False, ## strict |
| 180 | + allow_unprocessed_triples=True, ## known issue |
| 181 | + prefix_map=taxon_prefix_map, |
| 182 | + ) |
| 183 | + [obj] = objs |
| 184 | + for x in obj.statements: |
| 185 | + assert x.subject is None |
| 186 | + assert x.predicate is not None |
| 187 | + assert x.object is not None |
| 188 | + logger.info(f" x={x}") |
| 189 | + # ranges that are objects are contracted |
| 190 | + assert Triple(subject=None, predicate="rdfs:subClassOf", object="owl:Thing") in obj.statements |
| 191 | + assert Triple(subject=None, predicate="rdfs:subClassOf", object="NCBITaxon:1") in obj.statements |
| 192 | + # string ranges |
| 193 | + assert Triple(subject=None, predicate="rdfs:label", object="Bacteria") in obj.statements |
| 194 | + with pytest.raises(ValueError): |
| 195 | + rdflib_loader.from_rdf_graph( |
164 | 196 | graph,
|
165 | 197 | target_class=NodeObject,
|
166 | 198 | schemaview=view,
|
167 |
| - cast_literals=False, ## strict |
168 |
| - allow_unprocessed_triples=True, ## known issue |
| 199 | + cast_literals=False, |
| 200 | + allow_unprocessed_triples=False, |
169 | 201 | prefix_map=taxon_prefix_map,
|
170 | 202 | )
|
171 |
| - [obj] = objs |
172 |
| - for x in obj.statements: |
173 |
| - assert x.subject is None |
174 |
| - assert x.predicate is not None |
175 |
| - assert x.object is not None |
176 |
| - logger.info(f" x={x}") |
177 |
| - # ranges that are objects are contracted |
178 |
| - assert Triple(subject=None, predicate="rdfs:subClassOf", object="owl:Thing") in obj.statements |
179 |
| - assert Triple(subject=None, predicate="rdfs:subClassOf", object="NCBITaxon:1") in obj.statements |
180 |
| - # string ranges |
181 |
| - assert Triple(subject=None, predicate="rdfs:label", object="Bacteria") in obj.statements |
182 |
| - with self.assertRaises(ValueError) as context: |
183 |
| - rdflib_loader.from_rdf_graph( |
184 |
| - graph, |
185 |
| - target_class=NodeObject, |
186 |
| - schemaview=view, |
187 |
| - cast_literals=False, |
188 |
| - allow_unprocessed_triples=False, |
189 |
| - prefix_map=taxon_prefix_map, |
190 |
| - ) |
191 |
| - logger.error("Passed unexpectedly: there are known to be unreachable triples") |
192 |
| - # removing complex range, object has a range of string |
193 |
| - view.schema.slots["object"].exactly_one_of = [] |
194 |
| - view.set_modified() |
| 203 | + logger.error("Passed unexpectedly: there are known to be unreachable triples") |
| 204 | + # removing complex range, object has a range of string |
| 205 | + view.schema.slots["object"].exactly_one_of = [] |
| 206 | + view.set_modified() |
| 207 | + rdflib_loader.from_rdf_graph( |
| 208 | + graph, |
| 209 | + target_class=NodeObject, |
| 210 | + schemaview=view, |
| 211 | + cast_literals=True, ## required to pass |
| 212 | + allow_unprocessed_triples=True, |
| 213 | + prefix_map=taxon_prefix_map, |
| 214 | + ) |
| 215 | + with pytest.raises(ValueError): |
195 | 216 | rdflib_loader.from_rdf_graph(
|
196 | 217 | graph,
|
197 | 218 | target_class=NodeObject,
|
198 | 219 | schemaview=view,
|
199 |
| - cast_literals=True, ## required to pass |
| 220 | + cast_literals=False, |
200 | 221 | allow_unprocessed_triples=True,
|
201 | 222 | prefix_map=taxon_prefix_map,
|
202 | 223 | )
|
203 |
| - with self.assertRaises(ValueError) as context: |
204 |
| - rdflib_loader.from_rdf_graph( |
205 |
| - graph, |
206 |
| - target_class=NodeObject, |
207 |
| - schemaview=view, |
208 |
| - cast_literals=False, |
209 |
| - allow_unprocessed_triples=True, |
210 |
| - prefix_map=taxon_prefix_map, |
211 |
| - ) |
212 |
| - logger.error("Passed unexpectedly: rdf:object is known to have a mix of literals and nodes") |
213 |
| - |
214 |
| - |
215 |
| -if __name__ == "__main__": |
216 |
| - unittest.main() |
| 224 | + logger.error("Passed unexpectedly: rdf:object is known to have a mix of literals and nodes") |
0 commit comments