Skip to content

Commit 804c741

Browse files
authored
Add native support for expressions via filters (#50)
1 parent c8d00f8 commit 804c741

File tree

2 files changed

+147
-25
lines changed

2 files changed

+147
-25
lines changed

pyalex/api.py

Lines changed: 85 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,32 @@ def __setattr__(self, key, value):
3131
)
3232

3333

34+
class or_(dict):
    """Dictionary of filters whose list values are combined with OR.

    Behaves exactly like ``dict``. The subclass exists only as a marker:
    ``_flatten_kv`` checks for it with ``isinstance`` and, when found, joins
    list values with ``|`` instead of the separator it was given.
    """
36+
37+
38+
class _LogicalExpression:
    """Base class for a filter value prefixed with an operator token.

    Subclasses set ``token``. ``str()`` of an instance is the token
    immediately followed by the wrapped value, e.g. ``str(not_("us"))``
    gives ``"!us"``.
    """

    # Operator prefix placed in front of the value; the base class has none,
    # each subclass overrides it.
    token = None

    def __init__(self, value):
        self.value = value

    def __str__(self) -> str:
        return f"{self.token}{self.value}"

    def __repr__(self) -> str:
        # Show the concrete operator class and the raw value when debugging
        # filter parameters; string formatting still goes through __str__.
        return f"{type(self).__name__}({self.value!r})"


class not_(_LogicalExpression):
    """Filter value rendered with a leading ``!``."""

    token = "!"


class gt_(_LogicalExpression):
    """Filter value rendered with a leading ``>``."""

    token = ">"


class lt_(_LogicalExpression):
    """Filter value rendered with a leading ``<``."""

    token = "<"
58+
59+
3460
def _quote_oa_value(v):
3561
"""Prepare a value for the OpenAlex API.
3662
@@ -41,30 +67,40 @@ def _quote_oa_value(v):
4167
if isinstance(v, bool):
4268
return str(v).lower()
4369

70+
if isinstance(v, _LogicalExpression) and isinstance(v.value, str):
71+
v.value = quote_plus(v.value)
72+
return v
73+
4474
if isinstance(v, str):
4575
return quote_plus(v)
4676

4777
return v
4878

4979

50-
def _flatten_kv(d, prefix=""):
80+
def _flatten_kv(d, prefix=None, logical="+"):
81+
if prefix is None and not isinstance(d, dict):
82+
raise ValueError("prefix should be set if d is not a dict")
83+
5184
if isinstance(d, dict):
85+
logical_subd = "|" if isinstance(d, or_) else logical
86+
5287
t = []
5388
for k, v in d.items():
54-
if isinstance(v, list):
55-
t.extend([f"{prefix}.{k}:{_quote_oa_value(i)}" for i in v])
56-
else:
57-
new_prefix = f"{prefix}.{k}" if prefix else f"{k}"
58-
x = _flatten_kv(v, prefix=new_prefix)
59-
t.append(x)
89+
x = _flatten_kv(
90+
v, prefix=f"{prefix}.{k}" if prefix else f"{k}", logical=logical_subd
91+
)
92+
t.append(x)
6093

6194
return ",".join(t)
95+
elif isinstance(d, list):
96+
list_str = logical.join([f"{_quote_oa_value(i)}" for i in d])
97+
return f"{prefix}:{list_str}"
6298
else:
6399
return f"{prefix}:{_quote_oa_value(d)}"
64100

65101

66102
def _params_merge(params, add_params):
67-
for k, _v in add_params.items():
103+
for k in add_params.keys():
68104
if (
69105
k in params
70106
and isinstance(params[k], dict)
@@ -113,6 +149,18 @@ def invert_abstract(inv_index):
113149
return " ".join(map(lambda x: x[0], sorted(l_inv, key=lambda x: x[1])))
114150

115151

152+
def _wrap_values_nested_dict(d, func):
    """Return a copy of ``d`` with ``func`` applied to every leaf value.

    Nested dicts are processed recursively, each element of a list value is
    passed through ``func`` individually, and any other value is passed
    through ``func`` directly.

    The input mapping and its nested dicts are not modified. Nested dicts are
    objects owned by the caller, and rewriting them in place would wrap their
    values again every time the same dict is reused in another call.

    Parameters
    ----------
    d : dict
        Possibly nested mapping of values.
    func : callable
        Called with one value; its result replaces that value in the copy.

    Returns
    -------
    dict
        New mapping of the same type as ``d`` holding the wrapped values.
    """
    import copy

    # A shallow copy keeps the mapping type (e.g. a dict subclass used as a
    # marker) while detaching the result from the caller's object.
    wrapped = copy.copy(d)

    for k, v in d.items():
        if isinstance(v, dict):
            wrapped[k] = _wrap_values_nested_dict(v, func)
        elif isinstance(v, list):
            wrapped[k] = [func(i) for i in v]
        else:
            wrapped[k] = func(v)

    return wrapped
162+
163+
116164
class QueryError(ValueError):
117165
pass
118166

@@ -207,9 +255,6 @@ class BaseOpenAlex:
207255
def __init__(self, params=None):
208256
self.params = params
209257

210-
def _get_multi_items(self, record_list):
211-
return self.filter(openalex_id="|".join(record_list)).get()
212-
213258
def _full_collection_name(self):
214259
if self.params is not None and "q" in self.params.keys():
215260
return (
@@ -234,10 +279,14 @@ def __getattr__(self, key):
234279

235280
def __getitem__(self, record_id):
236281
if isinstance(record_id, list):
237-
return self._get_multi_items(record_id)
282+
if len(record_id) > 100:
283+
raise ValueError("OpenAlex does not support more than 100 ids")
284+
285+
return self.filter_or(openalex_id=record_id).get(per_page=len(record_id))
238286

239287
return self._get_from_url(
240-
f"{self._full_collection_name()}/{record_id}", return_meta=False
288+
f"{self._full_collection_name()}/{_quote_oa_value(record_id)}",
289+
return_meta=False,
241290
)
242291

243292
@property
@@ -322,7 +371,10 @@ def paginate(self, method="cursor", page=1, per_page=None, cursor="*", n_max=100
322371
def random(self):
323372
return self.__getitem__("random")
324373

325-
def _add_params(self, argument, new_params):
374+
def _add_params(self, argument, new_params, raise_if_exists=False):
375+
if raise_if_exists:
376+
raise NotImplementedError("raise_if_exists is not implemented")
377+
326378
if self.params is None:
327379
self.params = {argument: new_params}
328380
elif argument in self.params and isinstance(self.params[argument], dict):
@@ -336,6 +388,25 @@ def filter(self, **kwargs):
336388
self._add_params("filter", kwargs)
337389
return self
338390

391+
def filter_and(self, **kwargs):
392+
return self.filter(**kwargs)
393+
394+
def filter_or(self, **kwargs):
395+
self._add_params("filter", or_(kwargs), raise_if_exists=False)
396+
return self
397+
398+
def filter_not(self, **kwargs):
399+
self._add_params("filter", _wrap_values_nested_dict(kwargs, not_))
400+
return self
401+
402+
def filter_gt(self, **kwargs):
403+
self._add_params("filter", _wrap_values_nested_dict(kwargs, gt_))
404+
return self
405+
406+
def filter_lt(self, **kwargs):
407+
self._add_params("filter", _wrap_values_nested_dict(kwargs, lt_))
408+
return self
409+
339410
def search_filter(self, **kwargs):
340411
self._add_params("filter", {f"{k}.search": v for k, v in kwargs.items()})
341412
return self

tests/test_pyalex.py

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,12 @@ def test_multi_works():
117117
# the work to extract the referenced works of
118118
w = Works()["W2741809807"]
119119

120-
assert len(Works()[w["referenced_works"]]) == 25
120+
assert len(Works()[w["referenced_works"]]) >= 38
121+
122+
assert (
123+
len(Works().filter_or(openalex_id=w["referenced_works"]).get(per_page=100))
124+
>= 38
125+
)
121126

122127

123128
def test_works_multifilter():
@@ -278,33 +283,80 @@ def test_random_publishers():
278283

279284

280285
def test_and_operator():
281-
# https://github.com/J535D165/pyalex/issues/11
282-
url = "https://api.openalex.org/works?filter=institutions.country_code:tw,institutions.country_code:hk,institutions.country_code:us,publication_year:2022"
286+
urls = [
287+
"https://api.openalex.org/works?filter=institutions.country_code:tw,institutions.country_code:hk,institutions.country_code:us,publication_year:2022",
288+
"https://api.openalex.org/works?filter=institutions.country_code:tw+hk+us,publication_year:2022",
289+
]
283290

284291
assert (
285-
url
286-
== Works()
292+
Works()
287293
.filter(
288294
institutions={"country_code": ["tw", "hk", "us"]}, publication_year=2022
289295
)
290296
.url
297+
in urls
291298
)
292299
assert (
293-
url
294-
== Works()
300+
Works()
295301
.filter(institutions={"country_code": "tw"})
296302
.filter(institutions={"country_code": "hk"})
297303
.filter(institutions={"country_code": "us"})
298304
.filter(publication_year=2022)
299305
.url
306+
in urls
300307
)
301308
assert (
302-
url
303-
== Works()
309+
Works()
304310
.filter(institutions={"country_code": ["tw", "hk"]})
305311
.filter(institutions={"country_code": "us"})
306312
.filter(publication_year=2022)
307313
.url
314+
in urls
315+
)
316+
317+
318+
def test_or_operator():
319+
assert (
320+
Works()
321+
.filter_or(
322+
institutions={"country_code": ["tw", "hk", "us"]}, publication_year=2022
323+
)
324+
.url
325+
== "https://api.openalex.org/works?filter=institutions.country_code:tw|hk|us,publication_year:2022"
326+
)
327+
328+
329+
def test_not_operator():
330+
assert (
331+
Works()
332+
.filter_not(institutions={"country_code": "us"})
333+
.filter(publication_year=2022)
334+
.url
335+
== "https://api.openalex.org/works?filter=institutions.country_code:!us,publication_year:2022"
336+
)
337+
338+
339+
def test_not_operator_list():
340+
assert (
341+
Works()
342+
.filter_not(institutions={"country_code": ["tw", "hk", "us"]})
343+
.filter(publication_year=2022)
344+
.url
345+
== "https://api.openalex.org/works?filter=institutions.country_code:!tw+!hk+!us,publication_year:2022"
346+
)
347+
348+
349+
@pytest.mark.skip("Wait for feedback on issue by OpenAlex")
350+
def test_combined_operators():
351+
# works:
352+
# https://api.openalex.org/works?filter=publication_year:>2022,publication_year:!2023
353+
354+
# doesn't work
355+
# https://api.openalex.org/works?filter=publication_year:>2022+!2023
356+
357+
assert (
358+
Works().filter_gt(publication_year=2022).filter_not(publication_year=2023).url
359+
== "https://api.openalex.org/works?filter=publication_year:>2022+!2023"
308360
)
309361

310362

@@ -359,11 +411,10 @@ def test_filter_urlencoding():
359411
)
360412

361413

362-
@pytest.mark.skip("This test is not working due to inconsistencies in the API.")
363414
def test_urlencoding_list():
364415
assert (
365416
Works()
366-
.filter(
417+
.filter_or(
367418
doi=[
368419
"https://doi.org/10.1207/s15327809jls0703&4_2",
369420
"https://doi.org/10.1001/jama.264.8.944b",

0 commit comments

Comments
 (0)