Skip to content

Commit e4aa4ba

Browse files
authored
Merge pull request #39 from linkml/additional-formats
Additional formats
2 parents e43d18f + 9ccdf50 commit e4aa4ba

File tree

16 files changed

+3206
-791
lines changed

16 files changed

+3206
-791
lines changed

docs/how-to/Calculate-Enrichment.ipynb

Lines changed: 812 additions & 0 deletions
Large diffs are not rendered by default.

poetry.lock

Lines changed: 1461 additions & 770 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ pystow = "^0.5.4"
2323
black = { version=">=24.0.0", optional = true }
2424
ruff = { version=">=0.6.2", optional = true }
2525
llm = { version="*", optional = true }
26+
lightrdf = { version="*", optional = true }
2627
tiktoken = { version="*", optional = true }
2728
pymongo = "^4.11"
2829
neo4j = { version="*", optional = true }
@@ -91,6 +92,7 @@ renderer = ["linkml_renderer"]
9192
fastapi = ["fastapi", "uvicorn"]
9293
frictionless = ["frictionless"]
9394
scipy = ["scipy", "scikit-learn"]
95+
rdf = ["lightrdf"]
9496
#ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
9597
bigquery = ["google-cloud-bigquery"]
9698
all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "bigquery"]

src/linkml_store/api/collection.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,12 @@ def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
454454
return qr.rows[0]
455455
return None
456456

457-
def find(self, where: Optional[Any] = None, **kwargs) -> QueryResult:
457+
def find(
458+
self,
459+
where: Optional[Any] = None,
460+
select_cols: Optional[List[str] ] = None,
461+
**kwargs,
462+
) -> QueryResult:
458463
"""
459464
Find objects in the collection using a where query.
460465
@@ -484,10 +489,14 @@ def find(self, where: Optional[Any] = None, **kwargs) -> QueryResult:
484489
485490
486491
:param where:
492+
:param select_cols:
487493
:param kwargs:
488494
:return:
489495
"""
490-
query = self._create_query(where_clause=where)
496+
query = self._create_query(
497+
where_clause=where,
498+
select_cols=select_cols,
499+
)
491500
self._pre_query_hook(query)
492501
return self.query(query, **kwargs)
493502

@@ -608,6 +617,47 @@ def row2array(row):
608617
new_qr.rows = [r[1] for r in results]
609618
return new_qr
610619

620+
def group_by(
    self,
    group_by_fields: List[str],
    inlined_field: str = "objects",
    agg_map: Optional[Dict[str, List[str]]] = None,
    where: Optional[Dict] = None,
    **kwargs,
) -> QueryResult:
    """
    Group objects in the collection by one or more fields.

    All rows matching ``where`` are fetched via :meth:`find` (with ``limit=-1``)
    and bucketed by the combined values of the ``agg_map["first"]`` fields
    followed by ``group_by_fields``. One result row is produced per bucket: it
    carries those key fields at the top level, plus a list (stored under
    ``inlined_field``) with one dict per source row restricted to the "list" fields.

    :param group_by_fields: name(s) of the field(s) to group on; a single
        string is accepted and treated as a one-element list
    :param inlined_field: key in each result row under which the grouped
        objects are stored
    :param agg_map: optional mapping with the keys ``"first"`` (extra fields
        that become part of the grouping key and are kept on the top-level
        object) and ``"list"`` (fields copied into each grouped object).
        When ``"list"`` is missing or empty, every column that is not part
        of the grouping key is used.
    :param where: optional filter passed through to :meth:`find`
    :param kwargs: accepted for interface compatibility; not used here
    :return: a QueryResult with one row per group, in order of first appearance
    """
    if isinstance(group_by_fields, str):
        group_by_fields = [group_by_fields]
    # agg_map is optional: fall back to an empty mapping so that the default
    # call (no agg_map) does not fail with AttributeError on ``None.get``.
    if agg_map is None:
        agg_map = {}
    df = self.find(where=where, limit=-1).rows_dataframe
    pk_fields = list(agg_map.get("first") or []) + list(group_by_fields)
    list_fields = agg_map.get("list") or []
    if not list_fields:
        # Default: inline every column that is not part of the grouping key.
        list_fields = [col for col in df.columns if col not in pk_fields]

    grouped_objs = defaultdict(list)
    for _, row in df.iterrows():
        pk = tuple(row[pk_fields])
        grouped_objs[pk].append({k: row[k] for k in list_fields})

    results = []
    for pk, objs in grouped_objs.items():
        top_obj = dict(zip(pk_fields, pk))
        top_obj[inlined_field] = objs
        results.append(top_obj)
    return QueryResult(num_rows=len(results), rows=results)
658+
659+
660+
611661
@property
612662
def is_internal(self) -> bool:
613663
"""
@@ -1062,7 +1112,7 @@ def induce_class_definition_from_objects(
10621112
multivalued = any(multivalueds)
10631113
inlined = any(inlineds)
10641114
if multivalued and False in multivalueds:
1065-
raise ValueError(f"Mixed list non list: {vs} // inferred= {multivalueds}")
1115+
logger.info(f"Mixed list non list: {vs} // inferred= {multivalueds}")
10661116
# if not rngs:
10671117
# raise AssertionError(f"Empty rngs for {k} = {vs}")
10681118
rng = rngs[0] if rngs else None

src/linkml_store/api/stores/duckdb/duckdb_collection.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import logging
2-
from typing import Any, Dict, List, Optional, Union
2+
from typing import Any, Dict, List, Optional, Union, Tuple
33

44
import sqlalchemy as sqla
55
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
@@ -94,7 +94,9 @@ def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True,
9494

9595
def query_facets(
9696
self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
97-
) -> Dict[str, Dict[str, int]]:
97+
) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
98+
if facet_limit is None:
99+
facet_limit = DEFAULT_FACET_LIMIT
98100
results = {}
99101
cd = self.class_definition()
100102
with self.parent.engine.connect() as conn:

src/linkml_store/api/stores/mongodb/mongodb_collection.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ def query_facets(
164164
facet_limit=DEFAULT_FACET_LIMIT,
165165
**kwargs,
166166
) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
167+
if facet_limit is None:
168+
facet_limit = DEFAULT_FACET_LIMIT
167169
results = {}
168170
if not facet_columns:
169171
facet_columns = list(self.class_definition().attributes.keys())

src/linkml_store/api/stores/mongodb/mongodb_database.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ def _db_name(self) -> str:
4141
if self.handle:
4242
parsed_url = urlparse(self.handle)
4343
path_parts = parsed_url.path.lstrip("/").split("?")[0].split("/")
44-
print(path_parts)
4544
db_name = path_parts[0] if path_parts else "default"
4645
else:
4746
db_name = "default"

src/linkml_store/api/stores/solr/solr_collection.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,18 @@ def query(self, query: Query, **kwargs) -> QueryResult:
6262
return QueryResult(query=query, num_rows=num_rows, rows=rows)
6363

6464
def query_facets(
65-
self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
65+
self,
66+
where: Optional[Dict] = None,
67+
facet_columns: List[str] = None,
68+
facet_limit=DEFAULT_FACET_LIMIT,
69+
facet_min_count: int = 1,
70+
**kwargs
6671
) -> Dict[str, Dict[str, int]]:
6772
solr_query = self._build_solr_query(where)
6873
solr_query["facet"] = "true"
6974
solr_query["facet.field"] = facet_columns
7075
solr_query["facet.limit"] = facet_limit
76+
solr_query["facet.mincount"] = facet_min_count
7177

7278
logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")
7379

0 commit comments

Comments
 (0)