Skip to content

Commit 48c6a08

Browse files
authored
Merge pull request #21 from linkml/web-api
web api
2 parents a5ce8d0 + 01addac commit 48c6a08

File tree

17 files changed

+1051
-178
lines changed

17 files changed

+1051
-178
lines changed

docs/how-to/Index-caDSR.ipynb

Lines changed: 76 additions & 39 deletions
Large diffs are not rendered by default.

docs/manual/data-model.ipynb

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"source": [
6+
"# Data Model\n",
7+
"\n",
8+
"The LinkML-Store data model is based around a three-level structure:\n",
9+
"\n",
10+
" * A `Client` "
11+
],
12+
"metadata": {
13+
"collapsed": false
14+
},
15+
"id": "d3371bb475f6fe4a"
16+
}
17+
],
18+
"metadata": {
19+
"kernelspec": {
20+
"display_name": "Python 3",
21+
"language": "python",
22+
"name": "python3"
23+
},
24+
"language_info": {
25+
"codemirror_mode": {
26+
"name": "ipython",
27+
"version": 2
28+
},
29+
"file_extension": ".py",
30+
"mimetype": "text/x-python",
31+
"name": "python",
32+
"nbconvert_exporter": "python",
33+
"pygments_lexer": "ipython2",
34+
"version": "2.7.6"
35+
}
36+
},
37+
"nbformat": 4,
38+
"nbformat_minor": 5
39+
}

src/linkml_data_browser/app.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
11
import logging
2+
import os
23
from typing import Any, Dict
34

45
import numpy as np
56
import pandas as pd
67
import streamlit as st
8+
import yaml
79
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
10+
from linkml_store import Client
811
from linkml_store.api import Collection
9-
from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
12+
from linkml_store.api.queries import QueryResult
1013

1114
logger = logging.getLogger(__name__)
1215

1316
# Set page config to make layout "wide" by default
1417
st.set_page_config(layout="wide")
1518

19+
# Optional client configuration: when the LINKML_STORE_CONFIG environment
# variable is set to a non-empty value, it is treated as the path of a YAML
# file whose parsed content configures the client.
config = None
_config_path = os.environ.get("LINKML_STORE_CONFIG")
if _config_path:
    # Explicit encoding so that parsing does not depend on the platform locale.
    with open(_config_path, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

# Initialize client (falls back to a default Client when no config was loaded,
# or when the YAML file parsed to an empty/falsy value)
client = Client().from_config(config) if config else Client()
1626

1727
DEFAULT_LIMIT = 25
1828

@@ -33,14 +43,16 @@ def init_reset_filters(cd: ClassDefinition, reset=False):
3343
st.session_state[key] = "" # Assuming text input, adjust for other types
3444

3545

36-
def apply_filters(collection: Collection, filters: Dict[str, Any], offset: int, limit: int, **kwargs):
37-
print(f"FILTERS={filters}")
38-
return collection.find(filters, offset=offset, limit=limit, **kwargs)
46+
def apply_filters(collection: Collection, filters: Dict[str, Any], offset: int, limit: int, **kwargs) -> QueryResult:
    """
    Run a filtered, paginated query against a collection.

    :param collection: collection to query
    :param filters: conditions passed as the first argument to ``collection.find``
    :param offset: offset forwarded to ``collection.find``
    :param limit: limit forwarded to ``collection.find``
    :param kwargs: additional keyword arguments forwarded to ``collection.find``
    :return: the result object returned by ``collection.find``
    """
    # Diagnostics go through the module logger (lazy %-args) instead of print(),
    # so they can be enabled or silenced via logging configuration.
    logger.debug("FILTERS=%s // offset=%s", filters, offset)
    qr = collection.find(filters, offset=offset, limit=limit, **kwargs)
    logger.debug("QR=%s", qr.num_rows)
    return qr
3951

4052

4153
def render_filter_widget(collection: Collection, attribute: SlotDefinition):
4254
"""Render appropriate Streamlit widget based on column type."""
43-
logger.info("Rendering filter widget")
55+
logger.info(f"Rendering filter widget: {attribute.name}")
4456
# print(f"{attribute.name} // RANGE={attribute.range}")
4557
# col_type = attribute.range
4658
col_name = attribute.name
@@ -72,24 +84,30 @@ def render_filter_widget(collection: Collection, attribute: SlotDefinition):
7284
# Main function to render the app
7385
def main():
7486
st.title("LinkML Table Browser")
75-
selected_db = st.selectbox("Select a Database", list(DBS.keys()), key="db_selector")
87+
db_names = list(client.databases.keys())
88+
selected_db = st.selectbox("Select a Database", db_names, key="db_selector")
7689
print(f"DB SELECTED={selected_db}")
7790
# con = duckdb.connect(DB_PATH.format(db=selected_db))
78-
db_name = DB_PATH.format(db=selected_db)
79-
database = DuckDBDatabase(f"duckdb:///{db_name}")
91+
# db_name = DB_PATH.format(db=selected_db)
92+
# database = DuckDBDatabase(f"duckdb:///{db_name}")
93+
database = client.get_database(selected_db)
8094
st.write(f"Connected to {selected_db}")
81-
candidate_tables = DBS.get(selected_db)
95+
candidate_tables = database.list_collection_names()
96+
print(f"COLLECtiONS={candidate_tables}")
8297
if len(candidate_tables) > 1:
8398
curr_table = st.selectbox("Select a Table", candidate_tables, key="table_selector")
8499
else:
85-
curr_table = DBS.get(selected_db)[0]
100+
curr_table = candidate_tables[0]
86101
collection = database.get_collection(curr_table)
102+
print(f"CURR={collection.alias} // {collection.target_class_name}")
87103
cd = collection.class_definition()
104+
print(f"CD={cd.name} // {len(cd.attributes)}")
88105
filters = {}
89106

90107
# Pagination setup
91108
session_state = st.session_state
92109
if "current_page" not in session_state:
110+
print(f"RESETTING CP// {session_state}")
93111
session_state.current_page = 0 # Start with page 0
94112
rows_per_page = DEFAULT_LIMIT
95113

@@ -105,8 +123,8 @@ def main():
105123
if filter_widget is not None and filter_widget != "":
106124
filters[att_name] = filter_widget
107125
new_value = filters.get(att_name)
108-
if prev_value != new_value:
109-
# print(f"CHANGE FOR {att_name}: {prev_value} -> {new_value}")
126+
if prev_value != new_value and not (not prev_value and not new_value):
127+
print(f"CHANGE FOR {att_name}: {prev_value} -> {new_value}")
110128
filter_changed = True
111129
# st.session_state[key] = new_value
112130
facet_key = f"facet_view_{att_name}"
@@ -116,13 +134,15 @@ def main():
116134
st.sidebar.write(facet_df)
117135
# If any filter has changed, reset pagination
118136
if filter_changed:
137+
print(f"FILTER CHANGED={filter_changed}")
119138
st.session_state.current_page = 0 # Reset offset
120139
result = apply_filters(collection, filters, session_state.current_page * rows_per_page, rows_per_page)
121140
# if filter_changed:
122141
# facet_results = collection.query_facets(filters, facet_columns=["evidence_type"])
123142
# print(f"FACET={facet_results}")
124143
st.write(f"Number of rows: {result.num_rows}")
125144
st.write(f"Page: {session_state.current_page + 1}")
145+
print(f"SESSION STATE: {session_state}")
126146
filtered_data = pd.DataFrame(result.rows)
127147

128148
# Pagination buttons
@@ -133,6 +153,7 @@ def main():
133153
if session_state.current_page > 0:
134154
session_state.current_page -= 1
135155
if next_button.button("Next"):
156+
print(f"NEXT: CP={session_state.current_page} RPP={rows_per_page} NR={result.num_rows}")
136157
# Assuming result.num_rows gives the total number of rows after filtering, not just this page's rows
137158
if (session_state.current_page + 1) * rows_per_page < result.num_rows:
138159
session_state.current_page += 1

src/linkml_store/api/collection.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,10 @@ def get(self, ids: Optional[List[IDENTIFIER]], **kwargs) -> QueryResult:
346346
id_field = self.identifier_attribute_name
347347
if not id_field:
348348
raise ValueError(f"No identifier for {self.name}")
349-
return self.find({id_field: ids})
349+
if len(ids) == 1:
350+
return self.find({id_field: ids[0]})
351+
else:
352+
return self.find({id_field: {"$in": ids}})
350353

351354
def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
352355
"""
@@ -518,7 +521,7 @@ def exists(self) -> Optional[bool]:
518521
:return:
519522
"""
520523
cd = self.class_definition()
521-
return cd is not None
524+
return cd is not None and cd.attributes
522525

523526
def load_from_source(self, load_if_exists=False):
524527
"""
@@ -535,11 +538,19 @@ def load_from_source(self, load_if_exists=False):
535538
kwargs = source.arguments or {}
536539
if source.local_path:
537540
objects = load_objects(
538-
metadata.source.local_path, format=source.format, expected_type=source.expected_type, **kwargs
541+
metadata.source.local_path,
542+
format=source.format,
543+
expected_type=source.expected_type,
544+
compression=source.compression,
545+
**kwargs,
539546
)
540547
elif metadata.source.url:
541548
objects = load_objects_from_url(
542-
metadata.source.url, format=source.format, expected_type=source.expected_type, **kwargs
549+
metadata.source.url,
550+
format=source.format,
551+
expected_type=source.expected_type,
552+
compression=source.compression,
553+
**kwargs,
543554
)
544555
self.insert(objects)
545556

@@ -746,6 +757,7 @@ def class_definition(self) -> Optional[ClassDefinition]:
746757
sv: SchemaView = self.parent.schema_view
747758
if sv:
748759
cls = sv.get_class(self.target_class_name)
760+
# cls = sv.schema.classes[self.target_class_name]
749761
if cls and not cls.attributes:
750762
if not sv.class_induced_slots(cls.name):
751763
for att in self._induce_attributes():
@@ -868,7 +880,7 @@ def induce_class_definition_from_objects(
868880
exact_dimensions_list.append(v.shape)
869881
break
870882
if isinstance(v, list):
871-
v = v[0]
883+
v = v[0] if v else None
872884
multivalueds.append(True)
873885
elif isinstance(v, dict):
874886
v = list(v.values())[0]

src/linkml_store/api/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class CollectionSource(ConfiguredBaseModel):
3333
refresh_interval_days: Optional[float] = None
3434
expected_type: Optional[str] = None
3535
format: Optional[str] = None
36+
compression: Optional[str] = None
3637
arguments: Optional[Dict[str, Any]] = None
3738

3839

@@ -73,11 +74,11 @@ class CollectionConfig(ConfiguredBaseModel):
7374
default=None,
7475
description="Metadata about the source",
7576
)
76-
# TODO: derived_from
7777
derived_from: Optional[List[DerivationConfiguration]] = Field(
7878
default=None,
7979
description="LinkML-Map derivations",
8080
)
81+
page_size: Optional[int] = Field(default=None, description="Suggested page size (items per page) in apps and APIs")
8182

8283

8384
class DatabaseConfig(ConfiguredBaseModel):

src/linkml_store/api/database.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
)
2020

2121
from linkml_store.api.types import CollectionType
22-
from linkml_store.utils.format_utils import load_objects, render_output
22+
from linkml_store.utils.format_utils import Format, load_objects, render_output
2323
from linkml_store.utils.patch_utils import PatchDict
2424

2525
try:
@@ -705,19 +705,35 @@ def drop(self, **kwargs):
705705
"""
706706
raise NotImplementedError()
707707

708-
def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
708+
def import_database(self, location: str, source_format: Optional[Union[str, Format]] = None, **kwargs):
    """
    Load the contents of a file or location into this database.

    A string ``source_format`` is first converted to a ``Format`` member.
    For the DuckDB SQL dump and MongoDB dump formats, the data is imported
    into a temporary database attached on the parent client (alias ``tmp``),
    and the rows of every collection in it are then stored in this database
    in a single ``store`` call, keyed by collection alias.
    For all other formats (or no format), the objects returned by
    ``load_objects`` are stored one by one.

    :param location: location of the file
    :param source_format: source format, as a string or ``Format`` member
    :param kwargs: additional arguments (not used by this implementation)
    """
    fmt = Format(source_format) if isinstance(source_format, str) else source_format
    via_native_dump = (
        isinstance(fmt, Format)
        and fmt.is_dump_format()
        and fmt in [Format.SQLDUMP_DUCKDB, Format.DUMP_MONGODB]
    )
    if not via_native_dump:
        # Generic path: parse the file and store each top-level object.
        for loaded in load_objects(location, format=fmt):
            self.store(loaded)
        return

    # import into a test instance; the format's value doubles as the handle
    staging_db = self.parent.attach_database(fmt.value, alias="tmp")
    # TODO: check for infinite recursion
    staging_db.import_database(location, source_format=fmt)
    # limit=-1 is used to fetch all rows of each collection
    rows_by_alias = {coll.alias: coll.find({}, limit=-1).rows for coll in staging_db.list_collections()}
    self.store(rows_by_alias)
719735

720-
def export_database(self, location: str, target_format: Optional[str] = None, **kwargs):
736+
def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
721737
"""
722738
Export a database to a file or location.
723739
@@ -726,10 +742,23 @@ def export_database(self, location: str, target_format: Optional[str] = None, **
726742
:param kwargs: additional arguments
727743
"""
728744
obj = {}
745+
if isinstance(target_format, str):
746+
target_format = Format(target_format)
729747
for coll in self.list_collections():
730748
qr = coll.find({}, limit=-1)
731749
obj[coll.alias] = qr.rows
732750
logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
751+
if isinstance(target_format, Format):
752+
if target_format.is_dump_format() and target_format in [Format.SQLDUMP_DUCKDB, Format.DUMP_MONGODB]:
753+
tmp_handle = target_format.value
754+
client = self.parent
755+
tmp_db = client.attach_database(tmp_handle, alias="tmp")
756+
tmp_db.store(obj)
757+
# TODO: check for infinite recursion
758+
tmp_db.export_database(location, target_format=target_format)
759+
return
760+
if Path(location).is_dir():
761+
raise ValueError(f"{location} is a directory; cannot write {target_format} to a dir")
733762
with open(location, "w", encoding="utf-8") as stream:
734763
stream.write(render_output(obj, format=target_format))
735764

src/linkml_store/api/stores/duckdb/duckdb_database.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
import json
22
import logging
33
from pathlib import Path
4-
from typing import Optional
4+
from typing import Optional, Union
55

66
import pandas as pd
77
import sqlalchemy
8-
from duckdb import DuckDBPyConnection
98
from linkml_runtime import SchemaView
109
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
1110
from linkml_runtime.utils.schema_builder import SchemaBuilder
@@ -14,6 +13,7 @@
1413
from linkml_store.api import Database
1514
from linkml_store.api.queries import Query, QueryResult
1615
from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
16+
from linkml_store.utils.format_utils import Format
1717
from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
1818

1919
TYPE_MAP = {
@@ -45,7 +45,7 @@ class DuckDBDatabase(Database):
4545
types are used for nested inlined objects.
4646
"""
4747

48-
_connection: DuckDBPyConnection = None
48+
# _connection: DuckDBPyConnection = None
4949
_engine: sqlalchemy.Engine = None
5050
collection_class = DuckDBCollection
5151

@@ -202,3 +202,31 @@ def induce_schema_view(self) -> SchemaView:
202202
cls = ClassDefinition(name=collection_metadata.type, attributes=collection_metadata.attributes)
203203
schema.classes[cls.name] = cls
204204
return SchemaView(schema)
205+
206+
def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
    """
    Export this database to a file or location.

    For the DuckDB dump format (``"duckdb"`` or ``Format.SQLDUMP_DUCKDB``) the
    export is performed by DuckDB itself via ``EXPORT DATABASE``; every other
    format is delegated to the parent implementation.

    :param location: target path for the export
    :param target_format: target format, as a string or ``Format`` member
    :param kwargs: additional arguments, forwarded to the parent implementation
        for non-DuckDB formats
    """
    if target_format == "duckdb" or target_format == Format.SQLDUMP_DUCKDB:
        path = Path(location)
        # A pre-existing regular file at the target is removed first;
        # an existing directory is left in place.
        if path.is_file():
            path.unlink()
        # The path is embedded in a SQL string literal, so single quotes must
        # be doubled; otherwise a path containing "'" yields broken SQL.
        escaped_location = str(location).replace("'", "''")
        with self.engine.connect() as conn:
            conn.execute(text(f"EXPORT DATABASE '{escaped_location}'"))
    else:
        super().export_database(location, target_format=target_format, **kwargs)
217+
218+
def import_database(self, location: str, source_format: Optional[Union[str, Format]] = None, **kwargs):
    """
    Import a database from a file or location.

    For the DuckDB dump format (``Format.SQLDUMP_DUCKDB`` or its string value)
    the import is performed by DuckDB itself via ``IMPORT DATABASE`` and then
    committed; every other format is delegated to the parent implementation.

    :param location: location of the file
    :param source_format: source format, as a string or ``Format`` member
    :param kwargs: additional arguments, forwarded to the parent implementation
        for non-DuckDB formats
    """
    if source_format == Format.SQLDUMP_DUCKDB.value or source_format == Format.SQLDUMP_DUCKDB:
        # The path is embedded in a SQL string literal, so single quotes must
        # be doubled; otherwise a path containing "'" yields broken SQL.
        escaped_location = str(location).replace("'", "''")
        with self.engine.connect() as conn:
            conn.execute(text(f"IMPORT DATABASE '{escaped_location}'"))
            conn.commit()
    else:
        super().import_database(location, source_format=source_format, **kwargs)

0 commit comments

Comments
 (0)