Skip to content

Commit 653a6d9

Browse files
authored
Merge pull request #189 from lsst-sqre/tickets/DM-51522
DM-51522: Initial implementation of bibliography API
2 parents 26d9b8a + 600dc27 commit 653a6d9

38 files changed

+4492
-271
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: CI
22

33
env:
4-
UV_VERSION: "0.7.20"
4+
UV_VERSION: "0.7.22"
55

66
"on":
77
merge_group: {}

.github/workflows/periodic-ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
name: Periodic CI
77

88
env:
9-
UV_VERSION: "0.7.20"
9+
UV_VERSION: "0.7.22"
1010

1111
"on":
1212
schedule:

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ repos:
1111
args: [--autofix, --indent=2, '--top-keys=name,doc,type']
1212

1313
- repo: https://github.com/astral-sh/uv-pre-commit
14-
rev: 0.7.20
14+
rev: 0.7.21
1515
hooks:
1616
- id: uv-lock
1717

1818
- repo: https://github.com/astral-sh/ruff-pre-commit
19-
rev: v0.12.3
19+
rev: v0.12.4
2020
hooks:
2121
- id: ruff-check
2222
args: [--fix, --exit-non-zero-on-fix]

CHANGELOG.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,30 @@
22

33
<!-- scriv-insert-here -->
44

5+
<a id='changelog-0.18.0'></a>
6+
7+
## 0.18.0 (2025-07-29)
8+
9+
### Backwards-incompatible changes
10+
11+
- This release requires a database migration, from Alembic revision `113ced7d2d29` to `1ad667eab84e`, which adds new tables for the resources API.
12+
13+
### New features
14+
15+
- Ook now has a bibliographic resource API for storing metadata records about Rubin Observatory documentation (technical notes, documents, user guides), software code bases, and other resources:
16+
17+
- Core data model designed to be compatible with DataCite concepts for straightforward integration with DataCite DOI registration.
18+
19+
- Polymorphic resource model allows different types of resources (documents, software, datasets) to be stored efficiently. This release demonstrates this model with a `Document` resource type.
20+
21+
- Support for relationships between records and external references (such as papers with DOIs). Relationships are annotated with DataCite relationship types to enable features such as reference tracking and tracing documents that supersede other documents.
22+
23+
- Integration with the existing author API for both author lists and tracking other types of contributors.
24+
25+
- Resources are available through `GET /resources` and `GET /resources/{id}` endpoints. These endpoints should be considered experimental and subject to change in future releases.
26+
27+
This bibliographic API will enable features such as sophisticated documentation search APIs and user interfaces, automation for DOI registration, and more. Future releases will integrate Ook's existing documentation ingest processes with the bibliographic database and develop API endpoints for querying and managing bibliographic resources.
28+
529
<a id='changelog-0.17.0'></a>
630

731
## 0.17.0 (2025-07-15)

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
2222
FROM base-image AS install-image
2323

2424
# Install uv.
25-
COPY --from=ghcr.io/astral-sh/uv:0.7.20 /uv /bin/uv
25+
COPY --from=ghcr.io/astral-sh/uv:0.7.22 /uv /bin/uv
2626

2727
# Install system packages only needed for building dependencies.
2828
COPY scripts/install-dependency-packages.sh .
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
"""Add resource tables
2+
3+
Revision ID: 1ad667eab84e
4+
Revises: 113ced7d2d29
5+
Create Date: 2025-07-18 20:48:07.574879+00:00
6+
"""
7+
8+
from collections.abc import Sequence
9+
10+
import sqlalchemy as sa
11+
12+
from alembic import op
13+
14+
# revision identifiers, used by Alembic.
15+
revision: str = "1ad667eab84e"
16+
down_revision: str | None = "113ced7d2d29"
17+
branch_labels: str | Sequence[str] | None = None
18+
depends_on: str | Sequence[str] | None = None
19+
20+
21+
def upgrade() -> None:
    """Create the bibliographic resource tables and their indexes.

    Tables are created in foreign-key dependency order:
    ``external_reference`` and ``resource`` first, followed by
    ``contributor``, ``document_resource`` and ``resource_relation``, which
    reference them. The ``author`` table referenced by ``contributor`` is
    not created here and must already exist.
    """
    op.create_table(
        "external_reference",
        sa.Column("id", sa.BigInteger(), nullable=False),
        sa.Column("url", sa.UnicodeText(), nullable=True),
        sa.Column("doi", sa.UnicodeText(), nullable=True),
        sa.Column("arxiv_id", sa.UnicodeText(), nullable=True),
        sa.Column("isbn", sa.UnicodeText(), nullable=True),
        sa.Column("issn", sa.UnicodeText(), nullable=True),
        sa.Column("ads_bibcode", sa.UnicodeText(), nullable=True),
        sa.Column("type", sa.UnicodeText(), nullable=True),
        sa.Column("title", sa.UnicodeText(), nullable=True),
        sa.Column("publication_year", sa.UnicodeText(), nullable=True),
        sa.Column("volume", sa.UnicodeText(), nullable=True),
        sa.Column("issue", sa.UnicodeText(), nullable=True),
        sa.Column("number", sa.UnicodeText(), nullable=True),
        sa.Column("number_type", sa.UnicodeText(), nullable=True),
        sa.Column("first_page", sa.UnicodeText(), nullable=True),
        sa.Column("last_page", sa.UnicodeText(), nullable=True),
        sa.Column("publisher", sa.UnicodeText(), nullable=True),
        sa.Column("edition", sa.UnicodeText(), nullable=True),
        sa.Column("contributors", sa.JSON(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        # Each persistent identifier is optional but unique when present.
        sa.UniqueConstraint("doi"),
        sa.UniqueConstraint("arxiv_id"),
        sa.UniqueConstraint("isbn"),
        sa.UniqueConstraint("issn"),
        sa.UniqueConstraint("ads_bibcode"),
    )
    op.create_table(
        "resource",
        # autoincrement=False: the database does not generate this key.
        sa.Column("id", sa.BigInteger(), autoincrement=False, nullable=False),
        sa.Column("resource_class", sa.String(), nullable=True),
        sa.Column("date_created", sa.DateTime(timezone=True), nullable=False),
        sa.Column("date_updated", sa.DateTime(timezone=True), nullable=False),
        sa.Column("title", sa.UnicodeText(), nullable=False),
        sa.Column("description", sa.UnicodeText(), nullable=True),
        sa.Column("url", sa.UnicodeText(), nullable=True),
        sa.Column("doi", sa.UnicodeText(), nullable=True),
        sa.Column(
            "date_resource_published",
            sa.DateTime(timezone=True),
            nullable=True,
        ),
        sa.Column(
            "date_resource_updated", sa.DateTime(timezone=True), nullable=True
        ),
        sa.Column("version", sa.String(), nullable=True),
        sa.Column("type", sa.String(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("doi"),
    )
    op.create_table(
        "contributor",
        sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False),
        sa.Column("resource_id", sa.BigInteger(), nullable=False),
        sa.Column("order", sa.Integer(), nullable=False),
        sa.Column("role", sa.UnicodeText(), nullable=False),
        sa.Column("author_id", sa.BigInteger(), nullable=True),
        sa.ForeignKeyConstraint(
            ["author_id"],
            ["author.id"],
        ),
        sa.ForeignKeyConstraint(
            ["resource_id"],
            ["resource.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
        # A given position within a role is used at most once per resource.
        sa.UniqueConstraint(
            "resource_id",
            "order",
            "role",
            name="uq_contributor_resource_order_role",
        ),
    )
    op.create_table(
        "document_resource",
        # The primary key is also a foreign key to resource.id, so each row
        # is paired with exactly one resource row.
        sa.Column("id", sa.BigInteger(), nullable=False),
        sa.Column("series", sa.UnicodeText(), nullable=False),
        sa.Column("handle", sa.UnicodeText(), nullable=False),
        sa.Column("generator", sa.UnicodeText(), nullable=True),
        sa.Column("number", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["id"],
            ["resource.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("handle"),
        sa.UniqueConstraint(
            "series", "number", name="uq_document_series_number"
        ),
    )
    op.create_table(
        "resource_relation",
        sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False),
        sa.Column("source_resource_id", sa.BigInteger(), nullable=False),
        sa.Column("related_resource_id", sa.BigInteger(), nullable=True),
        sa.Column("related_external_ref_id", sa.BigInteger(), nullable=True),
        sa.Column("relation_type", sa.UnicodeText(), nullable=False),
        sa.ForeignKeyConstraint(
            ["related_external_ref_id"],
            ["external_reference.id"],
        ),
        sa.ForeignKeyConstraint(
            ["related_resource_id"],
            ["resource.id"],
        ),
        sa.ForeignKeyConstraint(
            ["source_resource_id"],
            ["resource.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
        # The target of a relation is either an internal resource or an
        # external reference, never both and never neither.
        sa.CheckConstraint(
            "(related_resource_id IS NOT NULL AND "
            "related_external_ref_id IS NULL) OR "
            "(related_resource_id IS NULL AND "
            "related_external_ref_id IS NOT NULL)",
            name="chk_exactly_one_related",
        ),
        # Because of chk_exactly_one_related, every row has a NULL in one of
        # the two related_* columns. Unique constraints treat NULLs as
        # distinct by default, so this four-column constraint cannot reject
        # a duplicate relation on its own. It is retained so the constraint
        # name stays available to anything that refers to it.
        sa.UniqueConstraint(
            "source_resource_id",
            "related_resource_id",
            "related_external_ref_id",
            "relation_type",
            name="uq_resource_relation",
        ),
        # Effective uniqueness, one constraint per target kind. Rows whose
        # target column is NULL are ignored by the respective constraint.
        # NOTE(review): the ORM model for this table should declare the same
        # two constraints -- confirm against the model definition.
        sa.UniqueConstraint(
            "source_resource_id",
            "related_resource_id",
            "relation_type",
            name="uq_resource_relation_resource",
        ),
        sa.UniqueConstraint(
            "source_resource_id",
            "related_external_ref_id",
            "relation_type",
            name="uq_resource_relation_external_ref",
        ),
    )

    # Add indexes for query optimization
    op.create_index(
        "idx_resource_class", "resource", ["resource_class"], unique=False
    )
    op.create_index(
        "idx_resource_date_published",
        "resource",
        ["date_resource_published"],
        unique=False,
    )
    op.create_index(
        "idx_resource_date_updated",
        "resource",
        ["date_resource_updated"],
        unique=False,
    )
    op.create_index(
        "idx_resource_relation_source",
        "resource_relation",
        ["source_resource_id"],
        unique=False,
    )
    op.create_index(
        "idx_resource_relation_type",
        "resource_relation",
        ["relation_type"],
        unique=False,
    )
    # NOTE(review): uq_document_series_number already covers
    # (series, number); on backends that implement unique constraints with an
    # index this one is likely redundant. Kept because downgrade() drops it
    # by name.
    op.create_index(
        "idx_document_series_number",
        "document_resource",
        ["series", "number"],
        unique=False,
    )
183+
184+
185+
def downgrade() -> None:
    """Remove the resource indexes and tables created by this revision.

    Indexes are dropped first, then tables, with referencing tables removed
    before the tables they point at.
    """
    # (index name, owning table), in the order they are dropped.
    indexes_to_drop = (
        ("idx_document_series_number", "document_resource"),
        ("idx_resource_relation_type", "resource_relation"),
        ("idx_resource_relation_source", "resource_relation"),
        ("idx_resource_date_updated", "resource"),
        ("idx_resource_date_published", "resource"),
        ("idx_resource_class", "resource"),
    )
    for index_name, owning_table in indexes_to_drop:
        op.drop_index(index_name, table_name=owning_table)

    # Referencing tables go before "resource" and "external_reference" so
    # no foreign key is left pointing at a dropped table.
    tables_to_drop = (
        "resource_relation",
        "document_resource",
        "contributor",
        "resource",
        "external_reference",
    )
    for table_name in tables_to_drop:
        op.drop_table(table_name)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ dependencies = [
3939
"aiohttp>=2.0,<4.0", # for algoliasearch
4040
"async_timeout>=2.0,<4.0", # for algoliasearch
4141
"pylatexenc>=2.10",
42+
"base32-lib>=1.0.2",
4243
]
4344
dynamic = ["version"]
4445

src/ook/dbschema/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,26 @@
44
from .base import Base
55
from .glossary import SqlTerm, term_relationships
66
from .links import SqlLink, SqlSdmColumnLink, SqlSdmSchemaLink, SqlSdmTableLink
7+
from .resources import (
8+
SqlContributor,
9+
SqlDocumentResource,
10+
SqlExternalReference,
11+
SqlResource,
12+
SqlResourceRelation,
13+
)
714
from .sdmschemas import SqlSdmColumn, SqlSdmSchema, SqlSdmTable
815

916
__all__ = [
1017
"Base",
1118
"SqlAffiliation",
1219
"SqlAuthor",
1320
"SqlAuthorAffiliation",
21+
"SqlContributor",
22+
"SqlDocumentResource",
23+
"SqlExternalReference",
1424
"SqlLink",
25+
"SqlResource",
26+
"SqlResourceRelation",
1527
"SqlSdmColumn",
1628
"SqlSdmColumnLink",
1729
"SqlSdmSchema",

0 commit comments

Comments
 (0)