Skip to content

Commit acff573

Browse files
committed
experimental search
1 parent 79f2d1a commit acff573

File tree

8 files changed

+430
-185
lines changed

8 files changed

+430
-185
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Changes
22

33
## June 12, 2025
4+
- Added dandiset counts next to contact person names in experimental search panel
45
- Added dandi-index and implemented advanced search
56
- Added .env.local template for configuring PubNub keys in local development
67

python/dandi-index/dandi-index-query-job-runner/src/jobRunner.ts

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ type PubNubMessageObject = {
1616
status: 'accepted' | 'rejected' | 'completed' | 'error';
1717
error?: string;
1818
output?: string;
19+
part?: number;
20+
totalParts?: number;
1921
} | {
2022
type: 'probe-response';
2123
status: 'alive';
@@ -130,16 +132,24 @@ export class JobRunner {
130132

131133
try {
132134
console.info(`Starting execution of job ${jobMessage.jobId}`);
133-
let result = await this.executeJob(jobMessage);
135+
const result = await this.executeJob(jobMessage);
134136
console.info(`Job ${jobMessage.jobId} completed successfully`);
135-
if (result.length > 10000) {
136-
result = `${result.slice(0, 10000)}... [output truncated]`;
137+
if (result.length > 1000_000) {
138+
throw new Error(`Job ${jobMessage.jobId} output is too large (${result.length} characters)`);
139+
}
140+
const parts = Math.max(1, Math.ceil(result.length / 10000));
141+
for (let i = 0; i < parts; i++) {
142+
const start = i * 10000;
143+
const end = Math.min((i + 1) * 10000, result.length);
144+
const partResult = result.slice(start, end);
145+
await this.sendResponse({
146+
jobId: jobMessage.jobId,
147+
status: 'completed',
148+
output: partResult,
149+
part: i + 1,
150+
totalParts: parts
151+
});
137152
}
138-
await this.sendResponse({
139-
jobId: jobMessage.jobId,
140-
status: 'completed',
141-
output: result
142-
});
143153
} catch (error) {
144154
console.info(`Job ${jobMessage.jobId} failed:`, error);
145155
await this.sendResponse({

python/dandi-index/dandi-index-query-job-runner/src/scriptInterface.ts

Lines changed: 144 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,45 @@ export interface ScriptInterface {
1414
getDandiset: (o: {dandisetId: string}) => Promise<DandisetInfo | undefined>;
1515
}
1616

17+
// const _findDandisets = async (o: {search?: string}): Promise<DandisetInfo[]> => {
18+
// const params: FetchDandisetsParams = {};
19+
// if (o.search) {
20+
// params.search = o.search;
21+
// }
22+
// const vvv = '11';
23+
// const r = await getCachedResult<DandisetInfo[]>(
24+
// 'findDandisets',
25+
// vvv,
26+
// [params.search || ''],
27+
// 60 // Cache for 60 minutes
28+
// )
29+
// if (r) return r;
30+
// const result = await fetchDandisetsFromApi(params.search || '');
31+
// await setCachedResult('findDandisets', vvv, [params.search || ''], result);
32+
// return result;
33+
// }
34+
35+
// const _findNwbFiles = async (o: {dandisetId: string, version: string}): Promise<NwbFileInfo[]> => {
36+
// const vvv = '11';
37+
// const r = await getCachedResult<NwbFileInfo[]>(
38+
// 'findNwbFiles',
39+
// vvv,
40+
// [o.dandisetId, o.version],
41+
// o.version !== 'draft' ? 60 * 24 * 100 : 60 // Cache for 100 days if not draft, otherwise 60 minutes
42+
// );
43+
// if (r) return r;
44+
// const result = await fetchNwbFilesFromApi(o.dandisetId, o.version);
45+
// await setCachedResult('findNwbFiles', vvv, [o.dandisetId, o.version], result);
46+
// return result;
47+
// }
48+
1749
class DandiInterface {
1850
constructor(public o: {onStatusUpdate: (status: string) => void}) {
1951
this.o.onStatusUpdate('Dandi Interface initialized');
2052
}
2153
async getDandiset(o: {dandisetId: string}): Promise<DandiInterfaceDandiset | undefined> {
2254
this.o.onStatusUpdate(`Getting dandiset: ${o.dandisetId}`);
23-
const fname = '../data/dandi.json';
55+
const fname = `${baseDir}/dandi.json`;
2456
if (!fs.existsSync(fname)) {
2557
throw new Error(`Dandiset data file not found: ${fname}`);
2658
}
@@ -49,7 +81,7 @@ class DandiInterface {
4981
}
5082
async getDandisets(): Promise<DandiInterfaceDandiset[]> {
5183
this.o.onStatusUpdate('Getting dandisets...');
52-
const fname = '../data/dandi.json';
84+
const fname = `${baseDir}/dandi.json`;
5385
if (!fs.existsSync(fname)) {
5486
throw new Error(`Dandiset data file not found: ${fname}`);
5587
}
@@ -74,11 +106,11 @@ class DandiInterface {
74106
ds.star_count
75107
));
76108
}
77-
async findDandisets(o: {search?: string, /*semanticSearch?: string, */restrictToDandisets?: string[]}): Promise<DandiInterfaceDandiset[]> {
109+
async findDandisets(o: {search?: string, semanticSearch?: string, restrictToDandisets?: string[]}): Promise<DandiInterfaceDandiset[]> {
78110
if (o.search) {
79-
// if (o.semanticSearch) {
80-
// throw new Error('Cannot use both search and semanticSearch at the same time');
81-
// }
111+
if (o.semanticSearch) {
112+
throw new Error('Cannot use both search and semanticSearch at the same time');
113+
}
82114
if (o.restrictToDandisets) {
83115
throw new Error('Cannot use restrictToDandisets with search');
84116
}
@@ -98,21 +130,20 @@ class DandiInterface {
98130
ds.star_count
99131
));
100132
}
101-
// else if (o.semanticSearch) {
102-
// this.o.onStatusUpdate(`Performing semantic search for: ${o.semanticSearch}`);
103-
// const dandisetIds = await doDandisetSemanticSearch(o.semanticSearch, o.restrictToDandisets);
104-
// const result: DandiInterfaceDandiset[] = [];
105-
// for (const dandisetId of dandisetIds) {
106-
// const dandiset = await this.getDandiset({dandisetId});
107-
// if (dandiset) {
108-
// result.push(dandiset);
109-
// }
110-
// }
111-
// return result;
112-
// }
133+
else if (o.semanticSearch) {
134+
this.o.onStatusUpdate(`Performing semantic search for: ${o.semanticSearch}`);
135+
const dandisetIds = await doDandisetSemanticSearch(o.semanticSearch, o.restrictToDandisets);
136+
const result: DandiInterfaceDandiset[] = [];
137+
for (const dandisetId of dandisetIds) {
138+
const dandiset = await this.getDandiset({dandisetId});
139+
if (dandiset) {
140+
result.push(dandiset);
141+
}
142+
}
143+
return result;
144+
}
113145
else {
114-
// throw new Error('Either search or semanticSearch must be provided');
115-
throw new Error('No search criteria provided');
146+
throw new Error('Either search or semanticSearch must be provided');
116147
}
117148
}
118149
}
@@ -184,6 +215,8 @@ type DandisetMetadata = {
184215
manifestLocation: string[];
185216
}
186217

218+
const baseDir = "../data";
219+
187220
class DandiInterfaceDandiset {
188221
constructor(
189222
public dandiInterface: DandiInterface,
@@ -199,8 +232,8 @@ class DandiInterfaceDandiset {
199232
public star_count: number
200233
) {
201234
}
202-
async getNwbFiles(): Promise<DandiInterfaceNwbFile[]> {
203-
const dandisetFname = `../data/dandisets/${this.dandiset_id}/dandiset.json`;
235+
get nwbFiles(): DandiInterfaceNwbFile[] {
236+
const dandisetFname = `${baseDir}/dandisets/${this.dandiset_id}/dandiset.json`;
204237
if (!fs.existsSync(dandisetFname)) {
205238
throw new Error(`Dandiset file not found: ${dandisetFname}`);
206239
}
@@ -238,8 +271,8 @@ class DandiInterfaceDandiset {
238271
nwb.asset_id
239272
));
240273
}
241-
async getDandisetMetadata(): Promise<DandisetMetadata> {
242-
const dandisetFname = `../data/dandisets/${this.dandiset_id}/dandiset.json`;
274+
dandisetMetadata(): DandisetMetadata {
275+
const dandisetFname = `${baseDir}/dandisets/${this.dandiset_id}/dandiset.json`;
243276
if (!fs.existsSync(dandisetFname)) {
244277
throw new Error(`Dandiset metadata file not found: ${dandisetFname}`);
245278
}
@@ -274,6 +307,25 @@ class DandiInterfaceDandiset {
274307
}
275308

276309
class DandiInterfaceNwbFile {
310+
private assetData: {
311+
dandiset_id: string;
312+
asset_id: string;
313+
session_description: string;
314+
subject: {
315+
age: string;
316+
sex: string;
317+
genotype: string;
318+
species: string;
319+
subject_id: string;
320+
strain?: string;
321+
specimen_name?: string;
322+
};
323+
neurodata_objects: {
324+
path: string;
325+
type: string;
326+
description: string;
327+
}[];
328+
} | undefined;
277329
constructor(
278330
public dandiInterface: DandiInterface,
279331
public dandiset_id: string,
@@ -283,31 +335,35 @@ class DandiInterfaceNwbFile {
283335
public asset_id: string
284336
) {
285337
}
286-
getNeurodataObjects = async (): Promise<DandiInterfaceNeurodataObject[]> => {
287-
const fname = `../data/dandisets/${this.dandiset_id}/assets.v7/${this.asset_id}.json`;
338+
339+
_loadAssetData = () => {
340+
if (this.assetData) return;
341+
const fname = `${baseDir}/dandisets/${this.dandiset_id}/assets.v7/${this.asset_id}.json`;
288342
if (!fs.existsSync(fname)) {
289-
// maybe it wasn't created yet
290-
return [];
343+
return
291344
}
292345
const fileContent = fs.readFileSync(fname, 'utf8');
293-
let assetData: {
294-
dandiset_id: string;
295-
asset_id: string;
296-
neurodata_objects: {
297-
path: string;
298-
type: string;
299-
description: string;
300-
}[];
301-
}
302346
try {
303-
assetData = JSON.parse(fileContent);
304-
} catch (error) {
347+
this.assetData = JSON.parse(fileContent);
348+
}
349+
catch (error) {
305350
throw new Error(`Failed to parse JSON from ${fname}: ${error}`);
306351
}
307-
if (assetData.dandiset_id !== this.dandiset_id || assetData.asset_id !== this.asset_id) {
308-
throw new Error(`Asset ID or dandiset ID mismatch: expected ${this.dandiset_id}/${this.asset_id}, got ${assetData.dandiset_id}/${assetData.asset_id}`);
352+
if (!this.assetData) {
353+
throw new Error(`Asset data is empty for ${fname}`);
354+
}
355+
if (this.assetData.dandiset_id !== this.dandiset_id || this.assetData.asset_id !== this.asset_id) {
356+
throw new Error(`Asset ID or dandiset ID mismatch: expected ${this.dandiset_id}/${this.asset_id}, got ${this.assetData.dandiset_id}/${this.assetData.asset_id}`);
309357
}
310-
return assetData.neurodata_objects.map(no => new DandiInterfaceNeurodataObject({
358+
}
359+
360+
get neurodataObjects(): DandiInterfaceNeurodataObject[] {
361+
this._loadAssetData();
362+
if (!this.assetData) {
363+
// may not be created yet
364+
return []
365+
}
366+
return this.assetData.neurodata_objects.map(no => new DandiInterfaceNeurodataObject({
311367
dandisetId: this.dandiset_id,
312368
version: this.version,
313369
assetId: this.asset_id,
@@ -316,6 +372,22 @@ class DandiInterfaceNwbFile {
316372
description: no.description
317373
}));
318374
}
375+
get session_description(): string {
376+
this._loadAssetData();
377+
if (!this.assetData) {
378+
// may not be created yet
379+
return '';
380+
}
381+
return this.assetData.session_description || '';
382+
}
383+
get subject() {
384+
this._loadAssetData();
385+
if (!this.assetData) {
386+
// may not be created yet
387+
return {};
388+
}
389+
return this.assetData.subject || {};
390+
}
319391
}
320392

321393
class DandiInterfaceNeurodataObject {
@@ -374,7 +446,7 @@ export function createScriptInterface(onStatusUpdate: (status: string) => void):
374446
getDandisets: async () => {
375447
return await dandiInterface.getDandisets();
376448
},
377-
findDandisets: async (o: {search?: string, /*semanticSearch?: string*/}) => {
449+
findDandisets: async (o: {search?: string, semanticSearch?: string}) => {
378450
return await dandiInterface.findDandisets(o);
379451
},
380452
getDandiset: async (o: {dandisetId: string}) => {
@@ -396,33 +468,33 @@ export interface DandisetInfo {
396468
star_count: number;
397469
}
398470

399-
// const doDandisetSemanticSearch = async (query: string, restrictToDandisets?: string[]): Promise<string[]> => {
400-
// const url = 'https://neurosift-chat-agent-tools.vercel.app/api/dandi_semantic_search';
401-
// const params: {[key: string]: any} = {
402-
// query,
403-
// limit: 20
404-
// }
405-
// if (restrictToDandisets && restrictToDandisets.length > 0) {
406-
// params['dandisets'] = restrictToDandisets;
407-
// }
408-
// // POST
409-
// const response = await fetch(url, {
410-
// method: 'POST',
411-
// headers: {
412-
// 'Content-Type': 'application/json'
413-
// },
414-
// body: JSON.stringify(params)
415-
// });
416-
// if (!response.ok) {
417-
// throw new Error(`Failed to perform semantic search: ${response.statusText}`);
418-
// }
419-
// const data: any = await response.json();
420-
// if (!data) {
421-
// throw new Error('Semantic search returned no data');
422-
// }
423-
// if (typeof data !== 'object' || !Array.isArray(data.results)) {
424-
// throw new Error('Semantic search returned invalid data format');
425-
// }
426-
// return data.results.map((item: {id: string}) => item.id);
427-
// };
471+
const doDandisetSemanticSearch = async (query: string, restrictToDandisets?: string[]): Promise<string[]> => {
472+
const url = 'https://neurosift-chat-agent-tools.vercel.app/api/dandi_semantic_search';
473+
const params: {[key: string]: any} = {
474+
query,
475+
limit: 20
476+
}
477+
if (restrictToDandisets && restrictToDandisets.length > 0) {
478+
params['dandisets'] = restrictToDandisets;
479+
}
480+
// POST
481+
const response = await fetch(url, {
482+
method: 'POST',
483+
headers: {
484+
'Content-Type': 'application/json'
485+
},
486+
body: JSON.stringify(params)
487+
});
488+
if (!response.ok) {
489+
throw new Error(`Failed to perform semantic search: ${response.statusText}`);
490+
}
491+
const data: any = await response.json();
492+
if (!data) {
493+
throw new Error('Semantic search returned no data');
494+
}
495+
if (typeof data !== 'object' || !Array.isArray(data.results)) {
496+
throw new Error('Semantic search returned invalid data format');
497+
}
498+
return data.results.map((item: {id: string}) => item.id);
499+
};
428500

0 commit comments

Comments
 (0)