Skip to content

Commit 2411c9f

Browse files
feat: Auto-create index for MongoDB vector store (if not exists) (#1139)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
1 parent be3e280 commit 2411c9f

File tree

5 files changed

+61
-80
lines changed

5 files changed

+61
-80
lines changed

.changeset/funny-ants-do.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"llamaindex": patch
3+
---
4+
5+
Auto-create index for MongoDB vector store (if not exists)

examples/mongodb/2_load_and_index.ts

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -45,39 +45,6 @@ async function loadAndIndex() {
4545
await client.close();
4646
}
4747

48-
/**
49-
* This method is documented in https://www.mongodb.com/docs/atlas/atlas-search/create-index/#create-an-fts-index-programmatically
50-
* However, a 'CommandNotFound' error occurred during testing, so we're not using it here.
51-
*/
52-
async function createSearchIndex() {
53-
const client = new MongoClient(mongoUri);
54-
const database = client.db(databaseName);
55-
const collection = database.collection(vectorCollectionName);
56-
57-
// define your Atlas Search index
58-
const index = {
59-
name: indexName,
60-
definition: {
61-
/* search index definition fields */
62-
mappings: {
63-
dynamic: true,
64-
fields: [
65-
{
66-
type: "vector",
67-
path: "embedding",
68-
numDimensions: 1536,
69-
similarity: "cosine",
70-
},
71-
],
72-
},
73-
},
74-
};
75-
// run the helper method
76-
const result = await collection.createSearchIndex(index);
77-
console.log("Successfully created search index:", result);
78-
await client.close();
79-
}
80-
8148
loadAndIndex().catch(console.error);
8249

8350
// you can't query your index yet because you need to create a vector search index in mongodb's UI now

examples/mongodb/3_query.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ async function query() {
2121
const retriever = index.asRetriever({ similarityTopK: 20 });
2222
const queryEngine = index.asQueryEngine({ retriever });
2323
const result = await queryEngine.query({
24-
query: "What does the author think of web frameworks?",
24+
query: "What does author receive when he was 11 years old?", // Isaac Asimov's "Foundation" for Christmas
2525
});
2626
console.log(result.response);
2727
await client.close();

examples/mongodb/README.md

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -68,45 +68,6 @@ What you're doing here is creating a Reader which loads the data out of Mongo in
6868

6969
Now you're creating a vector search client for Mongo. In addition to a MongoDB client object, you again tell it what database everything is in. This time you give it the name of the collection where you'll store the vector embeddings, and the name of the vector search index you'll create in the next step.
7070

71-
### Create a vector search index
72-
73-
Now if all has gone well you should be able to log in to the Mongo Atlas UI and see two collections in your database: the original data in `tiny_tweets_collection`, and the vector embeddings in `tiny_tweets_vectors`.
74-
75-
![MongoDB Atlas collections](./docs/3_vectors_in_db.png)
76-
77-
Now it's time to create the vector search index so that you can query the data.
78-
It's not yet possible to programmatically create a vector search index using the [`createIndex`](https://www.mongodb.com/docs/manual/reference/method/db.collection.createIndex/) function, so we have to create one manually in the UI.
79-
To do so, first, click the 'Atlas Search' tab, and then click "Create Search Index":
80-
81-
![MongoDB Atlas create search index](./docs/4_search_tab.png)
82-
83-
We have to use the JSON editor, as the Visual Editor does not yet support creating a vector search index:
84-
85-
![MongoDB Atlas JSON editor](./docs/5_json_editor.png)
86-
87-
Now under "database and collection" select `tiny_tweets_db` and within that select `tiny_tweets_vectors`. Then under "Index name" enter `tiny_tweets_vector_index` (or whatever value you put for MONGODB_VECTOR_INDEX in `.env`). Under that, you'll want to enter this JSON object:
88-
89-
```json
90-
{
91-
"fields": [
92-
{
93-
"type": "vector",
94-
"path": "embedding",
95-
"numDimensions": 1536,
96-
"similarity": "cosine"
97-
}
98-
]
99-
}
100-
```
101-
102-
This tells Mongo that the `embedding` field in each document (in the `tiny_tweets_vectors` collection) is a vector of 1536 dimensions (this is the size of embeddings used by OpenAI), and that we want to use cosine similarity to compare vectors. You don't need to worry too much about these values unless you want to use a different LLM to OpenAI entirely.
103-
104-
The UI will ask you to review and confirm your choices, then you need to wait a minute or two while it generates the index. If all goes well, you should see something like this screen:
105-
106-
![MongoDB Atlas index created](./docs/7_index_created.png)
107-
108-
Now you're ready to query your data!
109-
11071
### Run a test query
11172

11273
You can do this by running

packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ export class MongoDBAtlasVectorSearch
3535
storesText: boolean = true;
3636
flatMetadata: boolean = true;
3737

38+
dbName: string;
39+
collectionName: string;
40+
autoCreateIndex: boolean;
41+
3842
/**
3943
* The used MongoClient. If not given, a new MongoClient is created based on the MONGODB_URI env variable.
4044
*/
@@ -92,13 +96,28 @@ export class MongoDBAtlasVectorSearch
9296
* Default: query.similarityTopK * 10
9397
*/
9498
numCandidates: (query: VectorStoreQuery) => number;
95-
private collection: Collection;
99+
private collection?: Collection;
100+
101+
// Definition of the Atlas Search index. For details, see https://www.mongodb.com/docs/atlas/atlas-search/field-types/knn-vector/
102+
readonly SEARCH_INDEX_DEFINITION = {
103+
mappings: {
104+
dynamic: true,
105+
fields: {
106+
embedding: {
107+
type: "knnVector",
108+
dimensions: 1536,
109+
similarity: "cosine",
110+
},
111+
},
112+
},
113+
};
96114

97115
constructor(
98116
init: Partial<MongoDBAtlasVectorSearch> & {
99117
dbName: string;
100118
collectionName: string;
101119
embedModel?: BaseEmbedding;
120+
autoCreateIndex?: boolean;
102121
},
103122
) {
104123
super(init.embedModel);
@@ -114,9 +133,9 @@ export class MongoDBAtlasVectorSearch
114133
this.mongodbClient = new MongoClient(mongoUri);
115134
}
116135

117-
this.collection = this.mongodbClient
118-
.db(init.dbName ?? "default_db")
119-
.collection(init.collectionName ?? "default_collection");
136+
this.dbName = init.dbName ?? "default_db";
137+
this.collectionName = init.collectionName ?? "default_collection";
138+
this.autoCreateIndex = init.autoCreateIndex ?? true;
120139
this.indexName = init.indexName ?? "default";
121140
this.embeddingKey = init.embeddingKey ?? "embedding";
122141
this.idKey = init.idKey ?? "id";
@@ -127,6 +146,32 @@ export class MongoDBAtlasVectorSearch
127146
this.insertOptions = init.insertOptions;
128147
}
129148

149+
async ensureCollection() {
150+
if (!this.collection) {
151+
const collection = await this.mongodbClient
152+
.db(this.dbName)
153+
.createCollection(this.collectionName);
154+
155+
this.collection = collection;
156+
}
157+
158+
if (this.autoCreateIndex) {
159+
const searchIndexes = await this.collection.listSearchIndexes().toArray();
160+
const indexExists = searchIndexes.some(
161+
(index) => index.name === this.indexName,
162+
);
163+
if (!indexExists) {
164+
await this.collection.createSearchIndex({
165+
name: this.indexName,
166+
definition: this.SEARCH_INDEX_DEFINITION,
167+
});
168+
console.log("Created search index: ", this.indexName);
169+
}
170+
}
171+
172+
return this.collection;
173+
}
174+
130175
/**
131176
* Add nodes to the vector store.
132177
*
@@ -154,7 +199,8 @@ export class MongoDBAtlasVectorSearch
154199
});
155200

156201
console.debug("Inserting data into MongoDB: ", dataToInsert);
157-
const insertResult = await this.collection.insertMany(
202+
const collection = await this.ensureCollection();
203+
const insertResult = await collection.insertMany(
158204
dataToInsert,
159205
this.insertOptions,
160206
);
@@ -169,7 +215,8 @@ export class MongoDBAtlasVectorSearch
169215
* @param deleteOptions Options to pass to the deleteMany function
170216
*/
171217
async delete(refDocId: string, deleteOptions?: any): Promise<void> {
172-
await this.collection.deleteMany(
218+
const collection = await this.ensureCollection();
219+
await collection.deleteMany(
173220
{
174221
[`${this.metadataKey}.ref_doc_id`]: refDocId,
175222
},
@@ -215,7 +262,8 @@ export class MongoDBAtlasVectorSearch
215262
];
216263

217264
console.debug("Running query pipeline: ", pipeline);
218-
const cursor = await this.collection.aggregate(pipeline);
265+
const collection = await this.ensureCollection();
266+
const cursor = await collection.aggregate(pipeline);
219267

220268
const nodes: BaseNode[] = [];
221269
const ids: string[] = [];

0 commit comments

Comments
 (0)