Commit be3e280: Updates references to SimpleNodeParser to SentenceSplitter. (#1129)

1 parent: 2afcbe6
File tree: 16 files changed, +47 −51 lines

apps/docs/docs/modules/ingestion_pipeline/index.md

Lines changed: 4 additions & 4 deletions

@@ -16,7 +16,7 @@ import {
   MetadataMode,
   OpenAIEmbedding,
   TitleExtractor,
-  SimpleNodeParser,
+  SentenceSplitter,
 } from "llamaindex";
 
 async function main() {
@@ -29,7 +29,7 @@ async function main() {
   const document = new Document({ text: essay, id_: path });
   const pipeline = new IngestionPipeline({
     transformations: [
-      new SimpleNodeParser({ chunkSize: 1024, chunkOverlap: 20 }),
+      new SentenceSplitter({ chunkSize: 1024, chunkOverlap: 20 }),
       new TitleExtractor(),
       new OpenAIEmbedding(),
     ],
@@ -62,7 +62,7 @@ import {
   MetadataMode,
   OpenAIEmbedding,
   TitleExtractor,
-  SimpleNodeParser,
+  SentenceSplitter,
   QdrantVectorStore,
   VectorStoreIndex,
 } from "llamaindex";
@@ -81,7 +81,7 @@ async function main() {
   const document = new Document({ text: essay, id_: path });
   const pipeline = new IngestionPipeline({
     transformations: [
-      new SimpleNodeParser({ chunkSize: 1024, chunkOverlap: 20 }),
+      new SentenceSplitter({ chunkSize: 1024, chunkOverlap: 20 }),
       new TitleExtractor(),
       new OpenAIEmbedding(),
     ],
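Because this commit is a purely mechanical identifier rename (the constructor options are unchanged), the same migration can be applied to downstream code with a one-line rewrite. A minimal sketch; the `migrateSource` helper is hypothetical and not part of this commit or of llamaindex:

```typescript
// Hypothetical helper sketching the mechanical rename this commit performs:
// every standalone identifier SimpleNodeParser becomes SentenceSplitter.
function migrateSource(source: string): string {
  // \b word boundaries keep longer identifiers untouched.
  return source.replace(/\bSimpleNodeParser\b/g, "SentenceSplitter");
}

const before = "new SimpleNodeParser({ chunkSize: 1024, chunkOverlap: 20 })";
console.log(migrateSource(before));
// new SentenceSplitter({ chunkSize: 1024, chunkOverlap: 20 })
```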

apps/docs/docs/modules/ingestion_pipeline/transformations.md

Lines changed: 8 additions & 8 deletions

@@ -4,7 +4,7 @@ A transformation is something that takes a list of nodes as an input, and return
 
 Currently, the following components are Transformation objects:
 
-- [SimpleNodeParser](../../api/classes/SimpleNodeParser.md)
+- [SentenceSplitter](../../api/classes/SentenceSplitter.md)
 - [MetadataExtractor](../documents_and_nodes/metadata_extraction.md)
 - [Embeddings](../embeddings/index.md)
 
@@ -13,10 +13,10 @@ Currently, the following components are Transformation objects:
 While transformations are best used with with an IngestionPipeline, they can also be used directly.
 
 ```ts
-import { SimpleNodeParser, TitleExtractor, Document } from "llamaindex";
+import { SentenceSplitter, TitleExtractor, Document } from "llamaindex";
 
 async function main() {
-  let nodes = new SimpleNodeParser().getNodesFromDocuments([
+  let nodes = new SentenceSplitter().getNodesFromDocuments([
     new Document({ text: "I am 10 years old. John is 20 years old." }),
   ]);
 
@@ -34,15 +34,15 @@ main().catch(console.error);
 
 ## Custom Transformations
 
-You can implement any transformation yourself by implementing the `TransformerComponent`.
+You can implement any transformation yourself by implementing the `TransformComponent`.
 
-The following custom transformation will remove any special characters or punctutaion in text.
+The following custom transformation will remove any special characters or punctutation in text.
 
 ```ts
-import { TransformerComponent, Node } from "llamaindex";
+import { TransformComponent, TextNode } from "llamaindex";
 
-class RemoveSpecialCharacters extends TransformerComponent {
-  async transform(nodes: Node[]): Promise<Node[]> {
+export class RemoveSpecialCharacters extends TransformComponent {
+  async transform(nodes: TextNode[]): Promise<TextNode[]> {
     for (const node of nodes) {
       node.text = node.text.replace(/[^\w\s]/gi, "");
     }
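The `replace` call in the custom transformation above is the whole of its cleaning logic. A self-contained sketch of just that step, runnable without llamaindex:

```typescript
// Same cleaning rule as RemoveSpecialCharacters above: [^\w\s] matches any
// character that is neither a word character nor whitespace, so punctuation
// and special characters are dropped while letters, digits, and spaces stay.
function removeSpecialCharacters(text: string): string {
  return text.replace(/[^\w\s]/gi, "");
}

console.log(removeSpecialCharacters("Hello, world! I'm 10 years old."));
// Hello world Im 10 years old
```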

apps/docs/docs/modules/node_parser.md

Lines changed: 2 additions & 3 deletions

@@ -7,9 +7,9 @@ sidebar_position: 4
 The `NodeParser` in LlamaIndex is responsible for splitting `Document` objects into more manageable `Node` objects. When you call `.fromDocuments()`, the `NodeParser` from the `Settings` is used to do this automatically for you. Alternatively, you can use it to split documents ahead of time.
 
 ```typescript
-import { Document, SimpleNodeParser } from "llamaindex";
+import { Document, SentenceSplitter } from "llamaindex";
 
-const nodeParser = new SimpleNodeParser();
+const nodeParser = new SentenceSplitter();
 
 Settings.nodeParser = nodeParser;
 ```
@@ -93,6 +93,5 @@ The output metadata will be something like:
 
 ## API Reference
 
-- [SimpleNodeParser](../api/classes/SimpleNodeParser.md)
 - [SentenceSplitter](../api/classes/SentenceSplitter.md)
 - [MarkdownNodeParser](../api/classes/MarkdownNodeParser.md)

apps/docs/docs/modules/query_engines/router_query_engine.md

Lines changed: 5 additions & 5 deletions

@@ -15,7 +15,7 @@ import {
   OpenAI,
   RouterQueryEngine,
   SimpleDirectoryReader,
-  SimpleNodeParser,
+  SentenceSplitter,
   SummaryIndex,
   VectorStoreIndex,
   Settings,
@@ -34,11 +34,11 @@ const documents = await new SimpleDirectoryReader().loadData({
 
 ## Service Context
 
-Next, we need to define some basic rules and parse the documents into nodes. We will use the `SimpleNodeParser` to parse the documents into nodes and `Settings` to define the rules (eg. LLM API key, chunk size, etc.):
+Next, we need to define some basic rules and parse the documents into nodes. We will use the `SentenceSplitter` to parse the documents into nodes and `Settings` to define the rules (eg. LLM API key, chunk size, etc.):
 
 ```ts
 Settings.llm = new OpenAI();
-Settings.nodeParser = new SimpleNodeParser({
+Settings.nodeParser = new SentenceSplitter({
   chunkSize: 1024,
 });
 ```
@@ -104,14 +104,14 @@ import {
   OpenAI,
   RouterQueryEngine,
   SimpleDirectoryReader,
-  SimpleNodeParser,
+  SentenceSplitter,
   SummaryIndex,
   VectorStoreIndex,
   Settings,
 } from "llamaindex";
 
 Settings.llm = new OpenAI();
-Settings.nodeParser = new SimpleNodeParser({
+Settings.nodeParser = new SentenceSplitter({
   chunkSize: 1024,
 });
 

examples/agent/multi_document_agent.ts

Lines changed: 2 additions & 2 deletions

@@ -6,8 +6,8 @@ import {
   OpenAI,
   OpenAIAgent,
   QueryEngineTool,
+  SentenceSplitter,
   Settings,
-  SimpleNodeParser,
   SimpleToolNodeMapping,
   SummaryIndex,
   VectorStoreIndex,
@@ -43,7 +43,7 @@ async function main() {
   for (const title of wikiTitles) {
     console.log(`Processing ${title}`);
 
-    const nodes = new SimpleNodeParser({
+    const nodes = new SentenceSplitter({
      chunkSize: 200,
      chunkOverlap: 20,
    }).getNodesFromDocuments([countryDocs[title]]);
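For intuition about the `chunkSize: 200, chunkOverlap: 20` options used above, here is a toy character-based packing loop. It only illustrates what the two parameters mean; it is not SentenceSplitter's actual algorithm, which splits on sentence boundaries and counts tokens rather than characters:

```typescript
// Toy illustration of chunkSize/chunkOverlap (characters, not tokens).
// Each window is chunkSize characters long and starts chunkSize - chunkOverlap
// after the previous one, so consecutive chunks share chunkOverlap characters.
// Assumes chunkOverlap < chunkSize (otherwise the loop would not advance).
function toyChunks(text: string, chunkSize: number, chunkOverlap: number): string[] {
  const step = chunkSize - chunkOverlap;
  const chunks: string[] = [];
  for (let start = 0; start < text.length; start += step) {
    chunks.push(text.slice(start, start + chunkSize));
    if (start + chunkSize >= text.length) break; // last window reached the end
  }
  return chunks;
}

// 450 characters with chunkSize 200 / overlap 20 gives windows starting at
// positions 0, 180, and 360.
console.log(toyChunks("x".repeat(450), 200, 20).length); // 3
```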

examples/extractors/keywordExtractor.ts

Lines changed: 2 additions & 2 deletions

@@ -2,13 +2,13 @@ import {
   Document,
   KeywordExtractor,
   OpenAI,
-  SimpleNodeParser,
+  SentenceSplitter,
 } from "llamaindex";
 
 (async () => {
   const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
 
-  const nodeParser = new SimpleNodeParser();
+  const nodeParser = new SentenceSplitter();
 
   const nodes = nodeParser.getNodesFromDocuments([
     new Document({ text: "banana apple orange pear peach watermelon" }),

examples/extractors/questionsAnsweredExtractor.ts

Lines changed: 2 additions & 2 deletions

@@ -2,13 +2,13 @@ import {
   Document,
   OpenAI,
   QuestionsAnsweredExtractor,
-  SimpleNodeParser,
+  SentenceSplitter,
 } from "llamaindex";
 
 (async () => {
   const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
 
-  const nodeParser = new SimpleNodeParser();
+  const nodeParser = new SentenceSplitter();
 
   const nodes = nodeParser.getNodesFromDocuments([
     new Document({

examples/extractors/summaryExtractor.ts

Lines changed: 2 additions & 2 deletions

@@ -1,14 +1,14 @@
 import {
   Document,
   OpenAI,
-  SimpleNodeParser,
+  SentenceSplitter,
   SummaryExtractor,
 } from "llamaindex";
 
 (async () => {
   const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
 
-  const nodeParser = new SimpleNodeParser();
+  const nodeParser = new SentenceSplitter();
 
   const nodes = nodeParser.getNodesFromDocuments([
     new Document({

examples/extractors/titleExtractor.ts

Lines changed: 2 additions & 2 deletions

@@ -1,11 +1,11 @@
-import { Document, OpenAI, SimpleNodeParser, TitleExtractor } from "llamaindex";
+import { Document, OpenAI, SentenceSplitter, TitleExtractor } from "llamaindex";
 
 import essay from "../essay";
 
 (async () => {
   const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo-0125", temperature: 0 });
 
-  const nodeParser = new SimpleNodeParser({});
+  const nodeParser = new SentenceSplitter({});
 
   const nodes = nodeParser.getNodesFromDocuments([
     new Document({

examples/jupyter/nodeparser.ipynb

Lines changed: 2 additions & 5 deletions

@@ -7,10 +7,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import {\n",
-    "  Document,\n",
-    "  SimpleNodeParser\n",
-    "} from \"npm:llamaindex\";"
+    "import { Document, SentenceSplitter } from \"npm:llamaindex\";"
    ]
   },
   {
@@ -45,7 +42,7 @@
    }
   ],
   "source": [
-   "const nodeParser = new SimpleNodeParser();\n",
+   "const nodeParser = new SentenceSplitter();\n",
    "const nodes = nodeParser.getNodesFromDocuments([\n",
    "  new Document({ text: \"I am 10 years old. John is 20 years old.\" }),\n",
    "]);\n",
