Skip to content

Commit cbd4766

Browse files
authored
correct name of doc/docx loader
1 parent 73dd68d commit cbd4766

File tree

2 files changed: 4 additions and 3 deletions

src/constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,8 @@
 
 DOCUMENT_LOADERS = {
     ".pdf": "PyMuPDFLoader",
-    ".docx": "Docx2txtLoader",
+    ".docx": "UnstructuredWordDocumentLoader",
+    ".doc": "UnstructuredWordDocumentLoader",
     ".txt": "TextLoader",
     ".enex": "EverNoteLoader",
     ".epub": "UnstructuredEPubLoader",

src/document_processor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.document_loaders import (
     PyMuPDFLoader,
-    Docx2txtLoader,
+    UnstructuredWordDocumentLoader,
     TextLoader,
     EverNoteLoader,
     UnstructuredEPubLoader,
@@ -67,7 +67,7 @@ def load_single_document(file_path: Path) -> Document:
     elif file_extension == ".epub":
         loader = UnstructuredEPubLoader(str(file_path), mode="single", strategy="fast")
     elif file_extension == ".docx":
-        loader = Docx2txtLoader(str(file_path), mode="single", strategy="fast")
+        loader = UnstructuredWordDocumentLoader(str(file_path), mode="single", strategy="fast")
     elif file_extension == ".rtf":
         loader = UnstructuredRTFLoader(str(file_path), mode="single", strategy="fast")
     elif file_extension == ".odt":

0 commit comments

Comments
 (0)