1
1
"""Utility functions for the LLM Change Agent."""
2
2
3
- import os
4
- from pathlib import Path
5
3
from typing import Union
6
4
7
- import requests
8
5
import yaml
9
6
from langchain .agents import AgentExecutor
10
7
from langchain .agents .react .agent import create_react_agent
11
8
from langchain .tools .retriever import create_retriever_tool
12
9
from langchain_chroma import Chroma
10
+ from langchain_community .document_loaders import WebBaseLoader
13
11
from langchain_core .documents import Document
14
12
from langchain_openai import OpenAIEmbeddings
15
13
from langchain_text_splitters import RecursiveCharacterTextSplitter
23
21
KGCL_SCHEMA ,
24
22
ONTODIFF_DOCS ,
25
23
OPENAI_KEY ,
24
+ VECTO_DB_PATH ,
25
+ VECTOR_STORE ,
26
26
)
27
27
from llm_change_agent .templates .templates import get_issue_analyzer_template , grammar_explanation
28
28
29
- PROJ_DIR = Path (__file__ ).parents [1 ].resolve ()
30
- RAG_DOCS_DIR = PROJ_DIR / "rag_docs"
31
- os .makedirs (RAG_DOCS_DIR , exist_ok = True )
32
-
33
29
34
30
def get_openai_models ():
35
31
"""Get the list of OpenAI models."""
@@ -156,33 +152,33 @@ def get_kgcl_grammar():
156
152
return {"lark" : lark_file , "explanation" : grammar_notes }
157
153
158
154
159
- def get_diff_docs ():
160
- """Download the diff docs."""
161
- for url in ONTODIFF_DOCS :
162
- # Extract the document name from the URL
163
- doc_name = url .split ("/" )[- 2 ]
164
- doc_path = RAG_DOCS_DIR / f"{ doc_name } .yaml"
155
+ # def get_diff_docs():
156
+ # """Download the diff docs."""
157
+ # for url in ONTODIFF_DOCS:
158
+ # # Extract the document name from the URL
159
+ # doc_name = url.split("/")[-2]
160
+ # doc_path = RAG_DOCS_DIR / f"{doc_name}.yaml"
165
161
166
- # Check if the file already exists
167
- if not doc_path .exists ():
168
- try :
169
- # Download the content from the URL
170
- response = requests .get (url , timeout = 10 )
171
- response .raise_for_status () # Raise an error for bad status codes
162
+ # # Check if the file already exists
163
+ # if not doc_path.exists():
164
+ # try:
165
+ # # Download the content from the URL
166
+ # response = requests.get(url, timeout=10)
167
+ # response.raise_for_status() # Raise an error for bad status codes
172
168
173
- # Write the content to the file
174
- with open (doc_path , "w" ) as doc_file :
175
- doc_file .write (response .text )
169
+ # # Write the content to the file
170
+ # with open(doc_path, "w") as doc_file:
171
+ # doc_file.write(response.text)
176
172
177
- print (f"Downloaded and saved: { doc_name } " )
178
- yield response .text
173
+ # print(f"Downloaded and saved: {doc_name}")
174
+ # yield response.text
179
175
180
- except requests .RequestException as e :
181
- print (f"Failed to download { url } : { e } " )
182
- else :
183
- with open (doc_path , "r" ) as doc_file :
184
- print (f"Reading from file: { doc_name } " )
185
- yield doc_file .read ()
176
+ # except requests.RequestException as e:
177
+ # print(f"Failed to download {url}: {e}")
178
+ # else:
179
+ # with open(doc_path, "r") as doc_file:
180
+ # print(f"Reading from file: {doc_name}")
181
+ # yield doc_file.read()
186
182
187
183
188
184
def split_documents (document : Union [str , Document ]):
@@ -203,12 +199,21 @@ def execute_agent(llm, prompt):
203
199
# docs_list = (
204
200
# split_documents(str(schema)) + split_documents(grammar["lark"]) + split_documents(grammar["explanation"])
205
201
# )
206
- docs_list = split_documents (grammar ["lark" ]) + split_documents (grammar ["explanation" ])
207
- # ! Comment the following 2 lines to speed up the execution.
208
- # diff_doc_generator = get_diff_docs()
209
- # docs_list = [split_doc for doc in diff_doc_generator for split_doc in split_documents(doc)]
202
+ grammar_docs_list = split_documents (grammar ["lark" ]) + split_documents (grammar ["explanation" ])
203
+ if VECTO_DB_PATH .exists ():
204
+ vectorstore = Chroma (
205
+ embedding_function = OpenAIEmbeddings (show_progress_bar = True ), persist_directory = str (VECTOR_STORE )
206
+ )
207
+ else :
208
+
209
+ list_of_doc_lists = [WebBaseLoader (url , show_progress = True ).load () for url in ONTODIFF_DOCS ]
210
+ diff_docs_list = [split_doc for docs in list_of_doc_lists for doc in docs for split_doc in split_documents (doc )]
211
+ docs_list = grammar_docs_list + diff_docs_list
212
+
213
+ vectorstore = Chroma .from_documents (
214
+ documents = docs_list , embedding = OpenAIEmbeddings (show_progress_bar = True ), persist_directory = str (VECTOR_STORE )
215
+ )
210
216
211
- vectorstore = Chroma .from_documents (documents = docs_list , embedding = OpenAIEmbeddings (show_progress_bar = True ))
212
217
retriever = vectorstore .as_retriever (search_kwargs = {"k" : 1 })
213
218
tool = create_retriever_tool (retriever , "change_agent_retriever" , "Change Agent Retriever" )
214
219
tools = [tool ]
0 commit comments