"""Utility functions for the LLM Change Agent."""

+import os
+from pathlib import Path
+from typing import Union
+
+import requests
import yaml
from langchain.agents import AgentExecutor
from langchain.agents.react.agent import create_react_agent
from openai import OpenAI

from llm_change_agent.config.llm_config import AnthropicConfig, CBORGConfig, LLMConfig, OllamaConfig, OpenAIConfig
-from llm_change_agent.constants import ANTHROPIC_KEY, CBORG_KEY, KGCL_GRAMMAR, KGCL_SCHEMA, OPENAI_KEY
+from llm_change_agent.constants import (
+    ANTHROPIC_KEY,
+    CBORG_KEY,
+    KGCL_GRAMMAR,
+    KGCL_SCHEMA,
+    ONTODIFF_DOCS,
+    OPENAI_KEY,
+)
from llm_change_agent.templates.templates import get_issue_analyzer_template, grammar_explanation

+PROJ_DIR = Path(__file__).parents[1].resolve()
+RAG_DOCS_DIR = PROJ_DIR / "rag_docs"
+os.makedirs(RAG_DOCS_DIR, exist_ok=True)
+

def get_openai_models():
    """Get the list of OpenAI models."""
@@ -140,9 +156,41 @@ def get_kgcl_grammar():
    return {"lark": lark_file, "explanation": grammar_notes}


-def split_documents(document: str):
+def get_diff_docs():
+    """Download the diff docs."""
+    for url in ONTODIFF_DOCS:
+        # Extract the document name from the URL
+        doc_name = url.split("/")[-2]
+        doc_path = RAG_DOCS_DIR / f"{doc_name}.yaml"
+
+        # Check if the file already exists
+        if not doc_path.exists():
+            try:
+                # Download the content from the URL
+                response = requests.get(url, timeout=10)
+                response.raise_for_status()  # Raise an error for bad status codes
+
+                # Write the content to the file
+                with open(doc_path, "w") as doc_file:
+                    doc_file.write(response.text)
+
+                print(f"Downloaded and saved: {doc_name}")
+                yield response.text
+
+            except requests.RequestException as e:
+                print(f"Failed to download {url}: {e}")
+        else:
+            with open(doc_path, "r") as doc_file:
+                print(f"Reading from file: {doc_name}")
+                yield doc_file.read()
+
+
+def split_documents(document: Union[str, Document]):
    """Split the document into a list of documents."""
-    doc_object = (Document(page_content=document),)
+    if isinstance(document, Document):
+        doc_object = (document,)
+    else:
+        doc_object = (Document(page_content=document),)
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = splitter.split_documents(doc_object)
    return splits
@@ -155,9 +203,12 @@ def execute_agent(llm, prompt):
    # docs_list = (
    #     split_documents(str(schema)) + split_documents(grammar["lark"]) + split_documents(grammar["explanation"])
    # )
-
    docs_list = split_documents(grammar["lark"]) + split_documents(grammar["explanation"])
-    vectorstore = Chroma.from_documents(documents=docs_list, embedding=OpenAIEmbeddings())
+    # ! Comment the following 2 lines to speed up the execution.
+    # diff_doc_generator = get_diff_docs()
+    # docs_list = [split_doc for doc in diff_doc_generator for split_doc in split_documents(doc)]
+
+    vectorstore = Chroma.from_documents(documents=docs_list, embedding=OpenAIEmbeddings(show_progress_bar=True))
    retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
    tool = create_retriever_tool(retriever, "change_agent_retriever", "Change Agent Retriever")
    tools = [tool]