Skip to content

Commit c293201

Browse files
committed
added curies as a tool
1 parent be9a151 commit c293201

File tree

7 files changed

+89
-28
lines changed

7 files changed

+89
-28
lines changed

poetry.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ langchain-ollama = "^0.1.1"
1818
langchain-anthropic = "^0.1.22"
1919
langchain-chroma = "^0.1.3"
2020
pystow = "^0.5.4"
21+
curies = "^0.7.10"
2122

2223
[tool.poetry.group.dev.dependencies]
2324
pytest = {version = ">=8.3.2"}

src/llm_change_agent/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from llm_change_agent.constants import PROVIDER_DEFAULT_MODEL_MAP
1010
from llm_change_agent.evaluations.evaluator import run_evaluate
1111
from llm_change_agent.llm_agent import LLMChangeAgent
12-
from llm_change_agent.utils.click_utils import validate_path_or_url_or_ontology
12+
from llm_change_agent.utils.general_utils import validate_path_or_url_or_ontology
1313
from llm_change_agent.utils.llm_utils import (
1414
get_anthropic_models,
1515
get_lbl_cborg_models,

src/llm_change_agent/evaluations/evaluator.py

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,13 @@
2222
PR_CLOSED_ISSUES_KEY,
2323
PULL_REQUESTS_KEY,
2424
)
25-
from llm_change_agent.utils.llm_utils import extract_commands
25+
from llm_change_agent.utils.llm_utils import extract_commands, normalize_changes
2626

2727
logger = logging.getLogger(__name__)
2828
logger.info("Evaluating the LLM Change Agent.")
2929

3030

31+
3132
def download_document(url, input_dir):
3233
"""Download the document from the URL."""
3334
if not os.path.exists(input_dir):
@@ -115,6 +116,7 @@ def run_llm_change_agent(prompt, provider, model, docs: List[Any] = None) -> Lis
115116
ctx.params["model"] = model
116117
ctx.params["docs"] = docs
117118
response = extract_commands(execute.invoke(ctx))
119+
print(response)
118120
kgcl_commands = [command for command in ast.literal_eval(response)]
119121
return kgcl_commands
120122

@@ -172,11 +174,44 @@ def generate_changes_via_llm(eval_dir, output_dir, provider, model):
172174
print(f"Predicted changes saved to {output_sub_dir}")
173175

174176

175-
def compare_changes():
177+
def compare_changes(expected_dir: Path, output_dir: Path):
    """Compare the actual (expected) changes with the predicted changes.

    For every predicted YAML document under ``output_dir`` (laid out as
    ``<provider>/<model>/<filename>.yaml``), load the matching expected
    document of the same filename from ``expected_dir`` and compare the
    per-PR change lists via :func:`compare_output_vs_expected`.

    :param expected_dir: Directory holding the ground-truth YAML documents.
    :param output_dir: Directory holding the model-predicted YAML documents.
    """
    for output_file in output_dir.rglob("*.yaml"):
        expected_file = expected_dir / output_file.name
        with open(expected_file, "r") as ex, open(output_file, "r") as out:
            expected_yaml = yaml.safe_load(ex)
            output_yaml = yaml.safe_load(out)
        # Only PR ids present in both documents are comparable.
        expected_yaml_subset = {k: v for k, v in expected_yaml.items() if k in output_yaml}
        for pr_id, output_changes in output_yaml.items():
            expected_change = expected_yaml_subset.get(pr_id)
            # Guard: a PR with no expected counterpart would previously flow
            # a None into compare_output_vs_expected and crash on len(None).
            if expected_change is None or not output_changes:
                continue
            compare_output_vs_expected(expected_change, output_changes)
202+
203+
204+
205+
def compare_output_vs_expected(expected_changes, output_changes: List) -> float:
    """Compare the expected changes with the output changes.

    Output changes are first normalized (IRIs compressed to CURIEs) so that
    they are comparable with the expected KGCL statements, then scored by
    exact-match accuracy.

    :param expected_changes: List of ground-truth KGCL change statements.
    :param output_changes: List of predicted KGCL change statements.
    :return: Fraction of expected changes found verbatim in the normalized
        output (0.0 when there are no expected changes).
    """
    # NOTE(review): the original left a pdb.set_trace() here and never
    # computed a score; this implements the accuracy it set up.
    output_changes = normalize_changes(output_changes)
    total = len(expected_changes)
    if total == 0:
        # Avoid ZeroDivisionError on an empty expected set.
        return 0.0
    predicted = set(output_changes)
    correct = sum(1 for change in expected_changes if change in predicted)
    accuracy = correct / total
    return accuracy
213+
178214

179-
pdb.set_trace()
180215

181216

182217
def run_evaluate(model: str, provider: str):
@@ -194,22 +229,5 @@ def run_evaluate(model: str, provider: str):
194229

195230
generate_changes_via_llm(model=model, provider=provider, eval_dir=eval_dir, output_dir=output_dir)
196231

197-
# compare_changes()
198-
199-
# logger.info("Split the YAML documents randomly into RAG and Evaluation documents 80% and 20%.")
200-
# random.shuffle(ONTODIFF_DOCS)
201-
# split_index = int(len(ONTODIFF_DOCS) * 0.8)
202-
# rag_docs = ONTODIFF_DOCS[:split_index]
203-
# eval_docs = ONTODIFF_DOCS[split_index:]
204-
205-
# logger.info("Run llm_change_agent with the RAG documents.")
206-
# run_llm_change_agent(rag_docs)
207-
208-
# logger.info("Run the evaluation script with the Evaluation documents.")
209-
# run_evaluation_script(eval_docs)
210-
211-
# logger.info("Compare the actual `changes` with the predicted `changes` from the llm_change_agent.")
212-
# compare_changes()
232+
compare_changes(expected_dir=expected_dir, output_dir=output_dir)
213233

214-
# logger.info("Evaluation completed.")
215-
# return

src/llm_change_agent/templates/templates.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,17 @@ def get_issue_analyzer_template():
1313
You have the following tools at your disposal to help you with this task:
1414
{tools}
1515
You also have the KGCL grammar in lark format: {grammar} along with an explanation of the grammar: {explanation}.
16-
You MUST use CURIEs/IRIs for every entity and relationship. You've been provided with JSON documents to find CURIEs/IRIs
16+
You MUST use CURIEs for every entity and relationship. You've been provided with JSON documents to find CURIEs/IRIs
1717
for entities and relationships. Do not manufacture CURIEs/IRIs. Make sure it is retrieved from these
18-
documents if absent in the GitHub issues provided. The final answer should be JUST a list of KGCL commands, nothing else.
19-
Keep the verbosity of the response to non-existent. It should be concise and to the point.
18+
documents if absent in the GitHub issues provided. If you end up with an IRI to represent an entity, use
19+
the tool 'compress_iri' from {tools} to derive a CURIE from it. If you end up with the label for the entity,
20+
try to retrieve its CURIE/IRI from the JSON docs and get CURIE using {tools}.
21+
22+
For e.g.: if you have a change `delete edge MONDO:0005772 rdfs:subClassOf <immune system disease>`
23+
It should be converted to `delete edge MONDO:0005772 rdfs:subClassOf MONDO:0005046`.
24+
25+
The final answer should be JUST a list of KGCL commands, nothing else.
26+
Keep the verbosity of the response to zero. It should be concise and to the point.
2027
2128
It is fine if you are not able to form any commands. You can just return an empty list.
2229

src/llm_change_agent/utils/click_utils.py renamed to src/llm_change_agent/utils/general_utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,17 @@ def validate_path_or_url_or_ontology(ctx, param, value):
3333
raise click.BadParameter(f"{val} is not a valid URL, file path, or ontology name") from None
3434

3535
return validated_values
36+
37+
38+
def jaccard_similarity(statement1, statement2):
    """Calculate the Jaccard similarity coefficient between two statements.

    Statements are tokenized by whitespace and compared as sets of words.

    :param statement1: First statement (string).
    :param statement2: Second statement (string).
    :return: |intersection| / |union| of the word sets, in [0.0, 1.0].
        Returns 0.0 when both statements are empty (empty union), instead
        of raising ZeroDivisionError as the naive formula would.
    """
    set1 = set(statement1.split())
    set2 = set(statement2.split())

    union = set1 | set2
    if not union:
        # Both statements tokenize to nothing; define similarity as 0.0.
        return 0.0

    return len(set1 & set2) / len(union)

src/llm_change_agent/utils/llm_utils.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55
from pathlib import Path
66
from typing import Union
77

8+
import curies
89
import yaml
910
from langchain.agents import AgentExecutor
1011
from langchain.agents.react.agent import create_react_agent
1112
from langchain.tools.retriever import create_retriever_tool
13+
from langchain_core.tools import tool
1214
from langchain_chroma import Chroma
1315
from langchain_community.document_loaders import WebBaseLoader
1416
from langchain_core.documents import Document
@@ -272,7 +274,7 @@ def execute_agent(llm, prompt, docs):
272274

273275
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
274276
tool = create_retriever_tool(retriever, "change_agent_retriever", "Change Agent Retriever")
275-
tools = [tool]
277+
tools = [tool, compress_iri]
276278
template = get_issue_analyzer_template()
277279
react_agent = create_react_agent(llm=llm, tools=tools, prompt=template)
278280
agent_executor = AgentExecutor(agent=react_agent, tools=tools, handle_parsing_errors=True, verbose=True)
@@ -316,3 +318,22 @@ def extract_commands(command):
316318
return match.group(0)
317319
else:
318320
return cleaned_command
321+
322+
def normalize_changes(changes):
    """Replace IRIs in each KGCL change statement with CURIEs where possible.

    Each whitespace token starting with ``http`` or ``<http`` is stripped of
    angle brackets and compressed via :func:`compress_iri`; tokens that fail
    to compress are left untouched. The list is updated in place and also
    returned.

    :param changes: List of KGCL change statement strings.
    :return: The same list, with compressible IRIs rewritten as CURIEs.
    """
    for idx, change in enumerate(changes):
        # startswith accepts a tuple — one scan instead of the original
        # any(...) pre-check plus a second filtering pass.
        iris = [tok for tok in change.split() if tok.startswith(("<http", "http"))]
        for item in iris:
            stripped_item = item.strip("<>")
            # Call compress_iri once (the original called it twice per IRI);
            # fall back to the original token when compression returns None.
            compressed_item = compress_iri(stripped_item) or item
            change = change.replace(item, compressed_item)
        changes[idx] = change
    return changes
334+
335+
# Cache the OBO converter at module scope: curies.get_obo_converter() fetches
# the OBO prefix registry, so rebuilding it per call is needlessly expensive.
_OBO_CONVERTER = None


def _get_obo_converter():
    """Return a lazily-created, module-cached OBO CURIE converter."""
    global _OBO_CONVERTER
    if _OBO_CONVERTER is None:
        _OBO_CONVERTER = curies.get_obo_converter()
    return _OBO_CONVERTER


@tool
def compress_iri(iri: str) -> str:
    """Compress the IRI into a CURIE.

    :param iri: Full IRI string (without angle brackets).
    :return: The compressed CURIE, or None when the IRI matches no known
        prefix (curies.Converter.compress returns None on failure).
    """
    return _get_obo_converter().compress(iri)

0 commit comments

Comments
 (0)