Skip to content

Commit ab1a785

Browse files
committed
Added metrics
1 parent 8124bd9 commit ab1a785

File tree

9 files changed

+173
-53
lines changed

9 files changed

+173
-53
lines changed

Makefile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Every target below is a command, not a file; names must match the rule names exactly.
.PHONY: all gpt-4o gemini-pro claude-sonnet
2+
3+
# Default target to run all evaluations
4+
all: gpt-4o gemini-pro claude-sonnet
5+
6+
# Target for evaluating with gpt-4o-2024-08-06 model
7+
gpt-4o:
8+
time llm-change evaluate --model gpt-4o-2024-08-06
9+
10+
# Target for evaluating with google/gemini-pro model
11+
gemini-pro:
12+
time llm-change evaluate --model google/gemini-pro
13+
14+
# Target for evaluating with anthropic/claude-sonnet model
15+
claude-sonnet:
16+
time llm-change evaluate --model anthropic/claude-sonnet

src/llm_change_agent/evaluations/evaluator.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -191,9 +191,10 @@ def compare_changes(expected_dir: Path, output_dir: Path):
191191

192192
# output_files_dict is : {provider_model: {filename: file_path}}
193193
output_files_list_of_dicts = [{f"{file.parts[-3]}_{file.parts[-2]}": {file.name: file}} for file in output_files]
194-
194+
jaccard_score_dict = {}
195195
for model_output in output_files_list_of_dicts:
196-
for _provider_model, file_info in model_output.items():
196+
for provider_model, file_info in model_output.items():
197+
jaccard_score_dict[provider_model] = {}
197198
for filename, filepath in file_info.items():
198199
filename = filepath.name
199200
expected_file = expected_dir / filename
@@ -205,19 +206,32 @@ def compare_changes(expected_dir: Path, output_dir: Path):
205206
for pr_id, output_changes in output_yaml.items():
206207
expected_change = expected_yaml_subset.get(pr_id)
207208
if len(output_changes) > 0:
208-
compare_output_vs_expected(expected_change, output_changes)
209-
logger.info(f"Finished comparing changes for {_provider_model}")
209+
jaccard_score_dict[provider_model][pr_id] = get_comparison_metrics(expected_change, output_changes)
210+
logger.info(f"Finished comparing changes for {provider_model}")
211+
with open(output_dir / "metrics.yaml", "a") as f:
212+
yaml.dump(jaccard_score_dict, f, sort_keys=False, default_flow_style=False)
210213

211214

212-
def compare_output_vs_expected(expected_changes, output_changes: List):
215+
def get_comparison_metrics(expected_changes: List, output_changes: List):
    """Compute set-overlap metrics between the expected and the output changes.

    Both inputs are passed through ``normalize_to_curies_in_changes`` and then
    compared as sets, so duplicate entries and ordering do not affect the scores.

    :param expected_changes: Expected changes; ``None`` is treated as an empty list.
    :param output_changes: Changes produced by the model; ``None`` is treated as an empty list.
    :return: Dict with ``jaccard`` (|intersection| / |union|) and ``accuracy``
        (|intersection| / |expected|). A ratio with an empty denominator is reported as 0.0.
    """
    # The caller looks the expected entry up with ``dict.get``, so it may be None.
    output_changes = normalize_to_curies_in_changes(output_changes or [])
    expected_changes = normalize_to_curies_in_changes(expected_changes or [])

    # Calculate Jaccard between the expected and output changes
    expected_changes_set = set(expected_changes)
    output_changes_set = set(output_changes)
    intersection = expected_changes_set & output_changes_set
    union = expected_changes_set | output_changes_set
    # Guard the divisions: an empty union / empty expected set would raise ZeroDivisionError.
    jaccard = len(intersection) / len(union) if union else 0.0
    logger.info(f"Jaccard similarity between expected and output changes: {jaccard}")

    # Calculate accuracy between the expected and output changes
    accuracy = len(intersection) / len(expected_changes_set) if expected_changes_set else 0.0

    return {
        "jaccard": jaccard,
        "accuracy": accuracy,
    }
221235

222236

223237
def run_evaluate(model: str, provider: str):
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
cborg_old_google_gemini-pro:
2+
pr405:
3+
jaccard: 0.0
4+
accuracy: 0.0
5+
pr396:
6+
jaccard: 0.0
7+
accuracy: 0.0
8+
pr328:
9+
jaccard: 0.0
10+
accuracy: 0.0
11+
cborg_old_anthropic_claude-sonnet:
12+
pr427:
13+
jaccard: 0.0
14+
accuracy: 0.0
15+
pr405:
16+
jaccard: 0.0
17+
accuracy: 0.0
18+
pr396:
19+
jaccard: 0.0
20+
accuracy: 0.0
21+
pr328:
22+
jaccard: 0.0
23+
accuracy: 0.0
24+
openai_old_gpt-4o-2024-08-06:
25+
pr328:
26+
jaccard: 0.0
27+
accuracy: 0.0
28+
openai_gpt-4o-2024-08-06:
29+
pr396:
30+
jaccard: 0.0
31+
accuracy: 0.0
Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,16 @@
11
pr1450:
22
- obsolete ENVO:02500019
3-
pr1330:
4-
- change definition of ENVO:01001113 from 'conposed' to 'composed'
3+
pr1330: []
54
pr1289:
65
- remove synonym 'garden' for 'garden'
76
- remove synonym 'ocean' for 'ocean'
87
- remove synonym 'sea' for 'sea'
98
- remove synonym 'stream' for 'stream'
109
- remove synonym 'reservoir' for 'reservoir'
11-
pr1254:
12-
- create_class 'land_surface'
13-
- deepen 'land_surface' from ENVO:01000813 to 'land_surface'
10+
pr1254: []
1411
pr1113: []
1512
pr1011: []
1613
pr970: []
1714
pr928: []
18-
pr928: []
15+
pr928:
16+
- create node ENVO:00003095 'enrichment culture'

src/llm_change_agent/evaluations/output/openai/gpt-4o-2024-08-06/geneontology_go_ontology.yaml

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,30 @@ pr18361:
1414
- rename GO:0150135 from 'biologica_process' to 'biological_process'
1515
pr16459: []
1616
pr15065: []
17-
pr14730: []
17+
pr14730:
18+
- create node GO:XXXXXXX 'apical constriction involved in ventral furrow formation'
19+
- create edge GO:XXXXXXX is_a GO:0003384
20+
- create edge GO:XXXXXXX part_of GO:0007370
21+
- add definition 'The actin-mediated process that results in the contraction of the
22+
apical end of a polarized columnar epithelial cell, contributing to the process
23+
of the formation of a ventral indentation (furrow) from the blastoderm epithelium,
24+
which is internalized to form a tube in the interior of the embryo, marking the
25+
start of gastrulation.' to GO:XXXXXXX
26+
- create node GO:YYYYYYY 'positive regulation of apical constriction involved in ventral
27+
furrow formation'
28+
- add definition 'Any process that activates or increases the frequency, rate or extent
29+
of apical constriction involved in ventral furrow formation.' to GO:YYYYYYY
1830
pr14554: []
1931
pr14269: []
2032
pr14042:
2133
- obsolete GO:0044711
22-
pr13974: []
34+
pr13974:
35+
- rename GO:0099614 from 'protein targeting to spore cell wall' to 'protein localization
36+
to spore cell wall'
37+
- rename GO:0099613 from 'protein targeting to cell wall' to 'protein localization
38+
to cell wall'
39+
- rename GO:0006622 from 'protein targeting to lysosome' to 'protein transport to
40+
lysosome'
41+
- merge GO:0045036 with GO:0072596 and rename to 'protein transport to chloroplast'
42+
- merge GO:0006626 with GO:0072655 and rename to 'protein transport to mitochondrion'
43+
- merge GO:0006625 with GO:0072663 and rename to 'protein transport to peroxisome'

src/llm_change_agent/evaluations/output/openai/gpt-4o-2024-08-06/monarch_initiative_mondo.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@ pr7082: []
22
pr6376:
33
- obsolete MONDO:0033211 with replacement MONDO:0017868
44
pr5194: []
5-
pr4598:
6-
- rename 'ICD10' to 'ICD10CM'
7-
- delete NIFSTD
8-
- delete MTH
9-
pr4459: []
5+
pr4598: []
6+
pr4459:
7+
- obsolete MONDO:0016050 with replacement MONDO:0011841
8+
- change synonym 'ENCEPHALOPATHY, THIAMINE-RESPONSIVE' for MONDO:0016050 from related
9+
to exact
1010
pr4042: []
1111
pr2053: []
1212
pr1644: []

src/llm_change_agent/evaluations/output/openai/gpt-4o-2024-08-06/obophenotype_cell_ontology.yaml

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,53 @@
1-
pr2433: []
1+
pr2433:
2+
- create node NTR-cxg 'nasal serous secreting cell'
3+
- add definition 'A serous secreting cell that is part of the nasal cavity. PMID:37291214'
4+
to NTR-cxg
5+
- create exact synonym 'SMG serous (nasal)' for NTR-cxg
6+
- create edge NTR-cxg rdfs:subClassOf CL:0000313
7+
- create edge NTR-cxg located_in UBERON:0001707
28
pr2114:
39
- change annotation of CL:0000568 with MESH:A11.070 to MESH:D001078
4-
- delete edge CL:0000132 rdfs:subClassOf MESH:A09.371.060.067.31
5-
- delete edge CL:0000333 rdfs:subClassOf MESH:A16.254.600
6-
- create edge CL:0000333 rdfs:subClassOf doi:10.1016/j.stem.2015.02.017
10+
- change annotation of CL:0000568 with reference from definition to class
11+
- change annotation of CL:0000132 with MESH:A09.371.060.067.31 to None
12+
- change annotation of CL:0000333 with MESH:A16.254.600 to doi:10.1016/j.stem.2015.02.017
713
- change annotation of CL:0000577 with MESH:A03.492.766.440.250 to MESH:D004759
8-
- delete edge CL:0000588 rdfs:subClassOf MESH:A11.329.679
9-
- delete edge CL:0000547 rdfs:subClassOf MESH:A11.118.290.350.200
10-
pr2108:
11-
- remove synonym 'syncytial trophoblast' for CL:0000525
12-
- remove synonym 'syncytial trophoblast' for UBERON:0000371
14+
- change annotation of CL:0000577 with reference from definition to class
15+
- change annotation of CL:0000588 with MESH:A11.329.679 to None
16+
- change annotation of CL:0000547 with MESH:A11.118.290.350.200 to None
17+
pr2108: []
1318
pr1907: []
1419
pr1696: []
1520
pr1565: []
16-
pr1518: []
17-
pr1465: []
18-
pr1242: []
21+
pr1518:
22+
- create node CL:0009045 'B cell of medullary sinus of lymph node'
23+
- create node CL:0009046 'T cell of medullary sinus of lymph node'
24+
- create node CL:0009047 'macrophage of medullary sinus of lymph node'
25+
pr1465:
26+
- create node CL:0009097 'embryonic skeletal muscle fiber'
27+
- add definition 'A skeletal muscle fiber found in an embryo. In mammalian embryos,
28+
skeletal muscle expresses myosin heavy chain-embryonic (MyHC-emb, encoded by the
29+
MYH3), which regulates skeletal muscle development. PMID:7196501|PMID:32094117.'
30+
to CL:0009097
31+
- create edge CL:0009097 rdfs:subClassOf CL:0008002
32+
- create edge CL:0009097 RO:0002202 CL:0000515
33+
- create edge CL:0009097 RO:0002495 UBERON:0000068
34+
- create node CL:0009098 'neonatal skeletal muscle fiber'
35+
- add definition 'A skeletal muscle fiber found at the fetal and neonatal stages.
36+
In mammalian fetuses and neonates, skeletal muscle expresses myosin heavy chain-neonatal
37+
(MyHC-neo, encoded by the MYH8 gene). This expression disappears shortly after birth
38+
and is replaced by expression of adult heavy chain myosins. PMID:7196501|PMID:26180627.'
39+
to CL:0009098
40+
- create edge CL:0009098 rdfs:subClassOf CL:0008002
41+
- create edge CL:0009098 RO:0002202 CL:0000515
42+
- create edge CL:0009098 RO:0002495 UBERON:0007221
43+
pr1242:
44+
- rename CL:0011003 'magnocellular neurosecretory cell' to 'oxytocin-secreting magnocellular
45+
neurosecretory cell'
46+
- rename CL:0011112 'magnocellular neurosecretory cell' to 'vasopressin-secreting
47+
magnocellular neurosecretory cell'
48+
- create node CL:0012000 'magnocellular neurosecretory cell'
49+
- create edge CL:0012000 rdfs:subClassOf CL:0011003
50+
- create edge CL:0012000 rdfs:subClassOf CL:0011112
1951
pr639:
2052
- create edge CL:0000451 is_a CL:0000145
2153
- create edge CL:0000235 is_a CL:0000145
Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,25 @@
11
pr3318:
2-
- change definition of UBERON:0002336 to 'White matter structure containing massive
3-
numbers of commissural fibers connecting cortical areas in the two cerebral hemispheres.
4-
It is subdivided into a genu, a rostrum, a body, and a splenium. (MM).'
5-
pr3318:
62
- change definition of UBERON:0002336 from 'White matter structure containing massive
73
numbers of commissural fibers connecting cortical areas in the two cerebral hemispheres.it
84
is subdivided into a genu, a rostrum, a body, and a splenium. (MM).' to 'White matter
95
structure containing massive numbers of commissural fibers connecting cortical areas
106
in the two cerebral hemispheres. It is subdivided into a genu, a rostrum, a body,
117
and a splenium. (MM).'
8+
pr3318:
9+
- change definition of UBERON:0002336 to 'White matter structure containing massive
10+
numbers of commissural fibers connecting cortical areas in the two cerebral hemispheres.
11+
It is subdivided into a genu, a rostrum, a body, and a splenium. (MM).'
1212
pr3204:
1313
- change definition of UBERON:0002113 from 'A paired organ of the urinary tract which
1414
has the production of urine as its primary function' to 'A paired organ of the urinary
1515
tract that produces urine and maintains bodily fluid homeostasis, blood pressure,
1616
pH levels, red blood cell production and bone health.'
17-
pr3106: []
17+
pr3106:
18+
- change relationship between <vasculature> and <entity> from <branching_part_of>
19+
to <connecting_branch_of>
20+
- rename <connecting_branch_of> to 'branches_from'
1821
pr3060: []
19-
pr3050:
20-
- create edge UBERON:0000949 rdfs:subClassOf UBERON:0015203
22+
pr3050: []
2123
pr3048: []
2224
pr2982: []
2325
pr2793: []
@@ -31,7 +33,4 @@ pr1995:
3133
consists of hepatocytes radiating outward from a hepatic vein.'
3234
- change definition of UBERON:0006729 to 'The extravascular space in the liver between
3335
the parenchymal cells and a sinusoid that contains the blood plasma.'
34-
pr1995:
35-
- change definition of UBERON:0002926 from 'A epithelium...' to 'An epithelium...'
36-
- change definition of UBERON:0012251 from 'A epithelium...' to 'An epithelium...'
37-
- change definition of UBERON:0015807 from 'A epithelium...' to 'An epithelium...'
36+
pr1995: []

src/llm_change_agent/evaluations/output/openai/gpt-4o-2024-08-06/pato_ontology_pato.yaml

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,22 @@ pr427: []
22
pr405: []
33
pr396: []
44
pr396: []
5+
pr396:
6+
- create node <http://example.org/untufted_pyramidal_morphology> 'untufted pyramidal
7+
morphology'
8+
- add definition 'A pyramidal cell morphology that inheres in neurons which lack a
9+
clear tuft formation but extend to large radial distances. PMID:30715238' to <http://example.org/untufted_pyramidal_morphology>
10+
- create edge <http://example.org/untufted_pyramidal_morphology> rdfs:subClassOf <http://example.org/pyramidal_morphology>
511
pr396: []
612
pr396: []
713
pr396: []
8-
pr396: []
9-
pr396: []
10-
pr328:
11-
- add definition 'Existing or functioning as though broken into separate parts; disorganized;
12-
disunified.' to PATO:0001509
13-
- deepen PATO:0001509 from PATO:0000141 to PATO:0000141
14+
pr396:
15+
- create node <standard_pyramidal_morphology> 'standard pyramidal morphology'
16+
- create exact synonym 'vertical pyramidal morphology' for <standard_pyramidal_morphology>
17+
- create exact synonym 'pyramidal morphology' for <standard_pyramidal_morphology>
18+
- add definition 'A pyramidal cell morphology that inheres in neurons which have a
19+
standard pyramidal shape with its apical dendrite aligned vertically.' to <standard_pyramidal_morphology>
20+
- create edge <standard_pyramidal_morphology> rdfs:subClassOf <pyramidal_morphology>
21+
- create edge <standard_pyramidal_morphology> owl:disjointWith <horizontal_pyramidal_morphology>
22+
pr328: []
1423
pr270: []

0 commit comments

Comments
 (0)