Skip to content

DON'T MERGE YET: update: API calls to pass API key via auth headers (All routes including evaluation routes) #78

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 2 commits were merged on Feb 28, 2025.
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
17 changes: 14 additions & 3 deletions src/e2etests/judgment_client_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,11 @@ def test_fetch_traces_by_time_period(self, client: JudgmentClient):
for hours in [1, 3, 6, 12, 24, 72, 168]:
response = requests.post(
f"{SERVER_URL}/traces/fetch_by_time_period/",
json={"hours": hours, "judgment_api_key": API_KEY}
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {API_KEY}"
},
json={"hours": hours}
)
assert response.status_code == 200
data = response.json()
Expand All @@ -388,17 +392,24 @@ def test_fetch_traces_invalid_period(self, client: JudgmentClient):
for hours in [0, 2, 4]:
response = requests.post(
f"{SERVER_URL}/traces/fetch_by_time_period/",
json={"hours": hours, "judgment_api_key": API_KEY}
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {API_KEY}"
},
json={"hours": hours}
)
assert response.status_code == 400

def test_fetch_traces_missing_api_key(self, client: JudgmentClient):
    """Requests without an Authorization header must be rejected as unauthorized.

    The endpoint now authenticates via a Bearer token in the request headers
    rather than a ``judgment_api_key`` field in the JSON body, so omitting the
    Authorization header should yield an auth failure (401 Unauthorized or
    403 Forbidden) — not a 422 body-validation error as in the old scheme.
    """
    response = requests.post(
        f"{SERVER_URL}/traces/fetch_by_time_period/",
        headers={
            "Content-Type": "application/json",
        },
        json={"hours": 12},
    )
    # The stale `== 422` assertion from the body-based-key version contradicted
    # the header-auth expectation below and made the test unpassable; only the
    # auth-failure check is kept.
    assert response.status_code in [401, 403]

@pytest.mark.skipif(not os.getenv("GOOGLE_APPLICATION_CREDENTIALS"),
reason="VertexAI credentials not configured")
Expand Down
11 changes: 10 additions & 1 deletion src/e2etests/test_tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import time
import asyncio
from typing import List
import pytest

# Third-party imports
from openai import OpenAI
Expand Down Expand Up @@ -136,9 +137,17 @@ def trace_manager_client():
async def test_token_counting(trace_manager_client):
input = "Write a poem about Nissan R32 GTR"

@pytest.fixture
def test_input():
    """Default question fed to the tracing tests."""
    default_question = "What if these shoes don't fit?"
    return default_question

@pytest.mark.asyncio
async def test_evaluation_mixed(test_input):
PROJECT_NAME = "TestingPoemBot"
print(f"Using test input: {test_input}")
with judgment.trace("Use-claude-hehexd123", project_name=PROJECT_NAME, overwrite=True) as trace:
upper = await make_upper(input)
upper = await make_upper(test_input)
result = await make_poem(upper)
await answer_user_question("What if these shoes don't fit?")

Expand Down
27 changes: 25 additions & 2 deletions src/judgeval/common/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,11 @@ def fetch_trace(self, trace_id: str):
JUDGMENT_TRACES_FETCH_API_URL,
json={
"trace_id": trace_id,
"judgment_api_key": self.judgment_api_key,
# "judgment_api_key": self.judgment_api_key,
},
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
}
)

Expand All @@ -225,6 +226,7 @@ def save_trace(self, trace_data: dict, empty_save: bool):
json=trace_data,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
}
)

Expand All @@ -248,6 +250,7 @@ def delete_trace(self, trace_id: str):
},
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
}
)

Expand All @@ -263,11 +266,12 @@ def delete_traces(self, trace_ids: List[str]):
response = requests.delete(
JUDGMENT_TRACES_DELETE_API_URL,
json={
"judgment_api_key": self.judgment_api_key,
# "judgment_api_key": self.judgment_api_key,
"trace_ids": trace_ids,
},
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
}
)

Expand Down Expand Up @@ -576,6 +580,25 @@ def save(self, empty_save: bool = False, overwrite: bool = False) -> Tuple[str,

self.trace_manager_client.save_trace(trace_data, empty_save)


# Save trace data by making POST request to API
response = requests.post(
JUDGMENT_TRACES_SAVE_API_URL,
json=trace_data,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.tracer.api_key}" # Bearer token format
}
)

if response.status_code == HTTPStatus.BAD_REQUEST:
raise ValueError(f"Failed to save trace data: Check your Trace name for conflicts, set overwrite=True to overwrite existing traces: {response.text}")
elif response.status_code != HTTPStatus.OK:
raise ValueError(f"Failed to save trace data: {response.text}")

if not empty_save and "ui_results_url" in response.json():
rprint(f"\nπŸ” You can view your trace data here: [rgb(106,0,255)]{response.json()['ui_results_url']}[/]\n")

return self.trace_id, trace_data

def delete(self):
Expand Down
24 changes: 18 additions & 6 deletions src/judgeval/data/datasets/eval_dataset_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,16 @@ def push(self, dataset: EvalDataset, alias: str,overwrite: Optional[bool] = Fals
"ground_truths": [g.to_dict() for g in dataset.ground_truths],
"examples": [e.to_dict() for e in dataset.examples],
"overwrite": overwrite,
"judgment_api_key": dataset.judgment_api_key
# "judgment_api_key": dataset.judgment_api_key
}
try:
response = requests.post(
JUDGMENT_DATASETS_PUSH_API_URL,
json=content
json=content,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
}
)
if response.status_code == 500:
error(f"Server error during push: {content.get('message')}")
Expand Down Expand Up @@ -115,13 +119,17 @@ def pull(self, alias: str) -> EvalDataset:
)
request_body = {
"alias": alias,
"judgment_api_key": self.judgment_api_key
# "judgment_api_key": self.judgment_api_key
}

try:
response = requests.post(
JUDGMENT_DATASETS_PULL_API_URL,
json=request_body
json=request_body,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
}
)
response.raise_for_status()
except requests.exceptions.RequestException as e:
Expand Down Expand Up @@ -169,13 +177,17 @@ def pull_all_user_dataset_stats(self) -> dict:
total=100,
)
request_body = {
"judgment_api_key": self.judgment_api_key
# "judgment_api_key": self.judgment_api_key
}

try:
response = requests.post(
JUDGMENT_DATASETS_PULL_ALL_API_URL,
json=request_body
json=request_body,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
}
)
response.raise_for_status()
except requests.exceptions.RequestException as e:
Expand Down
28 changes: 23 additions & 5 deletions src/judgeval/judgment_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ def pull_eval(self, project_name: str, eval_run_name: str) -> List[Dict[str, Uni
eval_name=eval_run_name,
judgment_api_key=self.judgment_api_key)
eval_run = requests.post(JUDGMENT_EVAL_FETCH_API_URL,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
},
json=eval_run_request_body.model_dump())
if eval_run.status_code != requests.codes.ok:
raise ValueError(f"Error fetching eval results: {eval_run.json()}")
Expand Down Expand Up @@ -213,6 +217,7 @@ def delete_eval(self, project_name: str, eval_run_name: str) -> bool:
json=eval_run_request_body.model_dump(),
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
})
if response.status_code != requests.codes.ok:
raise ValueError(f"Error deleting eval results: {response.json()}")
Expand All @@ -235,6 +240,7 @@ def delete_project_evals(self, project_name: str) -> bool:
},
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
})
if response.status_code != requests.codes.ok:
raise ValueError(f"Error deleting eval results: {response.json()}")
Expand All @@ -246,7 +252,11 @@ def _validate_api_key(self):
"""
response = requests.post(
f"{ROOT_API}/validate_api_key/",
json={"api_key": self.judgment_api_key}
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}",
},
json={} # Empty body now
)
if response.status_code == 200:
return True, response.json()
Expand All @@ -268,12 +278,16 @@ def fetch_classifier_scorer(self, slug: str) -> ClassifierScorer:
"""
request_body = {
"slug": slug,
"judgment_api_key": self.judgment_api_key
# "judgment_api_key": self.judgment_api_key
}

response = requests.post(
f"{ROOT_API}/fetch_scorer/",
json=request_body
json=request_body,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
}
)

if response.status_code == 500:
Expand Down Expand Up @@ -306,13 +320,17 @@ def push_classifier_scorer(self, scorer: ClassifierScorer, slug: str = None) ->
"name": scorer.name,
"conversation": scorer.conversation,
"options": scorer.options,
"judgment_api_key": self.judgment_api_key,
# "judgment_api_key": self.judgment_api_key,
"slug": slug
}

response = requests.post(
f"{ROOT_API}/save_scorer/",
json=request_body
json=request_body,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
}
)

if response.status_code == 500:
Expand Down
15 changes: 14 additions & 1 deletion src/judgeval/run_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,12 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
try:
# submit API request to execute evals
payload = evaluation_run.model_dump(warnings=False)
response = requests.post(JUDGMENT_EVAL_API_URL, json=payload)
response = requests.post(
JUDGMENT_EVAL_API_URL, headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {evaluation_run.judgment_api_key}"
},
json=payload)
response_data = response.json()
except Exception as e:
error(f"Error: {e}")
Expand Down Expand Up @@ -151,6 +156,10 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
try:
response = requests.post(
f"{ROOT_API}/eval-run-name-exists/",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {judgment_api_key}"
},
json={
"eval_name": eval_name,
"project_name": project_name,
Expand Down Expand Up @@ -188,6 +197,10 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
try:
res = requests.post(
JUDGMENT_EVAL_LOG_API_URL,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {evaluation_run.judgment_api_key}"
},
json={
"results": [result.to_dict() for result in merged_results],
"judgment_api_key": evaluation_run.judgment_api_key,
Expand Down