Skip to content

Organization Integration #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/e2etests/judgment_client_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
# Constants
SERVER_URL = os.getenv("JUDGMENT_API_URL", "http://localhost:8000")
API_KEY = os.getenv("JUDGMENT_API_KEY")
ORGANIZATION_ID = os.getenv("ORGANIZATION_ID")

if not API_KEY:
pytest.skip("JUDGMENT_API_KEY not set", allow_module_level=True)
Expand All @@ -45,7 +46,7 @@
@pytest.fixture(scope="session")
def client() -> JudgmentClient:
"""Create a single JudgmentClient instance for all tests."""
return JudgmentClient(judgment_api_key=API_KEY)
return JudgmentClient(judgment_api_key=API_KEY, organization_id=ORGANIZATION_ID)

@pytest.fixture
def random_name() -> str:
Expand Down Expand Up @@ -396,7 +397,8 @@ def test_fetch_traces_by_time_period(self, client: JudgmentClient):
f"{SERVER_URL}/traces/fetch_by_time_period/",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {API_KEY}"
"Authorization": f"Bearer {API_KEY}",
"X-Organization-Id": ORGANIZATION_ID
},
json={"hours": hours}
)
Expand All @@ -411,7 +413,8 @@ def test_fetch_traces_invalid_period(self, client: JudgmentClient):
f"{SERVER_URL}/traces/fetch_by_time_period/",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {API_KEY}"
"Authorization": f"Bearer {API_KEY}",
"X-Organization-Id": ORGANIZATION_ID
},
json={"hours": hours}
)
Expand All @@ -423,6 +426,7 @@ def test_fetch_traces_missing_api_key(self, client: JudgmentClient):
f"{SERVER_URL}/traces/fetch_by_time_period/",
headers={
"Content-Type": "application/json",
"X-Organization-Id": ORGANIZATION_ID
},
json={"hours": 12}
)
Expand Down
8 changes: 4 additions & 4 deletions src/e2etests/test_tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ async def make_poem(input: str) -> str:
@pytest.fixture
def trace_manager_client():
"""Fixture to initialize TraceManagerClient."""
return TraceManagerClient(judgment_api_key=os.getenv("JUDGMENT_API_KEY"))
return TraceManagerClient(judgment_api_key=os.getenv("JUDGMENT_API_KEY"), organization_id=os.getenv("ORGANIZATION_ID"))

@pytest.mark.asyncio
async def test_token_counting(trace_manager_client):
Expand Down Expand Up @@ -188,11 +188,11 @@ async def test_trace_delete(trace_manager_client):

@pytest.mark.asyncio
async def test_trace_delete_batch(trace_manager_client):
with judgment.trace("TEST_RUN", project_name="TEST", overwrite=True) as trace:
with judgment.trace("TEST_RUN2", project_name="TEST", overwrite=True) as trace:
pass
trace.save()

with judgment.trace("TEST_RUN2", project_name="TEST2", overwrite=True) as trace2:
with judgment.trace("TEST_RUN3", project_name="TEST2", overwrite=True) as trace2:
pass
trace2.save()

Expand Down Expand Up @@ -221,7 +221,7 @@ async def run_selected_tests(test_names: list[str]):
test_names (list[str]): List of test function names to run (without 'test_' prefix)
"""

trace_manager_client = TraceManagerClient(judgment_api_key=os.getenv("JUDGMENT_API_KEY"))
trace_manager_client = TraceManagerClient(judgment_api_key=os.getenv("JUDGMENT_API_KEY"), organization_id=os.getenv("ORGANIZATION_ID"))
print("Client initialized successfully")
print("*" * 40)

Expand Down
26 changes: 17 additions & 9 deletions src/judgeval/common/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,9 @@ class TraceManagerClient:
- Saving a trace
- Deleting a trace
"""
def __init__(self, judgment_api_key: str):
def __init__(self, judgment_api_key: str, organization_id: str):
self.judgment_api_key = judgment_api_key
self.organization_id = organization_id

def fetch_trace(self, trace_id: str):
"""
Expand All @@ -199,11 +200,11 @@ def fetch_trace(self, trace_id: str):
JUDGMENT_TRACES_FETCH_API_URL,
json={
"trace_id": trace_id,
# "judgment_api_key": self.judgment_api_key,
},
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
}
)

Expand Down Expand Up @@ -250,7 +251,8 @@ def delete_trace(self, trace_id: str):
},
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
}
)

Expand All @@ -266,12 +268,12 @@ def delete_traces(self, trace_ids: List[str]):
response = requests.delete(
JUDGMENT_TRACES_DELETE_API_URL,
json={
# "judgment_api_key": self.judgment_api_key,
"trace_ids": trace_ids,
},
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
}
)

Expand All @@ -294,7 +296,7 @@ def __init__(self, tracer, trace_id: str, name: str, project_name: str = "defaul
self.span_type = None
self._current_span: Optional[TraceEntry] = None
self.overwrite = overwrite
self.trace_manager_client = TraceManagerClient(tracer.api_key) # Manages DB operations for trace data
self.trace_manager_client = TraceManagerClient(tracer.api_key, tracer.organization_id) # Manages DB operations for trace data

@contextmanager
def span(self, name: str, span_type: SpanType = "span"):
Expand Down Expand Up @@ -371,6 +373,7 @@ def async_evaluate(
raise ValueError(f"Failed to load scorers: {str(e)}")

eval_run = EvaluationRun(
organization_id=self.tracer.organization_id,
log_results=log_results,
project_name=self.project_name,
eval_name=f"{self.name.capitalize()}-"
Expand Down Expand Up @@ -546,7 +549,8 @@ def save(self, empty_save: bool = False, overwrite: bool = False) -> Tuple[str,
# Create trace document
trace_data = {
"trace_id": self.trace_id,
"api_key": self.tracer.api_key,
"judgment_api_key": self.tracer.api_key,
"organization_id": os.getenv("ORGANIZATION_ID"),
"name": self.name,
"project_name": self.project_name,
"created_at": datetime.fromtimestamp(self.start_time).isoformat(),
Expand Down Expand Up @@ -612,14 +616,18 @@ def __new__(cls, *args, **kwargs):
cls._instance = super(Tracer, cls).__new__(cls)
return cls._instance

def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project"):
def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project", organization_id: str = os.getenv("ORGANIZATION_ID")):
if not hasattr(self, 'initialized'):
if not api_key:
raise ValueError("Tracer must be configured with a Judgment API key")

if not organization_id:
raise ValueError("Tracer must be configured with an Organization ID")

self.api_key: str = api_key
self.project_name: str = project_name
self.client: JudgmentClient = JudgmentClient(judgment_api_key=api_key)
self.organization_id: str = organization_id
self.depth: int = 0
self._current_trace: Optional[str] = None
self.initialized: bool = True
Expand Down
5 changes: 3 additions & 2 deletions src/judgeval/data/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ class EvalDataset:
_alias: Union[str, None] = field(default=None)
_id: Union[str, None] = field(default=None)
judgment_api_key: str = field(default="")

organization_id: str = field(default="")
def __init__(self,
judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"),
organization_id: str = os.getenv("ORGANIZATION_ID"),
ground_truths: List[GroundTruthExample] = [],
examples: List[Example] = [],
):
Expand All @@ -31,7 +32,7 @@ def __init__(self,
self._alias = None
self._id = None
self.judgment_api_key = judgment_api_key

self.organization_id = organization_id

def add_from_json(self, file_path: str) -> None:
debug(f"Loading dataset from JSON file: {file_path}")
Expand Down
15 changes: 8 additions & 7 deletions src/judgeval/data/datasets/eval_dataset_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@


class EvalDatasetClient:
def __init__(self, judgment_api_key: str):
def __init__(self, judgment_api_key: str, organization_id: str):
self.judgment_api_key = judgment_api_key
self.organization_id = organization_id

def create_dataset(self) -> EvalDataset:
return EvalDataset(judgment_api_key=self.judgment_api_key)
Expand Down Expand Up @@ -57,15 +58,15 @@ def push(self, dataset: EvalDataset, alias: str, overwrite: Optional[bool] = Fal
"ground_truths": [g.to_dict() for g in dataset.ground_truths],
"examples": [e.to_dict() for e in dataset.examples],
"overwrite": overwrite,
# "judgment_api_key": dataset.judgment_api_key
}
try:
response = requests.post(
JUDGMENT_DATASETS_PUSH_API_URL,
json=content,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
}
)
if response.status_code == 500:
Expand Down Expand Up @@ -120,7 +121,6 @@ def pull(self, alias: str) -> EvalDataset:
)
request_body = {
"alias": alias,
# "judgment_api_key": self.judgment_api_key
}

try:
Expand All @@ -129,7 +129,8 @@ def pull(self, alias: str) -> EvalDataset:
json=request_body,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
}
)
response.raise_for_status()
Expand Down Expand Up @@ -178,7 +179,6 @@ def pull_all_user_dataset_stats(self) -> dict:
total=100,
)
request_body = {
# "judgment_api_key": self.judgment_api_key
}

try:
Expand All @@ -187,7 +187,8 @@ def pull_all_user_dataset_stats(self) -> dict:
json=request_body,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
}
)
response.raise_for_status()
Expand Down
1 change: 1 addition & 0 deletions src/judgeval/evaluation_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class EvaluationRun(BaseModel):

# The user will specify whether they want log_results when they call run_eval
log_results: bool = False # NOTE: log_results has to be set first because it is used to validate project_name and eval_name
organization_id: Optional[str] = None
project_name: Optional[str] = None
eval_name: Optional[str] = None
examples: List[Example]
Expand Down
30 changes: 18 additions & 12 deletions src/judgeval/judgment_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ class EvalRunRequestBody(BaseModel):


class JudgmentClient:
def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY")):
def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"), organization_id: str = os.getenv("ORGANIZATION_ID")):
self.judgment_api_key = judgment_api_key
self.eval_dataset_client = EvalDatasetClient(judgment_api_key)
self.organization_id = organization_id
self.eval_dataset_client = EvalDatasetClient(judgment_api_key, organization_id)

# Verify API key is valid
result, response = self._validate_api_key()
Expand Down Expand Up @@ -78,7 +79,8 @@ def run_evaluation(
model=model,
aggregator=aggregator,
metadata=metadata,
judgment_api_key=self.judgment_api_key
judgment_api_key=self.judgment_api_key,
organization_id=self.organization_id
)
return run_eval(eval, override)
except ValueError as e:
Expand Down Expand Up @@ -115,7 +117,8 @@ def evaluate_dataset(
model=model,
aggregator=aggregator,
metadata=metadata,
judgment_api_key=self.judgment_api_key
judgment_api_key=self.judgment_api_key,
organization_id=self.organization_id
)
return run_eval(evaluation_run)
except ValueError as e:
Expand Down Expand Up @@ -189,7 +192,8 @@ def pull_eval(self, project_name: str, eval_run_name: str) -> List[Dict[str, Uni
eval_run = requests.post(JUDGMENT_EVAL_FETCH_API_URL,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
},
json=eval_run_request_body.model_dump())
if eval_run.status_code != requests.codes.ok:
Expand Down Expand Up @@ -222,7 +226,8 @@ def delete_eval(self, project_name: str, eval_run_name: str) -> bool:
json=eval_run_request_body.model_dump(),
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
})
if response.status_code != requests.codes.ok:
raise ValueError(f"Error deleting eval results: {response.json()}")
Expand All @@ -241,11 +246,12 @@ def delete_project_evals(self, project_name: str) -> bool:
response = requests.delete(JUDGMENT_EVAL_DELETE_PROJECT_API_URL,
json={
"project_name": project_name,
"judgment_api_key": self.judgment_api_key
"judgment_api_key": self.judgment_api_key,
},
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
})
if response.status_code != requests.codes.ok:
raise ValueError(f"Error deleting eval results: {response.json()}")
Expand Down Expand Up @@ -283,15 +289,15 @@ def fetch_classifier_scorer(self, slug: str) -> ClassifierScorer:
"""
request_body = {
"slug": slug,
# "judgment_api_key": self.judgment_api_key
}

response = requests.post(
f"{ROOT_API}/fetch_scorer/",
json=request_body,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
}
)

Expand Down Expand Up @@ -325,7 +331,6 @@ def push_classifier_scorer(self, scorer: ClassifierScorer, slug: str = None) ->
"name": scorer.name,
"conversation": scorer.conversation,
"options": scorer.options,
# "judgment_api_key": self.judgment_api_key,
"slug": slug
}

Expand All @@ -334,7 +339,8 @@ def push_classifier_scorer(self, scorer: ClassifierScorer, slug: str = None) ->
json=request_body,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.judgment_api_key}"
"Authorization": f"Bearer {self.judgment_api_key}",
"X-Organization-Id": self.organization_id
}
)

Expand Down
Loading