JudgmentLabs · JCamyre · Mar 1, 2025 · Feb 28, 2025 · Feb 28, 2025
diff --git a/src/e2etests/judgment_client_test.py b/src/e2etests/judgment_client_test.py
@@ -37,6 +37,7 @@
 # Constants
 SERVER_URL = os.getenv("JUDGMENT_API_URL", "http://localhost:8000")
 API_KEY = os.getenv("JUDGMENT_API_KEY")
+ORGANIZATION_ID = os.getenv("ORGANIZATION_ID")
 
 if not API_KEY:
     pytest.skip("JUDGMENT_API_KEY not set", allow_module_level=True)
@@ -45,7 +46,7 @@
 @pytest.fixture(scope="session")
 def client() -> JudgmentClient:
     """Create a single JudgmentClient instance for all tests."""
-    return JudgmentClient(judgment_api_key=API_KEY)
+    return JudgmentClient(judgment_api_key=API_KEY, organization_id=ORGANIZATION_ID)
 
 @pytest.fixture
 def random_name() -> str:
@@ -396,7 +397,8 @@ def test_fetch_traces_by_time_period(self, client: JudgmentClient):
                 f"{SERVER_URL}/traces/fetch_by_time_period/",
                 headers={
                     "Content-Type": "application/json",
-                    "Authorization": f"Bearer {API_KEY}"
+                    "Authorization": f"Bearer {API_KEY}",
+                    "X-Organization-Id": ORGANIZATION_ID
                 },
                 json={"hours": hours}
             )
@@ -411,7 +413,8 @@ def test_fetch_traces_invalid_period(self, client: JudgmentClient):
                 f"{SERVER_URL}/traces/fetch_by_time_period/",
                 headers={
                     "Content-Type": "application/json",
-                    "Authorization": f"Bearer {API_KEY}"
+                    "Authorization": f"Bearer {API_KEY}",
+                    "X-Organization-Id": ORGANIZATION_ID
                 },
                 json={"hours": hours}
             )
@@ -423,6 +426,7 @@ def test_fetch_traces_missing_api_key(self, client: JudgmentClient):
             f"{SERVER_URL}/traces/fetch_by_time_period/",
             headers={
                 "Content-Type": "application/json",
+                "X-Organization-Id": ORGANIZATION_ID
             },
             json={"hours": 12}
         )

diff --git a/src/e2etests/test_tracer.py b/src/e2etests/test_tracer.py
@@ -131,7 +131,7 @@ async def make_poem(input: str) -> str:
 @pytest.fixture
 def trace_manager_client():
     """Fixture to initialize TraceManagerClient."""
-    return TraceManagerClient(judgment_api_key=os.getenv("JUDGMENT_API_KEY"))
+    return TraceManagerClient(judgment_api_key=os.getenv("JUDGMENT_API_KEY"), organization_id=os.getenv("ORGANIZATION_ID"))
 
 @pytest.mark.asyncio
 async def test_token_counting(trace_manager_client):
@@ -188,11 +188,11 @@ async def test_trace_delete(trace_manager_client):
 
 @pytest.mark.asyncio
 async def test_trace_delete_batch(trace_manager_client):
-    with judgment.trace("TEST_RUN", project_name="TEST", overwrite=True) as trace:
+    with judgment.trace("TEST_RUN2", project_name="TEST", overwrite=True) as trace:
         pass
     trace.save()
 
-    with judgment.trace("TEST_RUN2", project_name="TEST2", overwrite=True) as trace2:
+    with judgment.trace("TEST_RUN3", project_name="TEST2", overwrite=True) as trace2:
         pass
     trace2.save()
 
@@ -221,7 +221,7 @@ async def run_selected_tests(test_names: list[str]):
         test_names (list[str]): List of test function names to run (without 'test_' prefix)
     """
 
-    trace_manager_client = TraceManagerClient(judgment_api_key=os.getenv("JUDGMENT_API_KEY"))
+    trace_manager_client = TraceManagerClient(judgment_api_key=os.getenv("JUDGMENT_API_KEY"), organization_id=os.getenv("ORGANIZATION_ID"))
     print("Client initialized successfully")
     print("*" * 40)
 

diff --git a/src/judgeval/common/tracer.py b/src/judgeval/common/tracer.py
@@ -188,8 +188,9 @@ class TraceManagerClient:
     - Saving a trace
     - Deleting a trace
     """
-    def __init__(self, judgment_api_key: str):
+    def __init__(self, judgment_api_key: str, organization_id: str):
         self.judgment_api_key = judgment_api_key
+        self.organization_id = organization_id
 
     def fetch_trace(self, trace_id: str):
         """
@@ -199,11 +200,11 @@ def fetch_trace(self, trace_id: str):
             JUDGMENT_TRACES_FETCH_API_URL,
             json={
                 "trace_id": trace_id,
-                # "judgment_api_key": self.judgment_api_key,
             },
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
 
@@ -250,7 +251,8 @@ def delete_trace(self, trace_id: str):
             },
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
 
@@ -266,12 +268,12 @@ def delete_traces(self, trace_ids: List[str]):
         response = requests.delete(
             JUDGMENT_TRACES_DELETE_API_URL,
             json={
-                # "judgment_api_key": self.judgment_api_key,
                 "trace_ids": trace_ids,
             },
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
 
@@ -294,7 +296,7 @@ def __init__(self, tracer, trace_id: str, name: str, project_name: str = "defaul
         self.span_type = None
         self._current_span: Optional[TraceEntry] = None
         self.overwrite = overwrite
-        self.trace_manager_client = TraceManagerClient(tracer.api_key)  # Manages DB operations for trace data
+        self.trace_manager_client = TraceManagerClient(tracer.api_key, tracer.organization_id)  # Manages DB operations for trace data
 
     @contextmanager
     def span(self, name: str, span_type: SpanType = "span"):
@@ -371,6 +373,7 @@ def async_evaluate(
             raise ValueError(f"Failed to load scorers: {str(e)}")
 
         eval_run = EvaluationRun(
+            organization_id=self.tracer.organization_id,
             log_results=log_results,
             project_name=self.project_name,
             eval_name=f"{self.name.capitalize()}-"
@@ -546,7 +549,8 @@ def save(self, empty_save: bool = False, overwrite: bool = False) -> Tuple[str,
         # Create trace document
         trace_data = {
             "trace_id": self.trace_id,
-            "api_key": self.tracer.api_key,
+            "judgment_api_key": self.tracer.api_key,
+            "organization_id": self.tracer.organization_id,  # tracer's configured org, not the raw env var
             "name": self.name,
             "project_name": self.project_name,
             "created_at": datetime.fromtimestamp(self.start_time).isoformat(),
@@ -612,14 +616,18 @@ def __new__(cls, *args, **kwargs):
             cls._instance = super(Tracer, cls).__new__(cls)
         return cls._instance
 
-    def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project"):
+    def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project", organization_id: str = os.getenv("ORGANIZATION_ID")):
         if not hasattr(self, 'initialized'):
             if not api_key:
                 raise ValueError("Tracer must be configured with a Judgment API key")
 
+            if not organization_id:
+                raise ValueError("Tracer must be configured with an Organization ID")
+
             self.api_key: str = api_key
             self.project_name: str = project_name
-            self.client: JudgmentClient = JudgmentClient(judgment_api_key=api_key)
+            self.client: JudgmentClient = JudgmentClient(judgment_api_key=api_key, organization_id=organization_id)  # same org as the tracer
+            self.organization_id: str = organization_id
             self.depth: int = 0
             self._current_trace: Optional[str] = None
             self.initialized: bool = True

diff --git a/src/judgeval/data/datasets/dataset.py b/src/judgeval/data/datasets/dataset.py
@@ -17,9 +17,10 @@ class EvalDataset:
     _alias: Union[str, None] = field(default=None)
     _id: Union[str, None] = field(default=None)
     judgment_api_key: str = field(default="")
-
+    organization_id: str = field(default="")
     def __init__(self, 
                  judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"),  
+                 organization_id: str = os.getenv("ORGANIZATION_ID"),
                  ground_truths: List[GroundTruthExample] = [], 
                  examples: List[Example] = [],
                  ):
@@ -31,7 +32,7 @@ def __init__(self,
         self._alias = None
         self._id = None
         self.judgment_api_key = judgment_api_key
-
+        self.organization_id = organization_id
 
     def add_from_json(self, file_path: str) -> None:
         debug(f"Loading dataset from JSON file: {file_path}")

diff --git a/src/judgeval/data/datasets/eval_dataset_client.py b/src/judgeval/data/datasets/eval_dataset_client.py
@@ -18,8 +18,9 @@
 
 
 class EvalDatasetClient:
-    def __init__(self, judgment_api_key: str):
+    def __init__(self, judgment_api_key: str, organization_id: str):
         self.judgment_api_key = judgment_api_key
+        self.organization_id = organization_id
 
     def create_dataset(self) -> EvalDataset:
-        return EvalDataset(judgment_api_key=self.judgment_api_key)
+        return EvalDataset(judgment_api_key=self.judgment_api_key, organization_id=self.organization_id)
@@ -57,15 +58,15 @@ def push(self, dataset: EvalDataset, alias: str, overwrite: Optional[bool] = Fal
                     "ground_truths": [g.to_dict() for g in dataset.ground_truths],
                     "examples": [e.to_dict() for e in dataset.examples],
                     "overwrite": overwrite,
-                    # "judgment_api_key": dataset.judgment_api_key
                 }
             try:
                 response = requests.post(
                     JUDGMENT_DATASETS_PUSH_API_URL, 
                     json=content,
                     headers={
                         "Content-Type": "application/json",
-                        "Authorization": f"Bearer {self.judgment_api_key}"
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
                     }
                 )
                 if response.status_code == 500:
@@ -120,7 +121,6 @@ def pull(self, alias: str) -> EvalDataset:
                 )
                 request_body = {
                     "alias": alias,
-                    # "judgment_api_key": self.judgment_api_key
                 }
 
                 try:
@@ -129,7 +129,8 @@ def pull(self, alias: str) -> EvalDataset:
                         json=request_body,
                         headers={
                             "Content-Type": "application/json",
-                            "Authorization": f"Bearer {self.judgment_api_key}"
+                            "Authorization": f"Bearer {self.judgment_api_key}",
+                            "X-Organization-Id": self.organization_id
                         }
                     )
                     response.raise_for_status()
@@ -178,7 +179,6 @@ def pull_all_user_dataset_stats(self) -> dict:
                     total=100,
                 )
                 request_body = {
-                    # "judgment_api_key": self.judgment_api_key
                 }
 
                 try:
@@ -187,7 +187,8 @@ def pull_all_user_dataset_stats(self) -> dict:
                         json=request_body,
                         headers={
                             "Content-Type": "application/json",
-                            "Authorization": f"Bearer {self.judgment_api_key}"
+                            "Authorization": f"Bearer {self.judgment_api_key}",
+                            "X-Organization-Id": self.organization_id
                         }
                     )
                     response.raise_for_status()

diff --git a/src/judgeval/evaluation_run.py b/src/judgeval/evaluation_run.py
@@ -24,6 +24,7 @@ class EvaluationRun(BaseModel):
 
     # The user will specify whether they want log_results when they call run_eval
     log_results: bool = False  # NOTE: log_results has to be set first because it is used to validate project_name and eval_name
+    organization_id: Optional[str] = None
     project_name: Optional[str] = None
     eval_name: Optional[str] = None
     examples: List[Example]

diff --git a/src/judgeval/judgment_client.py b/src/judgeval/judgment_client.py
@@ -34,9 +34,10 @@ class EvalRunRequestBody(BaseModel):
 
 
 class JudgmentClient:
-    def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY")):
+    def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"), organization_id: str = os.getenv("ORGANIZATION_ID")):
         self.judgment_api_key = judgment_api_key
-        self.eval_dataset_client = EvalDatasetClient(judgment_api_key)
+        self.organization_id = organization_id
+        self.eval_dataset_client = EvalDatasetClient(judgment_api_key, organization_id)
 
         # Verify API key is valid
         result, response = self._validate_api_key()
@@ -78,7 +79,8 @@ def run_evaluation(
                 model=model,
                 aggregator=aggregator,
                 metadata=metadata,
-                judgment_api_key=self.judgment_api_key
+                judgment_api_key=self.judgment_api_key, 
+                organization_id=self.organization_id
             )
             return run_eval(eval, override)
         except ValueError as e:
@@ -115,7 +117,8 @@ def evaluate_dataset(
                 model=model,
                 aggregator=aggregator,
                 metadata=metadata,
-                judgment_api_key=self.judgment_api_key
+                judgment_api_key=self.judgment_api_key,
+                organization_id=self.organization_id
             )
             return run_eval(evaluation_run)
         except ValueError as e:
@@ -189,7 +192,8 @@ def pull_eval(self, project_name: str, eval_run_name: str) -> List[Dict[str, Uni
         eval_run = requests.post(JUDGMENT_EVAL_FETCH_API_URL,
                                  headers={
                                     "Content-Type": "application/json",
-                                    "Authorization": f"Bearer {self.judgment_api_key}"
+                                    "Authorization": f"Bearer {self.judgment_api_key}",
+                                    "X-Organization-Id": self.organization_id
                                  },
                                  json=eval_run_request_body.model_dump())
         if eval_run.status_code != requests.codes.ok:
@@ -222,7 +226,8 @@ def delete_eval(self, project_name: str, eval_run_name: str) -> bool:
                         json=eval_run_request_body.model_dump(),
                         headers={
                             "Content-Type": "application/json",
-                            "Authorization": f"Bearer {self.judgment_api_key}"
+                            "Authorization": f"Bearer {self.judgment_api_key}",
+                            "X-Organization-Id": self.organization_id
                         })
         if response.status_code != requests.codes.ok:
             raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -241,11 +246,12 @@ def delete_project_evals(self, project_name: str) -> bool:
         response = requests.delete(JUDGMENT_EVAL_DELETE_PROJECT_API_URL, 
                         json={
                             "project_name": project_name,
-                            "judgment_api_key": self.judgment_api_key
+                            "judgment_api_key": self.judgment_api_key,
                         },
                         headers={
                             "Content-Type": "application/json",
-                            "Authorization": f"Bearer {self.judgment_api_key}"
+                            "Authorization": f"Bearer {self.judgment_api_key}",
+                            "X-Organization-Id": self.organization_id
                         })
         if response.status_code != requests.codes.ok:
             raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -283,15 +289,15 @@ def fetch_classifier_scorer(self, slug: str) -> ClassifierScorer:
         """
         request_body = {
             "slug": slug,
-            # "judgment_api_key": self.judgment_api_key
         }
 
         response = requests.post(
             f"{ROOT_API}/fetch_scorer/",
             json=request_body,
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
 
@@ -325,7 +331,6 @@ def push_classifier_scorer(self, scorer: ClassifierScorer, slug: str = None) ->
             "name": scorer.name,
             "conversation": scorer.conversation,
             "options": scorer.options,
-            # "judgment_api_key": self.judgment_api_key, 
             "slug": slug
         }
 
@@ -334,7 +339,8 @@ def push_classifier_scorer(self, scorer: ClassifierScorer, slug: str = None) ->
             json=request_body,
             headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.judgment_api_key}"
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )