diff --git a/Pipfile b/Pipfile index a0a69601..ba30d8f0 100644 --- a/Pipfile +++ b/Pipfile @@ -14,6 +14,8 @@ uvicorn = "*" deepeval = "*" supabase = "*" requests = "*" +pandas = "*" +anthropic = "*" [dev-packages] pytest = "*" diff --git a/judgeval/constants.py b/judgeval/constants.py index 1fc1c082..788e8347 100644 --- a/judgeval/constants.py +++ b/judgeval/constants.py @@ -5,7 +5,7 @@ from enum import Enum import litellm -class APIScorer(Enum): +class APIScorer(str, Enum): """ Collection of proprietary scorers implemented by Judgment. @@ -20,7 +20,15 @@ class APIScorer(Enum): CONTEXTUAL_RELEVANCY = "contextual_relevancy" CONTEXTUAL_PRECISION = "contextual_precision" TOOL_CORRECTNESS = "tool_correctness" - + + @classmethod + def _missing_(cls, value): + # Handle case-insensitive lookup for string values; anything else falls through + if isinstance(value, str): + for member in cls: + if member.value == value.lower(): + return member + return None ROOT_API = "http://127.0.0.1:8000" # ROOT_API = "https://api.judgmentlabs.ai" # TODO replace this with the actual API root diff --git a/judgeval/data/api_example.py b/judgeval/data/api_example.py index 6a03cce5..48ae1170 100644 --- a/judgeval/data/api_example.py +++ b/judgeval/data/api_example.py @@ -13,28 +13,24 @@ class ProcessExample(BaseModel): """ name: str input: Optional[str] = None - actual_output: Optional[str] = Field(None, alias="actualOutput") - expected_output: Optional[str] = Field(None, alias="expectedOutput") - context: Optional[list] = Field(None) - retrieval_context: Optional[list] = Field(None, alias="retrievalContext") - tools_called: Optional[list] = Field(None, alias="toolsCalled") - expected_tools: Optional[list] = Field(None, alias="expectedTools") + actual_output: Optional[str] = None + expected_output: Optional[str] = None + context: Optional[list] = None + retrieval_context: Optional[list] = None + tools_called: Optional[list] = None + expected_tools: Optional[list] = None # make these optional, not all test cases in a conversation will be evaluated - success: Union[bool, None] = Field(None) - scorers_data: Union[List[ScorerData], None] = Field( - None, alias="scorersData" - ) - run_duration: Union[float, None] = Field(None, alias="runDuration") - evaluation_cost: Union[float, None] = Field(None, alias="evaluationCost") + success: Optional[bool] = None + scorers_data: Optional[List[ScorerData]] = None + run_duration: Optional[float] = None + evaluation_cost: Optional[float] = None - order: Union[int, None] = Field(None) + order: Optional[int] = None # These should map 1 to 1 from golden - additional_metadata: Optional[Dict] = Field( - None, alias="additionalMetadata" - ) - comments: Optional[str] = Field(None) - trace_id: Optional[str] = Field(None) + additional_metadata: Optional[Dict] = None + comments: Optional[str] = None + trace_id: Optional[str] = None model_config = ConfigDict(arbitrary_types_allowed=True) def update_scorer_data(self, scorer_data: ScorerData): @@ -65,12 +61,12 @@ def update_run_duration(self, run_duration: float): @model_validator(mode="before") def check_input(cls, values: Dict[str, Any]): input = values.get("input") - actual_output = values.get("actualOutput") + actual_output = values.get("actual_output") if (input is None or actual_output is None): - error(f"Validation error: Required fields missing. input={input}, actualOutput={actual_output}") + error(f"Validation error: Required fields missing. input={input}, actual_output={actual_output}") raise ValueError( - "'input' and 'actualOutput' must be provided." + "'input' and 'actual_output' must be provided." 
) return values @@ -97,18 +93,18 @@ def create_process_example( process_ex = ProcessExample( name=name, input=example.input, - actualOutput=example.actual_output, - expectedOutput=example.expected_output, + actual_output=example.actual_output, + expected_output=example.expected_output, context=example.context, - retrievalContext=example.retrieval_context, - toolsCalled=example.tools_called, - expectedTools=example.expected_tools, + retrieval_context=example.retrieval_context, + tools_called=example.tools_called, + expected_tools=example.expected_tools, success=success, - scorersData=scorers_data, - runDuration=None, - evaluationCost=None, + scorers_data=scorers_data, + run_duration=None, + evaluation_cost=None, order=order, - additionalMetadata=example.additional_metadata, + additional_metadata=example.additional_metadata, trace_id=example.trace_id ) return process_ex diff --git a/judgeval/data/datasets/utils.py b/judgeval/data/datasets/utils.py index ca844a78..b1558f17 100644 --- a/judgeval/data/datasets/utils.py +++ b/judgeval/data/datasets/utils.py @@ -14,6 +14,11 @@ def examples_to_ground_truths(examples: List[Example]) -> List[GroundTruthExampl Returns: List[GroundTruthExample]: A list of `GroundTruthExample` objects. """ + + if not isinstance(examples, list): + raise TypeError("Input should be a list of `Example` objects") + + ground_truths = [] ground_truths = [] for e in examples: g_truth = { @@ -45,6 +50,10 @@ def ground_truths_to_examples( Returns: List[Example]: A list of `Example` objects. """ + + if not isinstance(ground_truths, list): + raise TypeError("Input should be a list of `GroundTruthExample` objects") + examples = [] for index, ground_truth in enumerate(ground_truths): e = Example( diff --git a/judgeval/data/example.py b/judgeval/data/example.py index 3df12064..38238f7a 100644 --- a/judgeval/data/example.py +++ b/judgeval/data/example.py @@ -37,41 +37,6 @@ class Example(BaseModel): timestamp: Optional[str] = None trace_id: Optional[str] = None - def __post_init__(self): - # Ensure `context` is None or a list of strings - if self.context is not None: - if not isinstance(self.context, list) or not all( - isinstance(item, str) for item in self.context - ): - raise TypeError("'context' must be None or a list of strings") - - # Ensure `retrieval_context` is None or a list of strings - if self.retrieval_context is not None: - if not isinstance(self.retrieval_context, list) or not all( - isinstance(item, str) for item in self.retrieval_context - ): - raise TypeError( - "'retrieval_context' must be None or a list of strings" - ) - - # Ensure `tools_called` is None or a list of strings - if self.tools_called is not None: - if not isinstance(self.tools_called, list) or not all( - isinstance(item, str) for item in self.tools_called - ): - raise TypeError( - "'tools_called' must be None or a list of strings" - ) - - # Ensure `expected_tools` is None or a list of strings - if self.expected_tools is not None: - if not isinstance(self.expected_tools, list) or not all( - isinstance(item, str) for item in self.expected_tools - ): - raise TypeError( - "'expected_tools' must be None or a list of strings" - ) - def __init__(self, **data): super().__init__(**data) # Set timestamp if not provided diff --git a/judgeval/scorers/custom_scorer.py b/judgeval/scorers/custom_scorer.py index 9c9a9944..75816e7d 100644 --- a/judgeval/scorers/custom_scorer.py +++ b/judgeval/scorers/custom_scorer.py @@ -9,7 +9,6 @@ from abc import abstractmethod from judgeval.common.logger import debug, info, warning, 
error -from judgeval.data import Example from judgeval.judges import judgevalJudge from judgeval.judges.utils import create_judge @@ -84,7 +83,7 @@ def _add_model(self, model: Optional[Union[str, List[str], judgevalJudge]] = Non self.evaluation_model = self.model.get_model_name() @abstractmethod - def score_example(self, example: Example, *args, **kwargs) -> float: + def score_example(self, example, *args, **kwargs) -> float: """ Measures the score on a single example """ @@ -93,7 +92,7 @@ def score_example(self, example: Example, *args, **kwargs) -> float: raise NotImplementedError("You must implement the `score` method in your custom scorer") @abstractmethod - async def a_score_example(self, example: Example, *args, **kwargs) -> float: + async def a_score_example(self, example, *args, **kwargs) -> float: """ Asynchronously measures the score on a single example """ diff --git a/judgeval/scorers/prompt_scorer.py b/judgeval/scorers/prompt_scorer.py index a8d259a7..6ba77172 100644 --- a/judgeval/scorers/prompt_scorer.py +++ b/judgeval/scorers/prompt_scorer.py @@ -68,7 +68,7 @@ def score_example( """ Synchronous method for scoring an example using the prompt criteria. """ - with scorer_progress_meter(self, _show_indicator=_show_indicator): + with scorer_progress_meter(self, display_meter=_show_indicator): if self.async_mode: loop = get_or_create_event_loop() loop.run_until_complete( @@ -217,7 +217,7 @@ def enforce_prompt_format(self, judge_prompt: List[dict], schema: dict): # create formatting string for schema enforcement # schema is a map between key and type of the value for key, key_type in schema.items(): - SCHEMA_ENFORCEMENT_PROMPT += f'"{key}": <{key}> ({key_type}), ' + SCHEMA_ENFORCEMENT_PROMPT += f'"{key}": <{key}> ({key_type.__name__}), ' SCHEMA_ENFORCEMENT_PROMPT = SCHEMA_ENFORCEMENT_PROMPT[:-2] + "}" # remove trailing comma and space judge_prompt[0]["content"] += SCHEMA_ENFORCEMENT_PROMPT return judge_prompt diff --git a/judgeval/scorers/score.py b/judgeval/scorers/score.py index 84af194f..b16352e8 100644 --- a/judgeval/scorers/score.py +++ b/judgeval/scorers/score.py @@ -273,8 +273,15 @@ async def a_execute_scoring( semaphore = asyncio.Semaphore(max_concurrent) async def execute_with_semaphore(func: Callable, *args, **kwargs): - async with semaphore: - return await func(*args, **kwargs) + try: + async with semaphore: + return await func(*args, **kwargs) + except Exception as e: + error(f"Error executing function: {e}") + if kwargs.get('ignore_errors', False): + # Return None when ignoring errors + return None + raise if verbose_mode is not None: for scorer in scorers: @@ -406,7 +413,7 @@ async def a_eval_examples_helper( # the results and update the process example with the scorer data for scorer in scorers: # At this point, the scorer has been executed and already contains data. 
- if scorer.skipped: + if getattr(scorer, 'skipped', False): continue scorer_data = create_scorer_data(scorer) # Fetch scorer data from completed scorer evaluation diff --git a/tests/data/datasets/test_dataset.py b/tests/data/datasets/test_dataset.py new file mode 100644 index 00000000..32a307fa --- /dev/null +++ b/tests/data/datasets/test_dataset.py @@ -0,0 +1,177 @@ +import pytest +import json +import pandas as pd +from unittest.mock import Mock, patch, mock_open +from judgeval.data.datasets.dataset import EvalDataset +from judgeval.data import Example +from judgeval.data.datasets.ground_truth import GroundTruthExample + +@pytest.fixture +def sample_example(): + return Example( + input="test input", + actual_output="test output", + expected_output="expected output", + context=["context1", "context2"], + retrieval_context=["retrieval1"], + additional_metadata={"key": "value"}, + tools_called=["tool1"], + expected_tools=["tool1", "tool2"], + name="test example" + ) + +@pytest.fixture +def sample_ground_truth(): + return GroundTruthExample( + input="test input", + expected_output="expected output", + context=["context1"], + retrieval_context=["retrieval1"], + additional_metadata={"key": "value"}, + tools_called=["tool1"], + expected_tools=["tool1"], + comments="test comment", + source_file="test.py" + ) + +@pytest.fixture +def dataset(): + return EvalDataset(judgment_api_key="test_key") + +def test_init(): + dataset = EvalDataset(judgment_api_key="test_key") + assert dataset.judgment_api_key == "test_key" + assert dataset.ground_truths == [] + assert dataset.examples == [] + assert dataset._alias is None + assert dataset._id is None + +def test_add_example(dataset, sample_example): + dataset.add_example(sample_example) + assert len(dataset.examples) == 1 + assert dataset.examples[0] == sample_example + +def test_add_ground_truth(dataset, sample_ground_truth): + dataset.add_ground_truth(sample_ground_truth) + assert len(dataset.ground_truths) == 1 + assert dataset.ground_truths[0] == sample_ground_truth + +@patch('requests.post') +def test_push_success(mock_post, dataset, sample_example): + # Setup mock response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"_alias": "test_alias", "_id": "test_id"} + mock_post.return_value = mock_response + + # Add example and push + dataset.add_example(sample_example) + result = dataset.push("test_alias") + + assert result is True + assert dataset._alias == "test_alias" + assert dataset._id == "test_id" + mock_post.assert_called_once() + +@patch('requests.post') +def test_push_server_error(mock_post, dataset): + mock_response = Mock() + mock_response.status_code = 500 + mock_post.return_value = mock_response + + result = dataset.push("test_alias") + assert result is False + + mock_post.assert_called_once() + +@patch('requests.post') +def test_pull_success(mock_post, dataset): + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "ground_truths": [{"input": "test", "expected_output": "test"}], + "examples": [{"input": "test", "actual_output": "test"}], + "_alias": "test_alias", + "_id": "test_id" + } + mock_post.return_value = mock_response + + dataset.pull("test_alias") + assert len(dataset.ground_truths) == 1 + assert len(dataset.examples) == 1 + assert dataset._alias == "test_alias" + assert dataset._id == "test_id" + +@patch('builtins.open', new_callable=mock_open) +def test_add_from_json(mock_file, dataset): + json_data = { + "examples": [{"input": "test", 
"actual_output": "test"}], + "ground_truths": [{"input": "test", "expected_output": "test"}] + } + mock_file.return_value.__enter__.return_value.read.return_value = json.dumps(json_data) + + dataset.add_from_json("test.json") + assert len(dataset.examples) == 1 + assert len(dataset.ground_truths) == 1 + +@patch('pandas.read_csv') +def test_add_from_csv(mock_read_csv, dataset): + mock_df = pd.DataFrame({ + 'input': ['test1', 'test2'], + 'actual_output': ['output1', 'output2'], + 'expected_output': ['expected1', 'expected2'], + 'context': ['ctx1', 'ctx2'], + 'retrieval_context': ['ret1', 'ret2'], + 'additional_metadata': ['{}', '{}'], + 'tools_called': ['tool1', 'tool2'], + 'expected_tools': ['tool1', 'tool2'], + 'name': ['name1', None], + 'comments': [None, 'comment2'], + 'source_file': [None, 'file2'], + 'example': [True, False] + }) + mock_read_csv.return_value = mock_df + + dataset.add_from_csv("test.csv") + assert len(dataset.examples) == 1 + assert len(dataset.ground_truths) == 1 + +def test_save_as_json(dataset, sample_example, tmp_path): + dataset.add_example(sample_example) + save_path = tmp_path / "test_dir" + dataset.save_as("json", str(save_path), "test_save") + + assert (save_path / "test_save.json").exists() + with open(save_path / "test_save.json") as f: + saved_data = json.load(f) + assert "examples" in saved_data + assert "ground_truths" in saved_data + +def test_save_as_csv(dataset, sample_example, tmp_path): + dataset.add_example(sample_example) + save_path = tmp_path / "test_dir" + dataset.save_as("csv", str(save_path), "test_save") + + assert (save_path / "test_save.csv").exists() + df = pd.read_csv(save_path / "test_save.csv") + assert len(df) == 1 + assert "input" in df.columns + +def test_save_as_invalid_type(dataset): + with pytest.raises(TypeError): + dataset.save_as("invalid", "test_dir") + +def test_iter_and_len(dataset, sample_example): + dataset.add_example(sample_example) + assert len(dataset) == 1 + examples = list(dataset) + assert len(examples) == 1 + assert examples[0] == sample_example + +def test_str_representation(dataset, sample_example, sample_ground_truth): + dataset.add_example(sample_example) + dataset.add_ground_truth(sample_ground_truth) + str_rep = str(dataset) + assert "EvalDataset" in str_rep + assert "ground_truths" in str_rep + assert "examples" in str_rep diff --git a/tests/data/datasets/test_dataset_utils.py b/tests/data/datasets/test_dataset_utils.py new file mode 100644 index 00000000..736c15a4 --- /dev/null +++ b/tests/data/datasets/test_dataset_utils.py @@ -0,0 +1,110 @@ +import pytest +from typing import List + +from judgeval.data import Example +from judgeval.data.datasets.ground_truth import GroundTruthExample +from judgeval.data.datasets.utils import examples_to_ground_truths, ground_truths_to_examples + + +@pytest.fixture +def sample_example() -> Example: + return Example( + input="test input", + actual_output="actual result", + expected_output="expected result", + context=["some context"], + retrieval_context=["retrieval info"], + tools_called=["tool1", "tool2"], + expected_tools=["tool1"], + additional_metadata={"key": "value"}, + ) + +@pytest.fixture +def sample_ground_truth() -> GroundTruthExample: + return GroundTruthExample( + input="test input", + actual_output="actual result", + expected_output="expected result", + context=["some context"], + retrieval_context=["retrieval info"], + tools_called=["tool1", "tool2"], + expected_tools=["tool1"], + additional_metadata={"key": "value"}, + comments="test comment" + ) + + +class 
TestExamplesToGroundTruths: + def test_empty_list(self): + """Test conversion of empty list.""" + result = examples_to_ground_truths([]) + assert isinstance(result, list) + assert len(result) == 0 + + def test_single_example(self, sample_example): + """Test conversion of a single example.""" + result = examples_to_ground_truths([sample_example]) + assert len(result) == 1 + assert isinstance(result[0], GroundTruthExample) + assert result[0].input == sample_example.input + assert result[0].actual_output == sample_example.actual_output + assert result[0].expected_output == sample_example.expected_output + + def test_multiple_examples(self, sample_example): + """Test conversion of multiple examples.""" + examples = [sample_example, sample_example] + result = examples_to_ground_truths(examples) + assert len(result) == 2 + assert all(isinstance(gt, GroundTruthExample) for gt in result) + + def test_none_input(self): + """Test handling of None input.""" + with pytest.raises(TypeError): + examples_to_ground_truths(None) + + def test_invalid_input_type(self): + """Test handling of invalid input type.""" + with pytest.raises(TypeError): + examples_to_ground_truths("not a list") + + +class TestGroundTruthsToExamples: + def test_empty_list(self): + """Test conversion of empty list.""" + result = ground_truths_to_examples([]) + assert isinstance(result, list) + assert len(result) == 0 + + def test_single_ground_truth(self, sample_ground_truth): + """Test conversion of a single ground truth.""" + result = ground_truths_to_examples([sample_ground_truth]) + assert len(result) == 1 + assert isinstance(result[0], Example) + assert result[0].input == sample_ground_truth.input + assert result[0].actual_output == sample_ground_truth.actual_output + assert result[0].expected_output == sample_ground_truth.expected_output + + def test_multiple_ground_truths(self, sample_ground_truth): + """Test conversion of multiple ground truths.""" + ground_truths = [sample_ground_truth, sample_ground_truth] + result = ground_truths_to_examples(ground_truths) + assert len(result) == 2 + assert all(isinstance(ex, Example) for ex in result) + + def test_none_input(self): + """Test handling of None input.""" + with pytest.raises(TypeError): + ground_truths_to_examples(None) + + def test_invalid_input_type(self): + """Test handling of invalid input type.""" + with pytest.raises(TypeError): + ground_truths_to_examples("not a list") + + def test_preserves_metadata(self, sample_ground_truth): + """Test that all metadata is preserved during conversion.""" + result = ground_truths_to_examples([sample_ground_truth])[0] + assert result.additional_metadata == sample_ground_truth.additional_metadata + assert result.tools_called == sample_ground_truth.tools_called + assert result.expected_tools == sample_ground_truth.expected_tools + \ No newline at end of file diff --git a/tests/data/datasets/test_ground_truth.py b/tests/data/datasets/test_ground_truth.py new file mode 100644 index 00000000..58f58c52 --- /dev/null +++ b/tests/data/datasets/test_ground_truth.py @@ -0,0 +1,128 @@ +import pytest +from judgeval.data.datasets.ground_truth import GroundTruthExample + + +def test_ground_truth_example_minimal(): + """Test creation with only required field (input)""" + example = GroundTruthExample(input="test input") + assert example.input == "test input" + assert example.actual_output is None + assert example.expected_output is None + + +def test_ground_truth_example_full(): + """Test creation with all fields populated""" + example = 
GroundTruthExample( + input="test input", + actual_output="actual result", + expected_output="expected result", + context=["context1", "context2"], + retrieval_context=["retrieved1", "retrieved2"], + additional_metadata={"key": "value"}, + comments="test comment", + tools_called=["tool1", "tool2"], + expected_tools=["expected_tool1"], + source_file="test.txt" + ) + + assert example.input == "test input" + assert example.actual_output == "actual result" + assert example.expected_output == "expected result" + assert example.context == ["context1", "context2"] + assert example.retrieval_context == ["retrieved1", "retrieved2"] + assert example.additional_metadata == {"key": "value"} + assert example.comments == "test comment" + assert example.tools_called == ["tool1", "tool2"] + assert example.expected_tools == ["expected_tool1"] + assert example.source_file == "test.txt" + + +def test_ground_truth_example_to_dict(): + """Test the to_dict method returns correct dictionary""" + example = GroundTruthExample( + input="test input", + actual_output="actual result", + comments="test comment" + ) + + expected_dict = { + "input": "test input", + "actual_output": "actual result", + "expected_output": None, + "context": None, + "retrieval_context": None, + "additional_metadata": None, + "comments": "test comment", + "tools_called": None, + "expected_tools": None, + "source_file": None, + } + + assert example.to_dict() == expected_dict + + +def test_ground_truth_example_str_representation(): + """Test the string representation of the class""" + example = GroundTruthExample( + input="test input", + actual_output="actual result" + ) + + expected_str = ( + "GroundTruthExample(" + "input=test input, " + "actual_output=actual result, " + "expected_output=None, " + "context=None, " + "retrieval_context=None, " + "additional_metadata=None, " + "comments=None, " + "tools_called=None, " + "expected_tools=None, " + "source_file=None)" + ) + + assert str(example) == expected_str + + +def test_ground_truth_example_missing_input(): + """Test that creating instance without required 'input' field raises error""" + with pytest.raises(ValueError): + GroundTruthExample() + + +def test_ground_truth_example_invalid_types(): + """Test that invalid types raise validation errors""" + with pytest.raises(ValueError): + GroundTruthExample(input="test", context="not a list") + + with pytest.raises(ValueError): + GroundTruthExample(input="test", tools_called="not a list") + + with pytest.raises(ValueError): + GroundTruthExample(input="test", additional_metadata="not a dict") + + +def test_ground_truth_example_empty_lists(): + """Test that empty lists are valid for list fields""" + example = GroundTruthExample( + input="test", + context=[], + retrieval_context=[], + tools_called=[], + expected_tools=[] + ) + assert example.context == [] + assert example.retrieval_context == [] + assert example.tools_called == [] + assert example.expected_tools == [] + + +def test_ground_truth_example_empty_dict(): + """Test that empty dict is valid for additional_metadata""" + example = GroundTruthExample( + input="test", + additional_metadata={} + ) + assert example.additional_metadata == {} + \ No newline at end of file diff --git a/tests/data/test_api_example.py b/tests/data/test_api_example.py new file mode 100644 index 00000000..3e992497 --- /dev/null +++ b/tests/data/test_api_example.py @@ -0,0 +1,153 @@ +import pytest +from judgeval.data.api_example import ProcessExample, create_process_example +from judgeval.data.example import Example +from 
judgeval.data.scorer_data import ScorerData + +# Test data fixtures +@pytest.fixture +def basic_example(): + return Example( + name="test_case", + input="test input", + actual_output="actual output", + expected_output="expected output" + ) + +@pytest.fixture +def basic_scorer_data(): + return ScorerData( + name="test_scorer", + threshold=1.0, + success=True, + score=1.0, + metadata={"key": "value"} + ) + +class TestProcessExample: + def test_create_basic_process_example(self): + """Test creating a basic ProcessExample with required fields""" + process_ex = ProcessExample( + name="test", + input="test input", + actual_output="test output" + ) + assert process_ex.name == "test" + assert process_ex.input == "test input" + assert process_ex.actual_output == "test output" + + def test_validation_error_missing_input(self): + """Test validation error when input is missing""" + with pytest.raises(ValueError) as exc_info: + ProcessExample( + name="test", + actual_output="test output" + ) + assert "'input' and 'actual_output' must be provided" in str(exc_info.value) + + def test_validation_error_missing_actual_output(self): + """Test validation error when actual_output is missing""" + with pytest.raises(ValueError) as exc_info: + ProcessExample( + name="test", + input="test input" + ) + assert "'input' and 'actual_output' must be provided" in str(exc_info.value) + + def test_update_scorer_data_initial(self, basic_scorer_data): + """Test updating scorer data for the first time""" + process_ex = ProcessExample( + name="test", + input="test input", + actual_output="test output" + ) + process_ex.update_scorer_data(basic_scorer_data) + + assert process_ex.success == True + assert len(process_ex.scorers_data) == 1 + assert process_ex.scorers_data[0] == basic_scorer_data + + def test_update_scorer_data_multiple(self, basic_scorer_data): + """Test updating scorer data multiple times""" + process_ex = ProcessExample( + name="test", + input="test input", + actual_output="test output" + ) + + # Add first scorer + process_ex.update_scorer_data(basic_scorer_data) + + # Add second scorer with failure + failed_scorer = ScorerData( + name="failed_scorer", + threshold=1.0, + success=False, + score=0.0, + metadata={} + ) + process_ex.update_scorer_data(failed_scorer) + + assert process_ex.success == False + assert len(process_ex.scorers_data) == 2 + assert process_ex.scorers_data[1] == failed_scorer + + def test_update_run_duration(self): + """Test updating run duration""" + process_ex = ProcessExample( + name="test", + input="test input", + actual_output="test output" + ) + process_ex.update_run_duration(1.5) + assert process_ex.run_duration == 1.5 + +class TestCreateProcessExample: + def test_create_process_example_basic(self, basic_example): + """Test creating ProcessExample from basic Example""" + process_ex = create_process_example(basic_example) + + assert process_ex.name == "test_case" + assert process_ex.input == "test input" + assert process_ex.actual_output == "actual output" + assert process_ex.expected_output == "expected output" + assert process_ex.success == True + assert process_ex.scorers_data == [] + assert process_ex.run_duration is None + assert process_ex.evaluation_cost is None + + def test_create_process_example_no_name(self): + """Test creating ProcessExample from Example without name""" + example = Example( + input="test input", + actual_output="actual output" + ) + process_ex = create_process_example(example) + + assert process_ex.name == "Test Case Placeholder" + assert process_ex.input == 
"test input" + assert process_ex.actual_output == "actual output" + + def test_create_process_example_with_all_fields(self): + """Test creating ProcessExample with all possible fields""" + example = Example( + name="full_test", + input="test input", + actual_output="actual output", + expected_output="expected output", + context=["context1", "context2"], + retrieval_context=["retrieval1", "retrieval2"], + tools_called=["tool1", "tool2"], + expected_tools=["expected_tool1"], + additional_metadata={"key": "value"}, + trace_id="trace123" + ) + + process_ex = create_process_example(example) + + assert process_ex.name == "full_test" + assert process_ex.context == ["context1", "context2"] + assert process_ex.retrieval_context == ["retrieval1", "retrieval2"] + assert process_ex.tools_called == ["tool1", "tool2"] + assert process_ex.expected_tools == ["expected_tool1"] + assert process_ex.additional_metadata == {"key": "value"} + assert process_ex.trace_id == "trace123" diff --git a/tests/data/test_example.py b/tests/data/test_example.py new file mode 100644 index 00000000..6a31b80e --- /dev/null +++ b/tests/data/test_example.py @@ -0,0 +1,133 @@ +""" +Unit tests for the Example class +""" + +import pytest +from datetime import datetime +from pydantic import ValidationError +from judgeval.data.example import Example + + +def test_basic_example_creation(): + example = Example( + input="test input", + actual_output="test output" + ) + assert example.input == "test input" + assert example.actual_output == "test output" + assert example.expected_output is None + assert example.timestamp is not None + # Verify timestamp format + datetime.strptime(example.timestamp, "%Y%m%d_%H%M%S") + + +def test_full_example_creation(): + example = Example( + input="test input", + actual_output="test output", + expected_output="expected output", + context=["context1", "context2"], + retrieval_context=["retrieval1", "retrieval2"], + additional_metadata={"key": "value"}, + tools_called=["tool1", "tool2"], + expected_tools=["expected_tool1"], + name="test example", + example_id="123", + timestamp="20240101_120000", + trace_id="trace123" + ) + + assert example.input == "test input" + assert example.actual_output == "test output" + assert example.expected_output == "expected output" + assert example.context == ["context1", "context2"] + assert example.retrieval_context == ["retrieval1", "retrieval2"] + assert example.additional_metadata == {"key": "value"} + assert example.tools_called == ["tool1", "tool2"] + assert example.expected_tools == ["expected_tool1"] + assert example.name == "test example" + assert example.example_id == "123" + assert example.timestamp == "20240101_120000" + assert example.trace_id == "trace123" + + +def test_to_dict(): + example = Example( + input="test input", + actual_output="test output", + name="test example" + ) + + example_dict = example.to_dict() + assert example_dict["input"] == "test input" + assert example_dict["actual_output"] == "test output" + assert example_dict["name"] == "test example" + assert "timestamp" in example_dict + + +def test_string_representation(): + example = Example( + input="test input", + actual_output="test output" + ) + + str_repr = str(example) + assert "input=test input" in str_repr + assert "actual_output=test output" in str_repr + + +# Error cases + +def test_missing_input(): + with pytest.raises(ValidationError): + Example(actual_output="test output") + + +def test_missing_actual_output(): + with pytest.raises(ValidationError): + Example(input="test input") + 
+ +def test_invalid_context_type(): + with pytest.raises(ValidationError): + Example( + input="test", + actual_output="test", + context="invalid context type" # Should be list of strings + ) + + +def test_invalid_context_content(): + with pytest.raises(ValidationError): + Example( + input="test", + actual_output="test", + context=["valid", 123] # Should be all strings + ) + + +def test_invalid_retrieval_context(): + with pytest.raises(ValidationError): + Example( + input="test", + actual_output="test", + retrieval_context=[1, 2, 3] # Should be list of strings + ) + + +def test_invalid_tools_called(): + with pytest.raises(ValidationError): + Example( + input="test", + actual_output="test", + tools_called={"tool1": "value"} # Should be list of strings + ) + + +def test_invalid_expected_tools(): + with pytest.raises(ValidationError): + Example( + input="test", + actual_output="test", + expected_tools=[1, "tool2"] # Should be list of strings + ) diff --git a/tests/data/test_result.py b/tests/data/test_result.py new file mode 100644 index 00000000..afe60440 --- /dev/null +++ b/tests/data/test_result.py @@ -0,0 +1,121 @@ +import pytest +from judgeval.data.result import ScoringResult, generate_scoring_result +from judgeval.data.api_example import ProcessExample +from judgeval.data.scorer_data import ScorerData + +@pytest.fixture +def sample_scorer_data(): + return ScorerData( + name="test_scorer", + threshold=1.0, + success=True, + score=0.8, + metadata={"key": "value"} + ) + +@pytest.fixture +def sample_process_example(sample_scorer_data): + return ProcessExample( + name="test_example", + input="test input", + actual_output="actual output", + expected_output="expected output", + context=["context1", "context2"], + retrieval_context=["retrieval1"], + success=True, + scorers_data=[sample_scorer_data] + ) + +class TestScoringResult: + def test_basic_initialization(self): + """Test basic initialization with minimal required fields""" + result = ScoringResult(success=True, scorers_data=[]) + assert result.success is True + assert result.scorers_data == [] + assert result.input is None + assert result.actual_output is None + + def test_full_initialization(self, sample_scorer_data): + """Test initialization with all fields""" + result = ScoringResult( + success=True, + scorers_data=[sample_scorer_data], + input="test input", + actual_output="actual output", + expected_output="expected output", + context=["context"], + retrieval_context=["retrieval"], + trace_id="trace123" + ) + + assert result.success is True + assert len(result.scorers_data) == 1 + assert result.input == "test input" + assert result.actual_output == "actual output" + assert result.expected_output == "expected output" + assert result.context == ["context"] + assert result.retrieval_context == ["retrieval"] + assert result.trace_id == "trace123" + + def test_to_dict_conversion(self, sample_scorer_data): + """Test conversion to dictionary""" + result = ScoringResult( + success=True, + scorers_data=[sample_scorer_data], + input="test" + ) + + dict_result = result.to_dict() + assert isinstance(dict_result, dict) + assert dict_result["success"] is True + assert len(dict_result["scorers_data"]) == 1 + assert dict_result["input"] == "test" + assert dict_result["actual_output"] is None + + def test_to_dict_with_none_scorers(self): + """Test conversion to dictionary when scorers_data is None""" + result = ScoringResult(success=False, scorers_data=None) + dict_result = result.to_dict() + assert dict_result["scorers_data"] is None + + def 
test_string_representation(self, sample_scorer_data): + """Test string representation of ScoringResult""" + result = ScoringResult(success=True, scorers_data=[sample_scorer_data]) + str_result = str(result) + assert "ScoringResult" in str_result + assert "success=True" in str_result + +class TestGenerateScoringResult: + def test_generate_from_process_example(self, sample_process_example): + """Test generating ScoringResult from ProcessExample""" + result = generate_scoring_result(sample_process_example) + + assert isinstance(result, ScoringResult) + assert result.success == sample_process_example.success + assert result.input == sample_process_example.input + assert result.actual_output == sample_process_example.actual_output + assert result.expected_output == sample_process_example.expected_output + assert result.context == sample_process_example.context + assert result.retrieval_context == sample_process_example.retrieval_context + assert result.trace_id == sample_process_example.trace_id + + def test_generate_with_minimal_process_example(self): + """Test generating ScoringResult from minimal ProcessExample""" + minimal_example = ProcessExample( + name="minimal", + input="test", + actual_output="output", + success=True, + scorers_data=[] + ) + + result = generate_scoring_result(minimal_example) + assert isinstance(result, ScoringResult) + assert result.success is True + assert result.scorers_data == [] + assert result.input == "test" + assert result.actual_output == "output" + assert result.expected_output is None + assert result.context is None + assert result.retrieval_context is None + assert result.trace_id is None diff --git a/tests/data/test_scorer_data.py b/tests/data/test_scorer_data.py new file mode 100644 index 00000000..1f1e7829 --- /dev/null +++ b/tests/data/test_scorer_data.py @@ -0,0 +1,294 @@ +import pytest +from typing import Dict, Optional + +from judgeval.data.scorer_data import ScorerData, create_scorer_data +from judgeval.scorers.custom_scorer import CustomScorer + + +class MockCustomScorer(CustomScorer): + """Mock implementation of CustomScorer for testing""" + def __init__( + self, + score_type: str = "mock_scorer", + threshold: float = 0.7, + score: Optional[float] = None, + score_breakdown: Optional[Dict] = None, + reason: Optional[str] = None, + success: Optional[bool] = None, + evaluation_model: Optional[str] = "gpt-4", + strict_mode: bool = False, + error: Optional[str] = None, + evaluation_cost: Optional[float] = None, + verbose_logs: Optional[str] = None, + additional_metadata: Optional[Dict] = None + ): + super().__init__( + score_type=score_type, + threshold=threshold, + score=score, + score_breakdown=score_breakdown, + reason=reason, + success=success, + evaluation_model=evaluation_model, + strict_mode=strict_mode, + error=error, + evaluation_cost=evaluation_cost, + verbose_logs=verbose_logs, + additional_metadata=additional_metadata + ) + self.__name__ = score_type + + def score_example(self, example, *args, **kwargs): + pass + + async def a_score_example(self, example, *args, **kwargs): + pass + + def success_check(self) -> bool: + return self.score >= self.threshold if self.score is not None else False + + +@pytest.fixture +def successful_scorer(): + """ + Fixture for a scorer that executes successfully and stores the results of the evaluation + """ + return MockCustomScorer( + score_type="test_scorer", + threshold=0.7, + score=0.8, + reason="Test passed successfully", + evaluation_model="gpt-4", + strict_mode=True, + evaluation_cost=0.1, + 
verbose_logs="Detailed test logs", + additional_metadata={"key": "value"} + ) + + +@pytest.fixture +def failed_scorer(): + """ + Fixture for a scorer that does not pass its threshold expectation + """ + return MockCustomScorer( + score_type="test_scorer", + threshold=0.7, + score=0.6, + reason="Test failed", + evaluation_model="gpt-4", + strict_mode=True, + evaluation_cost=0.1, + verbose_logs="Detailed test logs" + ) + + +@pytest.fixture +def error_scorer(): + """ + Fixture for a scorer that encounters an error during execution + """ + return MockCustomScorer( + score_type="test_scorer", + threshold=0.7, + error="Test execution failed", + evaluation_model="gpt-4", + evaluation_cost=0.1, + verbose_logs="Error logs" + ) + + +def test_scorer_data_successful_case(successful_scorer): + """Test ScorerData creation for a successful evaluation""" + scorer_data = create_scorer_data(successful_scorer) + + assert scorer_data.name == "test_scorer" + assert scorer_data.threshold == 0.7 + assert scorer_data.score == 0.8 + assert scorer_data.success is True + assert scorer_data.reason == "Test passed successfully" + assert scorer_data.strict_mode is True + assert scorer_data.evaluation_model == "gpt-4" + assert scorer_data.error is None + assert scorer_data.evaluation_cost == 0.1 + assert scorer_data.verbose_logs == "Detailed test logs" + assert scorer_data.additional_metadata == {"key": "value"} + + +def test_scorer_data_failed_case(failed_scorer): + """Test ScorerData creation for a failed evaluation""" + scorer_data = create_scorer_data(failed_scorer) + + assert scorer_data.name == "test_scorer" + assert scorer_data.threshold == 0.7 + assert scorer_data.score == 0.6 + assert scorer_data.success is False + assert scorer_data.reason == "Test failed" + assert scorer_data.error is None + + +def test_scorer_data_error_case(error_scorer): + """Test ScorerData creation when an error occurs""" + scorer_data = create_scorer_data(error_scorer) + + assert scorer_data.name == "test_scorer" + assert scorer_data.threshold == 0.7 + assert scorer_data.score is None + assert scorer_data.success is False + assert scorer_data.reason is None + assert scorer_data.error == "Test execution failed" + + +def test_scorer_data_to_dict(successful_scorer): + """Test the to_dict method of ScorerData""" + scorer_data = create_scorer_data(successful_scorer) + data_dict = scorer_data.to_dict() + + assert isinstance(data_dict, dict) + assert data_dict["name"] == "test_scorer" + assert data_dict["threshold"] == 0.7 + assert data_dict["score"] == 0.8 + assert data_dict["success"] is True + assert data_dict["reason"] == "Test passed successfully" + assert data_dict["strict_mode"] is True + assert data_dict["evaluation_model"] == "gpt-4" + assert data_dict["error"] is None + assert data_dict["evaluation_cost"] == 0.1 + assert data_dict["verbose_logs"] == "Detailed test logs" + assert data_dict["additional_metadata"] == {"key": "value"} + + +def test_scorer_data_direct_creation(): + """Test direct creation of ScorerData object""" + scorer_data = ScorerData( + name="direct_test", + threshold=0.5, + success=True, + score=0.75, + reason="Direct creation test", + strict_mode=True, + evaluation_model="gpt-4", + error=None, + evaluation_cost=0.2, + verbose_logs="Test logs", + additional_metadata={"test": "data"} + ) + + assert scorer_data.name == "direct_test" + assert scorer_data.threshold == 0.5 + assert scorer_data.success is True + assert scorer_data.score == 0.75 + + +def test_scorer_data_minimal_creation(): + """Test creation of ScorerData 
with minimal required fields""" + scorer_data = ScorerData( + name="minimal_test", + threshold=0.5, + success=True + ) + + assert scorer_data.name == "minimal_test" + assert scorer_data.threshold == 0.5 + assert scorer_data.success is True + assert scorer_data.score is None + assert scorer_data.reason is None + assert scorer_data.strict_mode is None + assert scorer_data.evaluation_model is None + assert scorer_data.error is None + assert scorer_data.evaluation_cost is None + assert scorer_data.verbose_logs is None + assert scorer_data.additional_metadata is None + + +def test_scorer_data_to_dict_minimal(): + """Test to_dict method with minimal required fields""" + scorer_data = ScorerData( + name="minimal_test", + threshold=0.5, + success=True + ) + data_dict = scorer_data.to_dict() + + assert isinstance(data_dict, dict) + assert data_dict["name"] == "minimal_test" + assert data_dict["threshold"] == 0.5 + assert data_dict["success"] is True + assert data_dict["score"] is None + assert data_dict["reason"] is None + assert data_dict["strict_mode"] is None + assert data_dict["evaluation_model"] is None + assert data_dict["error"] is None + assert data_dict["evaluation_cost"] is None + assert data_dict["verbose_logs"] is None + assert data_dict["additional_metadata"] is None + +def test_scorer_data_to_dict_with_list_model(): + """Test to_dict method when evaluation_model is a list""" + scorer_data = ScorerData( + name="list_model_test", + threshold=0.5, + success=True, + evaluation_model=["gpt-4", "gpt-3.5-turbo"] + ) + data_dict = scorer_data.to_dict() + + assert isinstance(data_dict["evaluation_model"], list) + assert data_dict["evaluation_model"] == ["gpt-4", "gpt-3.5-turbo"] + +def test_scorer_data_to_dict_with_error(): + """Test to_dict method with error information""" + scorer_data = ScorerData( + name="error_test", + threshold=0.5, + success=False, + error="Test error message" + ) + data_dict = scorer_data.to_dict() + + assert data_dict["error"] == "Test error message" + assert data_dict["success"] is False + assert data_dict["score"] is None + + +def test_scorer_data_to_dict_all_parameters(): + """Test to_dict method with all possible parameters set""" + test_metadata = { + "model_tokens": 150, + "completion_tokens": 50, + "custom_field": "custom_value" + } + + scorer_data = ScorerData( + name="full_test", + threshold=0.75, + success=True, + score=0.85, + reason="Comprehensive test case", + strict_mode=True, + evaluation_model=["gpt-4", "gpt-3.5-turbo"], + error=None, + evaluation_cost=0.123, + verbose_logs="Detailed execution logs\nwith multiple lines", + additional_metadata=test_metadata + ) + data_dict = scorer_data.to_dict() + + # Verify all fields are present and have correct values + assert isinstance(data_dict, dict) + assert data_dict["name"] == "full_test" + assert data_dict["threshold"] == 0.75 + assert data_dict["success"] is True + assert data_dict["score"] == 0.85 + assert data_dict["reason"] == "Comprehensive test case" + assert data_dict["strict_mode"] is True + assert data_dict["evaluation_model"] == ["gpt-4", "gpt-3.5-turbo"] + assert data_dict["error"] is None + assert data_dict["evaluation_cost"] == 0.123 + assert data_dict["verbose_logs"] == "Detailed execution logs\nwith multiple lines" + assert data_dict["additional_metadata"] == test_metadata + + # Verify the metadata dictionary contains all expected fields + assert data_dict["additional_metadata"]["model_tokens"] == 150 + assert data_dict["additional_metadata"]["completion_tokens"] == 50 + assert 
data_dict["additional_metadata"]["custom_field"] == "custom_value" diff --git a/tests/scorers/test_base_scorer.py b/tests/scorers/test_base_scorer.py new file mode 100644 index 00000000..d369997d --- /dev/null +++ b/tests/scorers/test_base_scorer.py @@ -0,0 +1,65 @@ +import pytest +from pydantic import ValidationError + +from judgeval.scorers.base_scorer import JudgmentScorer +from judgeval.constants import APIScorer + +@pytest.fixture +def valid_scorer_params(): + return { + "threshold": 0.8, + "score_type": APIScorer.FAITHFULNESS + } + +def test_judgment_scorer_creation_with_enum(): + """Test creating JudgmentScorer with APIScorer enum value""" + scorer = JudgmentScorer(threshold=0.8, score_type=APIScorer.FAITHFULNESS) + assert scorer.threshold == 0.8 + assert scorer.score_type == "faithfulness" + +def test_judgment_scorer_creation_with_string(): + """Test creating JudgmentScorer with string value""" + scorer = JudgmentScorer(threshold=0.8, score_type="faithfulness") + assert scorer.threshold == 0.8 + assert scorer.score_type == "faithfulness" + +def test_judgment_scorer_creation_with_uppercase_string(): + """Test creating JudgmentScorer with uppercase string value""" + scorer = JudgmentScorer(threshold=0.8, score_type="FAITHFULNESS") + assert scorer.threshold == 0.8 + assert scorer.score_type == "faithfulness" + +def test_judgment_scorer_str_representation(): + """Test the string representation of JudgmentScorer""" + scorer = JudgmentScorer(threshold=0.8, score_type=APIScorer.FAITHFULNESS) + expected_str = "JudgmentScorer(score_type=faithfulness, threshold=0.8)" + assert str(scorer) == expected_str + +@pytest.mark.parametrize("invalid_score_type", [ + 123, # integer + None, # None + True, # boolean + ["faithfulness"], # list + {"type": "faithfulness"}, # dict +]) +def test_judgment_scorer_invalid_score_type(invalid_score_type): + """Test creating JudgmentScorer with invalid score_type values""" + with pytest.raises(ValidationError) as exc_info: + JudgmentScorer(threshold=0.8, score_type=invalid_score_type) + + assert "Input should be" in str(exc_info.value) + +def test_judgment_scorer_invalid_string_value(): + """Test creating JudgmentScorer with invalid string value""" + with pytest.raises(ValidationError): + JudgmentScorer(threshold=0.8, score_type="INVALID_METRIC") + +def test_judgment_scorer_threshold_validation(): + """Test threshold validation""" + # Test float values + scorer = JudgmentScorer(threshold=0.5, score_type=APIScorer.FAITHFULNESS) + assert scorer.threshold == 0.5 + + # Test integer values (should be converted to float) + scorer = JudgmentScorer(threshold=1, score_type=APIScorer.FAITHFULNESS) + assert scorer.threshold == 1.0 diff --git a/tests/scorers/test_custom_scorer.py b/tests/scorers/test_custom_scorer.py new file mode 100644 index 00000000..c01b12a9 --- /dev/null +++ b/tests/scorers/test_custom_scorer.py @@ -0,0 +1,152 @@ +import asyncio +import pytest +from unittest.mock import Mock, patch +from typing import Dict, Optional + +from judgeval.scorers.custom_scorer import CustomScorer +from judgeval.judges import judgevalJudge +from judgeval.common.exceptions import InvalidJudgeModelError + +class MockJudge(judgevalJudge): + """Mock implementation of judgevalJudge for testing""" + def load_model(self, *args, **kwargs): + return Mock() + + def generate(self, *args, **kwargs) -> str: + return "mock response" + + async def a_generate(self, *args, **kwargs) -> str: + return "mock async response" + + def get_model_name(self, *args, **kwargs) -> str: + return "mock-model" 
+ +class SampleScorer(CustomScorer): + """Concrete implementation of CustomScorer for testing""" + def score_example(self, example, *args, **kwargs) -> float: + return 0.8 + + async def a_score_example(self, example, *args, **kwargs) -> float: + return 0.9 + + def success_check(self) -> bool: + return self.score >= self.threshold if self.score is not None else False + +@pytest.fixture +def basic_scorer(): + return SampleScorer( + score_type="test_scorer", + threshold=0.7 + ) + +@pytest.fixture +def mock_judge(): + return MockJudge(model_name="mock-model") + +class TestCustomScorer: + def test_initialization(self): + """Test basic initialization with minimal parameters""" + scorer = SampleScorer(score_type="test", threshold=0.5) + assert scorer.score_type == "test" + assert scorer.threshold == 0.5 + assert scorer.score is None + assert scorer.async_mode is True + assert scorer.verbose_mode is True + + def test_initialization_with_all_params(self): + """Test initialization with all optional parameters""" + additional_metadata = {"key": "value"} + scorer = SampleScorer( + score_type="test", + threshold=0.5, + score=0.8, + score_breakdown={"detail": 0.8}, + reason="test reason", + success=True, + evaluation_model="gpt-4", + strict_mode=True, + async_mode=False, + verbose_mode=False, + include_reason=True, + error=None, + evaluation_cost=0.01, + verbose_logs="test logs", + additional_metadata=additional_metadata + ) + + assert scorer.score == 0.8 + assert scorer.score_breakdown == {"detail": 0.8} + assert scorer.reason == "test reason" + assert scorer.success is True + assert scorer.strict_mode is True + assert scorer.async_mode is False + assert scorer.additional_metadata == additional_metadata + + @patch('judgeval.scorers.custom_scorer.create_judge') + def test_add_model_success(self, mock_create_judge, mock_judge, basic_scorer): + """Test successful model addition""" + mock_create_judge.return_value = (mock_judge, True) + + scorer = basic_scorer + scorer._add_model("mock-model") + + assert scorer.evaluation_model == "mock-model" + assert scorer.using_native_model is True + mock_create_judge.assert_called_once_with("mock-model") + + @patch('judgeval.scorers.custom_scorer.create_judge') + def test_add_model_error(self, mock_create_judge, basic_scorer): + """Test model addition with invalid model""" + mock_create_judge.side_effect = InvalidJudgeModelError("Invalid model") + + scorer = basic_scorer + with pytest.raises(InvalidJudgeModelError): + scorer._add_model("invalid-model") + + def test_score_example_implementation(self, basic_scorer): + """Test score_example returns expected value""" + score = basic_scorer.score_example({"test": "example"}) + assert score == 0.8 + + @pytest.mark.asyncio + async def test_a_score_example_implementation(self, basic_scorer): + """Test async score_example returns expected value""" + score = await basic_scorer.a_score_example({"test": "example"}) + assert score == 0.9 + + def test_success_check_implementation(self, basic_scorer): + """Test success_check with various scores""" + # Test with score above threshold + basic_scorer.score = 0.8 + assert basic_scorer.success_check() is True + + # Test with score below threshold + basic_scorer.score = 0.6 + assert basic_scorer.success_check() is False + + # Test with no score + basic_scorer.score = None + assert basic_scorer.success_check() is False + + def test_str_representation(self, basic_scorer): + """Test string representation of scorer""" + str_rep = str(basic_scorer) + assert "CustomScorer" in str_rep + assert 
"test_scorer" in str_rep + assert "0.7" in str_rep # threshold value + + def test_abstract_methods_base_class(self): + """Test that abstract methods raise NotImplementedError when not implemented""" + class IncompleteScorer(CustomScorer): + pass + + scorer = IncompleteScorer(score_type="test", threshold=0.5) + + with pytest.raises(NotImplementedError): + scorer.score_example({}) + + with pytest.raises(NotImplementedError): + asyncio.run(scorer.a_score_example({})) + + with pytest.raises(NotImplementedError): + scorer.success_check() diff --git a/tests/scorers/test_prompt_scorer.py b/tests/scorers/test_prompt_scorer.py new file mode 100644 index 00000000..4bdf3e89 --- /dev/null +++ b/tests/scorers/test_prompt_scorer.py @@ -0,0 +1,156 @@ +import pytest +from unittest.mock import MagicMock, AsyncMock +from typing import List, Dict + +from judgeval.data import Example +from judgeval.scorers.prompt_scorer import PromptScorer, ClassifierScorer + +# Test fixtures +@pytest.fixture +def example(): + return Example( + input="This is a test input", + actual_output="This is a test response", + expected_output="Expected response", + context=["Some context"], + retrieval_context=["Retrieved context"], + tools_called=["tool1", "tool2"], + expected_tools=["tool1"] + ) + +@pytest.fixture +def mock_model(): + model = MagicMock() + model.generate = MagicMock(return_value='{"score": 0.8, "reason": "Test reason"}') + model.a_generate = AsyncMock(return_value='{"score": 0.8, "reason": "Test reason"}') + return model + +# Simple implementation of PromptScorer for testing +class SampleScorer(PromptScorer): + def build_measure_prompt(self, example: Example) -> List[dict]: + return [ + {"role": "system", "content": "Test system prompt"}, + {"role": "user", "content": f"Response: {example.actual_output}"} + ] + + def build_schema(self) -> dict: + return {"score": float, "reason": str} + + def process_response(self, response: dict): + return response["score"], response["reason"] + + def success_check(self, **kwargs) -> bool: + return self.result >= self.threshold + +# Tests for PromptScorer +class TestPromptScorer: + def test_init(self): + scorer = SampleScorer("test_scorer") + assert scorer.name == "test_scorer" + assert scorer.threshold == 0.5 + assert scorer.include_reason is True + assert scorer.async_mode is True + + def test_init_strict_mode(self): + scorer = SampleScorer("test_scorer", strict_mode=True) + assert scorer.threshold == 1 + + def test_enforce_prompt_format(self): + scorer = SampleScorer("test_scorer") + prompt = [{"role": "system", "content": "Base prompt"}] + schema = {"score": float, "reason": str} + + formatted = scorer.enforce_prompt_format(prompt, schema) + assert "JSON format" in formatted[0]["content"] + assert '"score": (float)' in formatted[0]["content"] + assert '"reason": (str)' in formatted[0]["content"] + + def test_enforce_prompt_format_invalid_input(self): + scorer = SampleScorer("test_scorer") + with pytest.raises(TypeError): + scorer.enforce_prompt_format("invalid", {}) + + @pytest.mark.asyncio + async def test_a_score_example(self, example, mock_model): + scorer = SampleScorer("test_scorer") + scorer.model = mock_model + + result = await scorer.a_score_example(example, _show_indicator=False) + assert result == 0.8 + assert scorer.reason == "Test reason" + + def test_score_example_sync(self, example, mock_model): + scorer = SampleScorer("test_scorer", async_mode=False) + scorer.model = mock_model + + result = scorer.score_example(example, _show_indicator=False) + assert result == 
0.8 + assert scorer.reason == "Test reason" + +# Tests for ClassifierScorer +class TestClassifierScorer: + @pytest.fixture + def classifier_conversation(self): + return [ + {"role": "system", "content": "Evaluate if {{actual_output}} is positive"}, + {"role": "user", "content": "Please analyze."} + ] + + @pytest.fixture + def classifier_options(self): + return {"positive": 1.0, "negative": 0.0} + + def test_classifier_init(self, classifier_conversation, classifier_options): + scorer = ClassifierScorer( + "test_classifier", + classifier_conversation, + classifier_options + ) + assert scorer.conversation == classifier_conversation + assert scorer.options == classifier_options + + def test_build_measure_prompt(self, example, classifier_conversation, classifier_options): + scorer = ClassifierScorer( + "test_classifier", + classifier_conversation, + classifier_options + ) + + prompt = scorer.build_measure_prompt(example) + assert "This is a test response" in prompt[0]["content"] + + def test_process_response(self, classifier_conversation, classifier_options): + scorer = ClassifierScorer( + "test_classifier", + classifier_conversation, + classifier_options + ) + + response = {"choice": "positive", "reason": "Test reason"} + score, reason = scorer.process_response(response) + assert score == 1.0 + assert reason == "Test reason" + + def test_process_response_invalid_choice(self, classifier_conversation, classifier_options): + scorer = ClassifierScorer( + "test_classifier", + classifier_conversation, + classifier_options + ) + + response = {"choice": "invalid", "reason": "Test reason"} + with pytest.raises(ValueError): + scorer.process_response(response) + + def test_success_check(self, classifier_conversation, classifier_options): + scorer = ClassifierScorer( + "test_classifier", + classifier_conversation, + classifier_options + ) + + scorer.score = 1.0 + assert scorer.success_check() is True + + scorer.score = 0.0 + assert scorer.success_check() is False diff --git a/tests/scorers/test_score.py b/tests/scorers/test_score.py new file mode 100644 index 00000000..08354fd9 --- /dev/null +++ b/tests/scorers/test_score.py @@ -0,0 +1,974 @@ +import pytest +from unittest.mock import AsyncMock, Mock, patch +from rich.progress import Progress, SpinnerColumn, TextColumn +import asyncio + +from judgeval.scorers.score import (safe_a_score_example, + score_task, + score_with_indicator, + a_execute_scoring, + a_eval_examples_helper) +from judgeval.scorers import CustomScorer +from judgeval.data import Example, ScoringResult, ProcessExample, ScorerData +from judgeval.common.exceptions import MissingTestCaseParamsError + + +class MockCustomScorer(CustomScorer): + def score_example(self, example, *args, **kwargs): + pass + + async def a_score_example(self, example, *args, **kwargs): + pass + + def success_check(self): + return True + + +@pytest.fixture +def example(): + return Example( + input="test input", + actual_output="test output", + example_id="test_id" + ) + + +@pytest.fixture +def basic_scorer(): + return MockCustomScorer( + score_type="test_scorer", + threshold=0.5 + ) + + +@pytest.fixture +def scorers(basic_scorer): + """Fixture providing a list of test scorers""" + return [ + MockCustomScorer(score_type="test_scorer", threshold=0.5), + MockCustomScorer(score_type="test_scorer", threshold=0.5) + ] + + +@pytest.fixture +def progress(): + return Progress( + SpinnerColumn(style="rgb(106,0,255)"), + TextColumn("[progress.description]{task.description}"), + transient=True + ) + + +@pytest.mark.asyncio +async 
def test_successful_scoring(example, basic_scorer): + """Test basic successful scoring case""" + basic_scorer.a_score_example = AsyncMock() + + await safe_a_score_example( + scorer=basic_scorer, + example=example, + ignore_errors=True, + skip_on_missing_params=True + ) + + basic_scorer.a_score_example.assert_called_once_with(example, _show_indicator=False) + assert basic_scorer.error is None + assert not hasattr(basic_scorer, 'skipped') or not basic_scorer.skipped + + +@pytest.mark.asyncio +async def test_missing_params_with_skip(example, basic_scorer): + """Test handling of MissingTestCaseParamsError when skip_on_missing_params is True""" + async def mock_score(*args, **kwargs): + raise MissingTestCaseParamsError("Missing required params") + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + await safe_a_score_example( + scorer=basic_scorer, + example=example, + ignore_errors=True, + skip_on_missing_params=True + ) + + assert basic_scorer.skipped is True + assert basic_scorer.error is None + + +@pytest.mark.asyncio +async def test_missing_params_with_ignore_errors(example, basic_scorer): + """Test handling of MissingTestCaseParamsError when ignore_errors is True but not skipping""" + async def mock_score(*args, **kwargs): + raise MissingTestCaseParamsError("Missing required params") + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + await safe_a_score_example( + scorer=basic_scorer, + example=example, + ignore_errors=True, + skip_on_missing_params=False + ) + + assert basic_scorer.error == "Missing required params" + assert basic_scorer.success is False + + +@pytest.mark.asyncio +async def test_missing_params_raises_error(example, basic_scorer): + """Test that MissingTestCaseParamsError is raised when appropriate""" + async def mock_score(*args, **kwargs): + raise MissingTestCaseParamsError("Missing required params") + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + with pytest.raises(MissingTestCaseParamsError): + await safe_a_score_example( + scorer=basic_scorer, + example=example, + ignore_errors=False, + skip_on_missing_params=False + ) + + +@pytest.mark.asyncio +async def test_type_error_handling(example, basic_scorer): + """Test handling of TypeError when _show_indicator is not accepted""" + calls = [] + + async def mock_score(*args, **kwargs): + calls.append(kwargs) + if '_show_indicator' in kwargs: + raise TypeError("_show_indicator not accepted") + return True + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + await safe_a_score_example( + scorer=basic_scorer, + example=example, + ignore_errors=True, + skip_on_missing_params=True + ) + + assert len(calls) == 2 # Should try twice - once with _show_indicator, once without + assert '_show_indicator' in calls[0] # First attempt includes _show_indicator + assert '_show_indicator' not in calls[1] # Second attempt doesn't include _show_indicator + + +@pytest.mark.asyncio +async def test_general_exception_with_ignore(example, basic_scorer): + """Test handling of general exceptions when ignore_errors is True""" + async def mock_score(*args, **kwargs): + raise ValueError("Test error") + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + await safe_a_score_example( + scorer=basic_scorer, + example=example, + ignore_errors=True, + skip_on_missing_params=True + ) + + assert basic_scorer.error == "Test error" + assert basic_scorer.success is False + + +@pytest.mark.asyncio +async def test_general_exception_raises(example, basic_scorer): + 
"""Test that general exceptions are raised when ignore_errors is False""" + async def mock_score(*args, **kwargs): + raise ValueError("Test error") + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + with pytest.raises(ValueError): + await safe_a_score_example( + scorer=basic_scorer, + example=example, + ignore_errors=False, + skip_on_missing_params=True + ) + + +@pytest.mark.asyncio +async def test_error_with_missing_params(example, basic_scorer): + """Test handling of TypeError followed by MissingTestCaseParamsError""" + calls = [] + + async def mock_score(*args, **kwargs): + calls.append(kwargs) + if '_show_indicator' in kwargs: + raise TypeError("_show_indicator not accepted") + raise MissingTestCaseParamsError("Missing params") + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + await safe_a_score_example( + scorer=basic_scorer, + example=example, + ignore_errors=True, + skip_on_missing_params=True + ) + + assert basic_scorer.skipped is True + assert len(calls) == 2 + + +@pytest.mark.asyncio +async def test_task_successful_scoring(example, basic_scorer, progress): + """Test basic successful scoring case with progress tracking""" + task_id = progress.add_task(description="Test Task", total=100) + basic_scorer.a_score_example = AsyncMock() + + with progress: + await score_task( + task_id=task_id, + progress=progress, + scorer=basic_scorer, + example=example + ) + + basic_scorer.a_score_example.assert_called_once_with(example, _show_indicator=False) + assert progress.tasks[task_id].completed == 100 + assert "Completed" in progress.tasks[task_id].description + + +@pytest.mark.asyncio +async def test_task_missing_params_with_skip(example, basic_scorer, progress): + """Test handling of MissingTestCaseParamsError when skip_on_missing_params is True""" + task_id = progress.add_task(description="Test Task", total=100) + + async def mock_score(*args, **kwargs): + raise MissingTestCaseParamsError("Missing required params") + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + with progress: + await score_task( + task_id=task_id, + progress=progress, + scorer=basic_scorer, + example=example, + skip_on_missing_params=True + ) + + assert basic_scorer.skipped is True + assert not progress.tasks[task_id].completed # Task should not be marked as complete + + +@pytest.mark.asyncio +async def test_task_missing_params_with_ignore_errors(example, basic_scorer, progress): + """Test handling of MissingTestCaseParamsError when ignore_errors is True""" + task_id = progress.add_task(description="Test Task", total=100) + + async def mock_score(*args, **kwargs): + raise MissingTestCaseParamsError("Missing required params") + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + with progress: + await score_task( + task_id=task_id, + progress=progress, + scorer=basic_scorer, + example=example, + skip_on_missing_params=False, + ignore_errors=True + ) + + assert basic_scorer.error == "Missing required params" + assert basic_scorer.success is False + assert progress.tasks[task_id].completed == 100 + assert "Failed" in progress.tasks[task_id].description + + +@pytest.mark.asyncio +async def test_task_missing_params_raises_error(example, basic_scorer, progress): + """Test that MissingTestCaseParamsError is raised when appropriate""" + task_id = progress.add_task(description="Test Task", total=100) + + async def mock_score(*args, **kwargs): + raise MissingTestCaseParamsError("Missing required params") + + basic_scorer.a_score_example = 
AsyncMock(side_effect=mock_score) + + with pytest.raises(MissingTestCaseParamsError): + with progress: + await score_task( + task_id=task_id, + progress=progress, + scorer=basic_scorer, + example=example, + skip_on_missing_params=False, + ignore_errors=False + ) + + +@pytest.mark.asyncio +async def test_task_type_error_handling(example, basic_scorer, progress): + """Test handling of TypeError when _show_indicator is not accepted""" + task_id = progress.add_task(description="Test Task", total=100) + calls = [] + + async def mock_score(*args, **kwargs): + calls.append(kwargs) + if '_show_indicator' in kwargs: + raise TypeError("_show_indicator not accepted") + return True + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + with progress: + await score_task( + task_id=task_id, + progress=progress, + scorer=basic_scorer, + example=example + ) + + assert len(calls) == 2 # Should try twice - once with _show_indicator, once without + assert progress.tasks[task_id].completed == 100 + assert "Completed" in progress.tasks[task_id].description + + +@pytest.mark.asyncio +async def test_task_general_exception_with_ignore(example, basic_scorer, progress): + """Test handling of general exceptions when ignore_errors is True""" + task_id = progress.add_task(description="Test Task", total=100) + + async def mock_score(*args, **kwargs): + raise ValueError("Test error") + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + with progress: + await score_task( + task_id=task_id, + progress=progress, + scorer=basic_scorer, + example=example, + ignore_errors=True + ) + + assert basic_scorer.error == "Test error" + assert basic_scorer.success is False + assert progress.tasks[task_id].completed == 100 + assert "Failed" in progress.tasks[task_id].description + + +@pytest.mark.asyncio +async def test_task_general_exception_raises(example, basic_scorer, progress): + """Test that general exceptions are raised when ignore_errors is False""" + task_id = progress.add_task(description="Test Task", total=100) + + async def mock_score(*args, **kwargs): + raise ValueError("Test error") + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + with pytest.raises(ValueError): + with progress: + await score_task( + task_id=task_id, + progress=progress, + scorer=basic_scorer, + example=example, + ignore_errors=False + ) + + +@pytest.mark.asyncio +async def test_task_progress_timing(example, basic_scorer, progress): + """Test that timing information is correctly added to progress description""" + task_id = progress.add_task(description="Test Task", total=100) + + async def mock_score(*args, **kwargs): + await asyncio.sleep(0.1) # Simulate some work + return True + + basic_scorer.a_score_example = AsyncMock(side_effect=mock_score) + + with progress: + await score_task( + task_id=task_id, + progress=progress, + scorer=basic_scorer, + example=example + ) + + assert "(" in progress.tasks[task_id].description + assert "s)" in progress.tasks[task_id].description # Should show timing + + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.safe_a_score_example') +@patch('judgeval.scorers.score.score_task') +async def test_score_with_indicator_no_show(mock_score_task, mock_safe_score, example, scorers): + """Test scoring without showing the indicator""" + mock_safe_score.return_value = AsyncMock()() + + await score_with_indicator( + scorers=scorers, + example=example, + ignore_errors=True, + skip_on_missing_params=True, + show_indicator=False + ) + + assert mock_safe_score.call_count 
== 2 # Called once for each scorer + assert mock_score_task.call_count == 0 # Should not be called when show_indicator is False + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.Progress') +@patch('judgeval.scorers.score.score_task') +@patch('judgeval.scorers.score.scorer_console_msg') +async def test_score_with_indicator_show(mock_console_msg, mock_score_task, mock_progress, example, scorers): + """Test scoring with progress indicator""" + mock_progress_instance = Mock() + mock_progress.return_value.__enter__.return_value = mock_progress_instance + mock_progress_instance.add_task.return_value = 1 + mock_score_task.return_value = AsyncMock()() + mock_console_msg.return_value = "Test Progress Message" + + await score_with_indicator( + scorers=scorers, + example=example, + ignore_errors=True, + skip_on_missing_params=True, + show_indicator=True + ) + + assert mock_progress_instance.add_task.call_count == 2 # Called once for each scorer + assert mock_score_task.call_count == 2 # Called once for each scorer + +@pytest.mark.asyncio +async def test_score_with_indicator_error_handling(example, scorers): + """Test error handling during scoring""" + # Make first scorer raise an error + async def mock_error(*args, **kwargs): + raise ValueError("Test error") + + async def mock_success(*args, **kwargs): + # Simulate successful scoring + scorers[1].success = True + return True + + scorers[0].a_score_example = AsyncMock(side_effect=mock_error) + scorers[1].a_score_example = AsyncMock(side_effect=mock_success) + + await score_with_indicator( + scorers=scorers, + example=example, + ignore_errors=True, + skip_on_missing_params=True, + show_indicator=False + ) + + assert scorers[0].error == "Test error" + assert scorers[0].success is False + assert scorers[1].error is None + assert scorers[1].success is True + +@pytest.mark.asyncio +async def test_score_with_indicator_missing_params(example, scorers): + """Test handling of missing parameters""" + async def mock_missing_params(*args, **kwargs): + raise MissingTestCaseParamsError("Missing params") + + # Set up mock for first scorer to raise error + scorers[0].a_score_example = AsyncMock(side_effect=mock_missing_params) + # Set up mock for second scorer to succeed + scorers[1].a_score_example = AsyncMock(return_value=True) + + await score_with_indicator( + scorers=scorers, + example=example, + ignore_errors=True, + skip_on_missing_params=True, + show_indicator=False + ) + + assert scorers[0].skipped is True + assert not hasattr(scorers[1], 'skipped') # Second scorer should not be skipped, so attribute shouldn't exist + +@pytest.mark.asyncio +async def test_score_with_indicator_raises_error(example, scorers): + """Test that errors are raised when ignore_errors is False""" + async def mock_error(*args, **kwargs): + raise ValueError("Test error") + + scorers[0].a_score_example = AsyncMock(side_effect=mock_error) + + with pytest.raises(ValueError): + await score_with_indicator( + scorers=scorers, + example=example, + ignore_errors=False, # Errors should be raised + skip_on_missing_params=True, + show_indicator=False + ) + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.Progress') +async def test_score_with_indicator_empty_scorers(mock_progress, example): + """Test handling of empty scorers list""" + await score_with_indicator( + scorers=[], + example=example, + ignore_errors=True, + skip_on_missing_params=True, + show_indicator=False + ) + + mock_progress.assert_not_called() + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.Progress') +async def 
test_score_with_indicator_concurrent_execution(mock_progress, example, scorers): + """Test that scorers are executed concurrently""" + completed_order = [] + + async def mock_delayed_score(*args, **kwargs): + await asyncio.sleep(0.1) # First scorer + completed_order.append(1) + + async def mock_quick_score(*args, **kwargs): + completed_order.append(2) # Second scorer + + # Create two separate scorer instances instead of using the same one twice + scorer1 = MockCustomScorer(score_type="test_scorer", threshold=0.5) + scorer2 = MockCustomScorer(score_type="test_scorer", threshold=0.5) + + scorer1.a_score_example = AsyncMock(side_effect=mock_delayed_score) + scorer2.a_score_example = AsyncMock(side_effect=mock_quick_score) + + await score_with_indicator( + scorers=[scorer1, scorer2], # Use the new separate instances + example=example, + ignore_errors=True, + skip_on_missing_params=True, + show_indicator=False + ) + + # Second scorer should complete before first scorer due to delay + assert completed_order == [2, 1] + + +@pytest.fixture +def mock_example(): + return Example( + input="test input", + actual_output="test output", + example_id="test_id", + timestamp="20241225_000004" + ) + +@pytest.fixture +def mock_examples(): + return [ + Example(input=f"test input {i}", + actual_output=f"test output {i}", + example_id=f"test_id_{i}", + timestamp="20241225_000004") + for i in range(3) + ] + +@pytest.fixture +def mock_scorer(): + class MockScorer(CustomScorer): + def __init__(self): + self.success = None + self.error = None + self.skipped = False + self.verbose_mode = False + self._add_model = Mock() + + return MockScorer() + +@pytest.fixture +def mock_scoring_result(): + return Mock(spec=ScoringResult) + +# Tests +@pytest.mark.asyncio +@patch('judgeval.scorers.score.clone_scorers') +@patch('judgeval.scorers.score.a_eval_examples_helper') +async def test_basic_execution(mock_helper, mock_clone_scorers, mock_examples, mock_scorer, mock_scoring_result): + """Test basic execution with single scorer and multiple examples""" + # Setup mocks + mock_clone_scorers.return_value = [mock_scorer] + mock_helper.return_value = None + + results = await a_execute_scoring( + examples=mock_examples, + scorers=[mock_scorer], + show_indicator=False + ) + + assert len(results) == len(mock_examples) + assert mock_helper.call_count == len(mock_examples) + assert mock_clone_scorers.call_count == len(mock_examples) + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.clone_scorers') +@patch('judgeval.scorers.score.a_eval_examples_helper') +async def test_empty_scorers(mock_helper, mock_clone_scorers, mock_examples): + """Test execution with no scorers""" + results = await a_execute_scoring( + examples=mock_examples, + scorers=[], + show_indicator=False + ) + + assert len(results) == len(mock_examples) + mock_helper.assert_not_called() + mock_clone_scorers.assert_not_called() + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.clone_scorers') +@patch('judgeval.scorers.score.a_eval_examples_helper') +async def test_empty_examples(mock_helper, mock_clone_scorers, mock_scorer): + """Test execution with no examples""" + results = await a_execute_scoring( + examples=[], + scorers=[mock_scorer], + show_indicator=False + ) + + assert len(results) == 0 + mock_helper.assert_not_called() + mock_clone_scorers.assert_not_called() + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.clone_scorers') +@patch('judgeval.scorers.score.a_eval_examples_helper') +async def test_error_handling(mock_helper, mock_clone_scorers, 
mock_examples, mock_scorer): + """Test error handling when helper raises exception""" + mock_clone_scorers.return_value = [mock_scorer] + mock_helper.side_effect = ValueError("Test error") + + # Test with ignore_errors=True + results = await a_execute_scoring( + examples=mock_examples, + scorers=[mock_scorer], + ignore_errors=True, + skip_on_missing_params=True, + show_indicator=False, + _use_bar_indicator=False + ) + + # Add assertions to verify error was handled + assert len(results) == len(mock_examples) + assert all(result is None for result in results) # Results should be None when errors are ignored + + # Test with ignore_errors=False + with pytest.raises(ValueError): + await a_execute_scoring( + examples=mock_examples, + scorers=[mock_scorer], + ignore_errors=False, + skip_on_missing_params=True, + show_indicator=False, + _use_bar_indicator=False + ) + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.clone_scorers') +@patch('judgeval.scorers.score.a_eval_examples_helper') +async def test_max_concurrent_limit(mock_helper, mock_clone_scorers, mock_examples, mock_scorer): + """Test concurrent execution limit""" + mock_clone_scorers.return_value = [mock_scorer] + + async def delayed_execution(*args, **kwargs): + await asyncio.sleep(0.1) + return None + + mock_helper.side_effect = delayed_execution + + start_time = asyncio.get_event_loop().time() + + await a_execute_scoring( + examples=mock_examples, + scorers=[mock_scorer], + max_concurrent=1, # Force sequential execution + show_indicator=False + ) + + end_time = asyncio.get_event_loop().time() + duration = end_time - start_time + + # Duration should be at least (num_examples * 0.1) seconds due to sequential execution + assert duration >= len(mock_examples) * 0.1 + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.clone_scorers') +@patch('judgeval.scorers.score.a_eval_examples_helper') +async def test_throttle_value(mock_helper, mock_clone_scorers, mock_examples, mock_scorer): + """Test throttling between tasks""" + mock_clone_scorers.return_value = [mock_scorer] + start_time = asyncio.get_event_loop().time() + + await a_execute_scoring( + examples=mock_examples, + scorers=[mock_scorer], + throttle_value=0.1, + show_indicator=False + ) + + end_time = asyncio.get_event_loop().time() + duration = end_time - start_time + + # Duration should be at least (num_examples - 1) * throttle_value + assert duration >= (len(mock_examples) - 1) * 0.1 + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.clone_scorers') +@patch('judgeval.scorers.score.a_eval_examples_helper') +@patch('judgeval.scorers.score.tqdm_asyncio') +async def test_progress_indicator(mock_tqdm, mock_helper, mock_clone_scorers, mock_examples, mock_scorer): + """Test progress indicator functionality""" + mock_clone_scorers.return_value = [mock_scorer] + + await a_execute_scoring( + examples=mock_examples, + scorers=[mock_scorer], + show_indicator=True, + _use_bar_indicator=True + ) + + assert mock_tqdm.called + mock_helper.assert_called() + +@pytest.mark.asyncio +@patch('judgeval.scorers.score.clone_scorers') +@patch('judgeval.scorers.score.a_eval_examples_helper') +async def test_model_assignment(mock_helper, mock_clone_scorers, mock_examples, mock_scorer): + """Test model assignment to scorers""" + mock_clone_scorers.return_value = [mock_scorer] + test_model = "test_model" + + await a_execute_scoring( + examples=mock_examples, + scorers=[mock_scorer], + model=test_model, + show_indicator=False + ) + + mock_scorer._add_model.assert_called_once_with(test_model) + 
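# --- Illustrative sketch: the concurrency pattern these tests assume ----------
# test_max_concurrent_limit and test_throttle_value above only make timing
# assertions, so they implicitly assume that a_execute_scoring bounds
# parallelism (for example with a semaphore) and sleeps `throttle_value`
# seconds between task launches. The hypothetical helper below shows that
# pattern in isolation; the real implementation in judgeval.scorers.score may
# be structured differently.
import asyncio
from typing import Awaitable, Callable, List, TypeVar

T = TypeVar("T")

async def bounded_gather(
    factories: List[Callable[[], Awaitable[T]]],
    max_concurrent: int = 100,
    throttle_value: float = 0.0,
) -> List[T]:
    semaphore = asyncio.Semaphore(max_concurrent)

    async def run(factory: Callable[[], Awaitable[T]]) -> T:
        # At most `max_concurrent` coroutines hold the semaphore at once,
        # which is why max_concurrent=1 forces sequential execution in
        # test_max_concurrent_limit.
        async with semaphore:
            return await factory()

    scheduled = []
    for factory in factories:
        scheduled.append(asyncio.create_task(run(factory)))
        if throttle_value:
            # Spacing out launches adds at least
            # (len(factories) - 1) * throttle_value seconds of wall time,
            # which is the lower bound test_throttle_value checks.
            await asyncio.sleep(throttle_value)
    return await asyncio.gather(*scheduled)
# ------------------------------------------------------------------------------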
+@pytest.mark.asyncio +@patch('judgeval.scorers.score.clone_scorers') +@patch('judgeval.scorers.score.a_eval_examples_helper') +async def test_verbose_mode_setting(mock_helper, mock_clone_scorers, mock_examples, mock_scorer): + """Test verbose mode is properly set on scorers""" + mock_clone_scorers.return_value = [mock_scorer] + + await a_execute_scoring( + examples=mock_examples, + scorers=[mock_scorer], + verbose_mode=True, + show_indicator=False + ) + + assert mock_scorer.verbose_mode is True + + +@pytest.fixture +def mock_example(): + """Create a mock Example object""" + return Example( + name="test_example", + input="test input", + actual_output="test output", + expected_output="expected output", + context=["context1", "context2"], + retrieval_context=["retrieval1"], + trace_id="test_trace_123" + ) + +@pytest.fixture +def mock_scorer(): + """Create a mock CustomScorer""" + scorer = Mock(spec=CustomScorer) + scorer.__name__ = "MockScorer" + scorer.threshold = 0.8 + scorer.strict_mode = True + scorer.evaluation_model = "test-model" + scorer.score = 0.9 + scorer.reason = "Test reason" + scorer.success_check.return_value = True + scorer.evaluation_cost = 0.1 + scorer.verbose_logs = "Test logs" + scorer.additional_metadata = {"key": "value"} + scorer.skipped = False + scorer.error = None + return scorer + +@pytest.fixture +def mock_scoring_results(): + """Create a mock list to store ScoringResults""" + return [None] * 3 # List with 3 None elements + +@pytest.fixture +def mock_process_example(mock_example): + """Create a mock ProcessExample""" + return ProcessExample( + name=mock_example.name, + input=mock_example.input, + actual_output=mock_example.actual_output, + expected_output=mock_example.expected_output, + context=mock_example.context, + retrieval_context=mock_example.retrieval_context, + trace_id=mock_example.trace_id + ) + +@pytest.mark.asyncio +async def test_a_eval_examples_helper_success( + mock_example, + mock_scorer, + mock_scoring_results, + mock_process_example +): + """Test successful execution of a_eval_examples_helper""" + + # Create list of scorers + scorers = [mock_scorer] + + # Mock the external functions + with patch('judgeval.scorers.score.create_process_example', return_value=mock_process_example) as mock_create_process, \ + patch('judgeval.scorers.score.score_with_indicator', new_callable=AsyncMock) as mock_score_with_indicator, \ + patch('judgeval.scorers.score.create_scorer_data') as mock_create_scorer_data, \ + patch('judgeval.scorers.score.generate_scoring_result') as mock_generate_result: + + # Setup mock returns + mock_scorer_data = ScorerData( + name=mock_scorer.__name__, + threshold=mock_scorer.threshold, + success=True, + score=mock_scorer.score, + reason=mock_scorer.reason, + strict_mode=mock_scorer.strict_mode, + evaluation_model=mock_scorer.evaluation_model, + error=None, + evaluation_cost=mock_scorer.evaluation_cost, + verbose_logs=mock_scorer.verbose_logs, + additional_metadata=mock_scorer.additional_metadata + ) + mock_create_scorer_data.return_value = mock_scorer_data + + mock_scoring_result = ScoringResult( + success=True, + scorers_data=[mock_scorer_data], + input=mock_example.input, + actual_output=mock_example.actual_output, + expected_output=mock_example.expected_output, + context=mock_example.context, + retrieval_context=mock_example.retrieval_context, + trace_id=mock_example.trace_id + ) + mock_generate_result.return_value = mock_scoring_result + + # Execute the function + await a_eval_examples_helper( + scorers=scorers, + 
example=mock_example, + scoring_results=mock_scoring_results, + score_index=0, + ignore_errors=True, + skip_on_missing_params=True, + show_indicator=True, + _use_bar_indicator=False, + pbar=None + ) + + # Verify the calls + mock_create_process.assert_called_once_with(mock_example) + mock_score_with_indicator.assert_called_once_with( + scorers=scorers, + example=mock_example, + skip_on_missing_params=True, + ignore_errors=True, + show_indicator=True + ) + mock_create_scorer_data.assert_called_once_with(mock_scorer) + mock_generate_result.assert_called_once_with(mock_process_example) + + # Verify the result was stored correctly + assert mock_scoring_results[0] == mock_scoring_result + +@pytest.mark.asyncio +async def test_a_eval_examples_helper_with_skipped_scorer( + mock_example, + mock_scorer, + mock_scoring_results, + mock_process_example +): + """Test execution when scorer is skipped""" + + scorers = [mock_scorer] + + with patch('judgeval.scorers.score.create_process_example', return_value=mock_process_example) as mock_create_process, \ + patch('judgeval.scorers.score.score_with_indicator', new_callable=AsyncMock) as mock_score_with_indicator, \ + patch('judgeval.scorers.score.create_scorer_data') as mock_create_scorer_data, \ + patch('judgeval.scorers.score.generate_scoring_result') as mock_generate_result: + + # Mock score_with_indicator to simulate skipped scorer behavior + async def mock_score(*args, **kwargs): + # Set scorer as skipped after score_with_indicator is called + mock_scorer.skipped = True + return None + + mock_score_with_indicator.side_effect = mock_score + + await a_eval_examples_helper( + scorers=scorers, + example=mock_example, + scoring_results=mock_scoring_results, + score_index=1, + ignore_errors=True, + skip_on_missing_params=True, + show_indicator=True, + _use_bar_indicator=False, + pbar=None + ) + + # Verify that create_scorer_data was not called since scorer was skipped + mock_create_scorer_data.assert_not_called() + + # Verify that generate_scoring_result was still called (but with no scorer data) + mock_generate_result.assert_called_once_with(mock_process_example) + +@pytest.mark.asyncio +async def test_a_eval_examples_helper_with_progress_bar( + mock_example, + mock_scorer, + mock_scoring_results, + mock_process_example +): + """Test execution with progress bar""" + + scorers = [mock_scorer] + mock_pbar = Mock() + + with patch('judgeval.scorers.score.create_process_example', return_value=mock_process_example), \ + patch('judgeval.scorers.score.score_with_indicator', new_callable=AsyncMock), \ + patch('judgeval.scorers.score.create_scorer_data'), \ + patch('judgeval.scorers.score.generate_scoring_result'): + + await a_eval_examples_helper( + scorers=scorers, + example=mock_example, + scoring_results=mock_scoring_results, + score_index=2, + ignore_errors=True, + skip_on_missing_params=True, + show_indicator=True, + _use_bar_indicator=True, + pbar=mock_pbar + ) + + # Verify progress bar was updated + mock_pbar.update.assert_called_once_with(1) + diff --git a/tests/scorers/test_scorer_utils.py b/tests/scorers/test_scorer_utils.py new file mode 100644 index 00000000..c10ac0a6 --- /dev/null +++ b/tests/scorers/test_scorer_utils.py @@ -0,0 +1,175 @@ +import pytest +import asyncio +import json +from unittest.mock import MagicMock, patch +from rich.console import Console + +from judgeval.scorers.utils import ( + clone_scorers, + scorer_console_msg, + scorer_progress_meter, + parse_response_json, + print_verbose_logs, + create_verbose_logs, + 
get_or_create_event_loop, +) +from judgeval.scorers import CustomScorer +from judgeval.data import Example + + +class MockCustomScorer(CustomScorer): + """Mock implementation of CustomScorer for testing""" + def __init__(self, **kwargs): + super().__init__( + score_type="mock_scorer", + threshold=0.7, + **kwargs + ) + self.__name__ = "MockScorer" + + def score_example(self, example: Example, *args, **kwargs) -> float: + return 1.0 + + async def a_score_example(self, example: Example, *args, **kwargs) -> float: + return 1.0 + + def success_check(self) -> bool: + return True + + +@pytest.fixture +def mock_scorer(): + return MockCustomScorer( + evaluation_model="gpt-4", + strict_mode=True, + async_mode=True, + verbose_mode=True + ) + + +@pytest.fixture +def mock_scorers(): + return [ + MockCustomScorer(evaluation_model="gpt-4o"), + MockCustomScorer(evaluation_model="gpt-4o") + ] + + +def test_clone_scorers(mock_scorers): + """Test that scorers are properly cloned with all attributes""" + cloned = clone_scorers(mock_scorers) + + assert len(cloned) == len(mock_scorers) + for original, clone in zip(mock_scorers, cloned): + assert type(original) == type(clone) + assert original.score_type == clone.score_type + assert original.threshold == clone.threshold + assert original.evaluation_model == clone.evaluation_model + + +def test_scorer_console_msg(mock_scorer): + """Test console message formatting""" + # Test with default async_mode + msg = scorer_console_msg(mock_scorer) + assert "MockScorer" in msg + assert "gpt-4" in msg + assert "async_mode=True" in msg + + # Test with explicit async_mode + msg = scorer_console_msg(mock_scorer, async_mode=False) + assert "async_mode=False" in msg + + +@pytest.mark.asyncio +async def test_scorer_progress_meter(mock_scorer, capsys): + """Test progress meter display""" + # Test with display_meter=True + with scorer_progress_meter(mock_scorer, display_meter=True): + pass + + # Test with display_meter=False + with scorer_progress_meter(mock_scorer, display_meter=False): + pass + + +def test_parse_response_json_valid(): + """Test parsing valid JSON responses""" + valid_json = '{"score": 0.8, "reason": "test"}' + result = parse_response_json(valid_json) + assert result == {"score": 0.8, "reason": "test"} + + # Test JSON with surrounding text + text_with_json = 'Some text {"score": 0.9} more text' + result = parse_response_json(text_with_json) + assert result == {"score": 0.9} + + +def test_parse_response_json_invalid(mock_scorer): + """ + Test parsing invalid JSON responses, but still completes the JSON parsing without error. + """ + invalid_json = '{"score": 0.8, "reason": "test"' # Missing closing brace + + # the parse_response_json function should add the missing brace and parse the JSON + assert parse_response_json(invalid_json, scorer=mock_scorer) == {"score": 0.8, "reason": "test"} + assert mock_scorer.error is None + +def test_parse_response_json_missing_beginning_brace(mock_scorer): + """ + Test that parse_response_json raises an error when JSON is missing opening brace. 
+ """ + invalid_json = 'score": 0.8, "reason": "test}' # Missing opening brace + + with pytest.raises(ValueError) as exc_info: + parse_response_json(invalid_json, scorer=mock_scorer) + + assert "Evaluation LLM outputted an invalid JSON" in str(exc_info.value) + assert mock_scorer.error is not None + + +def test_create_verbose_logs(mock_scorer, capsys): + """Test verbose logs creation""" + steps = ["Step 1", "Step 2", "Final step"] + logs = create_verbose_logs(mock_scorer, steps) + + assert "Step 1" in logs + assert "Step 2" in logs + + # Check printed output when verbose_mode is True + captured = capsys.readouterr() + assert "MockScorer Verbose Logs" in captured.out + + # Test with verbose_mode=False + mock_scorer.verbose_mode = False + create_verbose_logs(mock_scorer, steps) + captured = capsys.readouterr() + assert captured.out == "" + + +@pytest.mark.asyncio +async def test_get_or_create_event_loop(): + """Test event loop creation and retrieval""" + # Remove the is_running check since the loop will be running under pytest-asyncio + loop = get_or_create_event_loop() + assert isinstance(loop, asyncio.AbstractEventLoop) + + # Test with running loop + async def dummy_task(): + pass + + loop.create_task(dummy_task()) + loop2 = get_or_create_event_loop() + assert loop2 is not None + + assert loop.is_running() + + +def test_print_verbose_logs(capsys): + """Test verbose logs printing""" + metric = "TestMetric" + logs = "Test logs content" + print_verbose_logs(metric, logs) + + captured = capsys.readouterr() + assert "TestMetric Verbose Logs" in captured.out + assert "Test logs content" in captured.out