Skip to content

Commit f9d6189

Browse files
authored
Merge pull request #73 from JudgmentLabs/trace-token-aggregations
add: count total tokens for trace runs
2 parents 5ae3f77 + dd4bbb4 commit f9d6189

File tree

2 files changed

+47
-11
lines changed

2 files changed

+47
-11
lines changed

src/e2etests/test_tracer.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -122,20 +122,38 @@ async def make_poem(input: str) -> str:
122122
print(f"Error generating poem: {e}")
123123
return ""
124124

125+
async def test_token_counting(trace_data: dict):
126+
"""Test that token counts are properly aggregated from different LLM API calls."""
127+
# Verify token counts exist and are properly aggregated
128+
token_counts = trace_data["token_counts"]
129+
assert token_counts["prompt_tokens"] > 0, "Prompt tokens should be counted"
130+
assert token_counts["completion_tokens"] > 0, "Completion tokens should be counted"
131+
assert token_counts["total_tokens"] > 0, "Total tokens should be counted"
132+
assert token_counts["total_tokens"] == (
133+
token_counts["prompt_tokens"] + token_counts["completion_tokens"]
134+
), "Total tokens should be equal to the sum of prompt and completion tokens"
135+
136+
# Print token counts for verification
137+
print("\nToken Count Results:")
138+
print(f"Prompt Tokens: {token_counts['prompt_tokens']}")
139+
print(f"Completion Tokens: {token_counts['completion_tokens']}")
140+
print(f"Total Tokens: {token_counts['total_tokens']}")
141+
125142
async def test_evaluation_mixed(input):
    """Run the mixed-model poem workflow inside a trace and verify its token counts.

    NOTE(review): relies on module-level `judgment`, `make_upper`, `make_poem`,
    and `answer_user_question` defined elsewhere in this file.
    """
    project = "TestingPoemBot"
    with judgment.trace("Use-claude-hehexd123", project_name=project, overwrite=True) as trace:
        uppercased = await make_upper(input)
        poem = await make_poem(uppercased)
        await answer_user_question("What if these shoes don't fit?")

        # Save trace data and test token counting
        _trace_id, trace_data = trace.save()
        await test_token_counting(trace_data)

        trace.print()
        return poem
137155

138156
if __name__ == "__main__":
    # Run both tests: the trace workflow invokes the token-count checks internally.
    asyncio.run(test_evaluation_mixed("Write a poem about Nissan R32 GTR"))

src/judgeval/common/tracer.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,24 @@ def save(self, empty_save: bool = False, overwrite: bool = False) -> Tuple[str,
383383
raw_entries = [entry.to_dict() for entry in self.entries]
384384
condensed_entries = self.condense_trace(raw_entries)
385385

386+
# Calculate total token counts from LLM API calls
387+
total_prompt_tokens = 0
388+
total_completion_tokens = 0
389+
total_tokens = 0
390+
391+
for entry in condensed_entries:
392+
if entry.get("span_type") == "llm" and isinstance(entry.get("output"), dict):
393+
usage = entry["output"].get("usage", {})
394+
# Handle OpenAI/Together format
395+
if "prompt_tokens" in usage:
396+
total_prompt_tokens += usage.get("prompt_tokens", 0)
397+
total_completion_tokens += usage.get("completion_tokens", 0)
398+
# Handle Anthropic format
399+
elif "input_tokens" in usage:
400+
total_prompt_tokens += usage.get("input_tokens", 0)
401+
total_completion_tokens += usage.get("output_tokens", 0)
402+
total_tokens += usage.get("total_tokens", 0)
403+
386404
# Create trace document
387405
trace_data = {
388406
"trace_id": self.trace_id,
@@ -392,10 +410,10 @@ def save(self, empty_save: bool = False, overwrite: bool = False) -> Tuple[str,
392410
"created_at": datetime.fromtimestamp(self.start_time).isoformat(),
393411
"duration": total_duration,
394412
"token_counts": {
395-
"prompt_tokens": 0, # Dummy value
396-
"completion_tokens": 0, # Dummy value
397-
"total_tokens": 0, # Dummy value
398-
}, # TODO: Add token counts
413+
"prompt_tokens": total_prompt_tokens,
414+
"completion_tokens": total_completion_tokens,
415+
"total_tokens": total_tokens,
416+
},
399417
"entries": condensed_entries,
400418
"empty_save": empty_save,
401419
"overwrite": overwrite

0 commit comments

Comments
 (0)