@@ -815,6 +815,8 @@ def __init__(
         == "true",
         enable_evaluations: bool = os.getenv("JUDGMENT_EVALUATIONS", "true").lower()
         == "true",
+        show_trace_urls: bool = os.getenv("JUDGMENT_SHOW_TRACE_URLS", "true").lower()
+        == "true",
         # S3 configuration
         use_s3: bool = False,
         s3_bucket_name: Optional[str] = None,
@@ -859,6 +861,7 @@ def __init__(
         self.traces: List[Trace] = []
         self.enable_monitoring: bool = enable_monitoring
         self.enable_evaluations: bool = enable_evaluations
+        self.show_trace_urls: bool = show_trace_urls
         self.class_identifiers: Dict[
             str, str
         ] = {}  # Dictionary to store class identifiers
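
For context, the new flag follows the same pattern as the existing `enable_monitoring` and `enable_evaluations` toggles: any value other than a case-insensitive `"true"` disables it. A minimal sketch of turning trace URL printing off (the import path and the assertion are illustrative assumptions; note that because the default is evaluated when the module is imported, the environment variable must be set before importing judgeval):

```python
import os

# Hypothetical usage: any value other than "true" (case-insensitive)
# resolves show_trace_urls to False. Set the variable before the import,
# since the constructor default is evaluated at module import time.
os.environ["JUDGMENT_SHOW_TRACE_URLS"] = "false"

from judgeval.common.tracer import Tracer  # import path assumed

tracer = Tracer()
assert tracer.show_trace_urls is False
```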
@@ -1731,6 +1734,93 @@ def _cleanup_on_exit(self):
                     f"Error during background service shutdown: {e}"
                 )

+    def trace_to_message_history(
+        self, trace: Union[Trace, TraceClient]
+    ) -> List[Dict[str, str]]:
+        """
+        Extract message history from a trace for training purposes.
+
+        This method processes trace spans to reconstruct the conversation flow,
+        extracting messages in chronological order from LLM, user, and tool spans.
+
+        Args:
+            trace: Trace or TraceClient instance to extract messages from
+
+        Returns:
+            List of message dictionaries with 'role' and 'content' keys
+
+        Raises:
+            ValueError: If no trace is provided
+        """
+        if not trace:
+            raise ValueError("No trace provided")
+
+        # Handle both Trace and TraceClient objects
+        if isinstance(trace, TraceClient):
+            spans = trace.trace_spans
+        else:
+            spans = trace.trace_spans if hasattr(trace, "trace_spans") else []
+
+        messages = []
+        first_found = False
+
+        # Process spans in chronological order
+        for span in sorted(
+            spans, key=lambda s: s.created_at if hasattr(s, "created_at") else 0
+        ):
+            # Skip spans without output (except for first LLM span which may have input messages)
+            if span.output is None and span.span_type != "llm":
+                continue
+
+            if span.span_type == "llm":
+                # For the first LLM span, extract input messages (system + user prompts)
+                if not first_found and hasattr(span, "inputs") and span.inputs:
+                    input_messages = span.inputs.get("messages", [])
+                    if input_messages:
+                        first_found = True
+                        # Add input messages (typically system and user messages)
+                        for msg in input_messages:
+                            if (
+                                isinstance(msg, dict)
+                                and "role" in msg
+                                and "content" in msg
+                            ):
+                                messages.append(
+                                    {"role": msg["role"], "content": msg["content"]}
+                                )
+
+                # Add assistant response from span output
+                if span.output is not None:
+                    messages.append({"role": "assistant", "content": str(span.output)})
+
+            elif span.span_type == "user":
+                # Add user messages
+                if span.output is not None:
+                    messages.append({"role": "user", "content": str(span.output)})
+
+            elif span.span_type == "tool":
+                # Add tool responses as user messages (common pattern in training)
+                if span.output is not None:
+                    messages.append({"role": "user", "content": str(span.output)})
+
+        return messages
+
+    def get_current_message_history(self) -> List[Dict[str, str]]:
+        """
+        Get message history from the current trace.
+
+        Returns:
+            List of message dictionaries from the current trace context
+
+        Raises:
+            ValueError: If no current trace is found
+        """
+        current_trace = self.get_current_trace()
+        if not current_trace:
+            raise ValueError("No current trace found")
+
+        return self.trace_to_message_history(current_trace)
+

 def _get_current_trace(
     trace_across_async_contexts: bool = Tracer.trace_across_async_contexts,
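
A usage sketch for the two new methods. The `observe` decorator and the span types mirror what the rest of this file wires up, but the decorator arguments and the traced calls shown here are assumptions, not part of this diff:

```python
# Hypothetical usage: rebuild an OpenAI-style message list from the active
# trace, e.g. to assemble a fine-tuning dataset.
tracer = Tracer()

@tracer.observe(span_type="tool")
def get_weather(city: str) -> str:
    return f"Sunny in {city}"

@tracer.observe(span_type="function")
def agent_turn():
    # ... traced LLM calls and tool calls happen here ...
    get_weather("Berlin")
    # Inside the active trace, the history can be pulled directly:
    history = tracer.get_current_message_history()
    # e.g. [{"role": "system", "content": "..."},
    #       {"role": "user", "content": "..."},
    #       {"role": "assistant", "content": "..."},
    #       {"role": "user", "content": "<tool output>"}]
    return history
```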
@@ -1746,7 +1836,7 @@ def wrap(
 ) -> Any:
     """
     Wraps an API client to add tracing capabilities.
-    Supports OpenAI, Together, Anthropic, and Google GenAI clients.
+    Supports OpenAI, Together, Anthropic, Google GenAI clients, and TrainableModel.
     Patches both '.create' and Anthropic's '.stream' methods using a wrapper class.
     """
     (
@@ -1871,6 +1961,39 @@ async def wrapper(*args, **kwargs):
         setattr(client.chat.completions, "create", wrapped(original_create))
     elif isinstance(client, (groq_AsyncGroq)):
         setattr(client.chat.completions, "create", wrapped_async(original_create))
+
+    # Check for TrainableModel from judgeval.common.trainer
+    try:
+        from judgeval.common.trainer import TrainableModel
+
+        if isinstance(client, TrainableModel):
+            # Define a wrapper function that can be reapplied to new model instances
+            def wrap_model_instance(model_instance):
+                """Wrap a model instance with tracing functionality"""
+                if hasattr(model_instance, "chat") and hasattr(
+                    model_instance.chat, "completions"
+                ):
+                    if hasattr(model_instance.chat.completions, "create"):
+                        setattr(
+                            model_instance.chat.completions,
+                            "create",
+                            wrapped(model_instance.chat.completions.create),
+                        )
+                    if hasattr(model_instance.chat.completions, "acreate"):
+                        setattr(
+                            model_instance.chat.completions,
+                            "acreate",
+                            wrapped_async(model_instance.chat.completions.acreate),
+                        )
+
+            # Register the wrapper function with the TrainableModel
+            client._register_tracer_wrapper(wrap_model_instance)
+
+            # Apply wrapping to the current model
+            wrap_model_instance(client._current_model)
+    except ImportError:
+        pass  # TrainableModel not available
+
     return client

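How the new branch would be exercised, as a sketch. The `TrainableModel` constructor arguments are elided, and making the call through `_current_model` is an assumption based on the attribute patched above rather than a documented entry point:

```python
# Hypothetical usage: a TrainableModel passes through wrap() like any client.
from judgeval.common.trainer import TrainableModel

model = TrainableModel(...)  # constructor arguments elided
model = wrap(model)  # registers wrap_model_instance; patches _current_model

# Calls on the underlying model now emit trace spans; the registered
# callback re-patches whenever the trainer swaps in a new model instance.
response = model._current_model.chat.completions.create(
    model="accounts/fireworks/models/llama-v3p1-8b-instruct",  # example id
    messages=[{"role": "user", "content": "Hello"}],
)
```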
@@ -1977,6 +2100,22 @@ def _get_client_config(
         return "GROQ_API_CALL", client.chat.completions.create, None, None, None
     elif isinstance(client, (groq_AsyncGroq)):
         return "GROQ_API_CALL", client.chat.completions.create, None, None, None
+
+    # Check for TrainableModel
+    try:
+        from judgeval.common.trainer import TrainableModel
+
+        if isinstance(client, TrainableModel):
+            return (
+                "FIREWORKS_TRAINABLE_MODEL_CALL",
+                client._current_model.chat.completions.create,
+                None,
+                None,
+                None,
+            )
+    except ImportError:
+        pass  # TrainableModel not available
+
     raise ValueError(f"Unsupported client type: {type(client)}")

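A quick sanity check of the new dispatch branch, illustrative only since `_get_client_config` is module-private and the `model` instance is the hypothetical one from the earlier sketch:

```python
# Illustrative check: TrainableModel dispatches to the new Fireworks branch.
span_name, create_fn, *_ = _get_client_config(model)
assert span_name == "FIREWORKS_TRAINABLE_MODEL_CALL"
assert create_fn is model._current_model.chat.completions.create
```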
@@ -2155,6 +2294,37 @@ def _format_output_data(
             cache_creation_input_tokens,
         )

+    # Check for TrainableModel
+    try:
+        from judgeval.common.trainer import TrainableModel
+
+        if isinstance(client, TrainableModel):
+            # TrainableModel uses a Fireworks LLM internally, so the response
+            # format should be similar to OpenAI's
+            if (
+                hasattr(response, "model")
+                and hasattr(response, "usage")
+                and hasattr(response, "choices")
+            ):
+                model_name = response.model
+                prompt_tokens = response.usage.prompt_tokens if response.usage else 0
+                completion_tokens = (
+                    response.usage.completion_tokens if response.usage else 0
+                )
+                message_content = response.choices[0].message.content
+
+                # Use LiteLLM cost calculation with the "fireworks_ai" prefix;
+                # LiteLLM supports Fireworks AI models for cost calculation when
+                # they are prefixed with "fireworks_ai/"
+                fireworks_model_name = f"fireworks_ai/{model_name}"
+                return message_content, _create_usage(
+                    fireworks_model_name,
+                    prompt_tokens,
+                    completion_tokens,
+                    cache_read_input_tokens,
+                    cache_creation_input_tokens,
+                )
+    except ImportError:
+        pass  # TrainableModel not available
+
     judgeval_logger.warning(f"Unsupported client type: {type(client)}")
     return None, None
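
For reference on the pricing comment above: LiteLLM keys its cost table on the provider prefix, so Fireworks models resolve only under `fireworks_ai/`. A sketch of the lookup that `_create_usage` presumably performs internally (that helper is outside this diff; `litellm.cost_per_token` is LiteLLM's public API, and the model id is an example):

```python
import litellm

# Example: price a Fireworks completion through LiteLLM's cost table.
# cost_per_token returns a (prompt_cost, completion_cost) pair in USD.
prompt_cost, completion_cost = litellm.cost_per_token(
    model="fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct",
    prompt_tokens=120,
    completion_tokens=45,
)
total_cost_usd = prompt_cost + completion_cost
```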