diff --git a/api/src/main/java/com/theokanning/openai/completion/CompletionRequest.java b/api/src/main/java/com/theokanning/openai/completion/CompletionRequest.java
index 64eb9d2..37f64d5 100644
--- a/api/src/main/java/com/theokanning/openai/completion/CompletionRequest.java
+++ b/api/src/main/java/com/theokanning/openai/completion/CompletionRequest.java
@@ -41,10 +41,20 @@ public class CompletionRequest {
      * The maximum number of tokens to generate.
      * Requests can use up to 2048 tokens shared between prompt and completion.
      * (One token is roughly 4 characters for normal English text)
+     *
+     * @deprecated use {@link #maxCompletionTokens} on models after GPT-4.
      */
+    @Deprecated
     @JsonProperty("max_tokens")
     Integer maxTokens;
 
+    /**
+     * The maximum number of tokens to generate in the completion.
+     * This is the replacement for {@link #maxTokens} with the new models.
+     */
+    @JsonProperty("max_completion_tokens")
+    Integer maxCompletionTokens;
+
     /**
      * What sampling temperature to use. Higher values means the model will take more risks.
      * Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.
diff --git a/api/src/main/java/com/theokanning/openai/completion/chat/ChatCompletionRequest.java b/api/src/main/java/com/theokanning/openai/completion/chat/ChatCompletionRequest.java
index 3291b8c..59b4441 100644
--- a/api/src/main/java/com/theokanning/openai/completion/chat/ChatCompletionRequest.java
+++ b/api/src/main/java/com/theokanning/openai/completion/chat/ChatCompletionRequest.java
@@ -81,10 +81,20 @@ public class ChatCompletionRequest {
     /**
      * The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will
      * be (4096 - prompt tokens).
+     *
+     * @deprecated use {@link #maxCompletionTokens} on models after GPT-4.
      */
+    @Deprecated
     @JsonProperty("max_tokens")
     Integer maxTokens;
 
+    /**
+     * The maximum number of tokens to generate in the completion.
+     * This is the replacement for {@link #maxTokens} with the new models.
+     */
+    @JsonProperty("max_completion_tokens")
+    Integer maxCompletionTokens;
+
     /**
      * Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,
      * increasing the model's likelihood to talk about new topics.
diff --git a/service/src/test/java/com/theokanning/openai/service/ChatCompletionTest.java b/service/src/test/java/com/theokanning/openai/service/ChatCompletionTest.java
index f91d9f2..0fa36cf 100644
--- a/service/src/test/java/com/theokanning/openai/service/ChatCompletionTest.java
+++ b/service/src/test/java/com/theokanning/openai/service/ChatCompletionTest.java
@@ -39,7 +39,7 @@ void createChatCompletion() {
                 .model("gpt-4o-mini")
                 .messages(messages)
                 .n(5)
-                .maxTokens(50)
+                .maxCompletionTokens(50)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -61,7 +61,7 @@ void streamChatCompletion() {
                 .model("gpt-4o-mini")
                 .messages(messages)
                 .n(1)
-                .maxTokens(50)
+                .maxCompletionTokens(50)
                 .logitBias(new HashMap<>())
                 .stream(true)
                 .build();
@@ -83,7 +83,7 @@ void streamOptionsChatCompletion() {
                 .model("gpt-4o-mini")
                 .messages(messages)
                 .n(1)
-                .maxTokens(50)
+                .maxCompletionTokens(50)
                 .logitBias(new HashMap<>())
                 .stream(true)
                 .streamOptions(StreamOption.INCLUDE)
                 .build();
@@ -108,7 +108,7 @@ void createChatCompletionWithJsonMode() {
                 .model("gpt-4o-mini")
                 .messages(messages)
                 .responseFormat(ChatResponseFormat.JSON_OBJECT)
-                .maxTokens(50)
+                .maxCompletionTokens(50)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -137,7 +137,7 @@ void createChatCompletionWithJsonSchema() throws JsonProcessingException {
                 .model("gpt-4o-2024-08-06")
                 .messages(messages)
                 .responseFormat(responseFormat)
-                .maxTokens(1000)
+                .maxCompletionTokens(1000)
                 .build();
 
         ChatCompletionChoice choice = service.createChatCompletion(chatCompletionRequest).getChoices().get(0);
@@ -180,7 +180,7 @@ void createChatCompletionWithFunctions() {
                 .messages(messages)
                 .functions(functions)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -212,7 +212,7 @@ void createChatCompletionWithFunctions() {
                 .messages(messages)
                 .functions(functions)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -254,7 +254,7 @@ void createChatCompletionWithDynamicFunctions() {
                 .messages(messages)
                 .functions(Collections.singletonList(function))
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -281,7 +281,7 @@ void zeroArgStreamFunctionTest() {
                 .messages(messages)
                 .functions(Collections.singletonList(FunctionDefinition.builder().name("get_today").description("Get the current date").executor((o) -> LocalDate.now()).build()))
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
         AssistantMessage accumulatedMessage = service.mapStreamToAccumulator(service.streamChatCompletion(chatCompletionRequest))
@@ -308,7 +308,7 @@ void zeroArgStreamToolTest() {
                         new ChatTool(FunctionDefinition.builder().name("get_today").description("Get the current date").executor((o) -> LocalDate.now()).build())
                 ))
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .streamOptions(StreamOption.INCLUDE)
                 .build();
@@ -342,7 +342,7 @@ void streamChatCompletionWithFunctions() {
                 .messages(messages)
                 .functions(functions)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -376,7 +376,7 @@ void streamChatCompletionWithFunctions() {
                 .messages(messages)
                 .functions(functions)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -418,7 +418,7 @@ void streamChatCompletionWithDynamicFunctions() {
                 .messages(messages)
                 .functions(Collections.singletonList(function))
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -451,7 +451,7 @@ void createChatCompletionWithToolFunctions() {
                 .tools(Arrays.asList(tool))
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .build();
 
         ChatCompletionChoice choice = service.createChatCompletion(chatCompletionRequest).getChoices().get(0);
@@ -485,7 +485,7 @@ void createChatCompletionWithToolFunctions() {
                 .tools(Arrays.asList(tool))
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -539,7 +539,7 @@ void createChatCompletionWithMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -574,7 +574,7 @@ void createChatCompletionWithMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -602,7 +602,7 @@ void createChatCompletionWithMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -631,7 +631,7 @@ void createImageChatCompletion() {
                 .model("gpt-4o")
                 .messages(messages)
                 .n(1)
-                .maxTokens(200)
+                .maxCompletionTokens(200)
                 .build();
 
         ChatCompletionChoice choice = service.createChatCompletion(chatCompletionRequest).getChoices().get(0);
@@ -652,7 +652,7 @@ void createLocalImageChatCompletion() throws URISyntaxException {
                 .model("gpt-4o-mini")
                 .messages(messages)
                 .n(1)
-                .maxTokens(200)
+                .maxCompletionTokens(200)
                 .build();
 
         ChatCompletionChoice choice = service.createChatCompletion(chatCompletionRequest).getChoices().get(0);
@@ -673,7 +673,7 @@ void createInputAudioChatCompletion() throws URISyntaxException {
                 .model("gpt-4o-audio-preview")
                 .messages(messages)
                 .n(1)
-                .maxTokens(200)
+                .maxCompletionTokens(200)
                 .modalities(Arrays.asList("text", "audio"))
                 .audio(new Audio("alloy", "wav"))
                 .build();
@@ -728,7 +728,7 @@ void streamChatMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(200)
+                .maxCompletionTokens(200)
                 .build();
 
         AssistantMessage accumulatedMessage = service.mapStreamToAccumulator(service.streamChatCompletion(chatCompletionRequest))
@@ -765,7 +765,7 @@ void streamChatMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -795,7 +795,7 @@ void streamChatMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();