diff --git a/api/src/main/java/com/theokanning/openai/completion/CompletionRequest.java b/api/src/main/java/com/theokanning/openai/completion/CompletionRequest.java
index 64eb9d2..37f64d5 100644
--- a/api/src/main/java/com/theokanning/openai/completion/CompletionRequest.java
+++ b/api/src/main/java/com/theokanning/openai/completion/CompletionRequest.java
@@ -41,10 +41,20 @@ public class CompletionRequest {
      * The maximum number of tokens to generate.
      * Requests can use up to 2048 tokens shared between prompt and completion.
      * (One token is roughly 4 characters for normal English text)
+     *
+     * @deprecated use {@link #maxCompletionTokens} on models after GPT-4.
      */
+    @Deprecated
     @JsonProperty("max_tokens")
     Integer maxTokens;
 
+    /**
+     * The maximum number of tokens to generate in the completion.
+     * This is the replacement for {@link #maxTokens} with the new models.
+     */
+    @JsonProperty("max_completion_tokens")
+    Integer maxCompletionTokens;
+
     /**
      * What sampling temperature to use. Higher values means the model will take more risks.
      * Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.
diff --git a/api/src/main/java/com/theokanning/openai/completion/chat/ChatCompletionRequest.java b/api/src/main/java/com/theokanning/openai/completion/chat/ChatCompletionRequest.java
index 3291b8c..59b4441 100644
--- a/api/src/main/java/com/theokanning/openai/completion/chat/ChatCompletionRequest.java
+++ b/api/src/main/java/com/theokanning/openai/completion/chat/ChatCompletionRequest.java
@@ -81,10 +81,20 @@ public class ChatCompletionRequest {
     /**
      * The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will
      * be (4096 - prompt tokens).
+     *
+     * @deprecated use {@link #maxCompletionTokens} on models after GPT-4.
      */
+    @Deprecated
     @JsonProperty("max_tokens")
     Integer maxTokens;
 
+    /**
+     * The maximum number of tokens to generate in the completion.
+     * This is the replacement for {@link #maxTokens} with the new models.
+     */
+    @JsonProperty("max_completion_tokens")
+    Integer maxCompletionTokens;
+
     /**
      * Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,
      * increasing the model's likelihood to talk about new topics.
diff --git a/service/src/test/java/com/theokanning/openai/service/ChatCompletionTest.java b/service/src/test/java/com/theokanning/openai/service/ChatCompletionTest.java
index f91d9f2..0fa36cf 100644
--- a/service/src/test/java/com/theokanning/openai/service/ChatCompletionTest.java
+++ b/service/src/test/java/com/theokanning/openai/service/ChatCompletionTest.java
@@ -39,7 +39,7 @@ void createChatCompletion() {
                 .model("gpt-4o-mini")
                 .messages(messages)
                 .n(5)
-                .maxTokens(50)
+                .maxCompletionTokens(50)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -61,7 +61,7 @@ void streamChatCompletion() {
                 .model("gpt-4o-mini")
                 .messages(messages)
                 .n(1)
-                .maxTokens(50)
+                .maxCompletionTokens(50)
                 .logitBias(new HashMap<>())
                 .stream(true)
                 .build();
@@ -83,7 +83,7 @@ void streamOptionsChatCompletion() {
                 .model("gpt-4o-mini")
                 .messages(messages)
                 .n(1)
-                .maxTokens(50)
+                .maxCompletionTokens(50)
                 .logitBias(new HashMap<>())
                 .stream(true)
                 .streamOptions(StreamOption.INCLUDE)
                 .build();
@@ -108,7 +108,7 @@ void createChatCompletionWithJsonMode() {
                 .model("gpt-4o-mini")
                 .messages(messages)
                 .responseFormat(ChatResponseFormat.JSON_OBJECT)
-                .maxTokens(50)
+                .maxCompletionTokens(50)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -137,7 +137,7 @@ void createChatCompletionWithJsonSchema() throws JsonProcessingException {
                 .model("gpt-4o-2024-08-06")
                 .messages(messages)
                 .responseFormat(responseFormat)
-                .maxTokens(1000)
+                .maxCompletionTokens(1000)
                 .build();
 
         ChatCompletionChoice choice = service.createChatCompletion(chatCompletionRequest).getChoices().get(0);
@@ -180,7 +180,7 @@ void createChatCompletionWithFunctions() {
                 .messages(messages)
                 .functions(functions)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -212,7 +212,7 @@ void createChatCompletionWithFunctions() {
                 .messages(messages)
                 .functions(functions)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -254,7 +254,7 @@ void createChatCompletionWithDynamicFunctions() {
                 .messages(messages)
                 .functions(Collections.singletonList(function))
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -281,7 +281,7 @@ void zeroArgStreamFunctionTest() {
                 .messages(messages)
                 .functions(Collections.singletonList(FunctionDefinition.builder().name("get_today").description("Get the current date").executor((o) -> LocalDate.now()).build()))
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
         AssistantMessage accumulatedMessage = service.mapStreamToAccumulator(service.streamChatCompletion(chatCompletionRequest))
@@ -308,7 +308,7 @@ void zeroArgStreamToolTest() {
                         new ChatTool(FunctionDefinition.builder().name("get_today").description("Get the current date").executor((o) -> LocalDate.now()).build())
                 ))
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .streamOptions(StreamOption.INCLUDE)
                 .build();
@@ -342,7 +342,7 @@ void streamChatCompletionWithFunctions() {
                 .messages(messages)
                 .functions(functions)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -376,7 +376,7 @@ void streamChatCompletionWithFunctions() {
                 .messages(messages)
                 .functions(functions)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -418,7 +418,7 @@ void streamChatCompletionWithDynamicFunctions() {
                 .messages(messages)
                 .functions(Collections.singletonList(function))
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -451,7 +451,7 @@ void createChatCompletionWithToolFunctions() {
                 .tools(Arrays.asList(tool))
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .build();
 
         ChatCompletionChoice choice = service.createChatCompletion(chatCompletionRequest).getChoices().get(0);
@@ -485,7 +485,7 @@ void createChatCompletionWithToolFunctions() {
                 .tools(Arrays.asList(tool))
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -539,7 +539,7 @@ void createChatCompletionWithMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -574,7 +574,7 @@ void createChatCompletionWithMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -602,7 +602,7 @@ void createChatCompletionWithMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -631,7 +631,7 @@ void createImageChatCompletion() {
                 .model("gpt-4o")
                 .messages(messages)
                 .n(1)
-                .maxTokens(200)
+                .maxCompletionTokens(200)
                 .build();
 
         ChatCompletionChoice choice = service.createChatCompletion(chatCompletionRequest).getChoices().get(0);
@@ -652,7 +652,7 @@ void createLocalImageChatCompletion() throws URISyntaxException {
                 .model("gpt-4o-mini")
                 .messages(messages)
                 .n(1)
-                .maxTokens(200)
+                .maxCompletionTokens(200)
                 .build();
 
         ChatCompletionChoice choice = service.createChatCompletion(chatCompletionRequest).getChoices().get(0);
@@ -673,7 +673,7 @@ void createInputAudioChatCompletion() throws URISyntaxException {
                 .model("gpt-4o-audio-preview")
                 .messages(messages)
                 .n(1)
-                .maxTokens(200)
+                .maxCompletionTokens(200)
                 .modalities(Arrays.asList("text", "audio"))
                 .audio(new Audio("alloy", "wav"))
                 .build();
@@ -728,7 +728,7 @@ void streamChatMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(200)
+                .maxCompletionTokens(200)
                 .build();
 
         AssistantMessage accumulatedMessage = service.mapStreamToAccumulator(service.streamChatCompletion(chatCompletionRequest))
@@ -765,7 +765,7 @@ void streamChatMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();
 
@@ -795,7 +795,7 @@ void streamChatMultipleToolCalls() {
                 .tools(tools)
                 .toolChoice(ToolChoice.AUTO)
                 .n(1)
-                .maxTokens(100)
+                .maxCompletionTokens(100)
                 .logitBias(new HashMap<>())
                 .build();