Commit 3e5b76d

bozheng-hit authored and ArthurZucker committed

Adding Qwen3 and Qwen3MoE (huggingface#36878)

* Initial commit for Qwen3
* fix and add tests for qwen3 & qwen3_moe
* rename models for tests.
* fix
* fix
* fix and add docs.
* fix model name in docs.
* simplify modular and fix configuration issues
* Fix the red CI: ruff was updated
* revert ruff, version was wrong
* fix qwen3moe.
* fix
* make sure MOE can load
* fix copies

Co-authored-by: Arthur Zucker <arthur.zucker@gmail.com>

1 parent 2c8b32f · commit 3e5b76d

26 files changed: +5650 −3 lines

docs/source/en/_toctree.yml

Lines changed: 4 additions & 0 deletions

```diff
@@ -603,6 +603,10 @@
       title: Qwen2
     - local: model_doc/qwen2_moe
       title: Qwen2MoE
+    - local: model_doc/qwen3
+      title: Qwen3
+    - local: model_doc/qwen3_moe
+      title: Qwen3MoE
     - local: model_doc/rag
       title: RAG
     - local: model_doc/realm
```

docs/source/en/index.md

Lines changed: 0 additions & 1 deletion

```diff
@@ -43,4 +43,3 @@ Transformers is designed for developers and machine learning engineers and resea
   </a>
 </div>
 
-Join us on the Hugging Face [Hub](https://huggingface.co/), [Discord](https://discord.com/invite/JfAtkvEtRb), or [forum](https://discuss.huggingface.co/) to collaborate and build models, datasets, and applications together.
```

docs/source/en/model_doc/qwen3.md

Lines changed: 59 additions & 0 deletions

```diff
@@ -0,0 +1,59 @@
+<!--Copyright 2024 The Qwen Team and The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# Qwen3
+
+## Overview
+
+To be released with the official model launch.
+
+### Model Details
+
+To be released with the official model launch.
+
+
+## Usage tips
+
+To be released with the official model launch.
+
+## Qwen3Config
+
+[[autodoc]] Qwen3Config
+
+## Qwen3Model
+
+[[autodoc]] Qwen3Model
+    - forward
+
+## Qwen3ForCausalLM
+
+[[autodoc]] Qwen3ForCausalLM
+    - forward
+
+## Qwen3ForSequenceClassification
+
+[[autodoc]] Qwen3ForSequenceClassification
+    - forward
+
+## Qwen3ForTokenClassification
+
+[[autodoc]] Qwen3ForTokenClassification
+    - forward
+
+## Qwen3ForQuestionAnswering
+
+[[autodoc]] Qwen3ForQuestionAnswering
+    - forward
```

docs/source/en/model_doc/qwen3_moe.md

Lines changed: 58 additions & 0 deletions

```diff
@@ -0,0 +1,58 @@
+<!--Copyright 2024 The Qwen Team and The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# Qwen3MoE
+
+## Overview
+
+To be released with the official model launch.
+
+### Model Details
+
+To be released with the official model launch.
+
+## Usage tips
+
+To be released with the official model launch.
+
+## Qwen3MoeConfig
+
+[[autodoc]] Qwen3MoeConfig
+
+## Qwen3MoeModel
+
+[[autodoc]] Qwen3MoeModel
+    - forward
+
+## Qwen3MoeForCausalLM
+
+[[autodoc]] Qwen3MoeForCausalLM
+    - forward
+
+## Qwen3MoeForSequenceClassification
+
+[[autodoc]] Qwen3MoeForSequenceClassification
+    - forward
+
+## Qwen3MoeForTokenClassification
+
+[[autodoc]] Qwen3MoeForTokenClassification
+    - forward
+
+## Qwen3MoeForQuestionAnswering
+
+[[autodoc]] Qwen3MoeForQuestionAnswering
+    - forward
```

src/transformers/__init__.py

Lines changed: 40 additions & 0 deletions

```diff
@@ -744,6 +744,8 @@
         "Qwen2VLConfig",
         "Qwen2VLProcessor",
     ],
+    "models.qwen3": ["Qwen3Config"],
+    "models.qwen3_moe": ["Qwen3MoeConfig"],
     "models.rag": ["RagConfig", "RagRetriever", "RagTokenizer"],
     "models.recurrent_gemma": ["RecurrentGemmaConfig"],
     "models.reformer": ["ReformerConfig"],
@@ -3441,6 +3443,26 @@
             "Qwen2VLPreTrainedModel",
         ]
     )
+    _import_structure["models.qwen3"].extend(
+        [
+            "Qwen3ForCausalLM",
+            "Qwen3ForQuestionAnswering",
+            "Qwen3ForSequenceClassification",
+            "Qwen3ForTokenClassification",
+            "Qwen3Model",
+            "Qwen3PreTrainedModel",
+        ]
+    )
+    _import_structure["models.qwen3_moe"].extend(
+        [
+            "Qwen3MoeForCausalLM",
+            "Qwen3MoeForQuestionAnswering",
+            "Qwen3MoeForSequenceClassification",
+            "Qwen3MoeForTokenClassification",
+            "Qwen3MoeModel",
+            "Qwen3MoePreTrainedModel",
+        ]
+    )
     _import_structure["models.rag"].extend(
         [
             "RagModel",
@@ -5993,6 +6015,8 @@
         Qwen2VLConfig,
         Qwen2VLProcessor,
     )
+    from .models.qwen3 import Qwen3Config
+    from .models.qwen3_moe import Qwen3MoeConfig
    from .models.rag import RagConfig, RagRetriever, RagTokenizer
    from .models.recurrent_gemma import RecurrentGemmaConfig
    from .models.reformer import ReformerConfig
@@ -8293,6 +8317,22 @@
        Qwen2VLModel,
        Qwen2VLPreTrainedModel,
    )
+    from .models.qwen3 import (
+        Qwen3ForCausalLM,
+        Qwen3ForQuestionAnswering,
+        Qwen3ForSequenceClassification,
+        Qwen3ForTokenClassification,
+        Qwen3Model,
+        Qwen3PreTrainedModel,
+    )
+    from .models.qwen3_moe import (
+        Qwen3MoeForCausalLM,
+        Qwen3MoeForQuestionAnswering,
+        Qwen3MoeForSequenceClassification,
+        Qwen3MoeForTokenClassification,
+        Qwen3MoeModel,
+        Qwen3MoePreTrainedModel,
+    )
     from .models.rag import (
         RagModel,
         RagPreTrainedModel,
```
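The `_import_structure` entries above feed transformers' lazy-import machinery: a plain dict maps submodule paths to the names they export, and the actual import is deferred until a name is first accessed. A minimal sketch of that name-to-submodule resolution, assuming a toy registry (the `resolve` helper is illustrative, not the library's implementation):

```python
# Hypothetical registry in the style of transformers' _import_structure:
# submodule path -> public names that submodule exports.
_import_structure = {
    "models.qwen3": ["Qwen3Config"],
    "models.qwen3_moe": ["Qwen3MoeConfig"],
}

# Reverse map so an exported name can be traced back to its submodule
# without importing anything up front.
_name_to_module = {
    name: module for module, names in _import_structure.items() for name in names
}

def resolve(name: str) -> str:
    """Return the submodule to import for `name` (sketch of lazy dispatch)."""
    if name not in _name_to_module:
        raise AttributeError(f"module has no attribute {name!r}")
    return _name_to_module[name]
```

In the real package this lookup happens inside a module `__getattr__`, so `from transformers import Qwen3Config` only pays the import cost of `models.qwen3`.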

src/transformers/models/__init__.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -230,6 +230,8 @@
     qwen2_audio,
     qwen2_moe,
     qwen2_vl,
+    qwen3,
+    qwen3_moe,
     rag,
     recurrent_gemma,
     reformer,
```

src/transformers/models/auto/configuration_auto.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -254,6 +254,8 @@
         ("qwen2_audio_encoder", "Qwen2AudioEncoderConfig"),
         ("qwen2_moe", "Qwen2MoeConfig"),
         ("qwen2_vl", "Qwen2VLConfig"),
+        ("qwen3", "Qwen3Config"),
+        ("qwen3_moe", "Qwen3MoeConfig"),
         ("rag", "RagConfig"),
         ("realm", "RealmConfig"),
         ("recurrent_gemma", "RecurrentGemmaConfig"),
@@ -609,6 +611,8 @@
         ("qwen2_audio_encoder", "Qwen2AudioEncoder"),
         ("qwen2_moe", "Qwen2MoE"),
         ("qwen2_vl", "Qwen2VL"),
+        ("qwen3", "Qwen3"),
+        ("qwen3_moe", "Qwen3MoE"),
         ("rag", "RAG"),
         ("realm", "REALM"),
         ("recurrent_gemma", "RecurrentGemma"),
```
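The two mappings above drive `AutoConfig`: the first takes the `model_type` string from a checkpoint's `config.json` to a config class name, the second to a human-readable model name. A toy version of that dispatch, assuming small excerpts of both tables (`config_class_for` is an illustrative stand-in, not the AutoConfig API):

```python
# Excerpts of the two registries extended by this commit.
CONFIG_MAPPING_NAMES = {
    "qwen2_moe": "Qwen2MoeConfig",
    "qwen2_vl": "Qwen2VLConfig",
    "qwen3": "Qwen3Config",
    "qwen3_moe": "Qwen3MoeConfig",
}
MODEL_NAMES_MAPPING = {
    "qwen3": "Qwen3",
    "qwen3_moe": "Qwen3MoE",
}

def config_class_for(model_type: str) -> str:
    """AutoConfig-style dispatch: unknown model types fail loudly."""
    try:
        return CONFIG_MAPPING_NAMES[model_type]
    except KeyError:
        raise ValueError(f"Unrecognized model type: {model_type!r}") from None
```

The key detail the diff shows is that `qwen3` and `qwen3_moe` are distinct model types with distinct config classes, even though (per the tokenizer mapping below) they share a tokenizer.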

src/transformers/models/auto/modeling_auto.py

Lines changed: 10 additions & 0 deletions

```diff
@@ -233,6 +233,8 @@
         ("qwen2_audio_encoder", "Qwen2AudioEncoder"),
         ("qwen2_moe", "Qwen2MoeModel"),
         ("qwen2_vl", "Qwen2VLModel"),
+        ("qwen3", "Qwen3Model"),
+        ("qwen3_moe", "Qwen3MoeModel"),
         ("recurrent_gemma", "RecurrentGemmaModel"),
         ("reformer", "ReformerModel"),
         ("regnet", "RegNetModel"),
@@ -576,6 +578,8 @@
         ("qdqbert", "QDQBertLMHeadModel"),
         ("qwen2", "Qwen2ForCausalLM"),
         ("qwen2_moe", "Qwen2MoeForCausalLM"),
+        ("qwen3", "Qwen3ForCausalLM"),
+        ("qwen3_moe", "Qwen3MoeForCausalLM"),
         ("recurrent_gemma", "RecurrentGemmaForCausalLM"),
         ("reformer", "ReformerModelWithLMHead"),
         ("rembert", "RemBertForCausalLM"),
@@ -1072,6 +1076,8 @@
         ("qdqbert", "QDQBertForSequenceClassification"),
         ("qwen2", "Qwen2ForSequenceClassification"),
         ("qwen2_moe", "Qwen2MoeForSequenceClassification"),
+        ("qwen3", "Qwen3ForSequenceClassification"),
+        ("qwen3_moe", "Qwen3MoeForSequenceClassification"),
         ("reformer", "ReformerForSequenceClassification"),
         ("rembert", "RemBertForSequenceClassification"),
         ("roberta", "RobertaForSequenceClassification"),
@@ -1153,6 +1159,8 @@
         ("qdqbert", "QDQBertForQuestionAnswering"),
         ("qwen2", "Qwen2ForQuestionAnswering"),
         ("qwen2_moe", "Qwen2MoeForQuestionAnswering"),
+        ("qwen3", "Qwen3ForQuestionAnswering"),
+        ("qwen3_moe", "Qwen3MoeForQuestionAnswering"),
         ("reformer", "ReformerForQuestionAnswering"),
         ("rembert", "RemBertForQuestionAnswering"),
         ("roberta", "RobertaForQuestionAnswering"),
@@ -1257,6 +1265,8 @@
         ("qdqbert", "QDQBertForTokenClassification"),
         ("qwen2", "Qwen2ForTokenClassification"),
         ("qwen2_moe", "Qwen2MoeForTokenClassification"),
+        ("qwen3", "Qwen3ForTokenClassification"),
+        ("qwen3_moe", "Qwen3MoeForTokenClassification"),
         ("rembert", "RemBertForTokenClassification"),
         ("roberta", "RobertaForTokenClassification"),
         ("roberta-prelayernorm", "RobertaPreLayerNormForTokenClassification"),
```
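Each hunk above extends a different task-specific mapping: base model, causal LM, sequence classification, question answering, and token classification. The `AutoModelFor*` classes effectively do a two-key lookup, task first, then `model_type`. A compact sketch over class names only, as a stand-in for the real class registries (the task keys here are illustrative labels, not library identifiers):

```python
# Toy two-level registry: task label -> model_type -> head class name.
MODEL_MAPPINGS = {
    "base": {"qwen3": "Qwen3Model", "qwen3_moe": "Qwen3MoeModel"},
    "causal-lm": {"qwen3": "Qwen3ForCausalLM",
                  "qwen3_moe": "Qwen3MoeForCausalLM"},
    "sequence-classification": {"qwen3": "Qwen3ForSequenceClassification",
                                "qwen3_moe": "Qwen3MoeForSequenceClassification"},
    "question-answering": {"qwen3": "Qwen3ForQuestionAnswering",
                           "qwen3_moe": "Qwen3MoeForQuestionAnswering"},
    "token-classification": {"qwen3": "Qwen3ForTokenClassification",
                             "qwen3_moe": "Qwen3MoeForTokenClassification"},
}

def auto_class_name(task: str, model_type: str) -> str:
    """Mirror AutoModelFor* dispatch: task picks a mapping, model_type picks a class."""
    return MODEL_MAPPINGS[task][model_type]
```

This is why a checkpoint whose `config.json` says `"model_type": "qwen3_moe"` loads `Qwen3MoeForCausalLM` under `AutoModelForCausalLM` without the caller naming the class.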

src/transformers/models/auto/tokenization_auto.py

Lines changed: 14 additions & 0 deletions

```diff
@@ -454,6 +454,20 @@
             ),
         ),
         ("qwen2_vl", ("Qwen2Tokenizer", "Qwen2TokenizerFast" if is_tokenizers_available() else None)),
+        (
+            "qwen3",
+            (
+                "Qwen2Tokenizer",
+                "Qwen2TokenizerFast" if is_tokenizers_available() else None,
+            ),
+        ),
+        (
+            "qwen3_moe",
+            (
+                "Qwen2Tokenizer",
+                "Qwen2TokenizerFast" if is_tokenizers_available() else None,
+            ),
+        ),
         ("rag", ("RagTokenizer", None)),
         ("realm", ("RealmTokenizer", "RealmTokenizerFast" if is_tokenizers_available() else None)),
```
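Both new entries reuse the Qwen2 tokenizer pair, with the fast variant gated on `is_tokenizers_available()`. The slow/fast selection can be sketched like this, with the availability flag passed in explicitly as a stand-in for the library's probe of the `tokenizers` package:

```python
# Excerpt of the mapping: model_type -> (slow tokenizer, fast tokenizer).
TOKENIZER_MAPPING_NAMES = {
    "qwen3": ("Qwen2Tokenizer", "Qwen2TokenizerFast"),
    "qwen3_moe": ("Qwen2Tokenizer", "Qwen2TokenizerFast"),
}

def pick_tokenizer(model_type: str, tokenizers_available: bool,
                   use_fast: bool = True) -> str:
    """Prefer the Rust-backed fast tokenizer when the `tokenizers` package is installed."""
    slow, fast = TOKENIZER_MAPPING_NAMES[model_type]
    if not tokenizers_available:
        fast = None  # mirrors the `if is_tokenizers_available() else None` guard
    return fast if (use_fast and fast is not None) else slow
```

Reusing `Qwen2Tokenizer` means no new tokenizer code ships in this commit; only the mapping rows are added.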

src/transformers/models/qwen2_moe/configuration_qwen2_moe.py

Lines changed: 1 addition & 2 deletions

```diff
@@ -26,8 +26,7 @@ class Qwen2MoeConfig(PretrainedConfig):
     r"""
     This is the configuration class to store the configuration of a [`Qwen2MoeModel`]. It is used to instantiate a
     Qwen2MoE model according to the specified arguments, defining the model architecture. Instantiating a configuration
-    with the defaults will yield a similar configuration to that of
-    Qwen1.5-MoE-A2.7B" [Qwen/Qwen1.5-MoE-A2.7B"](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B").
+    with the defaults will yield a similar configuration to that of [Qwen/Qwen1.5-MoE-A2.7B](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B).
 
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
```
