|
539 | 539 | Model.register(
|
540 | 540 | dict(
|
541 | 541 | model_id = "Qwen3-0.6B",
|
542 |
| - supported_engines=[vllm_qwen3_engin084], |
| 542 | + supported_engines=[vllm_qwen3_engin091], |
543 | 543 | supported_instances=[
|
544 | 544 | g5d2xlarge_instance,
|
545 | 545 | g5d4xlarge_instance,
|
|
573 | 573 | Model.register(
|
574 | 574 | dict(
|
575 | 575 | model_id = "Qwen3-1.7B",
|
576 |
| - supported_engines=[vllm_qwen3_engin084], |
| 576 | + supported_engines=[vllm_qwen3_engin091], |
577 | 577 | supported_instances=[
|
578 | 578 | g5d2xlarge_instance,
|
579 | 579 | g5d4xlarge_instance,
|
|
608 | 608 | Model.register(
|
609 | 609 | dict(
|
610 | 610 | model_id = "Qwen3-4B",
|
611 |
| - supported_engines=[vllm_qwen3_engin084], |
| 611 | + supported_engines=[vllm_qwen3_engin091], |
612 | 612 | supported_instances=[
|
613 | 613 | g5d2xlarge_instance,
|
614 | 614 | g5d4xlarge_instance,
|
|
644 | 644 | Model.register(
|
645 | 645 | dict(
|
646 | 646 | model_id = "Qwen3-14B-AWQ",
|
647 |
| - supported_engines=[vllm_qwen3_engin084], |
| 647 | + supported_engines=[vllm_qwen3_engin091], |
648 | 648 | supported_instances=[
|
649 | 649 | g5d2xlarge_instance,
|
650 | 650 | g5d4xlarge_instance,
|
|
679 | 679 | Model.register(
|
680 | 680 | dict(
|
681 | 681 | model_id = "Qwen3-14B",
|
682 |
| - supported_engines=[vllm_qwen3_engin084], |
| 682 | + supported_engines=[vllm_qwen3_engin091], |
683 | 683 | supported_instances=[
|
684 | 684 | g5d12xlarge_instance,
|
685 | 685 | g5d24xlarge_instance,
|
|
714 | 714 | # Model.register(
|
715 | 715 | # dict(
|
716 | 716 | # model_id = "Qwen3-14B-FP8",
|
717 |
| -# supported_engines=[vllm_qwen3_engin084], |
| 717 | +# supported_engines=[vllm_qwen3_engin091], |
718 | 718 | # supported_instances=[
|
719 | 719 | # g5d2xlarge_instance,
|
720 | 720 | # g5d4xlarge_instance,
|
|
750 | 750 | Model.register(
|
751 | 751 | dict(
|
752 | 752 | model_id = "Qwen3-32B-AWQ",
|
753 |
| - supported_engines=[vllm_qwen3_engin084], |
| 753 | + supported_engines=[vllm_qwen3_engin091], |
754 | 754 | supported_instances=[
|
755 | 755 | g5d12xlarge_instance,
|
756 | 756 | g5d24xlarge_instance,
|
|
784 | 784 | Model.register(
|
785 | 785 | dict(
|
786 | 786 | model_id = "Qwen3-32B",
|
787 |
| - supported_engines=[vllm_qwen3_engin084], |
| 787 | + supported_engines=[vllm_qwen3_engin091], |
788 | 788 | supported_instances=[
|
789 | 789 | g5d12xlarge_instance,
|
790 | 790 | g5d24xlarge_instance,
|
|
817 | 817 | Model.register(
|
818 | 818 | dict(
|
819 | 819 | model_id = "Qwen3-30B-A3B",
|
820 |
| - supported_engines=[vllm_qwen3_engin084], |
| 820 | + supported_engines=[vllm_qwen3_engin091], |
821 | 821 | supported_instances=[
|
822 | 822 | g5d12xlarge_instance,
|
823 | 823 | g5d24xlarge_instance,
|
|
850 | 850 | Model.register(
|
851 | 851 | dict(
|
852 | 852 | model_id = "Qwen3-235B-A22B",
|
853 |
| - supported_engines=[vllm_qwen3_engin084], |
| 853 | + supported_engines=[vllm_qwen3_engin091], |
854 | 854 | supported_instances=[
|
855 | 855 | local_instance
|
856 | 856 | ],
|
|
874 | 874 | Model.register(
|
875 | 875 | dict(
|
876 | 876 | model_id = "Qwen3-235B-A22B-FP8",
|
877 |
| - supported_engines=[vllm_qwen3_engin084], |
| 877 | + supported_engines=[vllm_qwen3_engin091], |
878 | 878 | supported_instances=[
|
879 | 879 | local_instance
|
880 | 880 | ],
|
|
0 commit comments