Skip to content

Commit b58ae7a

Browse files
authored
Simplify frontend language (sgl-project#9029)
1 parent 6345069 commit b58ae7a

File tree

9 files changed

+19
-17
lines changed

9 files changed

+19
-17
lines changed

benchmark/gsm8k/bench_sglang.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import numpy as np
99

10-
from sglang.api import set_default_backend
10+
from sglang.lang.api import set_default_backend
1111
from sglang.test.test_utils import (
1212
add_common_sglang_args_and_parse,
1313
dump_bench_raw_result,

benchmark/hellaswag/bench_sglang.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77

8-
from sglang.api import set_default_backend
8+
from sglang.lang.api import set_default_backend
99
from sglang.test.test_utils import (
1010
add_common_sglang_args_and_parse,
1111
select_sglang_backend,

python/pyproject.toml

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ runtime_common = [
4747
"sentencepiece",
4848
"soundfile==0.13.1",
4949
"scipy",
50-
"torchao==0.9.0",
51-
"transformers==4.55.0",
5250
"timm==1.0.16",
5351
"tiktoken",
52+
"torchao==0.9.0",
53+
"transformers==4.55.0",
5454
"uvicorn",
5555
"uvloop",
5656
"xgrammar==0.1.22",
@@ -84,6 +84,9 @@ srt_hip = [
8484
"petit_kernel==0.0.2",
8585
]
8686

87+
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
88+
srt_cpu = ["sglang[runtime_common]", "einops"]
89+
8790
# xpu is not enabled in public vllm and torch whl,
8891
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
8992
srt_xpu = ["sglang[runtime_common]"]
@@ -92,8 +95,6 @@ srt_xpu = ["sglang[runtime_common]"]
9295
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
9396
srt_hpu = ["sglang[runtime_common]"]
9497

95-
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
96-
srt_cpu = ["sglang[runtime_common]", "einops"]
9798
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
9899
srt_npu = ["sglang[runtime_common]"]
99100

@@ -112,12 +113,12 @@ test = [
112113
"sentence_transformers",
113114
"pytest",
114115
]
115-
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[torch_memory_saver]", "sglang[decord]"]
116-
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
117-
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
118-
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
119-
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
120-
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
116+
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
117+
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
118+
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
119+
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
120+
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
121+
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
121122

122123
dev = ["sglang[all]", "sglang[test]"]
123124
dev_hip = ["sglang[all_hip]", "sglang[test]"]

python/sglang/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
- `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
66
- `test`: The test utilities.
77
- `api.py`: The public APIs.
8-
- `bench_offline_throughput.py`: Benchmark the throughput in the offline mode.
8+
- `bench_offline_throughput.py`: Benchmark the performance in the offline mode.
99
- `bench_one_batch.py`: Benchmark the latency of running a single static batch without a server.
1010
- `bench_one_batch_server.py`: Benchmark the latency of running a single batch with a server.
1111
- `bench_serving.py`: Benchmark online serving with dynamic requests.

python/sglang/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# SGLang public APIs
22

33
# Frontend Language APIs
4-
from sglang.api import (
4+
from sglang.global_config import global_config
5+
from sglang.lang.api import (
56
Engine,
67
Runtime,
78
assistant,
@@ -25,13 +26,13 @@
2526
user_end,
2627
video,
2728
)
28-
from sglang.global_config import global_config
2929
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
3030
from sglang.lang.choices import (
3131
greedy_token_selection,
3232
token_length_normalized,
3333
unconditional_likelihood_normalized,
3434
)
35+
from sglang.srt.entrypoints.engine import Engine
3536
from sglang.utils import LazyImport
3637
from sglang.version import __version__
3738

File renamed without changes.

python/sglang/lang/backend/__init__.py

Whitespace-only changes.

python/sglang/test/few_shot_gsm8k.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
import numpy as np
1414

15-
from sglang.api import set_default_backend
15+
from sglang.lang.api import set_default_backend
1616
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
1717
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
1818

python/sglang/test/few_shot_gsm8k_engine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import numpy as np
99

1010
import sglang as sgl
11-
from sglang.api import set_default_backend
11+
from sglang.lang.api import set_default_backend
1212
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
1313
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
1414

0 commit comments

Comments (0)