Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions python/sglang/srt/openai_api/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,32 @@ def guess_chat_template_name_from_model_path(model_path):
)


def _validate_prompt(prompt: str):
"""Validate that the prompt is not empty or whitespace only."""
is_invalid = False

# Check for empty/whitespace string
if isinstance(prompt, str):
is_invalid = not prompt.strip()
# Check for various invalid list cases: [], [""], [" "], [[]]
elif isinstance(prompt, list):
is_invalid = not prompt or (
len(prompt) == 1
and (
(isinstance(prompt[0], str) and not prompt[0].strip())
or (isinstance(prompt[0], list) and not prompt[0])
)
)

if is_invalid:
raise HTTPException(
status_code=400,
detail="Input cannot be empty or contain only whitespace.",
)

return prompt


async def v1_files_create(
file: UploadFile, purpose: str, file_storage_path: str = None
):
Expand Down Expand Up @@ -1755,6 +1781,8 @@ def v1_embedding_request(all_requests, tokenizer_manager):

for request in all_requests:
prompt = request.input
# Check for empty/whitespace string
prompt = _validate_prompt(request.input)
assert (
type(prompt) is first_prompt_type
), "All prompts must be of the same type in file input settings"
Expand Down
37 changes: 37 additions & 0 deletions test/srt/models/test_embedding_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,20 @@
import random
import unittest

import openai
import torch
from transformers import AutoConfig, AutoTokenizer

from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
from sglang.test.test_utils import CustomTestCase, get_similarities, is_in_ci

if is_in_ci():
from patch import launch_server_cmd
else:
from sglang.utils import launch_server_cmd

from sglang.utils import terminate_process, wait_for_server

MODELS = [
("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5),
("intfloat/e5-mistral-7b-instruct", 1, 1e-5),
Expand Down Expand Up @@ -105,6 +113,35 @@ def test_prefill_logits(self):
DEFAULT_PROMPTS, model, tp_size, torch_dtype, prefill_tolerance
)

def test_empty_string_embedding(self):
"""Test embedding an empty string."""
model_path = MODELS[0][0]
embedding_process, port = launch_server_cmd(
f"""python3 -m sglang.launch_server --model-path {model_path} """
"""--host 127.0.0.1 --is-embedding"""
)
try:
wait_for_server(f"http://127.0.0.1:{port}")

client = openai.Client(
base_url=f"http://127.0.0.1:{port}/v1", api_key="None"
)

# Text embedding example with empty string
text = ""
# Expect a BadRequestError for empty input
with self.assertRaises(openai.BadRequestError) as cm:
client.embeddings.create(
model=model_path,
input=text,
)

# Optionally, check the error message or status code if needed
self.assertEqual(cm.exception.status_code, 400)

finally:
terminate_process(embedding_process)


# Run all tests in this module when executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading