
LLM parameters added to json #107


Merged
merged 1 commit on May 17, 2025
18 changes: 17 additions & 1 deletion src/main.py
@@ -462,7 +462,23 @@ async def respond_with_llm_message(update):
                json={
                    "prompt": prompt,
                    "n_predict": 1024,
-                   "temperature": 0.7,
+                   "temperature": 0.8,
+                   "top_k": 40,
+                   "top_p": 0.95,
+                   "min_p": 0.05,
+                   "dynatemp_range": 0,
+                   "dynatemp_exponent": 1,
+                   "typical_p": 1,
+                   "xtc_probability": 0,
+                   "xtc_threshold": 0.1,
+                   "repeat_last_n": 64,
+                   "repeat_penalty": 1,
+                   "presence_penalty": 0,
+                   "frequency_penalty": 0,
+                   "dry_multiplier": 0,
+                   "dry_base": 1.75,
+                   "dry_allowed_length": 2,
+                   "dry_penalty_last_n": -1,
Comment on lines +465 to +481
Contributor

🛠️ Refactor suggestion

Consider adding documentation and making LLM parameters configurable.

The addition of these LLM parameters provides more fine-grained control over text generation, which is good. However, these values are hardcoded with no explanation of their purpose or why these specific values were chosen. This could make maintenance challenging for future developers.

Consider the following improvements:

  1. Add comments explaining what each parameter does and why these values were selected
  2. Make critical parameters configurable through environment variables (similar to how you handle LLM_API_ADDR)
async def respond_with_llm_message(update):
    """Handle LLM responses when bot is mentioned."""
    message_text = update.message.text
    # Remove bot mention and any punctuation after it
    prompt = re.sub(r'ботяра[^\w\s]*', '', message_text.lower()).strip()

+   # Load LLM parameters from environment variables or use defaults
+   temperature = float(os.getenv("LLM_TEMPERATURE", "0.8"))
+   top_k = int(os.getenv("LLM_TOP_K", "40"))
+   top_p = float(os.getenv("LLM_TOP_P", "0.95"))

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{LLM_API_ADDR}/completion",
                json={
                    "prompt": prompt,
                    "n_predict": 1024,
-                   "temperature": 0.8,
-                   "top_k": 40,
-                   "top_p": 0.95,
+                   "temperature": temperature,  # Controls randomness in text generation
+                   "top_k": top_k,  # Limits token selection to the k most likely tokens
+                   "top_p": top_p,  # Nucleus sampling threshold
                    "min_p": 0.05,
                    "dynatemp_range": 0,
                    "dynatemp_exponent": 1,
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
-                   "temperature": 0.8,
-                   "top_k": 40,
-                   "top_p": 0.95,
-                   "min_p": 0.05,
-                   "dynatemp_range": 0,
-                   "dynatemp_exponent": 1,
-                   "typical_p": 1,
-                   "xtc_probability": 0,
-                   "xtc_threshold": 0.1,
-                   "repeat_last_n": 64,
-                   "repeat_penalty": 1,
-                   "presence_penalty": 0,
-                   "frequency_penalty": 0,
-                   "dry_multiplier": 0,
-                   "dry_base": 1.75,
-                   "dry_allowed_length": 2,
-                   "dry_penalty_last_n": -1,
+async def respond_with_llm_message(update):
+    """Handle LLM responses when bot is mentioned."""
+    message_text = update.message.text
+    # Remove bot mention and any punctuation after it
+    prompt = re.sub(r'ботяра[^\w\s]*', '', message_text.lower()).strip()
+
+    # Load LLM parameters from environment variables or use defaults
+    temperature = float(os.getenv("LLM_TEMPERATURE", "0.8"))
+    top_k = int(os.getenv("LLM_TOP_K", "40"))
+    top_p = float(os.getenv("LLM_TOP_P", "0.95"))
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{LLM_API_ADDR}/completion",
+                json={
+                    "prompt": prompt,
+                    "n_predict": 1024,
+                    "temperature": temperature,  # Controls randomness in text generation
+                    "top_k": top_k,  # Limits token selection to the k most likely tokens
+                    "top_p": top_p,  # Nucleus sampling threshold
+                    "min_p": 0.05,
+                    "dynatemp_range": 0,
+                    "dynatemp_exponent": 1,
+                    "typical_p": 1,
+                    "xtc_probability": 0,
+                    "xtc_threshold": 0.1,
+                    "repeat_last_n": 64,
+                    "repeat_penalty": 1,
+                    "presence_penalty": 0,
+                    "frequency_penalty": 0,
+                    "dry_multiplier": 0,
+                    "dry_base": 1.75,
+                    "dry_allowed_length": 2,
+                    "dry_penalty_last_n": -1,
+                },
+            ) as resp:
+                # …
🤖 Prompt for AI Agents
In src/main.py around lines 465 to 481, the LLM parameters are hardcoded without
any comments explaining their purpose or rationale for chosen values, which
hinders maintainability. Add inline comments for each parameter describing its
function and why the specific value was selected. Additionally, refactor the
code to load critical parameters from environment variables with sensible
defaults, similar to the existing LLM_API_ADDR handling, to make them
configurable without code changes.

"stop": ["</s>", "User:", "Assistant:"],
},
) as response:
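
As a side note (not part of the merged diff): a minimal sketch of how the env-var idea from the suggestion above could be extended to the rest of the sampling parameters. The helper function and the LLM_* variable names other than LLM_TEMPERATURE, LLM_TOP_K, and LLM_TOP_P are assumptions for illustration; the defaults mirror the values hardcoded in this PR.

import os

# Sketch only (not in this PR): collect the sampling parameters in one place so
# the request body in respond_with_llm_message stays readable. The LLM_* env var
# names are hypothetical; the defaults mirror the values hardcoded in the diff.
def llm_sampling_params() -> dict:
    return {
        "temperature": float(os.getenv("LLM_TEMPERATURE", "0.8")),      # higher values = more random output
        "top_k": int(os.getenv("LLM_TOP_K", "40")),                     # keep only the k most likely tokens
        "top_p": float(os.getenv("LLM_TOP_P", "0.95")),                 # nucleus sampling threshold
        "min_p": float(os.getenv("LLM_MIN_P", "0.05")),                 # drop tokens below this relative probability
        "repeat_last_n": int(os.getenv("LLM_REPEAT_LAST_N", "64")),     # window considered for repetition penalty
        "repeat_penalty": float(os.getenv("LLM_REPEAT_PENALTY", "1")),  # 1 disables the penalty
    }

# Possible use inside the request body:
#     json={"prompt": prompt, "n_predict": 1024, **llm_sampling_params(), ...}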