Merge pull request #106 from ovchynnikov/dev

ovchynnikov · web-flow · commit 4be94338875a · 2025-05-17T04:17:36.000-07:00
Increase LLM response length (n_predict 200 -> 1024) and strip surrounding whitespace from the LLM reply
diff --git a/.github/workflows/github-actions-push-image.yml b/.github/workflows/github-actions-push-image.yml
@@ -45,7 +45,7 @@ jobs:
 
       - name: Build and push Docker image
         id: push
-        uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1
+        uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0
         with:
           context: .
           file: ./Dockerfile
diff --git a/src/main.py b/src/main.py
@@ -34,8 +34,8 @@
 # Reply with user data for Healthcheck
 send_user_info_with_healthcheck = os.getenv("SEND_USER_INFO_WITH_HEALTHCHECK", "False").lower() == "true"
 USE_LLM = os.getenv("USE_LLM", "False").lower() == "true"
-LLM_MODEL = os.getenv("LLM_MODEL", "gemma3:4b")
-LLM_API_ADDR = os.getenv("LLM_API_ADDR", "http://localhost:11434")
+# LLM_MODEL = os.getenv("LLM_MODEL", "gemma3:4b")
+LLM_API_ADDR = os.getenv("LLM_API_ADDR", "http://localhost:11435")
 TELEGRAM_WRITE_TIMEOUT = 8000
 TELEGRAM_READ_TIMEOUT = 8000
 
@@ -461,14 +461,14 @@ async def respond_with_llm_message(update):
                 f"{LLM_API_ADDR}/completion",
                 json={
                     "prompt": prompt,
-                    "n_predict": 200,
+                    "n_predict": 1024,
                     "temperature": 0.7,
                     "stop": ["</s>", "User:", "Assistant:"],
                 },
             ) as response:
                 if response.status == 200:
                     result = await response.json()
-                    bot_response = result.get("content", "Sorry, I couldn't generate a response.")
+                    bot_response = result.get("content", "Sorry, I couldn't generate a response.").strip()
                 else:
                     bot_response = "Sorry, I encountered an error while processing your request."