Merge pull request #27 from Sherlock113/docs/update-structure

Sherlock113 · web-flow · commit 72970b3bf66f · 2025-08-06T18:24:09.000+08:00
docs: Update structure
diff --git a/docs/inference-optimization/img/bento-different-inference-optimizations.png b/docs/inference-optimization/img/bento-different-inference-optimizations.png
diff --git a/docs/inference-optimization/index.mdx b/docs/inference-optimization/index.mdx
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 3
+sidebar_position: 4
 sidebar_custom_props: 
     icon: /img/speed.svg
 ---
diff --git a/docs/inference-optimization/llm-inference-metrics.md b/docs/inference-optimization/llm-inference-metrics.md
@@ -10,6 +10,7 @@ keywords:
 ---
 
 import LinkList from '@site/src/components/LinkList';
+import Button from '@site/src/components/Button';
 
 # Key metrics for LLM inference
 
@@ -105,6 +106,16 @@ There are two common ways to measure throughput:
     - GPU memory bandwidth and compute utilization
     
     As the number of concurrent requests increases, the total TPS also grows, until the LLM hits the saturation point of available compute resources. Beyond this point, performance might decrease because the LLM is over capacity.
+
+---
+
+At Bento, we offer deployment and inference optimization strategies tailored to your use case. You can easily leverage them to optimize for throughput, latency, or cost.
+
+![Different inference optimization strategies offered by Bento](./img/bento-different-inference-optimizations.png)
+
+<div style={{ margin: '3rem 0' }}>
+[<Button>Talk to us</Button>](https://l.bentoml.com/contact-us-llm-inference-handbook)
+</div>
     
 ## Goodput
 
diff --git a/docs/infrastructure-and-operations/index.mdx b/docs/infrastructure-and-operations/index.mdx
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 4
+sidebar_position: 3
 sidebar_custom_props: 
     icon: /img/setting.svg
 ---
diff --git a/src/components/Chat/index.tsx b/src/components/Chat/index.tsx
@@ -38,9 +38,9 @@ function Chat() {
           </button>
           <h4>Talk to Us</h4>
           <p>
-            At Bento, we're working to help enterprises leverage the latest
-            advancements in LLM inference with ease. Have questions about LLM
-            inference? Let's talk.
+            At Bento, we help customers build custom LLM serving solutions
+            tailored for speed, quality, or cost. Schedule a call to
+            learn how we make it easy to apply advanced inference optimizations to your use case.
           </p>
           <div>
             <a