|
1 |
| -site_name: 'FastDeploy 2.0: 大模型部署' |
| 1 | +site_name: 'FastDeploy 2.0: Large Language Model Deployment' |
2 | 2 | nav:
|
3 | 3 | - 'FastDeploy 2.0': index.md
|
4 |
| - - 快速开始: |
5 |
| - - '10分钟上手ERNIE 4.5模型部署': get_started/quick_start.md |
6 |
| - - '10分钟上手ERNIE 4.5多模态模型部署': get_started/quick_start_vl.md |
7 |
| - - 安装: |
8 |
| - - 'Nvidia GPU安装': get_started/installation/nvidia_gpu.md |
9 |
| - - 昆仑芯P800安装: get_started/installation/kunlunxin.md |
10 |
| - - ERNIE-X1思考模型部署: get_started/ernie-x1.md |
11 |
| - - 'ERNIE-4.5-VL多模模型部署': get_started/ernie-4.5-vl.md |
12 |
| - - 'ERNIE-4.5模型部署': get_started/ernie-4.5.md |
13 |
| - - 服务化部署: |
14 |
| - - 使用方式: serving/README.md |
15 |
| - - 监控metrics: serving/metrics.md |
16 |
| - - 负载调度: serving/scheduler.md |
17 |
| - - 离线推理: offline_inference.md |
18 |
| - - 部署特性: |
| 4 | + - 'Quick Start': |
| 5 | + - Installation: |
| 6 | + - 'Nvidia GPU': get_started/installation/nvidia_gpu.md |
| 7 | + - 'KunlunXin XPU': get_started/installation/kunlunxin_xpu.md |
| 8 | + - 'Enflame S60': get_started/installation/Enflame_gcu.md |
| 9 | + - 'Iluvatar CoreX': get_started/installation/iluvatar_gpu.md |
| 10 | + - 'Quick Deployment for ERNIE-4.5-21B-A3B': get_started/quick_start.md |
| 11 | + - 'Quick Deployment for ERNIE-4.5-VL-28B-A3B': get_started/quick_start_vl.md |
| 12 | + - 'ERNIE-4.5-300B-A47B': get_started/ernie-4.5.md |
| 13 | + - 'ERNIE-4.5-VL-424B-A47B': get_started/ernie-4.5-vl.md |
| 14 | + - 'Online Serving': |
| 15 | + - 'OpenAI-Compatible API Server': online_serving/README.md |
| 16 | + - 'Monitor Metrics': online_serving/metrics.md |
| 17 | + - 'Scheduler': online_serving/scheduler.md |
| 18 | + - 'Offline Inference': offline_inference.md |
| 19 | + - Quantization: |
| 20 | + - 'Overview': quantization/README.md |
| 21 | + - 'Online Quantization': quantization/online_quantization.md |
| 22 | + - 'WINT2 Quantization': quantization/wint2.md |
| 23 | + - Features: |
19 | 24 | - 'Prefix Caching': features/prefix_caching.md
|
20 |
| - - '分离式部署': features/disaggregated.md |
21 |
| - - 'Chunked Prefill与128K长文部署': features/chunked_prefill.md |
22 |
| - - '多实例负载均衡': features/load_balance.md |
23 |
| - - '投机解码': features/speculative_decoding.md |
24 |
| - - '结构化输出': features/structured_outputs.md |
25 |
| - - '思维链输出': features/reasoning_output.md |
26 |
| - - 'Tool Calling': features/tool_calling.md |
27 |
| - - 量化加速: |
28 |
| - - 无损量化: quantization/inflight_quantization.md |
29 |
| - - 'ERNIE-4.5 weight only int2规范说明': quantization/ernie_wint2.md |
30 |
| - - 支持模型列表: supported_models.md |
| 25 | + - 'Disaggregation': features/disaggregated.md |
| 26 | + - 'Chunked Prefill': features/chunked_prefill.md |
| 27 | + - 'Load Balance': features/load_balance.md |
| 28 | + - 'Speculative Decoding': features/speculative_decoding.md |
| 29 | + - 'Structured Outputs': features/structured_outputs.md |
| 30 | + - 'Reasoning Output': features/reasoning_output.md |
| 31 | + - 'Supported Models': supported_models.md |
31 | 32 | - Benchmark: benchmark.md
|
32 |
| - - 架构设计: |
33 |
| - - 代码模块说明: design/code_guide.md |
34 |
| - - AppendAttention: design/append_attention.md |
35 |
| - - 使用问题: |
36 |
| - - FAQ: usage/faq.md |
37 |
| - - 日志说明: usage/log.md |
38 |
| - - 算子编译: usage/build_ops.md |
39 |
| - - 自定义算子导入: usage/contribution_guide.md |
40 |
| - - 增加硬件支持: usage/how_to_support_new_device.md |
| 33 | + - Usage: |
| 34 | + - 'Log Description': usage/log.md |
| 35 | + - 'Code Overview': usage/code_overview.md |
| 36 | + - 'Environment Variables': usage/environment_variables.md |
41 | 37 | theme:
|
42 | 38 | name: 'material'
|
43 | 39 | highlightjs: true
|
|
0 commit comments