@@ -27,219 +27,254 @@ git clone https://github.com/opea-project/GenAIComps.git
export OPEA_GENAICOMPS_ROOT=$(pwd)/GenAIComps
```

+ ## Prerequisites
+
+ You must create a user account with [HuggingFace] and obtain permission to use the gated LLM models by adhering to the guidelines provided on the respective model's webpage. The environment variable `LLM_MODEL` should be set to the HuggingFace model ID of your chosen model, and `HF_TOKEN` to your HuggingFace account's "User Access Token".
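+
+ For example, a minimal sketch of these two variables (the model ID below is only an illustrative placeholder for whichever gated model you have been granted access to):
+
+ ```bash
+ # Illustrative values only -- substitute your own gated model ID and token
+ export LLM_MODEL="meta-llama/Meta-Llama-3-8B-Instruct"
+ export HF_TOKEN="hf_xxxxxxxxxxxxxxxx"
+ ```
+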
## 🚀1. Start Microservice with Python (Option 1)

To start the LLM microservice, you need to install python packages first.

### 1.1 Install Requirements

```bash
+ # Install opea-comps
pip install opea-comps
- pip install -r ${OPEA_GENAICOMPS_ROOT}/comps/llms/requirements.txt
-
- # Install requirements of your choice of microservice in the text-generation folder (tgi, vllm, vllm-ray, etc.)
- export MICROSERVICE_DIR=your_chosen_microservice
-
- pip install -r ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/${MICROSERVICE_DIR}/requirements.txt
- ```

- Set an environment variable `your_ip` to the IP address of the machine where you would like to consume the microservice.
+ # Install requirements from comps/llms
+ cd ${OPEA_GENAICOMPS_ROOT}/comps/llms

- ```bash
- # For example, this command would set the IP address of your currently logged-in machine.
- export your_ip=$(hostname -I | awk '{print $1}')
+ pip install -r requirements.txt
```

### 1.2 Start LLM Service with Python Script

#### 1.2.1 Start the TGI Service

- ```bash
- export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
- python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/tgi/llm.py
- python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/tgi/llm.py
- ```
-
- #### 1.2.2 Start the vLLM Service
+ Install the requirements for the TGI service.

```bash
- export vLLM_LLM_ENDPOINT="http://${your_ip}:8008"
- python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/llm.py
- python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/llm.py
- ```
-
- #### 1.2.3 Start the Ray Service
+ cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/tgi

- ```bash
- export RAY_Serve_ENDPOINT="http://${your_ip}:8008"
- python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/ray_serve/llm.py
- python ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/ray_serve/llm.py
+ pip install -r requirements.txt
```

- ## 🚀2. Start Microservice with Docker (Option 2)
-
- You can use either a published docker image or build your own docker image with the respective microservice Dockerfile of your choice. You must create a user account with [HuggingFace] and obtain permission to use the restricted LLM models by adhering to the guidelines provided on the respective model's webpage.
-
- ### 2.1 Start LLM Service with published image
-
- #### 2.1.1 Start TGI Service
+ Execute the docker run command to initiate the backend, then launch the microservice with the Python script.

```bash
- export HF_LLM_MODEL=${your_hf_llm_model}
+ export TGI_HOST_IP=$(hostname -I | awk '{print $1}') # This sets the IP of the current machine
+ export LLM_MODEL=${your_hf_llm_model}
+ export DATA_DIR=$HOME/data # Location to download the model
export HF_TOKEN=${your_hf_api_token}

- docker run \
+ # Initiate the backend
+ docker run -d \
-p 8008:80 \
-e HF_TOKEN=${HF_TOKEN} \
- -v ./data:/data \
+ -v ${DATA_DIR}:/data \
--name tgi_service \
--shm-size 1g \
ghcr.io/huggingface/text-generation-inference:1.4 \
- --model-id ${HF_LLM_MODEL}
+ --model-id ${LLM_MODEL}
+
+ # Start the microservice, pointing it at the endpoint exposed by the docker run above
+ export TGI_LLM_ENDPOINT="http://${TGI_HOST_IP}:8008"
+
+ python llm.py
```
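+
+ Optionally, you can sanity check the TGI backend before consuming it through the microservice. The following is a minimal sketch (an addition, not part of the original instructions) that queries TGI's native `/generate` route on the mapped port once the model has finished loading:
+
+ ```bash
+ # Direct request to the TGI container started above
+ curl http://${TGI_HOST_IP}:8008/generate \
+   -X POST \
+   -H 'Content-Type: application/json' \
+   -d '{"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 32}}'
+ ```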

- #### 2.1.2 Start vLLM Service
+ #### 1.2.2 Start the vLLM Service
+
+ Install the requirements for the vLLM service.

```bash
- # Use the script to build the docker image as opea/vllm:cpu
- bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/build_docker_vllm.sh cpu
+ cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/langchain
+
+ pip install -r requirements.txt
+ ```

- export HF_LLM_MODEL=${your_hf_llm_model}
+ Execute the docker run command to initiate the backend, then launch the microservice with the Python script.
+
+ ```bash
+ export vLLM_HOST_IP=$(hostname -I | awk '{print $1}') # This sets the IP of the current machine
+ export LLM_MODEL=${your_hf_llm_model}
+ export DATA_DIR=$HOME/data # Location to download the model
export HF_TOKEN=${your_hf_api_token}

- docker run -it \
+ # Build the image first as opea/vllm:cpu
+ bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh cpu
+
+ # Initiate the backend
+ docker run -d -it \
--name vllm_service \
-p 8008:80 \
-e HF_TOKEN=${HF_TOKEN} \
-e VLLM_CPU_KVCACHE_SPACE=40 \
- -v ./data:/data \
+ -v ${DATA_DIR}:/data \
opea/vllm:cpu \
- --model ${HF_LLM_MODEL}
+ --model ${LLM_MODEL} \
--port 80
+
+ # Start the microservice, pointing it at the endpoint exposed by the docker run above
+ export vLLM_ENDPOINT="http://${vLLM_HOST_IP}:8008"
+
+ python llm.py
+ ```
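+
+ Optionally, the vLLM backend can be verified directly through its OpenAI-compatible API. This is a minimal sketch (an addition, not from the original instructions), assuming the container above is serving the model:
+
+ ```bash
+ # Direct completion request to the vLLM container started above
+ curl http://${vLLM_HOST_IP}:8008/v1/completions \
+   -H "Content-Type: application/json" \
+   -d "{\"model\": \"${LLM_MODEL}\", \"prompt\": \"What is Deep Learning?\", \"max_tokens\": 32, \"temperature\": 0}"
+ ```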
+
+ #### 1.2.3 Start the Ray Service
+
+ Install the requirements for the Ray service.
+
+ ```bash
+ cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/ray
+
+ pip install -r requirements.txt
```

- #### 2.1.3 Start Ray Service
+ Execute the docker run command to initiate the backend, then launch the microservice with the Python script.

```bash
- export HF_LLM_MODEL=${your_hf_llm_model}
- export HF_CHAT_PROCESSOR=${your_hf_chatprocessor}
+ export vLLM_RAY_HOST_IP=$(hostname -I | awk '{print $1}') # This sets the IP of the current machine
+ export LLM_MODEL=${your_hf_llm_model}
+ export DATA_DIR=$HOME/data # Location to download the model
export HF_TOKEN=${your_hf_api_token}
- export TRUST_REMOTE_CODE=True

- docker run -it \
+ # Build the image first as opea/vllm_ray:habana
+ bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/ray/dependency/build_docker_vllmray.sh
+
+ # Initiate the backend
+ docker run \
+ --name="vllm-ray-service" \
--runtime=habana \
- --name ray_serve_service \
+ -v $DATA_DIR:/data \
+ -e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--ipc=host \
- -p 8008:80 \
+ -p 8006:8000 \
-e HF_TOKEN=$HF_TOKEN \
- -e TRUST_REMOTE_CODE=$TRUST_REMOTE_CODE \
- opea/llm-ray:latest \
+ opea/vllm_ray:habana \
/bin/bash -c " \
ray start --head && \
- python api_server_openai.py \
- --port_number 80 \
- --model_id_or_path ${HF_LLM_MODEL} \
- --chat_processor ${HF_CHAT_PROCESSOR}"
- ```
-
- ### 2.2 Start LLM Service with image built from source
-
- If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker.
-
- #### 2.2.1 Setup Environment Variables
+ python vllm_ray_openai.py \
+ --port_number 8000 \
+ --model_id_or_path $LLM_MODEL \
+ --tensor_parallel_size 2 \
+ --enforce_eager False"

- In order to start TGI and LLM services, you need to setup the following environment variables first.
+ # Start the microservice, pointing it at the endpoint exposed by the docker run above
+ export vLLM_RAY_ENDPOINT="http://${vLLM_RAY_HOST_IP}:8006"

- ```bash
- export HF_TOKEN=${your_hf_api_token}
- export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
- export LLM_MODEL_ID=${your_hf_llm_model}
+ python llm.py
```
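+
+ Optionally, since the Ray backend exposes an OpenAI-compatible server, a quick check similar to the vLLM one may work (a hedged sketch, assuming the routes match stock vLLM's OpenAI API):
+
+ ```bash
+ # Direct completion request to the vLLM-on-Ray container started above
+ curl http://${vLLM_RAY_HOST_IP}:8006/v1/completions \
+   -H "Content-Type: application/json" \
+   -d "{\"model\": \"${LLM_MODEL}\", \"prompt\": \"What is Deep Learning?\", \"max_tokens\": 32, \"temperature\": 0}"
+ ```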

- In order to start vLLM and LLM services, you need to setup the following environment variables first.
-
- ```bash
- export HF_TOKEN=${your_hf_api_token}
- export vLLM_LLM_ENDPOINT="http://${your_ip}:8008"
- export LLM_MODEL_ID=${your_hf_llm_model}
- ```
-
- In order to start Ray serve and LLM services, you need to setup the following environment variables first.
+ ## 🚀2. Start Microservice with Docker (Option 2)

- ```bash
- export HF_TOKEN=${your_hf_api_token}
- export RAY_Serve_ENDPOINT="http://${your_ip}:8008"
- export LLM_MODEL=${your_hf_llm_model}
- export CHAT_PROCESSOR="ChatModelLlama"
- ```
+ In order to start the microservices with docker, you need to build the docker images for the microservice first.

- ### 2.2 Build Docker Image
+ ### 2.1 Build Docker Image

- #### 2.2.1 TGI
+ #### 2.1.1 TGI

```bash
+ # Build the microservice docker
cd ${OPEA_GENAICOMPS_ROOT}

docker build \
- -t opea/llm-tgi:latest \
--build-arg https_proxy=$https_proxy \
--build-arg http_proxy=$http_proxy \
+ -t opea/llm-tgi:latest \
-f comps/llms/text-generation/tgi/Dockerfile .
```

- #### 2.2.2 vLLM
-
- Build vllm docker.
+ #### 2.1.2 vLLM

```bash
197
- bash ${OPEA_GENAICOMPS_ROOT} /comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh
198
- ```
199
-
200
- Build microservice docker.
193
+ # Build vllm docker
194
+ bash ${OPEA_GENAICOMPS_ROOT} /comps/llms/text-generation/vllm/langchain/dependency/build_docker_vllm.sh hpu
201
195
202
- ``` bash
196
+ # Build the microservice docker
203
197
cd ${OPEA_GENAICOMPS_ROOT}
204
198
205
199
docker build \
206
- -t opea/llm-vllm:latest \
207
200
--build-arg https_proxy=$https_proxy \
208
201
--build-arg http_proxy=$http_proxy \
202
+ -t opea/llm-vllm:latest \
209
203
-f comps/llms/text-generation/vllm/langchain/Dockerfile .
210
204
```
211
205
212
- #### 2.2.3 Ray Serve
213
-
214
- Build Ray Serve docker.
206
+ #### 2.1.3 Ray
215
207
216
208
``` bash
+ # Build the Ray Serve docker
bash ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/vllm/ray/dependency/build_docker_vllmray.sh
- ```
-
- Build microservice docker.

- ```bash
+ # Build the microservice docker
cd ${OPEA_GENAICOMPS_ROOT}

docker build \
- -t opea/llm-ray:latest \
--build-arg https_proxy=$https_proxy \
--build-arg http_proxy=$http_proxy \
+ -t opea/llm-vllm-ray:latest \
-f comps/llms/text-generation/vllm/ray/Dockerfile .
```

+ ### 2.2 Start LLM Service with the built image
+
To start a docker container, you have two options:

- A. Run Docker with CLI
- B. Run Docker with Docker Compose

- You can choose one as needed.
+ You can choose one as needed. If you start an LLM microservice with docker compose, the `docker_compose_llm.yaml` file will automatically start both the endpoint and the microservice docker.
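+
+ For Option B, the invocation is roughly as follows (a hedged sketch, not taken from this guide: it assumes the compose file ships in your chosen microservice folder, for example the TGI one, and that the variables from 2.2.1 below are already exported):
+
+ ```bash
+ cd ${OPEA_GENAICOMPS_ROOT}/comps/llms/text-generation/tgi
+ docker compose -f docker_compose_llm.yaml up -d
+ ```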
+
+ #### 2.2.1 Setup Environment Variables
+
+ In order to start the TGI and LLM services, you need to set up the following environment variables first.
+
+ ```bash
+ export HF_TOKEN=${your_hf_api_token}
+ export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
+ export LLM_MODEL=${your_hf_llm_model}
+ export DATA_DIR=$HOME/data
+ ```
+
+ In order to start the vLLM and LLM services, you need to set up the following environment variables first.
+
+ ```bash
+ export HF_TOKEN=${your_hf_api_token}
+ export vLLM_LLM_ENDPOINT="http://${your_ip}:8008"
+ export LLM_MODEL=${your_hf_llm_model}
+ ```
+
+ In order to start the Ray Serve and LLM services, you need to set up the following environment variables first.
+
+ ```bash
+ export HF_TOKEN=${your_hf_api_token}
+ export RAY_Serve_ENDPOINT="http://${your_ip}:8008"
+ export LLM_MODEL=${your_hf_llm_model}
+ export CHAT_PROCESSOR="ChatModelLlama"
+ ```

### 2.3 Run Docker with CLI (Option A)

#### 2.3.1 TGI

+ Start the TGI endpoint.
+
+ ```bash
+ docker run -d \
+ -p 8008:80 \
+ -e HF_TOKEN=${HF_TOKEN} \
+ -v ${DATA_DIR}:/data \
+ --name tgi_service \
+ --shm-size 1g \
+ ghcr.io/huggingface/text-generation-inference:1.4 \
+ --model-id ${LLM_MODEL}
+ ```
+
+ Start the TGI microservice.
+
```bash
docker run -d \
--name="llm-tgi-server" \
@@ -272,7 +307,7 @@ docker run \
-e no_proxy=${no_proxy} \
-e vLLM_LLM_ENDPOINT=$vLLM_LLM_ENDPOINT \
-e HF_TOKEN=$HF_TOKEN \
- -e LLM_MODEL_ID=$LLM_MODEL_ID \
+ -e LLM_MODEL=$LLM_MODEL \
opea/llm-vllm:latest
```
@@ -365,10 +400,10 @@ curl http://${your_ip}:8008/v1/chat/completions \
"model": ${your_hf_llm_model},
"messages": [
{"role": "assistant", "content": "You are a helpful assistant."},
- {"role": "user", "content": "What is Deep Learning?"},
+ {"role": "user", "content": "What is Deep Learning?"}
],
"max_tokens": 32,
- "stream": True
+ "stream": true
}'
```