Skip to content

Commit 6e2da51

Browse files
authored
Replace time.time() with time.perf_counter() for benchmarking. (#6178)
Signed-off-by: Lifu Huang <lifu.hlf@gmail.com>
1 parent e9a47f4 commit 6e2da51

File tree

61 files changed

+158
-158
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

61 files changed

+158
-158
lines changed

benchmark/bench_in_batch_prefix/bench_in_batch_prefix.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,11 @@ def test_batch_by_batch(all_prompts, gen_len):
6464

6565
tot_time = 0
6666
for i in range(len(all_prompts)):
67-
tic = time.time()
67+
tic = time.perf_counter()
6868
text_qa.run_batch(
6969
list(zip(all_prompts[i], [gen_len] * len(all_prompts[i]))),
7070
)
71-
tot_time += time.time() - tic
71+
tot_time += time.perf_counter() - tic
7272

7373
return tot_time
7474

@@ -78,13 +78,13 @@ def test_batch_by_batch_with_hint(all_prompts, gen_len):
7878

7979
tot_time = 0
8080
for i in range(len(all_prompts)):
81-
tic = time.time()
81+
tic = time.perf_counter()
8282
# Send a hint to cache the prefix
8383
text_qa.run_batch(list(zip(all_prompts[i][:1], [gen_len])))
8484
# Send the batch
8585
text_qa.run_batch(list(zip(all_prompts[i], [gen_len] * len(all_prompts[i]))))
8686

87-
tot_time += time.time() - tic
87+
tot_time += time.perf_counter() - tic
8888

8989
return tot_time
9090

@@ -94,11 +94,11 @@ def test_send_all(all_prompts, gen_len):
9494

9595
all_prompts = [x for prompt_list in all_prompts for x in prompt_list]
9696

97-
tic = time.time()
97+
tic = time.perf_counter()
9898
text_qa.run_batch(
9999
list(zip(all_prompts, [gen_len] * len(all_prompts))),
100100
)
101-
tot_time = time.time() - tic
101+
tot_time = time.perf_counter() - tic
102102

103103
return tot_time
104104

benchmark/benchmark_batch/benchmark_batch.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def send_batch_request(endpoint, prompts, gen_tokens, request_id):
8181
}
8282
data = {"text": prompts, "sampling_params": sampling_params}
8383

84-
start_time = time.time()
84+
start_time = time.perf_counter()
8585
try:
8686
response = requests.post(
8787
endpoint.base_url + "/generate", json=data, timeout=3600
@@ -90,7 +90,7 @@ def send_batch_request(endpoint, prompts, gen_tokens, request_id):
9090
error = response.json()
9191
raise RuntimeError(f"Request {request_id} failed: {error}")
9292
result = response.json()
93-
elapsed_time = (time.time() - start_time) * 1000 # Convert to ms
93+
elapsed_time = (time.perf_counter() - start_time) * 1000 # Convert to ms
9494
avg_per_prompt = elapsed_time / len(prompts) if prompts else 0
9595
return request_id, elapsed_time, avg_per_prompt, True, len(prompts)
9696
except Exception as e:
@@ -104,7 +104,7 @@ def run_benchmark(endpoint, batched_prompts, batch_size, gen_tokens):
104104
num_requests = len(batched_prompts)
105105

106106
# Record start time for total latency
107-
benchmark_start_time = time.time()
107+
benchmark_start_time = time.perf_counter()
108108

109109
for i, batch_prompts in enumerate(batched_prompts):
110110
request_id = i + 1
@@ -119,7 +119,7 @@ def run_benchmark(endpoint, batched_prompts, batch_size, gen_tokens):
119119
results.append(result)
120120

121121
# Calculate total latency
122-
total_latency = (time.time() - benchmark_start_time) * 1000 # Convert to ms
122+
total_latency = (time.perf_counter() - benchmark_start_time) * 1000 # Convert to ms
123123

124124
return results, total_latency
125125

benchmark/benchmark_batch/benchmark_tokenizer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,20 +44,20 @@ def benchmark_sequential_vs_batch(prompts, batch_size, tokenizer):
4444
for run in range(NUM_RUNS):
4545
batch_prompts = prompts[:batch_size] # Use same prompts for fair comparison
4646

47-
start_time = time.time()
47+
start_time = time.perf_counter()
4848
for prompt in batch_prompts:
4949
tokens = tokenizer.encode(prompt)
50-
sequential_time = (time.time() - start_time) * 1000
50+
sequential_time = (time.perf_counter() - start_time) * 1000
5151
sequential_times.append(sequential_time)
5252

5353
# Batch tokenization using tokenizer()
5454
batch_times = []
5555
for run in range(NUM_RUNS):
5656
batch_prompts = prompts[:batch_size] # Use same prompts for fair comparison
5757

58-
start_time = time.time()
58+
start_time = time.perf_counter()
5959
tokens = tokenizer(batch_prompts)
60-
batch_time = (time.time() - start_time) * 1000
60+
batch_time = (time.perf_counter() - start_time) * 1000
6161
batch_times.append(batch_time)
6262

6363
return {

benchmark/generative_agents/bench_other.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ async def get_one_answer_async(arg):
3939
answer = await call_generate(**arg, temperature=0)
4040
states.append(answer)
4141

42-
tic = time.time()
42+
tic = time.perf_counter()
4343
# we always sequentially execute agent calls to maintain its dependency
4444
if args.backend != "lmql":
4545
for arg in tqdm(arguments):
@@ -50,7 +50,7 @@ async def get_one_answer_async(arg):
5050
loop = asyncio.get_event_loop()
5151
for arg in tqdm(arguments):
5252
loop.run_until_complete(get_one_answer_async(arg))
53-
latency = time.time() - tic
53+
latency = time.perf_counter() - tic
5454

5555
print(f"Latency: {latency:.3f}")
5656

benchmark/generative_agents/bench_sglang.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ def main(args):
3535

3636
states = []
3737
# Run requests
38-
tic = time.time()
38+
tic = time.perf_counter()
3939
for a in arguments:
4040
# only a single key in the dict
4141
for func, arg in a.items():
4242
result = func.run(**arg)
4343
result.sync()
4444
states.append(result)
45-
latency = time.time() - tic
45+
latency = time.perf_counter() - tic
4646

4747
# Compute accuracy
4848
print(f"Latency: {latency:.3f}")

benchmark/gsm8k/bench_other.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def get_one_answer(i):
7575
)
7676
states[i] = answer
7777

78-
tic = time.time()
78+
tic = time.perf_counter()
7979
if args.parallel == 1:
8080
for i in tqdm(range(len(questions))):
8181
get_one_answer(i)
@@ -106,9 +106,9 @@ async def batched_call(batch_size):
106106
for j in range(len(rets)):
107107
states[i + j] = rets[j]
108108

109-
tic = time.time()
109+
tic = time.perf_counter()
110110
asyncio.run(batched_call(batch_size=args.parallel))
111-
latency = time.time() - tic
111+
latency = time.perf_counter() - tic
112112

113113
preds = []
114114
for i in range(len(states)):

benchmark/gsm8k/bench_sglang.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,14 @@ def few_shot_gsm8k(s, question):
8484
#####################################
8585

8686
# Run requests
87-
tic = time.time()
87+
tic = time.perf_counter()
8888
states = few_shot_gsm8k.run_batch(
8989
arguments,
9090
temperature=0,
9191
num_threads=args.parallel,
9292
progress_bar=True,
9393
)
94-
latency = time.time() - tic
94+
latency = time.perf_counter() - tic
9595

9696
preds = []
9797
for i in range(len(states)):

benchmark/hellaswag/bench_other.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def get_one_answer(i):
5757
context=few_shot_examples + questions[i], choices=choices[i]
5858
)
5959

60-
tic = time.time()
60+
tic = time.perf_counter()
6161
if args.parallel == 1:
6262
for i in tqdm(range(len(questions))):
6363
get_one_answer(i)
@@ -82,10 +82,10 @@ async def batched_call(batch_size):
8282
for j in range(len(rets)):
8383
preds[i + j] = rets[j]
8484

85-
tic = time.time()
85+
tic = time.perf_counter()
8686
asyncio.run(batched_call(batch_size=args.parallel))
8787

88-
latency = time.time() - tic
88+
latency = time.perf_counter() - tic
8989

9090
# Compute accuracy
9191
acc = np.mean(np.array(preds) == np.array(labels))

benchmark/hellaswag/bench_sglang.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,15 @@ def few_shot_hellaswag(s, question, choices):
6868
#####################################
6969

7070
# Run requests
71-
tic = time.time()
71+
tic = time.perf_counter()
7272
rets = few_shot_hellaswag.run_batch(
7373
arguments,
7474
temperature=0,
7575
num_threads=args.parallel,
7676
progress_bar=True,
7777
)
7878
preds = [choices[i].index(rets[i]["answer"]) for i in range(len(rets))]
79-
latency = time.time() - tic
79+
latency = time.perf_counter() - tic
8080

8181
# Compute accuracy
8282
acc = np.mean(np.array(preds) == np.array(labels))

benchmark/hicache/bench_multiturn.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ async def handle_request(self, item):
261261
client_id, payload = item
262262
response = await async_request_sglang_generate(payload, self.url, self.pbar)
263263
if self.pbar.n == self.pbar.total:
264-
self.finished_time = time.time()
264+
self.finished_time = time.perf_counter()
265265
self.response_queue.put((client_id, response))
266266
except Exception as e:
267267
print(f"Request failed: {e}")
@@ -334,7 +334,7 @@ def run(self):
334334
request_thread = threading.Thread(target=self.request_sender, daemon=True)
335335
response_thread = threading.Thread(target=self.response_handler, daemon=True)
336336

337-
self.start_time = time.time()
337+
self.start_time = time.perf_counter()
338338
request_thread.start()
339339
response_thread.start()
340340

0 commit comments

Comments
 (0)