Skip to content

Commit

Permalink
Add k6 benchmark for chat threads
Browse files Browse the repository at this point in the history
  • Loading branch information
nstogner committed Dec 13, 2024
1 parent 47cce1b commit 83433cc
Show file tree
Hide file tree
Showing 11 changed files with 22,741 additions and 3 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file
# Ignore build and test binaries.
bin/
benchmarks/
charts/
components/
docs/
Expand Down
1 change: 1 addition & 0 deletions benchmarks/chat/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ShareGPT_V3_unfiltered_cleaned_split.json
5 changes: 5 additions & 0 deletions benchmarks/chat/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Raw ShareGPT dataset used as the source of realistic chat threads.
ShareGPT_V3_unfiltered_cleaned_split.json:
	wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

# Build the message-thread fixture consumed by k6.js from the raw dataset.
# .PHONY because the target name is not the file the script produces; without
# it, a stray file named `prepare-message-threads` would skip the recipe.
.PHONY: prepare-message-threads
prepare-message-threads: ShareGPT_V3_unfiltered_cleaned_split.json
	python prepare-message-threads.py
87 changes: 87 additions & 0 deletions benchmarks/chat/k6.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { check } from 'k6';
import { scenario } from 'k6/execution';
import http from 'k6/http';
import { Trend, Counter } from 'k6/metrics';

// Benchmark target is injected via environment variables, e.g.
// `k6 run -e MODEL_ADDR=host:port -e MODEL_ID=my-model k6.js`.
const model_addr = __ENV.MODEL_ADDR;
const model_id = __ENV.MODEL_ID;

// Cap on generated tokens per completion request.
const max_new_tokens = 50;

// Custom metrics reported alongside the built-in k6 stats.
const timePerToken = new Trend('time_per_token', true); // ms per generated token
const new_tokens = new Counter('new_tokens');           // completion tokens
const input_tokens = new Counter('input_tokens');       // prompt tokens
const tokens = new Counter('tokens');                   // prompt + completion

// Chat threads dataset — presumably generated by prepare-message-threads.py
// (see `make prepare-message-threads`); verify the expected schema there.
const messageThreads = JSON.parse(open("message-threads.json"))

// k6 run configuration: fail the run if any HTTP request errors, and drive
// the whole benchmark through a single fixed-size iteration pool.
export const options = {
  thresholds: {
    // Any failed HTTP request fails the whole benchmark run.
    http_req_failed: ['rate==0'],
  },
  scenarios: {
    chat: {
      // 'shared-iterations': the VUs collectively work through a fixed
      // total number of iterations rather than each running its own count.
      executor: 'shared-iterations',
      // Number of VUs to run concurrently.
      vus: 20,
      // Total number of script iterations to execute across all VUs.
      iterations: 200,
      // Hard stop even if iterations remain.
      maxDuration: '120s',
    },
  },
};

// One iteration replays one full chat thread: each user message is sent in
// turn, with the model's replies appended to the payload so the context grows
// the way a real multi-turn conversation does.
export default function run() {
  const headers = { 'Content-Type': 'application/json' };
  // Deterministically spread threads across iterations.
  const msgThread = messageThreads[scenario.iterationInTest % messageThreads.length];
  const payload = {
    "messages": [],
    "temperature": 0,
    "model": model_id,
    "max_tokens": max_new_tokens
  };

  // Iterate over all the messages in the thread, appending the completions to
  // the same payload.
  for (const userMessage of msgThread["userMessages"]) {
    payload.messages.push({
      "role": "user",
      "content": userMessage
    });

    const res = http.post(`http://${model_addr}/v1/chat/completions`, JSON.stringify(payload), {
      headers,
    });
    // A client error (e.g. context-length overflow) would repeat for every
    // remaining message in this thread, so abandon the thread early.
    if (res.status >= 400 && res.status < 500) {
      return;
    }

    check(res, {
      'Post status is 200': (r) => r.status === 200,
    });
    const duration = res.timings.duration;

    if (res.status === 200) {
      const body = res.json();

      const completion_tokens = body.usage.completion_tokens;
      const prompt_tokens = body.usage.prompt_tokens;
      // Whole-request duration (ms) divided by generated tokens — an
      // approximation of per-token latency.
      const latency_ms_per_token = duration / completion_tokens;

      new_tokens.add(completion_tokens);
      input_tokens.add(prompt_tokens);
      timePerToken.add(latency_ms_per_token);
      tokens.add(completion_tokens + prompt_tokens);

      // Carry the assistant's reply forward so the next request includes the
      // full conversation history.
      const msg0 = body.choices[0].message;
      payload.messages.push({
        "role": msg0.role,
        "content": msg0.content
      });
    } else {
      console.log(`Error Status: ${res.status}`);
      console.log(`Response: ${res.body}`);
    }
  }
}
18 changes: 18 additions & 0 deletions benchmarks/chat/k8s/pod.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# One-shot benchmark runner Pod. The k6 container idles so the benchmark can
# be started manually (e.g. via `kubectl exec`) against the mounted script.
apiVersion: v1
kind: Pod
metadata:
  name: chat-benchmark
spec:
  # A completed benchmark run should not be restarted automatically.
  restartPolicy: Never
  containers:
    - name: k6
      image: grafana/k6
      # Keep the container alive instead of running the script at startup.
      # To run automatically, drop `command` and use:
      #   args: ["run", "/config/k6.js"]  # optionally add "--http-debug"
      command: ["sleep", "infinity"]
      volumeMounts:
        - name: work
          mountPath: /work
  volumes:
    - name: work
      configMap:
        name: chat-benchmark
Loading

0 comments on commit 83433cc

Please sign in to comment.