Skip to content

Commit

Permalink
Merge pull request #276 from auxten/main
Browse files (browse the repository at this point in the history)
  • Loading branch information
rschu1ze authored Dec 12, 2024
2 parents 8367368 + 4418b3f commit 0f9621d
Show file tree
Hide file tree
Showing 11 changed files with 333 additions and 324 deletions.
3 changes: 2 additions & 1 deletion chdb-dataframe/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

sudo apt-get update
sudo apt-get install -y python3-pip
pip install --break-system-packages pandas chdb
pip install --break-system-packages pandas
pip install --break-system-packages chdb==2.2.0b1

# Download the data
wget --no-verbose --continue https://datasets.clickhouse.com/hits_compatible/athena/hits.parquet
Expand Down
13 changes: 11 additions & 2 deletions chdb-dataframe/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,31 @@
hits["EventDate"] = pd.to_datetime(hits["EventDate"], unit="D")

# fix all object columns to string
start = timeit.default_timer()
for col in hits.columns:
if hits[col].dtype == "O":
hits[col] = hits[col].astype(str)

print("Dataframe(numpy) normalization time:", timeit.default_timer() - start)

queries = []
with open("queries.sql") as f:
queries = f.readlines()

queries_times = []

# conn = chdb.connect("./tmp?verbose&log-level=test")
conn = chdb.connect("./tmp")
i = 0
for q in queries:
i += 1
times = []
for _ in range(3):
start = timeit.default_timer()
result = chdb.query(q, "Null")
result = conn.query(q, "Null")
end = timeit.default_timer()
times.append(end - start)
print(f"Q{i}: ", times)
queries_times.append(times)

result_json = {
Expand All @@ -61,7 +70,7 @@
# If /proc/cpuinfo contains "AMD EPYC 9654", update the machine name and write the result into results/epyc-9654-2.2.json
if "AMD EPYC 9654" in open("/proc/cpuinfo").read():
result_json["machine"] = "EPYC 9654, 384G"
with open("results/epyc-9654.json", "w") as f:
with open("results/epyc-9654-2.2.json", "w") as f:
f.write(json.dumps(result_json, indent=4))
else:
# write result into results/c6a.metal.json
Expand Down
Loading

0 comments on commit 0f9621d

Please sign in to comment.