From 89d6c764f48589798c0ee552e857f51f83fb94b0 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 13 Dec 2024 20:51:57 -0800 Subject: [PATCH] Add DTrace scripts to Nexus zone (#7244) - Adds a script for tracing all transactions run by Nexus, including their overall latency and the number of statements in each one. - Move all existing scripts to a Nexus subdirectory, and then include that whole directory in the Omicron zone for Nexus itself. - Closes #7224 --- Cargo.lock | 3 +- package-manifest.toml | 1 + .../{ => nexus}/aggregate-query-latency.d | 0 tools/dtrace/{ => nexus}/slowest-queries.d | 2 +- tools/dtrace/{ => nexus}/trace-db-queries.d | 0 tools/dtrace/nexus/trace-transactions.d | 67 +++++++++++++++++++ workspace-hack/Cargo.toml | 2 + 7 files changed, 73 insertions(+), 2 deletions(-) rename tools/dtrace/{ => nexus}/aggregate-query-latency.d (100%) rename tools/dtrace/{ => nexus}/slowest-queries.d (99%) rename tools/dtrace/{ => nexus}/trace-db-queries.d (100%) create mode 100755 tools/dtrace/nexus/trace-transactions.d diff --git a/Cargo.lock b/Cargo.lock index c9fc792f72..6a44c7af3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7398,6 +7398,7 @@ dependencies = [ "getrandom", "group", "hashbrown 0.15.1", + "heck 0.4.1", "hex", "hickory-proto", "hmac", @@ -11020,7 +11021,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.87", diff --git a/package-manifest.toml b/package-manifest.toml index 809c1ce6ca..83b1ba8168 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -125,6 +125,7 @@ source.paths = [ { from = "smf/nexus/{{rack-topology}}", to = "/var/svc/manifest/site/nexus" }, { from = "out/console-assets", to = "/var/nexus/static" }, { from = "schema/crdb", to = "/var/nexus/schema/crdb" }, + { from = "tools/dtrace/nexus", to = "/opt/oxide/dtrace/nexus" }, ] output.type = "zone" setup_hint = """ diff --git a/tools/dtrace/aggregate-query-latency.d b/tools/dtrace/nexus/aggregate-query-latency.d similarity index 100% rename from tools/dtrace/aggregate-query-latency.d rename to tools/dtrace/nexus/aggregate-query-latency.d diff --git a/tools/dtrace/slowest-queries.d b/tools/dtrace/nexus/slowest-queries.d similarity index 99% rename from tools/dtrace/slowest-queries.d rename to tools/dtrace/nexus/slowest-queries.d index 76e22de22f..08b40d4d79 100755 --- a/tools/dtrace/slowest-queries.d +++ b/tools/dtrace/nexus/slowest-queries.d @@ -35,7 +35,7 @@ diesel_db$target:::query-done query[this->conn_id] = NULL; } -tick-5s +tick-10s { printf("\n%Y\n", walltimestamp); trunc(@, 5); diff --git a/tools/dtrace/trace-db-queries.d b/tools/dtrace/nexus/trace-db-queries.d similarity index 100% rename from tools/dtrace/trace-db-queries.d rename to tools/dtrace/nexus/trace-db-queries.d diff --git a/tools/dtrace/nexus/trace-transactions.d b/tools/dtrace/nexus/trace-transactions.d new file mode 100755 index 0000000000..baf7818f72 --- /dev/null +++ b/tools/dtrace/nexus/trace-transactions.d @@ -0,0 +1,67 @@ +#!/usr/sbin/dtrace -qs + +/* Trace all transactions to the control plane database with their latency */ + +dtrace:::BEGIN +{ + printf("Tracing all database transactions for nexus PID %d, use Ctrl-C to exit\n", $target); +} + +/* + * Record the start and number of statements for each transaction. + * + * Note that we're using the Nexus-provided transaction start / done probes. + * This lets us associate the other data we might collect (number of statements, + * ustacks, etc) with the Nexus code itself. Although there are transaction + * start / done probes in `diesel-dtrace`, the existing way we run transactions + * with `async-bb8-diesel` involves spawning a future to run the transactions on + * a blocking thread-pool. That spawning makes it impossible to associate the + * context in which the `diesel-dtrace` probes fire with the Nexus code that + * actually spawned the transaction itself. + */ +nexus_db_queries$target:::transaction-start +{ + this->key = copyinstr(arg0); + transaction_names[this->key] = copyinstr(arg1); + ts[this->key] = timestamp; + n_statements[this->key] = 0; + printf( + "Started transaction '%s' on conn %s\n", + transaction_names[this->key], + json(this->key, "ok") + ); +} + +/* + * When a query runs in the context of a transaction (on the same connection), + * bump the statement counter. + */ +diesel_db$target:::query-start +/ts[copyinstr(arg1)]/ +{ + n_statements[copyinstr(arg1)] += 1 +} + +/* + * As transactions complete, print the number of statements we ran and the + * duration. + */ +nexus_db_queries$target:::transaction-done +/ts[copyinstr(arg0)]/ +{ + this->key = copyinstr(arg0); + this->conn_id = json(this->key, "ok"); + this->latency = (timestamp - ts[this->key]) / 1000; + this->n_statements = n_statements[this->key]; + printf( + "%s %d statement(s) in transaction '%s' on connection %s (%d us)\n", + arg2 ? "COMMIT" : "ROLLBACK", + n_statements[this->key], + transaction_names[this->key], + this->conn_id, + this->latency + ); + ts[this->key] = 0; + n_statements[this->key] = 0; + transaction_names[this->key] = 0; +} diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 31677ed8c1..678170b25e 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -63,6 +63,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom = { version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.15.1" } +heck = { version = "0.4.1" } hex = { version = "0.4.3", features = ["serde"] } hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } @@ -182,6 +183,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom = { version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.15.1" } +heck = { version = "0.4.1" } hex = { version = "0.4.3", features = ["serde"] } hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] }