From 66031e2d7bfd796bbca7418bf24d65b01a4ada9b Mon Sep 17 00:00:00 2001
From: Jakob Meier
Date: Fri, 24 May 2024 17:09:41 +0200
Subject: [PATCH] congestion: add metrics for prepare_tx (#11390)

- Count how many transactions are rejected due to congestion and hence
  dropped from the pool. Store it as a histogram for detailed insights.
- Keep track of the gas limit for tx conversions, as a simple gauge
  since this should be a relatively stable value.
- Keep track of how much gas is actually used for tx conversions, in a
  histogram.

---------

Co-authored-by: Jan Ciolek <149345204+jancionear@users.noreply.github.com>
---
 chain/chain/src/runtime/metrics.rs | 42 ++++++++++++++++++++++++++++++
 chain/chain/src/runtime/mod.rs     | 25 +++++++++++++++---
 2 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/chain/chain/src/runtime/metrics.rs b/chain/chain/src/runtime/metrics.rs
index 6dbf49b08a7..4739bf96894 100644
--- a/chain/chain/src/runtime/metrics.rs
+++ b/chain/chain/src/runtime/metrics.rs
@@ -36,6 +36,48 @@ pub(crate) static PREPARE_TX_SIZE: Lazy<HistogramVec> = Lazy::new(|| {
     .unwrap()
 });
 
+pub(crate) static PREPARE_TX_GAS: Lazy<HistogramVec> = Lazy::new(|| {
+    try_create_histogram_vec(
+        "near_prepare_tx_gas",
+        "How much gas was spent for processing new transactions when producing a chunk.",
+        &["shard_id"],
+        // 100e9 = 100 Ggas
+        // A transaction with no actions costs 108 Ggas to process.
+        // A typical function call costs ~300 Ggas.
+        // The absolute maximum is defined by `max_tx_gas` = 500 Tgas.
+        // This ranges from 100 Ggas to 204.8 Tgas as the last bucket boundary.
+        Some(exponential_buckets(100e9, 2.0, 12).unwrap()),
+    )
+    .unwrap()
+});
+
+pub(crate) static PREPARE_TX_REJECTED: Lazy<HistogramVec> = Lazy::new(|| {
+    try_create_histogram_vec(
+        "near_prepare_tx_rejected",
+        "The number of transactions rejected when producing a chunk.",
+        // possible reasons:
+        // - invalid_tx          The tx failed validation or the signer does not have enough funds.
+        // - invalid_block_hash  The block_hash field on the tx is expired or not on the canonical chain.
+        // - congestion          The receiver shard is congested.
+        &["shard_id", "reason"],
+        // Histogram boundaries are inclusive. Pick the first boundary below 1
+        // to have 0 values as a separate bucket.
+        // In exclusive boundaries, this would be equivalent to:
+        // [1, 10, 100, 1_000, 10_000, 100_000]
+        Some(exponential_buckets(0.99999, 10.0, 6).unwrap()),
+    )
+    .unwrap()
+});
+
+pub(crate) static CONGESTION_PREPARE_TX_GAS_LIMIT: Lazy<IntGaugeVec> = Lazy::new(|| {
+    try_create_int_gauge_vec(
+        "near_congestion_prepare_tx_gas_limit",
+        "How much gas the shard spends at most per chunk to convert new transactions to receipts.",
+        &["shard_id"],
+    )
+    .unwrap()
+});
+
 pub static APPLYING_CHUNKS_TIME: Lazy<HistogramVec> = Lazy::new(|| {
     try_create_histogram_vec(
         "near_applying_chunks_time",
diff --git a/chain/chain/src/runtime/mod.rs b/chain/chain/src/runtime/mod.rs
index 4f5865034c0..32847c922f2 100644
--- a/chain/chain/src/runtime/mod.rs
+++ b/chain/chain/src/runtime/mod.rs
@@ -787,6 +787,10 @@ impl RuntimeAdapter for NightshadeRuntime {
         let size_limit = transactions_gas_limit
             / (runtime_config.wasm_config.ext_costs.gas_cost(ExtCosts::storage_write_value_byte)
                 + runtime_config.wasm_config.ext_costs.gas_cost(ExtCosts::storage_read_value_byte));
+        // for metrics only
+        let mut rejected_due_to_congestion = 0;
+        let mut rejected_invalid_tx = 0;
+        let mut rejected_invalid_for_chain = 0;
 
         // Add new transactions to the result until some limit is hit or the transactions run out.
         loop {
@@ -836,6 +840,7 @@ impl RuntimeAdapter for NightshadeRuntime {
                             );
                             if !congestion_control.shard_accepts_transactions() {
                                 tracing::trace!(target: "runtime", tx=?tx.get_hash(), "discarding transaction due to congestion");
+                                rejected_due_to_congestion += 1;
                                 continue;
                             }
                         }
@@ -844,6 +849,7 @@ impl RuntimeAdapter for NightshadeRuntime {
                     // Verifying the transaction is on the same chain and hasn't expired yet.
                     if !chain_validate(&tx) {
                         tracing::trace!(target: "runtime", tx=?tx.get_hash(), "discarding transaction that failed chain validation");
+                        rejected_invalid_for_chain += 1;
                         continue;
                     }
 
@@ -867,6 +873,7 @@ impl RuntimeAdapter for NightshadeRuntime {
                         }
                         Err(RuntimeError::InvalidTxError(err)) => {
                             tracing::trace!(target: "runtime", tx=?tx.get_hash(), ?err, "discarding transaction that is invalid");
+                            rejected_invalid_tx += 1;
                             state_update.rollback();
                         }
                         Err(RuntimeError::StorageError(err)) => {
@@ -881,9 +888,21 @@ impl RuntimeAdapter for NightshadeRuntime {
             }
         }
         debug!(target: "runtime", "Transaction filtering results {} valid out of {} pulled from the pool", result.transactions.len(), num_checked_transactions);
-        metrics::PREPARE_TX_SIZE
-            .with_label_values(&[&shard_id.to_string()])
-            .observe(total_size as f64);
+        let shard_label = shard_id.to_string();
+        metrics::PREPARE_TX_SIZE.with_label_values(&[&shard_label]).observe(total_size as f64);
+        metrics::PREPARE_TX_REJECTED
+            .with_label_values(&[&shard_label, "congestion"])
+            .observe(rejected_due_to_congestion as f64);
+        metrics::PREPARE_TX_REJECTED
+            .with_label_values(&[&shard_label, "invalid_tx"])
+            .observe(rejected_invalid_tx as f64);
+        metrics::PREPARE_TX_REJECTED
+            .with_label_values(&[&shard_label, "invalid_block_hash"])
+            .observe(rejected_invalid_for_chain as f64);
+        metrics::PREPARE_TX_GAS.with_label_values(&[&shard_label]).observe(total_gas_burnt as f64);
+        metrics::CONGESTION_PREPARE_TX_GAS_LIMIT
+            .with_label_values(&[&shard_label])
+            .set(i64::try_from(transactions_gas_limit).unwrap_or(i64::MAX));
         result.storage_proof = state_update.trie.recorded_storage().map(|s| s.nodes);
         Ok(result)
     }