From 284a4dcc6030601880c288f9cc3cafae833085b7 Mon Sep 17 00:00:00 2001
From: gregorydemay <112856886+gregorydemay@users.noreply.github.com>
Date: Tue, 3 Dec 2024 09:10:51 +0100
Subject: [PATCH] refactor: metrics (#341)

Clean-up metrics:

1. removed because unused: `cyclesWithdrawn`, `errNoPermission` and
`errHostNotAllowed`
2. Added `RejectionCode` to `errHttpOutcall` to be able to track all
failed HTTPs outcalls at the protocol level (e.g. a `SysTransient` error
is often due to the infamous `No consensus could be reached. Replicas
had different responses`
3. Unify metrics related to memory to follow the convention of
`stable_memory_bytes` and `heap_memory_bytes`
---
 candid/evm_rpc.did              |  8 ++--
 evm_rpc_types/src/result/mod.rs | 27 ++++++-----
 src/constants.rs                |  2 -
 src/http.rs                     | 12 +----
 src/metrics.rs                  | 42 +++++++++--------
 src/types.rs                    | 25 ++++++++---
 src/validate.rs                 | 39 +---------------
 tests/mock.rs                   | 32 ++++++++-----
 tests/tests.rs                  | 80 +++++++++++++++++++++++++++++++--
 9 files changed, 158 insertions(+), 109 deletions(-)

diff --git a/candid/evm_rpc.did b/candid/evm_rpc.did
index 4a496cc6..4f38aeee 100644
--- a/candid/evm_rpc.did
+++ b/candid/evm_rpc.did
@@ -132,10 +132,7 @@ type Metrics = record {
   responses : vec record { record { text; text; text }; nat64 };
   inconsistentResponses : vec record { record { text; text }; nat64 };
   cyclesCharged : vec record { record { text; text }; nat };
-  cyclesWithdrawn : nat;
-  errNoPermission : nat64;
-  errHttpOutcall : vec record { record { text; text }; nat64 };
-  errHostNotAllowed : vec record { text; nat64 };
+  errHttpOutcall : vec record { record { text; text; RejectionCode }; nat64 };
 };
 type MultiFeeHistoryResult = variant {
   Consistent : FeeHistoryResult;
@@ -292,6 +289,9 @@ service : (InstallArgs) -> {
   eth_call : (RpcServices, opt RpcConfig, CallArgs) -> (MultiCallResult);
   request : (RpcService, json : text, maxResponseBytes : nat64) -> (RequestResult);
   requestCost : (RpcService, json : text, maxResponseBytes : nat64) -> (RequestCostResult) query;
+  
+  // DEBUG endpoint to retrieve metrics accumulated by the EVM RPC canister.
+  // NOTE: this method exists for debugging purposes, backward compatibility is not guaranteed.
   getMetrics : () -> (Metrics) query;
   getNodesInSubnet : () -> (numberOfNodes : nat32) query;
   getProviders : () -> (vec Provider) query;
diff --git a/evm_rpc_types/src/result/mod.rs b/evm_rpc_types/src/result/mod.rs
index 61223130..14f51902 100644
--- a/evm_rpc_types/src/result/mod.rs
+++ b/evm_rpc_types/src/result/mod.rs
@@ -4,6 +4,7 @@ mod tests;
 use crate::RpcService;
 use candid::{CandidType, Deserialize};
 use ic_cdk::api::call::RejectionCode;
+use std::fmt::Debug;
 use thiserror::Error;
 
 pub type RpcResult<T> = Result<T, RpcError>;
@@ -34,28 +35,26 @@ impl<T> MultiRpcResult<T> {
             ),
         }
     }
+}
 
-    pub fn consistent(self) -> Option<RpcResult<T>> {
+impl<T: Debug> MultiRpcResult<T> {
+    pub fn expect_consistent(self) -> RpcResult<T> {
         match self {
-            MultiRpcResult::Consistent(result) => Some(result),
-            MultiRpcResult::Inconsistent(_) => None,
+            MultiRpcResult::Consistent(result) => result,
+            MultiRpcResult::Inconsistent(inconsistent_result) => {
+                panic!("Expected consistent, but got: {:?}", inconsistent_result)
+            }
         }
     }
 
-    pub fn inconsistent(self) -> Option<Vec<(RpcService, RpcResult<T>)>> {
+    pub fn expect_inconsistent(self) -> Vec<(RpcService, RpcResult<T>)> {
         match self {
-            MultiRpcResult::Consistent(_) => None,
-            MultiRpcResult::Inconsistent(results) => Some(results),
+            MultiRpcResult::Consistent(consistent_result) => {
+                panic!("Expected inconsistent:, but got: {:?}", consistent_result)
+            }
+            MultiRpcResult::Inconsistent(results) => results,
         }
     }
-
-    pub fn expect_consistent(self) -> RpcResult<T> {
-        self.consistent().expect("expected consistent results")
-    }
-
-    pub fn expect_inconsistent(self) -> Vec<(RpcService, RpcResult<T>)> {
-        self.inconsistent().expect("expected inconsistent results")
-    }
 }
 
 impl<T> From<RpcResult<T>> for MultiRpcResult<T> {
diff --git a/src/constants.rs b/src/constants.rs
index 53e5af8b..be854088 100644
--- a/src/constants.rs
+++ b/src/constants.rs
@@ -43,5 +43,3 @@ pub const ETH_SEPOLIA_CHAIN_ID: u64 = 11155111;
 pub const ARBITRUM_ONE_CHAIN_ID: u64 = 42161;
 pub const BASE_MAINNET_CHAIN_ID: u64 = 8453;
 pub const OPTIMISM_MAINNET_CHAIN_ID: u64 = 10;
-
-pub const SERVICE_HOSTS_BLOCKLIST: &[&str] = &[];
diff --git a/src/http.rs b/src/http.rs
index 23f10a9f..c89eb7c0 100644
--- a/src/http.rs
+++ b/src/http.rs
@@ -1,7 +1,7 @@
 use crate::{
     accounting::{get_cost_with_collateral, get_http_request_cost},
     add_metric_entry,
-    constants::{CONTENT_TYPE_HEADER_LOWERCASE, CONTENT_TYPE_VALUE, SERVICE_HOSTS_BLOCKLIST},
+    constants::{CONTENT_TYPE_HEADER_LOWERCASE, CONTENT_TYPE_VALUE},
     memory::is_demo_active,
     types::{MetricRpcHost, MetricRpcMethod, ResolvedRpcService},
     util::canonicalize_json,
@@ -69,14 +69,6 @@ pub async fn http_request(
         }
     };
     let rpc_host = MetricRpcHost(host.to_string());
-    if SERVICE_HOSTS_BLOCKLIST.contains(&rpc_host.0.as_str()) {
-        add_metric_entry!(err_host_not_allowed, rpc_host.clone(), 1);
-        return Err(ValidationError::Custom(format!(
-            "Disallowed RPC service host: {}",
-            rpc_host.0
-        ))
-        .into());
-    }
     if !is_demo_active() {
         let cycles_available = ic_cdk::api::call::msg_cycles_available128();
         let cycles_cost_with_collateral = get_cost_with_collateral(cycles_cost);
@@ -102,7 +94,7 @@ pub async fn http_request(
             Ok(response)
         }
         Err((code, message)) => {
-            add_metric_entry!(err_http_outcall, (rpc_method, rpc_host), 1);
+            add_metric_entry!(err_http_outcall, (rpc_method, rpc_host, code), 1);
             Err(HttpOutcallError::IcError { code, message }.into())
         }
     }
diff --git a/src/metrics.rs b/src/metrics.rs
index 04a1d945..ec1728f7 100644
--- a/src/metrics.rs
+++ b/src/metrics.rs
@@ -52,6 +52,8 @@ impl EncoderExtensions for ic_metrics_encoder::MetricsEncoder<Vec<u8>> {
 }
 
 pub fn encode_metrics(w: &mut ic_metrics_encoder::MetricsEncoder<Vec<u8>>) -> std::io::Result<()> {
+    const WASM_PAGE_SIZE_IN_BYTES: f64 = 65536.0;
+
     crate::memory::UNSTABLE_METRICS.with(|m| {
         let m = m.borrow();
 
@@ -66,20 +68,22 @@ pub fn encode_metrics(w: &mut ic_metrics_encoder::MetricsEncoder<Vec<u8>>) -> st
             "Canister version",
         )?;
         w.encode_gauge(
-            "evmrpc_stable_memory_pages",
-            ic_cdk::api::stable::stable_size().metric_value(),
-            "Size of the stable memory allocated by this canister measured in 64-bit Wasm pages",
+            "stable_memory_bytes",
+            ic_cdk::api::stable::stable_size() as f64 * WASM_PAGE_SIZE_IN_BYTES,
+            "Size of the stable memory allocated by this canister.",
+        )?;
+
+        w.encode_gauge(
+            "heap_memory_bytes",
+            heap_memory_size_bytes() as f64,
+            "Size of the heap memory allocated by this canister.",
         )?;
+
         w.counter_entries(
             "evmrpc_cycles_charged",
             &m.cycles_charged,
             "Number of cycles charged for RPC calls",
         );
-        w.encode_counter(
-            "evmrpc_cycles_withdrawn",
-            m.cycles_withdrawn.metric_value(),
-            "Number of accumulated cycles withdrawn by RPC providers",
-        )?;
         w.counter_entries(
             "evmrpc_requests",
             &m.requests,
@@ -100,17 +104,19 @@ pub fn encode_metrics(w: &mut ic_metrics_encoder::MetricsEncoder<Vec<u8>>) -> st
             &m.err_http_outcall,
             "Number of unsuccessful HTTP outcalls",
         );
-        w.counter_entries(
-            "evmrpc_err_host_not_allowed",
-            &m.err_host_not_allowed,
-            "Number of HostNotAllowed errors",
-        );
-        w.encode_counter(
-            "evmrpc_err_no_permission",
-            m.err_no_permission.metric_value(),
-            "Number of NoPermission errors",
-        )?;
 
         Ok(())
     })
 }
+
+/// Returns the amount of heap memory in bytes that has been allocated.
+#[cfg(target_arch = "wasm32")]
+pub fn heap_memory_size_bytes() -> usize {
+    const WASM_PAGE_SIZE_BYTES: usize = 65536;
+    core::arch::wasm32::memory_size(0) * WASM_PAGE_SIZE_BYTES
+}
+
+#[cfg(not(any(target_arch = "wasm32")))]
+pub fn heap_memory_size_bytes() -> usize {
+    0
+}
diff --git a/src/types.rs b/src/types.rs
index 9e05b6b4..11833fe2 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -3,6 +3,7 @@ use crate::memory::get_api_key;
 use crate::util::hostname_from_url;
 use crate::validate::validate_api_key;
 use candid::CandidType;
+use ic_cdk::api::call::RejectionCode;
 use ic_cdk::api::management_canister::http_request::HttpHeader;
 use ic_stable_structures::storable::Bound;
 use ic_stable_structures::Storable;
@@ -115,6 +116,22 @@ impl MetricLabels for MetricHttpStatusCode {
     }
 }
 
+impl MetricLabels for RejectionCode {
+    fn metric_labels(&self) -> Vec<(&str, &str)> {
+        let code = match self {
+            RejectionCode::NoError => "NO_ERROR",
+            RejectionCode::SysFatal => "SYS_FATAL",
+            RejectionCode::SysTransient => "SYS_TRANSIENT",
+            RejectionCode::DestinationInvalid => "DESTINATION_INVALID",
+            RejectionCode::CanisterReject => "CANISTER_REJECT",
+            RejectionCode::CanisterError => "CANISTER_ERROR",
+            RejectionCode::Unknown => "UNKNOWN",
+        };
+
+        vec![("code", code)]
+    }
+}
+
 #[derive(Clone, Debug, Default, PartialEq, Eq, CandidType, Deserialize)]
 pub struct Metrics {
     pub requests: HashMap<(MetricRpcMethod, MetricRpcHost), u64>,
@@ -123,14 +140,8 @@ pub struct Metrics {
     pub inconsistent_responses: HashMap<(MetricRpcMethod, MetricRpcHost), u64>,
     #[serde(rename = "cyclesCharged")]
     pub cycles_charged: HashMap<(MetricRpcMethod, MetricRpcHost), u128>,
-    #[serde(rename = "cyclesWithdrawn")]
-    pub cycles_withdrawn: u128,
-    #[serde(rename = "errNoPermission")]
-    pub err_no_permission: u64,
     #[serde(rename = "errHttpOutcall")]
-    pub err_http_outcall: HashMap<(MetricRpcMethod, MetricRpcHost), u64>,
-    #[serde(rename = "errHostNotAllowed")]
-    pub err_host_not_allowed: HashMap<MetricRpcHost, u64>,
+    pub err_http_outcall: HashMap<(MetricRpcMethod, MetricRpcHost, RejectionCode), u64>,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
diff --git a/src/validate.rs b/src/validate.rs
index 846ff36a..1687358e 100644
--- a/src/validate.rs
+++ b/src/validate.rs
@@ -1,19 +1,4 @@
-use crate::{
-    constants::{SERVICE_HOSTS_BLOCKLIST, VALID_API_KEY_CHARS},
-    util::hostname_from_url,
-};
-
-pub fn validate_hostname(hostname: &str) -> Result<(), &'static str> {
-    if SERVICE_HOSTS_BLOCKLIST.contains(&hostname) {
-        Err("Hostname not allowed")
-    } else {
-        Ok(())
-    }
-}
-
-pub fn validate_url_pattern(url_pattern: &str) -> Result<(), &'static str> {
-    validate_hostname(&hostname_from_url(url_pattern).ok_or("Invalid hostname in URL")?)
-}
+use crate::constants::VALID_API_KEY_CHARS;
 
 pub fn validate_api_key(api_key: &str) -> Result<(), &'static str> {
     if api_key.is_empty() {
@@ -34,28 +19,6 @@ pub fn validate_api_key(api_key: &str) -> Result<(), &'static str> {
 mod test {
     use super::*;
 
-    #[test]
-    pub fn test_validate_url_pattern() {
-        assert_eq!(validate_url_pattern("https://example.com"), Ok(()));
-        assert_eq!(validate_url_pattern("https://example.com/v1/rpc"), Ok(()));
-        assert_eq!(
-            validate_url_pattern("https://example.com/{API_KEY}"),
-            Ok(())
-        );
-        assert_eq!(
-            validate_url_pattern("https://{API_KEY}"),
-            Err("Invalid hostname in URL")
-        );
-        assert_eq!(
-            validate_url_pattern("https://{API_KEY}/v1/rpc"),
-            Err("Invalid hostname in URL")
-        );
-        assert_eq!(
-            validate_url_pattern("https://{API_KEY}/{API_KEY}"),
-            Err("Invalid hostname in URL")
-        );
-    }
-
     #[test]
     pub fn test_validate_api_key() {
         assert_eq!(validate_api_key("abc"), Ok(()));
diff --git a/tests/mock.rs b/tests/mock.rs
index 65e71781..c1d3945b 100644
--- a/tests/mock.rs
+++ b/tests/mock.rs
@@ -1,5 +1,7 @@
+use ic_cdk::api::call::RejectionCode;
 use pocket_ic::common::rest::{
-    CanisterHttpHeader, CanisterHttpMethod, CanisterHttpReply, CanisterHttpRequest,
+    CanisterHttpHeader, CanisterHttpMethod, CanisterHttpReject, CanisterHttpReply,
+    CanisterHttpRequest, CanisterHttpResponse,
 };
 use std::collections::BTreeSet;
 
@@ -32,11 +34,25 @@ impl MockOutcallBuilder {
             request_headers: None,
             request_body: None,
             max_response_bytes: None,
-            response: CanisterHttpReply {
+            response: CanisterHttpResponse::CanisterHttpReply(CanisterHttpReply {
                 status,
                 headers: vec![],
                 body: body.into().0,
-            },
+            }),
+        })
+    }
+
+    pub fn new_error(code: RejectionCode, message: impl ToString) -> Self {
+        Self(MockOutcall {
+            method: None,
+            url: None,
+            request_headers: None,
+            request_body: None,
+            max_response_bytes: None,
+            response: CanisterHttpResponse::CanisterHttpReject(CanisterHttpReject {
+                reject_code: code as u64,
+                message: message.to_string(),
+            }),
         })
     }
 
@@ -77,14 +93,6 @@ impl MockOutcallBuilder {
         self
     }
 
-    pub fn with_response_header(mut self, name: String, value: String) -> Self {
-        self.0
-            .response
-            .headers
-            .push(CanisterHttpHeader { name, value });
-        self
-    }
-
     pub fn build(self) -> MockOutcall {
         self.0
     }
@@ -103,7 +111,7 @@ pub struct MockOutcall {
     pub request_headers: Option<Vec<CanisterHttpHeader>>,
     pub request_body: Option<MockJsonRequestBody>,
     pub max_response_bytes: Option<u64>,
-    pub response: CanisterHttpReply,
+    pub response: CanisterHttpResponse,
 }
 
 impl MockOutcall {
diff --git a/tests/tests.rs b/tests/tests.rs
index b3dd7e34..51e5783f 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -15,14 +15,13 @@ use evm_rpc_types::{
     InstallArgs, JsonRpcError, MultiRpcResult, Nat256, Provider, ProviderError, RpcApi, RpcConfig,
     RpcError, RpcResult, RpcService, RpcServices,
 };
+use ic_cdk::api::call::RejectionCode;
 use ic_cdk::api::management_canister::http_request::HttpHeader;
 use ic_cdk::api::management_canister::main::CanisterId;
 use ic_test_utilities_load_wasm::load_wasm;
 use maplit::hashmap;
 use mock::{MockOutcall, MockOutcallBuilder};
-use pocket_ic::common::rest::{
-    CanisterHttpMethod, CanisterHttpResponse, MockCanisterHttpResponse, RawMessageId,
-};
+use pocket_ic::common::rest::{CanisterHttpMethod, MockCanisterHttpResponse, RawMessageId};
 use pocket_ic::{CanisterSettings, PocketIc, WasmResult};
 use serde::{de::DeserializeOwned, Deserialize, Serialize};
 use serde_json::json;
@@ -416,7 +415,7 @@ impl<R: CandidType + DeserializeOwned> CallFlow<R> {
         let response = MockCanisterHttpResponse {
             subnet_id: request.subnet_id,
             request_id: request.request_id,
-            response: CanisterHttpResponse::CanisterHttpReply(mock.response.clone()),
+            response: mock.response.clone(),
             additional_responses: vec![],
         };
         self.setup.env.mock_canister_http_response(response);
@@ -1345,6 +1344,79 @@ fn candid_rpc_should_return_inconsistent_results_with_error() {
     );
 }
 
+#[test]
+fn candid_rpc_should_return_inconsistent_results_with_consensus_error() {
+    const CONSENSUS_ERROR: &str =
+        "No consensus could be reached. Replicas had different responses.";
+
+    let setup = EvmRpcSetup::new().mock_api_keys();
+    let result = setup
+        .eth_get_transaction_count(
+            RpcServices::EthMainnet(None),
+            Some(RpcConfig {
+                response_consensus: Some(ConsensusStrategy::Threshold {
+                    total: Some(3),
+                    min: 2,
+                }),
+                ..Default::default()
+            }),
+            evm_rpc_types::GetTransactionCountArgs {
+                address: "0xdAC17F958D2ee523a2206206994597C13D831ec7"
+                    .parse()
+                    .unwrap(),
+                block: evm_rpc_types::BlockTag::Latest,
+            },
+        )
+        .mock_http_once(MockOutcallBuilder::new_error(
+            RejectionCode::SysTransient,
+            CONSENSUS_ERROR,
+        ))
+        .mock_http_once(MockOutcallBuilder::new(
+            200,
+            r#"{"jsonrpc":"2.0","id":0,"result":"0x1"}"#,
+        ))
+        .mock_http_once(MockOutcallBuilder::new_error(
+            RejectionCode::SysTransient,
+            CONSENSUS_ERROR,
+        ))
+        .wait()
+        .expect_inconsistent();
+
+    assert_eq!(
+        result,
+        vec![
+            (
+                RpcService::EthMainnet(EthMainnetService::PublicNode),
+                Ok(1_u8.into())
+            ),
+            (
+                RpcService::EthMainnet(EthMainnetService::BlockPi),
+                Err(RpcError::HttpOutcallError(HttpOutcallError::IcError {
+                    code: RejectionCode::SysTransient,
+                    message: CONSENSUS_ERROR.to_string()
+                }))
+            ),
+            (
+                RpcService::EthMainnet(EthMainnetService::Cloudflare),
+                Err(RpcError::HttpOutcallError(HttpOutcallError::IcError {
+                    code: RejectionCode::SysTransient,
+                    message: CONSENSUS_ERROR.to_string()
+                }))
+            ),
+        ]
+    );
+
+    let rpc_method = || RpcMethod::EthGetTransactionCount.into();
+    let err_http_outcall = setup.get_metrics().err_http_outcall;
+    assert_eq!(
+        err_http_outcall,
+        hashmap! {
+            (rpc_method(), BLOCKPI_ETH_HOSTNAME.into(), RejectionCode::SysTransient) => 1,
+            (rpc_method(), CLOUDFLARE_HOSTNAME.into(), RejectionCode::SysTransient) => 1,
+        },
+    );
+}
+
 #[test]
 fn candid_rpc_should_return_inconsistent_results_with_unexpected_http_status() {
     let setup = EvmRpcSetup::new().mock_api_keys();