diff --git a/.drone.yml b/.drone.yml index 39e6141f60..ffb5002f08 100644 --- a/.drone.yml +++ b/.drone.yml @@ -484,6 +484,7 @@ steps: - name: nctl-nightly-tests <<: *buildenv + image: casperlabs/node-build-u2004 environment: AWS_ACCESS_KEY_ID: from_secret: put-drone-aws-ak @@ -517,4 +518,4 @@ volumes: temp: {} trigger: - cron: [ nightly-tests-cron ] + cron: [ nightly-tests-cron, nightly-tests-cron-1-6 ] diff --git a/Cargo.lock b/Cargo.lock index f749f0003d..875c055bfb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -203,7 +203,7 @@ checksum = "a941c39708478e8eea39243b5983f1c42d2717b3620ee91f4a52115fd02ac43f" dependencies = [ "itertools 0.9.0", "proc-macro-error", - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] @@ -214,6 +214,12 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" +[[package]] +name = "array-init" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -250,7 +256,7 @@ version = "0.1.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", ] @@ -342,6 +348,12 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" +[[package]] +name = "bimap" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" + [[package]] name = "bincode" version = "1.3.3" @@ -460,11 +472,33 @@ version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +[[package]] +name = "burn" +version = "0.1.0" +dependencies = [ + "casper-contract", + "casper-types", +] + [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "ed2490600f404f2b94c167e31d3ed1d5f3c225a0f3b80230053b3e0b7b962bd9" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" +dependencies = [ + "proc-macro2 1.0.70", + "quote 1.0.26", + "syn 2.0.15", +] [[package]] name = "byteorder" @@ -599,7 +633,7 @@ dependencies = [ "serde", "serde_bytes", "serde_json", - "strum", + "strum 0.24.1", "tempfile", "thiserror", "tracing", @@ -658,6 +692,7 @@ dependencies = [ "ansi_term", "anyhow", "aquamarine", + "array-init", "assert-json-diff", "assert_matches", "async-trait", @@ -678,7 +713,6 @@ dependencies = [ "fake_instant", "fs2", "futures", - "futures-io", "hex-buffer-serde 0.3.0", "hex_fmt", "hostname", @@ -686,6 +720,7 @@ dependencies = [ "humantime", "hyper", "itertools 0.10.5", + "juliet", "libc", "linked-hash-map", "lmdb-rkv", @@ -723,13 +758,12 @@ dependencies = [ "static_assertions", "stats_alloc", "structopt", - "strum", + "strum 0.24.1", "sys-info", "tempfile", "thiserror", "tokio", "tokio-openssl", - "tokio-serde", "tokio-stream", "tokio-util 0.6.10", "toml", @@ -780,7 +814,7 @@ dependencies = [ "serde_bytes", "serde_json", "serde_test", - "strum", + "strum 0.24.1", "tempfile", "thiserror", "uint", @@ -962,7 +996,7 @@ checksum = "ae6371b8bdc8b7d3959e9cf7b22d4435ef3e79e138688421ec654acf8c81b008" dependencies = [ "heck 0.4.1", "proc-macro-error", - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] 
@@ -974,7 +1008,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" dependencies = [ "heck 0.4.1", - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", ] @@ -1489,7 +1523,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fdaf97f4804dcebfa5862639bc9ce4121e82140bec2a987ac5140294865b5b" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", ] @@ -1502,9 +1536,9 @@ checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" [[package]] name = "datasize" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c88ad90721dc8e2ebe1430ac2f59c5bdcd74478baa68da26f30f33b0fe997f11" +checksum = "e65c07d59e45d77a8bda53458c24a828893a99ac6cdd9c84111e09176ab739a2" dependencies = [ "datasize_derive", "fake_instant", @@ -1515,11 +1549,11 @@ dependencies = [ [[package]] name = "datasize_derive" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b0415ec81945214410892a00d4b5dd4566f6263205184248e018a3fe384a61e" +checksum = "613e4ee15899913285b7612004bbd490abd605be7b11d35afada5902fb6b91d5" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] @@ -1549,7 +1583,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" dependencies = [ "convert_case", - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "rustc_version", "syn 1.0.109", @@ -1741,18 +1775,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "educe" -version = "0.4.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "079044df30bb07de7d846d41a184c4b00e66ebdac93ee459253474f3a47e50ae" -dependencies = [ - 
"enum-ordinalize", - "proc-macro2 1.0.56", - "quote 1.0.26", - "syn 1.0.109", -] - [[package]] name = "ee-1071-regression" version = "0.1.0" @@ -1999,25 +2021,11 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e94aa31f7c0dc764f57896dc615ddd76fc13b0d5dca7eb6cc5e018a5a09ec06" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] -[[package]] -name = "enum-ordinalize" -version = "3.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bb1df8b45ecb7ffa78dca1c17a438fb193eb083db0b1b494d2a61bcb5096a" -dependencies = [ - "num-bigint", - "num-traits", - "proc-macro2 1.0.56", - "quote 1.0.26", - "rustc_version", - "syn 1.0.109", -] - [[package]] name = "env_logger" version = "0.9.3" @@ -2290,7 +2298,7 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", ] @@ -3206,9 +3214,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.24" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" dependencies = [ "bytes", "fnv", @@ -3482,7 +3490,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "socket2 0.4.9", "tokio", "tower-service", "tracing", @@ -3656,6 +3664,24 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "juliet" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4336a0d5e38193caafe774bd2be027cf5aa3c3e45b3f1bda1791fcacc9e9951d" +dependencies = [ + "array-init", + "bimap", + "bytemuck", + "bytes", + "futures", + "once_cell", + "strum 0.25.0", + "thiserror", + "tokio", + 
"tracing", +] + [[package]] name = "k256" version = "0.13.1" @@ -3721,9 +3747,9 @@ checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" [[package]] name = "libc" -version = "0.2.149" +version = "0.2.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" [[package]] name = "libloading" @@ -4051,7 +4077,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", - "log", "wasi", "windows-sys 0.48.0", ] @@ -4128,7 +4153,7 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91761aed67d03ad966ef783ae962ef9bbaca728d2dd7ceb7939ec110fffad998" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] @@ -4298,7 +4323,7 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] @@ -4386,9 +4411,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "oorandom" @@ -4423,7 +4448,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", ] @@ -4621,16 +4646,16 @@ version = "1.0.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] [[package]] name = "pin-project-lite" -version = "0.2.9" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -4727,7 +4752,7 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30490e0852e58402b8fae0d39897b08a24f493023a4d6cf56b2e30f31ed57548" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "regex", "syn 1.0.109", @@ -4801,7 +4826,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", "version_check", @@ -4813,7 +4838,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "version_check", ] @@ -4829,9 +4854,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" dependencies = [ "unicode-ident", ] @@ -4884,7 +4909,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fa06db3abc95f048e0afa371db5569b24912bb98a8e2e2e89c75c5b43bc2aa8" 
dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] @@ -4983,7 +5008,7 @@ version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", ] [[package]] @@ -5550,7 +5575,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "791c2c848cff1abaeae34fef7e70da5f93171d9eea81ce0fe969a1df627a61a8" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "serde_derive_internals", "syn 1.0.109", @@ -5653,7 +5678,7 @@ version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", ] @@ -5664,7 +5689,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dbab34ca63057a1f15280bdf3c39f2b1eb1b54c17e98360e511637aef7418c6" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] @@ -5687,7 +5712,7 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcec881020c684085e55a25f7fd888954d56609ef363479dc5a1305eb0d40cab" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", ] @@ -5874,6 +5899,16 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "socket2" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "spin" version = "0.9.8" @@ -5964,7 +5999,7 @@ checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" dependencies = [ "heck 0.3.3", 
"proc-macro-error", - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] @@ -5975,7 +6010,16 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" dependencies = [ - "strum_macros", + "strum_macros 0.24.3", +] + +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros 0.25.3", ] [[package]] @@ -5985,12 +6029,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" dependencies = [ "heck 0.4.1", - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "rustversion", "syn 1.0.109", ] +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck 0.4.1", + "proc-macro2 1.0.70", + "quote 1.0.26", + "rustversion", + "syn 2.0.15", +] + [[package]] name = "subtle" version = "2.4.1" @@ -6014,7 +6071,7 @@ version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "unicode-ident", ] @@ -6025,7 +6082,7 @@ version = "2.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "unicode-ident", ] @@ -6124,7 +6181,7 @@ version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", ] @@ -6195,28 +6252,29 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.28.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c786bf8134e5a3a166db9b29ab8f48134739014a3eca7bc6bfa95d673b136f" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ - "autocfg", + "backtrace", "bytes", "libc", "mio 0.8.11", "num_cpus", + "parking_lot 0.12.1", "pin-project-lite", - "socket2", + "socket2 0.5.6", "tokio-macros", "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", ] @@ -6243,21 +6301,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-serde" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "911a61637386b789af998ee23f50aa30d5fd7edcec8d6d3dedae5e5815205466" -dependencies = [ - "bincode", - "bytes", - "educe", - "futures-core", - "futures-sink", - "pin-project", - "serde", -] - [[package]] name = "tokio-stream" version = "0.1.14" @@ -6290,6 +6333,7 @@ checksum = "36943ee01a6d67977dd3f84a5a1d2efeb4ada3a1ae771cadfaa535d9d9fc6507" dependencies = [ "bytes", "futures-core", + "futures-io", "futures-sink", "log", "pin-project-lite", @@ -6365,7 +6409,7 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74" dependencies = [ - "proc-macro2 
1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", ] @@ -6766,7 +6810,7 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d0801cec07737d88cb900e6419f6f68733867f90b3faaa837e84692e101bf0" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "pulldown-cmark", "regex", "semver", @@ -6831,9 +6875,9 @@ dependencies = [ [[package]] name = "walrus" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a7b95ecf5892b48104914fa021721699bb8149ae754cff50a22daeb7df0928f" +checksum = "2c03529cd0c4400a2449f640d2f27cd1b48c3065226d15e26d98e4429ab0adb7" dependencies = [ "anyhow", "gimli 0.26.2", @@ -6852,7 +6896,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a6e5bd22c71e77d60140b0bd5be56155a37e5bd14e24f5f87298040d0cc40d7" dependencies = [ "heck 0.3.3", - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 1.0.109", ] @@ -6924,7 +6968,7 @@ dependencies = [ "bumpalo", "log", "once_cell", - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", "wasm-bindgen-shared", @@ -6958,7 +7002,7 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ - "proc-macro2 1.0.56", + "proc-macro2 1.0.70", "quote 1.0.26", "syn 2.0.15", "wasm-bindgen-backend", @@ -7197,6 +7241,15 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.4", +] + [[package]] name = "windows-targets" version = "0.42.2" @@ -7227,6 +7280,21 @@ dependencies = [ "windows_x86_64_msvc 0.48.5", ] +[[package]] +name = "windows-targets" +version = "0.52.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +dependencies = [ + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -7239,6 +7307,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" @@ -7251,6 +7325,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" + [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -7263,6 +7343,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" + [[package]] name = "windows_i686_msvc" version = "0.42.2" @@ -7275,6 +7361,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" 
+[[package]] +name = "windows_i686_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -7287,6 +7379,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -7299,6 +7397,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" @@ -7311,6 +7415,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" + [[package]] name = "winit" version = "0.21.0" diff --git a/Cargo.toml b/Cargo.toml index 2dac867eba..9215474c95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,8 @@ members = [ "utils/highway-rewards-analysis", "utils/highway-state-grapher", ] +# Ensures we do not pull in all the features of dev dependencies when building. 
+resolver = "2" default-members = [ "ci/casper_updater", @@ -33,13 +35,6 @@ default-members = [ exclude = ["utils/nctl/remotes/casper-client-rs"] -resolver = "2" - -# Include debug symbols in the release build of `casper-engine-tests` so that `simple-transfer` will yield useful -# perf data. -[profile.release.package.casper-engine-tests] -debug = true - [profile.release] codegen-units = 1 lto = true @@ -47,3 +42,7 @@ lto = true [profile.bench] codegen-units = 1 lto = true + +[profile.release-with-debug] +inherits = "release" +debug = true diff --git a/Makefile b/Makefile index 1f3637cf92..3ed221fec9 100644 --- a/Makefile +++ b/Makefile @@ -144,7 +144,7 @@ lint-smart-contracts: .PHONY: audit-rs audit-rs: - $(CARGO) audit + $(CARGO) audit --ignore RUSTSEC-2024-0019 .PHONY: audit-as audit-as: diff --git a/README.md b/README.md index cf237a65eb..fce80fb573 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,8 @@ RUST_LOG=info cargo run --release -- validator resources/local/config.toml If the environment variable is unset, it is equivalent to setting `RUST_LOG=error`. +When developing and running unit tests, setting `NODE_TEST_LOG=json` will cause the log messages produced by the tests to be JSON-formatted. 
+ ### Log message format A typical log message will look like: diff --git a/build_wasm_package.sh b/build_wasm_package.sh index b1437556c4..68935a1f94 100755 --- a/build_wasm_package.sh +++ b/build_wasm_package.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash abspath() { # generate absolute path from relative path diff --git a/ci/nightly-test.sh b/ci/nightly-test.sh index 5e385a93b9..f801a69a8c 100755 --- a/ci/nightly-test.sh +++ b/ci/nightly-test.sh @@ -51,6 +51,9 @@ function start_run_teardown() { SETUP_ARGS+=("config_path=$CONFIG_TOML") fi + # Github actions hint for grouping log lines + echo "::group::{$RUN_CMD}" + # Setup nctl files for test echo "Setting up network: nctl-assets-setup ${SETUP_ARGS[@]}" nctl-assets-setup "${SETUP_ARGS[@]}" @@ -70,6 +73,8 @@ function start_run_teardown() { # Cleanup after test completion popd nctl-assets-teardown + # End Github actions hint for grouping tests + echo "::endgroup::" sleep 1 } diff --git a/execution_engine/benches/trie_bench.rs b/execution_engine/benches/trie_bench.rs index ef11e40cdf..6c91a8528e 100644 --- a/execution_engine/benches/trie_bench.rs +++ b/execution_engine/benches/trie_bench.rs @@ -42,19 +42,19 @@ fn deserialize_trie_node(b: &mut Bencher) { } fn serialize_trie_node_pointer(b: &mut Bencher) { - let node = Trie::::Extension { - affix: (0..255).collect(), - pointer: Pointer::NodePointer(Digest::hash([0; 32])), - }; + let node = Trie::::extension( + (0..255).collect(), + Pointer::NodePointer(Digest::hash([0; 32])), + ); b.iter(|| ToBytes::to_bytes(black_box(&node))); } fn deserialize_trie_node_pointer(b: &mut Bencher) { - let node = Trie::::Extension { - affix: (0..255).collect(), - pointer: Pointer::NodePointer(Digest::hash([0; 32])), - }; + let node = Trie::::extension( + (0..255).collect(), + Pointer::NodePointer(Digest::hash([0; 32])), + ); let node_bytes = node.to_bytes().unwrap(); b.iter(|| Trie::::from_bytes(black_box(&node_bytes))); diff --git 
a/execution_engine/src/core/engine_state/engine_config.rs b/execution_engine/src/core/engine_state/engine_config.rs index 1801775fa3..7f0df3ef25 100644 --- a/execution_engine/src/core/engine_state/engine_config.rs +++ b/execution_engine/src/core/engine_state/engine_config.rs @@ -30,13 +30,8 @@ pub const DEFAULT_MAX_STORED_VALUE_SIZE: u32 = 8 * 1024 * 1024; pub const DEFAULT_MINIMUM_DELEGATION_AMOUNT: u64 = 500 * 1_000_000_000; /// Default value for strict argument checking. pub const DEFAULT_STRICT_ARGUMENT_CHECKING: bool = false; -/// 91 days / 7 days in a week = 13 weeks -/// Length of total vesting schedule in days. -const VESTING_SCHEDULE_LENGTH_DAYS: usize = 91; -const DAY_MILLIS: usize = 24 * 60 * 60 * 1000; /// Default length of total vesting schedule period expressed in days. -pub const DEFAULT_VESTING_SCHEDULE_LENGTH_MILLIS: u64 = - VESTING_SCHEDULE_LENGTH_DAYS as u64 * DAY_MILLIS as u64; +pub const DEFAULT_VESTING_SCHEDULE_LENGTH_MILLIS: u64 = 0; /// Default value for allowing auction bids. pub const DEFAULT_ALLOW_AUCTION_BIDS: bool = true; /// Default value for allowing unrestricted transfers. diff --git a/execution_engine/src/core/engine_state/executable_deploy_item.rs b/execution_engine/src/core/engine_state/executable_deploy_item.rs index de8953a1ba..f1bf9da932 100644 --- a/execution_engine/src/core/engine_state/executable_deploy_item.rs +++ b/execution_engine/src/core/engine_state/executable_deploy_item.rs @@ -107,8 +107,6 @@ impl ContractPackageIdentifier { Clone, DataSize, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, JsonSchema, )] #[serde(deny_unknown_fields)] -#[cfg_attr(feature = "gens", derive(strum::EnumDiscriminants))] -#[cfg_attr(feature = "gens", strum_discriminants(derive(strum::EnumIter)))] pub enum ExecutableDeployItem { /// Executable specified as raw bytes that represent WASM code and an instance of /// [`RuntimeArgs`]. 
diff --git a/execution_engine/src/core/engine_state/execution_effect.rs b/execution_engine/src/core/engine_state/execution_effect.rs index b1b17ecf2b..372d7edf3b 100644 --- a/execution_engine/src/core/engine_state/execution_effect.rs +++ b/execution_engine/src/core/engine_state/execution_effect.rs @@ -31,6 +31,7 @@ impl From for ExecutionEffect { | Transform::AddUInt256(_) | Transform::AddUInt512(_) | Transform::AddKeys(_) => ops.insert_add(key, Op::Add), + Transform::Prune => ops.insert_add(key, Op::Prune), }; transforms.insert_add(key, transform); } diff --git a/execution_engine/src/core/engine_state/mod.rs b/execution_engine/src/core/engine_state/mod.rs index d887526552..20d13b02b3 100644 --- a/execution_engine/src/core/engine_state/mod.rs +++ b/execution_engine/src/core/engine_state/mod.rs @@ -493,7 +493,7 @@ where match self .state - .delete_keys(correlation_id, state_root_hash, keys_to_delete) + .prune_keys(correlation_id, state_root_hash, keys_to_delete) { Ok(DeleteResult::Deleted(post_state_hash)) => { Ok(PruneResult::Success { post_state_hash }) diff --git a/execution_engine/src/core/engine_state/op.rs b/execution_engine/src/core/engine_state/op.rs index 36b4d3b3c6..98ea211dfa 100644 --- a/execution_engine/src/core/engine_state/op.rs +++ b/execution_engine/src/core/engine_state/op.rs @@ -14,6 +14,8 @@ pub enum Op { Write, /// Add a value into a `Key`. Add, + /// Prune a value under a `Key`. + Prune, /// No operation. 
#[default] NoOp, @@ -52,6 +54,7 @@ impl From<&Op> for casper_types::OpKind { Op::Write => casper_types::OpKind::Write, Op::Add => casper_types::OpKind::Add, Op::NoOp => casper_types::OpKind::NoOp, + Op::Prune => casper_types::OpKind::Delete, } } } diff --git a/execution_engine/src/core/runtime/auction_internal.rs b/execution_engine/src/core/runtime/auction_internal.rs index d24f398c89..b835f3a99a 100644 --- a/execution_engine/src/core/runtime/auction_internal.rs +++ b/execution_engine/src/core/runtime/auction_internal.rs @@ -98,12 +98,15 @@ where account_hash: AccountHash, unbonding_purses: Vec, ) -> Result<(), Error> { - self.context - .metered_write_gs_unsafe( - Key::Unbond(account_hash), - StoredValue::Unbonding(unbonding_purses), - ) - .map_err(|exec_error| >::from(exec_error).unwrap_or(Error::Storage)) + let unbond_key = Key::Unbond(account_hash); + if unbonding_purses.is_empty() { + self.context.prune_gs_unsafe(unbond_key); + Ok(()) + } else { + self.context + .metered_write_gs_unsafe(unbond_key, StoredValue::Unbonding(unbonding_purses)) + .map_err(|exec_error| >::from(exec_error).unwrap_or(Error::Storage)) + } } fn record_era_info(&mut self, _era_id: EraId, era_summary: EraInfo) -> Result<(), Error> { diff --git a/execution_engine/src/core/runtime/host_function_flag.rs b/execution_engine/src/core/runtime/host_function_flag.rs index 79c486177a..09c526b4d1 100644 --- a/execution_engine/src/core/runtime/host_function_flag.rs +++ b/execution_engine/src/core/runtime/host_function_flag.rs @@ -76,6 +76,7 @@ mod tests { assert!(flag.is_in_host_function_scope()); { + #[allow(clippy::redundant_clone)] let cloned_flag = flag.clone(); assert_eq!(cloned_flag.counter.get(), 1); assert!(cloned_flag.is_in_host_function_scope()); diff --git a/execution_engine/src/core/runtime/mint_internal.rs b/execution_engine/src/core/runtime/mint_internal.rs index 1b0aa81c29..98d7a8ef88 100644 --- a/execution_engine/src/core/runtime/mint_internal.rs +++ 
b/execution_engine/src/core/runtime/mint_internal.rs @@ -90,6 +90,14 @@ where ) -> Result, execution::Error> { self.context.read_account(&Key::Account(*account_hash)) } + + fn validate_writeable(&self, key: &Key) -> Result<(), execution::Error> { + self.context.validate_writeable(key) + } + + fn validate_key(&self, key: &Key) -> Result<(), execution::Error> { + self.context.validate_key(key) + } } // TODO: update Mint + StorageProvider to better handle errors diff --git a/execution_engine/src/core/runtime/mod.rs b/execution_engine/src/core/runtime/mod.rs index ffbdc70207..07739afdde 100644 --- a/execution_engine/src/core/runtime/mod.rs +++ b/execution_engine/src/core/runtime/mod.rs @@ -650,6 +650,15 @@ where let result: Result<(), mint::Error> = mint_runtime.reduce_total_supply(amount); CLValue::from_t(result).map_err(Self::reverter) })(), + // Type: `fn burn(purse: URef, amount: U512)` + mint::METHOD_BURN => (|| { + mint_runtime.charge_system_contract_call(mint_costs.burn)?; + + let purse: URef = Self::get_named_argument(runtime_args, mint::ARG_PURSE)?; + let amount: U512 = Self::get_named_argument(runtime_args, mint::ARG_AMOUNT)?; + let result: Result<(), mint::Error> = mint_runtime.burn(purse, amount); + CLValue::from_t(result).map_err(Self::reverter) + })(), // Type: `fn create() -> URef` mint::METHOD_CREATE => (|| { mint_runtime.charge_system_contract_call(mint_costs.create)?; diff --git a/execution_engine/src/core/runtime_context/mod.rs b/execution_engine/src/core/runtime_context/mod.rs index 716af1b305..c2a0461f57 100644 --- a/execution_engine/src/core/runtime_context/mod.rs +++ b/execution_engine/src/core/runtime_context/mod.rs @@ -694,7 +694,7 @@ where } /// Validates whether keys used in the `value` are not forged. 
- fn validate_value(&self, value: &StoredValue) -> Result<(), Error> { + pub(crate) fn validate_value(&self, value: &StoredValue) -> Result<(), Error> { match value { StoredValue::CLValue(cl_value) => self.validate_cl_value(cl_value), StoredValue::Account(account) => { @@ -768,7 +768,7 @@ where } /// Validates if a [`Key`] refers to a [`URef`] and has a write bit set. - fn validate_writeable(&self, key: &Key) -> Result<(), Error> { + pub(crate) fn validate_writeable(&self, key: &Key) -> Result<(), Error> { if self.is_writeable(key) { Ok(()) } else { @@ -924,6 +924,17 @@ where Ok(()) } + /// Prune a key from the global state. + /// + /// Use with caution - there is no validation done as the key is assumed to be validated + /// already. + pub(crate) fn prune_gs_unsafe(&mut self, key: K) + where + K: Into, + { + self.tracking_copy.borrow_mut().prune(key.into()); + } + /// Writes data to a global state and charges for bytes stored. /// /// This method performs full validation of the key to be written. diff --git a/execution_engine/src/core/tracking_copy/mod.rs b/execution_engine/src/core/tracking_copy/mod.rs index 76342e85c3..6eb1379d93 100644 --- a/execution_engine/src/core/tracking_copy/mod.rs +++ b/execution_engine/src/core/tracking_copy/mod.rs @@ -350,6 +350,12 @@ impl> TrackingCopy { self.journal.push((normalized_key, Transform::Write(value))); } + /// Prunes a `key`. + pub(crate) fn prune(&mut self, key: Key) { + let normalized_key = key.normalize(); + self.journal.push((normalized_key, Transform::Prune)); + } + /// Ok(None) represents missing key to which we want to "add" some value. /// Ok(Some(unit)) represents successful operation. 
/// Err(error) is reserved for unexpected errors when accessing global @@ -414,11 +420,15 @@ impl> TrackingCopy { }; match transform.clone().apply(current_value) { - Ok(new_value) => { + Ok(Some(new_value)) => { self.cache.insert_write(normalized_key, new_value); self.journal.push((normalized_key, transform)); Ok(AddResult::Success) } + Ok(None) => { + self.journal.push((normalized_key, transform)); + Ok(AddResult::Success) + } Err(transform::Error::TypeMismatch(type_mismatch)) => { Ok(AddResult::TypeMismatch(type_mismatch)) } diff --git a/execution_engine/src/shared/host_function_costs.rs b/execution_engine/src/shared/host_function_costs.rs index 293ff1d007..1cb08f9ff2 100644 --- a/execution_engine/src/shared/host_function_costs.rs +++ b/execution_engine/src/shared/host_function_costs.rs @@ -200,12 +200,10 @@ pub struct HostFunctionCosts { /// Cost of calling the `read_value` host function. pub read_value: HostFunction<[Cost; 3]>, /// Cost of calling the `dictionary_get` host function. - #[serde(alias = "read_value_local")] pub dictionary_get: HostFunction<[Cost; 3]>, /// Cost of calling the `write` host function. pub write: HostFunction<[Cost; 4]>, /// Cost of calling the `dictionary_put` host function. - #[serde(alias = "write_local")] pub dictionary_put: HostFunction<[Cost; 4]>, /// Cost of calling the `add` host function. pub add: HostFunction<[Cost; 4]>, diff --git a/execution_engine/src/shared/system_config/mint_costs.rs b/execution_engine/src/shared/system_config/mint_costs.rs index 6e65e4d146..2cd461ca7d 100644 --- a/execution_engine/src/shared/system_config/mint_costs.rs +++ b/execution_engine/src/shared/system_config/mint_costs.rs @@ -8,6 +8,8 @@ use serde::{Deserialize, Serialize}; pub const DEFAULT_MINT_COST: u32 = 2_500_000_000; /// Default cost of the `reduce_total_supply` mint entry point. pub const DEFAULT_REDUCE_TOTAL_SUPPLY_COST: u32 = 10_000; +/// Default cost of the `burn` mint entry point. 
+pub const DEFAULT_BURN_COST: u32 = 10_000; /// Default cost of the `create` mint entry point. pub const DEFAULT_CREATE_COST: u32 = 2_500_000_000; /// Default cost of the `balance` mint entry point. @@ -27,6 +29,8 @@ pub struct MintCosts { pub mint: u32, /// Cost of calling the `reduce_total_supply` entry point. pub reduce_total_supply: u32, + /// Cost of calling the `burn` entry point. + pub burn: u32, /// Cost of calling the `create` entry point. pub create: u32, /// Cost of calling the `balance` entry point. @@ -44,6 +48,7 @@ impl Default for MintCosts { Self { mint: DEFAULT_MINT_COST, reduce_total_supply: DEFAULT_REDUCE_TOTAL_SUPPLY_COST, + burn: DEFAULT_BURN_COST, create: DEFAULT_CREATE_COST, balance: DEFAULT_BALANCE_COST, transfer: DEFAULT_TRANSFER_COST, @@ -60,6 +65,7 @@ impl ToBytes for MintCosts { let Self { mint, reduce_total_supply, + burn, create, balance, transfer, @@ -74,6 +80,7 @@ impl ToBytes for MintCosts { ret.append(&mut transfer.to_bytes()?); ret.append(&mut read_base_round_reward.to_bytes()?); ret.append(&mut mint_into_existing_purse.to_bytes()?); + ret.append(&mut burn.to_bytes()?); Ok(ret) } @@ -82,6 +89,7 @@ impl ToBytes for MintCosts { let Self { mint, reduce_total_supply, + burn, create, balance, transfer, @@ -91,6 +99,7 @@ impl ToBytes for MintCosts { mint.serialized_length() + reduce_total_supply.serialized_length() + + burn.serialized_length() + create.serialized_length() + balance.serialized_length() + transfer.serialized_length() @@ -108,11 +117,13 @@ impl FromBytes for MintCosts { let (transfer, rem) = FromBytes::from_bytes(rem)?; let (read_base_round_reward, rem) = FromBytes::from_bytes(rem)?; let (mint_into_existing_purse, rem) = FromBytes::from_bytes(rem)?; + let (burn, rem) = FromBytes::from_bytes(rem)?; Ok(( Self { mint, reduce_total_supply, + burn, create, balance, transfer, @@ -128,6 +139,7 @@ impl Distribution for Standard { fn sample(&self, rng: &mut R) -> MintCosts { MintCosts { mint: rng.gen(), + burn: rng.gen(), 
reduce_total_supply: rng.gen(), create: rng.gen(), balance: rng.gen(), @@ -149,6 +161,7 @@ pub mod gens { pub fn mint_costs_arb()( mint in num::u32::ANY, reduce_total_supply in num::u32::ANY, + burn in num::u32::ANY, create in num::u32::ANY, balance in num::u32::ANY, transfer in num::u32::ANY, @@ -158,6 +171,7 @@ pub mod gens { MintCosts { mint, reduce_total_supply, + burn, create, balance, transfer, diff --git a/execution_engine/src/shared/transform.rs b/execution_engine/src/shared/transform.rs index 1e141cdda8..e7ff9c8181 100644 --- a/execution_engine/src/shared/transform.rs +++ b/execution_engine/src/shared/transform.rs @@ -87,6 +87,8 @@ pub enum Transform { /// /// This transform assumes that the existing stored value is either an Account or a Contract. AddKeys(NamedKeys), + /// Prunes a key. + Prune, /// Represents the case where applying a transform would cause an error. #[data_size(skip)] Failure(Error), @@ -168,24 +170,26 @@ where impl Transform { /// Applies the transformation on a specified stored value instance. /// - /// This method produces a new [`StoredValue`] instance based on the [`Transform`] variant. - pub fn apply(self, stored_value: StoredValue) -> Result { + /// This method produces a new [`StoredValue`] instance based on the [`Transform`] variant. If a + /// given transform is a [`Transform::Prune`] then `None` is returned as the [`StoredValue`] is + /// consumed but no new value is produced. 
+ pub fn apply(self, stored_value: StoredValue) -> Result, Error> { match self { - Transform::Identity => Ok(stored_value), - Transform::Write(new_value) => Ok(new_value), - Transform::AddInt32(to_add) => wrapping_addition(stored_value, to_add), - Transform::AddUInt64(to_add) => wrapping_addition(stored_value, to_add), - Transform::AddUInt128(to_add) => wrapping_addition(stored_value, to_add), - Transform::AddUInt256(to_add) => wrapping_addition(stored_value, to_add), - Transform::AddUInt512(to_add) => wrapping_addition(stored_value, to_add), + Transform::Identity => Ok(Some(stored_value)), + Transform::Write(new_value) => Ok(Some(new_value)), + Transform::AddInt32(to_add) => Ok(Some(wrapping_addition(stored_value, to_add)?)), + Transform::AddUInt64(to_add) => Ok(Some(wrapping_addition(stored_value, to_add)?)), + Transform::AddUInt128(to_add) => Ok(Some(wrapping_addition(stored_value, to_add)?)), + Transform::AddUInt256(to_add) => Ok(Some(wrapping_addition(stored_value, to_add)?)), + Transform::AddUInt512(to_add) => Ok(Some(wrapping_addition(stored_value, to_add)?)), Transform::AddKeys(mut keys) => match stored_value { StoredValue::Contract(mut contract) => { contract.named_keys_append(&mut keys); - Ok(StoredValue::Contract(contract)) + Ok(Some(StoredValue::Contract(contract))) } StoredValue::Account(mut account) => { account.named_keys_append(&mut keys); - Ok(StoredValue::Account(account)) + Ok(Some(StoredValue::Account(account))) } StoredValue::CLValue(cl_value) => { let expected = "Contract or Account".to_string(); @@ -233,6 +237,11 @@ impl Transform { Err(StoredValueTypeMismatch::new(expected, found).into()) } }, + Transform::Prune => { + // Prune does not produce new values, it just consumes a stored value that it + // receives. 
+ Ok(None) + } Transform::Failure(error) => Err(error), } } @@ -276,11 +285,14 @@ impl Add for Transform { (a @ Transform::Failure(_), _) => a, (_, b @ Transform::Failure(_)) => b, (_, b @ Transform::Write(_)) => b, + (_, Transform::Prune) => Transform::Prune, + (Transform::Prune, b) => b, (Transform::Write(v), b) => { // second transform changes value being written match b.apply(v) { + Ok(Some(new_value)) => Transform::Write(new_value), + Ok(None) => Transform::Prune, Err(error) => Transform::Failure(error), - Ok(new_value) => Transform::Write(new_value), } } (Transform::AddInt32(i), b) => match b { @@ -384,6 +396,7 @@ impl From<&Transform> for casper_types::Transform { .collect(), ), Transform::Failure(error) => casper_types::Transform::Failure(error.to_string()), + Transform::Prune => casper_types::Transform::Prune, } } } @@ -414,6 +427,7 @@ pub mod gens { buf.copy_from_slice(&u); Transform::AddUInt512(buf.into()) }), + Just(Transform::Prune) ] } } @@ -429,7 +443,7 @@ mod tests { }; use super::*; - use std::collections::BTreeMap; + use std::{collections::BTreeMap, convert::TryInto}; const ZERO_ARRAY: [u8; 32] = [0; 32]; const ZERO_PUBLIC_KEY: AccountHash = AccountHash::new(ZERO_ARRAY); @@ -474,6 +488,16 @@ mod tests { const ONE_U512: U512 = U512([1, 0, 0, 0, 0, 0, 0, 0]); const MAX_U512: U512 = U512([MAX_U64; 8]); + fn add_transforms(value: u32) -> Vec { + vec![ + Transform::AddInt32(value.try_into().expect("positive value")), + Transform::AddUInt64(value.into()), + Transform::AddUInt128(value.into()), + Transform::AddUInt256(value.into()), + Transform::AddUInt512(value.into()), + ] + } + #[test] fn i32_overflow() { let max = std::i32::MAX; @@ -488,8 +512,18 @@ mod tests { let transform_overflow = Transform::AddInt32(max) + Transform::AddInt32(1); let transform_underflow = Transform::AddInt32(min) + Transform::AddInt32(-1); - assert_eq!(apply_overflow.expect("Unexpected overflow"), min_value); - assert_eq!(apply_underflow.expect("Unexpected underflow"), 
max_value); + assert_eq!( + apply_overflow + .expect("Unexpected overflow") + .expect("New value"), + min_value + ); + assert_eq!( + apply_underflow + .expect("Unexpected underflow") + .expect("New value"), + max_value + ); assert_eq!(transform_overflow, min.into()); assert_eq!(transform_underflow, max.into()); @@ -522,9 +556,9 @@ mod tests { let transform_overflow_uint = max_transform + one_transform; let transform_underflow = min_transform + Transform::AddInt32(-1); - assert_eq!(apply_overflow, Ok(zero_value.clone())); - assert_eq!(apply_overflow_uint, Ok(zero_value)); - assert_eq!(apply_underflow, Ok(max_value)); + assert_eq!(apply_overflow, Ok(Some(zero_value.clone()))); + assert_eq!(apply_overflow_uint, Ok(Some(zero_value))); + assert_eq!(apply_underflow, Ok(Some(max_value))); assert_eq!(transform_overflow, zero.into()); assert_eq!(transform_overflow_uint, zero.into()); @@ -863,4 +897,57 @@ mod tests { assert_eq!(ZERO_U512, add(MAX_U512, ONE_U512)); assert_eq!(MAX_U512 - 1, add(MAX_U512, MAX_U512)); } + + #[test] + fn delete_should_produce_correct_transform() { + { + // prune + write == write + let lhs = Transform::Prune; + let rhs = Transform::Write(StoredValue::CLValue(CLValue::unit())); + + let new_transform = lhs + rhs.clone(); + assert_eq!(new_transform, rhs); + } + + { + // prune + identity == prune (prune modifies the global state, identity does not + // modify, so we need to preserve prune) + let new_transform = Transform::Prune + Transform::Identity; + assert_eq!(new_transform, Transform::Prune); + } + + { + // prune + failure == failure + let failure = Transform::Failure(Error::Serialization(bytesrepr::Error::Formatting)); + let new_transform = Transform::Prune + failure.clone(); + assert_eq!(new_transform, failure); + } + + { + // write + prune == prune + let lhs = Transform::Write(StoredValue::CLValue(CLValue::unit())); + let rhs = Transform::Prune; + + let new_transform = lhs + rhs.clone(); + assert_eq!(new_transform, rhs); + } + + { + // add + 
prune == prune + for lhs in add_transforms(123) { + let rhs = Transform::Prune; + let new_transform = lhs + rhs.clone(); + assert_eq!(new_transform, rhs); + } + } + + { + // prune + add == add + for rhs in add_transforms(123) { + let lhs = Transform::Prune; + let new_transform = lhs + rhs.clone(); + assert_eq!(new_transform, rhs); + } + } + } } diff --git a/execution_engine/src/shared/wasm_prep.rs b/execution_engine/src/shared/wasm_prep.rs index 2ed98cd4b1..80a36ceb4c 100644 --- a/execution_engine/src/shared/wasm_prep.rs +++ b/execution_engine/src/shared/wasm_prep.rs @@ -8,6 +8,12 @@ use thiserror::Error; use super::wasm_config::WasmConfig; use crate::core::execution; +const ATOMIC_OPCODE_PREFIX: u8 = 0xfe; +const BULK_OPCODE_PREFIX: u8 = 0xfc; +const SIGN_EXT_OPCODE_START: u8 = 0xc0; +const SIGN_EXT_OPCODE_END: u8 = 0xc4; +const SIMD_OPCODE_PREFIX: u8 = 0xfd; + const DEFAULT_GAS_MODULE_NAME: &str = "env"; /// Name of the internal gas function injected by [`casper_wasm_utils::inject_gas_counter`]. const INTERNAL_GAS_FUNCTION_NAME: &str = "gas"; @@ -405,7 +411,40 @@ pub fn preprocess( /// Returns a parity Module from the given bytes without making modifications or checking limits. 
pub fn deserialize(module_bytes: &[u8]) -> Result { - casper_wasm::deserialize_buffer::(module_bytes).map_err(Into::into) + casper_wasm::deserialize_buffer::(module_bytes).map_err(|deserialize_error| { + match deserialize_error { + casper_wasm::SerializationError::UnknownOpcode(BULK_OPCODE_PREFIX) => { + PreprocessingError::Deserialize( + "Bulk memory operations are not supported".to_string(), + ) + } + casper_wasm::SerializationError::UnknownOpcode(SIMD_OPCODE_PREFIX) => { + PreprocessingError::Deserialize("SIMD operations are not supported".to_string()) + } + casper_wasm::SerializationError::UnknownOpcode(ATOMIC_OPCODE_PREFIX) => { + PreprocessingError::Deserialize("Atomic operations are not supported".to_string()) + } + casper_wasm::SerializationError::UnknownOpcode( + SIGN_EXT_OPCODE_START..=SIGN_EXT_OPCODE_END, + ) => PreprocessingError::Deserialize( + "Sign extension operations are not supported".to_string(), + ), + casper_wasm::SerializationError::Other( + "Enable the multi_value feature to deserialize more than one function result", + ) => { + // Due to the way parity-wasm crate works, it always deserializes opcodes + // from multi_value proposal but if the feature is not enabled, then it will + // error with a very specific message (as compared to other extensions). + // + // That's OK since we'd prefer to not inspect deserialized bytecode. We + // can simply replace the error message with a more user-friendly one. + PreprocessingError::Deserialize( + "Multi value extension is not supported".to_string(), + ) + } + _ => deserialize_error.into(), + } + }) } /// Creates new wasm module from entry points. 
@@ -443,7 +482,10 @@ mod tests { builder, elements::{CodeSection, Instructions}, }; - use walrus::{FunctionBuilder, ModuleConfig, ValType}; + use walrus::{ + ir::{Instr, UnaryOp, Unop}, + FunctionBuilder, ModuleConfig, ValType, + }; use super::*; @@ -645,8 +687,316 @@ mod tests { .expect_err("should fail with an error"); assert!( matches!(&error, PreprocessingError::Deserialize(msg) - // TODO: GH-3762 will improve the error message for unsupported wasm proposals. - if msg == "Enable the multi_value feature to deserialize more than one function result"), + if msg == "Multi value extension is not supported"), + "{:?}", + error, + ); + } + + #[test] + fn should_not_accept_atomics_proposal_wasm() { + let module_bytes = { + let mut module = walrus::Module::with_config(ModuleConfig::new()); + + let _memory_id = module.memories.add_local(false, 11, None); + + let mut func_with_atomics = FunctionBuilder::new(&mut module.types, &[], &[]); + + func_with_atomics.func_body().atomic_fence(); + + let func_with_atomics = func_with_atomics.finish(vec![], &mut module.funcs); + + let mut call_func = FunctionBuilder::new(&mut module.types, &[], &[]); + + call_func.func_body().call(func_with_atomics); + + let call = call_func.finish(Vec::new(), &mut module.funcs); + + module.exports.add(DEFAULT_ENTRY_POINT_NAME, call); + + module.emit_wasm() + }; + let error = preprocess(WasmConfig::default(), &module_bytes) + .expect_err("should fail with an error"); + assert!( + matches!(&error, PreprocessingError::Deserialize(msg) + if msg == "Atomic operations are not supported"), + "{:?}", + error, + ); + } + + #[test] + fn should_not_accept_bulk_proposal_wasm() { + let module_bytes = { + let mut module = walrus::Module::with_config(ModuleConfig::new()); + + let memory_id = module.memories.add_local(false, 11, None); + + let mut func_with_bulk = FunctionBuilder::new(&mut module.types, &[], &[]); + + func_with_bulk.func_body().memory_copy(memory_id, memory_id); + + let func_with_bulk = 
func_with_bulk.finish(vec![], &mut module.funcs); + + let mut call_func = FunctionBuilder::new(&mut module.types, &[], &[]); + + call_func.func_body().call(func_with_bulk); + + let call = call_func.finish(Vec::new(), &mut module.funcs); + + module.exports.add(DEFAULT_ENTRY_POINT_NAME, call); + + module.emit_wasm() + }; + let error = preprocess(WasmConfig::default(), &module_bytes) + .expect_err("should fail with an error"); + assert!( + matches!(&error, PreprocessingError::Deserialize(msg) + if msg == "Bulk memory operations are not supported"), + "{:?}", + error, + ); + } + + #[test] + fn should_not_accept_simd_proposal_wasm() { + let module_bytes = { + let mut module = walrus::Module::with_config(ModuleConfig::new()); + + let _memory_id = module.memories.add_local(false, 11, None); + + let mut func_with_simd = FunctionBuilder::new(&mut module.types, &[], &[]); + + func_with_simd.func_body().v128_bitselect(); + + let func_with_simd = func_with_simd.finish(vec![], &mut module.funcs); + + let mut call_func = FunctionBuilder::new(&mut module.types, &[], &[]); + + call_func.func_body().call(func_with_simd); + + let call = call_func.finish(Vec::new(), &mut module.funcs); + + module.exports.add(DEFAULT_ENTRY_POINT_NAME, call); + + module.emit_wasm() + }; + let error = preprocess(WasmConfig::default(), &module_bytes) + .expect_err("should fail with an error"); + assert!( + matches!(&error, PreprocessingError::Deserialize(msg) + if msg == "SIMD operations are not supported"), + "{:?}", + error, + ); + } + + #[test] + fn should_not_accept_sign_ext_i32_e8s_proposal_wasm() { + let module_bytes = { + let mut module = walrus::Module::with_config(ModuleConfig::new()); + + let _memory_id = module.memories.add_local(false, 11, None); + + let mut func_with_sign_ext = FunctionBuilder::new(&mut module.types, &[], &[]); + + func_with_sign_ext.func_body().i32_const(0); + + { + let mut body = func_with_sign_ext.func_body(); + let instructions = body.instrs_mut(); + let (instr, _) = 
instructions.get_mut(0).unwrap(); + *instr = Instr::Unop(Unop { + op: UnaryOp::I32Extend8S, + }); + } + + let func_with_sign_ext = func_with_sign_ext.finish(vec![], &mut module.funcs); + + let mut call_func = FunctionBuilder::new(&mut module.types, &[], &[]); + + call_func.func_body().call(func_with_sign_ext); + + let call = call_func.finish(Vec::new(), &mut module.funcs); + + module.exports.add(DEFAULT_ENTRY_POINT_NAME, call); + + module.emit_wasm() + }; + let error = preprocess(WasmConfig::default(), &module_bytes) + .expect_err("should fail with an error"); + assert!( + matches!(&error, PreprocessingError::Deserialize(msg) + if msg == "Sign extension operations are not supported"), + "{:?}", + error, + ); + } + + #[test] + fn should_not_accept_sign_ext_i32_e16s_proposal_wasm() { + let module_bytes = { + let mut module = walrus::Module::with_config(ModuleConfig::new()); + + let _memory_id = module.memories.add_local(false, 11, None); + + let mut func_with_sign_ext = FunctionBuilder::new(&mut module.types, &[], &[]); + + func_with_sign_ext.func_body().i32_const(0); + + { + let mut body = func_with_sign_ext.func_body(); + let instructions = body.instrs_mut(); + let (instr, _) = instructions.get_mut(0).unwrap(); + *instr = Instr::Unop(Unop { + op: UnaryOp::I32Extend16S, + }); + } + + let func_with_sign_ext = func_with_sign_ext.finish(vec![], &mut module.funcs); + + let mut call_func = FunctionBuilder::new(&mut module.types, &[], &[]); + + call_func.func_body().call(func_with_sign_ext); + + let call = call_func.finish(Vec::new(), &mut module.funcs); + + module.exports.add(DEFAULT_ENTRY_POINT_NAME, call); + + module.emit_wasm() + }; + let error = preprocess(WasmConfig::default(), &module_bytes) + .expect_err("should fail with an error"); + assert!( + matches!(&error, PreprocessingError::Deserialize(msg) + if msg == "Sign extension operations are not supported"), + "{:?}", + error, + ); + } + + #[test] + fn should_not_accept_sign_ext_i64_e8s_proposal_wasm() { + let 
module_bytes = { + let mut module = walrus::Module::with_config(ModuleConfig::new()); + + let _memory_id = module.memories.add_local(false, 11, None); + + let mut func_with_sign_ext = FunctionBuilder::new(&mut module.types, &[], &[]); + + func_with_sign_ext.func_body().i32_const(0); + + { + let mut body = func_with_sign_ext.func_body(); + let instructions = body.instrs_mut(); + let (instr, _) = instructions.get_mut(0).unwrap(); + *instr = Instr::Unop(Unop { + op: UnaryOp::I64Extend8S, + }); + } + + let func_with_sign_ext = func_with_sign_ext.finish(vec![], &mut module.funcs); + + let mut call_func = FunctionBuilder::new(&mut module.types, &[], &[]); + + call_func.func_body().call(func_with_sign_ext); + + let call = call_func.finish(Vec::new(), &mut module.funcs); + + module.exports.add(DEFAULT_ENTRY_POINT_NAME, call); + + module.emit_wasm() + }; + let error = preprocess(WasmConfig::default(), &module_bytes) + .expect_err("should fail with an error"); + assert!( + matches!(&error, PreprocessingError::Deserialize(msg) + if msg == "Sign extension operations are not supported"), + "{:?}", + error, + ); + } + + #[test] + fn should_not_accept_sign_ext_i64_e16s_proposal_wasm() { + let module_bytes = { + let mut module = walrus::Module::with_config(ModuleConfig::new()); + + let _memory_id = module.memories.add_local(false, 11, None); + + let mut func_with_sign_ext = FunctionBuilder::new(&mut module.types, &[], &[]); + + func_with_sign_ext.func_body().i32_const(0); + + { + let mut body = func_with_sign_ext.func_body(); + let instructions = body.instrs_mut(); + let (instr, _) = instructions.get_mut(0).unwrap(); + *instr = Instr::Unop(Unop { + op: UnaryOp::I64Extend16S, + }); + } + + let func_with_sign_ext = func_with_sign_ext.finish(vec![], &mut module.funcs); + + let mut call_func = FunctionBuilder::new(&mut module.types, &[], &[]); + + call_func.func_body().call(func_with_sign_ext); + + let call = call_func.finish(Vec::new(), &mut module.funcs); + + 
module.exports.add(DEFAULT_ENTRY_POINT_NAME, call); + + module.emit_wasm() + }; + let error = preprocess(WasmConfig::default(), &module_bytes) + .expect_err("should fail with an error"); + assert!( + matches!(&error, PreprocessingError::Deserialize(msg) + if msg == "Sign extension operations are not supported"), + "{:?}", + error, + ); + } + + #[test] + fn should_not_accept_sign_ext_i64_e32s_proposal_wasm() { + let module_bytes = { + let mut module = walrus::Module::with_config(ModuleConfig::new()); + + let _memory_id = module.memories.add_local(false, 11, None); + + let mut func_with_sign_ext = FunctionBuilder::new(&mut module.types, &[], &[]); + + func_with_sign_ext.func_body().i32_const(0); + + { + let mut body = func_with_sign_ext.func_body(); + let instructions = body.instrs_mut(); + let (instr, _) = instructions.get_mut(0).unwrap(); + *instr = Instr::Unop(Unop { + op: UnaryOp::I64Extend32S, + }); + } + + let func_with_sign_ext = func_with_sign_ext.finish(vec![], &mut module.funcs); + + let mut call_func = FunctionBuilder::new(&mut module.types, &[], &[]); + + call_func.func_body().call(func_with_sign_ext); + + let call = call_func.finish(Vec::new(), &mut module.funcs); + + module.exports.add(DEFAULT_ENTRY_POINT_NAME, call); + + module.emit_wasm() + }; + let error = preprocess(WasmConfig::default(), &module_bytes) + .expect_err("should fail with an error"); + assert!( + matches!(&error, PreprocessingError::Deserialize(msg) + if msg == "Sign extension operations are not supported"), "{:?}", error, ); diff --git a/execution_engine/src/storage/global_state/in_memory.rs b/execution_engine/src/storage/global_state/in_memory.rs index a132e74457..1f31f95e17 100644 --- a/execution_engine/src/storage/global_state/in_memory.rs +++ b/execution_engine/src/storage/global_state/in_memory.rs @@ -284,7 +284,7 @@ impl StateProvider for InMemoryGlobalState { Ok(missing_descendants) } - fn delete_keys( + fn prune_keys( &self, correlation_id: CorrelationId, mut root: Digest, diff 
--git a/execution_engine/src/storage/global_state/lmdb.rs b/execution_engine/src/storage/global_state/lmdb.rs index dab903d229..a27a85e7bd 100644 --- a/execution_engine/src/storage/global_state/lmdb.rs +++ b/execution_engine/src/storage/global_state/lmdb.rs @@ -92,7 +92,7 @@ impl LmdbGlobalState { &self, correlation_id: CorrelationId, prestate_hash: Digest, - stored_values: HashMap, + stored_values: HashMap>, ) -> Result { let scratch_trie = self.get_scratch_store(); let new_state_root = put_stored_values::<_, _, error::Error>( @@ -293,8 +293,8 @@ impl StateProvider for LmdbGlobalState { Ok(missing_hashes) } - /// Delete keys. - fn delete_keys( + /// Prune keys. + fn prune_keys( &self, correlation_id: CorrelationId, mut state_root_hash: Digest, @@ -329,6 +329,8 @@ impl StateProvider for LmdbGlobalState { #[cfg(test)] mod tests { + use std::{collections::BTreeSet, iter::FromIterator}; + use lmdb::DatabaseFlags; use tempfile::tempdir; @@ -360,24 +362,32 @@ mod tests { ] } + const KEY_ACCOUNT_1: Key = Key::Account(AccountHash::new([1u8; 32])); + const KEY_ACCOUNT_2: Key = Key::Account(AccountHash::new([2u8; 32])); + const KEY_ACCOUNT_3: Key = Key::Account(AccountHash::new([3u8; 32])); + fn create_test_pairs_updated() -> [TestPair; 3] { [ TestPair { - key: Key::Account(AccountHash::new([1u8; 32])), + key: KEY_ACCOUNT_1, value: StoredValue::CLValue(CLValue::from_t("one".to_string()).unwrap()), }, TestPair { - key: Key::Account(AccountHash::new([2u8; 32])), + key: KEY_ACCOUNT_2, value: StoredValue::CLValue(CLValue::from_t("two".to_string()).unwrap()), }, TestPair { - key: Key::Account(AccountHash::new([3u8; 32])), + key: KEY_ACCOUNT_3, value: StoredValue::CLValue(CLValue::from_t(3_i32).unwrap()), }, ] } - fn create_test_state(pairs_creator: fn() -> [TestPair; 2]) -> (LmdbGlobalState, Digest) { + fn create_test_state(pairs_creator: F) -> (LmdbGlobalState, Digest) + where + T: AsRef<[TestPair]>, + F: FnOnce() -> T, + { let correlation_id = CorrelationId::new(); let 
temp_dir = tempdir().unwrap(); let environment = Arc::new( @@ -397,7 +407,7 @@ mod tests { { let mut txn = ret.environment.create_read_write_txn().unwrap(); - for TestPair { key, value } in &(pairs_creator)() { + for TestPair { key, value } in pairs_creator().as_ref() { match write::<_, _, _, LmdbTrieStore, error::Error>( correlation_id, &mut txn, @@ -466,6 +476,67 @@ mod tests { } } + #[test] + fn commit_updates_state_with_delete() { + let correlation_id = CorrelationId::new(); + let test_pairs_updated = create_test_pairs_updated(); + + let (state, root_hash) = create_test_state(create_test_pairs_updated); + + let effects: AdditiveMap = { + let mut tmp = AdditiveMap::new(); + + let head = test_pairs_updated[..test_pairs_updated.len() - 1].to_vec(); + let tail = test_pairs_updated[test_pairs_updated.len() - 1..].to_vec(); + assert_eq!(head.len() + tail.len(), test_pairs_updated.len()); + + for TestPair { key, value } in &head { + tmp.insert(*key, Transform::Write(value.to_owned())); + } + for TestPair { key, .. 
} in &tail { + tmp.insert(*key, Transform::Prune); + } + + tmp + }; + + let updated_hash = state.commit(correlation_id, root_hash, effects).unwrap(); + + assert_ne!( + root_hash, updated_hash, + "Post state root hash is expected to be different than pre state root hash" + ); + + let updated_checkout = state.checkout(updated_hash).unwrap().unwrap(); + + let all_keys = updated_checkout + .keys_with_prefix(correlation_id, &[]) + .unwrap(); + assert_eq!( + BTreeSet::from_iter(all_keys), + BTreeSet::from_iter(vec![KEY_ACCOUNT_1, KEY_ACCOUNT_2,]) + ); + + let account_1 = updated_checkout + .read(correlation_id, &KEY_ACCOUNT_1) + .unwrap(); + assert_eq!(account_1, Some(test_pairs_updated[0].clone().value)); + + let account_2 = updated_checkout + .read(correlation_id, &KEY_ACCOUNT_2) + .unwrap(); + assert_eq!(account_2, Some(test_pairs_updated[1].clone().value)); + + let account_3 = updated_checkout + .read(correlation_id, &KEY_ACCOUNT_3) + .unwrap(); + assert_eq!( + account_3, None, + "Account {:?} should be deleted", + KEY_ACCOUNT_3 + ); + } + #[test] fn commit_updates_state_and_original_state_stays_intact() { let correlation_id = CorrelationId::new(); diff --git a/execution_engine/src/storage/global_state/mod.rs b/execution_engine/src/storage/global_state/mod.rs index 46c501c763..39897691a4 100644 --- a/execution_engine/src/storage/global_state/mod.rs +++ b/execution_engine/src/storage/global_state/mod.rs @@ -32,7 +32,7 @@ use crate::{ }, }; -use super::trie_store::operations::DeleteResult; +use super::trie_store::operations::{delete, DeleteResult}; /// A trait expressing the reading of state. This trait is used to abstract the underlying store. pub trait StateReader { @@ -123,8 +123,8 @@ pub trait StateProvider { trie_raw: &[u8], ) -> Result, Self::Error>; - /// Delete key from the global state. - fn delete_keys( + /// Prune keys from the global state. 
+ fn prune_keys( &self, correlation_id: CorrelationId, root: Digest, @@ -138,7 +138,7 @@ pub fn put_stored_values<'a, R, S, E>( store: &S, correlation_id: CorrelationId, prestate_hash: Digest, - stored_values: HashMap, + stored_values: HashMap>, ) -> Result where R: TransactionSource<'a, Handle = S::Handle>, @@ -152,17 +152,43 @@ where if maybe_root.is_none() { return Err(CommitError::RootNotFound(prestate_hash).into()); }; - for (key, value) in stored_values.iter() { - let write_result = - write::<_, _, _, _, E>(correlation_id, &mut txn, store, &state_root, key, value)?; - match write_result { - WriteResult::Written(root_hash) => { - state_root = root_hash; + for (key, maybe_value) in stored_values.iter() { + match maybe_value { + Some(value) => { + let write_result = write::<_, _, _, _, E>( + correlation_id, + &mut txn, + store, + &state_root, + key, + value, + )?; + match write_result { + WriteResult::Written(root_hash) => { + state_root = root_hash; + } + WriteResult::AlreadyExists => (), + WriteResult::RootNotFound => { + error!(?state_root, ?key, ?value, "Error writing new value"); + return Err(CommitError::WriteRootNotFound(state_root).into()); + } + } } - WriteResult::AlreadyExists => (), - WriteResult::RootNotFound => { - error!(?state_root, ?key, ?value, "Error writing new value"); - return Err(CommitError::WriteRootNotFound(state_root).into()); + None => { + let delete_result = + delete::<_, _, _, _, E>(correlation_id, &mut txn, store, &state_root, key)?; + match delete_result { + DeleteResult::Deleted(root_hash) => { + state_root = root_hash; + } + DeleteResult::DoesNotExist => { + return Err(CommitError::KeyNotFound(*key).into()); + } + DeleteResult::RootNotFound => { + error!(?state_root, ?key, "Error deleting value"); + return Err(CommitError::WriteRootNotFound(state_root).into()); + } + } } } } @@ -198,7 +224,7 @@ where let read_result = read::<_, _, _, _, E>(correlation_id, &txn, store, &state_root, &key)?; let value = match (read_result, 
transform) { - (ReadResult::NotFound, Transform::Write(new_value)) => new_value, + (ReadResult::NotFound, Transform::Write(new_value)) => Some(new_value), (ReadResult::NotFound, transform) => { error!( ?state_root, @@ -231,17 +257,40 @@ where } }; - let write_result = - write::<_, _, _, _, E>(correlation_id, &mut txn, store, &state_root, &key, &value)?; - - match write_result { - WriteResult::Written(root_hash) => { - state_root = root_hash; + match value { + Some(value) => { + let write_result = write::<_, _, _, _, E>( + correlation_id, + &mut txn, + store, + &state_root, + &key, + &value, + )?; + + match write_result { + WriteResult::Written(root_hash) => { + state_root = root_hash; + } + WriteResult::AlreadyExists => (), + WriteResult::RootNotFound => { + error!(?state_root, ?key, ?value, "Error writing new value"); + return Err(CommitError::WriteRootNotFound(state_root).into()); + } + } } - WriteResult::AlreadyExists => (), - WriteResult::RootNotFound => { - error!(?state_root, ?key, ?value, "Error writing new value"); - return Err(CommitError::WriteRootNotFound(state_root).into()); + None => { + match delete::<_, _, _, _, E>(correlation_id, &mut txn, store, &state_root, &key)? 
{ + DeleteResult::Deleted(root_hash) => { + state_root = root_hash; + } + DeleteResult::DoesNotExist => { + return Err(CommitError::KeyNotFound(key).into()); + } + DeleteResult::RootNotFound => { + return Err(CommitError::RootNotFound(state_root).into()); + } + } } } } diff --git a/execution_engine/src/storage/global_state/scratch.rs b/execution_engine/src/storage/global_state/scratch.rs index 8b1a1442ad..757bce073e 100644 --- a/execution_engine/src/storage/global_state/scratch.rs +++ b/execution_engine/src/storage/global_state/scratch.rs @@ -31,7 +31,7 @@ use crate::{ type SharedCache = Arc>; struct Cache { - cached_values: HashMap, + cached_values: HashMap)>, } impl Cache { @@ -41,21 +41,24 @@ impl Cache { } } - fn insert_write(&mut self, key: Key, value: StoredValue) { + fn insert_write(&mut self, key: Key, value: Option) { self.cached_values.insert(key, (true, value)); } fn insert_read(&mut self, key: Key, value: StoredValue) { - self.cached_values.entry(key).or_insert((false, value)); + self.cached_values + .entry(key) + .or_insert((false, Some(value))); } fn get(&self, key: &Key) -> Option<&StoredValue> { - self.cached_values.get(key).map(|(_dirty, value)| value) + let maybe_value = self.cached_values.get(key).map(|(_dirty, value)| value)?; + maybe_value.as_ref() } /// Consumes self and returns only written values as values that were only read must be filtered /// out to prevent unnecessary writes. - fn into_dirty_writes(self) -> HashMap { + fn into_dirty_writes(self) -> HashMap> { self.cached_values .into_iter() .filter_map(|(key, (dirty, value))| if dirty { Some((key, value)) } else { None }) @@ -104,7 +107,7 @@ impl ScratchGlobalState { } /// Consume self and return inner cache. 
- pub fn into_inner(self) -> HashMap { + pub fn into_inner(self) -> HashMap> { let cache = mem::replace(&mut *self.cache.write().unwrap(), Cache::new()); cache.into_dirty_writes() } @@ -204,7 +207,7 @@ impl CommitProvider for ScratchGlobalState { for (key, transform) in effects.into_iter() { let cached_value = self.cache.read().unwrap().get(&key).cloned(); let value = match (cached_value, transform) { - (None, Transform::Write(new_value)) => new_value, + (None, Transform::Write(new_value)) => Some(new_value), (None, transform) => { // It might be the case that for `Add*` operations we don't have the previous // value in cache yet. @@ -328,7 +331,7 @@ impl StateProvider for ScratchGlobalState { Ok(missing_descendants) } - fn delete_keys( + fn prune_keys( &self, correlation_id: CorrelationId, mut state_root_hash: Digest, @@ -376,14 +379,18 @@ mod tests { value: StoredValue, } + const KEY_ACCOUNT_1: Key = Key::Account(AccountHash::new([1u8; 32])); + const KEY_ACCOUNT_2: Key = Key::Account(AccountHash::new([2u8; 32])); + const KEY_ACCOUNT_3: Key = Key::Account(AccountHash::new([3u8; 32])); + fn create_test_pairs() -> [TestPair; 2] { [ TestPair { - key: Key::Account(AccountHash::new([1_u8; 32])), + key: KEY_ACCOUNT_1, value: StoredValue::CLValue(CLValue::from_t(1_i32).unwrap()), }, TestPair { - key: Key::Account(AccountHash::new([2_u8; 32])), + key: KEY_ACCOUNT_2, value: StoredValue::CLValue(CLValue::from_t(2_i32).unwrap()), }, ] @@ -392,15 +399,15 @@ mod tests { fn create_test_pairs_updated() -> [TestPair; 3] { [ TestPair { - key: Key::Account(AccountHash::new([1u8; 32])), + key: KEY_ACCOUNT_1, value: StoredValue::CLValue(CLValue::from_t("one".to_string()).unwrap()), }, TestPair { - key: Key::Account(AccountHash::new([2u8; 32])), + key: KEY_ACCOUNT_2, value: StoredValue::CLValue(CLValue::from_t("two".to_string()).unwrap()), }, TestPair { - key: Key::Account(AccountHash::new([3u8; 32])), + key: KEY_ACCOUNT_3, value: StoredValue::CLValue(CLValue::from_t(3_i32).unwrap()), 
}, ] @@ -428,7 +435,11 @@ mod tests { root_hash: Digest, } - fn create_test_state() -> TestState { + fn create_test_state(pairs_creator: F) -> TestState + where + T: AsRef<[TestPair]>, + F: FnOnce() -> T, + { let correlation_id = CorrelationId::new(); let temp_dir = tempdir().unwrap(); let environment = Arc::new( @@ -448,7 +459,7 @@ mod tests { { let mut txn = state.environment.create_read_write_txn().unwrap(); - for TestPair { key, value } in &create_test_pairs() { + for TestPair { key, value } in pairs_creator().as_ref() { match write::<_, _, _, LmdbTrieStore, error::Error>( correlation_id, &mut txn, @@ -482,7 +493,7 @@ mod tests { let correlation_id = CorrelationId::new(); let test_pairs_updated = create_test_pairs_updated(); - let TestState { state, root_hash } = create_test_state(); + let TestState { state, root_hash } = create_test_state(create_test_pairs); let scratch = state.create_scratch(); @@ -515,13 +526,10 @@ mod tests { for key in all_keys { assert!(stored_values.get(&key).is_some()); - assert_eq!( - stored_values.get(&key), - updated_checkout - .read(correlation_id, &key) - .unwrap() - .as_ref() - ); + let lhs = stored_values.get(&key); + let stored_value = updated_checkout.read(correlation_id, &key).unwrap(); + let rhs = Some(&stored_value); + assert_eq!(lhs, rhs,); } for TestPair { key, value } in test_pairs_updated.iter().cloned() { @@ -532,17 +540,94 @@ mod tests { } } + #[test] + fn commit_updates_state_with_delete() { + let correlation_id = CorrelationId::new(); + let test_pairs_updated = create_test_pairs_updated(); + + let TestState { state, root_hash } = create_test_state(create_test_pairs_updated); + + let scratch = state.create_scratch(); + + let effects: AdditiveMap = { + let mut tmp = AdditiveMap::new(); + + let head = test_pairs_updated[..test_pairs_updated.len() - 1].to_vec(); + let tail = test_pairs_updated[test_pairs_updated.len() - 1..].to_vec(); + assert_eq!(head.len() + tail.len(), test_pairs_updated.len()); + + for TestPair { 
key, value } in &head { + tmp.insert(*key, Transform::Write(value.to_owned())); + } + for TestPair { key, .. } in &tail { + tmp.insert(*key, Transform::Prune); + } + + tmp + }; + + let scratch_root_hash = scratch + .commit(correlation_id, root_hash, effects.clone()) + .unwrap(); + + assert_eq!( + scratch_root_hash, root_hash, + "ScratchGlobalState should not modify the state root, as it does no hashing" + ); + + let lmdb_hash = state.commit(correlation_id, root_hash, effects).unwrap(); + let updated_checkout = state.checkout(lmdb_hash).unwrap().unwrap(); + + let all_keys = updated_checkout + .keys_with_prefix(correlation_id, &[]) + .unwrap(); + + let stored_values = scratch.into_inner(); + assert_eq!( + all_keys.len(), + stored_values.len() - 1, + "Should delete one key from the global state" + ); + + for key in all_keys { + assert!(stored_values.get(&key).is_some()); + let lhs = stored_values.get(&key).cloned(); + let rhs = updated_checkout.read(correlation_id, &key).unwrap(); + + assert_eq!(lhs, Some(rhs)); + } + + let account_1 = updated_checkout + .read(correlation_id, &KEY_ACCOUNT_1) + .unwrap(); + assert_eq!(account_1, Some(test_pairs_updated[0].clone().value)); + + let account_2 = updated_checkout + .read(correlation_id, &KEY_ACCOUNT_2) + .unwrap(); + assert_eq!(account_2, Some(test_pairs_updated[1].clone().value)); + + let account_3 = updated_checkout + .read(correlation_id, &KEY_ACCOUNT_3) + .unwrap(); + assert_eq!( + account_3, None, + "Account {:?} should be deleted", + KEY_ACCOUNT_3 + ); + } + #[test] fn commit_updates_state_with_add() { let correlation_id = CorrelationId::new(); let test_pairs_updated = create_test_pairs_updated(); // create two lmdb instances, with a scratch instance on the first - let TestState { state, root_hash } = create_test_state(); + let TestState { state, root_hash } = create_test_state(create_test_pairs); let TestState { state: state2, root_hash: state_2_root_hash, - } = create_test_state(); + } = 
create_test_state(create_test_pairs); let scratch = state.create_scratch(); @@ -599,7 +684,7 @@ mod tests { let TestState { state, root_hash, .. - } = create_test_state(); + } = create_test_state(create_test_pairs); let scratch = state.create_scratch(); diff --git a/execution_engine/src/storage/store/mod.rs b/execution_engine/src/storage/store/mod.rs index 19ea5f8953..2db3851ba0 100644 --- a/execution_engine/src/storage/store/mod.rs +++ b/execution_engine/src/storage/store/mod.rs @@ -21,6 +21,24 @@ pub trait Store { /// `handle` returns the underlying store. fn handle(&self) -> Self::Handle; + /// Deserialize a value. + #[inline] + fn deserialize_value(&self, bytes: &[u8]) -> Result + where + V: FromBytes, + { + bytesrepr::deserialize_from_slice(bytes) + } + + /// Serialize a value. + #[inline] + fn serialize_value(&self, value: &V) -> Result, bytesrepr::Error> + where + V: ToBytes, + { + value.to_bytes() + } + /// Returns an optional value (may exist or not) as read through a transaction, or an error /// of the associated `Self::Error` variety. 
fn get(&self, txn: &T, key: &K) -> Result, Self::Error> @@ -33,7 +51,7 @@ pub trait Store { let raw = self.get_raw(txn, key)?; match raw { Some(bytes) => { - let value = bytesrepr::deserialize_from_slice(bytes)?; + let value = self.deserialize_value(&bytes)?; Ok(Some(value)) } None => Ok(None), @@ -61,7 +79,8 @@ pub trait Store { V: ToBytes, Self::Error: From, { - self.put_raw(txn, key, Cow::from(value.to_bytes()?)) + let serialized_value = self.serialize_value(value)?; + self.put_raw(txn, key, Cow::from(serialized_value)) } /// Puts a raw `value` into the store at `key` within a transaction, potentially returning an diff --git a/execution_engine/src/storage/trie/gens.rs b/execution_engine/src/storage/trie/gens.rs index 53485c3b25..955324ea22 100644 --- a/execution_engine/src/storage/trie/gens.rs +++ b/execution_engine/src/storage/trie/gens.rs @@ -32,10 +32,8 @@ pub fn trie_leaf_arb() -> impl Strategy> { } pub fn trie_extension_arb() -> impl Strategy> { - (vec(any::(), 0..32), trie_pointer_arb()).prop_map(|(affix, pointer)| Trie::Extension { - affix: affix.into(), - pointer, - }) + (vec(any::(), 0..32), trie_pointer_arb()) + .prop_map(|(affix, pointer)| Trie::extension(affix, pointer)) } pub fn trie_node_arb() -> impl Strategy> { diff --git a/execution_engine/src/storage/trie/mod.rs b/execution_engine/src/storage/trie/mod.rs index 7cc67aba5a..a091d51844 100644 --- a/execution_engine/src/storage/trie/mod.rs +++ b/execution_engine/src/storage/trie/mod.rs @@ -1,7 +1,7 @@ //! Core types for a Merkle Trie use std::{ - convert::TryInto, + convert::{TryFrom, TryInto}, fmt::{self, Debug, Display, Formatter}, iter::Flatten, mem::MaybeUninit, @@ -9,7 +9,6 @@ use std::{ }; use datasize::DataSize; -use either::Either; use num_derive::{FromPrimitive, ToPrimitive}; use num_traits::{FromPrimitive, ToPrimitive}; use serde::{ @@ -511,40 +510,112 @@ impl Trie { } } -pub(crate) type LazyTrieLeaf = Either>; +/// Bytes representation of a `Trie` that is a `Trie::Leaf` variant. 
+/// The bytes for this trie leaf also include the `Trie::Tag`. +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct TrieLeafBytes(Bytes); -pub(crate) fn lazy_trie_tag(bytes: &[u8]) -> Option { - bytes.first().copied().and_then(TrieTag::from_u8) +impl TrieLeafBytes { + pub(crate) fn bytes(&self) -> &Bytes { + &self.0 + } + + pub(crate) fn try_deserialize_leaf_key( + &self, + ) -> Result<(K, &[u8]), bytesrepr::Error> { + let (tag_byte, rem) = u8::from_bytes(&self.0)?; + let tag = TrieTag::from_u8(tag_byte).ok_or(bytesrepr::Error::Formatting)?; + assert_eq!( + tag, + TrieTag::Leaf, + "Unexpected layout for trie leaf bytes. Expected `TrieTag::Leaf` but got {:?}", + tag + ); + K::from_bytes(rem) + } } -pub(crate) fn lazy_trie_deserialize( - bytes: Bytes, -) -> Result, bytesrepr::Error> -where - K: FromBytes, - V: FromBytes, -{ - let trie_tag = lazy_trie_tag(&bytes); +impl From<&[u8]> for TrieLeafBytes { + fn from(value: &[u8]) -> Self { + Self(value.into()) + } +} - if trie_tag == Some(TrieTag::Leaf) { - Ok(Either::Left(bytes)) - } else { - let deserialized: Trie = bytesrepr::deserialize(bytes.into())?; - Ok(Either::Right(deserialized)) +impl From> for TrieLeafBytes { + fn from(value: Vec) -> Self { + Self(value.into()) } } -pub(crate) fn lazy_trie_iter_children( - trie_bytes: &LazyTrieLeaf, -) -> DescendantsIterator { - match trie_bytes { - Either::Left(_) => { - // Leaf bytes does not have any children - DescendantsIterator::ZeroOrOne(None) +/// Like `Trie` but does not deserialize the leaf when constructed. +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum LazilyDeserializedTrie { + /// Serialized trie leaf bytes + Leaf(TrieLeafBytes), + /// Trie node. + Node { pointer_block: Box }, + /// Trie extension node. 
+ Extension { affix: Bytes, pointer: Pointer }, +} + +impl LazilyDeserializedTrie { + pub(crate) fn iter_children(&self) -> DescendantsIterator { + match self { + LazilyDeserializedTrie::Leaf(_) => { + // Leaf bytes does not have any children + DescendantsIterator::ZeroOrOne(None) + } + LazilyDeserializedTrie::Node { pointer_block } => DescendantsIterator::PointerBlock { + iter: pointer_block.0.iter().flatten(), + }, + LazilyDeserializedTrie::Extension { pointer, .. } => { + DescendantsIterator::ZeroOrOne(Some(pointer.into_hash())) + } } - Either::Right(trie) => { - // Trie::Node or Trie::Extension has children - trie.iter_children() + } +} + +impl FromBytes for LazilyDeserializedTrie { + fn from_bytes(bytes: &[u8]) -> Result<(Self, &[u8]), bytesrepr::Error> { + let (tag_byte, rem) = u8::from_bytes(bytes)?; + let tag = TrieTag::from_u8(tag_byte).ok_or(bytesrepr::Error::Formatting)?; + match tag { + TrieTag::Leaf => Ok((LazilyDeserializedTrie::Leaf(bytes.into()), &[])), + TrieTag::Node => { + let (pointer_block, rem) = PointerBlock::from_bytes(rem)?; + Ok(( + LazilyDeserializedTrie::Node { + pointer_block: Box::new(pointer_block), + }, + rem, + )) + } + TrieTag::Extension => { + let (affix, rem) = FromBytes::from_bytes(rem)?; + let (pointer, rem) = Pointer::from_bytes(rem)?; + Ok((LazilyDeserializedTrie::Extension { affix, pointer }, rem)) + } + } + } +} + +impl TryFrom> for LazilyDeserializedTrie +where + K: ToBytes, + V: ToBytes, +{ + type Error = bytesrepr::Error; + + fn try_from(value: Trie) -> Result { + match value { + Trie::Leaf { .. 
} => { + let serialized_bytes = ToBytes::to_bytes(&value)?; + Ok(LazilyDeserializedTrie::Leaf(serialized_bytes.into())) + } + Trie::Node { pointer_block } => Ok(LazilyDeserializedTrie::Node { pointer_block }), + Trie::Extension { affix, pointer } => { + Ok(LazilyDeserializedTrie::Extension { affix, pointer }) + } } } } @@ -596,6 +667,8 @@ where } fn write_bytes(&self, writer: &mut Vec) -> Result<(), bytesrepr::Error> { + // NOTE: When changing this make sure all partial deserializers that are referencing + // `LazyTrieLeaf` are also updated. writer.push(u8::from(self.tag())); match self { Trie::Leaf { key, value } => { @@ -640,6 +713,24 @@ impl FromBytes for Trie { } } +impl TryFrom for Trie { + type Error = bytesrepr::Error; + + fn try_from(value: LazilyDeserializedTrie) -> Result { + match value { + LazilyDeserializedTrie::Leaf(leaf_bytes) => { + let (key, value_bytes) = leaf_bytes.try_deserialize_leaf_key()?; + let value = bytesrepr::deserialize_from_slice(value_bytes)?; + Ok(Self::Leaf { key, value }) + } + LazilyDeserializedTrie::Node { pointer_block } => Ok(Self::Node { pointer_block }), + LazilyDeserializedTrie::Extension { affix, pointer } => { + Ok(Self::Extension { affix, pointer }) + } + } + } +} + pub(crate) mod operations { use casper_types::bytesrepr::{self, ToBytes}; diff --git a/execution_engine/src/storage/trie/tests.rs b/execution_engine/src/storage/trie/tests.rs index b0f87a43f0..b21169d5cb 100644 --- a/execution_engine/src/storage/trie/tests.rs +++ b/execution_engine/src/storage/trie/tests.rs @@ -92,12 +92,65 @@ mod pointer_block { } mod proptests { + use std::convert::TryInto; + use proptest::prelude::*; use casper_hashing::Digest; - use casper_types::{bytesrepr, gens::key_arb, Key, StoredValue}; + use casper_types::{ + bytesrepr::{self, deserialize_from_slice, FromBytes, ToBytes}, + gens::key_arb, + Key, StoredValue, + }; + + use crate::storage::trie::{gens::*, LazilyDeserializedTrie, PointerBlock, Trie}; + + fn 
test_trie_roundtrip_to_lazy_trie(trie: &Trie) + where + K: ToBytes + FromBytes + PartialEq + std::fmt::Debug + Clone, + V: ToBytes + FromBytes + PartialEq + std::fmt::Debug + Clone, + { + let serialized = ToBytes::to_bytes(trie).expect("Unable to serialize data"); + + let expected_lazy_trie_leaf: LazilyDeserializedTrie = (*trie) + .clone() + .try_into() + .expect("Cannot convert Trie to LazilyDeserializedTrie"); + + let deserialized_from_slice: LazilyDeserializedTrie = + deserialize_from_slice(&serialized).expect("Unable to deserialize data"); + assert_eq!(expected_lazy_trie_leaf, deserialized_from_slice); + assert_eq!( + *trie, + deserialized_from_slice + .clone() + .try_into() + .expect("Expected to be able to convert LazilyDeserializedTrie to Trie") + ); + if let LazilyDeserializedTrie::Leaf(leaf_bytes) = deserialized_from_slice { + let (key, _) = leaf_bytes + .try_deserialize_leaf_key::() + .expect("Should have been able to deserialize key"); + assert_eq!(key, *trie.key().unwrap()); + }; - use crate::storage::trie::{gens::*, PointerBlock, Trie}; + let deserialized: LazilyDeserializedTrie = + bytesrepr::deserialize(serialized).expect("Unable to deserialize data"); + assert_eq!(expected_lazy_trie_leaf, deserialized); + assert_eq!( + *trie, + deserialized + .clone() + .try_into() + .expect("Expected to be able to convert LazilyDeserializedTrie to Trie") + ); + if let LazilyDeserializedTrie::Leaf(leaf_bytes) = deserialized { + let (key, _) = leaf_bytes + .try_deserialize_leaf_key::() + .expect("Should have been able to deserialize key"); + assert_eq!(key, *trie.key().unwrap()); + }; + } proptest! 
{ #[test] @@ -120,6 +173,21 @@ mod proptests { bytesrepr::test_serialization_roundtrip(&trie_leaf); } + #[test] + fn bytesrepr_roundtrip_trie_leaf_to_lazy_trie(trie_leaf in trie_leaf_arb()) { + test_trie_roundtrip_to_lazy_trie(&trie_leaf) + } + + #[test] + fn bytesrepr_roundtrip_trie_extension_to_lazy_trie(trie_extension in trie_extension_arb()) { + test_trie_roundtrip_to_lazy_trie(&trie_extension) + } + + #[test] + fn bytesrepr_roundtrip_trie_node_to_lazy_trie(trie_node in trie_node_arb()) { + test_trie_roundtrip_to_lazy_trie(&trie_node); + } + #[test] fn bytesrepr_roundtrip_trie_extension(trie_extension in trie_extension_arb()) { bytesrepr::test_serialization_roundtrip(&trie_extension); diff --git a/execution_engine/src/storage/trie_store/lmdb.rs b/execution_engine/src/storage/trie_store/lmdb.rs index 01131e3659..973539497c 100644 --- a/execution_engine/src/storage/trie_store/lmdb.rs +++ b/execution_engine/src/storage/trie_store/lmdb.rs @@ -122,7 +122,7 @@ use crate::storage::{ global_state::CommitError, store::Store, transaction_source::{lmdb::LmdbEnvironment, Readable, TransactionSource, Writable}, - trie::{self, LazyTrieLeaf, Trie}, + trie::{LazilyDeserializedTrie, Trie}, trie_store::{self, TrieStore}, }; @@ -219,9 +219,8 @@ impl ScratchTrieStore { continue; }; - let lazy_trie: LazyTrieLeaf = - trie::lazy_trie_deserialize(trie_bytes.clone())?; - tries_to_write.extend(trie::lazy_trie_iter_children(&lazy_trie)); + let lazy_trie: LazilyDeserializedTrie = bytesrepr::deserialize_from_slice(trie_bytes)?; + tries_to_write.extend(lazy_trie.iter_children()); Store::>::put_raw( &*self.store, diff --git a/execution_engine/src/storage/trie_store/operations/mod.rs b/execution_engine/src/storage/trie_store/operations/mod.rs index 726143dc99..97ca009a25 100644 --- a/execution_engine/src/storage/trie_store/operations/mod.rs +++ b/execution_engine/src/storage/trie_store/operations/mod.rs @@ -1,3 +1,4 @@ +pub(crate) mod store_wrappers; #[cfg(test)] mod tests; @@ -5,7 +6,6 @@ 
mod tests; use std::collections::HashSet; use std::{borrow::Cow, cmp, collections::VecDeque, convert::TryInto, mem}; -use either::Either; use num_traits::FromPrimitive; use tracing::{error, warn}; @@ -15,16 +15,19 @@ use casper_types::bytesrepr::{self, Bytes, FromBytes, ToBytes}; use crate::{ shared::newtypes::CorrelationId, storage::{ + store::Store, transaction_source::{Readable, Writable}, trie::{ - self, merkle_proof::{TrieMerkleProof, TrieMerkleProofStep}, - Parents, Pointer, PointerBlock, Trie, TrieTag, RADIX, USIZE_EXCEEDS_U8, + LazilyDeserializedTrie, Parents, Pointer, PointerBlock, Trie, TrieTag, RADIX, + USIZE_EXCEEDS_U8, }, trie_store::TrieStore, }, }; +use self::store_wrappers::NonDeserializingStore; + #[allow(clippy::enum_variant_names)] #[derive(Debug, PartialEq, Eq)] pub enum ReadResult { @@ -58,6 +61,8 @@ where { let path: Vec = key.to_bytes()?; + let store = store_wrappers::OnceDeserializingStore::new(store); + let mut depth: usize = 0; let mut current: Trie = match store.get(txn, root)? { Some(root) => root, @@ -289,60 +294,24 @@ where }) } -struct TrieScan { - tip: Trie, - parents: Parents, -} - -impl TrieScan { - fn new(tip: Trie, parents: Parents) -> Self { - TrieScan { tip, parents } - } -} - -/// Returns a [`TrieScan`] from the given key at a given root in a given store. -/// A scan consists of the deepest trie variant found at that key, a.k.a. the -/// "tip", along with the parents of that variant. Parents are ordered by -/// their depth from the root (shallow to deep). 
-fn scan( - txn: &T, - store: &S, - key_bytes: &[u8], - root: &Trie, -) -> Result, E> -where - K: ToBytes + FromBytes + Clone, - V: ToBytes + FromBytes + Clone, - T: Readable, - S: TrieStore, - S::Error: From, - E: From + From, -{ - let root_bytes = root.to_bytes()?; - let TrieScanRaw { tip, parents } = - scan_raw::(txn, store, key_bytes, root_bytes.into())?; - let tip = match tip { - Either::Left(trie_leaf_bytes) => bytesrepr::deserialize(trie_leaf_bytes.to_vec())?, - Either::Right(tip) => tip, - }; - Ok(TrieScan::new(tip, parents)) -} - struct TrieScanRaw { - tip: Either>, + tip: LazilyDeserializedTrie, parents: Parents, } impl TrieScanRaw { - fn new(tip: Either>, parents: Parents) -> Self { + fn new(tip: LazilyDeserializedTrie, parents: Parents) -> Self { TrieScanRaw { tip, parents } } } -/// Just like scan, however we don't parse the tip. +/// Returns a [`TrieScanRaw`] from the given key at a given root in a given store. +/// A scan consists of the deepest trie variant found at that key, a.k.a. the +/// "tip", along the with the parents of that variant. Parents are ordered by +/// their depth from the root (shallow to deep). The tip is not parsed. fn scan_raw( txn: &T, - store: &S, + store: &NonDeserializingStore, key_bytes: &[u8], root_bytes: Bytes, ) -> Result, E> @@ -356,24 +325,17 @@ where { let path = key_bytes; - let mut current_trie; let mut current = root_bytes; let mut depth: usize = 0; let mut acc: Parents = Vec::new(); loop { - let maybe_trie_leaf = trie::lazy_trie_deserialize(current)?; - current_trie = match maybe_trie_leaf { - leaf_bytes @ Either::Left(_) => return Ok(TrieScanRaw::new(leaf_bytes, acc)), - Either::Right(trie_object) => trie_object, - }; - match current_trie { - _leaf @ Trie::Leaf { .. } => { - // since we are checking if this is a leaf and skipping, we do not expect to ever - // hit this. 
- unreachable!() + let maybe_trie_leaf = bytesrepr::deserialize_from_slice(¤t)?; + match maybe_trie_leaf { + leaf_bytes @ LazilyDeserializedTrie::Leaf(_) => { + return Ok(TrieScanRaw::new(leaf_bytes, acc)) } - Trie::Node { pointer_block } => { + LazilyDeserializedTrie::Node { pointer_block } => { let index = { assert!(depth < path.len(), "depth must be < {}", path.len()); path[depth] @@ -387,7 +349,7 @@ where Some(pointer) => pointer, None => { return Ok(TrieScanRaw::new( - Either::Right(Trie::Node { pointer_block }), + LazilyDeserializedTrie::Node { pointer_block }, acc, )); } @@ -407,11 +369,11 @@ where } } } - Trie::Extension { affix, pointer } => { + LazilyDeserializedTrie::Extension { affix, pointer } => { let sub_path = &path[depth..depth + affix.len()]; if sub_path != affix.as_slice() { return Ok(TrieScanRaw::new( - Either::Right(Trie::Extension { affix, pointer }), + LazilyDeserializedTrie::Extension { affix, pointer }, acc, )); } @@ -423,7 +385,7 @@ where }; current = next; depth += affix.len(); - acc.push((index, Trie::Extension { affix, pointer })) + acc.push((index, Trie::extension(affix.into(), pointer))) } None => { panic!( @@ -461,6 +423,7 @@ where S::Error: From, E: From + From, { + let store = store_wrappers::NonDeserializingStore::new(store); let root_trie_bytes = match store.get_raw(txn, root)? { None => return Ok(DeleteResult::RootNotFound), Some(root_trie) => root_trie, @@ -468,23 +431,16 @@ where let key_bytes = key_to_delete.to_bytes()?; let TrieScanRaw { tip, mut parents } = - scan_raw::<_, _, _, _, E>(txn, store, &key_bytes, root_trie_bytes)?; + scan_raw::<_, _, _, _, E>(txn, &store, &key_bytes, root_trie_bytes)?; // Check that tip is a leaf match tip { - Either::Left(bytes) + LazilyDeserializedTrie::Leaf(leaf_bytes) if { // Partially deserialize a key of a leaf node to ensure that we can only continue if // the key matches what we're looking for. 
- let ((tag_u8, key), _rem): ((u8, K), _) = FromBytes::from_bytes(&bytes)?; - let trie_tag = TrieTag::from_u8(tag_u8); // _rem contains bytes of serialized V, but we don't need to inspect it. - assert_eq!( - trie_tag, - Some(TrieTag::Leaf), - "Tip should contain leaf bytes, but has tag {:?}", - trie_tag - ); + let (key, _rem) = leaf_bytes.try_deserialize_leaf_key::()?; key == *key_to_delete } => {} _ => return Ok(DeleteResult::DoesNotExist), @@ -586,10 +542,8 @@ where // this extension might need to be combined with a grandparent // extension. Trie::Node { .. } => { - let new_extension: Trie = Trie::Extension { - affix: vec![sibling_idx].into(), - pointer: sibling_pointer, - }; + let new_extension: Trie = + Trie::extension(vec![sibling_idx], sibling_pointer); let trie_key = new_extension.trie_hash()?; new_elements.push((trie_key, new_extension)) } @@ -604,10 +558,7 @@ where } => { let mut new_affix = vec![sibling_idx]; new_affix.extend(Vec::::from(extension_affix)); - let new_extension: Trie = Trie::Extension { - affix: new_affix.into(), - pointer, - }; + let new_extension: Trie = Trie::extension(new_affix, pointer); let trie_key = new_extension.trie_hash()?; new_elements.push((trie_key, new_extension)) } @@ -643,10 +594,8 @@ where new_affix.extend_from_slice(child_affix.as_slice()); *child_affix = new_affix.into(); *trie_key = { - let new_extension: Trie = Trie::Extension { - affix: child_affix.to_owned(), - pointer: pointer.to_owned(), - }; + let new_extension: Trie = + Trie::extension(child_affix.to_owned().into(), pointer.to_owned()); new_extension.trie_hash()? } } @@ -923,57 +872,61 @@ where S::Error: From, E: From + From, { - match store.get(txn, root)? { + let store = store_wrappers::NonDeserializingStore::new(store); + match store.get_raw(txn, root)? 
{ None => Ok(WriteResult::RootNotFound), - Some(current_root) => { + Some(current_root_bytes) => { let new_leaf = Trie::Leaf { key: key.to_owned(), value: value.to_owned(), }; let path: Vec = key.to_bytes()?; - let TrieScan { tip, parents } = - scan::(txn, store, &path, ¤t_root)?; + let TrieScanRaw { tip, parents } = + scan_raw::(txn, &store, &path, current_root_bytes)?; let new_elements: Vec<(Digest, Trie)> = match tip { - // If the "tip" is the same as the new leaf, then the leaf - // is already in the Trie. - Trie::Leaf { .. } if new_leaf == tip => Vec::new(), - // If the "tip" is an existing leaf with the same key as the - // new leaf, but the existing leaf and new leaf have different - // values, then we are in the situation where we are "updating" - // an existing leaf. - Trie::Leaf { - key: ref leaf_key, - value: ref leaf_value, - } if key == leaf_key && value != leaf_value => rehash(new_leaf, parents)?, - // If the "tip" is an existing leaf with a different key than - // the new leaf, then we are in a situation where the new leaf - // shares some common prefix with the existing leaf. - Trie::Leaf { - key: ref existing_leaf_key, - .. - } if key != existing_leaf_key => { - let existing_leaf_path = existing_leaf_key.to_bytes()?; - let (new_node, parents) = reparent_leaf(&path, &existing_leaf_path, parents)?; - let parents = add_node_to_parents(&path, new_node, parents); - rehash(new_leaf, parents)? + LazilyDeserializedTrie::Leaf(leaf_bytes) => { + let (existing_leaf_key, existing_value_bytes) = + leaf_bytes.try_deserialize_leaf_key()?; + + if key != &existing_leaf_key { + // If the "tip" is an existing leaf with a different key than + // the new leaf, then we are in a situation where the new leaf + // shares some common prefix with the existing leaf. 
+ let existing_leaf_path = existing_leaf_key.to_bytes()?; + let (new_node, parents) = + reparent_leaf(&path, &existing_leaf_path, parents)?; + let parents = add_node_to_parents(&path, new_node, parents); + rehash(new_leaf, parents)? + } else { + let new_value_bytes = value.to_bytes()?; + if new_value_bytes != existing_value_bytes { + // If the "tip" is an existing leaf with the same key as the + // new leaf, but the existing leaf and new leaf have different + // values, then we are in the situation where we are "updating" + // an existing leaf. + rehash(new_leaf, parents)? + } else { + // Both key and values are the same. + // If the "tip" is the same as the new leaf, then the leaf + // is already in the Trie. + Vec::new() + } + } } - // This case is unreachable, but the compiler can't figure - // that out. - Trie::Leaf { .. } => unreachable!(), // If the "tip" is an existing node, then we can add a pointer // to the new leaf to the node's pointer block. - node @ Trie::Node { .. } => { - let parents = add_node_to_parents(&path, node, parents); + node @ LazilyDeserializedTrie::Node { .. } => { + let parents = add_node_to_parents(&path, node.try_into()?, parents); rehash(new_leaf, parents)? } // If the "tip" is an extension node, then we must modify or // replace it, adding a node where necessary. - extension @ Trie::Extension { .. } => { + extension @ LazilyDeserializedTrie::Extension { .. 
} => { let SplitResult { new_node, parents, maybe_hashed_child_extension, - } = split_extension(&path, extension, parents)?; + } = split_extension(&path, extension.try_into()?, parents)?; let parents = add_node_to_parents(&path, new_node, parents); if let Some(hashed_extension) = maybe_hashed_child_extension { let mut ret = vec![hashed_extension]; @@ -1027,16 +980,16 @@ enum KeysIteratorState> { Failed, } -struct VisitedTrieNode { - trie: Trie, +struct VisitedTrieNode { + trie: LazilyDeserializedTrie, maybe_index: Option, path: Vec, } pub struct KeysIterator<'a, 'b, K, V, T, S: TrieStore> { initial_descend: VecDeque, - visited: Vec>, - store: &'a S, + visited: Vec, + store: NonDeserializingStore<'a, K, V, S>, txn: &'b T, state: KeysIteratorState, } @@ -1068,25 +1021,37 @@ where mut path, }) = self.visited.pop() { - let mut maybe_next_trie: Option> = None; + let mut maybe_next_trie: Option = None; match trie { - Trie::Leaf { key, .. } => { - let key_bytes = match key.to_bytes() { - Ok(bytes) => bytes, - Err(e) => { - self.state = KeysIteratorState::Failed; - return Some(Err(e.into())); - } - }; - debug_assert!(key_bytes.starts_with(&path)); + LazilyDeserializedTrie::Leaf(leaf_bytes) => { + let leaf_bytes = leaf_bytes.bytes(); + if leaf_bytes.is_empty() { + self.state = KeysIteratorState::Failed; + return Some(Err(bytesrepr::Error::Formatting.into())); + } + + let key_bytes = &leaf_bytes[1..]; // Skip `Trie::Leaf` tag + debug_assert!( + key_bytes.starts_with(&path), + "Expected key bytes to start with the current path" + ); + // only return the leaf if it matches the initial descend path path.extend(&self.initial_descend); if key_bytes.starts_with(&path) { + // Only deserializes K when we're absolutely sure the path matches. 
+ let (key, _stored_value): (K, _) = match K::from_bytes(key_bytes) { + Ok(key) => key, + Err(error) => { + self.state = KeysIteratorState::Failed; + return Some(Err(error.into())); + } + }; return Some(Ok(key)); } } - Trie::Node { ref pointer_block } => { + LazilyDeserializedTrie::Node { ref pointer_block } => { // if we are still initially descending (and initial_descend is not empty), take // the first index we should descend to, otherwise take maybe_index from the // visited stack @@ -1098,14 +1063,28 @@ where .unwrap_or_default(); while index < RADIX { if let Some(ref pointer) = pointer_block[index] { - maybe_next_trie = match self.store.get(self.txn, pointer.hash()) { - Ok(trie) => trie, - Err(e) => { - self.state = KeysIteratorState::Failed; - return Some(Err(e)); + maybe_next_trie = { + match self.store.get_raw(self.txn, pointer.hash()) { + Ok(Some(trie_bytes)) => { + match bytesrepr::deserialize_from_slice(&trie_bytes) { + Ok(lazy_trie) => Some(lazy_trie), + Err(error) => { + self.state = KeysIteratorState::Failed; + return Some(Err(error.into())); + } + } + } + Ok(None) => None, + Err(error) => { + self.state = KeysIteratorState::Failed; + return Some(Err(error)); + } } }; - debug_assert!(maybe_next_trie.is_some()); + debug_assert!( + maybe_next_trie.is_some(), + "Trie at the pointer is expected to exist" + ); if self.initial_descend.pop_front().is_none() { self.visited.push(VisitedTrieNode { trie, @@ -1125,7 +1104,7 @@ where index += 1; } } - Trie::Extension { affix, pointer } => { + LazilyDeserializedTrie::Extension { affix, pointer } => { let descend_len = cmp::min(self.initial_descend.len(), affix.len()); let check_prefix = self .initial_descend @@ -1136,14 +1115,27 @@ where // if we are not, the check_prefix will be empty, so we will enter the if // anyway if affix.starts_with(&check_prefix) { - maybe_next_trie = match self.store.get(self.txn, pointer.hash()) { - Ok(trie) => trie, + maybe_next_trie = match self.store.get_raw(self.txn, pointer.hash()) 
{ + Ok(Some(trie_bytes)) => { + match bytesrepr::deserialize_from_slice(&trie_bytes) { + Ok(lazy_trie) => Some(lazy_trie), + Err(error) => { + self.state = KeysIteratorState::Failed; + return Some(Err(error.into())); + } + } + } + Ok(None) => None, Err(e) => { self.state = KeysIteratorState::Failed; return Some(Err(e)); } }; - debug_assert!({ matches!(&maybe_next_trie, Some(Trie::Node { .. })) }); + debug_assert!( + matches!(&maybe_next_trie, Some(LazilyDeserializedTrie::Node { .. }),), + "Expected a LazilyDeserializedTrie::Node but received {:?}", + maybe_next_trie + ); path.extend(affix); } } @@ -1178,17 +1170,24 @@ where S: TrieStore, S::Error: From, { - let (visited, init_state): (Vec>, _) = match store.get(txn, root) { + let store = store_wrappers::NonDeserializingStore::new(store); + let (visited, init_state): (Vec, _) = match store.get_raw(txn, root) { Ok(None) => (vec![], KeysIteratorState::Ok), Err(e) => (vec![], KeysIteratorState::ReturnError(e)), - Ok(Some(current_root)) => ( - vec![VisitedTrieNode { - trie: current_root, - maybe_index: None, - path: vec![], - }], - KeysIteratorState::Ok, - ), + Ok(Some(current_root_bytes)) => match bytesrepr::deserialize_from_slice(current_root_bytes) + { + Ok(lazy_trie) => { + let visited = vec![VisitedTrieNode { + trie: lazy_trie, + maybe_index: None, + path: vec![], + }]; + let init_state = KeysIteratorState::Ok; + + (visited, init_state) + } + Err(error) => (vec![], KeysIteratorState::ReturnError(error.into())), + }, }; KeysIterator { diff --git a/execution_engine/src/storage/trie_store/operations/store_wrappers.rs b/execution_engine/src/storage/trie_store/operations/store_wrappers.rs new file mode 100644 index 0000000000..2cb03b774e --- /dev/null +++ b/execution_engine/src/storage/trie_store/operations/store_wrappers.rs @@ -0,0 +1,240 @@ +use std::marker::PhantomData; +#[cfg(debug_assertions)] +use std::{ + collections::HashSet, + sync::{Arc, Mutex}, +}; + +use casper_hashing::Digest; +use 
casper_types::bytesrepr::{self, FromBytes, ToBytes}; + +use crate::storage::{ + store::Store, + transaction_source::{Readable, Writable}, + trie::Trie, + trie_store::TrieStore, +}; + +/// A [`TrieStore`] wrapper that panics in debug mode whenever an attempt to deserialize [`V`] is +/// made, otherwise it behaves as a [`TrieStore`]. +/// +/// To ensure this wrapper has zero overhead, a debug assertion is used. +pub(crate) struct NonDeserializingStore<'a, K, V, S>(&'a S, PhantomData<*const (K, V)>) +where + S: TrieStore; + +impl<'a, K, V, S> NonDeserializingStore<'a, K, V, S> +where + S: TrieStore, +{ + pub(crate) fn new(store: &'a S) -> Self { + Self(store, PhantomData) + } +} + +impl<'a, K, V, S> Store> for NonDeserializingStore<'a, K, V, S> +where + S: TrieStore, +{ + type Error = S::Error; + + type Handle = S::Handle; + + #[inline] + fn handle(&self) -> Self::Handle { + self.0.handle() + } + + #[inline] + fn deserialize_value(&self, bytes: &[u8]) -> Result, bytesrepr::Error> + where + Trie: FromBytes, + { + #[cfg(debug_assertions)] + { + let trie: Trie = self.0.deserialize_value(bytes)?; + if let Trie::Leaf { .. 
} = trie { + panic!("Tried to deserialize a value but expected no deserialization to happen.") + } + Ok(trie) + } + #[cfg(not(debug_assertions))] + { + self.0.deserialize_value(bytes) + } + } + + #[inline] + fn serialize_value(&self, value: &Trie) -> Result, bytesrepr::Error> + where + Trie: ToBytes, + { + self.0.serialize_value(value) + } + + #[inline] + fn get(&self, txn: &T, key: &Digest) -> Result>, Self::Error> + where + T: Readable, + Digest: AsRef<[u8]>, + Trie: FromBytes, + Self::Error: From, + { + self.0.get(txn, key) + } + + #[inline] + fn get_raw(&self, txn: &T, key: &Digest) -> Result, Self::Error> + where + T: Readable, + Digest: AsRef<[u8]>, + Self::Error: From, + { + self.0.get_raw(txn, key) + } + + #[inline] + fn put(&self, txn: &mut T, key: &Digest, value: &Trie) -> Result<(), Self::Error> + where + T: Writable, + Digest: AsRef<[u8]>, + Trie: ToBytes, + Self::Error: From, + { + self.0.put(txn, key, value) + } + + #[inline] + fn put_raw( + &self, + txn: &mut T, + key: &Digest, + value_bytes: std::borrow::Cow<'_, [u8]>, + ) -> Result<(), Self::Error> + where + T: Writable, + Digest: AsRef<[u8]>, + Self::Error: From, + { + self.0.put_raw(txn, key, value_bytes) + } +} + +pub(crate) struct OnceDeserializingStore<'a, K: ToBytes, V: ToBytes, S: TrieStore> { + store: &'a S, + #[cfg(debug_assertions)] + deserialize_tracking: Arc>>, + _marker: PhantomData<*const (K, V)>, +} + +impl<'a, K, V, S> OnceDeserializingStore<'a, K, V, S> +where + K: ToBytes, + V: ToBytes, + S: TrieStore, +{ + pub(crate) fn new(store: &'a S) -> Self { + Self { + store, + #[cfg(debug_assertions)] + deserialize_tracking: Arc::new(Mutex::new(HashSet::new())), + _marker: PhantomData, + } + } +} + +impl<'a, K, V, S> Store> for OnceDeserializingStore<'a, K, V, S> +where + K: ToBytes, + V: ToBytes, + S: TrieStore, +{ + type Error = S::Error; + + type Handle = S::Handle; + + #[inline] + fn handle(&self) -> Self::Handle { + self.store.handle() + } + + #[inline] + fn deserialize_value(&self, 
bytes: &[u8]) -> Result, bytesrepr::Error> + where + Trie: FromBytes, + { + #[cfg(debug_assertions)] + { + let trie: Trie = self.store.deserialize_value(bytes)?; + if let Trie::Leaf { .. } = trie { + let trie_hash = trie.trie_hash()?; + let mut tracking = self.deserialize_tracking.lock().expect("Poisoned lock"); + if tracking.get(&trie_hash).is_some() { + panic!("Tried to deserialize a value more than once."); + } else { + tracking.insert(trie_hash); + } + } + Ok(trie) + } + #[cfg(not(debug_assertions))] + { + self.store.deserialize_value(bytes) + } + } + + #[inline] + fn serialize_value(&self, value: &Trie) -> Result, bytesrepr::Error> + where + Trie: ToBytes, + { + self.store.serialize_value(value) + } + + #[inline] + fn get(&self, txn: &T, key: &Digest) -> Result>, Self::Error> + where + T: Readable, + Digest: AsRef<[u8]>, + Trie: FromBytes, + Self::Error: From, + { + self.store.get(txn, key) + } + + #[inline] + fn get_raw(&self, txn: &T, key: &Digest) -> Result, Self::Error> + where + T: Readable, + Digest: AsRef<[u8]>, + Self::Error: From, + { + self.store.get_raw(txn, key) + } + + #[inline] + fn put(&self, txn: &mut T, key: &Digest, value: &Trie) -> Result<(), Self::Error> + where + T: Writable, + Digest: AsRef<[u8]>, + Trie: ToBytes, + Self::Error: From, + { + self.store.put(txn, key, value) + } + + #[inline] + fn put_raw( + &self, + txn: &mut T, + key: &Digest, + value_bytes: std::borrow::Cow<'_, [u8]>, + ) -> Result<(), Self::Error> + where + T: Writable, + Digest: AsRef<[u8]>, + Self::Error: From, + { + self.store.put_raw(txn, key, value_bytes) + } +} diff --git a/execution_engine/src/storage/trie_store/operations/tests/bytesrepr_utils.rs b/execution_engine/src/storage/trie_store/operations/tests/bytesrepr_utils.rs new file mode 100644 index 0000000000..7c44d0f9af --- /dev/null +++ b/execution_engine/src/storage/trie_store/operations/tests/bytesrepr_utils.rs @@ -0,0 +1,43 @@ +use casper_types::bytesrepr::{self, FromBytes, ToBytes}; + +#[derive(PartialEq, 
Eq, Debug, Clone)] +pub(crate) struct PanickingFromBytes(T); + +impl PanickingFromBytes { + pub(crate) fn new(inner: T) -> PanickingFromBytes { + PanickingFromBytes(inner) + } +} + +impl FromBytes for PanickingFromBytes +where + T: FromBytes, +{ + fn from_bytes(_: &[u8]) -> Result<(Self, &[u8]), bytesrepr::Error> { + unreachable!("This type is expected to never deserialize."); + } +} + +impl ToBytes for PanickingFromBytes +where + T: ToBytes, +{ + fn into_bytes(self) -> Result, bytesrepr::Error> + where + Self: Sized, + { + self.0.into_bytes() + } + + fn write_bytes(&self, writer: &mut Vec) -> Result<(), bytesrepr::Error> { + self.0.write_bytes(writer) + } + + fn to_bytes(&self) -> Result, bytesrepr::Error> { + self.0.to_bytes() + } + + fn serialized_length(&self) -> usize { + self.0.serialized_length() + } +} diff --git a/execution_engine/src/storage/trie_store/operations/tests/delete.rs b/execution_engine/src/storage/trie_store/operations/tests/delete.rs index 6ab12a7549..cf661445fb 100644 --- a/execution_engine/src/storage/trie_store/operations/tests/delete.rs +++ b/execution_engine/src/storage/trie_store/operations/tests/delete.rs @@ -1,30 +1,41 @@ use super::*; -use crate::storage::{transaction_source::Writable, trie_store::operations::DeleteResult}; +use crate::storage::trie_store::operations::DeleteResult; -fn checked_delete( +fn checked_delete<'a, K, V, R, WR, S, WS, E>( correlation_id: CorrelationId, - txn: &mut T, + environment: &'a R, + write_environment: &'a WR, store: &S, + write_store: &WS, root: &Digest, key_to_delete: &K, ) -> Result where K: ToBytes + FromBytes + Clone + std::fmt::Debug + Eq, V: ToBytes + FromBytes + Clone + std::fmt::Debug, - T: Readable + Writable, + R: TransactionSource<'a, Handle = S::Handle>, + WR: TransactionSource<'a, Handle = WS::Handle>, S: TrieStore, - S::Error: From, - E: From + From, + WS: TrieStore>, + S::Error: From, + WS::Error: From, + E: From + From + From + From + From, { - let _counter = 
TestValue::before_operation(TestOperation::Delete); - let delete_result = - operations::delete::(correlation_id, txn, store, root, key_to_delete); - let counter = TestValue::after_operation(TestOperation::Delete); - assert_eq!(counter, 0, "Delete should never deserialize a value"); + let mut txn = write_environment.create_read_write_txn()?; + let delete_result = operations::delete::, _, WS, E>( + correlation_id, + &mut txn, + write_store, + root, + key_to_delete, + ); + txn.commit()?; let delete_result = delete_result?; + let rtxn = environment.create_read_write_txn()?; if let DeleteResult::Deleted(new_root) = delete_result { - operations::check_integrity::(correlation_id, txn, store, vec![new_root])?; + operations::check_integrity::(correlation_id, &rtxn, store, vec![new_root])?; } + rtxn.commit()?; Ok(delete_result) } @@ -32,10 +43,13 @@ mod partial_tries { use super::*; use crate::storage::trie_store::operations::DeleteResult; - fn delete_from_partial_trie_had_expected_results<'a, K, V, R, S, E>( + #[allow(clippy::too_many_arguments)] + fn delete_from_partial_trie_had_expected_results<'a, K, V, R, WR, S, WS, E>( correlation_id: CorrelationId, environment: &'a R, + write_environment: &'a WR, store: &S, + write_store: &WS, root: &Digest, key_to_delete: &K, expected_root_after_delete: &Digest, @@ -45,17 +59,27 @@ mod partial_tries { K: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug, V: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug, R: TransactionSource<'a, Handle = S::Handle>, + WR: TransactionSource<'a, Handle = WS::Handle>, S: TrieStore, + WS: TrieStore>, S::Error: From, - E: From + From + From, + WS::Error: From, + E: From + + From + + From + + From + + From, { - let mut txn = environment.create_read_write_txn()?; + let rtxn = environment.create_read_txn()?; // The assert below only works with partial tries - assert_eq!(store.get(&txn, expected_root_after_delete)?, None); - let root_after_delete = match checked_delete::( + assert_eq!(store.get(&rtxn, 
expected_root_after_delete)?, None); + rtxn.commit()?; + let root_after_delete = match checked_delete::( correlation_id, - &mut txn, + environment, + write_environment, store, + write_store, root, key_to_delete, )? { @@ -64,9 +88,11 @@ mod partial_tries { DeleteResult::RootNotFound => panic!("root should be found"), }; assert_eq!(root_after_delete, *expected_root_after_delete); + let rtxn = environment.create_read_txn()?; for HashedTrie { hash, trie } in expected_tries_after_delete { - assert_eq!(store.get(&txn, hash)?, Some(trie.clone())); + assert_eq!(store.get(&rtxn, hash)?, Some(trie.clone())); } + rtxn.commit()?; Ok(()) } @@ -79,9 +105,19 @@ mod partial_tries { let key_to_delete = &TEST_LEAVES[i]; let context = LmdbTestContext::new(&initial_tries).unwrap(); - delete_from_partial_trie_had_expected_results::( + delete_from_partial_trie_had_expected_results::< + TestKey, + TestValue, + _, + _, + _, + _, + error::Error, + >( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &initial_root_hash, key_to_delete.key().unwrap(), @@ -101,9 +137,19 @@ mod partial_tries { let key_to_delete = &TEST_LEAVES[i]; let context = InMemoryTestContext::new(&initial_tries).unwrap(); - delete_from_partial_trie_had_expected_results::( + delete_from_partial_trie_had_expected_results::< + TestKey, + TestValue, + _, + _, + _, + _, + error::Error, + >( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &initial_root_hash, key_to_delete.key().unwrap(), @@ -114,10 +160,21 @@ mod partial_tries { } } - fn delete_non_existent_key_from_partial_trie_should_return_does_not_exist<'a, K, V, R, S, E>( + fn delete_non_existent_key_from_partial_trie_should_return_does_not_exist< + 'a, + K, + V, + R, + WR, + S, + WS, + E, + >( correlation_id: CorrelationId, environment: &'a R, + write_environment: &'a WR, store: &S, + write_store: &WS, root: &Digest, key_to_delete: &K, ) -> Result<(), E> @@ -125,13 +182,26 @@ mod 
partial_tries { K: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug, V: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug, R: TransactionSource<'a, Handle = S::Handle>, + WR: TransactionSource<'a, Handle = WS::Handle>, S: TrieStore, + WS: TrieStore>, S::Error: From, - E: From + From + From, + WS::Error: From, + E: From + + From + + From + + From + + From, { - let mut txn = environment.create_read_write_txn()?; - match checked_delete::(correlation_id, &mut txn, store, root, key_to_delete)? - { + match checked_delete::( + correlation_id, + environment, + write_environment, + store, + write_store, + root, + key_to_delete, + )? { DeleteResult::Deleted(_) => panic!("should not delete"), DeleteResult::DoesNotExist => Ok(()), DeleteResult::RootNotFound => panic!("root should be found"), @@ -151,10 +221,14 @@ mod partial_tries { TestValue, _, _, + _, + _, error::Error, >( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &initial_root_hash, key_to_delete.key().unwrap(), @@ -176,10 +250,14 @@ mod partial_tries { TestValue, _, _, + _, + _, error::Error, >( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &initial_root_hash, key_to_delete.key().unwrap(), @@ -190,6 +268,7 @@ mod partial_tries { } mod full_tries { + use super::*; use std::ops::RangeInclusive; use proptest::{collection, prelude::*}; @@ -209,7 +288,7 @@ mod full_tries { operations::{ delete, tests::{ - InMemoryTestContext, LmdbTestContext, TestKey, TestOperation, TestValue, + InMemoryTestContext, LmdbTestContext, TestKey, TestValue, TEST_TRIE_GENERATORS, }, write, DeleteResult, WriteResult, @@ -231,21 +310,23 @@ mod full_tries { K: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug, V: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug, R: TransactionSource<'a, Handle = S::Handle>, - S: TrieStore, + S: TrieStore>, S::Error: From, E: From + From + From, { - let mut txn = environment.create_read_write_txn()?; + let mut 
txn: R::ReadWriteTransaction = environment.create_read_write_txn()?; + let mut roots = Vec::new(); // Insert the key-value pairs, keeping track of the roots as we go for (key, value) in pairs { - if let WriteResult::Written(new_root) = write::( + let new_value = PanickingFromBytes::new(value.clone()); + if let WriteResult::Written(new_root) = write::, _, _, E>( correlation_id, &mut txn, store, roots.last().unwrap_or(root), key, - value, + &new_value, )? { roots.push(new_root); } else { @@ -255,11 +336,13 @@ mod full_tries { // Delete the key-value pairs, checking the resulting roots as we go let mut current_root = roots.pop().unwrap_or_else(|| root.to_owned()); for (key, _value) in pairs.iter().rev() { - let _counter = TestValue::before_operation(TestOperation::Delete); - let delete_result = - delete::(correlation_id, &mut txn, store, ¤t_root, key); - let counter = TestValue::after_operation(TestOperation::Delete); - assert_eq!(counter, 0, "Delete should never deserialize a value"); + let delete_result = delete::, _, _, E>( + correlation_id, + &mut txn, + store, + ¤t_root, + key, + ); if let DeleteResult::Deleted(new_root) = delete_result? { current_root = roots.pop().unwrap_or_else(|| root.to_owned()); assert_eq!(new_root, current_root); @@ -332,28 +415,36 @@ mod full_tries { K: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug, V: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug, R: TransactionSource<'a, Handle = S::Handle>, - S: TrieStore, + S: TrieStore>, S::Error: From, E: From + From + From, { - let mut txn = environment.create_read_write_txn()?; + let mut txn: R::ReadWriteTransaction = environment.create_read_write_txn()?; let mut expected_root = *root; // Insert the key-value pairs, keeping track of the roots as we go for (key, value) in pairs_to_insert.iter() { - if let WriteResult::Written(new_root) = - write::(correlation_id, &mut txn, store, &expected_root, key, value)? 
- { + let new_value = PanickingFromBytes::new(value.clone()); + if let WriteResult::Written(new_root) = write::, _, _, E>( + correlation_id, + &mut txn, + store, + &expected_root, + key, + &new_value, + )? { expected_root = new_root; } else { panic!("Could not write pair") } } for key in keys_to_delete.iter() { - let _counter = TestValue::before_operation(TestOperation::Delete); - let delete_result = - delete::(correlation_id, &mut txn, store, &expected_root, key); - let counter = TestValue::after_operation(TestOperation::Delete); - assert_eq!(counter, 0, "Delete should never deserialize a value"); + let delete_result = delete::, _, _, E>( + correlation_id, + &mut txn, + store, + &expected_root, + key, + ); match delete_result? { DeleteResult::Deleted(new_root) => { expected_root = new_root; @@ -372,9 +463,15 @@ mod full_tries { let mut actual_root = *root; for (key, value) in pairs_to_insert_less_deleted.iter() { - if let WriteResult::Written(new_root) = - write::(correlation_id, &mut txn, store, &actual_root, key, value)? - { + let new_value = PanickingFromBytes::new(value.clone()); + if let WriteResult::Written(new_root) = write::, _, _, E>( + correlation_id, + &mut txn, + store, + &actual_root, + key, + &new_value, + )? 
{ actual_root = new_root; } else { panic!("Could not write pair") diff --git a/execution_engine/src/storage/trie_store/operations/tests/ee_699.rs b/execution_engine/src/storage/trie_store/operations/tests/ee_699.rs index 6d8927ac91..c6c89aed96 100644 --- a/execution_engine/src/storage/trie_store/operations/tests/ee_699.rs +++ b/execution_engine/src/storage/trie_store/operations/tests/ee_699.rs @@ -302,10 +302,14 @@ mod empty_tries { _, _, _, + _, + _, in_memory::Error, >( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &initial_states, &TEST_LEAVES, diff --git a/execution_engine/src/storage/trie_store/operations/tests/keys.rs b/execution_engine/src/storage/trie_store/operations/tests/keys.rs index 32aa55dee7..3ebd8d112f 100644 --- a/execution_engine/src/storage/trie_store/operations/tests/keys.rs +++ b/execution_engine/src/storage/trie_store/operations/tests/keys.rs @@ -1,4 +1,5 @@ mod partial_tries { + use crate::{ shared::newtypes::CorrelationId, storage::{ @@ -7,8 +8,8 @@ mod partial_tries { trie_store::operations::{ self, tests::{ - InMemoryTestContext, LmdbTestContext, TestKey, TestValue, TEST_LEAVES, - TEST_TRIE_GENERATORS, + bytesrepr_utils::PanickingFromBytes, InMemoryTestContext, LmdbTestContext, + TestKey, TestValue, TEST_LEAVES, TEST_TRIE_GENERATORS, }, }, }, @@ -34,7 +35,7 @@ mod partial_tries { }; let actual = { let txn = context.environment.create_read_txn().unwrap(); - let mut tmp = operations::keys::( + let mut tmp = operations::keys::, _, _>( correlation_id, &txn, &context.store, @@ -70,7 +71,7 @@ mod partial_tries { }; let actual = { let txn = context.environment.create_read_txn().unwrap(); - let mut tmp = operations::keys::( + let mut tmp = operations::keys::, _, _>( correlation_id, &txn, &context.store, @@ -88,6 +89,7 @@ mod partial_tries { } mod full_tries { + use casper_hashing::Digest; use crate::{ @@ -98,8 +100,8 @@ mod full_tries { trie_store::operations::{ self, tests::{ - 
InMemoryTestContext, TestKey, TestValue, EMPTY_HASHED_TEST_TRIES, TEST_LEAVES, - TEST_TRIE_GENERATORS, + bytesrepr_utils::PanickingFromBytes, InMemoryTestContext, TestKey, TestValue, + EMPTY_HASHED_TEST_TRIES, TEST_LEAVES, TEST_TRIE_GENERATORS, }, }, }, @@ -131,7 +133,7 @@ mod full_tries { }; let actual = { let txn = context.environment.create_read_txn().unwrap(); - let mut tmp = operations::keys::( + let mut tmp = operations::keys::, _, _>( correlation_id, &txn, &context.store, @@ -162,8 +164,8 @@ mod keys_iterator { trie_store::operations::{ self, tests::{ - hash_test_tries, HashedTestTrie, HashedTrie, InMemoryTestContext, TestKey, - TestValue, TEST_LEAVES, + bytesrepr_utils::PanickingFromBytes, hash_test_tries, HashedTestTrie, + HashedTrie, InMemoryTestContext, TestKey, TestValue, TEST_LEAVES, }, }, }, @@ -221,7 +223,7 @@ mod keys_iterator { let correlation_id = CorrelationId::new(); let context = return_on_err!(InMemoryTestContext::new(&tries)); let txn = return_on_err!(context.environment.create_read_txn()); - let _tmp = operations::keys::( + let _tmp = operations::keys::, _, _>( correlation_id, &txn, &context.store, @@ -231,21 +233,21 @@ mod keys_iterator { } #[test] - #[should_panic] + #[should_panic = "Expected a LazilyDeserializedTrie::Node but received"] fn should_panic_on_leaf_after_extension() { let (root_hash, tries) = return_on_err!(create_invalid_extension_trie()); test_trie(root_hash, tries); } #[test] - #[should_panic] + #[should_panic = "Expected key bytes to start with the current path"] fn should_panic_when_key_not_matching_path() { let (root_hash, tries) = return_on_err!(create_invalid_path_trie()); test_trie(root_hash, tries); } #[test] - #[should_panic] + #[should_panic = "Trie at the pointer is expected to exist"] fn should_panic_on_pointer_to_nonexisting_hash() { let (root_hash, tries) = return_on_err!(create_invalid_hash_trie()); test_trie(root_hash, tries); @@ -253,6 +255,7 @@ mod keys_iterator { } mod keys_with_prefix_iterator { + use 
crate::{ shared::newtypes::CorrelationId, storage::{ @@ -260,7 +263,10 @@ mod keys_with_prefix_iterator { trie::Trie, trie_store::operations::{ self, - tests::{create_6_leaf_trie, InMemoryTestContext, TestKey, TestValue, TEST_LEAVES}, + tests::{ + bytesrepr_utils::PanickingFromBytes, create_6_leaf_trie, InMemoryTestContext, + TestKey, TestValue, TEST_LEAVES, + }, }, }, }; @@ -285,15 +291,16 @@ mod keys_with_prefix_iterator { .create_read_txn() .expect("should create a read txn"); let expected = expected_keys(prefix); - let mut actual = operations::keys_with_prefix::( - correlation_id, - &txn, - &context.store, - &root_hash, - prefix, - ) - .filter_map(Result::ok) - .collect::>(); + let mut actual = + operations::keys_with_prefix::, _, _>( + correlation_id, + &txn, + &context.store, + &root_hash, + prefix, + ) + .filter_map(Result::ok) + .collect::>(); actual.sort(); assert_eq!(expected, actual); } diff --git a/execution_engine/src/storage/trie_store/operations/tests/mod.rs b/execution_engine/src/storage/trie_store/operations/tests/mod.rs index 6a7c1ec6c9..21a3fd46b1 100644 --- a/execution_engine/src/storage/trie_store/operations/tests/mod.rs +++ b/execution_engine/src/storage/trie_store/operations/tests/mod.rs @@ -1,3 +1,4 @@ +pub(crate) mod bytesrepr_utils; mod delete; mod ee_699; mod keys; @@ -7,12 +8,7 @@ mod scan; mod synchronize; mod write; -use std::{ - cell::RefCell, - collections::{BTreeMap, HashMap}, - convert, - ops::Not, -}; +use std::{collections::HashMap, convert, ops::Not}; use lmdb::DatabaseFlags; use tempfile::{tempdir, TempDir}; @@ -40,6 +36,8 @@ use crate::{ }, }; +use self::bytesrepr_utils::PanickingFromBytes; + const TEST_KEY_LENGTH: usize = 7; /// A short key type for tests. 
@@ -67,57 +65,10 @@ impl FromBytes for TestKey { const TEST_VAL_LENGTH: usize = 6; -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] -pub(crate) enum TestOperation { - Delete, // Deleting an existing value should not deserialize V -} - -type Counter = BTreeMap; - -thread_local! { - static FROMBYTES_INSIDE_OPERATION: RefCell = RefCell::new(Default::default()); - static FROMBYTES_COUNTER: RefCell = RefCell::new(Default::default()); -} - /// A short value type for tests. #[derive(Debug, Copy, Clone, PartialEq, Eq)] struct TestValue([u8; TEST_VAL_LENGTH]); -impl TestValue { - pub(crate) fn before_operation(op: TestOperation) -> usize { - FROMBYTES_INSIDE_OPERATION.with(|flag| { - *flag.borrow_mut().entry(op).or_default() += 1; - }); - - FROMBYTES_COUNTER.with(|counter| { - let mut counter = counter.borrow_mut(); - let old = counter.get(&op).copied().unwrap_or_default(); - *counter.entry(op).or_default() = 0; - old - }) - } - - pub(crate) fn after_operation(op: TestOperation) -> usize { - FROMBYTES_INSIDE_OPERATION.with(|flag| { - *flag.borrow_mut().get_mut(&op).unwrap() -= 1; - }); - - FROMBYTES_COUNTER.with(|counter| counter.borrow().get(&op).copied().unwrap()) - } - - pub(crate) fn increment() { - let flag = FROMBYTES_INSIDE_OPERATION.with(|flag| flag.borrow().clone()); - let op = TestOperation::Delete; - if let Some(value) = flag.get(&op) { - if *value > 0 { - FROMBYTES_COUNTER.with(|counter| { - *counter.borrow_mut().entry(op).or_default() += 1; - }); - } - } - } -} - impl ToBytes for TestValue { fn to_bytes(&self) -> Result, bytesrepr::Error> { Ok(self.0.to_vec()) @@ -134,8 +85,6 @@ impl FromBytes for TestValue { let mut ret = [0u8; TEST_VAL_LENGTH]; ret.copy_from_slice(key); - TestValue::increment(); - Ok((TestValue(ret), rem)) } } @@ -651,7 +600,9 @@ where if let Trie::Leaf { key, value } = leaf { let maybe_value: ReadResult = read::<_, _, _, _, E>(correlation_id, txn, store, root, key)?; - ret.push(ReadResult::Found(*value) == maybe_value) + if let 
ReadResult::Found(value_found) = maybe_value { + ret.push(*value == value_found); + } } else { panic!("leaves should only contain leaves") } @@ -796,7 +747,7 @@ where K: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug, V: ToBytes + FromBytes + Clone + Eq, R: TransactionSource<'a, Handle = S::Handle>, - S: TrieStore, + S: TrieStore>, S::Error: From, E: From + From + From, { @@ -805,12 +756,19 @@ where return Ok(results); } let mut root_hash = root_hash.to_owned(); - let mut txn = environment.create_read_write_txn()?; + let mut txn: R::ReadWriteTransaction = environment.create_read_write_txn()?; for leaf in leaves.iter() { if let Trie::Leaf { key, value } = leaf { - let write_result = - write::<_, _, _, _, E>(correlation_id, &mut txn, store, &root_hash, key, value)?; + let new_value = PanickingFromBytes::new(value.clone()); + let write_result = write::, _, _, E>( + correlation_id, + &mut txn, + store, + &root_hash, + key, + &new_value, + )?; match write_result { WriteResult::Written(hash) => { root_hash = hash; @@ -877,10 +835,11 @@ where S::Error: From, E: From + From + From, { - let txn = environment.create_read_txn()?; + let txn: R::ReadTransaction = environment.create_read_txn()?; for (index, root_hash) in root_hashes.iter().enumerate() { for (key, value) in &pairs[..=index] { let result = read::<_, _, _, _, E>(correlation_id, &txn, store, root_hash, key)?; + if ReadResult::Found(*value) != result { return Ok(false); } @@ -919,7 +878,7 @@ where K: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug, V: ToBytes + FromBytes + Clone + Eq, R: TransactionSource<'a, Handle = S::Handle>, - S: TrieStore, + S: TrieStore>, S::Error: From, E: From + From + From, { @@ -931,7 +890,15 @@ where let mut txn = environment.create_read_write_txn()?; for (key, value) in pairs.iter() { - match write::<_, _, _, _, E>(correlation_id, &mut txn, store, &root_hash, key, value)? 
{ + let new_val = PanickingFromBytes::new(value.clone()); + match write::, _, _, E>( + correlation_id, + &mut txn, + store, + &root_hash, + key, + &new_val, + )? { WriteResult::Written(hash) => { root_hash = hash; } @@ -944,10 +911,12 @@ where Ok(results) } -fn writes_to_n_leaf_empty_trie_had_expected_results<'a, K, V, R, S, E>( +fn writes_to_n_leaf_empty_trie_had_expected_results<'a, K, V, R, WR, S, WS, E>( correlation_id: CorrelationId, environment: &'a R, + writable_environment: &'a WR, store: &S, + writable_store: &WS, states: &[Digest], test_leaves: &[Trie], ) -> Result, E> @@ -955,17 +924,20 @@ where K: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug + Copy + Ord, V: ToBytes + FromBytes + Clone + Eq + std::fmt::Debug + Copy, R: TransactionSource<'a, Handle = S::Handle>, + WR: TransactionSource<'a, Handle = WS::Handle>, S: TrieStore, + WS: TrieStore>, S::Error: From, - E: From + From + From, + WS::Error: From, + E: From + From + From + From + From, { let mut states = states.to_vec(); // Write set of leaves to the trie let hashes = write_leaves::<_, _, _, _, E>( correlation_id, - environment, - store, + writable_environment, + writable_store, states.last().unwrap(), test_leaves, )? diff --git a/execution_engine/src/storage/trie_store/operations/tests/scan.rs b/execution_engine/src/storage/trie_store/operations/tests/scan.rs index 5d8b74d7ea..14cfaa8816 100644 --- a/execution_engine/src/storage/trie_store/operations/tests/scan.rs +++ b/execution_engine/src/storage/trie_store/operations/tests/scan.rs @@ -1,3 +1,5 @@ +use std::convert::TryInto; + use casper_hashing::Digest; use super::*; @@ -5,7 +7,8 @@ use crate::{ shared::newtypes::CorrelationId, storage::{ error::{self, in_memory}, - trie_store::operations::{scan, TrieScan}, + trie::LazilyDeserializedTrie, + trie_store::operations::{scan_raw, store_wrappers, TrieScanRaw}, }, }; @@ -26,29 +29,51 @@ where let root = store .get(&txn, root_hash)? 
.expect("check_scan received an invalid root hash"); - let TrieScan { mut tip, parents } = - scan::(&txn, store, key, &root)?; + let root_bytes = root.to_bytes()?; + let store = store_wrappers::NonDeserializingStore::new(store); + let TrieScanRaw { mut tip, parents } = scan_raw::( + &txn, + &store, + key, + root_bytes.into(), + )?; for (index, parent) in parents.into_iter().rev() { let expected_tip_hash = { - let tip_bytes = tip.to_bytes().unwrap(); - Digest::hash(&tip_bytes) + match tip { + LazilyDeserializedTrie::Leaf(leaf_bytes) => Digest::hash(leaf_bytes.bytes()), + node @ LazilyDeserializedTrie::Node { .. } + | node @ LazilyDeserializedTrie::Extension { .. } => { + let tip_bytes = TryInto::>::try_into(node)? + .to_bytes() + .unwrap(); + Digest::hash(&tip_bytes) + } + } }; match parent { Trie::Leaf { .. } => panic!("parents should not contain any leaves"), Trie::Node { pointer_block } => { let pointer_tip_hash = pointer_block[::from(index)].map(|ptr| *ptr.hash()); assert_eq!(Some(expected_tip_hash), pointer_tip_hash); - tip = Trie::Node { pointer_block }; + tip = LazilyDeserializedTrie::Node { pointer_block }; } Trie::Extension { affix, pointer } => { let pointer_tip_hash = pointer.hash().to_owned(); assert_eq!(expected_tip_hash, pointer_tip_hash); - tip = Trie::Extension { affix, pointer }; + tip = LazilyDeserializedTrie::Extension { affix, pointer }; } } } - assert_eq!(root, tip); + + assert!( + matches!( + tip, + LazilyDeserializedTrie::Node { .. } | LazilyDeserializedTrie::Extension { .. 
}, + ), + "Unexpected leaf found" + ); + assert_eq!(root, tip.try_into()?); txn.commit()?; Ok(()) } diff --git a/execution_engine/src/storage/trie_store/operations/tests/write.rs b/execution_engine/src/storage/trie_store/operations/tests/write.rs index 314fdedd7c..1c4e0917a9 100644 --- a/execution_engine/src/storage/trie_store/operations/tests/write.rs +++ b/execution_engine/src/storage/trie_store/operations/tests/write.rs @@ -13,9 +13,11 @@ mod empty_tries { let context = LmdbTestContext::new(&tries).unwrap(); let initial_states = vec![root_hash]; - writes_to_n_leaf_empty_trie_had_expected_results::<_, _, _, _, error::Error>( + writes_to_n_leaf_empty_trie_had_expected_results::<_, _, _, _, _, _, error::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &initial_states, &TEST_LEAVES_NON_COLLIDING[..num_leaves], @@ -32,9 +34,11 @@ mod empty_tries { let context = InMemoryTestContext::new(&tries).unwrap(); let initial_states = vec![root_hash]; - writes_to_n_leaf_empty_trie_had_expected_results::<_, _, _, _, in_memory::Error>( + writes_to_n_leaf_empty_trie_had_expected_results::<_, _, _, _, _, _, in_memory::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &initial_states, &TEST_LEAVES_NON_COLLIDING[..num_leaves], @@ -51,9 +55,11 @@ mod empty_tries { let context = LmdbTestContext::new(&tries).unwrap(); let initial_states = vec![root_hash]; - writes_to_n_leaf_empty_trie_had_expected_results::<_, _, _, _, error::Error>( + writes_to_n_leaf_empty_trie_had_expected_results::<_, _, _, _, _, _, error::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &initial_states, &TEST_LEAVES[..num_leaves], @@ -70,9 +76,11 @@ mod empty_tries { let context = InMemoryTestContext::new(&tries).unwrap(); let initial_states = vec![root_hash]; - writes_to_n_leaf_empty_trie_had_expected_results::<_, _, _, _, in_memory::Error>( + 
writes_to_n_leaf_empty_trie_had_expected_results::<_, _, _, _, _, _, in_memory::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &initial_states, &TEST_LEAVES[..num_leaves], @@ -118,18 +126,27 @@ mod empty_tries { mod partial_tries { use super::*; - fn noop_writes_to_n_leaf_partial_trie_had_expected_results<'a, R, S, E>( + fn noop_writes_to_n_leaf_partial_trie_had_expected_results<'a, R, WR, S, WS, E>( correlation_id: CorrelationId, environment: &'a R, + write_environment: &'a WR, store: &S, + writable_store: &WS, states: &[Digest], num_leaves: usize, ) -> Result<(), E> where R: TransactionSource<'a, Handle = S::Handle>, + WR: TransactionSource<'a, Handle = WS::Handle>, S: TrieStore, + WS: TrieStore>, S::Error: From, - E: From + From + From, + WS::Error: From, + E: From + + From + + From + + From + + From, { // Check that the expected set of leaves is in the trie check_leaves::<_, _, _, _, E>( @@ -142,10 +159,10 @@ mod partial_tries { )?; // Rewrite that set of leaves - let write_results = write_leaves::<_, _, _, _, E>( + let write_results = write_leaves::( correlation_id, - environment, - store, + write_environment, + writable_store, &states[0], &TEST_LEAVES[..num_leaves], )?; @@ -173,9 +190,11 @@ mod partial_tries { let context = LmdbTestContext::new(&tries).unwrap(); let states = vec![root_hash]; - noop_writes_to_n_leaf_partial_trie_had_expected_results::<_, _, error::Error>( + noop_writes_to_n_leaf_partial_trie_had_expected_results::<_, _, _, _, error::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &states, num_leaves, @@ -192,9 +211,11 @@ mod partial_tries { let context = InMemoryTestContext::new(&tries).unwrap(); let states = vec![root_hash]; - noop_writes_to_n_leaf_partial_trie_had_expected_results::<_, _, in_memory::Error>( + noop_writes_to_n_leaf_partial_trie_had_expected_results::<_, _, _, _, in_memory::Error>( correlation_id, 
&context.environment, + &context.environment, + &context.store, &context.store, &states, num_leaves, @@ -203,18 +224,27 @@ mod partial_tries { } } - fn update_writes_to_n_leaf_partial_trie_had_expected_results<'a, R, S, E>( + fn update_writes_to_n_leaf_partial_trie_had_expected_results<'a, R, WR, S, WS, E>( correlation_id: CorrelationId, environment: &'a R, + write_environment: &'a WR, store: &S, + writable_store: &WS, states: &[Digest], num_leaves: usize, ) -> Result<(), E> where R: TransactionSource<'a, Handle = S::Handle>, + WR: TransactionSource<'a, Handle = WS::Handle>, S: TrieStore, + WS: TrieStore>, S::Error: From, - E: From + From + From, + WS::Error: From, + E: From + + From + + From + + From + + From, { let mut states = states.to_owned(); @@ -243,8 +273,8 @@ mod partial_tries { let current_root = states.last().unwrap(); let results = write_leaves::<_, _, _, _, E>( correlation_id, - environment, - store, + write_environment, + writable_store, current_root, &[leaf.to_owned()], )?; @@ -279,9 +309,11 @@ mod partial_tries { let context = LmdbTestContext::new(&tries).unwrap(); let initial_states = vec![root_hash]; - update_writes_to_n_leaf_partial_trie_had_expected_results::<_, _, error::Error>( + update_writes_to_n_leaf_partial_trie_had_expected_results::<_, _, _, _, error::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &initial_states, num_leaves, @@ -298,9 +330,11 @@ mod partial_tries { let context = InMemoryTestContext::new(&tries).unwrap(); let states = vec![root_hash]; - update_writes_to_n_leaf_partial_trie_had_expected_results::<_, _, in_memory::Error>( + update_writes_to_n_leaf_partial_trie_had_expected_results::<_, _, _, _, in_memory::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &states, num_leaves, @@ -313,18 +347,27 @@ mod partial_tries { mod full_tries { use super::*; - fn noop_writes_to_n_leaf_full_trie_had_expected_results<'a, R, S, 
E>( + fn noop_writes_to_n_leaf_full_trie_had_expected_results<'a, R, WR, S, WS, E>( correlation_id: CorrelationId, environment: &'a R, + write_environment: &'a WR, store: &S, + write_store: &WS, states: &[Digest], index: usize, ) -> Result<(), E> where R: TransactionSource<'a, Handle = S::Handle>, + WR: TransactionSource<'a, Handle = WS::Handle>, S: TrieStore, + WS: TrieStore>, S::Error: From, - E: From + From + From, + WS::Error: From, + E: From + + From + + From + + From + + From, { // Check that the expected set of leaves is in the trie at every state reference for (num_leaves, state) in states[..index].iter().enumerate() { @@ -341,8 +384,8 @@ mod full_tries { // Rewrite that set of leaves let write_results = write_leaves::<_, _, _, _, E>( correlation_id, - environment, - store, + write_environment, + write_store, states.last().unwrap(), &TEST_LEAVES[..index], )?; @@ -377,9 +420,11 @@ mod full_tries { context.update(&tries).unwrap(); states.push(root_hash); - noop_writes_to_n_leaf_full_trie_had_expected_results::<_, _, error::Error>( + noop_writes_to_n_leaf_full_trie_had_expected_results::<_, _, _, _, error::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &states, index, @@ -399,9 +444,11 @@ mod full_tries { context.update(&tries).unwrap(); states.push(root_hash); - noop_writes_to_n_leaf_full_trie_had_expected_results::<_, _, in_memory::Error>( + noop_writes_to_n_leaf_full_trie_had_expected_results::<_, _, _, _, in_memory::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &states, index, @@ -410,18 +457,27 @@ mod full_tries { } } - fn update_writes_to_n_leaf_full_trie_had_expected_results<'a, R, S, E>( + fn update_writes_to_n_leaf_full_trie_had_expected_results<'a, R, WR, S, WS, E>( correlation_id: CorrelationId, environment: &'a R, + write_environment: &'a WR, store: &S, + write_store: &WS, states: &[Digest], num_leaves: usize, ) -> Result<(), E> where 
R: TransactionSource<'a, Handle = S::Handle>, + WR: TransactionSource<'a, Handle = WS::Handle>, S: TrieStore, + WS: TrieStore>, S::Error: From, - E: From + From + From, + WS::Error: From, + E: From + + From + + From + + From + + From, { let mut states = states.to_vec(); @@ -440,8 +496,8 @@ mod full_tries { // Write set of leaves to the trie let hashes = write_leaves::<_, _, _, _, E>( correlation_id, - environment, - store, + write_environment, + write_store, states.last().unwrap(), &TEST_LEAVES_UPDATED[..num_leaves], )? @@ -501,9 +557,11 @@ mod full_tries { context.update(&tries).unwrap(); states.push(root_hash); - update_writes_to_n_leaf_full_trie_had_expected_results::<_, _, error::Error>( + update_writes_to_n_leaf_full_trie_had_expected_results::<_, _, _, _, error::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &states, num_leaves, @@ -523,9 +581,11 @@ mod full_tries { context.update(&tries).unwrap(); states.push(root_hash); - update_writes_to_n_leaf_full_trie_had_expected_results::<_, _, in_memory::Error>( + update_writes_to_n_leaf_full_trie_had_expected_results::<_, _, _, _, in_memory::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &states, num_leaves, @@ -534,17 +594,26 @@ mod full_tries { } } - fn node_writes_to_5_leaf_full_trie_had_expected_results<'a, R, S, E>( + fn node_writes_to_5_leaf_full_trie_had_expected_results<'a, R, WR, S, WS, E>( correlation_id: CorrelationId, environment: &'a R, + write_environment: &'a WR, store: &S, + write_store: &WS, states: &[Digest], ) -> Result<(), E> where R: TransactionSource<'a, Handle = S::Handle>, + WR: TransactionSource<'a, Handle = WS::Handle>, S: TrieStore, + WS: TrieStore>, S::Error: From, - E: From + From + From, + WS::Error: From, + E: From + + From + + From + + From + + From, { let mut states = states.to_vec(); let num_leaves = TEST_LEAVES_LENGTH; @@ -564,8 +633,8 @@ mod full_tries { // Write set 
of leaves to the trie let hashes = write_leaves::<_, _, _, _, E>( correlation_id, - environment, - store, + write_environment, + write_store, states.last().unwrap(), &TEST_LEAVES_ADJACENTS, )? @@ -625,9 +694,11 @@ mod full_tries { states.push(root_hash); } - node_writes_to_5_leaf_full_trie_had_expected_results::<_, _, error::Error>( + node_writes_to_5_leaf_full_trie_had_expected_results::<_, _, _, _, error::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &states, ) @@ -646,9 +717,11 @@ mod full_tries { states.push(root_hash); } - node_writes_to_5_leaf_full_trie_had_expected_results::<_, _, in_memory::Error>( + node_writes_to_5_leaf_full_trie_had_expected_results::<_, _, _, _, in_memory::Error>( correlation_id, &context.environment, + &context.environment, + &context.store, &context.store, &states, ) diff --git a/execution_engine/src/storage/trie_store/tests/mod.rs b/execution_engine/src/storage/trie_store/tests/mod.rs index a122f3ee7b..436c9bf6bf 100644 --- a/execution_engine/src/storage/trie_store/tests/mod.rs +++ b/execution_engine/src/storage/trie_store/tests/mod.rs @@ -47,10 +47,7 @@ fn create_data() -> Vec> { let ext_node: Trie = { let affix = vec![1u8, 0]; let pointer = Pointer::NodePointer(node_2_hash); - Trie::Extension { - affix: affix.into(), - pointer, - } + Trie::extension(affix, pointer) }; let ext_node_hash = Digest::hash(ext_node.to_bytes().unwrap()); diff --git a/execution_engine/src/system/mint.rs b/execution_engine/src/system/mint.rs index 3a0175d64b..06600afbd7 100644 --- a/execution_engine/src/system/mint.rs +++ b/execution_engine/src/system/mint.rs @@ -1,3 +1,4 @@ +pub(crate) mod detail; pub(crate) mod runtime_provider; pub(crate) mod storage_provider; pub(crate) mod system_provider; @@ -54,6 +55,32 @@ pub trait Mint: RuntimeProvider + StorageProvider + SystemProvider { Ok(purse_uref) } + /// Burns native tokens. 
+ fn burn(&mut self, purse: URef, amount: U512) -> Result<(), Error> { + let purse_key = Key::URef(purse); + self.validate_writeable(&purse_key) + .map_err(|_| Error::ForgedReference)?; + self.validate_key(&purse_key) + .map_err(|_| Error::ForgedReference)?; + + let source_balance: U512 = match self.read_balance(purse)? { + Some(source_balance) => source_balance, + None => return Err(Error::PurseNotFound), + }; + + let new_balance = match source_balance.checked_sub(amount) { + Some(value) => value, + None => U512::zero(), + }; + + // source_balance is >= than new_balance + // this should block user from reducing totaly supply beyond what they own + let burned_amount = source_balance - new_balance; + + self.write_balance(purse, new_balance)?; + detail::reduce_total_supply_unsafe(self, burned_amount) + } + /// Reduce total supply by `amount`. Returns unit on success, otherwise /// an error. fn reduce_total_supply(&mut self, amount: U512) -> Result<(), Error> { @@ -63,29 +90,7 @@ pub trait Mint: RuntimeProvider + StorageProvider + SystemProvider { return Err(Error::InvalidTotalSupplyReductionAttempt); } - if amount.is_zero() { - return Ok(()); // no change to supply - } - - // get total supply or error - let total_supply_uref = match self.get_key(TOTAL_SUPPLY_KEY) { - Some(Key::URef(uref)) => uref, - Some(_) => return Err(Error::MissingKey), // TODO - None => return Err(Error::MissingKey), - }; - let total_supply: U512 = self - .read(total_supply_uref)? - .ok_or(Error::TotalSupplyNotFound)?; - - // decrease total supply - let reduced_total_supply = total_supply - .checked_sub(amount) - .ok_or(Error::ArithmeticOverflow)?; - - // update total supply - self.write(total_supply_uref, reduced_total_supply)?; - - Ok(()) + detail::reduce_total_supply_unsafe(self, amount) } /// Read balance of given `purse`. 
diff --git a/execution_engine/src/system/mint/detail.rs b/execution_engine/src/system/mint/detail.rs new file mode 100644 index 0000000000..60db175d56 --- /dev/null +++ b/execution_engine/src/system/mint/detail.rs @@ -0,0 +1,39 @@ +use casper_types::{ + system::{mint, mint::TOTAL_SUPPLY_KEY}, + Key, U512, +}; + +use crate::system::mint::{runtime_provider::RuntimeProvider, storage_provider::StorageProvider}; + +// Please do not expose this to the user! +pub(crate) fn reduce_total_supply_unsafe

( + auction: &mut P, + amount: U512, +) -> Result<(), mint::Error> +where + P: StorageProvider + RuntimeProvider + ?Sized, +{ + if amount.is_zero() { + return Ok(()); // no change to supply + } + + // get total supply or error + let total_supply_uref = match auction.get_key(TOTAL_SUPPLY_KEY) { + Some(Key::URef(uref)) => uref, + Some(_) => return Err(mint::Error::MissingKey), // TODO + None => return Err(mint::Error::MissingKey), + }; + let total_supply: U512 = auction + .read(total_supply_uref)? + .ok_or(mint::Error::TotalSupplyNotFound)?; + + // decrease total supply + let reduced_total_supply = total_supply + .checked_sub(amount) + .ok_or(mint::Error::ArithmeticOverflow)?; + + // update total supply + auction.write(total_supply_uref, reduced_total_supply)?; + + Ok(()) +} diff --git a/execution_engine/src/system/mint/runtime_provider.rs b/execution_engine/src/system/mint/runtime_provider.rs index 294af5ca4b..d649866d9b 100644 --- a/execution_engine/src/system/mint/runtime_provider.rs +++ b/execution_engine/src/system/mint/runtime_provider.rs @@ -48,4 +48,13 @@ pub trait RuntimeProvider { /// Checks if users can perform unrestricted transfers. This option is valid only for private /// chains. fn allow_unrestricted_transfers(&self) -> bool; + + /// Validates if a [`Key`] refers to a [`URef`] and has a write bit set. + fn validate_writeable(&self, key: &Key) -> Result<(), execution::Error>; + + /// Validates whether key is not forged (whether it can be found in the + /// `named_keys`) and whether the version of a key that contract wants + /// to use, has access rights that are less powerful than access rights' + /// of the key in the `named_keys`. 
+ fn validate_key(&self, key: &Key) -> Result<(), execution::Error>; } diff --git a/execution_engine_testing/test_support/src/wasm_test_builder.rs b/execution_engine_testing/test_support/src/wasm_test_builder.rs index f1da45b5a6..47304a93b8 100644 --- a/execution_engine_testing/test_support/src/wasm_test_builder.rs +++ b/execution_engine_testing/test_support/src/wasm_test_builder.rs @@ -527,6 +527,26 @@ impl LmdbWasmTestBuilder { .expect("unable to run step request against scratch global state"); self } + /// Executes a request to call the system auction contract. + pub fn run_auction_with_scratch( + &mut self, + era_end_timestamp_millis: u64, + evicted_validators: Vec, + ) -> &mut Self { + let auction = self.get_auction_contract_hash(); + let run_request = ExecuteRequestBuilder::contract_call_by_hash( + *SYSTEM_ADDR, + auction, + METHOD_RUN_AUCTION, + runtime_args! { + ARG_ERA_END_TIMESTAMP_MILLIS => era_end_timestamp_millis, + ARG_EVICTED_VALIDATORS => evicted_validators, + }, + ) + .build(); + self.scratch_exec_and_commit(run_request).expect_success(); + self + } } impl WasmTestBuilder diff --git a/execution_engine_testing/tests/src/test/regression/ee_1119.rs b/execution_engine_testing/tests/src/test/regression/ee_1119.rs index 2c1dce3c68..561fa9116e 100644 --- a/execution_engine_testing/tests/src/test/regression/ee_1119.rs +++ b/execution_engine_testing/tests/src/test/regression/ee_1119.rs @@ -233,11 +233,11 @@ fn should_run_ee_1119_dont_slash_delegated_validators() { builder.exec(slash_request_2).expect_success().commit(); let unbond_purses: UnbondingPurses = builder.get_unbonds(); - assert_eq!(unbond_purses.len(), 1); + assert!(unbond_purses.is_empty()); assert!(!unbond_purses.contains_key(&*DEFAULT_ACCOUNT_ADDR)); - assert!(unbond_purses.get(&VALIDATOR_1_ADDR).unwrap().is_empty()); + assert!(!unbond_purses.contains_key(&VALIDATOR_1_ADDR)); let bids: Bids = builder.get_bids(); let validator_1_bid = bids.get(&VALIDATOR_1).unwrap(); diff --git 
a/execution_engine_testing/tests/src/test/regression/ee_1120.rs b/execution_engine_testing/tests/src/test/regression/ee_1120.rs index a69fe33b3e..3343e289ad 100644 --- a/execution_engine_testing/tests/src/test/regression/ee_1120.rs +++ b/execution_engine_testing/tests/src/test/regression/ee_1120.rs @@ -4,7 +4,7 @@ use num_traits::Zero; use once_cell::sync::Lazy; use casper_engine_test_support::{ - utils, ExecuteRequestBuilder, InMemoryWasmTestBuilder, DEFAULT_ACCOUNTS, DEFAULT_ACCOUNT_ADDR, + utils, ExecuteRequestBuilder, LmdbWasmTestBuilder, DEFAULT_ACCOUNTS, DEFAULT_ACCOUNT_ADDR, DEFAULT_ACCOUNT_INITIAL_BALANCE, MINIMUM_ACCOUNT_CREATION_BALANCE, SYSTEM_ADDR, }; use casper_execution_engine::core::engine_state::{ @@ -84,7 +84,8 @@ fn should_run_ee_1120_slash_delegators() { }; let run_genesis_request = utils::create_run_genesis_request(accounts); - let mut builder = InMemoryWasmTestBuilder::default(); + let tempdir = tempfile::tempdir().unwrap(); + let mut builder = LmdbWasmTestBuilder::new_with_production_chainspec(tempdir.path()); builder.run_genesis(&run_genesis_request); let transfer_request_1 = ExecuteRequestBuilder::standard( @@ -97,7 +98,10 @@ fn should_run_ee_1120_slash_delegators() { ) .build(); - builder.exec(transfer_request_1).expect_success().commit(); + builder + .scratch_exec_and_commit(transfer_request_1) + .expect_success(); + builder.write_scratch_to_db(); let transfer_request_2 = ExecuteRequestBuilder::standard( *DEFAULT_ACCOUNT_ADDR, @@ -109,7 +113,11 @@ fn should_run_ee_1120_slash_delegators() { ) .build(); - builder.exec(transfer_request_2).expect_success().commit(); + builder + .scratch_exec_and_commit(transfer_request_2) + .expect_success() + .commit(); + builder.write_scratch_to_db(); let auction = builder.get_auction_contract_hash(); @@ -149,19 +157,16 @@ fn should_run_ee_1120_slash_delegators() { .build(); builder - .exec(delegate_exec_request_1) - .expect_success() - .commit(); + .scratch_exec_and_commit(delegate_exec_request_1) + 
.expect_success(); builder - .exec(delegate_exec_request_2) - .expect_success() - .commit(); + .scratch_exec_and_commit(delegate_exec_request_2) + .expect_success(); builder - .exec(delegate_exec_request_3) - .expect_success() - .commit(); + .scratch_exec_and_commit(delegate_exec_request_3) + .expect_success(); // Ensure that initial bid entries exist for validator 1 and validator 2 let initial_bids: Bids = builder.get_bids(); @@ -209,10 +214,18 @@ fn should_run_ee_1120_slash_delegators() { ) .build(); - builder.exec(undelegate_request_1).commit().expect_success(); - builder.exec(undelegate_request_2).commit().expect_success(); - builder.exec(undelegate_request_3).commit().expect_success(); - + builder + .scratch_exec_and_commit(undelegate_request_1) + .expect_success(); + builder.write_scratch_to_db(); + builder + .scratch_exec_and_commit(undelegate_request_2) + .expect_success(); + builder.write_scratch_to_db(); + builder + .scratch_exec_and_commit(undelegate_request_3) + .expect_success(); + builder.write_scratch_to_db(); // Check unbonding purses before slashing let unbond_purses_before: UnbondingPurses = builder.get_unbonds(); @@ -289,7 +302,10 @@ fn should_run_ee_1120_slash_delegators() { ) .build(); - builder.exec(slash_request_1).expect_success().commit(); + builder + .scratch_exec_and_commit(slash_request_1) + .expect_success(); + builder.write_scratch_to_db(); // Compare bids after slashing validator 2 let bids_after: Bids = builder.get_bids(); @@ -346,7 +362,8 @@ fn should_run_ee_1120_slash_delegators() { ) .build(); - builder.exec(slash_request_2).expect_success().commit(); + builder.scratch_exec_and_commit(slash_request_2); + builder.write_scratch_to_db(); let bids_after: Bids = builder.get_bids(); assert_eq!(bids_after.len(), 2); @@ -355,12 +372,6 @@ fn should_run_ee_1120_slash_delegators() { assert!(validator_1_bid.staked_amount().is_zero()); let unbond_purses_after: UnbondingPurses = builder.get_unbonds(); - assert!(unbond_purses_after - 
.get(&VALIDATOR_1_ADDR) - .unwrap() - .is_empty()); - assert!(unbond_purses_after - .get(&VALIDATOR_2_ADDR) - .unwrap() - .is_empty()); + assert!(!unbond_purses_after.contains_key(&VALIDATOR_1_ADDR)); + assert!(!unbond_purses_after.contains_key(&VALIDATOR_2_ADDR)); } diff --git a/execution_engine_testing/tests/src/test/regression/gov_116.rs b/execution_engine_testing/tests/src/test/regression/gov_116.rs index a86303f32b..0e5eb26a08 100644 --- a/execution_engine_testing/tests/src/test/regression/gov_116.rs +++ b/execution_engine_testing/tests/src/test/regression/gov_116.rs @@ -241,6 +241,7 @@ fn should_not_retain_genesis_validator_slot_protection_after_vesting_period_elap #[ignore] #[test] +#[allow(deprecated)] fn should_retain_genesis_validator_slot_protection() { const CASPER_VESTING_SCHEDULE_PERIOD_MILLIS: u64 = 91 * DAY_MILLIS; const CASPER_LOCKED_FUNDS_PERIOD_MILLIS: u64 = 90 * DAY_MILLIS; @@ -347,7 +348,11 @@ fn should_retain_genesis_validator_slot_protection() { pks }; assert_eq!( - next_validator_set_4, expected_validators, - "actual next validator set does not match expected validator set" + next_validator_set_4, + expected_validators, + "actual next validator set does not match expected validator set (diff {:?})", + expected_validators + .difference(&next_validator_set_4) + .collect::>(), ); } diff --git a/execution_engine_testing/tests/src/test/system_contracts/auction/bids.rs b/execution_engine_testing/tests/src/test/system_contracts/auction/bids.rs index 9423b9238f..5c53344904 100644 --- a/execution_engine_testing/tests/src/test/system_contracts/auction/bids.rs +++ b/execution_engine_testing/tests/src/test/system_contracts/auction/bids.rs @@ -5,10 +5,11 @@ use num_traits::{One, Zero}; use once_cell::sync::Lazy; use casper_engine_test_support::{ - utils, ExecuteRequestBuilder, InMemoryWasmTestBuilder, StepRequestBuilder, DEFAULT_ACCOUNTS, + ExecuteRequestBuilder, InMemoryWasmTestBuilder, StepRequestBuilder, DEFAULT_ACCOUNTS, DEFAULT_ACCOUNT_ADDR, 
DEFAULT_ACCOUNT_INITIAL_BALANCE, DEFAULT_CHAINSPEC_REGISTRY, DEFAULT_EXEC_CONFIG, DEFAULT_GENESIS_CONFIG_HASH, DEFAULT_GENESIS_TIMESTAMP_MILLIS, - DEFAULT_LOCKED_FUNDS_PERIOD_MILLIS, DEFAULT_PROTOCOL_VERSION, DEFAULT_UNBONDING_DELAY, + DEFAULT_MAX_ASSOCIATED_KEYS, DEFAULT_MAX_RUNTIME_CALL_STACK_HEIGHT, DEFAULT_PROTOCOL_VERSION, + DEFAULT_SYSTEM_CONFIG, DEFAULT_UNBONDING_DELAY, DEFAULT_WASM_CONFIG, MINIMUM_ACCOUNT_CREATION_BALANCE, PRODUCTION_RUN_GENESIS_REQUEST, SYSTEM_ADDR, TIMESTAMP_MILLIS_INCREMENT, }; @@ -16,18 +17,21 @@ use casper_execution_engine::{ core::{ engine_state::{ self, - engine_config::DEFAULT_MINIMUM_DELEGATION_AMOUNT, - genesis::{ExecConfigBuilder, GenesisAccount, GenesisValidator}, + engine_config::{DEFAULT_MINIMUM_DELEGATION_AMOUNT, DEFAULT_STRICT_ARGUMENT_CHECKING}, + genesis::{ + ExecConfigBuilder, GenesisAccount, GenesisValidator, DEFAULT_AUCTION_DELAY, + DEFAULT_ROUND_SEIGNIORAGE_RATE, DEFAULT_VALIDATOR_SLOTS, + }, run_genesis_request::RunGenesisRequest, step::EvictItem, - EngineConfigBuilder, Error, RewardItem, + EngineConfig, EngineConfigBuilder, Error, ExecConfig, RewardItem, + DEFAULT_MAX_QUERY_DEPTH, }, execution, }, storage::global_state::in_memory::InMemoryGlobalState, }; use casper_types::{ - self, account::AccountHash, api_error::ApiError, runtime_args, @@ -160,6 +164,59 @@ const DAY_MILLIS: u64 = 24 * 60 * 60 * 1000; const CASPER_VESTING_SCHEDULE_PERIOD_MILLIS: u64 = 91 * DAY_MILLIS; const CASPER_LOCKED_FUNDS_PERIOD_MILLIS: u64 = 90 * DAY_MILLIS; +#[allow(deprecated)] +fn setup(accounts: Vec) -> InMemoryWasmTestBuilder { + let engine_config = EngineConfig::new( + DEFAULT_MAX_QUERY_DEPTH, + DEFAULT_MAX_ASSOCIATED_KEYS, + DEFAULT_MAX_RUNTIME_CALL_STACK_HEIGHT, + DEFAULT_MINIMUM_DELEGATION_AMOUNT, + DEFAULT_STRICT_ARGUMENT_CHECKING, + CASPER_VESTING_SCHEDULE_PERIOD_MILLIS, + None, + *DEFAULT_WASM_CONFIG, + *DEFAULT_SYSTEM_CONFIG, + ); + + let run_genesis_request = { + let exec_config = { + let wasm_config = *DEFAULT_WASM_CONFIG; + let 
system_config = *DEFAULT_SYSTEM_CONFIG; + let validator_slots = DEFAULT_VALIDATOR_SLOTS; + let auction_delay = DEFAULT_AUCTION_DELAY; + let locked_funds_period_millis = CASPER_LOCKED_FUNDS_PERIOD_MILLIS; + let round_seigniorage_rate = DEFAULT_ROUND_SEIGNIORAGE_RATE; + let unbonding_delay = DEFAULT_UNBONDING_DELAY; + let genesis_timestamp_millis = DEFAULT_GENESIS_TIMESTAMP_MILLIS; + #[allow(deprecated)] + ExecConfig::new( + accounts, + wasm_config, + system_config, + validator_slots, + auction_delay, + locked_funds_period_millis, + round_seigniorage_rate, + unbonding_delay, + genesis_timestamp_millis, + ) + }; + + RunGenesisRequest::new( + *DEFAULT_GENESIS_CONFIG_HASH, + *DEFAULT_PROTOCOL_VERSION, + exec_config, + DEFAULT_CHAINSPEC_REGISTRY.clone(), + ) + }; + + let mut builder = InMemoryWasmTestBuilder::new_with_config(engine_config); + + builder.run_genesis(&run_genesis_request); + + builder +} + #[ignore] #[test] fn should_add_new_bid() { @@ -174,11 +231,7 @@ fn should_add_new_bid() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); let exec_request_1 = ExecuteRequestBuilder::standard( *BID_ACCOUNT_1_ADDR, @@ -218,11 +271,7 @@ fn should_increase_existing_bid() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); let exec_request_1 = ExecuteRequestBuilder::standard( *BID_ACCOUNT_1_ADDR, @@ -277,11 +326,7 @@ fn should_decrease_existing_bid() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); let bid_request = ExecuteRequestBuilder::standard( 
*BID_ACCOUNT_1_ADDR, @@ -345,11 +390,7 @@ fn should_run_delegate_and_undelegate() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); let transfer_request_1 = ExecuteRequestBuilder::standard( *DEFAULT_ACCOUNT_ADDR, @@ -525,11 +566,7 @@ fn should_calculate_era_validators() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); let transfer_request_1 = ExecuteRequestBuilder::standard( *DEFAULT_ACCOUNT_ADDR, @@ -587,7 +624,7 @@ fn should_calculate_era_validators() { assert_eq!(pre_era_id, EraId::from(0)); builder.run_auction( - DEFAULT_GENESIS_TIMESTAMP_MILLIS + DEFAULT_LOCKED_FUNDS_PERIOD_MILLIS, + DEFAULT_GENESIS_TIMESTAMP_MILLIS + CASPER_LOCKED_FUNDS_PERIOD_MILLIS, Vec::new(), ); @@ -1004,11 +1041,7 @@ fn should_fail_to_get_era_validators() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); assert_eq!( builder.get_validator_weights(EraId::MAX), @@ -1035,11 +1068,7 @@ fn should_use_era_validators_endpoint_for_first_era() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); let validator_weights = builder .get_validator_weights(INITIAL_ERA_ID) @@ -1093,11 +1122,7 @@ fn should_calculate_era_validators_multiple_new_bids() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - 
builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); let genesis_validator_weights = builder .get_validator_weights(INITIAL_ERA_ID) @@ -1164,7 +1189,7 @@ fn should_calculate_era_validators_multiple_new_bids() { // run auction and compute validators for new era builder.run_auction( - DEFAULT_GENESIS_TIMESTAMP_MILLIS + DEFAULT_LOCKED_FUNDS_PERIOD_MILLIS, + DEFAULT_GENESIS_TIMESTAMP_MILLIS + CASPER_LOCKED_FUNDS_PERIOD_MILLIS, Vec::new(), ); // Verify first era validators @@ -1261,12 +1286,9 @@ fn undelegated_funds_should_be_released() { delegator_1_validator_1_delegate_request, ]; - let mut timestamp_millis = - DEFAULT_GENESIS_TIMESTAMP_MILLIS + DEFAULT_LOCKED_FUNDS_PERIOD_MILLIS; + let mut timestamp_millis = DEFAULT_GENESIS_TIMESTAMP_MILLIS + CASPER_LOCKED_FUNDS_PERIOD_MILLIS; - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&PRODUCTION_RUN_GENESIS_REQUEST); + let mut builder = setup(DEFAULT_ACCOUNTS.clone()); for request in post_genesis_requests { builder.exec(request).commit().expect_success(); @@ -1387,12 +1409,9 @@ fn fully_undelegated_funds_should_be_released() { delegator_1_validator_1_delegate_request, ]; - let mut timestamp_millis = - DEFAULT_GENESIS_TIMESTAMP_MILLIS + DEFAULT_LOCKED_FUNDS_PERIOD_MILLIS; - - let mut builder = InMemoryWasmTestBuilder::default(); + let mut timestamp_millis = DEFAULT_GENESIS_TIMESTAMP_MILLIS + CASPER_LOCKED_FUNDS_PERIOD_MILLIS; - builder.run_genesis(&PRODUCTION_RUN_GENESIS_REQUEST); + let mut builder = setup(DEFAULT_ACCOUNTS.clone()); for request in post_genesis_requests { builder.exec(request).commit().expect_success(); @@ -1548,12 +1567,9 @@ fn should_undelegate_delegators_when_validator_unbonds() { validator_1_partial_withdraw_bid, ]; - let mut timestamp_millis = - DEFAULT_GENESIS_TIMESTAMP_MILLIS + DEFAULT_LOCKED_FUNDS_PERIOD_MILLIS; - - let mut builder = InMemoryWasmTestBuilder::default(); + let mut timestamp_millis = DEFAULT_GENESIS_TIMESTAMP_MILLIS + 
CASPER_LOCKED_FUNDS_PERIOD_MILLIS; - builder.run_genesis(&PRODUCTION_RUN_GENESIS_REQUEST); + let mut builder = setup(DEFAULT_ACCOUNTS.clone()); for request in post_genesis_requests { builder.exec(request).commit().expect_success(); @@ -1785,12 +1801,9 @@ fn should_undelegate_delegators_when_validator_fully_unbonds() { delegator_2_delegate_request, ]; - let mut timestamp_millis = - DEFAULT_GENESIS_TIMESTAMP_MILLIS + DEFAULT_LOCKED_FUNDS_PERIOD_MILLIS; + let mut timestamp_millis = DEFAULT_GENESIS_TIMESTAMP_MILLIS + CASPER_LOCKED_FUNDS_PERIOD_MILLIS; - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&PRODUCTION_RUN_GENESIS_REQUEST); + let mut builder = setup(DEFAULT_ACCOUNTS.clone()); for request in post_genesis_requests { builder.exec(request).commit().expect_success(); @@ -1970,11 +1983,7 @@ fn should_handle_evictions() { let mut timestamp = DEFAULT_GENESIS_TIMESTAMP_MILLIS; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); builder.exec(system_fund_request).commit().expect_success(); @@ -2113,11 +2122,7 @@ fn should_validate_orphaned_genesis_delegators() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let _builder = setup(accounts); } #[should_panic(expected = "DuplicatedDelegatorEntry")] @@ -2168,11 +2173,7 @@ fn should_validate_duplicated_genesis_delegators() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let _builder = setup(accounts); } #[should_panic(expected = "InvalidDelegationRate")] @@ -2193,11 +2194,7 @@ fn should_validate_delegation_rate_of_genesis_validator() { tmp }; - let 
run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let _builder = setup(accounts); } #[should_panic(expected = "InvalidBondAmount")] @@ -2215,11 +2212,7 @@ fn should_validate_bond_amount_of_genesis_validator() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let _builder = setup(accounts); } #[ignore] @@ -2252,11 +2245,7 @@ fn should_setup_genesis_delegators() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); let _account_1 = builder .get_account(*ACCOUNT_1_ADDR) @@ -2317,11 +2306,7 @@ fn should_not_partially_undelegate_uninitialized_vesting_schedule() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); let fund_delegator_account = ExecuteRequestBuilder::standard( *DEFAULT_ACCOUNT_ADDR, @@ -2391,11 +2376,7 @@ fn should_not_fully_undelegate_uninitialized_vesting_schedule() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let mut builder = setup(accounts); let fund_delegator_account = ExecuteRequestBuilder::standard( *DEFAULT_ACCOUNT_ADDR, @@ -2964,9 +2945,7 @@ fn should_reset_delegators_stake_after_slashing() { delegator_2_validator_2_delegate_request, ]; - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&PRODUCTION_RUN_GENESIS_REQUEST); + let mut builder = setup(DEFAULT_ACCOUNTS.clone()); for request 
in post_genesis_requests { builder.exec(request).expect_success().commit(); @@ -3115,11 +3094,7 @@ fn should_validate_genesis_delegators_bond_amount() { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let _builder = setup(accounts); } fn check_validator_slots_for_accounts(accounts: usize) { @@ -3149,11 +3124,7 @@ fn check_validator_slots_for_accounts(accounts: usize) { tmp }; - let run_genesis_request = utils::create_run_genesis_request(accounts); - - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&run_genesis_request); + let _builder = setup(accounts); } #[should_panic(expected = "InvalidValidatorSlots")] @@ -3255,9 +3226,7 @@ fn should_delegate_and_redelegate() { delegator_1_validator_1_delegate_request, ]; - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&PRODUCTION_RUN_GENESIS_REQUEST); + let mut builder = setup(DEFAULT_ACCOUNTS.clone()); for request in post_genesis_requests { builder.exec(request).commit().expect_success(); @@ -3438,9 +3407,7 @@ fn should_handle_redelegation_to_inactive_validator() { delegator_2_validator_1_delegate_request, ]; - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&PRODUCTION_RUN_GENESIS_REQUEST); + let mut builder = setup(DEFAULT_ACCOUNTS.clone()); for request in post_genesis_requests { builder.exec(request).commit().expect_success(); @@ -3535,9 +3502,7 @@ fn should_handle_redelegation_to_inactive_validator() { #[ignore] #[test] fn should_enforce_minimum_delegation_amount() { - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&PRODUCTION_RUN_GENESIS_REQUEST); + let mut builder = setup(DEFAULT_ACCOUNTS.clone()); let transfer_to_validator_1 = ExecuteRequestBuilder::standard( *DEFAULT_ACCOUNT_ADDR, @@ -3616,9 +3581,7 @@ fn should_enforce_minimum_delegation_amount() { #[ignore] 
#[test] fn should_allow_delegations_with_minimal_floor_amount() { - let mut builder = InMemoryWasmTestBuilder::default(); - - builder.run_genesis(&PRODUCTION_RUN_GENESIS_REQUEST); + let mut builder = setup(DEFAULT_ACCOUNTS.clone()); let transfer_to_validator_1 = ExecuteRequestBuilder::standard( *DEFAULT_ACCOUNT_ADDR, diff --git a/execution_engine_testing/tests/src/test/system_contracts/auction_bidding.rs b/execution_engine_testing/tests/src/test/system_contracts/auction_bidding.rs index e669a7d875..edbd548816 100644 --- a/execution_engine_testing/tests/src/test/system_contracts/auction_bidding.rs +++ b/execution_engine_testing/tests/src/test/system_contracts/auction_bidding.rs @@ -1,11 +1,12 @@ use num_traits::Zero; use casper_engine_test_support::{ - utils, ExecuteRequestBuilder, InMemoryWasmTestBuilder, UpgradeRequestBuilder, DEFAULT_ACCOUNTS, - DEFAULT_ACCOUNT_ADDR, DEFAULT_ACCOUNT_PUBLIC_KEY, DEFAULT_GENESIS_TIMESTAMP_MILLIS, - DEFAULT_LOCKED_FUNDS_PERIOD_MILLIS, DEFAULT_PAYMENT, DEFAULT_PROPOSER_PUBLIC_KEY, - DEFAULT_PROTOCOL_VERSION, DEFAULT_UNBONDING_DELAY, MINIMUM_ACCOUNT_CREATION_BALANCE, - PRODUCTION_RUN_GENESIS_REQUEST, SYSTEM_ADDR, TIMESTAMP_MILLIS_INCREMENT, + utils, ExecuteRequestBuilder, InMemoryWasmTestBuilder, LmdbWasmTestBuilder, StepRequestBuilder, + UpgradeRequestBuilder, DEFAULT_ACCOUNTS, DEFAULT_ACCOUNT_ADDR, DEFAULT_ACCOUNT_PUBLIC_KEY, + DEFAULT_GENESIS_TIMESTAMP_MILLIS, DEFAULT_LOCKED_FUNDS_PERIOD_MILLIS, DEFAULT_PAYMENT, + DEFAULT_PROPOSER_PUBLIC_KEY, DEFAULT_PROTOCOL_VERSION, DEFAULT_UNBONDING_DELAY, + MINIMUM_ACCOUNT_CREATION_BALANCE, PRODUCTION_RUN_GENESIS_REQUEST, SYSTEM_ADDR, + TIMESTAMP_MILLIS_INCREMENT, }; use casper_execution_engine::core::{ engine_state::{ @@ -181,10 +182,7 @@ fn should_run_successful_bond_and_unbond_and_slashing() { builder.exec(exec_request_5).expect_success().commit(); let unbond_purses: UnbondingPurses = builder.get_unbonds(); - assert!(unbond_purses - .get(&*DEFAULT_ACCOUNT_ADDR) - .unwrap() - .is_empty()); + 
assert!(!unbond_purses.contains_key(&*DEFAULT_ACCOUNT_ADDR)); let bids: Bids = builder.get_bids(); let default_account_bid = bids.get(&DEFAULT_ACCOUNT_PUBLIC_KEY).unwrap(); @@ -540,10 +538,189 @@ fn should_run_successful_bond_and_unbond_with_release() { ); let unbond_purses: UnbondingPurses = builder.get_unbonds(); - assert!(unbond_purses + assert!(!unbond_purses.contains_key(&*DEFAULT_ACCOUNT_ADDR)); + + let bids: Bids = builder.get_bids(); + assert!(!bids.is_empty()); + + let bid = bids.get(&default_public_key_arg).expect("should have bid"); + let bid_purse = *bid.bonding_purse(); + assert_eq!( + builder.get_purse_balance(bid_purse), + U512::from(GENESIS_ACCOUNT_STAKE) - unbond_amount, // remaining funds + ); +} + +#[ignore] +#[test] +fn should_run_successful_bond_and_unbond_with_release_on_lmdb() { + let default_public_key_arg = DEFAULT_ACCOUNT_PUBLIC_KEY.clone(); + + let tempdir = tempfile::tempdir().expect("should create tempdir"); + + let mut builder = LmdbWasmTestBuilder::new_with_production_chainspec(tempdir.path()); + builder.run_genesis(&PRODUCTION_RUN_GENESIS_REQUEST); + + let default_account = builder + .get_account(*DEFAULT_ACCOUNT_ADDR) + .expect("should have default account"); + + let unbonding_purse = default_account.main_purse(); + + let exec_request = ExecuteRequestBuilder::standard( + *DEFAULT_ACCOUNT_ADDR, + CONTRACT_TRANSFER_TO_ACCOUNT, + runtime_args! { + "target" => *SYSTEM_ADDR, + "amount" => U512::from(TRANSFER_AMOUNT* 2) + }, + ) + .build(); + + builder + .scratch_exec_and_commit(exec_request) + .expect_success(); + builder.write_scratch_to_db(); + + let _system_account = builder + .get_account(*SYSTEM_ADDR) + .expect("should get account 1"); + + let _default_account = builder + .get_account(*DEFAULT_ACCOUNT_ADDR) + .expect("should get account 1"); + + let exec_request_1 = ExecuteRequestBuilder::standard( + *DEFAULT_ACCOUNT_ADDR, + CONTRACT_ADD_BID, + runtime_args! 
{ + ARG_AMOUNT => U512::from(GENESIS_ACCOUNT_STAKE), + ARG_PUBLIC_KEY => default_public_key_arg.clone(), + ARG_DELEGATION_RATE => DELEGATION_RATE, + }, + ) + .build(); + + builder + .scratch_exec_and_commit(exec_request_1) + .expect_success(); + builder.write_scratch_to_db(); + + let bids: Bids = builder.get_bids(); + let bid = bids.get(&default_public_key_arg).expect("should have bid"); + let bid_purse = *bid.bonding_purse(); + assert_eq!( + builder.get_purse_balance(bid_purse), + GENESIS_ACCOUNT_STAKE.into() + ); + + let unbond_purses: UnbondingPurses = builder.get_unbonds(); + assert_eq!(unbond_purses.len(), 0); + + // + // Advance era by calling run_auction + // + let step_request = StepRequestBuilder::new() + .with_parent_state_hash(builder.get_post_state_hash()) + .with_protocol_version(ProtocolVersion::V1_0_0) + .with_next_era_id(builder.get_era().successor()) + .with_run_auction(true) + .build(); + + builder.step_with_scratch(step_request); + + builder.write_scratch_to_db(); + + // + // Partial unbond + // + + let unbond_amount = U512::from(GENESIS_ACCOUNT_STAKE) - 1; + + let exec_request_2 = ExecuteRequestBuilder::standard( + *DEFAULT_ACCOUNT_ADDR, + CONTRACT_WITHDRAW_BID, + runtime_args! 
{ + ARG_AMOUNT => unbond_amount, + ARG_PUBLIC_KEY => default_public_key_arg.clone(), + }, + ) + .build(); + + builder + .scratch_exec_and_commit(exec_request_2) + .expect_success(); + + builder.write_scratch_to_db(); + + let unbond_purses: UnbondingPurses = builder.get_unbonds(); + assert_eq!(unbond_purses.len(), 1); + + let unbond_list = unbond_purses .get(&*DEFAULT_ACCOUNT_ADDR) - .unwrap() - .is_empty()); + .expect("should have unbond"); + assert_eq!(unbond_list.len(), 1); + assert_eq!( + unbond_list[0].validator_public_key(), + &default_public_key_arg, + ); + assert!(unbond_list[0].is_validator()); + + assert_eq!(unbond_list[0].era_of_creation(), INITIAL_ERA_ID + 1); + + let unbond_era_1 = unbond_list[0].era_of_creation(); + + let account_balance_before_auction = builder.get_purse_balance(unbonding_purse); + + let unbond_purses: UnbondingPurses = builder.get_unbonds(); + assert_eq!(unbond_purses.len(), 1); + + let unbond_list = unbond_purses + .get(&DEFAULT_ACCOUNT_ADDR) + .expect("should have unbond"); + assert_eq!(unbond_list.len(), 1); + assert_eq!( + unbond_list[0].validator_public_key(), + &default_public_key_arg, + ); + assert!(unbond_list[0].is_validator()); + + assert_eq!( + builder.get_purse_balance(unbonding_purse), + account_balance_before_auction, // Not paid yet + ); + + let unbond_era_2 = unbond_list[0].era_of_creation(); + + assert_eq!(unbond_era_2, unbond_era_1); // era of withdrawal didn't change since first run + + let era_id_before = builder.get_era(); + // + // Advance state to hit the unbonding period + // + for _ in 0..=builder.get_unbonding_delay() { + let step_request = StepRequestBuilder::new() + .with_parent_state_hash(builder.get_post_state_hash()) + .with_protocol_version(ProtocolVersion::V1_0_0) + .with_next_era_id(builder.get_era().successor()) + .with_run_auction(true) + .build(); + + builder.step_with_scratch(step_request); + + builder.write_scratch_to_db(); + } + + let era_id_after = builder.get_era(); + + 
assert_ne!(era_id_before, era_id_after); + + let unbond_purses: UnbondingPurses = builder.get_unbonds(); + assert!( + !unbond_purses.contains_key(&*DEFAULT_ACCOUNT_ADDR), + "{:?}", + unbond_purses + ); let bids: Bids = builder.get_bids(); assert!(!bids.is_empty()); @@ -732,10 +909,7 @@ fn should_run_successful_unbond_funds_after_changing_unbonding_delay() { ); let unbond_purses: UnbondingPurses = builder.get_unbonds(); - assert!(unbond_purses - .get(&*DEFAULT_ACCOUNT_ADDR) - .unwrap() - .is_empty()); + assert!(!unbond_purses.contains_key(&*DEFAULT_ACCOUNT_ADDR)); let bids: Bids = builder.get_bids(); assert!(!bids.is_empty()); diff --git a/execution_engine_testing/tests/src/test/system_contracts/mint.rs b/execution_engine_testing/tests/src/test/system_contracts/mint.rs new file mode 100644 index 0000000000..0a92e1562b --- /dev/null +++ b/execution_engine_testing/tests/src/test/system_contracts/mint.rs @@ -0,0 +1,206 @@ +use casper_engine_test_support::{ + auction, ExecuteRequestBuilder, LmdbWasmTestBuilder, DEFAULT_ACCOUNT_ADDR, +}; +use casper_types::{runtime_args, RuntimeArgs, URef, U512}; + +use tempfile::TempDir; + +const TEST_DELEGATOR_INITIAL_ACCOUNT_BALANCE: u64 = 1_000_000 * 1_000_000_000; + +const CONTRACT_BURN: &str = "burn.wasm"; +const CONTRACT_TRANSFER_TO_NAMED_PURSE: &str = "transfer_to_named_purse.wasm"; + +const ARG_AMOUNT: &str = "amount"; + +const ARG_PURSE_NAME: &str = "purse_name"; + +#[ignore] +#[test] +fn should_empty_purse_when_burning_above_balance() { + let data_dir = TempDir::new().expect("should create temp dir"); + let mut builder = LmdbWasmTestBuilder::new(data_dir.as_ref()); + let source = *DEFAULT_ACCOUNT_ADDR; + + let delegator_keys = auction::generate_public_keys(1); + let validator_keys = auction::generate_public_keys(1); + + auction::run_genesis_and_create_initial_accounts( + &mut builder, + &validator_keys, + delegator_keys + .iter() + .map(|public_key| public_key.to_account_hash()) + .collect::>(), + 
U512::from(TEST_DELEGATOR_INITIAL_ACCOUNT_BALANCE), + ); + + let initial_supply = builder.total_supply(None); + let purse_name = "purse"; + let purse_amount = U512::from(10_000_000_000u64); + + // Create purse and transfer tokens to it + let exec_request = ExecuteRequestBuilder::standard( + source, + CONTRACT_TRANSFER_TO_NAMED_PURSE, + runtime_args! { + ARG_PURSE_NAME => purse_name, + ARG_AMOUNT => purse_amount, + }, + ) + .build(); + + builder.exec(exec_request).expect_success().commit(); + + let account = builder.get_account(source).expect("should have account"); + + let purse_uref: URef = account.named_keys()[purse_name] + .into_uref() + .expect("should be uref"); + + assert_eq!( + builder + .get_purse_balance_result(purse_uref) + .motes() + .cloned() + .unwrap(), + purse_amount + ); + + // Burn part of tokens in a purse + let num_of_tokens_to_burn = U512::from(2_000_000_000u64); + let num_of_tokens_after_burn = U512::from(8_000_000_000u64); + + let exec_request = ExecuteRequestBuilder::standard( + source, + CONTRACT_BURN, + runtime_args! { + ARG_PURSE_NAME => purse_name, + ARG_AMOUNT => num_of_tokens_to_burn, + }, + ) + .build(); + + builder.exec(exec_request).expect_success().commit(); + + assert_eq!( + builder + .get_purse_balance_result(purse_uref) + .motes() + .cloned() + .unwrap(), + num_of_tokens_after_burn + ); + + // Burn rest of tokens in a purse + let num_of_tokens_to_burn = U512::from(8_000_000_000u64); + let num_of_tokens_after_burn = U512::zero(); + + let exec_request = ExecuteRequestBuilder::standard( + source, + CONTRACT_BURN, + runtime_args! 
{ + ARG_PURSE_NAME => purse_name, + ARG_AMOUNT => num_of_tokens_to_burn, + }, + ) + .build(); + + builder.exec(exec_request).expect_success().commit(); + + assert_eq!( + builder + .get_purse_balance_result(purse_uref) + .motes() + .cloned() + .unwrap(), + num_of_tokens_after_burn + ); + + let supply_after_burns = builder.total_supply(None); + let expected_supply_after_burns = initial_supply - U512::from(10_000_000_000u64); + + assert_eq!(supply_after_burns, expected_supply_after_burns); +} + +#[ignore] +#[test] +fn should_not_burn_excess_tokens() { + let data_dir = TempDir::new().expect("should create temp dir"); + let mut builder = LmdbWasmTestBuilder::new(data_dir.as_ref()); + let source = *DEFAULT_ACCOUNT_ADDR; + + let delegator_keys = auction::generate_public_keys(1); + let validator_keys = auction::generate_public_keys(1); + + auction::run_genesis_and_create_initial_accounts( + &mut builder, + &validator_keys, + delegator_keys + .iter() + .map(|public_key| public_key.to_account_hash()) + .collect::>(), + U512::from(TEST_DELEGATOR_INITIAL_ACCOUNT_BALANCE), + ); + + let initial_supply = builder.total_supply(None); + let purse_name = "purse"; + let purse_amount = U512::from(10_000_000_000u64); + + // Create purse and transfer tokens to it + let exec_request = ExecuteRequestBuilder::standard( + source, + CONTRACT_TRANSFER_TO_NAMED_PURSE, + runtime_args! 
{ + ARG_PURSE_NAME => purse_name, + ARG_AMOUNT => purse_amount, + }, + ) + .build(); + + builder.exec(exec_request).expect_success().commit(); + + let account = builder.get_account(source).expect("should have account"); + + let purse_uref: URef = account.named_keys()[purse_name] + .into_uref() + .expect("should be uref"); + + assert_eq!( + builder + .get_purse_balance_result(purse_uref) + .motes() + .cloned() + .unwrap(), + purse_amount + ); + + // Try to burn more than is in the purse + let num_of_tokens_to_burn = U512::MAX; + let num_of_tokens_after_burn = U512::zero(); + + let exec_request = ExecuteRequestBuilder::standard( + source, + CONTRACT_BURN, + runtime_args! { + ARG_PURSE_NAME => purse_name, + ARG_AMOUNT => num_of_tokens_to_burn, + }, + ) + .build(); + + builder.exec(exec_request).expect_success().commit(); + + assert_eq!( + builder + .get_purse_balance_result(purse_uref) + .motes() + .cloned() + .unwrap(), + num_of_tokens_after_burn, + ); + + let supply_after_burns = builder.total_supply(None); + let expected_supply_after_burns = initial_supply - U512::from(10_000_000_000u64); + + assert_eq!(supply_after_burns, expected_supply_after_burns); +} diff --git a/execution_engine_testing/tests/src/test/system_contracts/mod.rs b/execution_engine_testing/tests/src/test/system_contracts/mod.rs index 9a75a324de..a2fe0ef6ef 100644 --- a/execution_engine_testing/tests/src/test/system_contracts/mod.rs +++ b/execution_engine_testing/tests/src/test/system_contracts/mod.rs @@ -2,5 +2,6 @@ mod auction; mod auction_bidding; mod genesis; mod handle_payment; +mod mint; mod standard_payment; mod upgrade; diff --git a/json_rpc/src/lib.rs b/json_rpc/src/lib.rs index f82a79cce6..0e9fc049a9 100644 --- a/json_rpc/src/lib.rs +++ b/json_rpc/src/lib.rs @@ -40,7 +40,7 @@ //! let path = "rpc"; //! let max_body_bytes = 1024; //! let allow_unknown_fields = false; -//! let route = casper_json_rpc::route(path, max_body_bytes, handlers, allow_unknown_fields); +//! 
let route = casper_json_rpc::route(path, max_body_bytes, handlers, allow_unknown_fields, None); //! //! // Convert it into a `Service` and run it. //! let make_svc = hyper::service::make_service_fn(move |_| { @@ -96,6 +96,7 @@ pub use response::Response; const JSON_RPC_VERSION: &str = "2.0"; /// Specifies the CORS origin +#[derive(Debug)] pub enum CorsOrigin { /// Any (*) origin is allowed. Any, @@ -103,32 +104,31 @@ pub enum CorsOrigin { Specified(String), } -/// Constructs a set of warp filters suitable for use in a JSON-RPC server. -/// -/// `path` specifies the exact HTTP path for JSON-RPC requests, e.g. "rpc" will match requests on -/// exactly "/rpc", and not "/rpc/other". -/// -/// `max_body_bytes` sets an upper limit for the number of bytes in the HTTP request body. For -/// further details, see -/// [`warp::filters::body::content_length_limit`](https://docs.rs/warp/latest/warp/filters/body/fn.content_length_limit.html). -/// -/// `handlers` is the map of functions to which incoming requests will be dispatched. These are -/// keyed by the JSON-RPC request's "method". -/// -/// If `allow_unknown_fields` is `false`, requests with unknown fields will cause the server to -/// respond with an error. -/// -/// For further details, see the docs for the [`filters`] functions. -pub fn route>( - path: P, - max_body_bytes: u32, - handlers: RequestHandlers, - allow_unknown_fields: bool, -) -> BoxedFilter<(impl Reply,)> { - filters::base_filter(path, max_body_bytes) - .and(filters::main_filter(handlers, allow_unknown_fields)) - .recover(filters::handle_rejection) - .boxed() +impl CorsOrigin { + /// Converts the [`CorsOrigin`] into a CORS [`Builder`](warp::cors::Builder). + #[inline] + pub fn to_cors_builder(&self) -> warp::cors::Builder { + match self { + CorsOrigin::Any => warp::cors().allow_any_origin(), + CorsOrigin::Specified(origin) => warp::cors().allow_origin(origin.as_str()), + } + } + + /// Parses a [`CorsOrigin`] from a given configuration string. 
+ /// + /// The input string will be parsed as follows: + /// + /// * `""` (empty string): No CORS Origin (i.e. returns [`None`]). + /// * `"*"`: [`CorsOrigin::Any`]. + /// * otherwise, returns `CorsOrigin::Specified(raw)`. + #[inline] + pub fn parse_str>(raw: T) -> Option { + match raw.as_ref() { + "" => None, + "*" => Some(CorsOrigin::Any), + _ => Some(CorsOrigin::Specified(raw.to_string())), + } + } } /// Constructs a set of warp filters suitable for use in a JSON-RPC server. @@ -146,32 +146,52 @@ pub fn route>( /// If `allow_unknown_fields` is `false`, requests with unknown fields will cause the server to /// respond with an error. /// -/// Note that this is a convenience function combining the lower-level functions in [`filters`] -/// along with [a warp CORS filter](https://docs.rs/warp/latest/warp/filters/cors/index.html) which +/// If `cors_header` is `Some`, it is used to add [a warp CORS +/// filter](https://docs.rs/warp/latest/warp/filters/cors/index.html) which +/// /// * allows any origin or specified origin /// * allows "content-type" as a header /// * allows the method "POST" /// /// For further details, see the docs for the [`filters`] functions. 
-pub fn route_with_cors>( +pub fn route>( path: P, max_body_bytes: u32, handlers: RequestHandlers, allow_unknown_fields: bool, - cors_header: &CorsOrigin, -) -> BoxedFilter<(impl Reply,)> { - filters::base_filter(path, max_body_bytes) + cors_header: Option<&CorsOrigin>, +) -> BoxedFilter<(Box,)> { + let base = filters::base_filter(path, max_body_bytes) .and(filters::main_filter(handlers, allow_unknown_fields)) - .recover(filters::handle_rejection) - .with(match cors_header { - CorsOrigin::Any => warp::cors() - .allow_any_origin() - .allow_header(CONTENT_TYPE) - .allow_method(Method::POST), - CorsOrigin::Specified(origin) => warp::cors() - .allow_origin(origin.as_str()) - .allow_header(CONTENT_TYPE) - .allow_method(Method::POST), - }) - .boxed() + .recover(filters::handle_rejection); + + if let Some(cors_origin) = cors_header { + let cors = cors_origin + .to_cors_builder() + .allow_header(CONTENT_TYPE) + .allow_method(Method::POST) + .build(); + base.with(cors).map(box_reply).boxed() + } else { + base.map(box_reply).boxed() + } +} + +/// Boxes a reply of a warp filter. +/// +/// Can be combined with [`Filter::boxed`] through [`Filter::map`] to erase the type on filters: +/// +/// ```rust +/// use warp::{Filter, filters::BoxedFilter, http::Response, reply::Reply}; +/// # use casper_json_rpc::box_reply; +/// +/// let filter: BoxedFilter<(Box,)> = warp::any() +/// .map(|| Response::builder().body("hello world")) +/// .map(box_reply).boxed(); +/// # drop(filter); +/// ``` +#[inline(always)] +pub fn box_reply(reply: T) -> Box { + let boxed: Box = Box::new(reply); + boxed } diff --git a/node/CHANGELOG.md b/node/CHANGELOG.md index 137581ee6f..3dd00915ad 100644 --- a/node/CHANGELOG.md +++ b/node/CHANGELOG.md @@ -11,6 +11,32 @@ All notable changes to this project will be documented in this file. 
The format +## Unreleased + +### Added +* Add `network.maximum_frame_size` to the chainspec +* Add `tcp_connect_timeout`, `setup_timeout`, `tcp_connect_attempts`, `tcp_connect_base_backoff`, `significant_error_backoff`, `permanent_error_backoff`, `successful_reconnect_delay`, `flaky_connection_threshold`, `max_incoming_connections` and `max_outgoing_connections` to the `network.conman` section in the config. +* `use_validator_broadcast` can now be configured to control the node's broadcast behavior. +* `use_mixed_gossip` can now be configured to enable or disable the node's gossip peer selection. +* Add `net_gossip_requests`, `net_overflow_buffer_count` and `net_overflow_buffer_bytes` metrics. +* Add a new family of per-channel metrics, namely `request_out_count`, `request_out_bytes`, `response_in_count`, `response_in_bytes`, `request_in_count`, `request_in_bytes`, `response_out_count` and `response_out_bytes`. These are labelled by channel. + + +### Changed +* The node's connection model has changed, now only establishing a single connection per peer. The direction of the connection is chosen based on the randomly generated `NodeID`s. +* Node-to-node communication is now based on the [`juliet`](https://docs.rs/juliet) networking protocol, allowing for multiplexed communication that includes backpressure. This will result in some operations having lower latency and increased reliability under load. +* Rename `BlockValidator` component to `ProposedBlockValidator`, and corresponding config section `block_validator` to `proposed_block_validator`. +* Many previously chatty log messages have been rate limited. This in turn allowed increasing some `DEBUG` level messages to the more appropriate `WARN`, as they are no longer infinitely remotely triggerable. 
+ +### Removed +* The `max_in_flight_demands` and `max_incoming_message_rate_non_validators` settings have been removed from the network section of the configuration file due to the changes in the underlying networking protocol. +* The `max_addr_pending_time` setting has been removed due to new connection management. +* The `max_incoming_peer_connections` setting has been removed, as only a single connection per peer is allowed now. +* The `max_outgoing_byte_rate_non_validators` setting has been removed. +* The tarpit feature has been removed along with the respective `tarpit_version_threshold`, `tarpit_duration` and `tarpit_chance` configuration settings. +* The validation of the maximum network message size setting in the chainspec based on specimen generation has been removed. +* The following metrics have been deprecated and will constantly show as `0`: `net_queued_messages`, `net_out_count_protocol`, `net_out_count_consensus`, `net_out_count_deploy_gossip`, `net_out_count_block_gossip`, `net_out_count_finality_signature_gossip`, `net_out_count_address_gossip`, `net_out_count_deploy_transfer`, `net_out_count_block_transfer`, `net_out_count_trie_transfer`, `net_out_count_other`, `net_out_bytes_protocol`, `net_out_bytes_consensus`, `net_out_bytes_deploy_gossip`, `net_out_bytes_block_gossip`, `net_out_bytes_finality_signature_gossip`, `net_out_bytes_address_gossip`, `net_out_bytes_deploy_transfer`, `net_out_bytes_block_transfer`, `net_out_bytes_trie_transfer`, `net_out_bytes_other`, `net_out_state_connecting`, `net_out_state_waiting`, `net_out_state_connected`, `net_out_state_blocked`, `net_out_state_loopback`, `net_in_count_protocol`, `net_in_count_consensus`, `net_in_count_deploy_gossip`, `net_in_count_block_gossip`, `net_in_count_finality_signature_gossip`, `net_in_count_address_gossip`, `net_in_count_deploy_transfer`, `net_in_count_block_transfer`, `net_in_count_trie_transfer`, `net_in_count_other`, `net_in_bytes_protocol`, `net_in_bytes_consensus`, 
`net_in_bytes_deploy_gossip`, `net_in_bytes_block_gossip`, `net_in_bytes_finality_signature_gossip`, `net_in_bytes_address_gossip`, `net_in_bytes_deploy_transfer`, `net_in_bytes_block_transfer`, `net_in_bytes_trie_transfer`, `net_in_bytes_other`, `requests_for_trie_accepted`, `requests_for_trie_finished` and `accumulated_outgoing_limiter_delay`. + ## 1.5.6 ### Changed @@ -30,6 +56,11 @@ All notable changes to this project will be documented in this file. The format ## 1.5.4 ### Added +* The network handshake now contains the hash of the chainspec used and will be successful only if they match. +* Add an `identity` option to load existing network identity certificates signed by a CA. +* TLS connection keys can now be logged using the `network.keylog_location` setting (similar to `SSLKEYLOGFILE` envvar found in other applications). +* Add a `lock_status` field to the JSON representation of the `ContractPackage` values. +* Unit tests can be run with JSON log output by setting a `NODE_TEST_LOG=json` environment variable. * New environment variable `CL_EVENT_QUEUE_DUMP_THRESHOLD` to enable dumping of queue event counts to log when a certain threshold is exceeded. * Add initial support for private chains. * Add support for CA signed client certificates for private chains. @@ -41,6 +72,8 @@ All notable changes to this project will be documented in this file. The format * `core.round_seigniorage_rate` reduced to `[7, 175070816]`. * `highway.block_gas_limit` reduced to `4_000_000_000_000`. * The `state_identifier` parameter of the `query_global_state` JSON-RPC method is now optional. If no `state_identifier` is specified, the highest complete block known to the node will be used to fulfill the request. +* The underlying network protocol has been changed, now supports multiplexing for better latency and proper backpressuring across nodes. 
+* Any metrics containing queue names "network_low_priority" and "network_incoming" have had said portion renamed to "message_low_priority" and "message_incoming". * `state_get_account_info` RPC handler can now handle an `AccountIdentifier` as a parameter. * Replace the `sync_to_genesis` node config field with `sync_handling`. * The new `sync_handling` field accepts three values: @@ -48,6 +81,7 @@ All notable changes to this project will be documented in this file. The format - `ttl` - node will attempt to acquire all block data to comply with time to live enforcement - `nosync` - node will only acquire blocks moving forward * Make the `network.estimator_weights` section of the node config more fine-grained to provide more precise throttling of non-validator traffic. +* Any IPv6 address resolved for the node's own public IP will now be ignored, resulting in fewer connectivity issues on nodes misconfigured due to using an older installation script. ### Removed * The section `consensus.highway.round_success_meter` has been removed from the config file as no longer relevant with the introduction of a new method of determining the round exponent in Highway. @@ -60,6 +94,9 @@ All notable changes to this project will be documented in this file. The format ### Security * Update `openssl` to version 0.10.55 as mitigation for [RUSTSEC-2023-0044](https://rustsec.org/advisories/RUSTSEC-2023-0044). +### Removed +* There is no more weighted rate limiting on incoming traffic; instead, the node dynamically adjusts allowed rates from peers based on available resources. This resulted in the removal of the `estimator_weights` configuration option and the `accumulated_incoming_limiter_delay` metric. 
+ ## 1.5.3 diff --git a/node/Cargo.toml b/node/Cargo.toml index ad82ef56a1..1b9b4b7d4d 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -16,6 +16,7 @@ exclude = ["proptest-regressions"] ansi_term = "0.12.1" anyhow = "1" aquamarine = "0.1.12" +array-init = "2.0.1" async-trait = "0.1.50" backtrace = "0.3.50" base16 = "0.2.1" @@ -32,8 +33,7 @@ either = { version = "1", features = ["serde"] } enum-iterator = "0.6.0" erased-serde = "0.3.18" fs2 = "0.4.3" -futures = "0.3.5" -futures-io = "0.3.5" +futures = "0.3.21" hex-buffer-serde = "0.3.0" hex_fmt = "0.3.0" hostname = "0.3.0" @@ -41,6 +41,7 @@ http = "0.2.1" humantime = "2.1.0" hyper = "0.14.26" itertools = "0.10.0" +juliet = { version = "0.3.0", features = ["tracing"] } libc = "0.2.66" linked-hash-map = "0.5.3" lmdb-rkv = "0.14" @@ -71,16 +72,15 @@ signature = "1" smallvec = { version = "1", features = ["serde"] } static_assertions = "1" stats_alloc = "0.1.8" -structopt = "0.3.14" +structopt = "0.3.26" strum = { version = "0.24.1", features = ["strum_macros", "derive"] } sys-info = "0.8.0" tempfile = "3.4.0" thiserror = "1" -tokio = { version = "1", features = ["macros", "net", "rt-multi-thread", "sync", "time"] } +tokio = { version = "1.37.0", features = ["macros", "net", "rt-multi-thread", "sync", "time", "parking_lot"] } tokio-openssl = "0.6.1" -tokio-serde = { version = "0.8.0", features = ["bincode"] } tokio-stream = { version = "0.1.4", features = ["sync"] } -tokio-util = { version = "0.6.4", features = ["codec"] } +tokio-util = { version = "0.6.4", features = ["codec", "compat"] } toml = "0.5.6" tower = { version = "0.4.6", features = ["limit"] } tracing = "0.1.18" diff --git a/node/src/components.rs b/node/src/components.rs index 491edb0c4e..9b88954abb 100644 --- a/node/src/components.rs +++ b/node/src/components.rs @@ -46,7 +46,6 @@ pub(crate) mod block_accumulator; pub(crate) mod block_synchronizer; -pub(crate) mod block_validator; pub mod consensus; pub mod contract_runtime; pub(crate) mod 
deploy_acceptor; @@ -55,6 +54,7 @@ pub(crate) mod diagnostics_port; pub(crate) mod event_stream_server; pub(crate) mod fetcher; pub(crate) mod gossiper; +pub(crate) mod proposed_block_validator; // The `in_memory_network` is public for use in doctests. #[cfg(test)] pub mod in_memory_network; @@ -188,7 +188,7 @@ pub(crate) trait PortBoundComponent: InitializedComponent { } match self.listen(effect_builder) { - Ok(effects) => (effects, ComponentState::Initialized), + Ok(effects) => (effects, ComponentState::Initializing), Err(error) => (Effects::new(), ComponentState::Fatal(format!("{}", error))), } } @@ -199,7 +199,11 @@ pub(crate) trait PortBoundComponent: InitializedComponent { ) -> Result, Self::Error>; } +/// A component that is subscribing to changes in the validator set. pub(crate) trait ValidatorBoundComponent: Component { + /// Notifies the component that the validator set has changed. + /// + /// This function is guaranteed to be called whenever a new era begins. fn handle_validators( &mut self, effect_builder: EffectBuilder, diff --git a/node/src/components/block_accumulator.rs b/node/src/components/block_accumulator.rs index 00a448e658..bd77e9724e 100644 --- a/node/src/components/block_accumulator.rs +++ b/node/src/components/block_accumulator.rs @@ -59,10 +59,6 @@ pub(crate) use sync_instruction::SyncInstruction; const COMPONENT_NAME: &str = "block_accumulator"; -/// If a peer "informs" us about more than the expected number of new blocks times this factor, -/// they are probably spamming, and we refuse to create new block acceptors for them. -const PEER_RATE_LIMIT_MULTIPLIER: usize = 2; - /// A cache of pending blocks and finality signatures that are gossiped to this node. /// /// Announces new blocks and finality signatures once they become valid. @@ -243,18 +239,9 @@ impl BlockAccumulator { block_timestamps.pop_front(); } - // Assume a block time of at least 1 millisecond, so we don't divide by zero. 
- let min_block_time = self.min_block_time.max(TimeDiff::from_millis(1)); - let expected_blocks = (purge_interval / min_block_time) as usize; - let max_block_count = PEER_RATE_LIMIT_MULTIPLIER.saturating_mul(expected_blocks); - if block_timestamps.len() >= max_block_count { - warn!( - ?sender, %block_hash, - "rejecting block hash from peer who sent us more than {} within {}", - max_block_count, self.purge_interval, - ); - return; - } + // Rate limiting has been removed here, as it was incorrectly triggered by block + // hashes passed in through historical sync. + block_timestamps.push_back((block_hash, Timestamp::now())); } @@ -805,9 +792,19 @@ impl Component for BlockAccumulator { self.upsert_acceptor(block_hash, era_id, Some(sender)); Effects::new() } - Event::ReceivedBlock { block, sender } => { + Event::ReceivedBlock { + block, + sender, + ticket, + } => { let meta_block = MetaBlock::new(block, vec![], MetaBlockState::new()); - self.register_block(effect_builder, meta_block, Some(sender)) + let rv = self.register_block(effect_builder, meta_block, Some(sender)); + + // We are considering the work complete once we have created the effects. It may be + // beneficial to pass the ticket through further to also include the IO work. + drop(ticket); + + rv } Event::CreatedFinalitySignature { finality_signature } => { debug!(%finality_signature, "BlockAccumulator: CreatedFinalitySignature"); @@ -816,8 +813,18 @@ impl Component for BlockAccumulator { Event::ReceivedFinalitySignature { finality_signature, sender, + ticket, } => { - self.register_finality_signature(effect_builder, *finality_signature, Some(sender)) + let rv = self.register_finality_signature( + effect_builder, + *finality_signature, + Some(sender), + ); + + // After registering the signature, we consider the work complete. 
+ drop(ticket); + + rv } Event::ExecutedBlock { meta_block } => { let height = meta_block.block.header().height(); diff --git a/node/src/components/block_accumulator/error.rs b/node/src/components/block_accumulator/error.rs index a766588fa3..a7ed1cbf2e 100644 --- a/node/src/components/block_accumulator/error.rs +++ b/node/src/components/block_accumulator/error.rs @@ -5,7 +5,7 @@ use casper_types::{crypto, EraId}; use crate::types::{BlockHash, BlockValidationError, MetaBlockMergeError, NodeId}; -#[derive(Error, Debug)] +#[derive(Clone, Error, Debug)] pub(crate) enum InvalidGossipError { #[error("received cryptographically invalid block for: {block_hash} from: {peer} with error: {validation_error}")] Block { @@ -30,7 +30,7 @@ impl InvalidGossipError { } } -#[derive(Error, Debug)] +#[derive(Clone, Copy, Error, Debug)] pub(crate) enum Bogusness { #[error("peer is not a validator in current era")] NotAValidator, @@ -38,7 +38,7 @@ pub(crate) enum Bogusness { SignatureEraIdMismatch, } -#[derive(Error, Debug)] +#[derive(Clone, Error, Debug)] pub(crate) enum Error { #[error(transparent)] InvalidGossip(Box), diff --git a/node/src/components/block_accumulator/event.rs b/node/src/components/block_accumulator/event.rs index 0c7a7ee60e..13318fba0e 100644 --- a/node/src/components/block_accumulator/event.rs +++ b/node/src/components/block_accumulator/event.rs @@ -8,6 +8,7 @@ use derive_more::From; use casper_types::EraId; use crate::{ + components::network::Ticket, effect::requests::BlockAccumulatorRequest, types::{Block, BlockHash, BlockSignatures, FinalitySignature, MetaBlock, NodeId}, }; @@ -24,6 +25,7 @@ pub(crate) enum Event { ReceivedBlock { block: Arc, sender: NodeId, + ticket: Ticket, }, CreatedFinalitySignature { finality_signature: Box, @@ -31,6 +33,7 @@ pub(crate) enum Event { ReceivedFinalitySignature { finality_signature: Box, sender: NodeId, + ticket: Ticket, }, ExecutedBlock { meta_block: MetaBlock, @@ -60,7 +63,11 @@ impl Display for Event { sender, block_hash ) } 
- Event::ReceivedBlock { block, sender } => { + Event::ReceivedBlock { + block, + sender, + ticket: _, + } => { write!(f, "received {} from {}", block, sender) } Event::CreatedFinalitySignature { finality_signature } => { @@ -69,6 +76,7 @@ impl Display for Event { Event::ReceivedFinalitySignature { finality_signature, sender, + ticket: _, } => { write!(f, "received {} from {}", finality_signature, sender) } diff --git a/node/src/components/block_accumulator/metrics.rs b/node/src/components/block_accumulator/metrics.rs index 5e44639b02..e0e3661bc0 100644 --- a/node/src/components/block_accumulator/metrics.rs +++ b/node/src/components/block_accumulator/metrics.rs @@ -1,44 +1,32 @@ use prometheus::{IntGauge, Registry}; -use crate::unregister_metric; +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; /// Metrics for the block accumulator component. #[derive(Debug)] pub(super) struct Metrics { /// Total number of BlockAcceptors contained in the BlockAccumulator. - pub(super) block_acceptors: IntGauge, + pub(super) block_acceptors: RegisteredMetric, /// Number of child block hashes that we know of and that will be used in order to request next /// blocks. - pub(super) known_child_blocks: IntGauge, - registry: Registry, + pub(super) known_child_blocks: RegisteredMetric, } impl Metrics { /// Creates a new instance of the block accumulator metrics, using the given prefix. 
pub fn new(registry: &Registry) -> Result { - let block_acceptors = IntGauge::new( + let block_acceptors = registry.new_int_gauge( "block_accumulator_block_acceptors".to_string(), "number of block acceptors in the Block Accumulator".to_string(), )?; - let known_child_blocks = IntGauge::new( + let known_child_blocks = registry.new_int_gauge( "block_accumulator_known_child_blocks".to_string(), "number of blocks received by the Block Accumulator for which we know the hash of the child block".to_string(), )?; - registry.register(Box::new(block_acceptors.clone()))?; - registry.register(Box::new(known_child_blocks.clone()))?; - Ok(Metrics { block_acceptors, known_child_blocks, - registry: registry.clone(), }) } } - -impl Drop for Metrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.block_acceptors); - unregister_metric!(self.registry, self.known_child_blocks); - } -} diff --git a/node/src/components/block_accumulator/tests.rs b/node/src/components/block_accumulator/tests.rs index 8d5d2f04b0..96605c357a 100644 --- a/node/src/components/block_accumulator/tests.rs +++ b/node/src/components/block_accumulator/tests.rs @@ -26,7 +26,7 @@ use crate::{ ALICE_NODE_ID, ALICE_PUBLIC_KEY, ALICE_SECRET_KEY, BOB_NODE_ID, BOB_PUBLIC_KEY, BOB_SECRET_KEY, CAROL_PUBLIC_KEY, CAROL_SECRET_KEY, }, - network::Identity as NetworkIdentity, + network::{Identity as NetworkIdentity, Ticket}, storage::{self, Storage}, }, effect::{ @@ -303,10 +303,9 @@ fn upsert_acceptor() { accumulator.register_local_tip(0, EraId::new(0)); - let max_block_count = - PEER_RATE_LIMIT_MULTIPLIER * ((config.purge_interval / block_time) as usize); + let target_block_count = 10; - for _ in 0..max_block_count { + for _ in 0..target_block_count { accumulator.upsert_acceptor( BlockHash::random(&mut rng), Some(era0), @@ -314,23 +313,18 @@ fn upsert_acceptor() { ); } - assert_eq!(accumulator.block_acceptors.len(), max_block_count); + assert_eq!(accumulator.block_acceptors.len(), target_block_count); let 
block_hash = BlockHash::random(&mut rng); - // Alice has sent us too many blocks; we don't register this one. - accumulator.upsert_acceptor(block_hash, Some(era0), Some(*ALICE_NODE_ID)); - assert_eq!(accumulator.block_acceptors.len(), max_block_count); - assert!(!accumulator.block_acceptors.contains_key(&block_hash)); - // Bob hasn't sent us anything yet. But we don't insert without an era ID. accumulator.upsert_acceptor(block_hash, None, Some(*BOB_NODE_ID)); - assert_eq!(accumulator.block_acceptors.len(), max_block_count); + assert_eq!(accumulator.block_acceptors.len(), target_block_count); assert!(!accumulator.block_acceptors.contains_key(&block_hash)); // With an era ID he's allowed to tell us about this one. accumulator.upsert_acceptor(block_hash, Some(era0), Some(*BOB_NODE_ID)); - assert_eq!(accumulator.block_acceptors.len(), max_block_count + 1); + assert_eq!(accumulator.block_acceptors.len(), target_block_count + 1); assert!(accumulator.block_acceptors.contains_key(&block_hash)); // And if Alice tells us about it _now_, we'll register her as a peer. @@ -353,14 +347,14 @@ fn upsert_acceptor() { }; // This should lead to a purge of said acceptor, therefore enabling us to // add another one for Alice. - assert_eq!(accumulator.block_acceptors.len(), max_block_count + 1); + assert_eq!(accumulator.block_acceptors.len(), target_block_count + 1); accumulator.upsert_acceptor( BlockHash::random(&mut rng), Some(era0), Some(*ALICE_NODE_ID), ); // Acceptor was added. - assert_eq!(accumulator.block_acceptors.len(), max_block_count + 2); + assert_eq!(accumulator.block_acceptors.len(), target_block_count + 2); // The timestamp was purged. 
assert_ne!( accumulator @@ -1578,6 +1572,7 @@ async fn block_accumulator_reactor_flow() { let event = super::Event::ReceivedFinalitySignature { finality_signature: Box::new(fin_sig_1.clone()), sender: peer_1, + ticket: Ticket::create_dummy(), }; let effects = block_accumulator.handle_event(effect_builder, &mut rng, event); assert!(effects.is_empty()); @@ -1590,6 +1585,7 @@ async fn block_accumulator_reactor_flow() { let event = super::Event::ReceivedBlock { block: Arc::new(block_1.clone()), sender: peer_2, + ticket: Ticket::create_dummy(), }; effect_builder .into_inner() @@ -1630,6 +1626,7 @@ async fn block_accumulator_reactor_flow() { let event = super::Event::ReceivedBlock { block: Arc::new(block_2.clone()), sender: peer_2, + ticket: Ticket::create_dummy(), }; let effects = block_accumulator.handle_event(effect_builder, &mut rng, event); assert!(effects.is_empty()); @@ -1832,6 +1829,7 @@ async fn block_accumulator_reactor_flow() { let event = super::Event::ReceivedBlock { block: Arc::new(older_block.clone()), sender: peer_1, + ticket: Ticket::create_dummy(), }; let effects = block_accumulator.handle_event(effect_builder, &mut rng, event); assert!(effects.is_empty()); @@ -1857,6 +1855,7 @@ async fn block_accumulator_reactor_flow() { let event = super::Event::ReceivedFinalitySignature { finality_signature: Box::new(older_block_signature), sender: peer_2, + ticket: Ticket::create_dummy(), }; let effects = block_accumulator.handle_event(effect_builder, &mut rng, event); assert!(effects.is_empty()); @@ -1892,6 +1891,7 @@ async fn block_accumulator_reactor_flow() { let event = super::Event::ReceivedFinalitySignature { finality_signature: Box::new(old_era_signature), sender: peer_2, + ticket: Ticket::create_dummy(), }; let effects = block_accumulator.handle_event(effect_builder, &mut rng, event); assert!(effects.is_empty()); @@ -1972,6 +1972,7 @@ async fn block_accumulator_doesnt_purge_with_delayed_block_execution() { let event = super::Event::ReceivedFinalitySignature 
{ finality_signature: Box::new(fin_sig_bob.clone()), sender: peer_1, + ticket: Ticket::create_dummy(), }; let effects = block_accumulator.handle_event(effect_builder, &mut rng, event); assert!(effects.is_empty()); @@ -1979,6 +1980,7 @@ async fn block_accumulator_doesnt_purge_with_delayed_block_execution() { let event = super::Event::ReceivedFinalitySignature { finality_signature: Box::new(fin_sig_carol.clone()), sender: peer_1, + ticket: Ticket::create_dummy(), }; let effects = block_accumulator.handle_event(effect_builder, &mut rng, event); assert!(effects.is_empty()); @@ -1999,6 +2001,7 @@ async fn block_accumulator_doesnt_purge_with_delayed_block_execution() { let event = super::Event::ReceivedBlock { block: Arc::new(block_1.clone()), sender: peer_2, + ticket: Ticket::create_dummy(), }; effect_builder .into_inner() diff --git a/node/src/components/block_synchronizer.rs b/node/src/components/block_synchronizer.rs index 008415cfd6..2c5bc14f51 100644 --- a/node/src/components/block_synchronizer.rs +++ b/node/src/components/block_synchronizer.rs @@ -57,6 +57,7 @@ use crate::{ FinalitySignature, FinalitySignatureId, FinalizedBlock, LegacyDeploy, MetaBlock, MetaBlockState, NodeId, SyncLeap, SyncLeapIdentifier, TrieOrChunk, ValidatorMatrix, }, + utils::rate_limited::rate_limited, NodeRng, }; @@ -1052,8 +1053,11 @@ impl BlockSynchronizer { hash_being_synced, hash_requested, } => { - warn!(%hash_being_synced, %hash_requested, - "BlockSynchronizer: global state sync is processing another request"); + rate_limited!( + PROCESSING_ANOTHER_REQUEST, + |dropped| warn!(%hash_being_synced, %hash_requested, dropped, + "BlockSynchronizer: global state sync is processing another request") + ); (None, Vec::new()) } } diff --git a/node/src/components/block_synchronizer/block_acquisition.rs b/node/src/components/block_synchronizer/block_acquisition.rs index 199df8a5ed..314ea0ded1 100644 --- a/node/src/components/block_synchronizer/block_acquisition.rs +++ 
b/node/src/components/block_synchronizer/block_acquisition.rs @@ -5,7 +5,7 @@ use std::{ use datasize::DataSize; use derive_more::Display; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, trace, warn}; use casper_hashing::Digest; use casper_types::{ProtocolVersion, PublicKey}; @@ -525,7 +525,7 @@ impl BlockAcquisitionState { let new_state = match self { BlockAcquisitionState::Initialized(block_hash, signatures) => { if header.block_hash() == *block_hash { - info!( + debug!( "BlockAcquisition: registering header for: {:?}, height: {}", block_hash, header.height() @@ -575,7 +575,7 @@ impl BlockAcquisitionState { actual: *actual_block_hash, }); } - info!( + debug!( "BlockAcquisition: registering block for: {}", header.block_hash() ); @@ -842,7 +842,7 @@ impl BlockAcquisitionState { | BlockAcquisitionState::Complete(..) => return Ok(None), }; let ret = currently_acquiring_sigs.then_some(acceptance); - info!( + debug!( signature=%cloned_sig, ?ret, "BlockAcquisition: registering finality signature for: {}", @@ -869,7 +869,7 @@ impl BlockAcquisitionState { BlockAcquisitionState::HaveBlock(block, signatures, acquired) if !need_execution_state => { - info!( + debug!( "BlockAcquisition: registering approvals hashes for: {}", block.hash() ); @@ -885,7 +885,7 @@ impl BlockAcquisitionState { if need_execution_state => { deploys.apply_approvals_hashes(approvals_hashes)?; - info!( + debug!( "BlockAcquisition: registering approvals hashes for: {}", block.hash() ); @@ -926,7 +926,7 @@ impl BlockAcquisitionState { BlockAcquisitionState::HaveBlock(block, signatures, deploys) if need_execution_state => { - info!( + debug!( "BlockAcquisition: registering global state for: {}", block.hash() ); @@ -980,7 +980,7 @@ impl BlockAcquisitionState { _, acq @ ExecutionResultsAcquisition::Needed { .. 
}, ) if need_execution_state => { - info!( + debug!( "BlockAcquisition: registering execution results hash for: {}", block.hash() ); @@ -1023,7 +1023,7 @@ impl BlockAcquisitionState { deploys, exec_results_acq, ) if need_execution_state => { - info!( + debug!( "BlockAcquisition: registering execution result or chunk for: {}", block.hash() ); @@ -1109,7 +1109,7 @@ impl BlockAcquisitionState { deploys, ExecutionResultsAcquisition::Complete { checksum, .. }, ) if need_execution_state => { - info!( + debug!( "BlockAcquisition: registering execution results stored notification for: {}", block.hash() ); @@ -1184,7 +1184,7 @@ impl BlockAcquisitionState { return Ok(None); } }; - info!("BlockAcquisition: registering deploy for: {}", block.hash()); + debug!("BlockAcquisition: registering deploy for: {}", block.hash()); let maybe_acceptance = deploys.apply_deploy(deploy_id); if deploys.needs_deploy().is_none() { let new_state = @@ -1237,7 +1237,7 @@ impl BlockAcquisitionState { ) -> Result<(), BlockAcquisitionError> { match self { BlockAcquisitionState::HaveFinalizedBlock(block_hash, _, _, enqueued) => { - info!( + debug!( "BlockAcquisition: registering block enqueued for execution for: {}", block_hash ); @@ -1269,7 +1269,7 @@ impl BlockAcquisitionState { let new_state = match self { BlockAcquisitionState::HaveFinalizedBlock(block, _, _, _) => { - info!( + debug!( "BlockAcquisition: registering block executed for: {}", *block.hash() ); @@ -1304,7 +1304,7 @@ impl BlockAcquisitionState { let new_state = match self { BlockAcquisitionState::HaveStrictFinalitySignatures(block, _) => { - info!( + debug!( "BlockAcquisition: registering marked complete for: {}", *block.hash() ); diff --git a/node/src/components/block_synchronizer/metrics.rs b/node/src/components/block_synchronizer/metrics.rs index 541fa5f09c..786e731c8a 100644 --- a/node/src/components/block_synchronizer/metrics.rs +++ b/node/src/components/block_synchronizer/metrics.rs @@ -1,6 +1,6 @@ use prometheus::{Histogram, 
Registry}; -use crate::{unregister_metric, utils}; +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; const HIST_SYNC_DURATION_NAME: &str = "historical_block_sync_duration_seconds"; const HIST_SYNC_DURATION_HELP: &str = "duration (in sec) to synchronize a historical block"; @@ -17,10 +17,9 @@ const EXPONENTIAL_BUCKET_COUNT: usize = 10; #[derive(Debug)] pub(super) struct Metrics { /// Time duration for the historical synchronizer to get a block. - pub(super) historical_block_sync_duration: Histogram, + pub(super) historical_block_sync_duration: RegisteredMetric, /// Time duration for the forward synchronizer to get a block. - pub(super) forward_block_sync_duration: Histogram, - registry: Registry, + pub(super) forward_block_sync_duration: RegisteredMetric, } impl Metrics { @@ -33,26 +32,16 @@ impl Metrics { )?; Ok(Metrics { - historical_block_sync_duration: utils::register_histogram_metric( - registry, + historical_block_sync_duration: registry.new_histogram( HIST_SYNC_DURATION_NAME, HIST_SYNC_DURATION_HELP, buckets.clone(), )?, - forward_block_sync_duration: utils::register_histogram_metric( - registry, + forward_block_sync_duration: registry.new_histogram( FWD_SYNC_DURATION_NAME, FWD_SYNC_DURATION_HELP, buckets, )?, - registry: registry.clone(), }) } } - -impl Drop for Metrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.historical_block_sync_duration); - unregister_metric!(self.registry, self.forward_block_sync_duration); - } -} diff --git a/node/src/components/block_synchronizer/trie_accumulator.rs b/node/src/components/block_synchronizer/trie_accumulator.rs index 70605020b9..9c50c286a2 100644 --- a/node/src/components/block_synchronizer/trie_accumulator.rs +++ b/node/src/components/block_synchronizer/trie_accumulator.rs @@ -11,7 +11,7 @@ use derive_more::From; use rand::seq::SliceRandom; use serde::Serialize; use thiserror::Error; -use tracing::{debug, error, trace, warn}; +use tracing::{debug, trace, warn}; use 
casper_execution_engine::storage::trie::TrieRaw; use casper_hashing::{ChunkWithProof, Digest}; @@ -168,7 +168,7 @@ impl TrieAccumulator { match trie_or_chunk { TrieOrChunk::Value(trie) => match self.partial_chunks.remove(&hash) { None => { - error!(%hash, "fetched a trie we didn't request!"); + debug!(%hash, "fetched a trie we didn't request!"); Effects::new() } Some(partial_chunks) => { @@ -194,7 +194,7 @@ impl TrieAccumulator { let count = chunk.proof().count(); let mut partial_chunks = match self.partial_chunks.remove(&digest) { None => { - error!(%digest, %index, "got a chunk that wasn't requested"); + debug!(%digest, %index, "got a chunk that wasn't requested"); return Effects::new(); } Some(partial_chunks) => partial_chunks, @@ -281,7 +281,7 @@ where let peer = match peers.last() { Some(peer) => *peer, None => { - error!(%hash, "tried to fetch trie with no peers available"); + debug!(%hash, "tried to fetch trie with no peers available"); return responder.respond(Err(Error::NoPeers(hash))).ignore(); } }; @@ -298,24 +298,26 @@ where match fetch_result { Err(error) => match self.partial_chunks.remove(hash) { None => { - error!(%id, + debug!(%id, "got a fetch result for a chunk we weren't trying to fetch", ); Effects::new() } Some(mut partial_chunks) => { - debug!(%error, %id, "error fetching trie chunk"); partial_chunks.mark_peer_unreliable(error.peer()); // try with the next peer, if possible match partial_chunks.next_peer().cloned() { - Some(next_peer) => self.try_download_chunk( - effect_builder, - id, - next_peer, - partial_chunks, - ), + Some(next_peer) => { + debug!(%error, %id, "error fetching trie chunk, trying next"); + self.try_download_chunk( + effect_builder, + id, + next_peer, + partial_chunks, + ) + } None => { - warn!(%id, "couldn't fetch chunk"); + warn!(%id, %error, "couldn't fetch chunk"); let faulty_peers = partial_chunks.unreliable_peers.clone(); partial_chunks.respond(Err(Error::PeersExhausted( Box::new(error), diff --git 
a/node/src/components/consensus.rs b/node/src/components/consensus.rs index 9524aa9617..b479e5eb19 100644 --- a/node/src/components/consensus.rs +++ b/node/src/components/consensus.rs @@ -28,9 +28,10 @@ use std::{ use datasize::DataSize; use derive_more::From; use serde::{Deserialize, Serialize}; +use thiserror::Error; use tracing::{info, trace}; -use casper_types::{EraId, Timestamp}; +use casper_types::{EraId, PublicKey, Timestamp}; use crate::{ components::Component, @@ -40,18 +41,21 @@ use crate::{ PeerBehaviorAnnouncement, }, diagnostics_port::DumpConsensusStateRequest, - incoming::{ConsensusDemand, ConsensusMessageIncoming}, + incoming::{ConsensusMessageIncoming, ConsensusRequestMessageIncoming}, requests::{ - BlockValidationRequest, ChainspecRawBytesRequest, ConsensusRequest, - ContractRuntimeRequest, DeployBufferRequest, NetworkInfoRequest, NetworkRequest, - StorageRequest, + ChainspecRawBytesRequest, ConsensusRequest, ContractRuntimeRequest, + DeployBufferRequest, NetworkInfoRequest, NetworkRequest, + ProposedBlockValidationRequest, StorageRequest, }, EffectBuilder, EffectExt, Effects, }, failpoints::FailpointActivation, protocol::Message, reactor::ReactorEvent, - types::{BlockHash, BlockHeader, BlockPayload, NodeId}, + types::{ + appendable_block::AddError, BlockHash, BlockHeader, BlockPayload, DeployHash, + DeployOrTransferHash, NodeId, + }, NodeRng, }; use protocols::{highway::HighwayProtocol, zug::Zug}; @@ -64,39 +68,23 @@ pub(crate) use era_supervisor::{debug::EraDump, EraSupervisor, SerializedMessage #[cfg(test)] pub(crate) use highway_core::highway::Vertex as HighwayVertex; pub(crate) use leader_sequence::LeaderSequence; -pub(crate) use protocols::highway::max_rounds_per_era; #[cfg(test)] -pub(crate) use protocols::highway::HighwayMessage; +pub(crate) use protocols::highway::{max_rounds_per_era, HighwayMessage}; pub(crate) use validator_change::ValidatorChange; const COMPONENT_NAME: &str = "consensus"; -#[allow(clippy::arithmetic_side_effects)] -mod 
relaxed { - // This module exists solely to exempt the `EnumDiscriminants` macro generated code from the - // module-wide `clippy::arithmetic_side_effects` lint. - - use casper_types::{EraId, PublicKey}; - use datasize::DataSize; - use serde::{Deserialize, Serialize}; - use strum::EnumDiscriminants; - - use super::era_supervisor::SerializedMessage; - - #[derive(DataSize, Clone, Serialize, Deserialize, EnumDiscriminants)] - #[strum_discriminants(derive(strum::EnumIter))] - pub(crate) enum ConsensusMessage { - /// A protocol message, to be handled by the instance in the specified era. - Protocol { - era_id: EraId, - payload: SerializedMessage, - }, - /// A request for evidence against the specified validator, from any era that is still - /// bonded in `era_id`. - EvidenceRequest { era_id: EraId, pub_key: PublicKey }, - } +#[derive(DataSize, Clone, Serialize, Deserialize)] +pub(crate) enum ConsensusMessage { + /// A protocol message, to be handled by the instance in the specified era. + Protocol { + era_id: EraId, + payload: SerializedMessage, + }, + /// A request for evidence against the specified validator, from any era that is still + /// bonded in `era_id`. + EvidenceRequest { era_id: EraId, pub_key: PublicKey }, } -pub(crate) use relaxed::{ConsensusMessage, ConsensusMessageDiscriminants}; /// A request to be handled by the consensus protocol instance in a particular era. #[derive(DataSize, Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, From)] @@ -134,11 +122,130 @@ pub struct NewBlockPayload { /// The result of validation of a ProposedBlock. #[derive(DataSize, Debug, From)] -pub struct ResolveValidity { +pub struct ValidationResult { era_id: EraId, sender: NodeId, proposed_block: ProposedBlock, - valid: bool, + error: Option, +} + +#[derive(Clone, DataSize, Debug, Error, Serialize)] +/// A proposed block validation error. +// TODO: This error probably needs to move to a different component. 
+pub enum ValidationError { + /// A deploy hash in the proposed block has been found in an ancestor block. + #[error("deploy hash {0} has been replayed")] + ContainsReplayedDeploy(DeployHash), + /// A deploy could not be fetched from any of the identified holders. + #[error("exhausted potential holders of proposed block, missing {} deploys", missing_deploys.len())] + ExhaustedBlockHolders { + /// The deploys still missing. + missing_deploys: Vec, + }, + /// An already invalid block was submitted for validation. + /// + /// This is likely a bug in the node itself. + #[error("validation of failed block, likely a bug")] + ValidationOfFailedBlock, + /// The submitted block is already in process of being validated. + /// + /// This is likely a bug, since no block should be submitted for validation twice. + #[error("duplicate validation attempt, likely a bug")] + DuplicateValidationAttempt, + /// Found deploy in storage, but did not match the hash requested. + /// + /// This indicates a corrupted storage. + // Note: It seems rather mean to ban peers for our own corrupted storage. + #[error("local storage appears corrupted, deploy mismatch when asked for deploy {0}")] + InternalDataCorruption(DeployOrTransferHash), + /// The deploy we received from a peer was wrong or invalid. + /// + /// This is likely a bug, since the deploy fetcher should ensure that this does not happen. + #[error("received wrong or invalid deploy from peer when asked for deploy {0}")] + WrongDeploySent(DeployOrTransferHash), + /// A contained deploy has no valid deploy footprint. + #[error("no valid deploy footprint for deploy {deploy_hash}: {error}")] + DeployHasInvalidFootprint { + /// Hash of deploy that failed. + deploy_hash: DeployOrTransferHash, + /// The error reported when trying to footprint it. + // Note: The respective error is hard to serialize and make `Sync`-able, so it is inlined + // in string form here. + error: String, + }, + /// Too many non-transfer deploys in block. 
+ #[error("block exceeds limit of non-transfer deploys of {0}")] + ExceedsNonTransferDeployLimit(usize), + /// Too many transfers in block. + #[error("block exceeds limit of transfers of {0}")] + ExceedsTransferLimit(usize), + /// The approvals hash could not be serialized. + // Note: `bytesrepr::Error` does not implement `std::error::Error`. + #[error("failed to serialize approvals hash: {0}")] + CannotSerializeApprovalsHash(String), + /// A duplicated deploy was found within the block. + #[error("duplicate deploy {0} in block")] + DuplicateDeploy(DeployOrTransferHash), + /// Exhausted all peers while trying to validate block. + #[error("peers exhausted")] + PeersExhausted, + /// Failed to construct a `GetRequest`. + #[error("could not construct GetRequest for {id}, peer {peer}")] + CouldNotConstructGetRequest { + /// The `GetRequest`'s ID, serialized as string + id: String, + /// The peer ID the `GetRequest` was directed at. + peer: Box, + }, + /// Validation data mismatch. + #[error("validation data mismatch on {id}, peer {peer}")] + ValidationMetadataMismatch { + /// The item's ID for which validation data did not match. + id: String, + /// The peer ID involved. + peer: Box, + }, + /// The validation state was found to be `InProgress`. + #[error("encountered in-progress validation state after completion, likely a bug")] + InProgressAfterCompletion, + /// A given deploy could not be included in the block by adding it to the appendable block. + #[error("failed to include deploy {deploy_hash} in block")] + DeployInclusionFailure { + /// Hash of the deploy that was rejected. + deploy_hash: DeployOrTransferHash, + /// The underlying error of the appendable block. + #[source] + error: AddError, + }, +} + +impl ValidationResult { + /// Creates a new valid `ValidationResult`. 
+ #[inline(always)] + fn new_valid(era_id: EraId, sender: NodeId, proposed_block: ProposedBlock) -> Self { + Self { + era_id, + sender, + proposed_block, + error: None, + } + } + + /// Creates a new invalid `ValidationResult`. + #[inline(always)] + fn new_invalid( + era_id: EraId, + sender: NodeId, + proposed_block: ProposedBlock, + error: ValidationError, + ) -> Self { + Self { + era_id, + sender, + proposed_block, + error: Some(error), + } + } } /// Consensus component event. @@ -150,9 +257,9 @@ pub(crate) enum Event { /// A variant used with failpoints - when a message arrives, we fire this event with a delay, /// and it also causes the message to be handled. DelayedIncoming(ConsensusMessageIncoming), - /// An incoming demand message. + /// An incoming request message. #[from] - DemandIncoming(ConsensusDemand), + RequestMessageIncoming(ConsensusRequestMessageIncoming), /// A scheduled event to be handled by a specified era. Timer { era_id: EraId, @@ -171,7 +278,7 @@ pub(crate) enum Event { header_hash: BlockHash, }, /// The proposed block has been validated. - ResolveValidity(ResolveValidity), + ResolveValidity(ValidationResult), /// Deactivate the era with the given ID, unless the number of faulty validators increases. 
DeactivateEra { era_id: EraId, @@ -237,14 +344,26 @@ impl Display for ConsensusRequestMessage { impl Display for Event { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { - Event::Incoming(ConsensusMessageIncoming { sender, message }) => { + Event::Incoming(ConsensusMessageIncoming { + sender, + message, + ticket: _, + }) => { write!(f, "message from {:?}: {}", sender, message) } - Event::DelayedIncoming(ConsensusMessageIncoming { sender, message }) => { + Event::DelayedIncoming(ConsensusMessageIncoming { + sender, + message, + ticket: _, + }) => { write!(f, "delayed message from {:?}: {}", sender, message) } - Event::DemandIncoming(demand) => { - write!(f, "demand from {:?}: {}", demand.sender, demand.request_msg) + Event::RequestMessageIncoming(incoming) => { + write!( + f, + "request message from {:?}: {}", + incoming.sender, incoming.message + ) } Event::Timer { era_id, @@ -280,19 +399,28 @@ impl Display for Event { "A block has been added to the linear chain: {}", header_hash, ), - Event::ResolveValidity(ResolveValidity { + Event::ResolveValidity(ValidationResult { era_id, sender, proposed_block, - valid, - }) => write!( - f, - "Proposed block received from {:?} for {} is {}: {:?}", - sender, - era_id, - if *valid { "valid" } else { "invalid" }, - proposed_block, - ), + error, + }) => { + write!( + f, + "Proposed block received from {:?} for {} is ", + sender, era_id + )?; + + if let Some(err) = error { + write!(f, "invalid ({})", err)?; + } else { + f.write_str("valid")?; + }; + + write!(f, ": {:?}", proposed_block)?; + + Ok(()) + } Event::DeactivateEra { era_id, faulty_num, .. 
} => write!( @@ -312,11 +440,11 @@ pub(crate) trait ReactorEventT: + From + Send + From> - + From + + From + From + From + From - + From + + From + From + From + From @@ -330,12 +458,12 @@ impl ReactorEventT for REv where REv: ReactorEvent + From + Send - + From + + From + From> + From + From + From - + From + + From + From + From + From @@ -345,72 +473,6 @@ impl ReactorEventT for REv where { } -mod specimen_support { - use crate::utils::specimen::{largest_variant, Cache, LargestSpecimen, SizeEstimator}; - - use super::{ - protocols::{highway, zug}, - ClContext, ConsensusMessage, ConsensusMessageDiscriminants, ConsensusRequestMessage, - EraRequest, SerializedMessage, - }; - - impl LargestSpecimen for ConsensusMessage { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - largest_variant::(estimator, |variant| { - match variant { - ConsensusMessageDiscriminants::Protocol => { - let zug_payload = SerializedMessage::from_message( - &zug::Message::::largest_specimen(estimator, cache), - ); - let highway_payload = SerializedMessage::from_message( - &highway::HighwayMessage::::largest_specimen( - estimator, cache, - ), - ); - - let payload = if zug_payload.as_raw().len() > highway_payload.as_raw().len() - { - zug_payload - } else { - highway_payload - }; - - ConsensusMessage::Protocol { - era_id: LargestSpecimen::largest_specimen(estimator, cache), - payload, - } - } - ConsensusMessageDiscriminants::EvidenceRequest => { - ConsensusMessage::EvidenceRequest { - era_id: LargestSpecimen::largest_specimen(estimator, cache), - pub_key: LargestSpecimen::largest_specimen(estimator, cache), - } - } - } - }) - } - } - - impl LargestSpecimen for ConsensusRequestMessage { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - let zug_sync_request = SerializedMessage::from_message( - &zug::SyncRequest::::largest_specimen(estimator, cache), - ); - - ConsensusRequestMessage { - era_id: LargestSpecimen::largest_specimen(estimator, cache), - payload: 
zug_sync_request, - } - } - } - - impl LargestSpecimen for EraRequest { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - EraRequest::Zug(LargestSpecimen::largest_specimen(estimator, cache)) - } - } -} - impl Component for EraSupervisor where REv: ReactorEventT, @@ -433,26 +495,36 @@ where Event::Action { era_id, action_id } => { self.handle_action(effect_builder, rng, era_id, action_id) } - Event::Incoming(ConsensusMessageIncoming { sender, message }) => { + Event::Incoming(ConsensusMessageIncoming { + sender, + message, + ticket, + }) => { let delay_by = self.message_delay_failpoint.fire(rng).cloned(); if let Some(delay) = delay_by { effect_builder .set_timeout(Duration::from_millis(delay)) .event(move |_| { - Event::DelayedIncoming(ConsensusMessageIncoming { sender, message }) + Event::DelayedIncoming(ConsensusMessageIncoming { + sender, + message, + ticket, + }) }) } else { - self.handle_message(effect_builder, rng, sender, *message) + self.handle_message(effect_builder, rng, sender, *message, ticket) } } - Event::DelayedIncoming(ConsensusMessageIncoming { sender, message }) => { - self.handle_message(effect_builder, rng, sender, *message) - } - Event::DemandIncoming(ConsensusDemand { + Event::DelayedIncoming(ConsensusMessageIncoming { + sender, + message, + ticket, + }) => self.handle_message(effect_builder, rng, sender, *message, ticket), + Event::RequestMessageIncoming(ConsensusRequestMessageIncoming { sender, - request_msg: demand, - auto_closing_responder, - }) => self.handle_demand(effect_builder, rng, sender, demand, auto_closing_responder), + message, + ticket, + }) => self.handle_request_message(effect_builder, rng, sender, message, ticket), Event::NewBlockPayload(new_block_payload) => { self.handle_new_block_payload(effect_builder, rng, new_block_payload) } diff --git a/node/src/components/consensus/cl_context.rs b/node/src/components/consensus/cl_context.rs index 251f1022dd..b0765e285d 100644 --- 
a/node/src/components/consensus/cl_context.rs +++ b/node/src/components/consensus/cl_context.rs @@ -78,21 +78,3 @@ impl Context for ClContext { true } } - -mod specimen_support { - use super::Keypair; - use crate::utils::specimen::{Cache, LargestSpecimen, SizeEstimator}; - use casper_types::{PublicKey, SecretKey}; - use std::sync::Arc; - - impl LargestSpecimen for Keypair { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - let secret_key = SecretKey::largest_specimen(estimator, cache); - let public_key = PublicKey::from(&secret_key); - Keypair { - secret_key: Arc::new(secret_key), - public_key, - } - } - } -} diff --git a/node/src/components/consensus/consensus_protocol.rs b/node/src/components/consensus/consensus_protocol.rs index 016530d99d..fbce803bcb 100644 --- a/node/src/components/consensus/consensus_protocol.rs +++ b/node/src/components/consensus/consensus_protocol.rs @@ -17,7 +17,7 @@ use crate::{ NodeRng, }; -use super::era_supervisor::SerializedMessage; +use super::{era_supervisor::SerializedMessage, ValidationError}; /// Information about the context in which a new block is created. 
#[derive(Clone, DataSize, Eq, PartialEq, Debug, Ord, PartialOrd, Hash)] @@ -281,7 +281,7 @@ pub(crate) trait ConsensusProtocol: Send { fn resolve_validity( &mut self, proposed_block: ProposedBlock, - valid: bool, + validation_error: Option, now: Timestamp, ) -> ProtocolOutcomes; diff --git a/node/src/components/consensus/era_supervisor.rs b/node/src/components/consensus/era_supervisor.rs index 4ac3a6efd6..de906a00f8 100644 --- a/node/src/components/consensus/era_supervisor.rs +++ b/node/src/components/consensus/era_supervisor.rs @@ -44,17 +44,18 @@ use crate::{ metrics::Metrics, validator_change::{ValidatorChange, ValidatorChanges}, ActionId, ChainspecConsensusExt, Config, ConsensusMessage, ConsensusRequestMessage, - Event, HighwayProtocol, NewBlockPayload, ReactorEventT, ResolveValidity, TimerId, Zug, + Event, HighwayProtocol, NewBlockPayload, ReactorEventT, TimerId, ValidationResult, Zug, }, - network::blocklist::BlocklistJustification, + network::{blocklist::BlocklistJustification, Ticket}, }, + consensus::ValidationError, effect::{ announcements::FatalAnnouncement, - requests::{BlockValidationRequest, ContractRuntimeRequest, StorageRequest}, - AutoClosingResponder, EffectBuilder, EffectExt, Effects, Responder, + requests::{ContractRuntimeRequest, ProposedBlockValidationRequest, StorageRequest}, + EffectBuilder, EffectExt, Effects, Responder, }, failpoints::Failpoint, - fatal, protocol, + fatal, types::{ chainspec::ConsensusProtocolName, BlockHash, BlockHeader, Chainspec, Deploy, DeployHash, DeployOrTransferHash, FinalizedApprovals, FinalizedBlock, MetaBlockState, NodeId, @@ -697,14 +698,22 @@ impl EraSupervisor { rng: &mut NodeRng, sender: NodeId, msg: ConsensusMessage, + ticket: Ticket, ) -> Effects { match msg { ConsensusMessage::Protocol { era_id, payload } => { trace!(era = era_id.value(), "received a consensus message"); - self.delegate_to_era(effect_builder, rng, era_id, move |consensus, rng| { - consensus.handle_message(rng, sender, payload, 
Timestamp::now()) - }) + let rv = + self.delegate_to_era(effect_builder, rng, era_id, move |consensus, rng| { + consensus.handle_message(rng, sender, payload, Timestamp::now()) + }); + + // TODO: This is suboptimal, but best effort, until more fine-grained threading of + // tickets through consensus is implemented. + drop(ticket); + + rv } ConsensusMessage::EvidenceRequest { era_id, pub_key } => match self.current_era() { None => Effects::new(), @@ -713,35 +722,44 @@ impl EraSupervisor { || !self.open_eras.contains_key(&era_id) { trace!(era = era_id.value(), "not handling message; era too old"); + drop(ticket); return Effects::new(); } - self.iter_past(era_id, PAST_EVIDENCE_ERAS) + + let rv = self + .iter_past(era_id, PAST_EVIDENCE_ERAS) .flat_map(|e_id| { self.delegate_to_era(effect_builder, rng, e_id, |consensus, _| { consensus.send_evidence(sender, &pub_key) }) }) - .collect() + .collect(); + + // TODO: As above, requires more fine-grained threading of tickets. + drop(ticket); + + rv } }, } } - pub(super) fn handle_demand( + pub(super) fn handle_request_message( &mut self, effect_builder: EffectBuilder, rng: &mut NodeRng, sender: NodeId, - request: Box, - auto_closing_responder: AutoClosingResponder, + message: Box, + ticket: Ticket, ) -> Effects { - let ConsensusRequestMessage { era_id, payload } = *request; + let ConsensusRequestMessage { era_id, payload } = *message; trace!(era = era_id.value(), "received a consensus request"); match self.open_eras.get_mut(&era_id) { None => { self.log_missing_era(era_id); - auto_closing_responder.respond_none().ignore() + drop(ticket); + Effects::new() } Some(era) => { let (outcomes, response) = @@ -751,12 +769,16 @@ impl EraSupervisor { self.handle_consensus_outcomes(effect_builder, rng, era_id, outcomes); if let Some(payload) = response { effects.extend( - auto_closing_responder - .respond(ConsensusMessage::Protocol { era_id, payload }.into()) + effect_builder + .send_message_and_drop_ticket( + sender, + 
ConsensusMessage::Protocol { era_id, payload }.into(), + ticket, + ) .ignore(), ); } else { - effects.extend(auto_closing_responder.respond_none().ignore()); + drop(ticket); } effects } @@ -878,36 +900,38 @@ impl EraSupervisor { &mut self, effect_builder: EffectBuilder, rng: &mut NodeRng, - resolve_validity: ResolveValidity, + result: ValidationResult, ) -> Effects { - let ResolveValidity { - era_id, - sender, - proposed_block, - valid, - } = resolve_validity; self.metrics.proposed_block(); let mut effects = Effects::new(); - if !valid { + if let Some(ref error) = result.error { effects.extend({ effect_builder .announce_block_peer_with_justification( - sender, - BlocklistJustification::SentInvalidConsensusValue { era: era_id }, + result.sender, + BlocklistJustification::SentInvalidConsensusValue { + era: result.era_id, + cause: error.clone(), + }, ) .ignore() }); } - if self - .open_eras - .get_mut(&era_id) - .map_or(false, |era| era.resolve_validity(&proposed_block, valid)) - { - effects.extend( - self.delegate_to_era(effect_builder, rng, era_id, |consensus, _| { - consensus.resolve_validity(proposed_block.clone(), valid, Timestamp::now()) - }), - ); + if self.open_eras.get_mut(&result.era_id).map_or(false, |era| { + era.resolve_validity(&result.proposed_block, result.error.as_ref()) + }) { + effects.extend(self.delegate_to_era( + effect_builder, + rng, + result.era_id, + |consensus, _| { + consensus.resolve_validity( + result.proposed_block.clone(), + result.error, + Timestamp::now(), + ) + }, + )); } effects } @@ -996,7 +1020,7 @@ impl EraSupervisor { } ProtocolOutcome::CreatedTargetedMessage(payload, to) => { let message = ConsensusMessage::Protocol { era_id, payload }; - effect_builder.enqueue_message(to, message.into()).ignore() + effect_builder.try_send_message(to, message.into()).ignore() } ProtocolOutcome::CreatedMessageToRandomPeer(payload) => { let message = ConsensusMessage::Protocol { era_id, payload }; @@ -1004,7 +1028,7 @@ impl EraSupervisor { async 
move { let peers = effect_builder.get_fully_connected_peers(1).await; if let Some(to) = peers.into_iter().next() { - effect_builder.enqueue_message(to, message.into()).await; + effect_builder.try_send_message(to, message.into()).await; } } .ignore() @@ -1015,7 +1039,7 @@ impl EraSupervisor { async move { let peers = effect_builder.get_fully_connected_peers(1).await; if let Some(to) = peers.into_iter().next() { - effect_builder.enqueue_message(to, message.into()).await; + effect_builder.try_send_message(to, message.into()).await; } } .ignore() @@ -1167,12 +1191,12 @@ impl EraSupervisor { return self.resolve_validity( effect_builder, rng, - ResolveValidity { + ValidationResult::new_invalid( era_id, sender, proposed_block, - valid: false, - }, + ValidationError::ContainsReplayedDeploy(deploy_hash), + ), ); } let mut effects = Effects::new(); @@ -1214,7 +1238,7 @@ impl EraSupervisor { rng, e_id, |consensus, _| { - consensus.resolve_validity(proposed_block, true, Timestamp::now()) + consensus.resolve_validity(proposed_block, None, Timestamp::now()) }, )); } @@ -1395,7 +1419,7 @@ async fn check_deploys_for_replay_in_previous_eras_and_validate_block( proposed_block: ProposedBlock, ) -> Event where - REv: From + From, + REv: From + From, { let deploys_era_ids = effect_builder .get_deploys_era_ids( @@ -1415,25 +1439,25 @@ where // block_payload within the current era to determine if we are facing a replay // attack. 
if deploy_era_id < proposed_block_era_id { - return Event::ResolveValidity(ResolveValidity { - era_id: proposed_block_era_id, + return Event::ResolveValidity(ValidationResult::new_valid( + proposed_block_era_id, sender, - proposed_block: proposed_block.clone(), - valid: false, - }); + proposed_block.clone(), + )); } } let sender_for_validate_block: NodeId = sender; - let valid = effect_builder + let error = effect_builder .validate_block(sender_for_validate_block, proposed_block.clone()) - .await; + .await + .err(); - Event::ResolveValidity(ResolveValidity { + Event::ResolveValidity(ValidationResult { era_id: proposed_block_era_id, sender, proposed_block, - valid, + error, }) } diff --git a/node/src/components/consensus/era_supervisor/era.rs b/node/src/components/consensus/era_supervisor/era.rs index 73fe2814cc..80c117f147 100644 --- a/node/src/components/consensus/era_supervisor/era.rs +++ b/node/src/components/consensus/era_supervisor/era.rs @@ -10,10 +10,13 @@ use tracing::{debug, warn}; use casper_types::{PublicKey, Timestamp, U512}; -use crate::components::consensus::{ - cl_context::ClContext, - consensus_protocol::{ConsensusProtocol, ProposedBlock}, - protocols::{highway::HighwayProtocol, zug::Zug}, +use crate::{ + components::consensus::{ + cl_context::ClContext, + consensus_protocol::{ConsensusProtocol, ProposedBlock}, + protocols::{highway::HighwayProtocol, zug::Zug}, + }, + consensus::ValidationError, }; const CASPER_ENABLE_DETAILED_CONSENSUS_METRICS_ENV_VAR: &str = @@ -118,9 +121,9 @@ impl Era { pub(crate) fn resolve_validity( &mut self, proposed_block: &ProposedBlock, - valid: bool, + validation_error: Option<&ValidationError>, ) -> bool { - if valid { + if validation_error.is_none() { if let Some(vs) = self.validation_states.get_mut(proposed_block) { if !vs.missing_evidence.is_empty() { vs.validated = true; diff --git a/node/src/components/consensus/highway_core/active_validator.rs b/node/src/components/consensus/highway_core/active_validator.rs index 
71871317ad..249f5a397c 100644 --- a/node/src/components/consensus/highway_core/active_validator.rs +++ b/node/src/components/consensus/highway_core/active_validator.rs @@ -1,3 +1,4 @@ +#![allow(clippy::arithmetic_side_effects)] use std::{ fmt::{self, Debug}, fs::{self, File}, diff --git a/node/src/components/consensus/highway_core/endorsement.rs b/node/src/components/consensus/highway_core/endorsement.rs index 99be3dbcee..7194a85f60 100644 --- a/node/src/components/consensus/highway_core/endorsement.rs +++ b/node/src/components/consensus/highway_core/endorsement.rs @@ -50,24 +50,6 @@ impl Endorsement { } } -mod specimen_support { - use crate::{ - components::consensus::ClContext, - utils::specimen::{Cache, LargestSpecimen, SizeEstimator}, - }; - - use super::Endorsement; - - impl LargestSpecimen for Endorsement { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - Endorsement { - unit: LargestSpecimen::largest_specimen(estimator, cache), - creator: LargestSpecimen::largest_specimen(estimator, cache), - } - } - } -} - /// Testimony that creator of `unit` was seen honest /// by `endorser` at the moment of creating this endorsement. 
#[derive(Clone, DataSize, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)] diff --git a/node/src/components/consensus/highway_core/evidence.rs b/node/src/components/consensus/highway_core/evidence.rs index 0b53f57728..5a27f10259 100644 --- a/node/src/components/consensus/highway_core/evidence.rs +++ b/node/src/components/consensus/highway_core/evidence.rs @@ -1,10 +1,12 @@ use std::iter; +use datasize::DataSize; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use thiserror::Error; use crate::components::consensus::{ - highway_core::{highway::SignedWireUnit, state::Params}, + highway_core::{endorsement::SignedEndorsement, highway::SignedWireUnit, state::Params}, traits::Context, utils::{ValidatorIndex, Validators}, }; @@ -34,53 +36,35 @@ pub enum EvidenceError { Signature, } -#[allow(clippy::arithmetic_side_effects)] -pub mod relaxed { - // This module exists solely to exempt the `EnumDiscriminants` macro generated code from the - // module-wide `clippy::arithmetic_side_effects` lint. +/// Evidence that a validator is faulty. +#[derive(Clone, DataSize, Debug, Eq, PartialEq, Serialize, Deserialize, Hash)] +#[serde(bound( + serialize = "C::Hash: Serialize", + deserialize = "C::Hash: Deserialize<'de>", +))] - use datasize::DataSize; - use serde::{Deserialize, Serialize}; - use strum::EnumDiscriminants; - - use crate::components::consensus::{ - highway_core::{endorsement::SignedEndorsement, highway::SignedWireUnit}, - traits::Context, - }; - - /// Evidence that a validator is faulty. - #[derive( - Clone, DataSize, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, EnumDiscriminants, - )] - #[serde(bound( - serialize = "C::Hash: Serialize", - deserialize = "C::Hash: Deserialize<'de>", - ))] - #[strum_discriminants(derive(strum::EnumIter))] - pub enum Evidence - where - C: Context, - { - /// The validator produced two units with the same sequence number. 
- Equivocation(SignedWireUnit, SignedWireUnit), - /// The validator endorsed two conflicting units. - Endorsements { - /// The endorsement for `unit1`. - endorsement1: SignedEndorsement, - /// The unit with the lower (or equal) sequence number. - unit1: SignedWireUnit, - /// The endorsement for `unit2`, by the same creator as endorsement1. - endorsement2: SignedEndorsement, - /// The unit with the higher (or equal) sequence number, on a conflicting fork of the - /// same creator as `unit1`. - unit2: SignedWireUnit, - /// The predecessors of `unit2`, back to the same sequence number as `unit1`, in - /// reverse chronological order. - swimlane2: Vec>, - }, - } +pub enum Evidence +where + C: Context, +{ + /// The validator produced two units with the same sequence number. + Equivocation(SignedWireUnit, SignedWireUnit), + /// The validator endorsed two conflicting units. + Endorsements { + /// The endorsement for `unit1`. + endorsement1: SignedEndorsement, + /// The unit with the lower (or equal) sequence number. + unit1: SignedWireUnit, + /// The endorsement for `unit2`, by the same creator as endorsement1. + endorsement2: SignedEndorsement, + /// The unit with the higher (or equal) sequence number, on a conflicting fork of the + /// same creator as `unit1`. + unit2: SignedWireUnit, + /// The predecessors of `unit2`, back to the same sequence number as `unit1`, in + /// reverse chronological order. + swimlane2: Vec>, + }, } -pub use relaxed::{Evidence, EvidenceDiscriminants}; impl Evidence { /// Returns the ID of the faulty validator. 
@@ -177,48 +161,3 @@ impl Evidence { Ok(()) } } - -mod specimen_support { - - use crate::{ - components::consensus::ClContext, - utils::specimen::{ - estimator_max_rounds_per_era, largest_variant, vec_of_largest_specimen, Cache, - LargestSpecimen, SizeEstimator, - }, - }; - - use super::{Evidence, EvidenceDiscriminants}; - - impl LargestSpecimen for Evidence { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - largest_variant::(estimator, |variant| match variant - { - EvidenceDiscriminants::Equivocation => Evidence::Equivocation( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ), - EvidenceDiscriminants::Endorsements => { - if estimator.parameter_bool("endorsements_enabled") { - Evidence::Endorsements { - endorsement1: LargestSpecimen::largest_specimen(estimator, cache), - unit1: LargestSpecimen::largest_specimen(estimator, cache), - endorsement2: LargestSpecimen::largest_specimen(estimator, cache), - unit2: LargestSpecimen::largest_specimen(estimator, cache), - swimlane2: vec_of_largest_specimen( - estimator, - estimator_max_rounds_per_era(estimator), - cache, - ), - } - } else { - Evidence::Equivocation( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } - } - }) - } - } -} diff --git a/node/src/components/consensus/highway_core/finality_detector.rs b/node/src/components/consensus/highway_core/finality_detector.rs index bc7078ce4a..299e459d49 100644 --- a/node/src/components/consensus/highway_core/finality_detector.rs +++ b/node/src/components/consensus/highway_core/finality_detector.rs @@ -1,4 +1,5 @@ //! Functions for detecting finality of proposed blocks and calculating rewards. 
+#![allow(clippy::arithmetic_side_effects)] mod horizon; mod rewards; diff --git a/node/src/components/consensus/highway_core/highway.rs b/node/src/components/consensus/highway_core/highway.rs index 596a2cdacc..13bc44ebd4 100644 --- a/node/src/components/consensus/highway_core/highway.rs +++ b/node/src/components/consensus/highway_core/highway.rs @@ -1,4 +1,5 @@ //! The implementation of the Highway consensus protocol. +#![allow(clippy::arithmetic_side_effects)] mod vertex; diff --git a/node/src/components/consensus/highway_core/highway/vertex.rs b/node/src/components/consensus/highway_core/highway/vertex.rs index 59fb9aeec3..7d03734644 100644 --- a/node/src/components/consensus/highway_core/highway/vertex.rs +++ b/node/src/components/consensus/highway_core/highway/vertex.rs @@ -8,6 +8,7 @@ use casper_types::Timestamp; use crate::components::consensus::{ highway_core::{ endorsement::SignedEndorsement, + evidence::Evidence, highway::{PingError, VertexError}, state::Panorama, }, @@ -15,81 +16,47 @@ use crate::components::consensus::{ utils::{ValidatorIndex, Validators}, }; -#[allow(clippy::arithmetic_side_effects)] -mod relaxed { - // This module exists solely to exempt the `EnumDiscriminants` macro generated code from the - // module-wide `clippy::arithmetic_side_effects` lint. - - use casper_types::Timestamp; - use datasize::DataSize; - use serde::{Deserialize, Serialize}; - use strum::EnumDiscriminants; - - use crate::components::consensus::{ - highway_core::evidence::Evidence, traits::Context, utils::ValidatorIndex, - }; - - use super::{Endorsements, Ping, SignedWireUnit}; - - /// A dependency of a `Vertex` that can be satisfied by one or more other vertices. 
- #[derive( - DataSize, - Clone, - Debug, - Eq, - PartialEq, - PartialOrd, - Ord, - Hash, - Serialize, - Deserialize, - EnumDiscriminants, - )] - #[serde(bound( - serialize = "C::Hash: Serialize", - deserialize = "C::Hash: Deserialize<'de>", - ))] - #[strum_discriminants(derive(strum::EnumIter))] - pub enum Dependency - where - C: Context, - { - /// The hash of a unit. - Unit(C::Hash), - /// The index of the validator against which evidence is needed. - Evidence(ValidatorIndex), - /// The hash of the unit to be endorsed. - Endorsement(C::Hash), - /// The ping by a particular validator for a particular timestamp. - Ping(ValidatorIndex, Timestamp), - } - - /// An element of the protocol state, that might depend on other elements. - /// - /// It is the vertex in a directed acyclic graph, whose edges are dependencies. - #[derive( - DataSize, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash, EnumDiscriminants, - )] - #[serde(bound( - serialize = "C::Hash: Serialize", - deserialize = "C::Hash: Deserialize<'de>", - ))] - #[strum_discriminants(derive(strum::EnumIter))] - pub enum Vertex - where - C: Context, - { - /// A signed unit of the consensus DAG. - Unit(SignedWireUnit), - /// Evidence of a validator's transgression. - Evidence(Evidence), - /// Endorsements for a unit. - Endorsements(Endorsements), - /// A ping conveying the activity of its creator. - Ping(Ping), - } +/// A dependency of a `Vertex` that can be satisfied by one or more other vertices. +#[derive(DataSize, Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +#[serde(bound( + serialize = "C::Hash: Serialize", + deserialize = "C::Hash: Deserialize<'de>", +))] +pub enum Dependency +where + C: Context, +{ + /// The hash of a unit. + Unit(C::Hash), + /// The index of the validator against which evidence is needed. + Evidence(ValidatorIndex), + /// The hash of the unit to be endorsed. + Endorsement(C::Hash), + /// The ping by a particular validator for a particular timestamp. 
+ Ping(ValidatorIndex, Timestamp), +} + +/// An element of the protocol state, that might depend on other elements. +/// +/// It is the vertex in a directed acyclic graph, whose edges are dependencies. +#[derive(DataSize, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash)] +#[serde(bound( + serialize = "C::Hash: Serialize", + deserialize = "C::Hash: Deserialize<'de>", +))] +pub enum Vertex +where + C: Context, +{ + /// A signed unit of the consensus DAG. + Unit(SignedWireUnit), + /// Evidence of a validator's transgression. + Evidence(Evidence), + /// Endorsements for a unit. + Endorsements(Endorsements), + /// A ping conveying the activity of its creator. + Ping(Ping), } -pub use relaxed::{Dependency, DependencyDiscriminants, Vertex, VertexDiscriminants}; impl Dependency { /// Returns whether this identifies a unit, as opposed to other types of vertices. @@ -175,134 +142,6 @@ impl Vertex { } } -mod specimen_support { - use super::{ - Dependency, DependencyDiscriminants, Endorsements, HashedWireUnit, Ping, SignedEndorsement, - SignedWireUnit, Vertex, VertexDiscriminants, WireUnit, - }; - use crate::{ - components::consensus::ClContext, - utils::specimen::{ - btree_set_distinct_from_prop, largest_variant, vec_prop_specimen, Cache, - LargestSpecimen, SizeEstimator, - }, - }; - - impl LargestSpecimen for Vertex { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - largest_variant::(estimator, |variant| match variant { - VertexDiscriminants::Unit => { - Vertex::Unit(LargestSpecimen::largest_specimen(estimator, cache)) - } - VertexDiscriminants::Evidence => { - Vertex::Evidence(LargestSpecimen::largest_specimen(estimator, cache)) - } - VertexDiscriminants::Endorsements => { - if estimator.parameter_bool("endorsements_enabled") { - Vertex::Endorsements(LargestSpecimen::largest_specimen(estimator, cache)) - } else { - Vertex::Ping(LargestSpecimen::largest_specimen(estimator, cache)) - } - } - VertexDiscriminants::Ping => { - 
Vertex::Ping(LargestSpecimen::largest_specimen(estimator, cache)) - } - }) - } - } - - impl LargestSpecimen for Dependency { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - largest_variant::(estimator, |variant| { - match variant { - DependencyDiscriminants::Unit => { - Dependency::Unit(LargestSpecimen::largest_specimen(estimator, cache)) - } - DependencyDiscriminants::Evidence => { - Dependency::Evidence(LargestSpecimen::largest_specimen(estimator, cache)) - } - DependencyDiscriminants::Endorsement => { - Dependency::Endorsement(LargestSpecimen::largest_specimen(estimator, cache)) - } - DependencyDiscriminants::Ping => Dependency::Ping( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ), - } - }) - } - } - - impl LargestSpecimen for SignedWireUnit { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - SignedWireUnit { - hashed_wire_unit: LargestSpecimen::largest_specimen(estimator, cache), - signature: LargestSpecimen::largest_specimen(estimator, cache), - } - } - } - - impl LargestSpecimen for Endorsements { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - Endorsements { - unit: LargestSpecimen::largest_specimen(estimator, cache), - endorsers: if estimator.parameter_bool("endorsements_enabled") { - vec_prop_specimen(estimator, "validator_count", cache) - } else { - Vec::new() - }, - } - } - } - - impl LargestSpecimen for SignedEndorsement { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - SignedEndorsement::new( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } - } - - impl LargestSpecimen for Ping { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - Ping { - creator: LargestSpecimen::largest_specimen(estimator, cache), - timestamp: LargestSpecimen::largest_specimen(estimator, cache), - instance_id: LargestSpecimen::largest_specimen(estimator, 
cache), - signature: LargestSpecimen::largest_specimen(estimator, cache), - } - } - } - - impl LargestSpecimen for HashedWireUnit { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - if let Some(item) = cache.get::() { - return item.clone(); - } - - let hash = LargestSpecimen::largest_specimen(estimator, cache); - let wire_unit = LargestSpecimen::largest_specimen(estimator, cache); - cache.set(HashedWireUnit { hash, wire_unit }).clone() - } - } - - impl LargestSpecimen for WireUnit { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - WireUnit { - panorama: LargestSpecimen::largest_specimen(estimator, cache), - creator: LargestSpecimen::largest_specimen(estimator, cache), - instance_id: LargestSpecimen::largest_specimen(estimator, cache), - value: LargestSpecimen::largest_specimen(estimator, cache), - seq_number: LargestSpecimen::largest_specimen(estimator, cache), - timestamp: LargestSpecimen::largest_specimen(estimator, cache), - round_exp: LargestSpecimen::largest_specimen(estimator, cache), - endorsed: btree_set_distinct_from_prop(estimator, "validator_count", cache), - } - } - } -} - /// A `WireUnit` together with its hash and a cryptographic signature by its creator. 
#[derive(DataSize, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash)] #[serde(bound( diff --git a/node/src/components/consensus/highway_core/state.rs b/node/src/components/consensus/highway_core/state.rs index 2352ff8ac5..50a56a5d1e 100644 --- a/node/src/components/consensus/highway_core/state.rs +++ b/node/src/components/consensus/highway_core/state.rs @@ -1,3 +1,4 @@ +#![allow(clippy::arithmetic_side_effects)] mod block; mod index_panorama; mod panorama; diff --git a/node/src/components/consensus/highway_core/state/index_panorama.rs b/node/src/components/consensus/highway_core/state/index_panorama.rs index ee175e7a80..88c4281f84 100644 --- a/node/src/components/consensus/highway_core/state/index_panorama.rs +++ b/node/src/components/consensus/highway_core/state/index_panorama.rs @@ -54,16 +54,3 @@ impl IndexPanorama { validator_map } } - -mod specimen_support { - use crate::utils::specimen::{Cache, LargestSpecimen, SizeEstimator}; - - use super::IndexObservation; - - impl LargestSpecimen for IndexObservation { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - // This is the largest variant since the other one is empty: - IndexObservation::NextSeq(LargestSpecimen::largest_specimen(estimator, cache)) - } - } -} diff --git a/node/src/components/consensus/highway_core/state/panorama.rs b/node/src/components/consensus/highway_core/state/panorama.rs index 320712541b..8217540aae 100644 --- a/node/src/components/consensus/highway_core/state/panorama.rs +++ b/node/src/components/consensus/highway_core/state/panorama.rs @@ -1,6 +1,8 @@ use std::{collections::HashSet, fmt::Debug}; +use datasize::DataSize; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use casper_types::Timestamp; @@ -13,37 +15,23 @@ use crate::components::consensus::{ utils::{ValidatorIndex, ValidatorMap}, }; -#[allow(clippy::arithmetic_side_effects)] -mod relaxed { - // This module exists solely to exempt the `EnumDiscriminants` macro generated code from the - // 
module-wide `clippy::arithmetic_side_effects` lint. - - use datasize::DataSize; - use serde::{Deserialize, Serialize}; - use strum::EnumDiscriminants; - - use crate::components::consensus::traits::Context; - - /// The observed behavior of a validator at some point in time. - #[derive(Clone, DataSize, Eq, PartialEq, Serialize, Deserialize, Hash, EnumDiscriminants)] - #[serde(bound( - serialize = "C::Hash: Serialize", - deserialize = "C::Hash: Deserialize<'de>", - ))] - #[strum_discriminants(derive(strum::EnumIter))] - pub enum Observation - where - C: Context, - { - /// No unit by that validator was observed yet. - None, - /// The validator's latest unit. - Correct(C::Hash), - /// The validator has been seen - Faulty, - } +/// The observed behavior of a validator at some point in time. +#[derive(Clone, DataSize, Eq, PartialEq, Serialize, Deserialize, Hash)] +#[serde(bound( + serialize = "C::Hash: Serialize", + deserialize = "C::Hash: Deserialize<'de>", +))] +pub enum Observation +where + C: Context, +{ + /// No unit by that validator was observed yet. + None, + /// The validator's latest unit. 
+ Correct(C::Hash), + /// The validator has been seen + Faulty, } -pub use relaxed::{Observation, ObservationDiscriminants}; impl Debug for Observation where @@ -251,29 +239,3 @@ impl Panorama { Ok(()) } } - -mod specimen_support { - use crate::{ - components::consensus::ClContext, - utils::specimen::{largest_variant, Cache, LargestSpecimen, SizeEstimator}, - }; - - use super::{Observation, ObservationDiscriminants}; - - impl LargestSpecimen for Observation { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - if let Some(item) = cache.get::() { - return item.clone(); - } - - let correct = LargestSpecimen::largest_specimen(estimator, cache); - cache - .set(largest_variant(estimator, |variant| match variant { - ObservationDiscriminants::None => Observation::None, - ObservationDiscriminants::Correct => Observation::Correct(correct), - ObservationDiscriminants::Faulty => Observation::Faulty, - })) - .clone() - } - } -} diff --git a/node/src/components/consensus/highway_core/state/tallies.rs b/node/src/components/consensus/highway_core/state/tallies.rs index 6444f246d5..263958d29f 100644 --- a/node/src/components/consensus/highway_core/state/tallies.rs +++ b/node/src/components/consensus/highway_core/state/tallies.rs @@ -1,3 +1,5 @@ +#![allow(clippy::arithmetic_side_effects)] + use std::{ collections::BTreeMap, iter::{self, Extend, FromIterator}, diff --git a/node/src/components/consensus/highway_core/state/tests.rs b/node/src/components/consensus/highway_core/state/tests.rs index eb9a0b4408..df13d6f41b 100644 --- a/node/src/components/consensus/highway_core/state/tests.rs +++ b/node/src/components/consensus/highway_core/state/tests.rs @@ -500,6 +500,8 @@ fn validate_lnc_mixed_citations() -> Result<(), AddUnitError> { if !ENABLE_ENDORSEMENTS { return Ok(()); } + + #[rustfmt::skip] // Eric's vote should not require an endorsement as his unit e0 cites equivocator Carol before // the fork. 
// @@ -545,6 +547,8 @@ fn validate_lnc_transitive_endorsement() -> Result<(), AddUnitError if !ENABLE_ENDORSEMENTS { return Ok(()); } + + #[rustfmt::skip] // Endorsements should be transitive to descendants. // c1 doesn't have to be endorsed, it is enough that c0 is. // @@ -582,6 +586,8 @@ fn validate_lnc_cite_descendant_of_equivocation() -> Result<(), AddUnitError Synchronizer { // state after `dep` is added, rather than `transitive_dependency`. self.add_missing_dependency(dep.clone(), pv); // If we already have the dependency and it is a proposal that is currently being - // handled by the block validator, and this sender is already known as a source, - // do nothing. + // handled by the proposed block validator, and this sender is already known as a + // source, do nothing. if pending_values .values() .flatten() @@ -403,8 +403,9 @@ impl Synchronizer { continue; } // If we already have the dependency and it is a proposal that is currently being - // handled by the block validator, and this sender is not yet known as a source, - // we return the proposal as if this sender had sent it to us, so they get added. + // handled by the proposed block validator, and this sender is not yet known as a + // source, we return the proposal as if this sender had sent it to us, so they get + // added. if let Some((vv, _)) = pending_values .values() .flatten() diff --git a/node/src/components/consensus/metrics.rs b/node/src/components/consensus/metrics.rs index 0409ee4eca..5bf1d411b7 100644 --- a/node/src/components/consensus/metrics.rs +++ b/node/src/components/consensus/metrics.rs @@ -2,55 +2,51 @@ use prometheus::{Gauge, IntGauge, Registry}; use casper_types::Timestamp; -use crate::{types::FinalizedBlock, unregister_metric}; +use crate::{ + types::FinalizedBlock, + utils::registered_metric::{RegisteredMetric, RegistryExt}, +}; /// Network metrics to track Consensus #[derive(Debug)] pub(super) struct Metrics { /// Gauge to track time between proposal and finalization. 
- finalization_time: Gauge, + finalization_time: RegisteredMetric, /// Amount of finalized blocks. - finalized_block_count: IntGauge, + finalized_block_count: RegisteredMetric, /// Timestamp of the most recently accepted block payload. - time_of_last_proposed_block: IntGauge, + time_of_last_proposed_block: RegisteredMetric, /// Timestamp of the most recently finalized block. - time_of_last_finalized_block: IntGauge, + time_of_last_finalized_block: RegisteredMetric, /// The current era. - pub(super) consensus_current_era: IntGauge, - /// Registry component. - registry: Registry, + pub(super) consensus_current_era: RegisteredMetric, } impl Metrics { pub(super) fn new(registry: &Registry) -> Result { - let finalization_time = Gauge::new( + let finalization_time = registry.new_gauge( "finalization_time", "the amount of time, in milliseconds, between proposal and finalization of the latest finalized block", )?; let finalized_block_count = - IntGauge::new("amount_of_blocks", "the number of blocks finalized so far")?; - let time_of_last_proposed_block = IntGauge::new( + registry.new_int_gauge("amount_of_blocks", "the number of blocks finalized so far")?; + let time_of_last_proposed_block = registry.new_int_gauge( "time_of_last_block_payload", "timestamp of the most recently accepted block payload", )?; - let time_of_last_finalized_block = IntGauge::new( + let time_of_last_finalized_block = registry.new_int_gauge( "time_of_last_finalized_block", "timestamp of the most recently finalized block", )?; let consensus_current_era = - IntGauge::new("consensus_current_era", "the current era in consensus")?; - registry.register(Box::new(finalization_time.clone()))?; - registry.register(Box::new(finalized_block_count.clone()))?; - registry.register(Box::new(consensus_current_era.clone()))?; - registry.register(Box::new(time_of_last_proposed_block.clone()))?; - registry.register(Box::new(time_of_last_finalized_block.clone()))?; + registry.new_int_gauge("consensus_current_era", "the 
current era in consensus")?; + Ok(Metrics { finalization_time, finalized_block_count, time_of_last_proposed_block, time_of_last_finalized_block, consensus_current_era, - registry: registry.clone(), }) } @@ -70,13 +66,3 @@ impl Metrics { .set(Timestamp::now().millis() as i64); } } - -impl Drop for Metrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.finalization_time); - unregister_metric!(self.registry, self.finalized_block_count); - unregister_metric!(self.registry, self.consensus_current_era); - unregister_metric!(self.registry, self.time_of_last_finalized_block); - unregister_metric!(self.registry, self.time_of_last_proposed_block); - } -} diff --git a/node/src/components/consensus/protocols/common.rs b/node/src/components/consensus/protocols/common.rs index a5f5a819bf..32846d9275 100644 --- a/node/src/components/consensus/protocols/common.rs +++ b/node/src/components/consensus/protocols/common.rs @@ -1,4 +1,5 @@ //! Utilities common to different consensus algorithms. 
+#![allow(clippy::arithmetic_side_effects)] use itertools::Itertools; use num_rational::Ratio; diff --git a/node/src/components/consensus/protocols/highway.rs b/node/src/components/consensus/protocols/highway.rs index 24b9f65531..ab93637c14 100644 --- a/node/src/components/consensus/protocols/highway.rs +++ b/node/src/components/consensus/protocols/highway.rs @@ -1,3 +1,5 @@ +#![allow(clippy::arithmetic_side_effects)] + pub(crate) mod config; mod participation; mod round_success_meter; @@ -17,6 +19,7 @@ use itertools::Itertools; use num_rational::Ratio; use num_traits::CheckedMul; use rand::RngCore; +use serde::{Deserialize, Serialize}; use tracing::{debug, error, info, trace, warn}; use casper_types::{system::auction::BLOCK_REWARD, TimeDiff, Timestamp, U512}; @@ -39,11 +42,13 @@ use crate::{ synchronizer::Synchronizer, }, protocols, - traits::{ConsensusValueT, Context}, + traits::{ConsensusNetworkMessage, ConsensusValueT, Context}, utils::ValidatorIndex, ActionId, TimerId, }, + consensus::ValidationError, types::{Chainspec, NodeId}, + utils::display_error, NodeRng, }; @@ -671,90 +676,28 @@ impl HighwayProtocol { } } -#[allow(clippy::arithmetic_side_effects)] -mod relaxed { - // This module exists solely to exempt the `EnumDiscriminants` macro generated code from the - // module-wide `clippy::arithmetic_side_effects` lint. 
- - use datasize::DataSize; - use serde::{Deserialize, Serialize}; - use strum::EnumDiscriminants; - - use crate::components::consensus::{ - highway_core::{ - highway::{Dependency, Vertex}, - state::IndexPanorama, - }, - traits::{ConsensusNetworkMessage, Context}, - utils::ValidatorIndex, - }; - - #[derive( - DataSize, Clone, Serialize, Deserialize, Debug, PartialEq, Eq, EnumDiscriminants, Hash, - )] - #[serde(bound( - serialize = "C::Hash: Serialize", - deserialize = "C::Hash: Deserialize<'de>", - ))] - #[strum_discriminants(derive(strum::EnumIter))] - pub(crate) enum HighwayMessage - where - C: Context, - { - NewVertex(Vertex), - // A dependency request. u64 is a random UUID identifying the request. - RequestDependency(u64, Dependency), - RequestDependencyByHeight { - uuid: u64, - vid: ValidatorIndex, - unit_seq_number: u64, - }, - LatestStateRequest(IndexPanorama), - } - - impl ConsensusNetworkMessage for HighwayMessage {} -} -pub(crate) use relaxed::{HighwayMessage, HighwayMessageDiscriminants}; - -mod specimen_support { - use crate::{ - components::consensus::ClContext, - utils::specimen::{largest_variant, Cache, LargestSpecimen, SizeEstimator}, - }; - - use super::{HighwayMessage, HighwayMessageDiscriminants}; - - impl LargestSpecimen for HighwayMessage { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - largest_variant::(estimator, |variant| { - match variant { - HighwayMessageDiscriminants::NewVertex => HighwayMessage::NewVertex( - LargestSpecimen::largest_specimen(estimator, cache), - ), - HighwayMessageDiscriminants::RequestDependency => { - HighwayMessage::RequestDependency( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } - HighwayMessageDiscriminants::RequestDependencyByHeight => { - HighwayMessage::RequestDependencyByHeight { - uuid: LargestSpecimen::largest_specimen(estimator, cache), - vid: LargestSpecimen::largest_specimen(estimator, cache), - unit_seq_number: 
LargestSpecimen::largest_specimen(estimator, cache), - } - } - HighwayMessageDiscriminants::LatestStateRequest => { - HighwayMessage::LatestStateRequest(LargestSpecimen::largest_specimen( - estimator, cache, - )) - } - } - }) - } - } +#[derive(DataSize, Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hash)] +#[serde(bound( + serialize = "C::Hash: Serialize", + deserialize = "C::Hash: Deserialize<'de>", +))] +pub(crate) enum HighwayMessage +where + C: Context, +{ + NewVertex(Vertex), + // A dependency request. u64 is a random UUID identifying the request. + RequestDependency(u64, Dependency), + RequestDependencyByHeight { + uuid: u64, + vid: ValidatorIndex, + unit_seq_number: u64, + }, + LatestStateRequest(IndexPanorama), } +impl ConsensusNetworkMessage for HighwayMessage {} + impl ConsensusProtocol for HighwayProtocol where C: Context + 'static, @@ -1020,25 +963,19 @@ where fn resolve_validity( &mut self, proposed_block: ProposedBlock, - valid: bool, + validation_error: Option, now: Timestamp, ) -> ProtocolOutcomes { - if valid { - let mut outcomes = self - .pending_values - .remove(&proposed_block) - .into_iter() - .flatten() - .flat_map(|(vv, _)| self.add_valid_vertex(vv, now)) - .collect_vec(); - outcomes.extend(self.synchronizer.remove_satisfied_deps(&self.highway)); - outcomes.extend(self.detect_finality()); - outcomes - } else { + if let Some(error) = validation_error { // TODO: Report proposer as faulty? // Drop vertices dependent on the invalid value. 
let dropped_vertices = self.pending_values.remove(&proposed_block); - warn!(?proposed_block, ?dropped_vertices, "proposal is invalid"); + warn!( + error = display_error(&error), + ?proposed_block, + ?dropped_vertices, + "proposal is invalid" + ); let dropped_vertex_ids = dropped_vertices .into_iter() .flatten() @@ -1049,10 +986,21 @@ where .collect(); // recursively remove vertices depending on the dropped ones let _faulty_senders = self.synchronizer.invalid_vertices(dropped_vertex_ids); - // We don't disconnect from the faulty senders here: The block validator considers the - // value "invalid" even if it just couldn't download the deploys, which could just be - // because the original sender went offline. + // We don't disconnect from the faulty senders here: The proposed block validator + // considers the value "invalid" even if it just couldn't download the deploys, which + // could just be because the original sender went offline. vec![] + } else { + let mut outcomes = self + .pending_values + .remove(&proposed_block) + .into_iter() + .flatten() + .flat_map(|(vv, _)| self.add_valid_vertex(vv, now)) + .collect_vec(); + outcomes.extend(self.synchronizer.remove_satisfied_deps(&self.highway)); + outcomes.extend(self.detect_finality()); + outcomes } } diff --git a/node/src/components/consensus/protocols/zug.rs b/node/src/components/consensus/protocols/zug.rs index 1f825339d6..a1d6b31128 100644 --- a/node/src/components/consensus/protocols/zug.rs +++ b/node/src/components/consensus/protocols/zug.rs @@ -1,3 +1,4 @@ +#![allow(clippy::arithmetic_side_effects)] //! # The Zug consensus protocol. //! //! This protocol requires that at most _f_ out of _n > 3 f_ validators (by weight) are faulty. 
It @@ -96,6 +97,7 @@ use crate::{ utils::{ValidatorIndex, ValidatorMap, Validators, Weight}, ActionId, LeaderSequence, TimerId, }, + consensus::ValidationError, types::{Chainspec, NodeId}, utils, NodeRng, }; @@ -1706,8 +1708,8 @@ impl Zug { true } - /// Sends a proposal to the `BlockValidator` component for validation. If no validation is - /// needed, immediately calls `insert_proposal`. + /// Sends a proposal to the `ProposedBlockValidator` component for validation. If no validation + /// is needed, immediately calls `insert_proposal`. fn validate_proposal( &mut self, round_id: RoundId, @@ -2260,7 +2262,7 @@ where fn resolve_validity( &mut self, proposed_block: ProposedBlock, - valid: bool, + validation_error: Option, now: Timestamp, ) -> ProtocolOutcomes { let rounds_and_node_ids = self @@ -2269,7 +2271,7 @@ where .into_iter() .flatten(); let mut outcomes = vec![]; - if valid { + if validation_error.is_none() { for (round_id, proposal, _sender) in rounds_and_node_ids { info!(our_idx = self.our_idx(), %round_id, %proposal, "handling valid proposal"); if self.round_mut(round_id).insert_proposal(proposal.clone()) { @@ -2284,9 +2286,9 @@ where outcomes.extend(self.update(now)); } else { for (round_id, proposal, sender) in rounds_and_node_ids { - // We don't disconnect from the faulty sender here: The block validator considers - // the value "invalid" even if it just couldn't download the deploys, which could - // just be because the original sender went offline. + // We don't disconnect from the faulty sender here: The proposed block validator + // considers the value "invalid" even if it just couldn't download the deploys, + // which could just be because the original sender went offline. 
let validator_index = self.leader(round_id).0; info!( our_idx = self.our_idx(), @@ -2431,151 +2433,3 @@ where Some(self.params.min_block_time()) } } - -mod specimen_support { - use std::collections::BTreeSet; - - use crate::{ - components::consensus::{utils::ValidatorIndex, ClContext}, - utils::specimen::{ - btree_map_distinct_from_prop, btree_set_distinct_from_prop, largest_variant, - vec_prop_specimen, Cache, LargeUniqueSequence, LargestSpecimen, SizeEstimator, - }, - }; - - use super::{ - message::{ - Content, ContentDiscriminants, Message, MessageDiscriminants, SignedMessage, - SyncResponse, - }, - proposal::Proposal, - SyncRequest, - }; - - impl LargestSpecimen for Message { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - largest_variant::( - estimator, - |variant| match variant { - MessageDiscriminants::SyncResponse => { - Message::SyncResponse(LargestSpecimen::largest_specimen(estimator, cache)) - } - MessageDiscriminants::Proposal => Message::Proposal { - round_id: LargestSpecimen::largest_specimen(estimator, cache), - instance_id: LargestSpecimen::largest_specimen(estimator, cache), - proposal: LargestSpecimen::largest_specimen(estimator, cache), - echo: LargestSpecimen::largest_specimen(estimator, cache), - }, - MessageDiscriminants::Signed => { - Message::Signed(LargestSpecimen::largest_specimen(estimator, cache)) - } - MessageDiscriminants::Evidence => Message::Evidence( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ), - }, - ) - } - } - - impl LargestSpecimen for SyncRequest { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - SyncRequest { - round_id: LargestSpecimen::largest_specimen(estimator, cache), - proposal_hash: LargestSpecimen::largest_specimen(estimator, cache), - has_proposal: LargestSpecimen::largest_specimen(estimator, cache), - first_validator_idx: 
LargestSpecimen::largest_specimen(estimator, cache), - echoes: LargestSpecimen::largest_specimen(estimator, cache), - true_votes: LargestSpecimen::largest_specimen(estimator, cache), - false_votes: LargestSpecimen::largest_specimen(estimator, cache), - active: LargestSpecimen::largest_specimen(estimator, cache), - faulty: LargestSpecimen::largest_specimen(estimator, cache), - instance_id: LargestSpecimen::largest_specimen(estimator, cache), - } - } - } - - impl LargeUniqueSequence for ValidatorIndex - where - E: SizeEstimator, - { - fn large_unique_sequence( - _estimator: &E, - count: usize, - _cache: &mut Cache, - ) -> BTreeSet { - Iterator::map((0..u32::MAX).rev(), ValidatorIndex::from) - .take(count) - .collect() - } - } - - impl LargestSpecimen for SyncResponse { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - SyncResponse { - round_id: LargestSpecimen::largest_specimen(estimator, cache), - proposal_or_hash: LargestSpecimen::largest_specimen(estimator, cache), - echo_sigs: btree_map_distinct_from_prop(estimator, "validator_count", cache), - true_vote_sigs: btree_map_distinct_from_prop(estimator, "validator_count", cache), - false_vote_sigs: btree_map_distinct_from_prop(estimator, "validator_count", cache), - signed_messages: vec_prop_specimen(estimator, "validator_count", cache), - evidence: vec_prop_specimen(estimator, "validator_count", cache), - instance_id: LargestSpecimen::largest_specimen(estimator, cache), - } - } - } - - impl LargestSpecimen for Proposal { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - Proposal { - timestamp: LargestSpecimen::largest_specimen(estimator, cache), - maybe_block: LargestSpecimen::largest_specimen(estimator, cache), - maybe_parent_round_id: LargestSpecimen::largest_specimen(estimator, cache), - inactive: Some(btree_set_distinct_from_prop( - estimator, - "validator_count", - cache, - )), - } - } - } - - impl LargestSpecimen for ValidatorIndex { - fn largest_specimen(estimator: &E, 
cache: &mut Cache) -> Self { - u32::largest_specimen(estimator, cache).into() - } - } - - impl LargestSpecimen for SignedMessage { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - SignedMessage::sign_new( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - &LargestSpecimen::largest_specimen(estimator, cache), - ) - } - } - - impl LargestSpecimen for Content { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - if let Some(item) = cache.get::() { - return *item; - } - - let item = largest_variant::(estimator, |variant| { - match variant { - ContentDiscriminants::Echo => { - Content::Echo(LargestSpecimen::largest_specimen(estimator, cache)) - } - ContentDiscriminants::Vote => { - Content::Vote(LargestSpecimen::largest_specimen(estimator, cache)) - } - } - }); - *cache.set(item) - } - } -} diff --git a/node/src/components/consensus/protocols/zug/des_testing.rs b/node/src/components/consensus/protocols/zug/des_testing.rs index a6a6865652..7e3baca855 100644 --- a/node/src/components/consensus/protocols/zug/des_testing.rs +++ b/node/src/components/consensus/protocols/zug/des_testing.rs @@ -603,7 +603,7 @@ where self.call_validator(delivery_time, &validator_id, |consensus| { consensus .zug_mut() - .resolve_validity(proposed_block, true, delivery_time) + .resolve_validity(proposed_block, None, delivery_time) })? 
} ZugMessage::NewEvidence(_) => vec![], // irrelevant to consensus diff --git a/node/src/components/consensus/protocols/zug/message.rs b/node/src/components/consensus/protocols/zug/message.rs index 8fd0fcf1c9..a6f84c1a50 100644 --- a/node/src/components/consensus/protocols/zug/message.rs +++ b/node/src/components/consensus/protocols/zug/message.rs @@ -14,85 +14,61 @@ use crate::{ utils::ds, }; -#[allow(clippy::arithmetic_side_effects)] -mod relaxed { - // This module exists solely to exempt the `EnumDiscriminants` macro generated code from the - // module-wide `clippy::arithmetic_side_effects` lint. - - use datasize::DataSize; - use serde::{Deserialize, Serialize}; - use strum::EnumDiscriminants; - - use crate::components::consensus::{ - protocols::zug::{proposal::Proposal, RoundId}, - traits::{ConsensusNetworkMessage, Context}, - }; - - use super::{SignedMessage, SyncResponse}; - - /// The content of a message in the main protocol, as opposed to the proposal, and to sync - /// messages, which are somewhat decoupled from the rest of the protocol. These messages, - /// along with the instance and round ID, are signed by the active validators. - #[derive( - Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hash, DataSize, EnumDiscriminants, - )] - #[serde(bound( - serialize = "C::Hash: Serialize", - deserialize = "C::Hash: Deserialize<'de>", - ))] - #[strum_discriminants(derive(strum::EnumIter))] - pub(crate) enum Content - where - C: Context, - { - /// By signing the echo of a proposal hash a validator affirms that this is the first (and - /// usually only) proposal by the round leader that they have received. A quorum of echoes - /// is a requirement for a proposal to become accepted. - Echo(C::Hash), - /// By signing a `true` vote a validator confirms that they have accepted a proposal in - /// this round before the timeout. If there is a quorum of `true` votes, the - /// proposal becomes finalized, together with its ancestors. 
- /// - /// A `false` vote means they timed out waiting for a proposal to get accepted. A quorum of - /// `false` votes allows the next round's leader to make a proposal without waiting for - /// this round's. - Vote(bool), - } - - /// All messages of the protocol. - #[derive( - DataSize, Clone, Serialize, Deserialize, Debug, PartialEq, Eq, EnumDiscriminants, Hash, - )] - #[serde(bound( - serialize = "C::Hash: Serialize", - deserialize = "C::Hash: Deserialize<'de>", - ))] - #[strum_discriminants(derive(strum::EnumIter))] - pub(crate) enum Message - where - C: Context, - { - /// Signatures, proposals and evidence the requester was missing. - SyncResponse(SyncResponse), - /// A proposal for a new block. This does not contain any signature; instead, the proposer - /// is expected to sign an echo with the proposal hash. Validators will drop any - /// proposal they receive unless they either have a signed echo by the proposer and - /// the proposer has not double-signed, or they have a quorum of echoes. - Proposal { - round_id: RoundId, - instance_id: C::InstanceId, - proposal: Proposal, - echo: SignedMessage, - }, - /// An echo or vote signed by an active validator. - Signed(SignedMessage), - /// Two conflicting signatures by the same validator. - Evidence(SignedMessage, Content, C::Signature), - } +/// The content of a message in the main protocol, as opposed to the proposal, and to sync +/// messages, which are somewhat decoupled from the rest of the protocol. These messages, +/// along with the instance and round ID, are signed by the active validators. +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hash, DataSize)] +#[serde(bound( + serialize = "C::Hash: Serialize", + deserialize = "C::Hash: Deserialize<'de>", +))] +pub(crate) enum Content +where + C: Context, +{ + /// By signing the echo of a proposal hash a validator affirms that this is the first (and + /// usually only) proposal by the round leader that they have received. 
A quorum of echoes + /// is a requirement for a proposal to become accepted. + Echo(C::Hash), + /// By signing a `true` vote a validator confirms that they have accepted a proposal in + /// this round before the timeout. If there is a quorum of `true` votes, the + /// proposal becomes finalized, together with its ancestors. + /// + /// A `false` vote means they timed out waiting for a proposal to get accepted. A quorum of + /// `false` votes allows the next round's leader to make a proposal without waiting for + /// this round's. + Vote(bool), +} - impl ConsensusNetworkMessage for Message {} +/// All messages of the protocol. +#[derive(DataSize, Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hash)] +#[serde(bound( + serialize = "C::Hash: Serialize", + deserialize = "C::Hash: Deserialize<'de>", +))] +pub(crate) enum Message +where + C: Context, +{ + /// Signatures, proposals and evidence the requester was missing. + SyncResponse(SyncResponse), + /// A proposal for a new block. This does not contain any signature; instead, the proposer + /// is expected to sign an echo with the proposal hash. Validators will drop any + /// proposal they receive unless they either have a signed echo by the proposer and + /// the proposer has not double-signed, or they have a quorum of echoes. + Proposal { + round_id: RoundId, + instance_id: C::InstanceId, + proposal: Proposal, + echo: SignedMessage, + }, + /// An echo or vote signed by an active validator. + Signed(SignedMessage), + /// Two conflicting signatures by the same validator. + Evidence(SignedMessage, Content, C::Signature), } -pub(crate) use relaxed::{Content, ContentDiscriminants, Message, MessageDiscriminants}; + +impl ConsensusNetworkMessage for Message {} impl Content { /// Returns whether the two contents contradict each other. 
A correct validator is expected to diff --git a/node/src/components/consensus/utils/validators.rs b/node/src/components/consensus/utils/validators.rs index b1215de8f5..f9cdc2f3a6 100644 --- a/node/src/components/consensus/utils/validators.rs +++ b/node/src/components/consensus/utils/validators.rs @@ -1,3 +1,4 @@ +#![allow(clippy::arithmetic_side_effects)] use std::{ collections::HashMap, fmt, diff --git a/node/src/components/contract_runtime.rs b/node/src/components/contract_runtime.rs index e052b47d83..c51510e35a 100644 --- a/node/src/components/contract_runtime.rs +++ b/node/src/components/contract_runtime.rs @@ -51,7 +51,7 @@ use crate::{ ContractRuntimeAnnouncement, FatalAnnouncement, MetaBlockAnnouncement, UnexecutedBlockAnnouncement, }, - incoming::{TrieDemand, TrieRequest, TrieRequestIncoming}, + incoming::{TrieRequest, TrieRequestIncoming}, requests::{ContractRuntimeRequest, NetworkRequest, StorageRequest}, EffectBuilder, EffectExt, Effects, }, @@ -180,9 +180,6 @@ pub(crate) enum Event { #[from] TrieRequestIncoming(TrieRequestIncoming), - - #[from] - TrieDemand(TrieDemand), } impl Display for Event { @@ -192,7 +189,6 @@ impl Display for Event { write!(f, "contract runtime request: {}", req) } Event::TrieRequestIncoming(req) => write!(f, "trie request incoming: {}", req), - Event::TrieDemand(demand) => write!(f, "trie demand: {}", demand), } } } @@ -246,7 +242,6 @@ where Event::TrieRequestIncoming(request) => { self.handle_trie_request(effect_builder, request) } - Event::TrieDemand(demand) => self.handle_trie_demand(demand), } } @@ -266,58 +261,39 @@ impl ContractRuntime { .len() } - /// Handles an incoming request to get a trie. + /// Handles an incoming request for a trie. 
fn handle_trie_request( &self, effect_builder: EffectBuilder, - TrieRequestIncoming { sender, message }: TrieRequestIncoming, + TrieRequestIncoming { + message, + sender, + ticket, + }: TrieRequestIncoming, ) -> Effects where REv: From> + Send, { let TrieRequest(ref serialized_id) = *message; - let fetch_response = match self.get_trie(serialized_id) { - Ok(fetch_response) => fetch_response, - Err(error) => { - debug!("failed to get trie: {}", error); - return Effects::new(); - } - }; - - match Message::new_get_response(&fetch_response) { - Ok(message) => effect_builder.send_message(sender, message).ignore(), - Err(error) => { - error!("failed to create get-response: {}", error); - Effects::new() - } - } - } - - /// Handles an incoming demand for a trie. - fn handle_trie_demand( - &self, - TrieDemand { - request_msg, - auto_closing_responder, - .. - }: TrieDemand, - ) -> Effects { - let TrieRequest(ref serialized_id) = *request_msg; let fetch_response = match self.get_trie(serialized_id) { Ok(fetch_response) => fetch_response, Err(error) => { // Something is wrong in our trie store, but be courteous and still send a reply. debug!("failed to get trie: {}", error); - return auto_closing_responder.respond_none().ignore(); + drop(ticket); + return Effects::new(); } }; match Message::new_get_response(&fetch_response) { - Ok(message) => auto_closing_responder.respond(message).ignore(), + Ok(message) => effect_builder + .send_message_and_drop_ticket(sender, message, ticket) + .ignore(), Err(error) => { // This should never happen, but if it does, we let the peer know we cannot help. 
error!("failed to create get-response: {}", error); - auto_closing_responder.respond_none().ignore() + drop(ticket); + Effects::new() } } } diff --git a/node/src/components/contract_runtime/metrics.rs b/node/src/components/contract_runtime/metrics.rs index a7833e72fd..7160125b75 100644 --- a/node/src/components/contract_runtime/metrics.rs +++ b/node/src/components/contract_runtime/metrics.rs @@ -1,6 +1,6 @@ use prometheus::{self, Gauge, Histogram, IntGauge, Registry}; -use crate::{unregister_metric, utils}; +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; /// Value of upper bound of histogram. const EXPONENTIAL_BUCKET_START: f64 = 0.01; @@ -58,20 +58,19 @@ const EXEC_QUEUE_SIZE_HELP: &str = /// Metrics for the contract runtime component. #[derive(Debug)] pub struct Metrics { - pub(super) run_execute: Histogram, - pub(super) apply_effect: Histogram, - pub(super) commit_upgrade: Histogram, - pub(super) run_query: Histogram, - pub(super) commit_step: Histogram, - pub(super) get_balance: Histogram, - pub(super) get_era_validators: Histogram, - pub(super) get_bids: Histogram, - pub(super) put_trie: Histogram, - pub(super) get_trie: Histogram, - pub(super) exec_block: Histogram, - pub(super) latest_commit_step: Gauge, - pub(super) exec_queue_size: IntGauge, - registry: Registry, + pub(super) run_execute: RegisteredMetric, + pub(super) apply_effect: RegisteredMetric, + pub(super) commit_upgrade: RegisteredMetric, + pub(super) run_query: RegisteredMetric, + pub(super) commit_step: RegisteredMetric, + pub(super) get_balance: RegisteredMetric, + pub(super) get_era_validators: RegisteredMetric, + pub(super) get_bids: RegisteredMetric, + pub(super) put_trie: RegisteredMetric, + pub(super) get_trie: RegisteredMetric, + pub(super) exec_block: RegisteredMetric, + pub(super) latest_commit_step: RegisteredMetric, + pub(super) exec_queue_size: RegisteredMetric, } impl Metrics { @@ -89,100 +88,57 @@ impl Metrics { // Anything above that should be a warning 
signal. let tiny_buckets = prometheus::exponential_buckets(0.001, 2.0, 10)?; - let latest_commit_step = Gauge::new(LATEST_COMMIT_STEP_NAME, LATEST_COMMIT_STEP_HELP)?; - registry.register(Box::new(latest_commit_step.clone()))?; + let latest_commit_step = + registry.new_gauge(LATEST_COMMIT_STEP_NAME, LATEST_COMMIT_STEP_HELP)?; - let exec_queue_size = IntGauge::new(EXEC_QUEUE_SIZE_NAME, EXEC_QUEUE_SIZE_HELP)?; - registry.register(Box::new(exec_queue_size.clone()))?; + let exec_queue_size = registry.new_int_gauge(EXEC_QUEUE_SIZE_NAME, EXEC_QUEUE_SIZE_HELP)?; Ok(Metrics { - run_execute: utils::register_histogram_metric( - registry, + run_execute: registry.new_histogram( RUN_EXECUTE_NAME, RUN_EXECUTE_HELP, common_buckets.clone(), )?, - apply_effect: utils::register_histogram_metric( - registry, + apply_effect: registry.new_histogram( APPLY_EFFECT_NAME, APPLY_EFFECT_HELP, common_buckets.clone(), )?, - run_query: utils::register_histogram_metric( - registry, + run_query: registry.new_histogram( RUN_QUERY_NAME, RUN_QUERY_HELP, common_buckets.clone(), )?, - commit_step: utils::register_histogram_metric( - registry, + commit_step: registry.new_histogram( COMMIT_STEP_NAME, COMMIT_STEP_HELP, common_buckets.clone(), )?, - commit_upgrade: utils::register_histogram_metric( - registry, + commit_upgrade: registry.new_histogram( COMMIT_UPGRADE_NAME, COMMIT_UPGRADE_HELP, common_buckets.clone(), )?, - get_balance: utils::register_histogram_metric( - registry, + get_balance: registry.new_histogram( GET_BALANCE_NAME, GET_BALANCE_HELP, common_buckets.clone(), )?, - get_era_validators: utils::register_histogram_metric( - registry, + get_era_validators: registry.new_histogram( GET_ERA_VALIDATORS_NAME, GET_ERA_VALIDATORS_HELP, common_buckets.clone(), )?, - get_bids: utils::register_histogram_metric( - registry, + get_bids: registry.new_histogram( GET_BIDS_NAME, GET_BIDS_HELP, common_buckets.clone(), )?, - get_trie: utils::register_histogram_metric( - registry, - GET_TRIE_NAME, - 
GET_TRIE_HELP, - tiny_buckets.clone(), - )?, - put_trie: utils::register_histogram_metric( - registry, - PUT_TRIE_NAME, - PUT_TRIE_HELP, - tiny_buckets, - )?, - exec_block: utils::register_histogram_metric( - registry, - EXEC_BLOCK_NAME, - EXEC_BLOCK_HELP, - common_buckets, - )?, + get_trie: registry.new_histogram(GET_TRIE_NAME, GET_TRIE_HELP, tiny_buckets.clone())?, + put_trie: registry.new_histogram(PUT_TRIE_NAME, PUT_TRIE_HELP, tiny_buckets)?, + exec_block: registry.new_histogram(EXEC_BLOCK_NAME, EXEC_BLOCK_HELP, common_buckets)?, latest_commit_step, exec_queue_size, - registry: registry.clone(), }) } } - -impl Drop for Metrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.run_execute); - unregister_metric!(self.registry, self.apply_effect); - unregister_metric!(self.registry, self.commit_upgrade); - unregister_metric!(self.registry, self.run_query); - unregister_metric!(self.registry, self.commit_step); - unregister_metric!(self.registry, self.get_balance); - unregister_metric!(self.registry, self.get_era_validators); - unregister_metric!(self.registry, self.get_bids); - unregister_metric!(self.registry, self.put_trie); - unregister_metric!(self.registry, self.get_trie); - unregister_metric!(self.registry, self.exec_block); - unregister_metric!(self.registry, self.latest_commit_step); - unregister_metric!(self.registry, self.exec_queue_size); - } -} diff --git a/node/src/components/deploy_acceptor.rs b/node/src/components/deploy_acceptor.rs index 44551bf126..044be218aa 100644 --- a/node/src/components/deploy_acceptor.rs +++ b/node/src/components/deploy_acceptor.rs @@ -46,6 +46,8 @@ use crate::{ pub(crate) use config::Config; pub(crate) use event::{Event, EventMetadata}; +use super::network::Ticket; + const COMPONENT_NAME: &str = "deploy_acceptor"; const ARG_TARGET: &str = "target"; @@ -242,6 +244,7 @@ impl DeployAcceptor { deploy: Arc, source: Source, maybe_responder: Option>>, + _: Ticket, // We currently drop the ticket implicitly, see 
below. ) -> Effects { debug!(%source, %deploy, "checking acceptance"); let verification_start_timestamp = Timestamp::now(); @@ -309,6 +312,9 @@ impl DeployAcceptor { maybe_block_header: maybe_block_header.map(Box::new), verification_start_timestamp, }) + + // Note: `ticket` is dropped implicitly -- it would be better to thread it through further + // to capture the work of retrieving the data from storage/EE. } fn handle_get_block_header_result( @@ -958,6 +964,7 @@ impl DeployAcceptor { .ignore(), ); } + effects } @@ -1047,7 +1054,8 @@ impl Component for DeployAcceptor { deploy, source, maybe_responder: responder, - } => self.accept(effect_builder, deploy, source, responder), + ticket, + } => self.accept(effect_builder, deploy, source, responder, ticket), Event::GetBlockHeaderResult { event_metadata, maybe_block_header, diff --git a/node/src/components/deploy_acceptor/event.rs b/node/src/components/deploy_acceptor/event.rs index f764ef4401..d8d4bcd17a 100644 --- a/node/src/components/deploy_acceptor/event.rs +++ b/node/src/components/deploy_acceptor/event.rs @@ -12,7 +12,7 @@ use casper_types::{ use super::Source; use crate::{ - components::deploy_acceptor::Error, + components::{deploy_acceptor::Error, network::Ticket}, effect::Responder, types::{BlockHeader, Deploy}, }; @@ -47,6 +47,8 @@ pub(crate) enum Event { deploy: Arc, source: Source, maybe_responder: Option>>, + #[serde(skip)] + ticket: Ticket, }, /// The result of the `DeployAcceptor` putting a `Deploy` to the storage component. 
PutToStorageResult { diff --git a/node/src/components/deploy_acceptor/metrics.rs b/node/src/components/deploy_acceptor/metrics.rs index 444bd41ee3..d48b5f685b 100644 --- a/node/src/components/deploy_acceptor/metrics.rs +++ b/node/src/components/deploy_acceptor/metrics.rs @@ -2,7 +2,7 @@ use prometheus::{Histogram, Registry}; use casper_types::Timestamp; -use crate::{unregister_metric, utils}; +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; const DEPLOY_ACCEPTED_NAME: &str = "deploy_acceptor_accepted_deploy"; const DEPLOY_ACCEPTED_HELP: &str = "time in seconds to accept a deploy in the deploy acceptor"; @@ -20,9 +20,8 @@ const EXPONENTIAL_BUCKET_COUNT: usize = 10; #[derive(Debug)] pub(super) struct Metrics { - deploy_accepted: Histogram, - deploy_rejected: Histogram, - registry: Registry, + deploy_accepted: RegisteredMetric, + deploy_rejected: RegisteredMetric, } impl Metrics { @@ -34,19 +33,16 @@ impl Metrics { )?; Ok(Self { - deploy_accepted: utils::register_histogram_metric( - registry, + deploy_accepted: registry.new_histogram( DEPLOY_ACCEPTED_NAME, DEPLOY_ACCEPTED_HELP, common_buckets.clone(), )?, - deploy_rejected: utils::register_histogram_metric( - registry, + deploy_rejected: registry.new_histogram( DEPLOY_REJECTED_NAME, DEPLOY_REJECTED_HELP, common_buckets, )?, - registry: registry.clone(), }) } @@ -60,10 +56,3 @@ impl Metrics { .observe(start.elapsed().millis() as f64); } } - -impl Drop for Metrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.deploy_accepted); - unregister_metric!(self.registry, self.deploy_rejected); - } -} diff --git a/node/src/components/deploy_acceptor/tests.rs b/node/src/components/deploy_acceptor/tests.rs index c315e3e760..70dbc4f4b1 100644 --- a/node/src/components/deploy_acceptor/tests.rs +++ b/node/src/components/deploy_acceptor/tests.rs @@ -760,6 +760,7 @@ fn schedule_accept_deploy( deploy, source, maybe_responder: Some(responder), + ticket: Ticket::create_dummy(), }, 
QueueKind::Validation, ) diff --git a/node/src/components/deploy_buffer/metrics.rs b/node/src/components/deploy_buffer/metrics.rs index df2e292b01..811324ba9b 100644 --- a/node/src/components/deploy_buffer/metrics.rs +++ b/node/src/components/deploy_buffer/metrics.rs @@ -1,52 +1,38 @@ use prometheus::{IntGauge, Registry}; -use crate::unregister_metric; +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; /// Metrics for the deploy_buffer component. #[derive(Debug)] pub(super) struct Metrics { /// Total number of deploys contained in the deploy buffer. - pub(super) total_deploys: IntGauge, + pub(super) total_deploys: RegisteredMetric, /// Number of deploys contained in in-flight proposed blocks. - pub(super) held_deploys: IntGauge, + pub(super) held_deploys: RegisteredMetric, /// Number of deploys that should not be included in future proposals ever again. - pub(super) dead_deploys: IntGauge, - registry: Registry, + pub(super) dead_deploys: RegisteredMetric, } impl Metrics { /// Creates a new instance of the block accumulator metrics, using the given prefix. 
pub fn new(registry: &Registry) -> Result { - let total_deploys = IntGauge::new( + let total_deploys = registry.new_int_gauge( "deploy_buffer_total_deploys".to_string(), "total number of deploys contained in the deploy buffer.".to_string(), )?; - let held_deploys = IntGauge::new( + let held_deploys = registry.new_int_gauge( "deploy_buffer_held_deploys".to_string(), "number of deploys included in in-flight proposed blocks.".to_string(), )?; - let dead_deploys = IntGauge::new( + let dead_deploys = registry.new_int_gauge( "deploy_buffer_dead_deploys".to_string(), "number of deploys that should not be included in future proposals.".to_string(), )?; - registry.register(Box::new(total_deploys.clone()))?; - registry.register(Box::new(held_deploys.clone()))?; - registry.register(Box::new(dead_deploys.clone()))?; - Ok(Metrics { total_deploys, held_deploys, dead_deploys, - registry: registry.clone(), }) } } - -impl Drop for Metrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.total_deploys); - unregister_metric!(self.registry, self.held_deploys); - unregister_metric!(self.registry, self.dead_deploys); - } -} diff --git a/node/src/components/diagnostics_port.rs b/node/src/components/diagnostics_port.rs index 682750c9b2..78d74b8ab9 100644 --- a/node/src/components/diagnostics_port.rs +++ b/node/src/components/diagnostics_port.rs @@ -17,7 +17,7 @@ use std::{ use datasize::DataSize; use serde::{Deserialize, Serialize}; use thiserror::Error; -use tokio::{net::UnixListener, sync::watch}; +use tokio::net::UnixListener; use tracing::{debug, error, info, warn}; use crate::{ @@ -30,7 +30,7 @@ use crate::{ }, reactor::main_reactor::MainEvent, types::NodeRng, - utils::umask, + utils::{umask, DropSwitch, ObservableFuse}, WithDir, }; pub(crate) use stop_at::StopAtSpec; @@ -65,8 +65,8 @@ impl Default for Config { pub(crate) struct DiagnosticsPort { state: ComponentState, /// Sender which will cause server and client connections to exit when dropped. 
- #[data_size(skip)] - _shutdown_sender: Option>, // only used for its `Drop` impl + #[allow(dead_code)] + shutdown_fuse: DropSwitch, config: WithDir, } @@ -76,7 +76,7 @@ impl DiagnosticsPort { DiagnosticsPort { state: ComponentState::Uninitialized, config, - _shutdown_sender: None, + shutdown_fuse: DropSwitch::new(ObservableFuse::new()), } } } @@ -141,8 +141,16 @@ where if self.state != ComponentState::Initializing { return Effects::new(); } - let (effects, state) = self.bind(self.config.value().enabled, effect_builder); + let (effects, mut state) = + self.bind(self.config.value().enabled, effect_builder); + + if matches!(state, ComponentState::Initializing) { + // No port address to bind, jump to initialized immediately. + state = ComponentState::Initialized; + } + >::set_state(self, state); + effects } }, @@ -195,10 +203,6 @@ where &mut self, effect_builder: EffectBuilder, ) -> Result, Self::Error> { - let (shutdown_sender, shutdown_receiver) = watch::channel(()); - - self._shutdown_sender = Some(shutdown_sender); - let cfg = self.config.value(); let socket_path = self.config.with_dir(cfg.socket_path.clone()); @@ -208,7 +212,12 @@ where #[allow(clippy::useless_conversion)] cfg.socket_umask.into(), )?; - let server = tasks::server(effect_builder, socket_path, listener, shutdown_receiver); + let server = tasks::server( + effect_builder, + socket_path, + listener, + self.shutdown_fuse.inner().clone(), + ); Ok(server.ignore()) } } diff --git a/node/src/components/diagnostics_port/tasks.rs b/node/src/components/diagnostics_port/tasks.rs index 595b815b9a..489e61aaa9 100644 --- a/node/src/components/diagnostics_port/tasks.rs +++ b/node/src/components/diagnostics_port/tasks.rs @@ -12,13 +12,15 @@ use bincode::{ DefaultOptions, Options, }; use erased_serde::Serializer as ErasedSerializer; -use futures::future::{self, Either}; +use futures::{ + future::{self, Either}, + pin_mut, +}; use serde::Serialize; use thiserror::Error; use tokio::{ io::{AsyncBufReadExt, AsyncRead, 
AsyncWriteExt, BufReader}, net::{unix::OwnedWriteHalf, UnixListener, UnixStream}, - sync::watch, }; use tracing::{debug, info, info_span, warn, Instrument}; @@ -39,7 +41,7 @@ use crate::{ }, failpoints::FailpointActivation, logging, - utils::{display_error, opt_display::OptDisplay}, + utils::{display_error, opt_display::OptDisplay, ObservableFuse, Peel}, }; /// Success or failure response. @@ -496,7 +498,7 @@ fn set_log_filter(filter_str: &str) -> Result<(), SetLogFilterError> { async fn handler( effect_builder: EffectBuilder, stream: UnixStream, - mut shutdown_receiver: watch::Receiver<()>, + shutdown_fuse: ObservableFuse, ) -> io::Result<()> where REv: From @@ -513,14 +515,17 @@ where let mut keep_going = true; while keep_going { - let shutdown_messages = async { while shutdown_receiver.changed().await.is_ok() {} }; + let shutdown = shutdown_fuse.wait(); + pin_mut!(shutdown); + let next_line = lines.next_line(); + pin_mut!(next_line); - match future::select(Box::pin(shutdown_messages), Box::pin(lines.next_line())).await { + match future::select(shutdown, next_line).await.peel() { Either::Left(_) => { info!("shutting down diagnostics port connection to client"); return Ok(()); } - Either::Right((line_result, _)) => { + Either::Right(line_result) => { if let Some(line) = line_result? 
{ keep_going = session .process_line(effect_builder, &mut writer, line.as_str()) @@ -541,7 +546,7 @@ pub(super) async fn server( effect_builder: EffectBuilder, socket_path: PathBuf, listener: UnixListener, - mut shutdown_receiver: watch::Receiver<()>, + shutdown_fuse: ObservableFuse, ) where REv: From + From @@ -549,8 +554,8 @@ pub(super) async fn server( + From + Send, { - let handling_shutdown_receiver = shutdown_receiver.clone(); let mut next_client_id: u64 = 0; + let acceptor_fuse = shutdown_fuse.clone(); let accept_connections = async move { loop { match listener.accept().await { @@ -566,8 +571,7 @@ pub(super) async fn server( next_client_id += 1; tokio::spawn( - handler(effect_builder, stream, handling_shutdown_receiver.clone()) - .instrument(span), + handler(effect_builder, stream, acceptor_fuse.clone()).instrument(span), ); } Err(err) => { @@ -577,11 +581,13 @@ pub(super) async fn server( } }; - let shutdown_messages = async move { while shutdown_receiver.changed().await.is_ok() {} }; + let shutdown = shutdown_fuse.wait(); + pin_mut!(shutdown); + pin_mut!(accept_connections); // Now we can wait for either the `shutdown` channel's remote end to do be dropped or the // infinite loop to terminate, which never happens. - match future::select(Box::pin(shutdown_messages), Box::pin(accept_connections)).await { + match future::select(shutdown, accept_connections).await { Either::Left(_) => info!("shutting down diagnostics port"), Either::Right(_) => unreachable!("server accept returns `!`"), } @@ -848,12 +854,7 @@ mod tests { async fn can_dump_actual_events_from_scheduler() { // Create a scheduler with a few synthetic events. 
let scheduler = WeightedRoundRobin::new(QueueKind::weights(), None); - scheduler - .push( - MainEvent::Network(network::Event::SweepOutgoing), - QueueKind::Network, - ) - .await; + scheduler .push( MainEvent::Network(network::Event::GossipOurAddress), diff --git a/node/src/components/event_stream_server.rs b/node/src/components/event_stream_server.rs index ecd00ed7ea..f0bbaa4e38 100644 --- a/node/src/components/event_stream_server.rs +++ b/node/src/components/event_stream_server.rs @@ -27,11 +27,9 @@ mod tests; use std::{fmt::Debug, net::SocketAddr, path::PathBuf}; +use casper_json_rpc::{box_reply, CorsOrigin}; use datasize::DataSize; -use tokio::sync::{ - mpsc::{self, UnboundedSender}, - oneshot, -}; +use tokio::sync::mpsc::{self, UnboundedSender}; use tracing::{error, info, warn}; use warp::Filter; @@ -43,7 +41,7 @@ use crate::{ effect::{EffectBuilder, Effects}, reactor::main_reactor::MainEvent, types::JsonBlock, - utils::{self, ListeningError}, + utils::{self, ListeningError, ObservableFuse}, NodeRng, }; pub use config::Config; @@ -124,79 +122,35 @@ impl EventStreamServer { self.config.max_concurrent_subscribers, ); - let (server_shutdown_sender, shutdown_receiver) = oneshot::channel::<()>(); + let shutdown_fuse = ObservableFuse::new(); let (sse_data_sender, sse_data_receiver) = mpsc::unbounded_channel(); - let listening_address = match self.config.cors_origin.as_str() { - "" => { - let (listening_address, server_with_shutdown) = warp::serve(sse_filter) - .try_bind_with_graceful_shutdown(required_address, async { - shutdown_receiver.await.ok(); - }) - .map_err(|error| ListeningError::Listen { - address: required_address, - error: Box::new(error), - })?; - - tokio::spawn(http_server::run( - self.config.clone(), - self.api_version, - server_with_shutdown, - server_shutdown_sender, - sse_data_receiver, - event_broadcaster, - new_subscriber_info_receiver, - )); - listening_address - } - "*" => { - let (listening_address, server_with_shutdown) = - 
warp::serve(sse_filter.with(warp::cors().allow_any_origin())) - .try_bind_with_graceful_shutdown(required_address, async { - shutdown_receiver.await.ok(); - }) - .map_err(|error| ListeningError::Listen { - address: required_address, - error: Box::new(error), - })?; - - tokio::spawn(http_server::run( - self.config.clone(), - self.api_version, - server_with_shutdown, - server_shutdown_sender, - sse_data_receiver, - event_broadcaster, - new_subscriber_info_receiver, - )); - listening_address - } - _ => { - let (listening_address, server_with_shutdown) = warp::serve( - sse_filter.with(warp::cors().allow_origin(self.config.cors_origin.as_str())), - ) - .try_bind_with_graceful_shutdown(required_address, async { - shutdown_receiver.await.ok(); - }) - .map_err(|error| ListeningError::Listen { - address: required_address, - error: Box::new(error), - })?; - - tokio::spawn(http_server::run( - self.config.clone(), - self.api_version, - server_with_shutdown, - server_shutdown_sender, - sse_data_receiver, - event_broadcaster, - new_subscriber_info_receiver, - )); - listening_address - } + let sse_filter = match CorsOrigin::parse_str(&self.config.cors_origin) { + Some(cors_origin) => sse_filter + .with(cors_origin.to_cors_builder().build()) + .map(box_reply) + .boxed(), + None => sse_filter.map(box_reply).boxed(), }; + let (listening_address, server_with_shutdown) = warp::serve(sse_filter) + .try_bind_with_graceful_shutdown(required_address, shutdown_fuse.clone().wait_owned()) + .map_err(|error| ListeningError::Listen { + address: required_address, + error: Box::new(error), + })?; + + tokio::spawn(http_server::run( + self.config.clone(), + self.api_version, + server_with_shutdown, + shutdown_fuse, + sse_data_receiver, + event_broadcaster, + new_subscriber_info_receiver, + )); + info!(address=%listening_address, "started event stream server"); let event_indexer = EventIndexer::new(self.storage_path.clone()); @@ -257,7 +211,18 @@ where } ComponentState::Initializing => match event 
{ Event::Initialize => { - let (effects, state) = self.bind(self.config.enable_server, _effect_builder); + let (effects, mut state) = + self.bind(self.config.enable_server, _effect_builder); + + if matches!(state, ComponentState::Initializing) { + // Our current code does not support storing the bound port, so we skip the + // second step and go straight to `Initialized`. If new tests are written + // that rely on an initialized RPC server with a port being available, this + // needs to be refactored. Compare with the REST server on how this could be + // done. + state = ComponentState::Initialized; + } + >::set_state(self, state); effects } diff --git a/node/src/components/event_stream_server/http_server.rs b/node/src/components/event_stream_server/http_server.rs index 1712f50ff1..66098c5501 100644 --- a/node/src/components/event_stream_server/http_server.rs +++ b/node/src/components/event_stream_server/http_server.rs @@ -1,7 +1,7 @@ use futures::{future, Future, FutureExt}; use tokio::{ select, - sync::{broadcast, mpsc, oneshot}, + sync::{broadcast, mpsc}, task, }; use tracing::{info, trace}; @@ -9,6 +9,8 @@ use wheelbuf::WheelBuf; use casper_types::ProtocolVersion; +use crate::utils::{Fuse, ObservableFuse}; + use super::{ sse_server::{BroadcastChannelMessage, Id, NewSubscriberInfo, ServerSentEvent}, Config, EventIndex, SseData, @@ -17,7 +19,7 @@ use super::{ /// Run the HTTP server. /// /// * `server_with_shutdown` is the actual server as a future which can be gracefully shut down. -/// * `server_shutdown_sender` is the channel by which the server will be notified to shut down. +/// * `shutdown_fuse` is the fuse by which the server will be notified to shut down. /// * `data_receiver` will provide the server with local events which should then be sent to all /// subscribed clients. 
/// * `broadcaster` is used by the server to send events to each subscribed client after receiving @@ -29,7 +31,7 @@ pub(super) async fn run( config: Config, api_version: ProtocolVersion, server_with_shutdown: impl Future + Send + 'static, - server_shutdown_sender: oneshot::Sender<()>, + shutdown_fuse: ObservableFuse, mut data_receiver: mpsc::UnboundedReceiver<(EventIndex, SseData)>, broadcaster: broadcast::Sender, mut new_subscriber_info_receiver: mpsc::UnboundedReceiver, @@ -117,7 +119,7 @@ pub(super) async fn run( // Kill the event-stream handlers, and shut down the server. let _ = broadcaster.send(BroadcastChannelMessage::Shutdown); - let _ = server_shutdown_sender.send(()); + shutdown_fuse.set(); trace!("Event stream server stopped"); } diff --git a/node/src/components/fetcher/fetch_response.rs b/node/src/components/fetcher/fetch_response.rs index 23f37b6872..bc23278ade 100644 --- a/node/src/components/fetcher/fetch_response.rs +++ b/node/src/components/fetcher/fetch_response.rs @@ -1,8 +1,7 @@ use serde::{Deserialize, Serialize}; /// Message to be returned by a peer. Indicates if the item could be fetched or not. -#[derive(Debug, Serialize, Deserialize, strum::EnumDiscriminants)] -#[strum_discriminants(derive(strum::EnumIter))] +#[derive(Debug, Serialize, Deserialize)] pub enum FetchResponse { /// The requested item. 
Fetched(T), @@ -37,30 +36,3 @@ where bincode::serialize(self) } } - -mod specimen_support { - use crate::utils::specimen::{largest_variant, Cache, LargestSpecimen, SizeEstimator}; - use serde::Serialize; - - use super::{FetchResponse, FetchResponseDiscriminants}; - - impl LargestSpecimen - for FetchResponse - { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - largest_variant::(estimator, |variant| { - match variant { - FetchResponseDiscriminants::Fetched => { - FetchResponse::Fetched(LargestSpecimen::largest_specimen(estimator, cache)) - } - FetchResponseDiscriminants::NotFound => { - FetchResponse::NotFound(LargestSpecimen::largest_specimen(estimator, cache)) - } - FetchResponseDiscriminants::NotProvided => FetchResponse::NotProvided( - LargestSpecimen::largest_specimen(estimator, cache), - ), - } - }) - } - } -} diff --git a/node/src/components/fetcher/metrics.rs b/node/src/components/fetcher/metrics.rs index 35c403d633..755e901355 100644 --- a/node/src/components/fetcher/metrics.rs +++ b/node/src/components/fetcher/metrics.rs @@ -1,62 +1,46 @@ use prometheus::{IntCounter, Registry}; -use crate::unregister_metric; +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; #[derive(Debug)] pub(crate) struct Metrics { /// Number of fetch requests that found an item in the storage. - pub found_in_storage: IntCounter, + pub found_in_storage: RegisteredMetric, /// Number of fetch requests that fetched an item from peer. - pub found_on_peer: IntCounter, + pub found_on_peer: RegisteredMetric, /// Number of fetch requests that timed out. - pub timeouts: IntCounter, + pub timeouts: RegisteredMetric, /// Number of total fetch requests made. - pub fetch_total: IntCounter, - /// Reference to the registry for unregistering. 
- registry: Registry, + pub fetch_total: RegisteredMetric, } impl Metrics { pub(super) fn new(name: &str, registry: &Registry) -> Result { - let found_in_storage = IntCounter::new( + let found_in_storage = registry.new_int_counter( format!("{}_found_in_storage", name), format!( "number of fetch requests that found {} in local storage", name ), )?; - let found_on_peer = IntCounter::new( + let found_on_peer = registry.new_int_counter( format!("{}_found_on_peer", name), format!("number of fetch requests that fetched {} from peer", name), )?; - let timeouts = IntCounter::new( + let timeouts = registry.new_int_counter( format!("{}_timeouts", name), format!("number of {} fetch requests that timed out", name), )?; - let fetch_total = IntCounter::new( + let fetch_total = registry.new_int_counter( format!("{}_fetch_total", name), format!("number of {} all fetch requests made", name), )?; - registry.register(Box::new(found_in_storage.clone()))?; - registry.register(Box::new(found_on_peer.clone()))?; - registry.register(Box::new(timeouts.clone()))?; - registry.register(Box::new(fetch_total.clone()))?; Ok(Metrics { found_in_storage, found_on_peer, timeouts, fetch_total, - registry: registry.clone(), }) } } - -impl Drop for Metrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.found_in_storage); - unregister_metric!(self.registry, self.found_on_peer); - unregister_metric!(self.registry, self.timeouts); - unregister_metric!(self.registry, self.fetch_total); - } -} diff --git a/node/src/components/fetcher/tests.rs b/node/src/components/fetcher/tests.rs index 99aacfd21c..dbc9e9d64d 100644 --- a/node/src/components/fetcher/tests.rs +++ b/node/src/components/fetcher/tests.rs @@ -16,18 +16,17 @@ use casper_types::testing::TestRng; use super::*; use crate::{ components::{ - consensus::ConsensusRequestMessage, deploy_acceptor, fetcher, in_memory_network::{self, InMemoryNetwork, NetworkController}, - network::{GossipedAddress, Identity as NetworkIdentity}, + 
network::{GossipedAddress, Identity as NetworkIdentity, Ticket}, storage::{self, Storage}, }, effect::{ announcements::{ControlAnnouncement, DeployAcceptorAnnouncement, FatalAnnouncement}, incoming::{ - ConsensusMessageIncoming, DemandIncoming, FinalitySignatureIncoming, GossiperIncoming, - NetRequestIncoming, NetResponse, NetResponseIncoming, TrieDemand, TrieRequestIncoming, - TrieResponseIncoming, + ConsensusMessageIncoming, ConsensusRequestMessageIncoming, FinalitySignatureIncoming, + GossiperIncoming, NetRequestIncoming, NetResponse, NetResponseIncoming, + TrieRequestIncoming, TrieResponseIncoming, }, requests::{AcceptDeployRequest, MarkBlockCompletedRequest}, }, @@ -118,8 +117,6 @@ enum Event { #[from] MarkBlockCompletedRequest(MarkBlockCompletedRequest), #[from] - TrieDemand(TrieDemand), - #[from] ContractRuntimeRequest(ContractRuntimeRequest), #[from] GossiperIncomingDeploy(GossiperIncoming), @@ -136,7 +133,7 @@ enum Event { #[from] ConsensusMessageIncoming(ConsensusMessageIncoming), #[from] - ConsensusDemandIncoming(DemandIncoming), + ConsensusRequestMessageIncoming(ConsensusRequestMessageIncoming), #[from] FinalitySignatureIncoming(FinalitySignatureIncoming), } @@ -227,6 +224,7 @@ impl ReactorTrait for Reactor { deploy, source: Source::Client, maybe_responder: Some(responder), + ticket: Ticket::create_dummy(), }; reactor::wrap_effects( Event::FakeDeployAcceptor, @@ -250,8 +248,7 @@ impl ReactorTrait for Reactor { self.storage .handle_event(effect_builder, rng, request.into()), ), - Event::TrieDemand(_) - | Event::ContractRuntimeRequest(_) + Event::ContractRuntimeRequest(_) | Event::BlockAccumulatorRequest(_) | Event::BlocklistAnnouncement(_) | Event::GossiperIncomingDeploy(_) @@ -261,7 +258,7 @@ impl ReactorTrait for Reactor { | Event::TrieRequestIncoming(_) | Event::TrieResponseIncoming(_) | Event::ConsensusMessageIncoming(_) - | Event::ConsensusDemandIncoming(_) + | Event::ConsensusRequestMessageIncoming(_) | Event::FinalitySignatureIncoming(_) | 
Event::FetchedNewBlockAnnouncement(_) | Event::FetchedNewFinalitySignatureAnnouncement(_) @@ -356,6 +353,7 @@ impl Reactor { deploy, source: Source::Peer(response.sender), maybe_responder: None, + ticket: Ticket::create_dummy(), }), ) } @@ -451,7 +449,7 @@ async fn assert_settled( rng: &mut TestRng, timeout: Duration, ) { - let has_responded = |_nodes: &HashMap>>| { + let has_responded = |_nodes: &HashMap>>>| { fetched.lock().unwrap().0 }; diff --git a/node/src/components/gossiper.rs b/node/src/components/gossiper.rs index 7fd0aaa486..e2a8fa224b 100644 --- a/node/src/components/gossiper.rs +++ b/node/src/components/gossiper.rs @@ -40,6 +40,8 @@ use item_provider::ItemProvider; pub(crate) use message::Message; use metrics::Metrics; +use super::network::Ticket; + /// The component which gossips to peers and handles incoming gossip messages from peers. #[allow(clippy::type_complexity)] pub(crate) struct Gossiper @@ -143,7 +145,7 @@ impl Gossiper, item_id: T::Id, requested_count: usize, - peers: HashSet, + peers: Vec, ) -> Effects> where REv: From> + Send, @@ -271,6 +273,7 @@ impl Gossiper Effects> where REv: From>> + From> + Send, @@ -303,7 +306,11 @@ impl Gossiper { @@ -315,7 +322,9 @@ impl Gossiper Gossiper Gossiper Effects> where REv: From>> @@ -397,6 +409,7 @@ impl Gossiper Gossiper, item: Box, requester: NodeId, + ticket: Ticket, ) -> Effects> where REv: From>> + Send, { let message = Message::Item(item); - effect_builder.send_message(requester, message).ignore() + effect_builder + .send_message_and_drop_ticket(requester, message, ticket) + .ignore() } /// Handles the `None` case when attempting to get the item from storage. 
@@ -470,6 +486,7 @@ impl Gossiper, item_id: T::Id, requester: NodeId, + ticket: Ticket, ) -> Effects> where REv: From + Send, @@ -489,6 +506,7 @@ impl Gossiper Gossiper, item: Box, sender: NodeId, + ticket: Ticket, ) -> Effects> where REv: From> + Send, @@ -513,7 +532,7 @@ impl Gossiper Effects { - let effects = match event { - Event::BeginGossipRequest(BeginGossipRequest { - item_id, - source, - target, - responder, - }) => { - let mut effects = - self.handle_item_received(effect_builder, item_id, source, target); - effects.extend(responder.respond(()).ignore()); - effects - } - Event::ItemReceived { - item_id, - source, - target, - } => self.handle_item_received(effect_builder, item_id, source, target), - Event::GossipedTo { - item_id, - requested_count, - peers, - } => self.gossiped_to(effect_builder, item_id, requested_count, peers), - Event::CheckGossipTimeout { item_id, peer } => { - self.check_gossip_timeout(effect_builder, item_id, peer) - } - Event::CheckGetFromPeerTimeout { item_id, peer } => { - self.check_get_from_peer_timeout(effect_builder, item_id, peer) - } - Event::Incoming(GossiperIncoming:: { sender, message }) => match *message { - Message::Gossip(item_id) => { - Self::is_stored(effect_builder, item_id.clone()).event(move |result| { - Event::IsStoredResult { + let effects = + match event { + Event::BeginGossipRequest(BeginGossipRequest { + item_id, + source, + target, + responder, + }) => { + let mut effects = + self.handle_item_received(effect_builder, item_id, source, target); + effects.extend(responder.respond(()).ignore()); + effects + } + Event::ItemReceived { + item_id, + source, + target, + } => self.handle_item_received(effect_builder, item_id, source, target), + Event::GossipedTo { + item_id, + requested_count, + peers, + } => self.gossiped_to(effect_builder, item_id, requested_count, peers), + Event::CheckGossipTimeout { item_id, peer } => { + self.check_gossip_timeout(effect_builder, item_id, peer) + } + Event::CheckGetFromPeerTimeout 
{ item_id, peer } => { + self.check_get_from_peer_timeout(effect_builder, item_id, peer) + } + Event::Incoming(GossiperIncoming:: { + sender, + message, + ticket, + }) => match *message { + Message::Gossip(item_id) => Self::is_stored(effect_builder, item_id.clone()) + .event(move |result| Event::IsStoredResult { item_id, sender, result, - } - }) + ticket, + }), + Message::GossipResponse { + item_id, + is_already_held, + } => self.handle_gossip_response( + effect_builder, + item_id, + is_already_held, + sender, + ticket, + ), + Message::GetItem(item_id) => { + self.handle_get_item_request(effect_builder, item_id, sender, ticket) + } + Message::Item(item) => { + self.handle_item_received_from_peer(effect_builder, item, sender, ticket) + } + }, + Event::CheckItemReceivedTimeout { item_id } => { + self.check_item_received_timeout(effect_builder, item_id) } - Message::GossipResponse { + Event::IsStoredResult { item_id, - is_already_held, - } => self.handle_gossip_response(effect_builder, item_id, is_already_held, sender), - Message::GetItem(item_id) => { - self.handle_get_item_request(effect_builder, item_id, sender) - } - Message::Item(item) => { - self.handle_item_received_from_peer(effect_builder, item, sender) + sender, + result: is_stored_locally, + ticket, + } => { + let action = if self.table.has_entry(&item_id) || !is_stored_locally { + // TODO: Ticket, do something? + self.table.new_data_id(&item_id, sender) + } else { + // We're not already handling this item, and we do have the full item + // stored, so don't initiate gossiping for it. 
+ GossipAction::Noop + }; + self.handle_gossip(effect_builder, item_id, sender, action, ticket) } - }, - Event::CheckItemReceivedTimeout { item_id } => { - self.check_item_received_timeout(effect_builder, item_id) - } - Event::IsStoredResult { - item_id, - sender, - result: is_stored_locally, - } => { - let action = if self.table.has_entry(&item_id) || !is_stored_locally { - self.table.new_data_id(&item_id, sender) - } else { - // We're not already handling this item, and we do have the full item stored, so - // don't initiate gossiping for it. - GossipAction::Noop - }; - self.handle_gossip(effect_builder, item_id, sender, action) - } - Event::GetFromStorageResult { - item_id, - requester, - maybe_item, - } => match maybe_item { - Some(item) => Self::got_from_storage(effect_builder, item, requester), - None => self.failed_to_get_from_storage(effect_builder, item_id), - }, - }; + Event::GetFromStorageResult { + item_id, + requester, + maybe_item, + ticket, + } => match maybe_item { + Some(item) => Self::got_from_storage(effect_builder, item, requester, ticket), + None => self.failed_to_get_from_storage(effect_builder, item_id), + }, + }; self.update_gossip_table_metrics(); effects } @@ -700,23 +731,35 @@ where error!(%item_id, %peer, "should not timeout getting small item from peer"); Effects::new() } - Event::Incoming(GossiperIncoming:: { sender, message }) => match *message { + Event::Incoming(GossiperIncoming:: { + sender, + message, + ticket, + }) => match *message { Message::Gossip(item_id) => { let target = ::id_as_item(&item_id).gossip_target(); let action = self.table.new_complete_data(&item_id, Some(sender), target); - self.handle_gossip(effect_builder, item_id, sender, action) + self.handle_gossip(effect_builder, item_id, sender, action, ticket) } Message::GossipResponse { item_id, is_already_held, - } => self.handle_gossip_response(effect_builder, item_id, is_already_held, sender), + } => self.handle_gossip_response( + effect_builder, + item_id, + 
is_already_held, + sender, + ticket, + ), Message::GetItem(item_id) => { debug!(%item_id, %sender, "unexpected get request for small item"); + drop(ticket); Effects::new() } Message::Item(item) => { let item_id = item.gossip_id(); debug!(%item_id, %sender, "unexpected get response for small item"); + drop(ticket); Effects::new() } }, @@ -732,6 +775,7 @@ where item_id, requester, maybe_item, + ticket: _, } => { error!( %item_id, %requester, ?maybe_item, diff --git a/node/src/components/gossiper/event.rs b/node/src/components/gossiper/event.rs index 21098d71e1..0d4f09219d 100644 --- a/node/src/components/gossiper/event.rs +++ b/node/src/components/gossiper/event.rs @@ -1,13 +1,11 @@ -use std::{ - collections::HashSet, - fmt::{self, Display, Formatter}, -}; +use std::fmt::{self, Display, Formatter}; use derive_more::From; use serde::Serialize; use super::GossipItem; use crate::{ + components::network::Ticket, effect::{incoming::GossiperIncoming, requests::BeginGossipRequest, GossipTarget}, types::NodeId, utils::{DisplayIter, Source}, @@ -29,7 +27,7 @@ pub(crate) enum Event { GossipedTo { item_id: T::Id, requested_count: usize, - peers: HashSet, + peers: Vec, }, /// The timeout for waiting for a gossip response has elapsed and we should check the response /// arrived. @@ -48,6 +46,8 @@ pub(crate) enum Event { item_id: T::Id, sender: NodeId, result: bool, + #[serde(skip)] + ticket: Ticket, }, /// The result of the gossiper getting an item from storage. If the result is `Some`, the item /// should be sent to the requesting peer. 
@@ -55,6 +55,8 @@ pub(crate) enum Event { item_id: T::Id, requester: NodeId, maybe_item: Option>, + #[serde(skip)] + ticket: Ticket, }, } @@ -101,6 +103,7 @@ impl Display for Event { item_id, sender, result, + ticket: _, } => { write!( formatter, diff --git a/node/src/components/gossiper/message.rs b/node/src/components/gossiper/message.rs index 6cf19b5767..940befdf7c 100644 --- a/node/src/components/gossiper/message.rs +++ b/node/src/components/gossiper/message.rs @@ -4,12 +4,10 @@ use std::{ }; use serde::{Deserialize, Serialize}; -use strum::EnumDiscriminants; use super::GossipItem; -#[derive(Clone, Debug, Deserialize, Serialize, EnumDiscriminants)] -#[strum_discriminants(derive(strum::EnumIter))] +#[derive(Clone, Debug, Deserialize, Serialize)] #[serde(bound = "for<'a> T: Deserialize<'a>")] pub(crate) enum Message { /// Gossiped out to random peers to notify them of an item we hold. @@ -45,39 +43,3 @@ impl Display for Message { } } } - -mod specimen_support { - use crate::{ - components::gossiper::GossipItem, - utils::specimen::{largest_variant, Cache, LargestSpecimen, SizeEstimator}, - }; - - use super::{Message, MessageDiscriminants}; - - impl LargestSpecimen for Message - where - T: GossipItem + LargestSpecimen, - ::Id: LargestSpecimen, - { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - largest_variant::( - estimator, - |variant| match variant { - MessageDiscriminants::Gossip => { - Message::Gossip(LargestSpecimen::largest_specimen(estimator, cache)) - } - MessageDiscriminants::GossipResponse => Message::GossipResponse { - item_id: LargestSpecimen::largest_specimen(estimator, cache), - is_already_held: LargestSpecimen::largest_specimen(estimator, cache), - }, - MessageDiscriminants::GetItem => { - Message::GetItem(LargestSpecimen::largest_specimen(estimator, cache)) - } - MessageDiscriminants::Item => { - Message::Item(LargestSpecimen::largest_specimen(estimator, cache)) - } - }, - ) - } - } -} diff --git 
a/node/src/components/gossiper/metrics.rs b/node/src/components/gossiper/metrics.rs index 2bf9d2e900..90352a4cfb 100644 --- a/node/src/components/gossiper/metrics.rs +++ b/node/src/components/gossiper/metrics.rs @@ -1,50 +1,48 @@ use prometheus::{IntCounter, IntGauge, Registry}; -use crate::unregister_metric; +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; /// Metrics for the gossiper component. #[derive(Debug)] pub(super) struct Metrics { /// Total number of items received by the gossiper. - pub(super) items_received: IntCounter, + pub(super) items_received: RegisteredMetric, /// Total number of gossip requests sent to peers. - pub(super) times_gossiped: IntCounter, + pub(super) times_gossiped: RegisteredMetric, /// Number of times the process had to pause due to running out of peers. - pub(super) times_ran_out_of_peers: IntCounter, + pub(super) times_ran_out_of_peers: RegisteredMetric, /// Number of items in the gossip table that are currently being gossiped. - pub(super) table_items_current: IntGauge, + pub(super) table_items_current: RegisteredMetric, /// Number of items in the gossip table that are finished. - pub(super) table_items_finished: IntGauge, - /// Reference to the registry for unregistering. - registry: Registry, + pub(super) table_items_finished: RegisteredMetric, } impl Metrics { /// Creates a new instance of gossiper metrics, using the given prefix. 
pub fn new(name: &str, registry: &Registry) -> Result { - let items_received = IntCounter::new( + let items_received = registry.new_int_counter( format!("{}_items_received", name), format!("number of items received by the {}", name), )?; - let times_gossiped = IntCounter::new( + let times_gossiped = registry.new_int_counter( format!("{}_times_gossiped", name), format!("number of times the {} sent gossip requests to peers", name), )?; - let times_ran_out_of_peers = IntCounter::new( + let times_ran_out_of_peers = registry.new_int_counter( format!("{}_times_ran_out_of_peers", name), format!( "number of times the {} ran out of peers and had to pause", name ), )?; - let table_items_current = IntGauge::new( + let table_items_current = registry.new_int_gauge( format!("{}_table_items_current", name), format!( "number of items in the gossip table of {} in state current", name ), )?; - let table_items_finished = IntGauge::new( + let table_items_finished = registry.new_int_gauge( format!("{}_table_items_finished", name), format!( "number of items in the gossip table of {} in state finished", @@ -52,29 +50,12 @@ impl Metrics { ), )?; - registry.register(Box::new(items_received.clone()))?; - registry.register(Box::new(times_gossiped.clone()))?; - registry.register(Box::new(times_ran_out_of_peers.clone()))?; - registry.register(Box::new(table_items_current.clone()))?; - registry.register(Box::new(table_items_finished.clone()))?; - Ok(Metrics { items_received, times_gossiped, times_ran_out_of_peers, table_items_current, table_items_finished, - registry: registry.clone(), }) } } - -impl Drop for Metrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.items_received); - unregister_metric!(self.registry, self.times_gossiped); - unregister_metric!(self.registry, self.times_ran_out_of_peers); - unregister_metric!(self.registry, self.table_items_current); - unregister_metric!(self.registry, self.table_items_finished); - } -} diff --git 
a/node/src/components/gossiper/tests.rs b/node/src/components/gossiper/tests.rs index e8b8a47468..157b134625 100644 --- a/node/src/components/gossiper/tests.rs +++ b/node/src/components/gossiper/tests.rs @@ -24,7 +24,7 @@ use crate::{ components::{ deploy_acceptor, in_memory_network::{self, InMemoryNetwork, NetworkController}, - network::{GossipedAddress, Identity as NetworkIdentity}, + network::{GossipedAddress, Identity as NetworkIdentity, Ticket}, storage::{self, Storage}, }, effect::{ @@ -33,9 +33,8 @@ use crate::{ GossiperAnnouncement, }, incoming::{ - ConsensusDemand, ConsensusMessageIncoming, FinalitySignatureIncoming, - NetRequestIncoming, NetResponseIncoming, TrieDemand, TrieRequestIncoming, - TrieResponseIncoming, + ConsensusMessageIncoming, ConsensusRequestMessageIncoming, FinalitySignatureIncoming, + NetRequestIncoming, NetResponseIncoming, TrieRequestIncoming, TrieResponseIncoming, }, requests::AcceptDeployRequest, }, @@ -105,7 +104,7 @@ impl From for Event { } } -impl Unhandled for ConsensusDemand {} +impl Unhandled for ConsensusRequestMessageIncoming {} impl Unhandled for ControlAnnouncement {} impl Unhandled for FatalAnnouncement {} impl Unhandled for ConsensusMessageIncoming {} @@ -115,7 +114,6 @@ impl Unhandled for GossiperIncoming {} impl Unhandled for NetRequestIncoming {} impl Unhandled for NetResponseIncoming {} impl Unhandled for TrieRequestIncoming {} -impl Unhandled for TrieDemand {} impl Unhandled for TrieResponseIncoming {} impl Unhandled for FinalitySignatureIncoming {} @@ -271,6 +269,7 @@ impl reactor::Reactor for Reactor { deploy, source: Source::Client, maybe_responder: Some(responder), + ticket: Ticket::create_dummy(), }; self.dispatch_event(effect_builder, rng, Event::DeployAcceptor(event)) } @@ -292,6 +291,7 @@ impl reactor::Reactor for Reactor { Event::DeployGossiperAnnouncement(GossiperAnnouncement::NewItemBody { item, sender, + ticket: _, // The fake deploy acceptor does not do any ticket handling }) => reactor::wrap_effects( 
Event::DeployAcceptor, self.fake_deploy_acceptor.handle_event( @@ -301,6 +301,7 @@ impl reactor::Reactor for Reactor { deploy: Arc::new(*item), source: Source::Peer(sender), maybe_responder: None, + ticket: Ticket::create_dummy(), }, ), ), @@ -357,12 +358,13 @@ async fn run_gossip(rng: &mut TestRng, network_size: usize, deploy_count: usize) } // Check every node has every deploy stored locally. - let all_deploys_held = |nodes: &HashMap>>| { - nodes.values().all(|runner| { - let hashes = runner.reactor().inner().storage.get_all_deploy_hashes(); - all_deploy_hashes == hashes - }) - }; + let all_deploys_held = + |nodes: &HashMap>>>| { + nodes.values().all(|runner| { + let hashes = runner.reactor().inner().storage.get_all_deploy_hashes(); + all_deploy_hashes == hashes + }) + }; network.settle_on(rng, all_deploys_held, TIMEOUT).await; // Ensure all responders are called before dropping the network. @@ -445,7 +447,7 @@ async fn should_get_from_alternate_source() { testing::advance_time(duration_to_advance.into()).await; // Check node 0 has the deploy stored locally. - let deploy_held = |nodes: &HashMap>>| { + let deploy_held = |nodes: &HashMap>>>| { let runner = nodes.get(&node_ids[2]).unwrap(); runner .reactor() @@ -514,7 +516,7 @@ async fn should_timeout_gossip_response() { testing::advance_time(duration_to_advance.into()).await; // Check every node has every deploy stored locally. 
- let deploy_held = |nodes: &HashMap>>| { + let deploy_held = |nodes: &HashMap>>>| { nodes.values().all(|runner| { runner .reactor() @@ -631,6 +633,7 @@ async fn should_not_gossip_old_stored_item_again() { let event = Event::DeployGossiperIncoming(GossiperIncoming { sender: node_ids[1], message: Box::new(Message::Gossip(deploy.gossip_id())), + ticket: Ticket::create_dummy(), }); effect_builder .into_inner() @@ -703,6 +706,7 @@ async fn should_ignore_unexpected_message(message_type: Unexpected) { let event = Event::DeployGossiperIncoming(GossiperIncoming { sender: node_ids[1], message: Box::new(message), + ticket: Ticket::create_dummy(), }); effect_builder .into_inner() diff --git a/node/src/components/in_memory_network.rs b/node/src/components/in_memory_network.rs index 5f0d9b99a8..d4de515e55 100644 --- a/node/src/components/in_memory_network.rs +++ b/node/src/components/in_memory_network.rs @@ -279,20 +279,19 @@ use std::{ any::Any, cell::RefCell, - collections::{HashMap, HashSet}, + collections::HashMap, fmt::{self, Display, Formatter}, sync::{Arc, RwLock}, }; +use casper_types::testing::TestRng; use rand::seq::IteratorRandom; use serde::Serialize; use tokio::sync::mpsc::{self, error::SendError}; use tracing::{debug, error, info, warn}; -use casper_types::testing::TestRng; - use crate::{ - components::Component, + components::{network::Ticket, Component}, effect::{requests::NetworkRequest, EffectBuilder, EffectExt, Effects}, logging, reactor::{EventQueueHandle, QueueKind}, @@ -538,8 +537,7 @@ where NetworkRequest::SendMessage { dest, payload, - respond_after_queueing: _, - auto_closing_responder, + message_queued_responder, } => { if *dest == self.node_id { panic!("can't send message to self"); @@ -551,7 +549,11 @@ where error!("network lock has been poisoned") }; - auto_closing_responder.respond(()).ignore() + if let Some(responder) = message_queued_responder { + responder.respond(()).ignore() + } else { + Effects::new() + } } NetworkRequest::ValidatorBroadcast 
{ payload, @@ -576,7 +578,7 @@ where gossip_target: _, } => { if let Ok(guard) = self.nodes.read() { - let chosen: HashSet<_> = guard + let chosen: Vec<_> = guard .keys() .filter(|&node_id| !exclude.contains(node_id) && node_id != &self.node_id) .cloned() @@ -609,10 +611,11 @@ async fn receiver_task( P: 'static + Send, { while let Some((sender, payload)) = receiver.recv().await { - let announce: REv = REv::from_incoming(sender, payload); + // We do not use backpressure in the in-memory network, so provide a dummy ticket. + let announce: REv = REv::from_incoming(sender, payload, Ticket::create_dummy()); event_queue - .schedule(announce, QueueKind::NetworkIncoming) + .schedule(announce, QueueKind::MessageIncoming) .await; } diff --git a/node/src/components/metrics.rs b/node/src/components/metrics.rs index acd6ba0987..505d7b8e32 100644 --- a/node/src/components/metrics.rs +++ b/node/src/components/metrics.rs @@ -14,9 +14,9 @@ //! Creation and instantiation of this component happens inside the `reactor::Reactor::new` //! function, which is passed in a `prometheus::Registry` (see 2.). //! -//! 2. Instantiation of an `XYZMetrics` struct should always be combined with registering all of -//! the metrics on a registry. For this reason it is advisable to have the `XYZMetrics::new` -//! method take a `prometheus::Registry` and register it directly. +//! 2. Instantiation of an `XYZMetrics` struct should always be combined with registering all of the +//! metrics on a registry. For this reason it is advisable to have the `XYZMetrics::new` method +//! take a `prometheus::Registry` and register it directly. //! //! 3. Updating metrics is done inside the `handle_event` function by simply calling methods on the //! fields of `self.metrics` (`: XYZMetrics`). 
**Important**: Metrics should never be read to diff --git a/node/src/components/network.rs b/node/src/components/network.rs index 77a21b5ac2..d2d4039c36 100644 --- a/node/src/components/network.rs +++ b/node/src/components/network.rs @@ -14,94 +14,80 @@ //! # Connection //! //! Every node has an ID and a public listening address. The objective of each node is to constantly -//! maintain an outgoing connection to each other node (and thus have an incoming connection from -//! these nodes as well). -//! -//! Any incoming connection is, after a handshake process, strictly read from, while any outgoing -//! connection is strictly used for sending messages, also after a handshake. +//! maintain a connection to each other node, see the [`conman`] module for details. //! //! Nodes gossip their public listening addresses periodically, and will try to establish and //! maintain an outgoing connection to any new address learned. -mod bincode_format; pub(crate) mod blocklist; mod chain_info; mod config; -mod counting_format; +mod conman; +mod connection_id; mod error; mod event; mod gossiped_address; -mod health; +mod handshake; mod identity; mod insights; -mod limiter; mod message; -mod message_pack_format; mod metrics; -mod outgoing; -mod symmetry; -pub(crate) mod tasks; +mod per_channel; + #[cfg(test)] mod tests; +mod transport; use std::{ - collections::{BTreeMap, HashMap, HashSet}, - fmt::{self, Debug, Display, Formatter}, - io, + collections::{BTreeMap, HashSet}, + fmt::Debug, + fs::OpenOptions, + marker::PhantomData, + mem, net::{SocketAddr, TcpListener}, - sync::{Arc, Weak}, + ops::Deref, + sync::Arc, time::{Duration, Instant}, }; +use bincode::Options; +use bytes::Bytes; use datasize::DataSize; use futures::{future::BoxFuture, FutureExt}; + use itertools::Itertools; +use juliet::rpc::{JulietRpcClient, RequestGuard}; use prometheus::Registry; use rand::{ seq::{IteratorRandom, SliceRandom}, Rng, }; -use serde::{Deserialize, Serialize}; -use tokio::{ - net::TcpStream, - 
sync::{ - mpsc::{self, UnboundedSender}, - watch, - }, - task::JoinHandle, -}; +use serde::Serialize; +use strum::EnumCount; +use tokio::net::TcpStream; use tokio_openssl::SslStream; -use tokio_util::codec::LengthDelimitedCodec; -use tracing::{debug, error, info, trace, warn, Instrument, Span}; +use tracing::{debug, error, info, warn, Span}; use casper_types::{EraId, PublicKey, SecretKey}; +use self::{ + chain_info::ChainInfo, + conman::{ConMan, ConManState}, + handshake::HandshakeConfiguration, + message::NodeKeyPair, + metrics::Metrics, + transport::TransportHandler, +}; pub(crate) use self::{ - bincode_format::BincodeFormat, - config::{Config, IdentityConfig}, + config::Config, error::Error, event::Event, gossiped_address::GossipedAddress, identity::Identity, insights::NetworkInsights, - message::{ - generate_largest_serialized_message, EstimatorWeights, FromIncoming, Message, MessageKind, - Payload, - }, -}; -use self::{ - blocklist::BlocklistJustification, - chain_info::ChainInfo, - counting_format::{ConnectionId, CountingFormat, Role}, - error::{ConnectionError, Result}, - event::{IncomingConnection, OutgoingConnection}, - health::{HealthConfig, TaggedTimestamp}, - limiter::Limiter, - message::NodeKeyPair, - metrics::Metrics, - outgoing::{DialOutcome, DialRequest, OutgoingConfig, OutgoingManager}, - symmetry::ConnectionSymmetry, - tasks::{MessageQueueItem, NetworkContext}, + message::{Channel, FromIncoming, Message, MessageKind, Payload}, + per_channel::PerChannel, + transport::Ticket, }; use crate::{ components::{gossiper::GossipItem, Component, ComponentState, InitializedComponent}, @@ -113,223 +99,141 @@ use crate::{ reactor::{Finalize, ReactorEvent}, tls, types::{NodeId, ValidatorMatrix}, - utils::{self, display_error, Source}, + utils::{ + self, display_error, rate_limited::rate_limited, DropSwitch, Fuse, LockedLineWriter, + ObservableFuse, Source, + }, NodeRng, }; +use super::ValidatorBoundComponent; + +/// The name of this component. 
const COMPONENT_NAME: &str = "network"; +/// How often to attempt to drop metrics, so that they can be re-registered. const MAX_METRICS_DROP_ATTEMPTS: usize = 25; -const DROP_RETRY_DELAY: Duration = Duration::from_millis(100); - -/// How often to keep attempting to reconnect to a node before giving up. Note that reconnection -/// delays increase exponentially! -const RECONNECTION_ATTEMPTS: u8 = 8; - -/// Basic reconnection timeout. -/// -/// The first reconnection attempt will be made after 2x this timeout. -const BASE_RECONNECTION_TIMEOUT: Duration = Duration::from_secs(1); - -/// Interval during which to perform outgoing manager housekeeping. -const OUTGOING_MANAGER_SWEEP_INTERVAL: Duration = Duration::from_secs(1); -/// How often to send a ping down a healthy connection. -const PING_INTERVAL: Duration = Duration::from_secs(30); - -/// Maximum time for a ping until it connections are severed. -/// -/// If you are running a network under very extreme conditions, it may make sense to alter these -/// values, but usually these values should require no changing. -/// -/// `PING_TIMEOUT` should be less than `PING_INTERVAL` at all times. -const PING_TIMEOUT: Duration = Duration::from_secs(6); - -/// How many pings to send before giving up and dropping the connection. -const PING_RETRIES: u16 = 5; - -#[derive(Clone, DataSize, Debug)] -pub(crate) struct OutgoingHandle

{ - #[data_size(skip)] // Unfortunately, there is no way to inspect an `UnboundedSender`. - sender: UnboundedSender>, - peer_addr: SocketAddr, -} +/// Delays in between dropping metrics. +const DROP_RETRY_DELAY: Duration = Duration::from_millis(100); -impl

Display for OutgoingHandle

{ - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "outgoing handle to {}", self.peer_addr) - } -} +/// How often metrics are synced. +const METRICS_UPDATE_RATE: Duration = Duration::from_secs(1); -#[derive(DataSize)] -pub(crate) struct Network +#[derive(DataSize, Debug)] +pub(crate) struct Network

where - REv: 'static, P: Payload, { /// Initial configuration values. - cfg: Config, - /// Read-only networking information shared across tasks. - context: Arc>, - - /// Outgoing connections manager. - outgoing_manager: OutgoingManager, ConnectionError>, - /// Tracks whether a connection is symmetric or not. - connection_symmetries: HashMap, - - /// Tracks nodes that have announced themselves as nodes that are syncing. - syncing_nodes: HashSet, - - channel_management: Option, - + config: Config, + /// The network address the component is listening on. + /// + /// Will be initialized late. + public_addr: Option, + /// Chain information used by networking. + /// + /// Only available during initialization. + chain_info: ChainInfo, + /// Consensus keys, used for handshaking. + /// + /// Only available during initialization. + node_key_pair: Option, + /// Node's network identify. + identity: Identity, + /// Our node identity. Derived from `identity`, cached here. + our_id: NodeId, + /// The set of known addresses that are eternally kept. + known_addresses: HashSet, + /// A reference to the global validator matrix. + validator_matrix: ValidatorMatrix, + /// Connection manager for incoming and outgoing connections. + #[data_size(skip)] // Skipped, to reduce lock contention. + conman: Option, + /// Fuse signaling a shutdown of the small network. + shutdown_fuse: DropSwitch, /// Networking metrics. #[data_size(skip)] net_metrics: Arc, - - /// The outgoing bandwidth limiter. - #[data_size(skip)] - outgoing_limiter: Limiter, - - /// The limiter for incoming resource usage. - /// - /// This is not incoming bandwidth but an independent resource estimate. - #[data_size(skip)] - incoming_limiter: Limiter, - /// The era that is considered the active era by the network component. active_era: EraId, - /// The state of this component. state: ComponentState, + /// Marker for what kind of payload this small network instance supports. + _payload: PhantomData

, } -#[derive(DataSize)] -struct ChannelManagement { - /// Channel signaling a shutdown of the network. - // Note: This channel is closed when `Network` is dropped, signalling the receivers that - // they should cease operation. - #[data_size(skip)] - shutdown_sender: Option>, - /// Join handle for the server thread. - #[data_size(skip)] - server_join_handle: Option>, - - /// Channel signaling a shutdown of the incoming connections. - // Note: This channel is closed when we finished syncing, so the `Network` can close all - // connections. When they are re-established, the proper value of the now updated `is_syncing` - // flag will be exchanged on handshake. - #[data_size(skip)] - close_incoming_sender: Option>, - /// Handle used by the `message_reader` task to receive a notification that incoming - /// connections should be closed. - #[data_size(skip)] - close_incoming_receiver: watch::Receiver<()>, -} - -impl Network +impl

Network

where - P: Payload + 'static, - REv: ReactorEvent - + From> - + FromIncoming

- + From - + From> - + From - + From>, + P: Payload, { /// Creates a new network component instance. #[allow(clippy::type_complexity)] pub(crate) fn new>( - cfg: Config, - our_identity: Identity, + config: Config, + identity: Identity, node_key_pair: Option<(Arc, PublicKey)>, registry: &Registry, chain_info_source: C, validator_matrix: ValidatorMatrix, - ) -> Result> { + ) -> Result, Error> { let net_metrics = Arc::new(Metrics::new(registry)?); - let outgoing_limiter = Limiter::new( - cfg.max_outgoing_byte_rate_non_validators, - net_metrics.accumulated_outgoing_limiter_delay.clone(), - validator_matrix.clone(), - ); - - let incoming_limiter = Limiter::new( - cfg.max_incoming_message_rate_non_validators, - net_metrics.accumulated_incoming_limiter_delay.clone(), + let node_key_pair = node_key_pair.map(NodeKeyPair::new); + let our_id = identity.node_id(); + + Ok(Network { + config, + known_addresses: Default::default(), + public_addr: None, + chain_info: chain_info_source.into(), + node_key_pair, + identity, + our_id, validator_matrix, - ); - - let outgoing_manager = OutgoingManager::with_metrics( - OutgoingConfig { - retry_attempts: RECONNECTION_ATTEMPTS, - base_timeout: BASE_RECONNECTION_TIMEOUT, - unblock_after: cfg.blocklist_retain_duration.into(), - sweep_timeout: cfg.max_addr_pending_time.into(), - health: HealthConfig { - ping_interval: PING_INTERVAL, - ping_timeout: PING_TIMEOUT, - ping_retries: PING_RETRIES, - pong_limit: (1 + PING_RETRIES as u32) * 2, - }, - }, - net_metrics.create_outgoing_metrics(), - ); - - let context = Arc::new(NetworkContext::new( - cfg.clone(), - our_identity, - node_key_pair.map(NodeKeyPair::new), - chain_info_source.into(), - &net_metrics, - )); - - let component = Network { - cfg, - context, - outgoing_manager, - connection_symmetries: HashMap::new(), - syncing_nodes: HashSet::new(), - channel_management: None, + conman: None, net_metrics, - outgoing_limiter, - incoming_limiter, // We start with an empty set of validators for era 0 
and expect to be updated. active_era: EraId::new(0), state: ComponentState::Uninitialized, - }; + shutdown_fuse: DropSwitch::new(ObservableFuse::new()), - Ok(component) + _payload: PhantomData, + }) } - fn initialize(&mut self, effect_builder: EffectBuilder) -> Result>> { - let mut known_addresses = HashSet::new(); - for address in &self.cfg.known_addresses { - match utils::resolve_address(address) { - Ok(known_address) => { - if !known_addresses.insert(known_address) { - warn!(%address, resolved=%known_address, "ignoring duplicated known address"); - }; - } - Err(ref err) => { - warn!(%address, err=display_error(err), "failed to resolve known address"); - } - } - } + /// Initializes the networking component. + fn initialize( + &mut self, + effect_builder: EffectBuilder, + ) -> Result>, Error> + where + REv: ReactorEvent + + From> + + From> + + FromIncoming

+ + From + + From> + + From, + P: Payload, + { + // Start by resolving all known addresses. + let known_addresses = + resolve_addresses(self.config.known_addresses.iter().map(String::as_str)); // Assert we have at least one known address in the config. if known_addresses.is_empty() { warn!("no known addresses provided via config or all failed DNS resolution"); return Err(Error::EmptyKnownHosts); } + self.known_addresses = known_addresses; let mut public_addr = - utils::resolve_address(&self.cfg.public_address).map_err(Error::ResolveAddr)?; + utils::resolve_address(&self.config.public_address).map_err(Error::ResolveAddr)?; // We can now create a listener. let bind_address = - utils::resolve_address(&self.cfg.bind_address).map_err(Error::ResolveAddr)?; + utils::resolve_address(&self.config.bind_address).map_err(Error::ResolveAddr)?; let listener = TcpListener::bind(bind_address) .map_err(|error| Error::ListenerCreation(error, bind_address))?; // We must set non-blocking to `true` or else the tokio task hangs forever. @@ -343,466 +247,338 @@ where if public_addr.port() == 0 { public_addr.set_port(local_addr.port()); } + self.public_addr = Some(public_addr); - Arc::get_mut(&mut self.context) - .expect("should be no other pointers") - .initialize(public_addr, effect_builder.into_inner()); - - let protocol_version = self.context.chain_info().protocol_version; - // Run the server task. - // We spawn it ourselves instead of through an effect to get a hold of the join handle, - // which we need to shutdown cleanly later on. 
- info!(%local_addr, %public_addr, %protocol_version, "starting server background task"); - - let (server_shutdown_sender, server_shutdown_receiver) = watch::channel(()); - let (close_incoming_sender, close_incoming_receiver) = watch::channel(()); - - let context = self.context.clone(); - let server_join_handle = tokio::spawn( - tasks::server( - context, - tokio::net::TcpListener::from_std(listener).map_err(Error::ListenerConversion)?, - server_shutdown_receiver, - ) - .in_current_span(), + let mut effects = Effects::new(); + + // Start broadcasting our public listening address. + effects.extend( + effect_builder + .set_timeout(self.config.initial_gossip_delay.into()) + .event(|_| Event::GossipOurAddress), ); - let channel_management = ChannelManagement { - shutdown_sender: Some(server_shutdown_sender), - server_join_handle: Some(server_join_handle), - close_incoming_sender: Some(close_incoming_sender), - close_incoming_receiver, + effects.extend(effect_builder.immediately().event(|_| Event::SyncMetrics)); + + let keylog = match self.config.keylog_path { + Some(ref path) => { + let keylog = OpenOptions::new() + .append(true) + .create(true) + .write(true) + .open(path) + .map_err(Error::CannotAppendToKeylog)?; + warn!(%path, "keylog enabled, if you are not debugging turn this off in your configuration (`network.keylog_path`)"); + Some(LockedLineWriter::new(keylog)) + } + None => None, }; - self.channel_management = Some(channel_management); - - // Learn all known addresses and mark them as unforgettable. - let now = Instant::now(); - let dial_requests: Vec<_> = known_addresses - .into_iter() - .filter_map(|addr| self.outgoing_manager.learn_addr(addr, true, now)) - .collect(); + // Start connection manager. + let rpc_builder = transport::create_rpc_builder( + &self.chain_info.networking_config, + &self.config, + &self.chain_info, + ); - let mut effects = self.process_dial_requests(dial_requests); + // Setup connection manager, then learn all known addresses. 
+ let handshake_configuration = HandshakeConfiguration::new( + self.chain_info.clone(), + self.node_key_pair.clone(), + public_addr, + ); - // Start broadcasting our public listening address. - effects.extend( - effect_builder - .set_timeout(self.cfg.initial_gossip_delay.into()) - .event(|_| Event::GossipOurAddress), + let protocol_handler = TransportHandler::new( + effect_builder.into_inner(), + self.identity.clone(), + handshake_configuration, + keylog, + self.net_metrics.clone(), + self.validator_matrix.clone(), ); - // Start regular housekeeping of the outgoing connections. - effects.extend( - effect_builder - .set_timeout(OUTGOING_MANAGER_SWEEP_INTERVAL) - .event(|_| Event::SweepOutgoing), + let conman = ConMan::new( + tokio::net::TcpListener::from_std(listener).expect("not in tokio runtime"), + public_addr, + self.our_id, + Box::new(protocol_handler), + rpc_builder, + self.config.conman, ); + self.conman = Some(conman); + self.learn_known_addresses(); + // Done, set initialized state. >::set_state(self, ComponentState::Initialized); + Ok(effects) } - /// Should only be called after component has been initialized. - fn channel_management(&self) -> &ChannelManagement { - self.channel_management - .as_ref() - .expect("component not initialized properly") + /// Submits all known addresses to the connection manager. + fn learn_known_addresses(&self) { + let Some(ref conman) = self.conman else { + error!("cannot learn known addresses, component not initialized"); + return; + }; + + for known_address in &self.known_addresses { + conman.learn_addr(*known_address); + } } /// Queues a message to be sent to validator nodes in the given era. 
- fn broadcast_message_to_validators(&self, msg: Arc>, era_id: EraId) { - self.net_metrics.broadcast_requests.inc(); + fn broadcast_message_to_validators(&self, channel: Channel, payload: Bytes, era_id: EraId) { + let Some(ref conman) = self.conman else { + error!( + "cannot broadcast message to validators on non-initialized networking component" + ); + return; + }; - let mut total_connected_validators_in_era = 0; - let mut total_outgoing_manager_connected_peers = 0; + self.net_metrics.broadcast_requests.inc(); - for peer_id in self.outgoing_manager.connected_peers() { - total_outgoing_manager_connected_peers += 1; - if self.outgoing_limiter.is_validator_in_era(era_id, &peer_id) { - total_connected_validators_in_era += 1; - self.send_message(peer_id, msg.clone(), None) + // Determine whether we should restrict broadcasts at all. + let validators = self + .validator_matrix + .era_validators(era_id) + .unwrap_or_default(); + if self.config.use_validator_broadcast && !validators.is_empty() { + let state = conman.read_state(); + for (consensus_key, &peer_id) in state.key_index().iter() { + if validators.contains(consensus_key) { + self.send_message(&state, peer_id, channel, payload.clone(), None) + } + } + } else { + // We were asked to not use validator broadcasting, or do not have a list of validators + // available. Broadcast to everyone instead. + let state = conman.read_state(); + for &peer_id in state.routing_table().keys() { + self.send_message(&state, peer_id, channel, payload.clone(), None) } } - - debug!( - msg = %msg, - era = era_id.value(), - total_connected_validators_in_era, - total_outgoing_manager_connected_peers, - "broadcast_message_to_validators" - ); } /// Queues a message to `count` random nodes on the network. + /// + /// Returns the IDs of the nodes the message has been gossiped to. 
fn gossip_message( &self, rng: &mut NodeRng, - msg: Arc>, + channel: Channel, + payload: Bytes, gossip_target: GossipTarget, count: usize, exclude: HashSet, - ) -> HashSet { - let is_validator_in_era = - |era: EraId, peer_id: &NodeId| self.outgoing_limiter.is_validator_in_era(era, peer_id); - let peer_ids = choose_gossip_peers( - rng, - gossip_target, - count, - exclude.clone(), - self.outgoing_manager.connected_peers(), - is_validator_in_era, - ); + ) -> Vec { + self.net_metrics.gossip_requests.inc(); - // todo!() - consider sampling more validators (for example: 10%, but not fewer than 5) - - if peer_ids.len() != count { - let not_excluded = self - .outgoing_manager - .connected_peers() - .filter(|peer_id| !exclude.contains(peer_id)) - .count(); - if not_excluded > 0 { - let connected = self.outgoing_manager.connected_peers().count(); - debug!( - our_id=%self.context.our_id(), - %gossip_target, - wanted = count, - connected, - not_excluded, - selected = peer_ids.len(), - "could not select enough random nodes for gossiping" - ); + let Some(ref conman) = self.conman else { + error!("should never attempt to gossip on uninitialized component"); + return Default::default(); + }; + let state = conman.read_state(); + + // Collect all connected peers sans exclusion list. + let connected_peers: Vec<_> = state + .routing_table() + .keys() + .filter(|node_id| !exclude.contains(node_id)) + .collect(); + + let mut chosen: Vec = match gossip_target { + GossipTarget::Mixed(era_id) if self.config.use_mixed_gossip => { + if let Some(known_era_validators) = self.validator_matrix.era_validators(era_id) { + // We have the validators for the given era by consensus key, map to node ID. + let connected_era_validators: HashSet = known_era_validators + .iter() + .filter_map(|key| state.key_index().get(key)) + .filter(|node_id| !exclude.contains(node_id)) + .cloned() + .collect(); + + // Create two separate batches, first all non-validators, second all validators. 
+ let mut first = connected_peers + .iter() + .filter(|&node_id| !connected_era_validators.contains(node_id)) + .map(Deref::deref) + .choose_multiple(rng, count); + + let mut second = connected_era_validators.iter().choose_multiple(rng, count); + + // Shuffle, then sample. + if rng.gen() { + mem::swap(&mut first, &mut second); + } + first.shuffle(rng); + second.shuffle(rng); + + first + .into_iter() + .interleave(second) + .take(count) + .cloned() + .collect() + } else { + rate_limited!( + ERA_NOT_READY, + 5, + Duration::from_secs(10), + |dropped| warn!(%gossip_target, dropped, "failed to select mixed target for era gossip") + ); + + // Fall through, keeping `chosen` empty. + Vec::new() + } + } + GossipTarget::Mixed(_) => { + // Mixed mode gossip is disabled through config. + Vec::new() + } + GossipTarget::All => { + // Simply fall through, since `GossipTarget::All` is also our fallback mode. + Vec::new() } + }; + + if chosen.is_empty() { + chosen.extend(connected_peers.choose_multiple(rng, count).cloned()); + } + + if chosen.len() != count { + rate_limited!( + GOSSIP_SELECTION_FELL_SHORT, + 5, + Duration::from_secs(60), + |dropped| warn!(%gossip_target, wanted=count, got=chosen.len(), dropped, "gossip selection fell short") + ); } - for &peer_id in &peer_ids { - self.send_message(peer_id, msg.clone(), None); + for &peer_id in &chosen { + self.send_message(&state, peer_id, channel, payload.clone(), None); } - peer_ids.into_iter().collect() + chosen } /// Queues a message to be sent to a specific node. fn send_message( &self, + state: &ConManState, dest: NodeId, - msg: Arc>, - opt_responder: Option>, + channel: Channel, + payload: Bytes, + message_queued_responder: Option>, ) { // Try to send the message. - if let Some(connection) = self.outgoing_manager.get_route(dest) { - if msg.payload_is_unsafe_for_syncing_nodes() && self.syncing_nodes.contains(&dest) { - // We should never attempt to send an unsafe message to a peer that we know is still - // syncing. 
Since "unsafe" does usually not mean immediately catastrophic, we - // attempt to carry on, but warn loudly. - error!(kind=%msg.classify(), node_id=%dest, "sending unsafe message to syncing node"); + if let Some(route) = state.routing_table().get(&dest) { + /// Build the request. + /// + /// Internal helper function to ensure requests are always built the same way. + // Note: Ideally, this would be a closure, but lifetime inference does not + // work out here, and we cannot annotate lifetimes on closures. + #[inline(always)] + fn mk_request( + rpc_client: &JulietRpcClient<{ Channel::COUNT }>, + channel: Channel, + payload: Bytes, + ) -> juliet::rpc::JulietRpcRequestBuilder<'_, { Channel::COUNT }> { + rpc_client + .create_request(channel.into_channel_id()) + .with_payload(payload) } - - if let Err(msg) = connection.sender.send((msg, opt_responder)) { - // We lost the connection, but that fact has not reached us yet. - warn!(our_id=%self.context.our_id(), %dest, ?msg, "dropped outgoing message, lost connection"); - } else { - self.net_metrics.queued_messages.inc(); - } - } else { - // We are not connected, so the reconnection is likely already in progress. - debug!(our_id=%self.context.our_id(), %dest, ?msg, "dropped outgoing message, no connection"); - } - } - - fn handle_incoming_connection( - &mut self, - incoming: Box>, - span: Span, - ) -> Effects> { - span.clone().in_scope(|| match *incoming { - IncomingConnection::FailedEarly { - peer_addr: _, - ref error, - } => { - // Failed without much info, there is little we can do about this. - debug!(err=%display_error(error), "incoming connection failed early"); - Effects::new() - } - IncomingConnection::Failed { - peer_addr: _, - peer_id: _, - ref error, - } => { - // TODO: At this point, we could consider blocking peers by [`PeerID`], but this - // feature is not implemented yet. 
- debug!( - err = display_error(error), - "incoming connection failed after TLS setup" - ); - Effects::new() - } - IncomingConnection::Loopback => { - // Loopback connections are closed immediately, but will be marked as such by the - // outgoing manager. We still record that it succeeded in the log, but this should - // be the only time per component instantiation that this happens. - info!("successful incoming loopback connection, will be dropped"); - Effects::new() - } - IncomingConnection::Established { - peer_addr, - public_addr, - peer_id, - peer_consensus_public_key, - stream, - } => { - if self.cfg.max_incoming_peer_connections != 0 { - if let Some(symmetries) = self.connection_symmetries.get(&peer_id) { - let incoming_count = symmetries - .incoming_addrs() - .map(|addrs| addrs.len()) - .unwrap_or_default(); - - if incoming_count >= self.cfg.max_incoming_peer_connections as usize { - info!(%public_addr, - %peer_id, - count=incoming_count, - limit=self.cfg.max_incoming_peer_connections, - "rejecting new incoming connection, limit for peer exceeded" - ); - return Effects::new(); - } - } - } - - info!(%public_addr, "new incoming connection established"); - - // Learn the address the peer gave us. - let dial_requests = - self.outgoing_manager - .learn_addr(public_addr, false, Instant::now()); - let mut effects = self.process_dial_requests(dial_requests); - - // Update connection symmetries. - if self - .connection_symmetries - .entry(peer_id) - .or_default() - .add_incoming(peer_addr, Instant::now()) - { - self.connection_completed(peer_id); - - // We should NOT update the syncing set when we receive an incoming connection, - // because the `message_sender` which is handling the corresponding outgoing - // connection will not receive the update of the syncing state of the remote - // peer. - // - // Such desync may cause the node to try to send "unsafe" requests to the - // syncing node, because the outgoing connection may outlive the - // incoming one, i.e. 
it may take some time to drop "our" outgoing - // connection after a peer has closed the corresponding incoming connection. + let payload_len = payload.len() as u64; + let request = mk_request(&route.client, channel, payload); + + // Attempt to enqueue it directly, regardless of what `message_queued_responder` is. + match request.try_queue_for_sending() { + Ok(guard) => { + self.net_metrics + .channel_metrics + .get(channel) + .update_from_outgoing_request(payload_len); + process_request_guard(&self.net_metrics, channel, guard) } + Err(builder) => { + // Failed to queue immediately, our next step depends on whether we were asked + // to keep trying or to discard. + + // Reconstruct the payload. + let payload = match builder.into_payload() { + None => { + // This should never happen. + error!("payload unexpectedly disappeard"); + return; + } + Some(payload) => payload, + }; - // Now we can start the message reader. - let boxed_span = Box::new(span.clone()); - effects.extend( - tasks::message_reader( - self.context.clone(), - stream, - self.incoming_limiter - .create_handle(peer_id, peer_consensus_public_key), - self.channel_management().close_incoming_receiver.clone(), - peer_id, - span.clone(), - ) - .instrument(span) - .event(move |result| Event::IncomingClosed { - result, - peer_id: Box::new(peer_id), - peer_addr, - span: boxed_span, - }), - ); - - effects - } - }) - } + if let Some(responder) = message_queued_responder { + // Reconstruct the client. + let client = route.client.clone(); + + // Technically, the queueing future should be spawned by the reactor, but + // since the networking component usually controls its own futures, we are + // allowed to spawn these as well. + let net_metrics = self.net_metrics.clone(); + tokio::spawn(async move { + // Note: This future is not cancellation safe due to the metrics being + // updated with no drop implementation. However, there is no way + // to exit this early or cancel its execution, so we should be + // good. 
+ + net_metrics.overflow_buffer_count.inc(); + net_metrics.overflow_buffer_bytes.add(payload_len as i64); + let guard = mk_request(&client, channel, payload) + .queue_for_sending() + .await; + net_metrics.overflow_buffer_bytes.sub(payload_len as i64); + net_metrics.overflow_buffer_count.dec(); + net_metrics + .channel_metrics + .get(channel) + .update_from_outgoing_request(payload_len); + responder.respond(()).await; + + // We need to properly process the guard, so it does not cause a + // cancellation from being dropped. + process_request_guard(&net_metrics, channel, guard) + }); + } else { + // We had to drop the message, since we hit the buffer limit. + match deserialize_network_message::

(&payload) { + Ok(reconstructed_message) => { + debug!(our_id=%self.our_id, %dest, msg=%reconstructed_message, "dropped outgoing message, buffer exhausted"); + } + Err(err) => { + error!(our_id=%self.our_id, + %dest, + reconstruction_error=%err, + ?payload, + "dropped outgoing message, buffer exhausted and also failed to reconstruct it" + ); + } + } - fn handle_incoming_closed( - &mut self, - result: io::Result<()>, - peer_id: NodeId, - peer_addr: SocketAddr, - span: Span, - ) -> Effects> { - span.in_scope(|| { - // Log the outcome. - match result { - Ok(()) => { - info!("regular connection closing") - } - Err(ref err) => { - warn!(err = display_error(err), "connection dropped") + rate_limited!( + MESSAGE_RATE_EXCEEDED, + 1, + Duration::from_secs(5), + |dropped| warn!(%channel, payload_len=payload.len(), dropped, "node is sending at too high a rate, message dropped") + ); + } } } - - // Update the connection symmetries. - self.connection_symmetries - .entry(peer_id) - .or_default() - .remove_incoming(peer_addr, Instant::now()); - - Effects::new() - }) - } - - /// Determines whether an outgoing peer should be blocked based on the connection error. - fn is_blockable_offense_for_outgoing( - &self, - error: &ConnectionError, - ) -> Option { - match error { - // Potentially transient failures. - // - // Note that incompatible versions need to be considered transient, since they occur - // during regular upgrades. - ConnectionError::TlsInitialization(_) - | ConnectionError::TcpConnection(_) - | ConnectionError::TcpNoDelay(_) - | ConnectionError::TlsHandshake(_) - | ConnectionError::HandshakeSend(_) - | ConnectionError::HandshakeRecv(_) - | ConnectionError::IncompatibleVersion(_) => None, - - // These errors are potential bugs on our side. 
- ConnectionError::HandshakeSenderCrashed(_) - | ConnectionError::FailedToReuniteHandshakeSinkAndStream - | ConnectionError::CouldNotEncodeOurHandshake(_) => None, - - // These could be candidates for blocking, but for now we decided not to. - ConnectionError::NoPeerCertificate - | ConnectionError::PeerCertificateInvalid(_) - | ConnectionError::DidNotSendHandshake - | ConnectionError::InvalidRemoteHandshakeMessage(_) - | ConnectionError::InvalidConsensusCertificate(_) => None, - - // Definitely something we want to avoid. - ConnectionError::WrongNetwork(peer_network_name) => { - Some(BlocklistJustification::WrongNetwork { - peer_network_name: peer_network_name.clone(), - }) - } - ConnectionError::WrongChainspecHash(peer_chainspec_hash) => { - Some(BlocklistJustification::WrongChainspecHash { - peer_chainspec_hash: *peer_chainspec_hash, - }) - } - ConnectionError::MissingChainspecHash => { - Some(BlocklistJustification::MissingChainspecHash) - } + } else { + rate_limited!( + LOST_MESSAGE, + 5, + Duration::from_secs(30), + |dropped| warn!(%channel, %dest, size=payload.len(), dropped, "discarding message to peer, no longer connected") + ); } } - /// Sets up an established outgoing connection. - /// - /// Initiates sending of the handshake as soon as the connection is established. - #[allow(clippy::redundant_clone)] - fn handle_outgoing_connection( - &mut self, - outgoing: OutgoingConnection

, - span: Span, - ) -> Effects> { - let now = Instant::now(); - span.clone().in_scope(|| match outgoing { - OutgoingConnection::FailedEarly { peer_addr, error } - | OutgoingConnection::Failed { - peer_addr, - peer_id: _, - error, - } => { - debug!(err=%display_error(&error), "outgoing connection failed"); - // We perform blocking first, to not trigger a reconnection before blocking. - let mut requests = Vec::new(); - - if let Some(justification) = self.is_blockable_offense_for_outgoing(&error) { - requests.extend(self.outgoing_manager.block_addr( - peer_addr, - now, - justification, - )); - } - - // Now we can proceed with the regular updates. - requests.extend( - self.outgoing_manager - .handle_dial_outcome(DialOutcome::Failed { - addr: peer_addr, - error, - when: now, - }), - ); - - self.process_dial_requests(requests) - } - OutgoingConnection::Loopback { peer_addr } => { - // Loopback connections are marked, but closed. - info!("successful outgoing loopback connection, will be dropped"); - let request = self - .outgoing_manager - .handle_dial_outcome(DialOutcome::Loopback { addr: peer_addr }); - self.process_dial_requests(request) - } - OutgoingConnection::Established { - peer_addr, - peer_id, - peer_consensus_public_key, - sink, - is_syncing, - } => { - info!("new outgoing connection established"); - - let (sender, receiver) = mpsc::unbounded_channel(); - let handle = OutgoingHandle { sender, peer_addr }; - - let request = self - .outgoing_manager - .handle_dial_outcome(DialOutcome::Successful { - addr: peer_addr, - handle, - node_id: peer_id, - when: now, - }); - - let mut effects = self.process_dial_requests(request); - - // Update connection symmetries. 
- if self - .connection_symmetries - .entry(peer_id) - .or_default() - .mark_outgoing(now) - { - self.connection_completed(peer_id); - self.update_syncing_nodes_set(peer_id, is_syncing); - } - - effects.extend( - tasks::message_sender( - receiver, - sink, - self.outgoing_limiter - .create_handle(peer_id, peer_consensus_public_key), - self.net_metrics.queued_messages.clone(), - ) - .instrument(span) - .event(move |_| Event::OutgoingDropped { - peer_id: Box::new(peer_id), - peer_addr, - }), - ); - - effects - } - }) - } - fn handle_network_request( &self, request: NetworkRequest

, @@ -812,24 +588,31 @@ where NetworkRequest::SendMessage { dest, payload, - respond_after_queueing, - auto_closing_responder, + message_queued_responder, } => { + let Some(ref conman) = self.conman else { + error!("cannot send message on non-initialized network component"); + + return Effects::new(); + }; + + let Some((channel, payload)) = stuff_into_envelope(*payload) else { + return Effects::new(); + }; + + self.net_metrics.direct_message_requests.inc(); + // We're given a message to send. Pass on the responder so that confirmation // can later be given once the message has actually been buffered. - self.net_metrics.direct_message_requests.inc(); + self.send_message( + &conman.read_state(), + *dest, + channel, + payload, + message_queued_responder, + ); - if respond_after_queueing { - self.send_message(*dest, Arc::new(Message::Payload(*payload)), None); - auto_closing_responder.respond(()).ignore() - } else { - self.send_message( - *dest, - Arc::new(Message::Payload(*payload)), - Some(auto_closing_responder), - ); - Effects::new() - } + Effects::new() } NetworkRequest::ValidatorBroadcast { payload, @@ -837,7 +620,12 @@ where auto_closing_responder, } => { // We're given a message to broadcast. - self.broadcast_message_to_validators(Arc::new(Message::Payload(*payload)), era_id); + let Some((channel, payload)) = stuff_into_envelope(*payload) else { + return Effects::new(); + }; + + self.broadcast_message_to_validators(channel, payload, era_id); + auto_closing_responder.respond(()).ignore() } NetworkRequest::Gossip { @@ -848,86 +636,31 @@ where auto_closing_responder, } => { // We're given a message to gossip. 
- let sent_to = self.gossip_message( - rng, - Arc::new(Message::Payload(*payload)), - gossip_target, - count, - exclude, - ); - auto_closing_responder.respond(sent_to).ignore() - } - } - } - - fn handle_outgoing_dropped( - &mut self, - peer_id: NodeId, - peer_addr: SocketAddr, - ) -> Effects> { - let requests = self - .outgoing_manager - .handle_connection_drop(peer_addr, Instant::now()); - - self.connection_symmetries - .entry(peer_id) - .or_default() - .unmark_outgoing(Instant::now()); + let Some((channel, payload)) = stuff_into_envelope(*payload) else { + return Effects::new(); + }; - self.outgoing_limiter.remove_connected_validator(&peer_id); + let sent_to = + self.gossip_message(rng, channel, payload, gossip_target, count, exclude); - self.process_dial_requests(requests) - } - - /// Processes a set of `DialRequest`s, updating the component and emitting needed effects. - fn process_dial_requests(&mut self, requests: T) -> Effects> - where - T: IntoIterator>>, - { - let mut effects = Effects::new(); - - for request in requests.into_iter() { - trace!(%request, "processing dial request"); - match request { - DialRequest::Dial { addr, span } => effects.extend( - tasks::connect_outgoing(self.context.clone(), addr) - .instrument(span.clone()) - .event(|outgoing| Event::OutgoingConnection { - outgoing: Box::new(outgoing), - span, - }), - ), - DialRequest::Disconnect { handle: _, span } => { - // Dropping the `handle` is enough to signal the connection to shutdown. - span.in_scope(|| { - debug!("dropping connection, as requested"); - }) - } - DialRequest::SendPing { - peer_id, - nonce, - span, - } => span.in_scope(|| { - trace!("enqueuing ping to be sent"); - self.send_message(peer_id, Arc::new(Message::Ping { nonce }), None); - }), + auto_closing_responder.respond(sent_to).ignore() } } - - effects } /// Handles a received message. - fn handle_incoming_message( + fn handle_incoming_message( &mut self, effect_builder: EffectBuilder, peer_id: NodeId, msg: Message

, + ticket: Ticket, span: Span, ) -> Effects> where - REv: FromIncoming

+ From, + REv: FromIncoming

+ From> + From + Send, { + // Note: For non-payload channels, we drop the `Ticket` implicitly at end of scope. span.in_scope(|| match msg { Message::Handshake { .. } => { // We should never receive a handshake message on an established connection. Simply @@ -936,127 +669,78 @@ where warn!("received unexpected handshake"); Effects::new() } - Message::Ping { nonce } => { - // Send a pong. Incoming pings and pongs are rate limited. - - self.send_message(peer_id, Arc::new(Message::Pong { nonce }), None); - Effects::new() - } - Message::Pong { nonce } => { - // Record the time the pong arrived and forward it to outgoing. - let pong = TaggedTimestamp::from_parts(Instant::now(), nonce); - if self.outgoing_manager.record_pong(peer_id, pong) { - // Note: We no longer block peers here with a `PongLimitExceeded` for failed - // pongs, merely warn. - info!( - "peer {} exceeded failed pong limit, or allowed number of pongs", - peer_id // Redundant information due to span, but better safe than sorry. - ); - } - - Effects::new() - } - Message::Payload(payload) => { - effect_builder.announce_incoming(peer_id, payload).ignore() - } + Message::Payload(payload) => effect_builder + .announce_incoming(peer_id, payload, ticket) + .ignore(), }) } - /// Emits an announcement that a connection has been completed. - fn connection_completed(&self, peer_id: NodeId) { - trace!(num_peers = self.peers().len(), new_peer=%peer_id, "connection complete"); - self.net_metrics.peers.set(self.peers().len() as i64); - } + /// Returns the set of connected nodes. + pub(crate) fn peers(&self) -> BTreeMap { + let Some(ref conman) = self.conman else { + // Not initialized means no peers. + return Default::default(); + }; - /// Updates a set of known joining nodes. - /// If we've just connected to a non-joining node that peer will be removed from the set. - fn update_syncing_nodes_set(&mut self, peer_id: NodeId, is_syncing: bool) { - // Update set of syncing peers. 
- if is_syncing { - debug!(%peer_id, "is syncing"); - self.syncing_nodes.insert(peer_id); - } else { - debug!(%peer_id, "is no longer syncing"); - self.syncing_nodes.remove(&peer_id); - } + conman + .read_state() + .routing_table() + .values() + .map(|route| (route.peer, route.remote_addr)) + .collect() } - /// Returns the set of connected nodes. - pub(crate) fn peers(&self) -> BTreeMap { - let mut ret = BTreeMap::new(); - for node_id in self.outgoing_manager.connected_peers() { - if let Some(connection) = self.outgoing_manager.get_route(node_id) { - ret.insert(node_id, connection.peer_addr.to_string()); - } else { - // This should never happen unless the state of `OutgoingManager` is corrupt. - warn!(%node_id, "route disappeared unexpectedly") - } - } + /// Get a randomly sampled subset of connected peers + pub(crate) fn connected_peers_random(&self, rng: &mut NodeRng, count: usize) -> Vec { + let Some(ref conman) = self.conman else { + // If we are not initialized, return an empty set. + return Vec::new(); + }; - for (node_id, sym) in &self.connection_symmetries { - if let Some(addrs) = sym.incoming_addrs() { - for addr in addrs { - ret.entry(*node_id).or_insert_with(|| addr.to_string()); - } - } - } + // Note: This is not ideal, since it os O(n) (n = number of peers), whereas for a slice it + // would be O(k) (k = number of items). If this proves to be a bottleneck, add an + // unstable `Vec` (allows O(1) random removal) to `ConMan` that stores a list of + // currently connected nodes. - ret - } + let mut subset = conman + .read_state() + .routing_table() + .values() + .map(|route| route.peer) + .choose_multiple(rng, count); - pub(crate) fn fully_connected_peers_random( - &self, - rng: &mut NodeRng, - count: usize, - ) -> Vec { - self.connection_symmetries - .iter() - .filter(|(_, sym)| matches!(sym, ConnectionSymmetry::Symmetric { .. 
})) - .map(|(node_id, _)| *node_id) - .choose_multiple(rng, count) + // Documentation says result must be shuffled to be truly random. + subset.shuffle(rng); + + subset } - pub(crate) fn has_sufficient_fully_connected_peers(&self) -> bool { - self.connection_symmetries - .iter() - .filter(|(_node_id, sym)| matches!(sym, ConnectionSymmetry::Symmetric { .. })) - .count() - >= self.cfg.min_peers_for_initialization as usize + /// Returns whether or not the threshold has been crossed for the component to consider itself + /// sufficiently connected. + pub(crate) fn has_sufficient_connected_peers(&self) -> bool { + let Some(ref conman) = self.conman else { + // If we are not initialized, we do not have any fully connected peers. + return false; + }; + + let connection_count = conman.read_state().routing_table().len(); + connection_count >= self.config.min_peers_for_initialization as usize } #[cfg(test)] /// Returns the node id of this network node. pub(crate) fn node_id(&self) -> NodeId { - self.context.our_id() + self.our_id } } -impl Finalize for Network +impl

Finalize for Network

where - REv: Send + 'static, P: Payload, { - fn finalize(mut self) -> BoxFuture<'static, ()> { + fn finalize(self) -> BoxFuture<'static, ()> { async move { - if let Some(mut channel_management) = self.channel_management.take() { - // Close the shutdown socket, causing the server to exit. - drop(channel_management.shutdown_sender.take()); - drop(channel_management.close_incoming_sender.take()); - - // Wait for the server to exit cleanly. - if let Some(join_handle) = channel_management.server_join_handle.take() { - match join_handle.await { - Ok(_) => debug!(our_id=%self.context.our_id(), "server exited cleanly"), - Err(ref err) => { - error!( - our_id=%self.context.our_id(), - err=display_error(err), - "could not join server task cleanly" - ) - } - } - } - } + self.shutdown_fuse.inner().set(); // Ensure there are no ongoing metrics updates. utils::wait_for_arc_drop( @@ -1070,44 +754,24 @@ where } } -fn choose_gossip_peers( - rng: &mut NodeRng, - gossip_target: GossipTarget, - count: usize, - exclude: HashSet, - connected_peers: impl Iterator, - is_validator_in_era: F, -) -> HashSet -where - F: Fn(EraId, &NodeId) -> bool, -{ - let filtered_peers = connected_peers.filter(|peer_id| !exclude.contains(peer_id)); - match gossip_target { - GossipTarget::Mixed(era_id) => { - let (validators, non_validators): (Vec<_>, Vec<_>) = - filtered_peers.partition(|node_id| is_validator_in_era(era_id, node_id)); - - let (first, second) = if rng.gen() { - (validators, non_validators) - } else { - (non_validators, validators) - }; - - first - .choose_multiple(rng, count) - .interleave(second.iter().choose_multiple(rng, count)) - .take(count) - .copied() - .collect() +fn resolve_addresses<'a>(addresses: impl Iterator) -> HashSet { + let mut resolved = HashSet::new(); + for address in addresses { + match utils::resolve_address(address) { + Ok(addr) => { + if !resolved.insert(addr) { + warn!(%address, resolved=%addr, "ignoring duplicated address"); + }; + } + Err(ref err) => { + 
warn!(%address, err=display_error(err), "failed to resolve address"); + } } - GossipTarget::All => filtered_peers - .choose_multiple(rng, count) - .into_iter() - .collect(), } + resolved } -impl Component for Network +impl Component for Network

where REv: ReactorEvent + From> @@ -1156,16 +820,12 @@ where Effects::new() } }, - Event::IncomingConnection { .. } - | Event::IncomingMessage { .. } - | Event::IncomingClosed { .. } - | Event::OutgoingConnection { .. } - | Event::OutgoingDropped { .. } + Event::IncomingMessage { .. } | Event::NetworkRequest { .. } | Event::NetworkInfoRequest { .. } | Event::GossipOurAddress + | Event::SyncMetrics | Event::PeerAddressReceived(_) - | Event::SweepOutgoing | Event::BlocklistAnnouncement(_) => { warn!( ?event, @@ -1184,24 +844,12 @@ where ); Effects::new() } - Event::IncomingConnection { incoming, span } => { - self.handle_incoming_connection(incoming, span) - } - Event::IncomingMessage { peer_id, msg, span } => { - self.handle_incoming_message(effect_builder, *peer_id, *msg, span) - } - Event::IncomingClosed { - result, + Event::IncomingMessage { peer_id, - peer_addr, + msg, span, - } => self.handle_incoming_closed(result, *peer_id, peer_addr, *span), - Event::OutgoingConnection { outgoing, span } => { - self.handle_outgoing_connection(*outgoing, span) - } - Event::OutgoingDropped { peer_id, peer_addr } => { - self.handle_outgoing_dropped(*peer_id, peer_addr) - } + ticket, + } => self.handle_incoming_message(effect_builder, *peer_id, *msg, ticket, span), Event::NetworkRequest { req: request } => { self.handle_network_request(*request, rng) } @@ -1210,71 +858,84 @@ where responder.respond(self.peers()).ignore() } NetworkInfoRequest::FullyConnectedPeers { count, responder } => responder - .respond(self.fully_connected_peers_random(rng, count)) + .respond(self.connected_peers_random(rng, count)) .ignore(), NetworkInfoRequest::Insight { responder } => responder .respond(NetworkInsights::collect_from_component(self)) .ignore(), }, Event::GossipOurAddress => { - let our_address = GossipedAddress::new( - self.context - .public_addr() - .expect("component not initialized properly"), - ); - let mut effects = effect_builder - .begin_gossip(our_address, Source::Ourself, 
our_address.gossip_target()) - .ignore(); - effects.extend( - effect_builder - .set_timeout(self.cfg.gossip_interval.into()) - .event(|_| Event::GossipOurAddress), + .set_timeout(self.config.gossip_interval.into()) + .event(|_| Event::GossipOurAddress); + + if let Some(public_address) = self.public_addr { + let our_address = GossipedAddress::new(public_address); + debug!( %our_address, "gossiping our addresses" ); + effects.extend( + effect_builder + .begin_gossip( + our_address, + Source::Ourself, + our_address.gossip_target(), + ) + .ignore(), + ); + } else { + // The address should have been set before we first trigger the gossiping, + // thus we should never end up here. + error!("cannot gossip our address, it is missing"); + }; + + // We also ensure we know our known addresses still. + debug!( + address_count = self.known_addresses.len(), + "learning known addresses" ); + self.learn_known_addresses(); + effects } - Event::PeerAddressReceived(gossiped_address) => { - let requests = self.outgoing_manager.learn_addr( - gossiped_address.into(), - false, - Instant::now(), - ); - self.process_dial_requests(requests) + Event::SyncMetrics => { + // Update the `peers` metric. + // TODO: Add additional metrics for bans, do-not-calls, etc. 
+ let peers = if let Some(ref conman) = self.conman { + conman.read_state().routing_table().len() + } else { + 0 + }; + self.net_metrics.peers.set(peers as i64); + effect_builder + .set_timeout(METRICS_UPDATE_RATE) + .event(|_| Event::SyncMetrics) } - Event::SweepOutgoing => { - let now = Instant::now(); - let requests = self.outgoing_manager.perform_housekeeping(rng, now); - - let mut effects = self.process_dial_requests(requests); - - effects.extend( - effect_builder - .set_timeout(OUTGOING_MANAGER_SWEEP_INTERVAL) - .event(|_| Event::SweepOutgoing), - ); + Event::PeerAddressReceived(gossiped_address) => { + if let Some(ref conman) = self.conman { + conman.learn_addr(gossiped_address.into()); + } else { + error!("received gossiped address while component was not initialized"); + } - effects + Effects::new() } Event::BlocklistAnnouncement(announcement) => match announcement { PeerBehaviorAnnouncement::OffenseCommitted { offender, justification, } => { - // TODO: We do not have a proper by-node-ID blocklist, but rather only block - // the current outgoing address of a peer. - info!(%offender, %justification, "adding peer to blocklist after transgression"); - - if let Some(addr) = self.outgoing_manager.get_addr(*offender) { - let requests = self.outgoing_manager.block_addr( - addr, - Instant::now(), - *justification, - ); - self.process_dial_requests(requests) + if let Some(ref conman) = self.conman { + let now = Instant::now(); + let until = now + + Duration::from_millis( + self.config.blocklist_retain_duration.millis(), + ); + + conman.ban_peer(*offender, *justification, now, until); } else { - // Peer got away with it, no longer an outgoing connection. - Effects::new() - } + error!("cannot ban, component not initialized"); + }; + + Effects::new() } }, }, @@ -1286,7 +947,7 @@ where } } -impl InitializedComponent for Network +impl InitializedComponent for Network

where REv: ReactorEvent + From> @@ -1312,360 +973,103 @@ where } } -/// Transport type alias for base encrypted connections. -type Transport = SslStream; +impl ValidatorBoundComponent for Network

+where + REv: ReactorEvent + + From> + + From> + + FromIncoming

+ + From + + From> + + From, + P: Payload, +{ + #[inline(always)] + fn handle_validators( + &mut self, + _effect_builder: EffectBuilder, + _rng: &mut NodeRng, + ) -> Effects { + // TODO: Not used at the moment, consider removing this in the future. -/// A framed transport for `Message`s. -pub(crate) type FullTransport

= tokio_serde::Framed< - FramedTransport, - Message

, - Arc>, - CountingFormat, ->; + Effects::default() + } +} -pub(crate) type FramedTransport = tokio_util::codec::Framed; +/// Transport type for base encrypted connections. +type Transport = SslStream; -/// Constructs a new full transport on a stream. +/// Setups bincode encoding used on the networking transport. +fn bincode_config() -> impl Options { + bincode::options() + .with_no_limit() // We rely on `juliet` to impose limits. + .with_little_endian() // Default at the time of this writing, we are merely pinning it. + .with_varint_encoding() // Same as above. + .reject_trailing_bytes() // There is no reason for us not to reject trailing bytes. +} + +/// Serializes a network message with the protocol specified encoding. /// -/// A full transport contains the framing as well as the encoding scheme used to send messages. -fn full_transport

( - metrics: Weak, - connection_id: ConnectionId, - framed: FramedTransport, - role: Role, -) -> FullTransport

+/// This function exists as a convenience, because there never should be a failure in serializing +/// messages we produced ourselves. +fn serialize_network_message(msg: &T) -> Option where - for<'de> P: Serialize + Deserialize<'de>, - for<'de> Message

: Serialize + Deserialize<'de>, + T: Serialize + ?Sized, { - tokio_serde::Framed::new( - framed, - CountingFormat::new(metrics, connection_id, role, BincodeFormat::default()), - ) + bincode_config() + .serialize(msg) + .map(Bytes::from) + .map_err(|err| { + error!(%err, "serialization failure when encoding outgoing message"); + err + }) + .ok() } -/// Constructs a framed transport. -fn framed_transport(transport: Transport, maximum_net_message_size: u32) -> FramedTransport { - tokio_util::codec::Framed::new( - transport, - LengthDelimitedCodec::builder() - .max_frame_length(maximum_net_message_size as usize) - .new_codec(), - ) +/// Given a message payload, puts it into a proper message envelope and returns the serialized +/// envelope along with the channel it should be sent on. +#[inline(always)] +fn stuff_into_envelope(payload: P) -> Option<(Channel, Bytes)> { + let msg = Message::Payload(payload); + let channel = msg.get_channel(); + let byte_payload = serialize_network_message(&msg)?; + Some((channel, byte_payload)) } -impl Debug for Network +/// Deserializes a networking message from the protocol specified encoding. +fn deserialize_network_message

(bytes: &[u8]) -> Result, bincode::Error> where P: Payload, { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - // We output only the most important fields of the component, as it gets unwieldy quite fast - // otherwise. - f.debug_struct("Network") - .field("our_id", &self.context.our_id()) - .field("state", &self.state) - .field("public_addr", &self.context.public_addr()) - .finish() - } + bincode_config().deserialize(bytes) } -#[cfg(test)] -mod gossip_target_tests { - use std::{collections::BTreeSet, iter}; - - use static_assertions::const_assert; - - use casper_types::testing::TestRng; - - use super::*; - - const VALIDATOR_COUNT: usize = 10; - const NON_VALIDATOR_COUNT: usize = 20; - // The tests assume that we have fewer validators than non-validators. - const_assert!(VALIDATOR_COUNT < NON_VALIDATOR_COUNT); - - struct Fixture { - validators: BTreeSet, - non_validators: BTreeSet, - all_peers: Vec, - } - - impl Fixture { - fn new(rng: &mut TestRng) -> Self { - let validators: BTreeSet = iter::repeat_with(|| NodeId::random(rng)) - .take(VALIDATOR_COUNT) - .collect(); - let non_validators: BTreeSet = iter::repeat_with(|| NodeId::random(rng)) - .take(NON_VALIDATOR_COUNT) - .collect(); - - let mut all_peers: Vec = validators - .iter() - .copied() - .chain(non_validators.iter().copied()) - .collect(); - all_peers.shuffle(rng); - - Fixture { - validators, - non_validators, - all_peers, - } - } - - fn is_validator_in_era(&self) -> impl Fn(EraId, &NodeId) -> bool + '_ { - move |_era_id: EraId, node_id: &NodeId| self.validators.contains(node_id) - } - - fn num_validators<'a>(&self, input: impl Iterator) -> usize { - input - .filter(move |&node_id| self.validators.contains(node_id)) - .count() - } - - fn num_non_validators<'a>(&self, input: impl Iterator) -> usize { - input - .filter(move |&node_id| self.non_validators.contains(node_id)) - .count() +/// Processes a request guard obtained by making a request to a peer through Juliet RPC. 
+/// +/// Ensures that outgoing messages are not cancelled, a would be the case when simply dropping the +/// `RequestGuard`. Potential errors that are available early are dropped, later errors discarded. +#[inline] +fn process_request_guard(net_metrics: &Arc, channel: Channel, guard: RequestGuard) { + let cm = net_metrics.channel_metrics.get(channel); + match guard.try_get_response() { + Ok(Ok(ref payload)) => { + // We got an incredibly quick round-trip, lucky us! Nothing to do. + cm.update_from_received_response( + payload.as_ref().map(Bytes::len).unwrap_or_default() as u64 + ) } - } - - #[test] - fn should_choose_mixed() { - const TARGET: GossipTarget = GossipTarget::Mixed(EraId::new(1)); - - let mut rng = TestRng::new(); - let fixture = Fixture::new(&mut rng); - - // Choose more than total count from all peers, exclude none, should return all peers. - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - VALIDATOR_COUNT + NON_VALIDATOR_COUNT + 1, - HashSet::new(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), fixture.all_peers.len()); - - // Choose total count from all peers, exclude none, should return all peers. - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - VALIDATOR_COUNT + NON_VALIDATOR_COUNT, - HashSet::new(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), fixture.all_peers.len()); - - // Choose 2 * VALIDATOR_COUNT from all peers, exclude none, should return all validators and - // VALIDATOR_COUNT non-validators. 
- let chosen = choose_gossip_peers( - &mut rng, - TARGET, - 2 * VALIDATOR_COUNT, - HashSet::new(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), 2 * VALIDATOR_COUNT); - assert_eq!(fixture.num_validators(chosen.iter()), VALIDATOR_COUNT); - assert_eq!(fixture.num_non_validators(chosen.iter()), VALIDATOR_COUNT); - - // Choose VALIDATOR_COUNT from all peers, exclude none, should return VALIDATOR_COUNT peers, - // half validators and half non-validators. - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - VALIDATOR_COUNT, - HashSet::new(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), VALIDATOR_COUNT); - assert_eq!(fixture.num_validators(chosen.iter()), VALIDATOR_COUNT / 2); - assert_eq!( - fixture.num_non_validators(chosen.iter()), - VALIDATOR_COUNT / 2 - ); - - // Choose two from all peers, exclude none, should return two peers, one validator and one - // non-validator. - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - 2, - HashSet::new(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), 2); - assert_eq!(fixture.num_validators(chosen.iter()), 1); - assert_eq!(fixture.num_non_validators(chosen.iter()), 1); - - // Choose one from all peers, exclude none, should return one peer with 50-50 chance of - // being a validator. 
- let mut got_validator = false; - let mut got_non_validator = false; - let mut attempts = 0; - while !got_validator || !got_non_validator { - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - 1, - HashSet::new(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), + Ok(Err(err)) => { + cm.send_failures.inc(); + rate_limited!( + MESSAGE_SENDING_FAILURE, + 5, + Duration::from_secs(60), + |dropped| warn!(%channel, %err, dropped, "failed to send message") ); - assert_eq!(chosen.len(), 1); - let node_id = chosen.iter().next().unwrap(); - got_validator |= fixture.validators.contains(node_id); - got_non_validator |= fixture.non_validators.contains(node_id); - attempts += 1; - assert!(attempts < 1_000_000); } - - // Choose VALIDATOR_COUNT from all peers, exclude all but one validator, should return the - // one validator and VALIDATOR_COUNT - 1 non-validators. - let exclude: HashSet<_> = fixture - .validators - .iter() - .copied() - .take(VALIDATOR_COUNT - 1) - .collect(); - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - VALIDATOR_COUNT, - exclude.clone(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), VALIDATOR_COUNT); - assert_eq!(fixture.num_validators(chosen.iter()), 1); - assert_eq!( - fixture.num_non_validators(chosen.iter()), - VALIDATOR_COUNT - 1 - ); - assert!(exclude.is_disjoint(&chosen)); - - // Choose 3 from all peers, exclude all non-validators, should return 3 validators. 
- let exclude: HashSet<_> = fixture.non_validators.iter().copied().collect(); - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - 3, - exclude.clone(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), 3); - assert_eq!(fixture.num_validators(chosen.iter()), 3); - assert!(exclude.is_disjoint(&chosen)); - } - - #[test] - fn should_choose_all() { - const TARGET: GossipTarget = GossipTarget::All; - - let mut rng = TestRng::new(); - let fixture = Fixture::new(&mut rng); - - // Choose more than total count from all peers, exclude none, should return all peers. - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - VALIDATOR_COUNT + NON_VALIDATOR_COUNT + 1, - HashSet::new(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), fixture.all_peers.len()); - - // Choose total count from all peers, exclude none, should return all peers. - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - VALIDATOR_COUNT + NON_VALIDATOR_COUNT, - HashSet::new(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), fixture.all_peers.len()); - - // Choose VALIDATOR_COUNT from only validators, exclude none, should return all validators. - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - VALIDATOR_COUNT, - HashSet::new(), - fixture.validators.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), VALIDATOR_COUNT); - assert_eq!(fixture.num_validators(chosen.iter()), VALIDATOR_COUNT); - - // Choose VALIDATOR_COUNT from only non-validators, exclude none, should return - // VALIDATOR_COUNT non-validators. 
- let chosen = choose_gossip_peers( - &mut rng, - TARGET, - VALIDATOR_COUNT, - HashSet::new(), - fixture.non_validators.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), VALIDATOR_COUNT); - assert_eq!(fixture.num_non_validators(chosen.iter()), VALIDATOR_COUNT); - - // Choose VALIDATOR_COUNT from all peers, exclude all but VALIDATOR_COUNT from all peers, - // should return all the non-excluded peers. - let exclude: HashSet<_> = fixture - .all_peers - .iter() - .copied() - .take(NON_VALIDATOR_COUNT) - .collect(); - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - VALIDATOR_COUNT, - exclude.clone(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), VALIDATOR_COUNT); - assert!(exclude.is_disjoint(&chosen)); - - // Choose one from all peers, exclude enough non-validators to have an even chance of - // returning a validator as a non-validator, should return one peer with 50-50 chance of - // being a validator. - let exclude: HashSet<_> = fixture - .non_validators - .iter() - .copied() - .take(NON_VALIDATOR_COUNT - VALIDATOR_COUNT) - .collect(); - let mut got_validator = false; - let mut got_non_validator = false; - let mut attempts = 0; - while !got_validator || !got_non_validator { - let chosen = choose_gossip_peers( - &mut rng, - TARGET, - 1, - exclude.clone(), - fixture.all_peers.iter().copied(), - fixture.is_validator_in_era(), - ); - assert_eq!(chosen.len(), 1); - assert!(exclude.is_disjoint(&chosen)); - let node_id = chosen.iter().next().unwrap(); - got_validator |= fixture.validators.contains(node_id); - got_non_validator |= fixture.non_validators.contains(node_id); - attempts += 1; - assert!(attempts < 1_000_000); + Err(guard) => { + // No ACK received yet, forget, so we don't cancel. 
+ guard.forget(); } } } diff --git a/node/src/components/network/bincode_format.rs b/node/src/components/network/bincode_format.rs deleted file mode 100644 index 0d6e47b344..0000000000 --- a/node/src/components/network/bincode_format.rs +++ /dev/null @@ -1,92 +0,0 @@ -//! Bincode wire format encoder. -//! -//! An encoder for `Bincode` messages with our specific settings pinned. - -use std::{fmt::Debug, io, pin::Pin, sync::Arc}; - -use bincode::{ - config::{ - RejectTrailing, VarintEncoding, WithOtherEndian, WithOtherIntEncoding, WithOtherLimit, - WithOtherTrailing, - }, - Options, -}; -use bytes::{Bytes, BytesMut}; -use serde::{Deserialize, Serialize}; -use tokio_serde::{Deserializer, Serializer}; - -use super::Message; - -/// bincode encoder/decoder for messages. -#[allow(clippy::type_complexity)] -pub struct BincodeFormat( - // Note: `bincode` encodes its options at the type level. The exact shape is determined by - // `BincodeFormat::default()`. - pub(crate) WithOtherTrailing< - WithOtherIntEncoding< - WithOtherEndian< - WithOtherLimit, - bincode::config::LittleEndian, - >, - VarintEncoding, - >, - RejectTrailing, - >, -); - -impl BincodeFormat { - /// Serializes an arbitrary serializable value with the networking bincode serializer. - #[inline] - pub(crate) fn serialize_arbitrary(&self, item: &T) -> io::Result> - where - T: Serialize, - { - self.0 - .serialize(item) - .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) - } -} - -impl Debug for BincodeFormat { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("BincodeFormat") - } -} - -impl Default for BincodeFormat { - fn default() -> Self { - let opts = bincode::options() - .with_no_limit() // We rely on framed tokio transports to impose limits. - .with_little_endian() // Default at the time of this writing, we are merely pinning it. - .with_varint_encoding() // Same as above. - .reject_trailing_bytes(); // There is no reason for us not to reject trailing bytes. 
- BincodeFormat(opts) - } -} - -impl

Serializer>> for BincodeFormat -where - Message

: Serialize, -{ - type Error = io::Error; - - #[inline] - fn serialize(self: Pin<&mut Self>, item: &Arc>) -> Result { - let msg = &**item; - self.serialize_arbitrary(msg).map(Into::into) - } -} - -impl

Deserializer> for BincodeFormat -where - for<'de> Message

: Deserialize<'de>, -{ - type Error = io::Error; - - #[inline] - fn deserialize(self: Pin<&mut Self>, src: &BytesMut) -> Result, Self::Error> { - self.0 - .deserialize(src) - .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) - } -} diff --git a/node/src/components/network/blocklist.rs b/node/src/components/network/blocklist.rs index 5fd28029e1..bc69c1a2ba 100644 --- a/node/src/components/network/blocklist.rs +++ b/node/src/components/network/blocklist.rs @@ -4,15 +4,18 @@ use std::fmt::{self, Display, Formatter}; -use casper_hashing::Digest; use casper_types::EraId; use datasize::DataSize; use serde::Serialize; -use crate::components::{block_accumulator, fetcher::Tag}; +use crate::{ + components::{block_accumulator, fetcher::Tag}, + consensus::ValidationError, + utils::display_error, +}; /// Reasons why a peer was blocked. -#[derive(DataSize, Debug, Serialize)] +#[derive(Clone, DataSize, Debug, Serialize)] pub(crate) enum BlocklistJustification { /// Peer sent incorrect item. SentBadItem { tag: Tag }, @@ -36,24 +39,11 @@ pub(crate) enum BlocklistJustification { SentInvalidConsensusValue { /// The era for which the invalid value was destined. era: EraId, + //// Cause of value invalidity. + cause: ValidationError, }, - /// Too many unasked or expired pongs were sent by the peer. - #[allow(dead_code)] // Disabled as per 1.5.5 for stability reasons. - PongLimitExceeded, /// Peer misbehaved during consensus and is blocked for it. BadConsensusBehavior, - /// Peer is on the wrong network. - WrongNetwork { - /// The network name reported by the peer. - peer_network_name: String, - }, - /// Peer presented the wrong chainspec hash. - WrongChainspecHash { - /// The chainspec hash reported by the peer. - peer_chainspec_hash: Digest, - }, - /// Peer did not present a chainspec hash. - MissingChainspecHash, /// Peer is considered dishonest. DishonestPeer, /// Peer sent too many finality signatures. 
@@ -74,30 +64,17 @@ impl Display for BlocklistJustification { "sent a finality signature that is invalid or unexpected ({})", error ), - BlocklistJustification::SentInvalidConsensusValue { era } => { - write!(f, "sent an invalid consensus value in {}", era) - } - BlocklistJustification::PongLimitExceeded => { - f.write_str("wrote too many expired or invalid pongs") + BlocklistJustification::SentInvalidConsensusValue { era, cause } => { + write!( + f, + "sent an invalid consensus value in {}: {}", + era, + display_error(cause) + ) } BlocklistJustification::BadConsensusBehavior => { f.write_str("sent invalid data in consensus") } - BlocklistJustification::WrongNetwork { peer_network_name } => write!( - f, - "reported to be on the wrong network ({:?})", - peer_network_name - ), - BlocklistJustification::WrongChainspecHash { - peer_chainspec_hash, - } => write!( - f, - "reported a mismatched chainspec hash ({})", - peer_chainspec_hash - ), - BlocklistJustification::MissingChainspecHash => { - f.write_str("sent handshake without chainspec hash") - } BlocklistJustification::SentBadBlock { error } => { write!(f, "sent a block that is invalid or unexpected ({})", error) } diff --git a/node/src/components/network/chain_info.rs b/node/src/components/network/chain_info.rs index 71e3349aad..5728fb1b5a 100644 --- a/node/src/components/network/chain_info.rs +++ b/node/src/components/network/chain_info.rs @@ -10,26 +10,30 @@ use casper_types::ProtocolVersion; use datasize::DataSize; use super::{ - counting_format::ConnectionId, + connection_id::ConnectionId, message::{ConsensusCertificate, NodeKeyPair}, - Message, + Message, PerChannel, }; -use crate::types::Chainspec; +use crate::types::{chainspec::JulietConfig, Chainspec}; /// Data retained from the chainspec by the networking component. /// /// Typically this information is used for creating handshakes. 
-#[derive(DataSize, Debug)] +#[derive(Clone, DataSize, Debug)] pub(crate) struct ChainInfo { /// Name of the network we participate in. We only remain connected to peers with the same /// network name as us. pub(super) network_name: String, - /// The maximum message size for a network message, as supplied from the chainspec. - pub(super) maximum_net_message_size: u32, + /// The maximum handshake message size, as supplied from the chainspec. + pub(super) maximum_handshake_message_size: u32, + /// The maximum frame size for network transport, as supplied from the chainspec. + pub maximum_frame_size: u32, /// The protocol version. pub(super) protocol_version: ProtocolVersion, /// The hash of the chainspec. pub(super) chainspec_hash: Digest, + /// The Juliet low-level data. + pub(super) networking_config: PerChannel, } impl ChainInfo { @@ -39,27 +43,27 @@ impl ChainInfo { let network_name = "rust-tests-network"; ChainInfo { network_name: network_name.to_string(), - maximum_net_message_size: 24 * 1024 * 1024, // Hardcoded at 24M. + maximum_handshake_message_size: 1024 * 1024, // Hardcoded at 1MiB. protocol_version: ProtocolVersion::V1_0_0, chainspec_hash: Digest::hash(format!("{}-chainspec", network_name)), + networking_config: Default::default(), + maximum_frame_size: 4096, } } /// Create a handshake based on chain identification data. - pub(super) fn create_handshake

( + pub(super) fn create_handshake( &self, public_addr: SocketAddr, consensus_keys: Option<&NodeKeyPair>, connection_id: ConnectionId, - is_syncing: bool, - ) -> Message

{ - Message::Handshake { + ) -> Message<()> { + Message::<()>::Handshake { network_name: self.network_name.clone(), public_addr, protocol_version: self.protocol_version, consensus_certificate: consensus_keys .map(|key_pair| ConsensusCertificate::create(connection_id, key_pair)), - is_syncing, chainspec_hash: Some(self.chainspec_hash), } } @@ -69,9 +73,11 @@ impl From<&Chainspec> for ChainInfo { fn from(chainspec: &Chainspec) -> Self { ChainInfo { network_name: chainspec.network_config.name.clone(), - maximum_net_message_size: chainspec.network_config.maximum_net_message_size, + maximum_handshake_message_size: chainspec.network_config.maximum_handshake_message_size, protocol_version: chainspec.protocol_version(), chainspec_hash: chainspec.hash(), + networking_config: chainspec.network_config.networking_config, + maximum_frame_size: chainspec.network_config.maximum_frame_size, } } } diff --git a/node/src/components/network/config.rs b/node/src/components/network/config.rs index 217aeaab25..a55bc5bb82 100644 --- a/node/src/components/network/config.rs +++ b/node/src/components/network/config.rs @@ -2,11 +2,11 @@ use std::net::{Ipv4Addr, SocketAddr}; use std::path::PathBuf; -use casper_types::{ProtocolVersion, TimeDiff}; +use casper_types::TimeDiff; use datasize::DataSize; use serde::{Deserialize, Serialize}; -use super::EstimatorWeights; +use super::{conman::Config as ConmanConfig, PerChannel}; /// Default binding address. /// @@ -26,33 +26,41 @@ const DEFAULT_GOSSIP_INTERVAL: TimeDiff = TimeDiff::from_seconds(30); /// Default delay until initial round of address gossiping starts. const DEFAULT_INITIAL_GOSSIP_DELAY: TimeDiff = TimeDiff::from_seconds(5); -/// Default time limit for an address to be in the pending set. -const DEFAULT_MAX_ADDR_PENDING_TIME: TimeDiff = TimeDiff::from_seconds(60); - /// Default timeout during which the handshake needs to be completed. 
const DEFAULT_HANDSHAKE_TIMEOUT: TimeDiff = TimeDiff::from_seconds(20); +/// Default value for timeout bubbling. +const DEFAULT_BUBBLE_TIMEOUTS: bool = true; + +/// Default value for error timeout. +const DEFAULT_ERROR_TIMEOUT: TimeDiff = TimeDiff::from_seconds(10); + +/// Default value for validator broadcast. +const DEFAULT_USE_VALIDATOR_BROADCAST: bool = true; + +/// Default value for use of mixed gossip. +const DEFAULT_USE_MIXED_GOSSIP: bool = false; + impl Default for Config { fn default() -> Self { Config { bind_address: DEFAULT_BIND_ADDRESS.to_string(), public_address: DEFAULT_PUBLIC_ADDRESS.to_string(), known_addresses: Vec::new(), + keylog_path: None, min_peers_for_initialization: DEFAULT_MIN_PEERS_FOR_INITIALIZATION, gossip_interval: DEFAULT_GOSSIP_INTERVAL, initial_gossip_delay: DEFAULT_INITIAL_GOSSIP_DELAY, - max_addr_pending_time: DEFAULT_MAX_ADDR_PENDING_TIME, handshake_timeout: DEFAULT_HANDSHAKE_TIMEOUT, - max_incoming_peer_connections: 0, - max_outgoing_byte_rate_non_validators: 0, - max_incoming_message_rate_non_validators: 0, - estimator_weights: Default::default(), - tarpit_version_threshold: None, - tarpit_duration: TimeDiff::from_seconds(600), - tarpit_chance: 0.2, - max_in_flight_demands: 50, + send_buffer_size: PerChannel::init_with(|_| None), + ack_timeout: TimeDiff::from_seconds(30), blocklist_retain_duration: TimeDiff::from_seconds(600), identity: None, + conman: Default::default(), + bubble_timeouts: DEFAULT_BUBBLE_TIMEOUTS, + error_timeout: DEFAULT_ERROR_TIMEOUT, + use_validator_broadcast: DEFAULT_USE_VALIDATOR_BROADCAST, + use_mixed_gossip: DEFAULT_USE_MIXED_GOSSIP, } } } @@ -83,32 +91,24 @@ pub struct Config { pub public_address: String, /// Known address of a node on the network used for joining. pub known_addresses: Vec, + /// If set, logs all TLS keys to this file. + pub keylog_path: Option, /// Minimum number of fully-connected peers to consider component initialized. 
pub min_peers_for_initialization: u16, /// Interval in milliseconds used for gossiping. pub gossip_interval: TimeDiff, /// Initial delay before the first round of gossip. pub initial_gossip_delay: TimeDiff, - /// Maximum allowed time for an address to be kept in the pending set. - pub max_addr_pending_time: TimeDiff, /// Maximum allowed time for handshake completion. pub handshake_timeout: TimeDiff, - /// Maximum number of incoming connections per unique peer. Unlimited if `0`. - pub max_incoming_peer_connections: u16, - /// Maximum number of bytes per second allowed for non-validating peers. Unlimited if 0. - pub max_outgoing_byte_rate_non_validators: u32, - /// Maximum of requests answered from non-validating peers. Unlimited if 0. - pub max_incoming_message_rate_non_validators: u32, - /// Weight distribution for the payload impact estimator. - pub estimator_weights: EstimatorWeights, - /// The protocol version at which (or under) tarpitting is enabled. - pub tarpit_version_threshold: Option, - /// If tarpitting is enabled, duration for which connections should be kept open. - pub tarpit_duration: TimeDiff, - /// The chance, expressed as a number between 0.0 and 1.0, of triggering the tarpit. - pub tarpit_chance: f32, - /// Maximum number of demands for objects that can be in-flight. - pub max_in_flight_demands: u32, + /// An optional buffer size for each Juliet channel, allowing to setup how many messages + /// we can keep in a memory buffer before blocking at call site. + /// + /// If it is not specified, `in_flight_limit * 2` is used as a default. + #[serde(default)] + pub send_buffer_size: PerChannel>, + /// Timeout for completing handling of a message before closing a connection to a peer. + pub ack_timeout: TimeDiff, /// Duration peers are kept on the block list, before being redeemed. pub blocklist_retain_duration: TimeDiff, /// Network identity configuration option. 
@@ -116,6 +116,17 @@ pub struct Config { /// An identity will be automatically generated when starting up a node if this option is /// unspecified. pub identity: Option, + /// Configuration for the connection manager. + pub conman: ConmanConfig, + /// Whether or not to consider a connection stuck after a single request times out, causing a + /// termination and reconnection. + pub bubble_timeouts: bool, + /// The maximum time a peer is allowed to take to receive a fatal error. + pub error_timeout: TimeDiff, + /// Whether to restrict broadcasts of certain values to validators. + pub use_validator_broadcast: bool, + /// Whether to enable the use of mixed mode gossiping. + pub use_mixed_gossip: bool, } #[cfg(test)] @@ -142,7 +153,11 @@ impl Config { /// Constructs a `Config` suitable for use by the first node of a testnet on a single machine. pub(crate) fn default_local_net_first_node(bind_port: u16) -> Self { - Config::new((TEST_BIND_INTERFACE, bind_port).into()) + Config { + conman: ConmanConfig::default_with_low_timeouts(), + blocklist_retain_duration: TimeDiff::from_seconds(1), + ..Config::new((TEST_BIND_INTERFACE, bind_port).into()) + } } /// Constructs a `Config` suitable for use by a node joining a testnet on a single machine. @@ -154,6 +169,8 @@ impl Config { SocketAddr::from((TEST_BIND_INTERFACE, known_peer_port)).to_string() ], gossip_interval: DEFAULT_TEST_GOSSIP_INTERVAL, + conman: ConmanConfig::default_with_low_timeouts(), + blocklist_retain_duration: TimeDiff::from_seconds(1), ..Default::default() } } diff --git a/node/src/components/network/conman.rs b/node/src/components/network/conman.rs new file mode 100644 index 0000000000..76161899dc --- /dev/null +++ b/node/src/components/network/conman.rs @@ -0,0 +1,1237 @@ +//! Overlay network connection management. +//! +//! The core goal of this module is to allow the node to maintain a connection to other nodes on the +//! 
network, reconnecting on connection loss and ensuring there is always exactly one [`juliet`] +//! connection between peers. + +// TODO: This module's core design of removing entries on drop is safe, but suboptimal, as it leads +// to a lot of lock contention on drop. A careful redesign might ease this burden. + +// TODO: Consider adding pruning for tables, in case someone is flooding us with bogus addresses. + +use std::{ + collections::{hash_map::Entry, HashMap, HashSet}, + fmt::{self, Debug, Display, Formatter}, + net::SocketAddr, + num::NonZeroUsize, + sync::{Arc, RwLock}, + time::{Duration, Instant}, +}; + +use async_trait::async_trait; +use bytes::Bytes; +use casper_types::{PublicKey, TimeDiff}; +use datasize::DataSize; +use futures::{TryFuture, TryFutureExt}; +use juliet::{ + header::ErrorKind, + rpc::{IncomingRequest, JulietRpcClient, JulietRpcServer, RpcBuilder, RpcServerError}, + ChannelId, Id, +}; +use serde::{Deserialize, Serialize}; +use strum::EnumCount; +use thiserror::Error; +use tokio::{ + io::{ReadHalf, WriteHalf}, + net::{TcpListener, TcpStream}, + sync::{OwnedSemaphorePermit, Semaphore, TryAcquireError}, +}; +use tracing::{ + debug, error, error_span, + field::{self, Empty}, + info, trace, warn, Instrument, Span, +}; + +use crate::{ + types::NodeId, + utils::{display_error, rate_limited::rate_limited, DropSwitch, FlattenResult, ObservableFuse}, +}; + +use super::{ + blocklist::BlocklistJustification, error::ConnectionError, handshake::HandshakeOutcome, + Transport, +}; + +pub(crate) type ConManStateReadLock<'a> = std::sync::RwLockReadGuard<'a, ConManState>; + +type RpcClient = JulietRpcClient<{ super::Channel::COUNT }>; + +type RpcServer = + JulietRpcServer<{ super::Channel::COUNT }, ReadHalf, WriteHalf>; + +/// Connection manager. +/// +/// The connection manager accepts incoming connections and initiates outgoing connections upon +/// learning about new addresses.
It also handles reconnections, disambiguation when there is both +/// an incoming and outgoing connection, and back-off timers for connection attempts. +/// +/// `N` is the number of channels by the instantiated `juliet` protocol. +#[derive(Debug)] +pub(crate) struct ConMan { + /// The shared connection manager state, which contains per-peer and per-address information. + ctx: Arc, + /// A fuse used to cancel execution. + /// + /// Causes all background tasks (incoming, outgoing and server) to be shutdown as soon as + /// `ConMan` is dropped. + shutdown: DropSwitch, +} + +#[derive(DataSize, Debug, Copy, Clone, Deserialize, Serialize)] +/// Configuration settings for the connection manager. +pub struct Config { + /// The timeout for a single underlying TCP connection to be established. + tcp_connect_timeout: TimeDiff, + /// Maximum time allowed for TLS setup and handshaking to proceed. + setup_timeout: TimeDiff, + /// How often to reattempt a connection. + /// + /// At one second, 8 attempts means that the last attempt will be delayed for 128 seconds. + #[data_size(skip)] + tcp_connect_attempts: NonZeroUsize, + /// Base delay for the backoff, grows exponentially until `tcp_connect_attempts` maxes out. + tcp_connect_base_backoff: TimeDiff, + /// How long to back off from reconnecting to an address after a failure that indicates a + /// significant problem. + significant_error_backoff: TimeDiff, + /// How long to back off from reconnecting to an address if the error is likely not going to + /// change for a long time. + permanent_error_backoff: TimeDiff, + /// How long to wait before reconnecting when a successful outgoing connection is lost. + successful_reconnect_delay: TimeDiff, + /// The minimum time a connection must have successfully served data to not be seen as flaky. + flaky_connection_threshold: TimeDiff, + /// Number of incoming connections before refusing to accept any new ones. 
+ max_incoming_connections: usize, + /// Number of outgoing connections before stopping to connect to learned addresses. + max_outgoing_connections: usize, +} + +/// Shared information across the connection manager and its subtasks. +struct ConManContext { + /// Shared configuration settings. + cfg: Config, + /// Callback handler for connection setup and incoming request handling. + protocol_handler: Box, + /// Juliet RPC configuration. + rpc_builder: RpcBuilder<{ super::Channel::COUNT }>, + /// The shared state. + state: RwLock, + /// Our own address (for loopback filtering). + public_addr: SocketAddr, + /// Our own node ID. + our_id: NodeId, + /// Limiter for incoming connections. + incoming_limiter: Arc, +} + +/// Shared state for [`ConMan`]. +/// +/// Tracks outgoing and incoming connections. +#[derive(Debug, Default)] +pub(crate) struct ConManState { + /// A set of outgoing addresses for which a handler is currently running. + address_book: HashSet, + /// Mapping of [`SocketAddr`]s to an instant in the future until which they must not be dialed. + do_not_call: HashMap, + /// The current route per node ID. + /// + /// An entry in this table indicates an established connection to a peer. Every entry in this + /// table is controlled by an `ActiveRoute`, all other access should be read-only. + routing_table: HashMap, + /// A mapping of `NodeId`s to details about their bans. + banlist: HashMap, + /// A mapping of known consensus keys to node IDs. + /// + /// Tracks how a specific validator key is reachable. + key_index: HashMap, NodeId>, +} + +impl ConManState { + /// Returns a reference to the address book of this [`ConManState`]. + pub(crate) fn address_book(&self) -> &HashSet { + &self.address_book + } + + /// Returns a reference to the do-not-call list of this [`ConManState`]. + pub(crate) fn do_not_call(&self) -> &HashMap { + &self.do_not_call + } + + /// Returns a reference to the routing table of this [`ConManState`].
+ pub(crate) fn routing_table(&self) -> &HashMap { + &self.routing_table + } + + /// Returns a reference to the banlist of this [`ConManState`]. + pub(crate) fn banlist(&self) -> &HashMap { + &self.banlist + } + + /// Returns a reference to the key index of this [`ConManState`]. + pub(crate) fn key_index(&self) -> &HashMap, NodeId> { + &self.key_index + } +} + +/// Record of punishment for a peer's malicious behavior. +#[derive(Debug)] +pub(crate) struct Sentence { + /// Point in time at which the ban is lifted. + pub(crate) until: Instant, + /// Justification for the ban. + pub(crate) justification: BlocklistJustification, +} + +/// Data related to an established connection. +#[derive(Debug)] +pub(crate) struct Route { + /// Node ID of the peer. + pub(crate) peer: NodeId, + /// The established [`juliet`] RPC client that is used to send requests to the peer. + pub(crate) client: RpcClient, + /// The remote address of the peer. + /// + /// For outgoing connections, this will be the peer address we connected to, for incoming ones + /// it is the usually randomly selected outgoing address of the peer. + pub(crate) remote_addr: SocketAddr, + /// The direction of the connection. + /// + /// This is only used for reporting purposes. + pub(crate) direction: Direction, + /// The consensus key the node presented upon handshaking. + // TODO: It may be beneficial to make this not a part of `Route` with a fixed type, to reduce + // coupling (e.g. use a `Route>>` instead, rename to `data`). + pub(crate) consensus_key: Option>, + /// Timestamp recording when this route was created. + pub(crate) since: Instant, +} + +/// An active route that is registered in a routing table. +#[derive(Debug)] +struct ActiveRoute { + /// The context containing the routing table this active route is contained in. + ctx: Arc, + /// The peer ID for which the route is registered. + peer_id: NodeId, + /// Consensus key associated with route. + consensus_key: Option>, +} + +/// External integration.
+/// +/// Contains callbacks for transport setup (via [`setup_incoming`] and [`setup_outgoing`]) and +/// handling of actual incoming requests. +#[async_trait] +pub(crate) trait ProtocolHandler: Send + Sync { + /// Sets up an incoming connection. + /// + /// Given a TCP stream of an incoming connection, should setup any higher level transport and + /// perform a handshake. + async fn setup_incoming( + &self, + stream: TcpStream, + ) -> Result; + + /// Sets up an outgoing connection. + /// + /// Given a TCP stream of an outgoing connection, should setup any higher level transport and + /// perform a handshake. + async fn setup_outgoing( + &self, + stream: TcpStream, + ) -> Result; + + /// Process one incoming request. + async fn handle_incoming_request( + &self, + peer: NodeId, + consensus_key: Option<&PublicKey>, + request: IncomingRequest, + ) -> Result<(), String>; +} + +/// The outcome of a handshake performed by the [`ProtocolHandler`]. +pub(crate) struct ProtocolHandshakeOutcome { + /// Peer's `NodeId`. + pub(crate) peer_id: NodeId, + /// The actual handshake outcome. + pub(crate) handshake_outcome: HandshakeOutcome, +} + +/// An error communicated back to a peer. +#[derive(Debug, Deserialize, Error, Serialize)] +enum PeerError { + /// The peer told us we are banned. + #[error("you are banned by a peer: {justification}, left: {time_left:?}")] + YouAreBanned { + /// How long until the ban is lifted. + time_left: Duration, + /// Justification for the ban. + justification: String, + }, + /// Banned for another reason. + #[error("other: {0}")] + Other(String), +} + +impl PeerError { + /// Creates a peer error indicating a peer was banned. + /// + /// # Panics + /// + /// Will panic in debug targets if `now > until`. 
+ #[inline(always)] + fn banned(now: Instant, until: Instant, justification: &BlocklistJustification) -> Self { + debug_assert!(now <= until); + + let time_left = until.checked_duration_since(now).unwrap_or_default(); + + Self::YouAreBanned { + time_left, + justification: justification.to_string(), + } + } + + /// Attempt to deserialize a [`PeerError`] from given bytes. + #[inline(always)] + fn deserialize(raw: &[u8]) -> Option { + bincode::Options::deserialize(super::bincode_config(), raw).ok() + } + + /// Creates a peer error from anything string-adjacent. + #[inline(always)] + fn other(err: E) -> Self { + Self::Other(err.to_string()) + } + + /// Serializes the error. + #[inline(always)] + fn serialize(&self) -> Bytes { + bincode::Options::serialize(super::bincode_config(), self) + .map(Bytes::from) + .map_err(|err| { + error!(%err, "serialization failure when encoding outgoing peer_error"); + err + }) + .ok() + .unwrap_or_else(|| Bytes::from(&b"serialization failure"[..])) + } +} + +impl ProtocolHandshakeOutcome { + /// Registers the handshake outcome on the tracing span, to give context to logs. + /// + /// ## Safety + /// + /// This function MUST NOT be called on the same span more than once; the current + /// `tracing_subscriber` implementation will otherwise multiply log messages. See + /// https://github.com/tokio-rs/tracing/issues/2334#issuecomment-1270751200. for details. + fn record_on(&self, span: Span) { + span.record("peer_id", &field::display(self.peer_id)); + + if let Some(ref public_key) = self.handshake_outcome.peer_consensus_public_key { + span.record("consensus_key", &field::display(public_key)); + } + } +} + +impl ConMan { + /// Create a new connection manager. + /// + /// Immediately spawns a task accepting incoming connections on a tokio task. The task will be + /// stopped if the returned [`ConMan`] is dropped. 
+ pub(crate) fn new( + listener: TcpListener, + public_addr: SocketAddr, + our_id: NodeId, + protocol_handler: Box, + rpc_builder: RpcBuilder<{ super::Channel::COUNT }>, + cfg: Config, + ) -> Self { + let ctx = Arc::new(ConManContext { + cfg, + protocol_handler, + rpc_builder, + state: Default::default(), + public_addr, + our_id, + incoming_limiter: Arc::new(Semaphore::new(cfg.max_incoming_connections)), + }); + + let shutdown = DropSwitch::new(ObservableFuse::new()); + + let server_shutdown = shutdown.inner().clone(); + let server_ctx = ctx.clone(); + + let server = async move { + loop { + // We handle accept errors here, since they can be caused by a temporary resource + // shortage or the remote side closing the connection while it is waiting in + // the queue. + match listener.accept().await { + Ok((stream, peer_addr)) => { + // The span setup is used throughout the entire lifetime of the connection. + let span = + error_span!("incoming", %peer_addr, peer_id=Empty, consensus_key=Empty); + + match server_ctx.incoming_limiter.clone().try_acquire_owned() { + Ok(permit) => server_shutdown.spawn( + handle_incoming( + server_ctx.clone(), + stream, + server_shutdown.clone(), + permit, + ) + .instrument(span), + ), + Err(TryAcquireError::NoPermits) => { + rate_limited!( + EXCEED_INCOMING, + |dropped| warn!(most_recent_skipped=%peer_addr, dropped, "exceeded incoming connection limit, are you getting spammed?") + ); + } + Err(TryAcquireError::Closed) => { + // We may be shutting down. + debug!("incoming limiter semaphore closed"); + } + } + } + + // TODO: Handle resource errors gracefully. In general, two kinds of errors + // occur here: Local resource exhaustion, which should be handled by + // waiting a few milliseconds, or remote connection errors, which can be + // dropped immediately. + // + // The code in its current state will consume 100% CPU if local resource + // exhaustion happens, as no distinction is made and no delay introduced. 
+ Err(ref err) => { + warn!( + ?listener, + err = display_error(err), + "dropping incoming connection during accept" + ) + } + } + } + }; + + shutdown.inner().spawn(server); + + Self { ctx, shutdown } + } + + /// Learns a new address. + /// + /// Will eventually connect to the address, if not overloaded or blocked. + #[inline(always)] + pub(crate) fn learn_addr(&self, peer_addr: SocketAddr) { + self.ctx + .clone() + .learn_addr(peer_addr, self.shutdown.inner().clone()) + } + + /// Bans a peer. + /// + /// The peer will be disconnected from and prevented from reconnecting. + pub(crate) fn ban_peer( + &self, + peer_id: NodeId, + justification: BlocklistJustification, + now: Instant, + until: Instant, + ) { + { + let mut guard = self.ctx.state.write().expect("lock poisoned"); + + rate_limited!( + BANNING_PEER, + |dropped| warn!(%peer_id, %justification, dropped, "banning peer") + ); + + let peer_error = PeerError::banned(now, until, &justification); + + match guard.banlist.entry(peer_id) { + Entry::Occupied(mut occupied) => { + if occupied.get().until > until { + debug!("peer is already serving longer sentence sentence"); + + // Leave as-is, the old sentence is longer. + return; + } + + occupied.insert(Sentence { + until, + justification, + }); + } + Entry::Vacant(vacant) => { + vacant.insert(Sentence { + until, + justification, + }); + } + } + + if let Some(route) = guard.routing_table().get(&peer_id) { + route.client.send_custom_error( + ChannelId::new(0), + Id::new(0), + peer_error.serialize(), + ); + } + } + } + + /// Returns a read lock onto the state of this connection manager. + /// + /// ## Warning + /// + /// Holding the lock for more than a few microseconds is highly discouraged, as it is a + /// non-async read lock that will potentially block a large number of threads (not tasks!) of + /// the tokio runtime. You have been warned!
+ #[inline] + pub(crate) fn read_state(&self) -> ConManStateReadLock<'_> { + self.ctx.state.read().expect("lock poisoned") + } +} + +impl ConManContext { + /// Informs the system about a potentially new address. + /// + /// Does a preliminary check whether or not a new outgoing handler should be spawned for the + /// supplied `peer_addr`. These checks are performed on a read lock to avoid write lock + /// contention, but repeated by the spawned handler (if any are spawned) afterwards to avoid + /// race conditions. + fn learn_addr(self: Arc, peer_addr: SocketAddr, shutdown: ObservableFuse) { + if peer_addr == self.public_addr { + trace!("ignoring loopback address"); + return; + } + + // We have been informed of a new address. Find out if it is new or uncallable. + { + let guard = self.state.read().expect("lock poisoned"); + + let now = Instant::now(); + if guard.should_not_call(&peer_addr, now) { + trace!(%peer_addr, "is on do-not-call list"); + return; + } + + if guard.address_book.contains(&peer_addr) { + // There already exists a handler attempting to connect, exit. + trace!(%peer_addr, "discarding peer address, already has outgoing handler"); + return; + } + + // If we exhausted our address book capacity, discard the address, we will have to wait + // until some active connections time out. + if guard.address_book.len() >= self.cfg.max_outgoing_connections { + rate_limited!( + EXCEED_ADDRESS_BOOK, + |dropped| warn!(most_recent_lost=%peer_addr, dropped, "exceeding maximum number of outgoing connections, you may be getting spammed") + ); + + return; + } + } + + // Our initial check whether or not we can connect was successful, spawn a handler. + let span = error_span!("outgoing", %peer_addr); + trace!(%peer_addr, "learned about address"); + + shutdown.spawn(OutgoingHandler::run(self, peer_addr).instrument(span)); + } + + /// Sets up an instance of the [`juliet`] protocol on a transport returned.
+ fn setup_juliet(&self, transport: Transport) -> (RpcClient, RpcServer) { + let (read_half, write_half) = tokio::io::split(transport); + self.rpc_builder.build(read_half, write_half) + } +} + +impl ConManState { + /// Determines if an address is on the do-not-call list. + #[inline(always)] + fn should_not_call(&self, addr: &SocketAddr, now: Instant) -> bool { + if let Some(until) = self.do_not_call.get(addr) { + now <= *until + } else { + false + } + } + + /// Unconditionally removes an address from the do-not-call list. + #[inline(always)] + fn prune_should_not_call(&mut self, addr: &SocketAddr) { + self.do_not_call.remove(addr); + } + + /// Determines if a peer is still banned. + /// + /// Returns `None` if the peer is NOT banned, its remaining sentence otherwise. + #[inline(always)] + fn is_still_banned(&mut self, peer: &NodeId, now: Instant) -> Option<&Sentence> { + let sentence = self.banlist.get(peer)?; + + if now < sentence.until { + // Unfortunately it seems we cannot have a lifetime that matches `&self` (for returning + // the sentence), but also is shorter lived than this function's scope, so we can + // reborrow for removal. This is a workaround, retrieving the peer a second time. + return self.banlist.get(peer); + } + + self.banlist.remove(peer); + + None + } +} + +/// Handles an incoming connection. +/// +/// There is no reconnection logic for incoming connections, thus their handling is strictly linear. +async fn handle_incoming( + ctx: Arc, + stream: TcpStream, + shutdown: ObservableFuse, + _permit: OwnedSemaphorePermit, +) { + // Note: Initial errors are too spammable and triggered by foreign services connecting, so we + // restrict them to `debug` level. Once a handshake has been completed, we are more + // interested in errors, so they are rate limited warnings. + debug!("handling new connection attempt"); + + // Determine the peer address to store on route.
+ let Ok(remote_addr) = stream.peer_addr() else { + rate_limited!(INCOMING_PEER_ADDR_FAIL, |dropped| warn!( + dropped, + "failed to retrieve peer address from incoming stream" + )); + return; + }; + + let ProtocolHandshakeOutcome { + peer_id, + handshake_outcome, + } = match tokio::time::timeout( + ctx.cfg.setup_timeout.into(), + ctx.protocol_handler.setup_incoming(stream), + ) + .await + .map_err(|_elapsed| ConnectionError::SetupTimeout) + .flatten_result() + .map(move |outcome| { + outcome.record_on(Span::current()); + outcome + }) { + Ok(outcome) => outcome, + Err(error) => { + debug!(%error, "failed to complete handshake on incoming"); + return; + } + }; + + if peer_id == ctx.our_id { + // Loopback connection established, this should never happen. + error!("should never complete an incoming loopback connection"); + return; + } + + let direction = Direction::determine(ctx.our_id, peer_id); + if direction != Direction::Incoming { + // The connection is supposed to be outgoing from our perspective. + debug!("closing low-ranking incoming connection"); + + // Conserve public address, but drop the stream early, so that when we learn, the + // connection is hopefully already closed. + let public_addr = handshake_outcome.public_addr; + drop(handshake_outcome); + + // Note: This is the original "Magic Mike" functionality. + ctx.learn_addr(public_addr, shutdown.clone()); + + return; + } + + debug!("high-ranking incoming connection established"); + + // At this point, the initial connection negotiation is complete. Setup the `juliet` RPC + // transport, which we will need regardless to send errors. + let (rpc_client, rpc_server) = ctx.setup_juliet(handshake_outcome.transport); + + let active_route = { + let mut guard = ctx.state.write().expect("lock poisoned"); + + // Check if the peer is still banned. If it isn't, ensure the banlist is cleared. 
+ let now = Instant::now(); + if let Some(entry) = guard.is_still_banned(&peer_id, now) { + // Logged at info level - does not require operator intervention usually, but it is nice + // to know. + rate_limited!( + REFUSED_BANNED_PEER, + |dropped| info!(until=?entry.until, justification=%entry.justification, dropped, "peer is still banned") + ); + + let peer_error = PeerError::banned(now, entry.until, &entry.justification); + tokio::spawn(rpc_server.send_custom_error_and_shutdown( + ChannelId::new(0), + Id::new(0), + peer_error.serialize(), + )); + + return; + } + + // Check if there is a route registered, i.e. an incoming handler is already running. + if guard.routing_table.contains_key(&peer_id) { + // We are already connected, meaning we got raced by another connection. Keep + // the existing and exit. + debug!("additional incoming connection ignored"); + return; + } + + ActiveRoute::new( + &mut guard, + ctx.clone(), + peer_id, + rpc_client, + remote_addr, + direction, + handshake_outcome.peer_consensus_public_key, + ) + }; + + info!("now connected via incoming connection"); + match active_route.serve(rpc_server).await { + Ok(()) => { + rate_limited!(INCOMING_CLOSED, |dropped| info!( + dropped, + "connection closed, peer may reconnect" + )); + } + Err(err) => { + // Log a warning if an error occurs on an incoming connection. 
+ rate_limited!( + INCOMING_CLOSED_WITH_ERR, + |dropped| warn!(%err, dropped, "closed incoming connection due to error") + ); + } + } +} + +impl Debug for ConManContext { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("ConManContext") + .field("protocol_handler", &"...") + .field("rpc_builder", &"...") + .field("state", &self.state) + .finish() + } +} + +#[derive(Debug)] +struct OutgoingHandler { + ctx: Arc, + peer_addr: SocketAddr, +} + +#[derive(Debug, Error)] +enum OutgoingError { + #[error("exhausted TCP reconnection attempts")] + ReconnectionAttemptsExhausted(#[source] ConnectionError), + #[error("failed to complete handshake")] + FailedToCompleteHandshake(#[source] ConnectionError), + #[error("loopback encountered")] + LoopbackEncountered, + #[error("should be incoming connection")] + ShouldBeIncoming, + #[error("remote peer is banned")] + EncounteredBannedPeer(Instant), + #[error("RPC server error")] + RpcServerError(RpcServerError), +} + +impl OutgoingHandler { + /// Creates a new outgoing handler. + /// + /// This should be the only method used to create new instances of `OutgoingHandler`, to + /// preserve the invariant of all of them being registered in an address book. + fn new(state: &mut ConManState, arc_ctx: Arc, peer_addr: SocketAddr) -> Self { + state.address_book.insert(peer_addr); + Self { + ctx: arc_ctx, + peer_addr, + } + } + + /// Runs the outgoing handler. + /// + /// Will perform repeated connection attempts to `peer_addr`, controlled by the configuration + /// settings on the context. + /// + /// ## Cancellation safety + /// + /// This function is cancellation safe, specifically the routing table found on `ctx` will + /// always be updated correctly. + async fn run(ctx: Arc, peer_addr: SocketAddr) { + debug!("spawned new outgoing handler"); + + // Check if we should connect at all, then register in address book. 
+ let mut outgoing_handler = { + let mut guard = ctx.state.write().expect("lock poisoned"); + + if guard.address_book.contains(&peer_addr) { + debug!("got raced by another outgoing handler, aborting"); + return; + } + + let now = Instant::now(); + if guard.should_not_call(&peer_addr, now) { + // This should happen very rarely, it requires a racing handler to complete and the + // resulting do-not-call to expire all while this function was starting. + debug!("address turned do-not-call"); + return; + } + guard.prune_should_not_call(&peer_addr); + + Self::new(&mut guard, ctx.clone(), peer_addr) + }; + + // We now enter a connection loop. After attempting to connect and serve, we either sleep + // and repeat the loop, connecting again, or `break` with a do-not-call timer. + let do_not_call_until = loop { + // We need a subspan to avoid duplicate registrations of peer data on retries. + let sub_span = error_span!("connect-and-serve", peer_id = Empty, consensus_key = Empty); + match outgoing_handler + .connect_and_serve() + .instrument(sub_span) + .await + { + Ok(duration) => { + // Regular connection closure, i.e. without an error reported. + + // Judge how long the connection was active. + let delay = if duration > ctx.cfg.flaky_connection_threshold.into() { + rate_limited!(LOST_CONNECTION, |dropped| info!( + dropped, + "lost connection, will reconnect" + )); + ctx.cfg.successful_reconnect_delay + } else { + rate_limited!(LOST_FLAKY_CONNECTION, |dropped| warn!( + dropped, + "lost connection, but its flaky, will reconnect later" + )); + ctx.cfg.significant_error_backoff + }; + + tokio::time::sleep(delay.into()).await; + + // After this, the loop will repeat, triggering a reconnect. + } + Err(OutgoingError::EncounteredBannedPeer(until)) => { + // We will not keep attempting to connect to banned peers, put them on the + // do-not-call list. 
+ break until; + } + Err(OutgoingError::FailedToCompleteHandshake(err)) => { + rate_limited!( + FAILED_HANDSHAKE, + |dropped| info!(%err, dropped, "failed to complete handshake") + ); + break Instant::now() + ctx.cfg.significant_error_backoff.into(); + } + Err(OutgoingError::LoopbackEncountered) => { + info!("found loopback"); + break Instant::now() + ctx.cfg.permanent_error_backoff.into(); + } + Err(OutgoingError::ReconnectionAttemptsExhausted(err)) => { + // We could not connect to the address, so we are going to forget it. + rate_limited!( + RECONNECTION_ATTEMPTS_EXHAUSTED, + |dropped| info!(last_error=%err, dropped, "forgetting address after exhausting reconnection attempts") + ); + return; + } + Err(OutgoingError::RpcServerError(err)) => { + rate_limited!( + RPC_ERROR_ON_OUTGOING, + |dropped| warn!(%err, dropped, "encountered juliet RPC error") + ); + + let delay = reconnect_delay_from_rpc_server_error(&ctx.cfg, &err); + break Instant::now() + delay; + } + Err(OutgoingError::ShouldBeIncoming) => { + // This is "our bad", but the peer has been informed of our address now. + // TODO: When an incoming connection is made (from the peer), consider clearing + // this faster. + debug!("should be incoming connection"); + break Instant::now() + ctx.cfg.permanent_error_backoff.into(); + } + } + }; + + // Update the do-not-call list. + { + let mut guard = ctx.state.write().expect("lock poisoned"); + + if guard.do_not_call.len() >= ctx.cfg.max_outgoing_connections { + rate_limited!(EXCEEDED_DO_NOT_CALL, |dropped| warn!( + most_recent_skipped=%peer_addr, + dropped, + "did not add outgoing address to do-not-call list, already at capacity" + )); + } else { + guard.do_not_call.insert(peer_addr, do_not_call_until); + } + } + } + + /// Performs one iteration of a connection cycle. + /// + /// Will attempt several times to TCP connect, then handshake and establish a connection. 
If the + /// connection is closed without errors, returns the duration of the connection, otherwise a + /// more specific `Err` is returned. + /// + /// ## Cancellation safety + /// + /// This function is cancellation safe, it will at worst result in an abrupt termination of the + /// connection (which peers must be able to handle). + async fn connect_and_serve(&mut self) -> Result { + let stream = retry_with_exponential_backoff( + self.ctx.cfg.tcp_connect_attempts, + self.ctx.cfg.tcp_connect_base_backoff.into(), + || connect(self.ctx.cfg.tcp_connect_timeout.into(), self.peer_addr), + ) + .await + .map_err(OutgoingError::ReconnectionAttemptsExhausted)?; + + let ProtocolHandshakeOutcome { + peer_id, + handshake_outcome, + } = tokio::time::timeout( + self.ctx.cfg.setup_timeout.into(), + self.ctx.protocol_handler.setup_outgoing(stream), + ) + .await + .map_err(|_elapsed| { + OutgoingError::FailedToCompleteHandshake(ConnectionError::SetupTimeout) + })? + .map_err(OutgoingError::FailedToCompleteHandshake) + .map(move |outcome| { + outcome.record_on(Span::current()); + outcome + })?; + + if peer_id == self.ctx.our_id { + return Err(OutgoingError::LoopbackEncountered); + } + + let direction = Direction::determine(self.ctx.our_id, peer_id); + if direction != Direction::Outgoing { + return Err(OutgoingError::ShouldBeIncoming); + } + + let (rpc_client, rpc_server) = self.ctx.setup_juliet(handshake_outcome.transport); + + // Update routing and outgoing state. + let active_route = { + let mut guard = self.ctx.state.write().expect("lock poisoned"); + + let now = Instant::now(); + if let Some(entry) = guard.is_still_banned(&peer_id, now) { + debug!(until=?entry.until, justification=%entry.justification, "outgoing connection reached banned peer"); + + // Ensure an error is sent.
+ let message = PeerError::banned(now, entry.until, &entry.justification); + tokio::spawn(rpc_server.send_custom_error_and_shutdown( + ChannelId::new(0), + Id::new(0), + message.serialize(), + )); + return Err(OutgoingError::EncounteredBannedPeer(entry.until)); + } + + ActiveRoute::new( + &mut guard, + self.ctx.clone(), + peer_id, + rpc_client, + self.peer_addr, + direction, + handshake_outcome.peer_consensus_public_key, + ) + }; + + let serve_start = Instant::now(); + active_route + .serve(rpc_server) + .await + .map_err(OutgoingError::RpcServerError)?; + Ok(Instant::now().duration_since(serve_start)) + } +} + +impl Drop for OutgoingHandler { + fn drop(&mut self) { + // When being dropped, we relinquish exclusive control over the address book entry. + let mut guard = self.ctx.state.write().expect("lock poisoned"); + if !guard.address_book.remove(&self.peer_addr) { + error!("address book should not be modified by anything but outgoing handler"); + } + } +} + +impl ActiveRoute { + /// Creates a new active route by registering it on the given context. + #[inline(always)] + fn new( + state: &mut ConManState, + ctx: Arc, + peer_id: NodeId, + rpc_client: RpcClient, + remote_addr: SocketAddr, + direction: Direction, + consensus_key: Option>, + ) -> Self { + let consensus_key = consensus_key.map(Arc::from); + let route = Route { + peer: peer_id, + client: rpc_client, + remote_addr, + direction, + consensus_key: consensus_key.clone(), + since: Instant::now(), + }; + + if state.routing_table.insert(peer_id, route).is_some() { + error!("should never encounter residual route"); + } + + if let Some(ref ck) = consensus_key { + if let Some(old) = state.key_index.insert(ck.clone(), peer_id) { + rate_limited!( + RESIDUAL_CONSENSUS_KEY, + |dropped| warn!(%old, new=%peer_id, consensus_key=%ck, dropped, "consensus key moved peers while connected") + ); + } + } + + Self { + ctx, + peer_id, + consensus_key, + } + } + + /// Serve data received from an active route. 
+ async fn serve(self, mut rpc_server: RpcServer) -> Result<(), RpcServerError> { + while let Some(request) = rpc_server.next_request().await? { + trace!(%request, "received incoming request"); + let channel = request.channel(); + let id = request.id(); + + if let Err(err) = self + .ctx + .protocol_handler + .handle_incoming_request(self.peer_id, self.consensus_key.as_deref(), request) + .await + { + // The handler returned an error, exit and close connection. + rate_limited!( + INCOMING_REQUEST_HANDLING_FAILED, + |dropped| warn!(%err, dropped, "error handling incoming request") + ); + + // Send a string description of the error. This will also cause the connection to be + // torn down eventually, so we do not need to `break` here. + rpc_server.send_custom_error(channel, id, PeerError::other(err).serialize()); + } + } + + // Regular connection closing. + Ok(()) + } +} + +impl Drop for ActiveRoute { + fn drop(&mut self) { + let mut guard = self.ctx.state.write().expect("lock poisoned"); + + if let Some(ref ck) = self.consensus_key { + // Ensure we are removing the same value we put in. + if guard.key_index.get(ck) == Some(&self.peer_id) { + guard.key_index.remove(ck); + } + } + + if guard.routing_table.remove(&self.peer_id).is_none() { + error!("routing table should only be touched by active route"); + } + } +} + +/// Connects to given address. +/// +/// Will cancel the connection attempt once `TCP_CONNECT_TIMEOUT` is hit. +/// +/// ## Cancellation safety +/// +/// This function is cancellation safe, similar to [`TcpStream::connect`]. +async fn connect(timeout: Duration, addr: SocketAddr) -> Result { + tokio::time::timeout(timeout, TcpStream::connect(addr)) + .await + .map_err(|_elapsed| ConnectionError::TcpConnectionTimeout)? + .map_err(ConnectionError::TcpConnection) +} + +/// Retries a given future with an exponential backoff timer between retries.
+/// +/// ## Cancellation safety +/// +/// This function is cancellation safe if and only if the returned future `Fut` is cancellation +/// safe. +/// +/// ## Panics +/// +/// Will panic in debug mode if `max_attempts` is 0. +async fn retry_with_exponential_backoff( + max_attempts: NonZeroUsize, + base_backoff: Duration, + mut f: F, +) -> Result<::Ok, ::Error> +where + Fut: TryFuture, + F: FnMut() -> Fut, +{ + let mut failed_attempts: usize = 0; + + loop { + match f().into_future().await { + Ok(v) => return Ok(v), + Err(err) => { + let backoff = 2u32.pow(failed_attempts as u32) * base_backoff; + + failed_attempts += 1; + if failed_attempts >= max_attempts.get() { + return Err(err); + } + + trace!( + failed_attempts, + remaining = max_attempts.get() - failed_attempts, + ?backoff, + "attempt failed, backing off" + ); + + tokio::time::sleep(backoff).await; + } + } + } +} + +/// Calculates a sensible do-not-call-timeout from a given error. +fn reconnect_delay_from_rpc_server_error(cfg: &Config, err: &RpcServerError) -> Duration { + let Some((header, raw)) = err.as_remote_other_err() else { + return cfg.significant_error_backoff.into(); + }; + + if !header.is_error() || header.error_kind() != ErrorKind::Other { + return cfg.significant_error_backoff.into(); + } + + // It's a valid user error with a payload. + let Some(peer_err) = PeerError::deserialize(raw) else { + rate_limited!(RPC_ERROR_OTHER_INVALID_MESSAGE, |dropped| warn!( + dropped, + "failed to deserialize a custom error message" + )); + return cfg.significant_error_backoff.into(); + }; + + match peer_err { + PeerError::YouAreBanned { time_left, .. } => { + time_left.min(cfg.permanent_error_backoff.into()) + } + PeerError::Other(_) => cfg.significant_error_backoff.into(), + } +} + +/// A connection direction. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize)] +#[repr(u8)] +pub(crate) enum Direction { + /// A connection made by a peer, connected back to us. 
+ Incoming, + /// A connection initiated by us, to a peer. + Outgoing, +} + +impl Direction { + #[inline(always)] + pub(crate) fn determine(us: NodeId, them: NodeId) -> Self { + if us > them { + Direction::Outgoing + } else { + Direction::Incoming + } + } +} + +impl Display for Direction { + #[inline(always)] + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Direction::Incoming => f.write_str("incoming"), + Direction::Outgoing => f.write_str("outgoing"), + } + } +} + +const DEFAULT_TCP_CONNECT_TIMEOUT: TimeDiff = TimeDiff::from_seconds(10); +const DEFAULT_SETUP_TIMEOUT: TimeDiff = TimeDiff::from_seconds(10); +const DEFAULT_TCP_CONNECT_ATTEMPTS: usize = 8; +const DEFAULT_TCP_CONNECT_BASE_BACKOFF: TimeDiff = TimeDiff::from_seconds(1); +const DEFAULT_SIGNIFICANT_ERROR_BACKOFF: TimeDiff = TimeDiff::from_seconds(60); +const DEFAULT_PERMANENT_ERROR_BACKOFF: TimeDiff = TimeDiff::from_seconds(10 * 60); +const DEFAULT_SUCCESSFUL_RECONNECT_DELAY: TimeDiff = TimeDiff::from_seconds(1); +const DEFAULT_FLAKY_CONNECTION_THRESHOLD: TimeDiff = TimeDiff::from_seconds(60); +const DEFAULT_MAX_INCOMING_CONNECTIONS: usize = 10_000; +const DEFAULT_MAX_OUTGOING_CONNECTIONS: usize = 10_000; + +impl Default for Config { + fn default() -> Self { + Self { + tcp_connect_timeout: DEFAULT_TCP_CONNECT_TIMEOUT, + setup_timeout: DEFAULT_SETUP_TIMEOUT, + tcp_connect_attempts: NonZeroUsize::new(DEFAULT_TCP_CONNECT_ATTEMPTS) + .expect("expected non-zero DEFAULT_TCP_CONNECT_ATTEMPTS"), + tcp_connect_base_backoff: DEFAULT_TCP_CONNECT_BASE_BACKOFF, + significant_error_backoff: DEFAULT_SIGNIFICANT_ERROR_BACKOFF, + permanent_error_backoff: DEFAULT_PERMANENT_ERROR_BACKOFF, + flaky_connection_threshold: DEFAULT_FLAKY_CONNECTION_THRESHOLD, + successful_reconnect_delay: DEFAULT_SUCCESSFUL_RECONNECT_DELAY, + max_incoming_connections: DEFAULT_MAX_INCOMING_CONNECTIONS, + max_outgoing_connections: DEFAULT_MAX_OUTGOING_CONNECTIONS, + } + } +} + +#[cfg(test)] +impl Config { + /// Creates a 
configuration with very low timeouts, suitable for unit testing. + pub(crate) fn default_with_low_timeouts() -> Self { + Self { + tcp_connect_timeout: TimeDiff::from_seconds(3), + setup_timeout: TimeDiff::from_seconds(3), + tcp_connect_base_backoff: TimeDiff::from_millis(10), + significant_error_backoff: TimeDiff::from_seconds(2), + permanent_error_backoff: TimeDiff::from_seconds(2), + successful_reconnect_delay: TimeDiff::from_millis(10), + flaky_connection_threshold: TimeDiff::from_seconds(10), + ..Default::default() + } + } +} diff --git a/node/src/components/network/connection_id.rs b/node/src/components/network/connection_id.rs new file mode 100644 index 0000000000..7def93c00e --- /dev/null +++ b/node/src/components/network/connection_id.rs @@ -0,0 +1,105 @@ +//! Random unique per-connection ID. +//! +//! This module introduces [`ConnectionId`], a unique ID per established connection that can be +//! independently derived by peers on either side of a connection. + +use openssl::ssl::SslRef; +#[cfg(test)] +use rand::RngCore; +use static_assertions::const_assert; + +use casper_hashing::Digest; +#[cfg(test)] +use casper_types::testing::TestRng; + +use super::tls::KeyFingerprint; + +/// An ID identifying a connection. +/// +/// The ID is guaranteed to be the same on both ends of the connection, unique if at least one side +/// of the connection played "by the rules" and generated a proper nonce. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(crate) struct ConnectionId([u8; Digest::LENGTH]); + +// Invariant assumed by `ConnectionId`, `Digest` must be <= than `KeyFingerprint`. +const_assert!(KeyFingerprint::LENGTH >= Digest::LENGTH); +// We also assume it is at least 12 bytes. +const_assert!(Digest::LENGTH >= 12); + +/// Random data derived from TLS connections. +#[derive(Copy, Clone, Debug)] +pub(super) struct TlsRandomData { + /// Random data extracted from the client of the connection. + digest: Digest, +} + +/// Length of the TLS-derived random data.
+const RLEN: usize = 32; + +impl TlsRandomData { + /// Collects random data from an existing SSL connection. + fn collect(ssl: &SslRef) -> Self { + // Both server random and client random are public, we just need ours to be truly random for + // security reasons. + let mut combined_random: [u8; RLEN * 2] = [0; RLEN * 2]; + + // Combine both. Important: Assume an attacker knows one of these ahead of time, due to the + // way TLS handshakes work. + ssl.server_random(&mut combined_random[0..RLEN]); + ssl.client_random(&mut combined_random[RLEN..]); + + Self { + digest: Digest::hash(combined_random), + } + } + + /// Creates random `TlsRandomData`. + #[cfg(test)] + fn random(rng: &mut TestRng) -> Self { + let mut buffer = [0u8; RLEN * 2]; + + rng.fill_bytes(&mut buffer); + + Self { + digest: Digest::hash(buffer), + } + } +} + +impl ConnectionId { + /// Creates a new connection ID, based on random values from server and client and a prefix. + fn create(random_data: TlsRandomData) -> ConnectionId { + // Just to be sure, create a prefix and hash again. + // TODO: Consider replacing with a key derivation function instead. + const PREFIX: &[u8] = b"CONNECTION_ID//"; + const TOTAL_LEN: usize = PREFIX.len() + Digest::LENGTH; + + let mut data = [0; TOTAL_LEN]; + let (data_prefix, data_suffix) = &mut data[..].split_at_mut(PREFIX.len()); + + data_prefix.copy_from_slice(PREFIX); + data_suffix.copy_from_slice(&random_data.digest.value()); + + let id = Digest::hash(data).value(); + + ConnectionId(id) + } + + #[inline] + /// Returns a reference to the raw bytes of the connection ID. + pub(crate) fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// Creates a new connection ID from an existing SSL connection. + #[inline] + pub(crate) fn from_connection(ssl: &SslRef) -> Self { + Self::create(TlsRandomData::collect(ssl)) + } + + /// Creates a random `ConnectionId`.
+ #[cfg(test)] + pub(super) fn random(rng: &mut TestRng) -> Self { + ConnectionId::create(TlsRandomData::random(rng)) + } +} diff --git a/node/src/components/network/counting_format.rs b/node/src/components/network/counting_format.rs deleted file mode 100644 index 412633084f..0000000000 --- a/node/src/components/network/counting_format.rs +++ /dev/null @@ -1,380 +0,0 @@ -//! Observability for network serialization/deserialization. -//! -//! This module introduces two IDs: [`ConnectionId`] and [`TraceId`]. The [`ConnectionId`] is a -//! unique ID per established connection that can be independently derive by peers on either of a -//! connection. [`TraceId`] identifies a single message, distinguishing even messages that are sent -//! to the same peer with equal contents. - -use std::{ - convert::TryFrom, - fmt::{self, Display, Formatter}, - pin::Pin, - sync::{Arc, Weak}, -}; - -use bytes::{Bytes, BytesMut}; -use openssl::ssl::SslRef; -use pin_project::pin_project; -#[cfg(test)] -use rand::RngCore; -use static_assertions::const_assert; -use tokio_serde::{Deserializer, Serializer}; -use tracing::{trace, warn}; - -use casper_hashing::Digest; -#[cfg(test)] -use casper_types::testing::TestRng; - -use super::{tls::KeyFingerprint, Message, Metrics, Payload}; -use crate::{types::NodeId, utils}; - -/// Lazily-evaluated network message ID generator. -/// -/// Calculates a hash for the wrapped value when `Display::fmt` is called. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -struct TraceId([u8; 8]); - -impl Display for TraceId { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - f.write_str(&base16::encode_lower(&self.0)) - } -} - -/// A metric-updating serializer/deserializer wrapper for network messages. -/// -/// Classifies each message given and updates the `NetworkingMetrics` accordingly. Also emits a -/// TRACE-level message to the `net_out` and `net_in` target with a per-message unique hash when -/// a message is sent or received. 
-#[pin_project] -#[derive(Debug)] -pub struct CountingFormat { - /// The actual serializer performing the work. - #[pin] - inner: F, - /// Identifier for the connection. - connection_id: ConnectionId, - /// Counter for outgoing messages. - out_count: u64, - /// Counter for incoming messages. - in_count: u64, - /// Our role in the connection. - role: Role, - /// Metrics to update. - metrics: Weak, -} - -impl CountingFormat { - /// Creates a new counting formatter. - #[inline] - pub(super) fn new( - metrics: Weak, - connection_id: ConnectionId, - role: Role, - inner: F, - ) -> Self { - Self { - metrics, - connection_id, - out_count: 0, - in_count: 0, - role, - inner, - } - } -} - -impl Serializer>> for CountingFormat -where - F: Serializer>>, - P: Payload, -{ - type Error = F::Error; - - #[inline] - fn serialize(self: Pin<&mut Self>, item: &Arc>) -> Result { - let this = self.project(); - let projection: Pin<&mut F> = this.inner; - - let serialized = F::serialize(projection, item)?; - let msg_size = serialized.len() as u64; - let msg_kind = item.classify(); - Metrics::record_payload_out(this.metrics, msg_kind, msg_size); - - let trace_id = this - .connection_id - .create_trace_id(this.role.out_flag(), *this.out_count); - *this.out_count += 1; - - trace!(target: "net_out", - msg_id = %trace_id, - msg_size, - msg_kind = %msg_kind, "sending"); - - Ok(serialized) - } -} - -impl Deserializer> for CountingFormat -where - F: Deserializer>, - P: Payload, -{ - type Error = F::Error; - - #[inline] - fn deserialize(self: Pin<&mut Self>, src: &BytesMut) -> Result, Self::Error> { - let this = self.project(); - let projection: Pin<&mut F> = this.inner; - - let msg_size = src.len() as u64; - - let deserialized = F::deserialize(projection, src)?; - let msg_kind = deserialized.classify(); - Metrics::record_payload_in(this.metrics, msg_kind, msg_size); - - let trace_id = this - .connection_id - .create_trace_id(this.role.in_flag(), *this.in_count); - *this.in_count += 1; - - 
trace!(target: "net_in", - msg_id = %trace_id, - msg_size, - msg_kind = %msg_kind, "received"); - - Ok(deserialized) - } -} - -/// An ID identifying a connection. -/// -/// The ID is guaranteed to be the same on both ends of the connection, but not guaranteed to be -/// unique or sufficiently random. Do not use it for any cryptographic/security related purposes. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub(super) struct ConnectionId([u8; Digest::LENGTH]); - -// Invariant assumed by `ConnectionId`, `Digest` must be <= than `KeyFingerprint`. -const_assert!(KeyFingerprint::LENGTH >= Digest::LENGTH); -// We also assume it is at least 12 bytes. -const_assert!(Digest::LENGTH >= 12); - -/// Random data derived from TLS connections. -#[derive(Copy, Clone, Debug)] -pub(super) struct TlsRandomData { - /// Random data extract from the client of the connection. - combined_random: [u8; 12], -} - -/// Zero-randomness. -/// -/// Used to check random data. -const ZERO_RANDOMNESS: [u8; 12] = [0; 12]; - -impl TlsRandomData { - /// Collects random data from an existing SSL collection. - /// - /// Ideally we would use the TLS session ID, but it is not available on outgoing connections at - /// the times we need it. Instead, we use the `server_random` and `client_random` nonces, which - /// will be the same on both ends of the connection. - fn collect(ssl: &SslRef) -> Self { - // We are using only the first 12 bytes of these 32 byte values here, just in case we missed - // something in our assessment that hashing these should be safe. Additionally, these values - // are XOR'd, not concatenated. All this is done to prevent leaking information about these - // numbers. - // - // Some SSL implementations use timestamps for the first four bytes, so to be sufficiently - // random, we use 4 + 8 bytes of the nonces. 
- let mut server_random = [0; 12]; - let mut client_random = [0; 12]; - - ssl.server_random(&mut server_random); - - if server_random == ZERO_RANDOMNESS { - warn!("TLS server random is all zeros"); - } - - ssl.client_random(&mut client_random); - - if server_random == ZERO_RANDOMNESS { - warn!("TLS client random is all zeros"); - } - - // Combine using XOR. - utils::xor(&mut server_random, &client_random); - - Self { - combined_random: server_random, - } - } - - /// Creates random `TlsRandomData`. - #[cfg(test)] - fn random(rng: &mut TestRng) -> Self { - let mut buffer = [0u8; 12]; - - rng.fill_bytes(&mut buffer); - - Self { - combined_random: buffer, - } - } -} - -impl ConnectionId { - /// Creates a new connection ID, based on random values from server and client, as well as - /// node IDs. - fn create(random_data: TlsRandomData, our_id: NodeId, their_id: NodeId) -> ConnectionId { - // Hash the resulting random values. - let mut id = Digest::hash(random_data.combined_random).value(); - - // We XOR in a hashes of server and client fingerprint, to ensure that in the case of an - // accidental collision (e.g. when `server_random` and `client_random` turn out to be all - // zeros), we still have a chance of producing a reasonable ID. - utils::xor(&mut id, &our_id.hash_bytes()[0..Digest::LENGTH]); - utils::xor(&mut id, &their_id.hash_bytes()[0..Digest::LENGTH]); - - ConnectionId(id) - } - - /// Creates a new [`TraceID`] based on the message count. - /// - /// The `flag` should be created using the [`Role::in_flag`] or [`Role::out_flag`] method and - /// must be created accordingly (`out_flag` when serializing, `in_flag` when deserializing). - fn create_trace_id(&self, flag: u8, count: u64) -> TraceId { - // Copy the basic network ID. - let mut buffer = self.0; - - // Direction set on first byte. - buffer[0] ^= flag; - - // XOR in message count. - utils::xor(&mut buffer[4..12], &count.to_ne_bytes()); - - // Hash again and truncate. 
- let full_hash = Digest::hash(buffer); - - // Safe to expect here, as we assert earlier that `Digest` is at least 12 bytes. - let truncated = TryFrom::try_from(&full_hash.value()[0..8]).expect("buffer size mismatch"); - - TraceId(truncated) - } - - #[inline] - /// Returns a reference to the raw bytes of the connection ID. - pub(crate) fn as_bytes(&self) -> &[u8] { - &self.0 - } - - /// Creates a new connection ID from an existing SSL connection. - #[inline] - pub(crate) fn from_connection(ssl: &SslRef, our_id: NodeId, their_id: NodeId) -> Self { - Self::create(TlsRandomData::collect(ssl), our_id, their_id) - } - - /// Creates a random `ConnectionId`. - #[cfg(test)] - pub(super) fn random(rng: &mut TestRng) -> Self { - ConnectionId::create( - TlsRandomData::random(rng), - NodeId::random(rng), - NodeId::random(rng), - ) - } -} - -/// Message sending direction. -#[derive(Copy, Clone, Debug)] -#[repr(u8)] -pub(super) enum Role { - /// Dialer, i.e. initiator of the connection. - Dialer, - /// Listener, acceptor of the connection. - Listener, -} - -impl Role { - /// Returns a flag suitable for hashing incoming messages. - #[inline] - fn in_flag(self) -> u8 { - !(self.out_flag()) - } - - /// Returns a flag suitable for hashing outgoing messages. - #[inline] - fn out_flag(self) -> u8 { - // The magic flag uses 50% of the bits, to be XOR'd into the hash later. - const MAGIC_FLAG: u8 = 0b10101010; - - match self { - Role::Dialer => MAGIC_FLAG, - Role::Listener => !MAGIC_FLAG, - } - } -} - -#[cfg(test)] -mod tests { - use crate::types::NodeId; - - use super::{ConnectionId, Role, TlsRandomData, TraceId}; - - #[test] - fn trace_id_has_16_character() { - let data = [0, 1, 2, 3, 4, 5, 6, 7]; - - let output = format!("{}", TraceId(data)); - - assert_eq!(output.len(), 16); - } - - #[test] - fn can_create_deterministic_trace_id() { - let mut rng = crate::new_rng(); - - // Scenario: Nodes A and B are connecting to each other. Both connections are established. 
- let node_a = NodeId::random(&mut rng); - let node_b = NodeId::random(&mut rng); - - // We get two connections, with different Tls random data, but it will be the same on both - // ends of the connection. - let a_to_b_random = TlsRandomData::random(&mut rng); - let a_to_b = ConnectionId::create(a_to_b_random, node_a, node_b); - let a_to_b_alt = ConnectionId::create(a_to_b_random, node_b, node_a); - - // Ensure that either peer ends up with the same connection id. - assert_eq!(a_to_b, a_to_b_alt); - - let b_to_a_random = TlsRandomData::random(&mut rng); - let b_to_a = ConnectionId::create(b_to_a_random, node_b, node_a); - let b_to_a_alt = ConnectionId::create(b_to_a_random, node_a, node_b); - assert_eq!(b_to_a, b_to_a_alt); - - // The connection IDs must be distinct though. - assert_ne!(a_to_b, b_to_a); - - // We are only looking at messages sent on the `a_to_b` connection, although from both ends. - // In our example example, `node_a` is the dialing node, `node_b` the listener. - - // Trace ID on A, after sending to B. - let msg_ab_0_on_a = a_to_b.create_trace_id(Role::Dialer.out_flag(), 0); - - // The same message on B. - let msg_ab_0_on_b = a_to_b.create_trace_id(Role::Listener.in_flag(), 0); - - // These trace IDs must match. - assert_eq!(msg_ab_0_on_a, msg_ab_0_on_b); - - // The second message must have a distinct trace ID. - let msg_ab_1_on_a = a_to_b.create_trace_id(Role::Dialer.out_flag(), 1); - let msg_ab_1_on_b = a_to_b.create_trace_id(Role::Listener.in_flag(), 1); - assert_eq!(msg_ab_1_on_a, msg_ab_1_on_b); - assert_ne!(msg_ab_0_on_a, msg_ab_1_on_a); - - // Sending a message on the **same connection** in a **different direction** also must yield - // a different message id. 
- let msg_ba_0_on_b = a_to_b.create_trace_id(Role::Listener.out_flag(), 0); - let msg_ba_0_on_a = a_to_b.create_trace_id(Role::Dialer.in_flag(), 0); - assert_eq!(msg_ba_0_on_b, msg_ba_0_on_a); - assert_ne!(msg_ba_0_on_b, msg_ab_0_on_b); - } -} diff --git a/node/src/components/network/error.rs b/node/src/components/network/error.rs index 3a4d324676..f749e6e756 100644 --- a/node/src/components/network/error.rs +++ b/node/src/components/network/error.rs @@ -1,6 +1,7 @@ -use std::{error, io, net::SocketAddr, result}; +use std::{io, net::SocketAddr}; use datasize::DataSize; +use juliet::rpc::RpcServerError; use openssl::{error::ErrorStack, ssl}; use serde::Serialize; use thiserror::Error; @@ -8,13 +9,12 @@ use thiserror::Error; use casper_hashing::Digest; use casper_types::{crypto, ProtocolVersion}; +use super::Channel; use crate::{ tls::{LoadCertError, ValidationError}, utils::ResolveAddressError, }; -pub(super) type Result = result::Result; - /// Error type returned by the `Network` component. #[derive(Debug, Error, Serialize)] pub enum Error { @@ -43,20 +43,21 @@ pub enum Error { #[source] io::Error, ), - /// Failed to convert std TCP listener to tokio TCP listener. - #[error("failed to convert listener to tokio")] - ListenerConversion( - #[serde(skip_serializing)] - #[source] - io::Error, - ), /// Could not resolve root node address. - #[error("failed to resolve network address")] + #[error("failed to resolve network address as ipv4")] ResolveAddr( #[serde(skip_serializing)] #[source] ResolveAddressError, ), + /// Could not open the specified keylog file for appending. + #[error("could not open keylog for appending")] + CannotAppendToKeylog( + #[serde(skip_serializing)] + #[source] + io::Error, + ), + /// Instantiating metrics failed. #[error(transparent)] Metrics( @@ -95,7 +96,7 @@ impl DataSize for ConnectionError { } } -/// An error related to an incoming or outgoing connection. +/// An error related to the establishment of an incoming or outgoing connection. 
#[derive(Debug, Error, Serialize)] pub enum ConnectionError { /// Failed to create TLS acceptor. @@ -112,6 +113,9 @@ pub enum ConnectionError { #[source] io::Error, ), + /// TCP connection did not finish in time. + #[error("TCP connection timeout")] + TcpConnectionTimeout, /// Did not succeed setting TCP_NODELAY on the connection. #[error("Could not set TCP_NODELAY on outgoing connection")] TcpNoDelay( @@ -134,18 +138,10 @@ pub enum ConnectionError { PeerCertificateInvalid(#[source] ValidationError), /// Failed to send handshake. #[error("handshake send failed")] - HandshakeSend( - #[serde(skip_serializing)] - #[source] - IoError, - ), + HandshakeSend(#[source] RawFrameIoError), /// Failed to receive handshake. #[error("handshake receive failed")] - HandshakeRecv( - #[serde(skip_serializing)] - #[source] - IoError, - ), + HandshakeRecv(#[source] RawFrameIoError), /// Peer reported a network name that does not match ours. #[error("peer is on different network: {0}")] WrongNetwork(String), @@ -162,12 +158,15 @@ pub enum ConnectionError { /// Peer did not send any message, or a non-handshake as its first message. #[error("peer did not send handshake")] DidNotSendHandshake, + /// Handshake did not complete in time. + #[error("could not complete handshake in time")] + SetupTimeout, /// Failed to encode our handshake. #[error("could not encode our handshake")] CouldNotEncodeOurHandshake( #[serde(skip_serializing)] #[source] - io::Error, + rmp_serde::encode::Error, ), /// A background sender for our handshake panicked or crashed. /// @@ -183,7 +182,7 @@ pub enum ConnectionError { InvalidRemoteHandshakeMessage( #[serde(skip_serializing)] #[source] - io::Error, + rmp_serde::decode::Error, ), /// The peer sent a consensus certificate, but it was invalid. #[error("invalid consensus certificate")] @@ -192,26 +191,50 @@ pub enum ConnectionError { #[source] crypto::Error, ), - /// Failed to reunite handshake sink/stream. - /// - /// This is usually a bug. 
- #[error("handshake sink/stream could not be reunited")] - FailedToReuniteHandshakeSinkAndStream, } -/// IO operation that can time out or close. +/// IO error sending a raw frame. +/// +/// Raw frame IO is used only during the handshake, but comes with its own error conditions. +#[derive(Debug, Error, Serialize)] +pub enum RawFrameIoError { + /// Could not send or receive the raw frame. + #[error("io error")] + Io( + #[serde(skip_serializing)] + #[source] + io::Error, + ), + + /// Length limit violation. + #[error("advertised length of {0} exceeds configured maximum raw frame size")] + MaximumLengthExceeded(usize), +} + +/// An error produced by reading messages. #[derive(Debug, Error)] -pub enum IoError -where - E: error::Error + 'static, -{ - /// IO operation timed out. - #[error("io timeout")] - Timeout, - /// Non-timeout IO error. +pub enum MessageReceiverError { + /// The message receival stack returned an error. #[error(transparent)] - Error(#[from] E), - /// Unexpected close/end-of-file. - #[error("closed unexpectedly")] - UnexpectedEof, + ReceiveError(#[from] RpcServerError), + /// Empty request sent. + /// + /// This should never happen with a well-behaved client, since the current protocol always + /// expects a request to carry a payload. + #[error("empty request")] + EmptyRequest, + /// Error deserializing message. + #[error("message deserialization error")] + DeserializationError(bincode::Error), + /// Invalid channel. + #[error("invalid channel: {0}")] + InvalidChannel(u8), + /// Wrong channel for received message. + #[error("received a {got} message on channel {expected}")] + WrongChannel { + /// The channel the message was actually received on. + got: Channel, + /// The channel on which the message should have been sent. 
+ expected: Channel, + }, } diff --git a/node/src/components/network/event.rs b/node/src/components/network/event.rs index 59c34f1b52..5cac14f4c7 100644 --- a/node/src/components/network/event.rs +++ b/node/src/components/network/event.rs @@ -1,19 +1,14 @@ use std::{ fmt::{self, Debug, Display, Formatter}, - io, mem, - net::SocketAddr, - sync::Arc, + mem, }; use derive_more::From; -use futures::stream::{SplitSink, SplitStream}; use serde::Serialize; use static_assertions::const_assert; use tracing::Span; -use casper_types::PublicKey; - -use super::{error::ConnectionError, FullTransport, GossipedAddress, Message, NodeId}; +use super::{GossipedAddress, Message, NodeId, Ticket}; use crate::{ effect::{ announcements::PeerBehaviorAnnouncement, @@ -23,49 +18,26 @@ use crate::{ }; const _NETWORK_EVENT_SIZE: usize = mem::size_of::>(); -const_assert!(_NETWORK_EVENT_SIZE < 65); +const_assert!(_NETWORK_EVENT_SIZE <= 72); /// A network event. #[derive(Debug, From, Serialize)] -pub(crate) enum Event

{ +pub(crate) enum Event

+where + // Note: See notes on the `OutgoingConnection`'s `P: Serialize` trait bound for details. + P: Serialize, +{ Initialize, - /// The TLS handshake completed on the incoming connection. - IncomingConnection { - incoming: Box>, - #[serde(skip)] - span: Span, - }, - /// Received network message. IncomingMessage { peer_id: Box, msg: Box>, #[serde(skip)] span: Span, - }, - - /// Incoming connection closed. - IncomingClosed { - #[serde(skip_serializing)] - result: io::Result<()>, - peer_id: Box, - peer_addr: SocketAddr, - #[serde(skip_serializing)] - span: Box, - }, - - /// A new outgoing connection was successfully established. - OutgoingConnection { - outgoing: Box>, - #[serde(skip_serializing)] - span: Span, - }, - - /// An established connection was terminated. - OutgoingDropped { - peer_id: Box, - peer_addr: SocketAddr, + /// The backpressure-related ticket for the message. + #[serde(skip)] + ticket: Ticket, }, /// Incoming network request. @@ -85,12 +57,12 @@ pub(crate) enum Event

{ /// The node should gossip its own public listening address. GossipOurAddress, + /// Internet metrics should be updated. + SyncMetrics, + /// We received a peer's public listening address via gossip. PeerAddressReceived(GossipedAddress), - /// Housekeeping for the outgoing manager. - SweepOutgoing, - /// Blocklist announcement. #[from] BlocklistAnnouncement(PeerBehaviorAnnouncement), @@ -108,183 +80,29 @@ impl From for Event { } } -impl Display for Event

{ +impl

Display for Event

+where + P: Display + Serialize, +{ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { Event::Initialize => write!(f, "initialize"), - Event::IncomingConnection { incoming, span: _ } => { - write!(f, "incoming connection: {}", incoming) - } Event::IncomingMessage { peer_id: node_id, msg, span: _, + ticket: _, } => write!(f, "msg from {}: {}", node_id, msg), - Event::IncomingClosed { peer_addr, .. } => { - write!(f, "closed connection from {}", peer_addr) - } - Event::OutgoingConnection { outgoing, span: _ } => { - write!(f, "outgoing connection: {}", outgoing) - } - Event::OutgoingDropped { peer_id, peer_addr } => { - write!(f, "dropped outgoing {} {}", peer_id, peer_addr) - } Event::NetworkRequest { req } => write!(f, "request: {}", req), Event::NetworkInfoRequest { req } => write!(f, "request: {}", req), - Event::GossipOurAddress => write!(f, "gossip our address"), + Event::GossipOurAddress => f.write_str("gossip our address"), + Event::SyncMetrics => f.write_str("sync metrics"), Event::PeerAddressReceived(gossiped_address) => { write!(f, "received gossiped peer address {}", gossiped_address) } Event::BlocklistAnnouncement(ann) => { write!(f, "handling blocklist announcement: {}", ann) } - Event::SweepOutgoing => { - write!(f, "sweep outgoing connections") - } - } - } -} - -/// Outcome of an incoming connection negotiation. -#[derive(Debug, Serialize)] -pub(crate) enum IncomingConnection

{ - /// The connection failed early on, before even a peer's [`NodeId`] could be determined. - FailedEarly { - /// Remote port the peer dialed us from. - peer_addr: SocketAddr, - /// Error causing the failure. - error: ConnectionError, - }, - /// Connection failed after TLS was successfully established; thus we have a valid [`NodeId`]. - Failed { - /// Remote port the peer dialed us from. - peer_addr: SocketAddr, - /// Peer's [`NodeId`]. - peer_id: NodeId, - /// Error causing the failure. - error: ConnectionError, - }, - /// Connection turned out to be a loopback connection. - Loopback, - /// Connection successfully established. - Established { - /// Remote port the peer dialed us from. - peer_addr: SocketAddr, - /// Public address advertised by the peer. - public_addr: SocketAddr, - /// Peer's [`NodeId`]. - peer_id: NodeId, - /// The public key the peer is validating with, if any. - peer_consensus_public_key: Option, - /// Stream of incoming messages. for incoming connections. - #[serde(skip_serializing)] - stream: SplitStream>, - }, -} - -impl

Display for IncomingConnection

{ - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - IncomingConnection::FailedEarly { peer_addr, error } => { - write!(f, "early failure from {}: {}", peer_addr, error) - } - IncomingConnection::Failed { - peer_addr, - peer_id, - error, - } => write!(f, "failure from {}/{}: {}", peer_addr, peer_id, error), - IncomingConnection::Loopback => f.write_str("loopback"), - IncomingConnection::Established { - peer_addr, - public_addr, - peer_id, - peer_consensus_public_key, - stream: _, - } => { - write!( - f, - "connection established from {}/{}; public: {}", - peer_addr, peer_id, public_addr - )?; - - if let Some(public_key) = peer_consensus_public_key { - write!(f, " [{}]", public_key) - } else { - f.write_str(" [no validator id]") - } - } - } - } -} - -/// Outcome of an outgoing connection attempt. -#[derive(Debug, Serialize)] -pub(crate) enum OutgoingConnection

{ - /// The outgoing connection failed early on, before a peer's [`NodeId`] could be determined. - FailedEarly { - /// Address that was dialed. - peer_addr: SocketAddr, - /// Error causing the failure. - error: ConnectionError, - }, - /// Connection failed after TLS was successfully established; thus we have a valid [`NodeId`]. - Failed { - /// Address that was dialed. - peer_addr: SocketAddr, - /// Peer's [`NodeId`]. - peer_id: NodeId, - /// Error causing the failure. - error: ConnectionError, - }, - /// Connection turned out to be a loopback connection. - Loopback { peer_addr: SocketAddr }, - /// Connection successfully established. - Established { - /// Address that was dialed. - peer_addr: SocketAddr, - /// Peer's [`NodeId`]. - peer_id: NodeId, - /// The public key the peer is validating with, if any. - peer_consensus_public_key: Option, - /// Sink for outgoing messages. - #[serde(skip_serializing)] - sink: SplitSink, Arc>>, - /// Holds the information whether the remote node is syncing. - is_syncing: bool, - }, -} - -impl

Display for OutgoingConnection

{ - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - OutgoingConnection::FailedEarly { peer_addr, error } => { - write!(f, "early failure to {}: {}", peer_addr, error) - } - OutgoingConnection::Failed { - peer_addr, - peer_id, - error, - } => write!(f, "failure to {}/{}: {}", peer_addr, peer_id, error), - OutgoingConnection::Loopback { peer_addr } => write!(f, "loopback to {}", peer_addr), - OutgoingConnection::Established { - peer_addr, - peer_id, - peer_consensus_public_key, - sink: _, - is_syncing, - } => { - write!( - f, - "connection established to {}/{}, is_syncing: {}", - peer_addr, peer_id, is_syncing - )?; - - if let Some(public_key) = peer_consensus_public_key { - write!(f, " [{}]", public_key) - } else { - f.write_str(" [no validator id]") - } - } } } } diff --git a/node/src/components/network/gossiped_address.rs b/node/src/components/network/gossiped_address.rs index ade3ac93b0..286c29979b 100644 --- a/node/src/components/network/gossiped_address.rs +++ b/node/src/components/network/gossiped_address.rs @@ -55,15 +55,3 @@ impl From for SocketAddr { gossiped_address.0 } } - -mod specimen_support { - use crate::utils::specimen::{Cache, LargestSpecimen, SizeEstimator}; - - use super::GossipedAddress; - - impl LargestSpecimen for GossipedAddress { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - GossipedAddress::new(LargestSpecimen::largest_specimen(estimator, cache)) - } - } -} diff --git a/node/src/components/network/handshake.rs b/node/src/components/network/handshake.rs new file mode 100644 index 0000000000..764d6be054 --- /dev/null +++ b/node/src/components/network/handshake.rs @@ -0,0 +1,235 @@ +//! Handshake handling for `small_network`. +//! +//! The handshake differs from the rest of the networking code since it is (almost) unmodified since +//! version 1.0, to allow nodes to make informed decisions about blocking other nodes. +//! +//! 
This module contains an implementation for a minimal framing format based on 32-bit fixed size +//! big endian length prefixes. + +use std::net::SocketAddr; + +use casper_types::PublicKey; + +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; + +use serde::{de::DeserializeOwned, Serialize}; +use tracing::debug; + +use super::{ + chain_info::ChainInfo, + connection_id::ConnectionId, + error::{ConnectionError, RawFrameIoError}, + message::NodeKeyPair, + Message, Transport, +}; + +/// The outcome of the handshake process. +pub(crate) struct HandshakeOutcome { + /// A framed transport for peer. + pub(crate) transport: Transport, + /// Public address advertised by the peer. + pub(crate) public_addr: SocketAddr, + /// The public key the peer is validating with, if any. + pub(crate) peer_consensus_public_key: Option>, +} + +/// Reads a 32-bit big endian integer prefix, followed by an actual raw message. +async fn read_length_prefixed_frame( + max_length: u32, + stream: &mut R, +) -> Result, RawFrameIoError> +where + R: AsyncRead + Unpin, +{ + let mut length_prefix_raw: [u8; 4] = [0; 4]; + stream + .read_exact(&mut length_prefix_raw) + .await + .map_err(RawFrameIoError::Io)?; + + let length = u32::from_be_bytes(length_prefix_raw); + + if length > max_length { + return Err(RawFrameIoError::MaximumLengthExceeded(length as usize)); + } + + let mut raw = Vec::new(); // not preallocating, to make DOS attacks harder. + + // We can now read the raw frame and return. + stream + .take(length as u64) + .read_to_end(&mut raw) + .await + .map_err(RawFrameIoError::Io)?; + + Ok(raw) +} + +/// Writes data to an async writer, prefixing it with the 32-bit big endian message length. +/// +/// Output will be flushed after sending. 
+async fn write_length_prefixed_frame(stream: &mut W, data: &[u8]) -> Result<(), RawFrameIoError> +where + W: AsyncWrite + Unpin, +{ + if data.len() > u32::MAX as usize { + return Err(RawFrameIoError::MaximumLengthExceeded(data.len())); + } + + async move { + stream.write_all(&(data.len() as u32).to_be_bytes()).await?; + stream.write_all(data).await?; + stream.flush().await?; + Ok(()) + } + .await + .map_err(RawFrameIoError::Io)?; + + Ok(()) +} + +/// Serializes an item with the encoding settings specified for handshakes. +pub(crate) fn serialize(item: &T) -> Result, rmp_serde::encode::Error> +where + T: Serialize, +{ + rmp_serde::to_vec(item) +} + +/// Deserializes an item with the encoding settings specified for handshakes. +pub(crate) fn deserialize(raw: &[u8]) -> Result +where + T: DeserializeOwned, +{ + rmp_serde::from_slice(raw) +} + +/// Data necessary to perform a handshake. +#[derive(Debug)] +pub(crate) struct HandshakeConfiguration { + /// Chain info extracted from the chainspec. + chain_info: ChainInfo, + /// Optional set of signing keys, to identify as a node during handshake. + node_key_pair: Option, + /// Our own public listening address. + public_addr: SocketAddr, +} + +impl HandshakeConfiguration { + /// Creates a new handshake configuration. + pub(crate) fn new( + chain_info: ChainInfo, + node_key_pair: Option, + public_addr: SocketAddr, + ) -> Self { + Self { + chain_info, + node_key_pair, + public_addr, + } + } + + /// Performs a handshake. + /// + /// This function is cancellation safe. + pub(crate) async fn negotiate_handshake( + &self, + transport: Transport, + ) -> Result { + let connection_id = ConnectionId::from_connection(transport.ssl()); + + // Manually encode a handshake. 
+ let handshake_message = self.chain_info.create_handshake( + self.public_addr, + self.node_key_pair.as_ref(), + connection_id, + ); + + let serialized_handshake_message = + serialize(&handshake_message).map_err(ConnectionError::CouldNotEncodeOurHandshake)?; + + // To ensure we are not dead-locking, we split the transport here and send the handshake in + // a background task before awaiting one ourselves. This ensures we can make progress + // regardless of the size of the outgoing handshake. + let (mut read_half, mut write_half) = tokio::io::split(transport); + + // TODO: This need not be spawned, but could be a local futures unordered. + let handshake_send = tokio::spawn(async move { + write_length_prefixed_frame(&mut write_half, &serialized_handshake_message).await?; + Ok::<_, RawFrameIoError>(write_half) + }); + + // The remote's message should be a handshake, but can technically be any message. We + // receive, deserialize and check it. + let remote_message_raw = read_length_prefixed_frame( + self.chain_info.maximum_handshake_message_size, + &mut read_half, + ) + .await + .map_err(ConnectionError::HandshakeRecv)?; + + // Ensure the handshake was sent correctly. + let write_half = handshake_send + .await + .map_err(ConnectionError::HandshakeSenderCrashed)? + .map_err(ConnectionError::HandshakeSend)?; + + let remote_message: Message<()> = deserialize(&remote_message_raw) + .map_err(ConnectionError::InvalidRemoteHandshakeMessage)?; + + if let Message::<()>::Handshake { + network_name, + public_addr, + protocol_version, + consensus_certificate, + chainspec_hash, + } = remote_message + { + debug!(%protocol_version, "handshake received"); + + // The handshake was valid, we can check the network name. + if network_name != self.chain_info.network_name { + return Err(ConnectionError::WrongNetwork(network_name)); + } + + // If there is a version mismatch, we treat it as a connection error. 
We do not ban + // peers for this error, but instead rely on exponential backoff, as bans would result + // in issues during upgrades where nodes may have a legitimate reason for differing + // versions. + // + // Since we are not using SemVer for versioning, we cannot make any assumptions about + // compatibility, so we allow only exact version matches. + if protocol_version != self.chain_info.protocol_version { + return Err(ConnectionError::IncompatibleVersion(protocol_version)); + } + + // We check the chainspec hash to ensure peer is using the same chainspec as us. + // The remote message should always have a chainspec hash at this point since + // we checked the protocol version previously. + let peer_chainspec_hash = + chainspec_hash.ok_or(ConnectionError::MissingChainspecHash)?; + if peer_chainspec_hash != self.chain_info.chainspec_hash { + return Err(ConnectionError::WrongChainspecHash(peer_chainspec_hash)); + } + + let peer_consensus_public_key = consensus_certificate + .map(|cert| { + cert.validate(connection_id) + .map_err(ConnectionError::InvalidConsensusCertificate) + }) + .transpose()? + .map(Box::new); + + let transport = read_half.unsplit(write_half); + + Ok(HandshakeOutcome { + transport, + public_addr, + peer_consensus_public_key, + }) + } else { + // Received a non-handshake, this is an error. + Err(ConnectionError::DidNotSendHandshake) + } + } +} diff --git a/node/src/components/network/health.rs b/node/src/components/network/health.rs deleted file mode 100644 index 18d018f12e..0000000000 --- a/node/src/components/network/health.rs +++ /dev/null @@ -1,825 +0,0 @@ -//! Health-check state machine. -//! -//! Health checks perform periodic pings to remote peers to ensure the connection is still alive. It -//! has somewhat complicated logic that is encoded in the `ConnectionHealth` struct, which has -//! multiple implicit states. 
- -use std::{ - fmt::{self, Display, Formatter}, - time::{Duration, Instant}, -}; - -use datasize::DataSize; -use rand::Rng; -use serde::{Deserialize, Serialize}; - -use crate::utils::specimen::{Cache, LargestSpecimen, SizeEstimator}; - -/// Connection health information. -/// -/// All data related to the ping/pong functionality used to verify a peer's networking liveness. -#[derive(Clone, Copy, DataSize, Debug)] -pub(crate) struct ConnectionHealth { - /// The moment the connection was established. - pub(crate) connected_since: Instant, - /// The last ping that was requested to be sent. - pub(crate) last_ping_sent: Option, - /// The most recent pong received. - pub(crate) last_pong_received: Option, - /// Number of invalid pongs received, reset upon receiving a valid pong. - pub(crate) invalid_pong_count: u32, - /// Number of pings that timed out. - pub(crate) ping_timeouts: u32, -} - -/// Health check configuration. -#[derive(DataSize, Debug)] -pub(crate) struct HealthConfig { - /// How often to send a ping to ensure a connection is established. - /// - /// Determines how soon after connecting or a successful ping another ping is sent. - pub(crate) ping_interval: Duration, - /// Duration during which a ping must succeed to be considered successful. - pub(crate) ping_timeout: Duration, - /// Number of retries before giving up and disconnecting a peer due to too many failed pings. - pub(crate) ping_retries: u16, - /// How many spurious pongs to tolerate before banning a peer. - pub(crate) pong_limit: u32, -} - -/// A timestamp with an associated nonce. -#[derive(Clone, Copy, DataSize, Debug)] -pub(crate) struct TaggedTimestamp { - /// The actual timestamp. - timestamp: Instant, - /// The nonce of the timestamp. - nonce: Nonce, -} - -impl TaggedTimestamp { - /// Creates a new tagged timestamp with a random nonce. 
- pub(crate) fn new(rng: &mut R, timestamp: Instant) -> Self { - Self { - timestamp, - nonce: rng.gen(), - } - } - - /// Creates a new tagged timestamp from parts. - pub(crate) fn from_parts(timestamp: Instant, nonce: Nonce) -> Self { - TaggedTimestamp { nonce, timestamp } - } - - /// Returns the actual timestamp. - pub(crate) fn timestamp(&self) -> Instant { - self.timestamp - } - - /// Returns the nonce inside the timestamp. - pub(crate) fn nonce(self) -> Nonce { - self.nonce - } -} - -/// A number-used-once, specifically one used in pings. -// Note: This nonce used to be a `u32`, but that is too small - since we immediately disconnect when -// a duplicate ping is generated, a `u32` has a ~ 1/(2^32) chance of a consecutive collision. -// -// If we ping every 5 seconds, this is a ~ 0.01% chance over a month, which is too high over -// thousands over nodes. At 64 bits, in theory the upper bound is 0.0000000002%, which is -// better (the period of the RNG used should be >> 64 bits). -// -// While we do check for consecutive ping nonces being generated, we still like the lower -// collision chance for repeated pings being sent. -#[derive(Clone, Copy, DataSize, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)] -pub(crate) struct Nonce(u64); - -impl Display for Nonce { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "{:016X}", self.0) - } -} - -impl rand::distributions::Distribution for rand::distributions::Standard { - #[inline(always)] - fn sample(&self, rng: &mut R) -> Nonce { - Nonce(rng.gen()) - } -} - -impl ConnectionHealth { - /// Creates a new connection health instance, recording when the connection was established. - pub(crate) fn new(connected_since: Instant) -> Self { - Self { - connected_since, - last_ping_sent: None, - last_pong_received: None, - invalid_pong_count: 0, - ping_timeouts: 0, - } - } -} - -impl ConnectionHealth { - /// Calculate the round-trip time, if possible. 
- pub(crate) fn calc_rrt(&self) -> Option { - match (self.last_ping_sent, self.last_pong_received) { - (Some(last_ping), Some(last_pong)) if last_ping.nonce == last_pong.nonce => { - Some(last_pong.timestamp.duration_since(last_ping.timestamp)) - } - _ => None, - } - } - - /// Check current health status. - /// - /// This function must be polled periodically and returns a potential action to be performed. - pub(crate) fn update_health( - &mut self, - rng: &mut R, - cfg: &HealthConfig, - now: Instant, - ) -> HealthCheckOutcome { - // Having received too many pongs should always result in a disconnect. - if self.invalid_pong_count > cfg.pong_limit { - return HealthCheckOutcome::GiveUp; - } - - // Our honeymoon period is from first establishment of the connection until we send a ping. - if now.saturating_duration_since(self.connected_since) < cfg.ping_interval { - return HealthCheckOutcome::DoNothing; - } - - let send_ping = match self.last_ping_sent { - Some(last_ping) => { - match self.last_pong_received { - Some(prev_pong) if prev_pong.nonce() == last_ping.nonce() => { - // Normal operation. The next ping should be sent in a regular interval - // after receiving the last pong. - now >= prev_pong.timestamp() + cfg.ping_interval - } - - _ => { - // No matching pong on record. Check if we need to timeout the ping. - if now >= last_ping.timestamp() + cfg.ping_timeout { - self.ping_timeouts += 1; - // Clear the `last_ping_sent`, schedule another to be sent. - self.last_ping_sent = None; - true - } else { - false - } - } - } - } - None => true, - }; - - if send_ping { - if self.ping_timeouts > cfg.ping_retries as u32 { - // We have exceeded the timeouts and will give up as a result. - return HealthCheckOutcome::GiveUp; - } - - let ping = loop { - let candidate = TaggedTimestamp::new(rng, now); - - if let Some(prev) = self.last_ping_sent { - if prev.nonce() == candidate.nonce() { - // Ensure we don't produce consecutive pings. 
- continue; - } - } - - break candidate; - }; - - self.last_ping_sent = Some(ping); - HealthCheckOutcome::SendPing(ping.nonce()) - } else { - HealthCheckOutcome::DoNothing - } - } - - /// Records a pong that has been sent. - /// - /// If `true`, the maximum number of pongs has been exceeded and the peer should be banned. - pub(crate) fn record_pong(&mut self, cfg: &HealthConfig, tt: TaggedTimestamp) -> bool { - let is_valid_pong = match self.last_ping_sent { - Some(last_ping) if last_ping.nonce() == tt.nonce => { - // Check if we already received a pong for this ping, which is a protocol violation. - if self - .last_pong_received - .map(|existing| existing.nonce() == tt.nonce) - .unwrap_or(false) - { - // Ping is a collsion, ban. - return true; - } - - if last_ping.timestamp() > tt.timestamp() { - // Ping is from the past somehow, ignore it (probably a bug on our side). - return false; - } - - // The ping is valid if it is within the timeout period. - last_ping.timestamp() + cfg.ping_timeout >= tt.timestamp() - } - _ => { - // Either the nonce did not match, or the nonce mismatched. - false - } - }; - - if is_valid_pong { - // Our pong is valid, reset invalid and ping count, then record it. - self.invalid_pong_count = 0; - self.ping_timeouts = 0; - self.last_pong_received = Some(tt); - false - } else { - self.invalid_pong_count += 1; - // If we have exceeded the invalid pong limit, ban. - self.invalid_pong_count > cfg.pong_limit - } - } -} - -/// The outcome of periodic health check. -#[derive(Clone, Copy, Debug)] - -pub(crate) enum HealthCheckOutcome { - /// Do nothing, as we recently took action. - DoNothing, - /// Send a ping with the given nonce. - SendPing(Nonce), - /// Give up on (i.e. terminate) the connection, as we exceeded the allowable ping limit. 
- GiveUp, -} - -impl LargestSpecimen for Nonce { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - Self(LargestSpecimen::largest_specimen(estimator, cache)) - } -} - -#[cfg(test)] -mod tests { - use std::{collections::HashSet, time::Duration}; - - use assert_matches::assert_matches; - use rand::Rng; - - use super::{ConnectionHealth, HealthCheckOutcome, HealthConfig}; - use crate::{ - components::network::health::TaggedTimestamp, testing::test_clock::TestClock, - types::NodeRng, - }; - - impl HealthConfig { - pub(crate) fn test_config() -> Self { - // Note: These values are assumed in tests, so do not change them. - HealthConfig { - ping_interval: Duration::from_secs(5), - ping_timeout: Duration::from_secs(2), - ping_retries: 3, - pong_limit: 6, - } - } - } - - struct Fixtures { - clock: TestClock, - cfg: HealthConfig, - rng: NodeRng, - health: ConnectionHealth, - } - - /// Sets up fixtures used in almost every test. - fn fixtures() -> Fixtures { - let clock = TestClock::new(); - let cfg = HealthConfig::test_config(); - let rng = crate::new_rng(); - - let health = ConnectionHealth::new(clock.now()); - - Fixtures { - clock, - cfg, - rng, - health, - } - } - - #[test] - fn scenario_no_response() { - let Fixtures { - mut clock, - cfg, - mut rng, - mut health, - } = fixtures(); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - // Repeated checks should not change the outcome. - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - // After 4.9 seconds, we still do not send a ping. - clock.advance(Duration::from_millis(4900)); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - // At 5, we expect our first ping. 
- clock.advance(Duration::from_millis(100)); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - // Checking health again should not result in another ping. - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - clock.advance(Duration::from_millis(100)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - // After two seconds, we expect another ping to be sent, due to timeouts. - clock.advance(Duration::from_millis(2000)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - // At this point, two pings have been sent. Configuration says to retry 3 times, so a total - // of five pings is expected. - clock.advance(Duration::from_millis(2000)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - clock.advance(Duration::from_millis(2000)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - // Finally, without receiving a ping at all, we give up. 
- clock.advance(Duration::from_millis(2000)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::GiveUp - ); - } - - #[test] - fn pings_use_different_nonces() { - let Fixtures { - mut clock, - cfg, - mut rng, - mut health, - } = fixtures(); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - clock.advance(Duration::from_secs(5)); - - let mut nonce_set = HashSet::new(); - - nonce_set.insert(assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - )); - clock.advance(Duration::from_secs(2)); - - nonce_set.insert(assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - )); - clock.advance(Duration::from_secs(2)); - - nonce_set.insert(assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - )); - clock.advance(Duration::from_secs(2)); - - nonce_set.insert(assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - )); - - // Since it is a set, we expect less than 4 items if there were any duplicates. - assert_eq!(nonce_set.len(), 4); - } - - #[test] - fn scenario_all_working() { - let Fixtures { - mut clock, - cfg, - mut rng, - mut health, - } = fixtures(); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - // At 5 seconds, we expect our first ping. - clock.advance(Duration::from_secs(5)); - - let nonce_1 = assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - ); - - // Record a reply 500 ms later. - clock.advance(Duration::from_millis(500)); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), nonce_1))); - - // Our next pong should be 5 seconds later, not 4.5. 
- clock.advance(Duration::from_millis(4500)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - clock.advance(Duration::from_millis(500)); - - let nonce_2 = assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - ); - - // We test an edge case here where we use the same timestamp for the received pong. - clock.advance(Duration::from_millis(500)); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), nonce_2))); - - // Afterwards, no ping should be sent. - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - // Do 1000 additional ping/pongs. - for _ in 0..1000 { - clock.advance(Duration::from_millis(5000)); - let nonce = assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - ); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - clock.advance(Duration::from_millis(250)); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), nonce))); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - } - } - - #[test] - fn scenario_intermittent_failures() { - let Fixtures { - mut clock, - cfg, - mut rng, - mut health, - } = fixtures(); - - // We miss two pings initially, before recovering. 
- clock.advance(Duration::from_secs(5)); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - clock.advance(Duration::from_secs(2)); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - clock.advance(Duration::from_secs(2)); - - let nonce_1 = assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - ); - - clock.advance(Duration::from_secs(1)); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), nonce_1))); - - // We successfully "recovered", this should reset our ping counts. Miss three pings before - // successfully receiving a pong from 4th from here on out. - clock.advance(Duration::from_millis(5500)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - clock.advance(Duration::from_millis(2500)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - clock.advance(Duration::from_millis(2500)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - clock.advance(Duration::from_millis(2500)); - let nonce_2 = assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - ); - clock.advance(Duration::from_millis(500)); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), nonce_2))); - - // This again should reset. We miss four more pings and are disconnected. 
- clock.advance(Duration::from_millis(5500)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - clock.advance(Duration::from_millis(2500)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - clock.advance(Duration::from_millis(2500)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - clock.advance(Duration::from_millis(2500)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - clock.advance(Duration::from_millis(2500)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::GiveUp - ); - } - - #[test] - fn ignores_unwanted_pongs() { - let Fixtures { - mut clock, - cfg, - mut rng, - mut health, - } = fixtures(); - - clock.advance(Duration::from_secs(5)); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - // Make the `ConnectionHealth` receive some unasked pongs, without exceeding the unasked - // pong limit. - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - - // The retry delay is 2 seconds (instead of 5 for the next pong after success), so ensure - // we retry due to not having received the correct nonce in the pong. 
- - clock.advance(Duration::from_secs(2)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - } - - #[test] - fn ensure_excessive_pongs_result_in_ban() { - let Fixtures { - mut clock, - cfg, - mut rng, - mut health, - } = fixtures(); - - clock.advance(Duration::from_secs(5)); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - // Make the `ConnectionHealth` receive some unasked pongs, without exceeding the unasked - // pong limit. - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - // 6 unasked pongs is still okay. - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - assert!(health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - // 7 is too much. - - // For good measure, we expect the health check to also output a disconnect instruction. - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::GiveUp - ); - } - - #[test] - fn time_reversal_does_not_crash_but_is_ignored() { - // Usually a pong for a given (or any) nonce should always be received with a timestamp - // equal or later than the ping sent out. Due to a programming error or a lucky attacker + - // scheduling issue, there is a very minute chance this can actually happen. 
- // - // In these cases, the pongs should just be discarded, not crashing due to a underflow in - // the comparison. - let Fixtures { - mut clock, - cfg, - mut rng, - mut health, - } = fixtures(); - - clock.advance(Duration::from_secs(5)); // t = 5 - - let nonce_1 = assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - ); - - // Ignore the nonce if sent in the past (and also don't crash). - clock.rewind(Duration::from_secs(1)); // t = 4 - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), nonce_1))); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - - // Another ping should be sent out, since `nonce_1` was ignored. - clock.advance(Duration::from_secs(3)); // t = 7 - let nonce_2 = assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(nonce) => nonce - ); - - // Nonce 2 will be received seemingly before the connection was even established. - clock.rewind(Duration::from_secs(3600)); - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), nonce_2))); - } - - #[test] - fn handles_missed_health_checks() { - let Fixtures { - mut clock, - cfg, - mut rng, - mut health, - } = fixtures(); - - clock.advance(Duration::from_secs(15)); - - // We initially exceed our scheduled first ping by 10 seconds. This will cause the ping to - // be sent right there and then. - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - // Going forward 1 second should not change anything. - clock.advance(Duration::from_secs(1)); - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - // After another second, two seconds have passed since sending the first ping in total, so - // send another once. 
- clock.advance(Duration::from_secs(1)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - // We have missed two pings total, now wait an hour. This will trigger the third ping. - clock.advance(Duration::from_secs(3600)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - // Fourth right after - clock.advance(Duration::from_secs(2)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - // Followed by a disconnect. - clock.advance(Duration::from_secs(2)); - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::GiveUp - ); - } - - #[test] - fn ignores_time_travel() { - // Any call of the health update with timestamps that are provably from the past (i.e. - // before a recorded timestamp like a previous ping) should be ignored. - - let Fixtures { - mut clock, - cfg, - mut rng, - mut health, - } = fixtures(); - - clock.advance(Duration::from_secs(5)); // t = 5 - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - - clock.rewind(Duration::from_secs(3)); // t = 2 - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - - clock.advance(Duration::from_secs(4)); // t = 6 - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::DoNothing - ); - clock.advance(Duration::from_secs(1)); // t = 7 - - assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - HealthCheckOutcome::SendPing(_) - ); - } - - #[test] - fn duplicate_pong_immediately_terminates() { - let Fixtures { - mut clock, - cfg, - mut rng, - mut health, - } = fixtures(); - - clock.advance(Duration::from_secs(5)); - let nonce_1 = assert_matches!( - health.update_health(&mut rng, &cfg, clock.now()), - 
HealthCheckOutcome::SendPing(nonce) => nonce - ); - - clock.advance(Duration::from_secs(1)); - - // Recording the pong once is fine, but the second time should result in a ban. - assert!(!health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), nonce_1))); - assert!(health.record_pong(&cfg, TaggedTimestamp::from_parts(clock.now(), nonce_1))); - } -} diff --git a/node/src/components/network/identity.rs b/node/src/components/network/identity.rs index 81a592fcd4..2c642ea7f7 100644 --- a/node/src/components/network/identity.rs +++ b/node/src/components/network/identity.rs @@ -9,7 +9,7 @@ use openssl::{ use thiserror::Error; use tracing::warn; -use super::{Config, IdentityConfig}; +use super::config::{Config, IdentityConfig}; use crate::{ tls::{self, LoadCertError, LoadSecretKeyError, TlsCert, ValidationError}, types::NodeId, @@ -31,12 +31,16 @@ pub(crate) enum Error { /// An ephemeral [PKey] and [TlsCert] that identifies this node #[derive(DataSize, Debug, Clone)] pub(crate) struct Identity { - pub(super) secret_key: Arc>, - pub(super) tls_certificate: Arc, + /// TLS certificate authority associated with this identity. pub(super) network_ca: Option>, + /// TLS certificate associated with this identity. + pub(super) tls_certificate: Arc, + /// Secret key associated with `tls_certificate`. + pub(super) secret_key: Arc>, } impl Identity { + #[inline(always)] fn new(secret_key: PKey, tls_certificate: TlsCert, network_ca: Option) -> Self { Self { secret_key: Arc::new(secret_key), @@ -45,6 +49,12 @@ impl Identity { } } + /// Returns the [`NodeId`] associated with this identity. 
+ #[inline(always)] + pub(crate) fn node_id(&self) -> NodeId { + NodeId::from(self.tls_certificate.public_key_fingerprint()) + } + pub(crate) fn from_config(config: WithDir) -> Result { match &config.value().identity { Some(identity) => Self::from_identity_config(identity), @@ -77,6 +87,13 @@ impl Identity { let tls_certificate = tls::validate_self_signed_cert(not_yet_validated_x509_cert)?; Ok(Identity::new(secret_key, tls_certificate, None)) } + + pub(crate) fn validate_peer_cert(&self, peer_cert: X509) -> Result { + match &self.network_ca { + Some(ca_cert) => tls::validate_cert_with_authority(peer_cert, ca_cert), + None => tls::validate_self_signed_cert(peer_cert), + } + } } impl From<&Identity> for NodeId { diff --git a/node/src/components/network/insights.rs b/node/src/components/network/insights.rs index 3117ac4d9e..ff43bb61b9 100644 --- a/node/src/components/network/insights.rs +++ b/node/src/components/network/insights.rs @@ -6,24 +6,21 @@ //! insights should neither be abused just because they are available. use std::{ - collections::{BTreeSet, HashSet}, fmt::{self, Debug, Display, Formatter}, net::SocketAddr, - sync::atomic::Ordering, - time::{Duration, SystemTime}, + sync::Arc, + time::Instant, }; -use casper_types::{EraId, PublicKey}; +use casper_types::{EraId, PublicKey, TimeDiff}; use serde::Serialize; -use crate::{ - types::NodeId, - utils::{opt_display::OptDisplay, DisplayIter, TimeAnchor}, -}; +use crate::{types::NodeId, utils::opt_display::OptDisplay}; use super::{ - error::ConnectionError, outgoing::OutgoingState, symmetry::ConnectionSymmetry, Network, - OutgoingHandle, Payload, + blocklist::BlocklistJustification, + conman::{Direction, Route, Sentence}, + Network, Payload, }; /// A collection of insights into the active networking component. @@ -35,298 +32,192 @@ pub(crate) struct NetworkInsights { network_ca: bool, /// The public address of the node. public_addr: Option, - /// Whether or not the node is syncing. 
- is_syncing: bool, + /// The fingerprint of a consensus key installed. + consensus_public_key: Option, /// The active era as seen by the networking component. net_active_era: EraId, - /// The list of node IDs that are being preferred due to being active validators. - privileged_active_outgoing_nodes: Option>, - /// The list of node IDs that are being preferred due to being upcoming validators. - privileged_upcoming_outgoing_nodes: Option>, - /// The amount of bandwidth allowance currently buffered, ready to be spent. - unspent_bandwidth_allowance_bytes: Option, - /// Map of outgoing connections, along with their current state. - outgoing_connections: Vec<(SocketAddr, OutgoingInsight)>, - /// Map of incoming connections. - connection_symmetries: Vec<(NodeId, ConnectionSymmetryInsight)>, + /// Addresses for which an outgoing task is currently running. + address_book: Vec, + /// Blocked addresses. + do_not_call_list: Vec, + /// All active routes. + active_routes: Vec, + /// Bans currently active. + blocked: Vec, } -/// Insight into an outgoing connection. +/// Information about existing routes. #[derive(Debug, Serialize)] -struct OutgoingInsight { - /// Whether or not the address is marked unforgettable. - unforgettable: bool, - /// The current connection state. - state: OutgoingStateInsight, +pub(crate) struct RouteInsight { + /// Node ID of the peer. + pub(crate) peer: NodeId, + /// The remote address of the peer. + pub(crate) remote_addr: SocketAddr, + /// Incoming or outgoing? + pub(crate) direction: Direction, + /// The consensus key provided by the peer during handshake. + pub(crate) consensus_key: Option>, + /// Duration since this route was established. + pub(crate) since: TimeDiff, } -/// The state of an outgoing connection, reduced to exportable insights. +/// Information about an existing ban. 
#[derive(Debug, Serialize)] -enum OutgoingStateInsight { - Connecting { - failures_so_far: u8, - since: SystemTime, - }, - Waiting { - failures_so_far: u8, - error: Option, - last_failure: SystemTime, - }, - Connected { - peer_id: NodeId, - peer_addr: SocketAddr, - last_ping_sent: Option, - last_pong_received: Option, - invalid_pong_count: u32, - rtt: Option, - }, - Blocked { - since: SystemTime, - justification: String, - }, - Loopback, +pub(crate) struct SentenceInsight { + /// The peer banned. + pub(crate) peer: NodeId, + /// Time until the ban is lifted. + pub(crate) remaining: Option, + /// Justification for the ban. + pub(crate) justification: BlocklistJustification, } -fn time_delta(now: SystemTime, then: SystemTime) -> impl Display { - OptDisplay::new( - now.duration_since(then) - .map(humantime::format_duration) - .ok(), - "err", - ) +/// Information about an entry of the do-not-call list. +#[derive(Debug, Serialize)] +pub(crate) struct DoNotCallInsight { + /// Address not to be called. + pub(crate) addr: SocketAddr, + /// How long not to call the address. + pub(crate) remaining: Option, } -impl OutgoingStateInsight { - /// Constructs a new outgoing state insight from a given outgoing state. - fn from_outgoing_state

( - anchor: &TimeAnchor, - state: &OutgoingState, ConnectionError>, - ) -> Self { - match state { - OutgoingState::Connecting { - failures_so_far, - since, - } => OutgoingStateInsight::Connecting { - failures_so_far: *failures_so_far, - since: anchor.convert(*since), - }, - OutgoingState::Waiting { - failures_so_far, - error, - last_failure, - } => OutgoingStateInsight::Waiting { - failures_so_far: *failures_so_far, - error: error.as_ref().map(ToString::to_string), - last_failure: anchor.convert(*last_failure), - }, - OutgoingState::Connected { - peer_id, - handle, - health, - } => OutgoingStateInsight::Connected { - peer_id: *peer_id, - peer_addr: handle.peer_addr, - last_ping_sent: health - .last_ping_sent - .map(|tt| anchor.convert(tt.timestamp())), - last_pong_received: health - .last_pong_received - .map(|tt| anchor.convert(tt.timestamp())), - invalid_pong_count: health.invalid_pong_count, - rtt: health.calc_rrt(), - }, - OutgoingState::Blocked { - since, - justification, - } => OutgoingStateInsight::Blocked { - since: anchor.convert(*since), - justification: justification.to_string(), - }, - OutgoingState::Loopback => OutgoingStateInsight::Loopback, +impl SentenceInsight { + /// Creates a new instance from an existing `Route`. + fn collect_from_sentence(now: Instant, peer: NodeId, sentence: &Sentence) -> Self { + let remaining = if sentence.until > now { + Some(sentence.until.duration_since(now).into()) + } else { + None + }; + Self { + peer, + remaining, + justification: sentence.justification.clone(), } } +} - /// Formats the outgoing state insight with times relative to a given timestamp. 
- fn fmt_time_relative(&self, now: SystemTime, f: &mut Formatter<'_>) -> fmt::Result { - match self { - OutgoingStateInsight::Connecting { - failures_so_far, - since, - } => write!( - f, - "connecting (fails: {}), since {}", - failures_so_far, - time_delta(now, *since) - ), - OutgoingStateInsight::Waiting { - failures_so_far, - error, - last_failure, - } => write!( - f, - "waiting (fails: {}, last error: {}), since {}", - failures_so_far, - OptDisplay::new(error.as_ref(), "none"), - time_delta(now, *last_failure) - ), - OutgoingStateInsight::Connected { - peer_id, - peer_addr, - last_ping_sent, - last_pong_received, - invalid_pong_count, - rtt, - } => { - let rtt_ms = rtt.map(|duration| duration.as_millis()); - - write!( - f, - "connected -> {} @ {} (rtt {}, invalid {}, last ping/pong {}/{})", - peer_id, - peer_addr, - OptDisplay::new(rtt_ms, "?"), - invalid_pong_count, - OptDisplay::new(last_ping_sent.map(|t| time_delta(now, t)), "-"), - OptDisplay::new(last_pong_received.map(|t| time_delta(now, t)), "-"), - ) - } - OutgoingStateInsight::Blocked { - since, - justification, - } => { - write!( - f, - "blocked since {}: {}", - time_delta(now, *since), - justification - ) - } - OutgoingStateInsight::Loopback => f.write_str("loopback"), +impl RouteInsight { + /// Creates a new instance from an existing `Route`. + fn collect_from_route(now: Instant, route: &Route) -> Self { + Self { + peer: route.peer, + remote_addr: route.remote_addr, + direction: route.direction, + consensus_key: route.consensus_key.clone(), + since: now.duration_since(route.since).into(), } } } -/// Describes whether a connection is uni- or bi-directional. -#[derive(Debug, Serialize)] -pub(super) enum ConnectionSymmetryInsight { - IncomingOnly { - since: SystemTime, - peer_addrs: BTreeSet, - }, - OutgoingOnly { - since: SystemTime, - }, - Symmetric { - peer_addrs: BTreeSet, - }, - Gone, -} - -impl ConnectionSymmetryInsight { - /// Creates a new insight from a given connection symmetry. 
- fn from_connection_symmetry(anchor: &TimeAnchor, sym: &ConnectionSymmetry) -> Self { - match sym { - ConnectionSymmetry::IncomingOnly { since, peer_addrs } => { - ConnectionSymmetryInsight::IncomingOnly { - since: anchor.convert(*since), - peer_addrs: peer_addrs.clone(), - } - } - ConnectionSymmetry::OutgoingOnly { since } => ConnectionSymmetryInsight::OutgoingOnly { - since: anchor.convert(*since), - }, - ConnectionSymmetry::Symmetric { peer_addrs } => ConnectionSymmetryInsight::Symmetric { - peer_addrs: peer_addrs.clone(), - }, - ConnectionSymmetry::Gone => ConnectionSymmetryInsight::Gone, - } - } +impl DoNotCallInsight { + /// Creates a new instance from an existing entry on the do-not-call list. + fn collect_from_dnc(now: Instant, addr: SocketAddr, until: Instant) -> Self { + let remaining = if until > now { + Some(until.duration_since(now).into()) + } else { + None + }; - /// Formats the connection symmetry insight with times relative to a given timestamp. - fn fmt_time_relative(&self, now: SystemTime, f: &mut Formatter<'_>) -> fmt::Result { - match self { - ConnectionSymmetryInsight::IncomingOnly { since, peer_addrs } => write!( - f, - "<- {} (since {})", - DisplayIter::new(peer_addrs.iter()), - time_delta(now, *since) - ), - ConnectionSymmetryInsight::OutgoingOnly { since } => { - write!(f, "-> (since {})", time_delta(now, *since)) - } - ConnectionSymmetryInsight::Symmetric { peer_addrs } => { - write!(f, "<> {}", DisplayIter::new(peer_addrs.iter())) - } - ConnectionSymmetryInsight::Gone => f.write_str("gone"), - } + DoNotCallInsight { addr, remaining } } } impl NetworkInsights { /// Collect networking insights from a given networking component. - pub(super) fn collect_from_component(net: &Network) -> Self + pub(super) fn collect_from_component

(net: &Network

) -> Self where P: Payload, { - // Since we are at the top level of the component, we gain access to inner values of the - // respective structs. We abuse this to gain debugging insights. Note: If limiters are no - // longer a `trait`, the trait methods can be removed as well in favor of direct access. - let (privileged_active_outgoing_nodes, privileged_upcoming_outgoing_nodes) = net - .outgoing_limiter - .debug_inspect_validators(&net.active_era) - .map(|(a, b)| (Some(a), Some(b))) - .unwrap_or_default(); - - let anchor = TimeAnchor::now(); - - let outgoing_connections = net - .outgoing_manager - .outgoing - .iter() - .map(|(addr, outgoing)| { - let state = OutgoingStateInsight::from_outgoing_state(&anchor, &outgoing.state); - ( - *addr, - OutgoingInsight { - unforgettable: outgoing.is_unforgettable, - state, - }, - ) - }) - .collect(); - - let connection_symmetries = net - .connection_symmetries - .iter() - .map(|(id, sym)| { - ( - *id, - ConnectionSymmetryInsight::from_connection_symmetry(&anchor, sym), - ) - }) - .collect(); + let mut address_book = Vec::new(); + let mut do_not_call_list = Vec::new(); + let mut active_routes = Vec::new(); + let mut blocked = Vec::new(); + + if let Some(ref conman) = net.conman { + // Acquire lock only long enough to copy routing table. + let guard = conman.read_state(); + let now = Instant::now(); + address_book = guard.address_book().iter().cloned().collect(); + + active_routes.extend( + guard + .routing_table() + .values() + .map(|route| RouteInsight::collect_from_route(now, route)), + ); + do_not_call_list.extend( + guard + .do_not_call() + .iter() + .map(|(&addr, &until)| DoNotCallInsight::collect_from_dnc(now, addr, until)), + ); + blocked.extend(guard.banlist().iter().map(|(&peer, sentence)| { + SentenceInsight::collect_from_sentence(now, peer, sentence) + })); + } + + // Sort only after releasing lock. 
+ address_book.sort(); + do_not_call_list.sort_by_key(|dnc| dnc.addr); + active_routes.sort_by_key(|route_insight| route_insight.peer); + blocked.sort_by_key(|sentence_insight| sentence_insight.peer); NetworkInsights { - our_id: net.context.our_id(), - network_ca: net.context.network_ca().is_some(), - public_addr: net.context.public_addr(), - is_syncing: net.context.is_syncing().load(Ordering::Relaxed), + our_id: net.our_id, + network_ca: net.identity.network_ca.is_some(), + public_addr: net.public_addr, + consensus_public_key: net.node_key_pair.as_ref().map(|kp| kp.public_key().clone()), net_active_era: net.active_era, - privileged_active_outgoing_nodes, - privileged_upcoming_outgoing_nodes, - unspent_bandwidth_allowance_bytes: net - .outgoing_limiter - .debug_inspect_unspent_allowance(), - outgoing_connections, - connection_symmetries, + address_book, + do_not_call_list, + active_routes, + blocked, } } } -impl Display for NetworkInsights { +impl Display for DoNotCallInsight { + #[inline(always)] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - let now = SystemTime::now(); + write!( + f, + "{} for another {} ", + self.addr, + OptDisplay::new(self.remaining.as_ref(), "(expired)"), + ) + } +} +impl Display for RouteInsight { + #[inline(always)] + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "{} @ {} [{}] {}, since {}", + self.peer, + self.remote_addr, + self.direction, + OptDisplay::new(self.consensus_key.as_ref(), "no key provided"), + self.since, + ) + } +} + +impl Display for SentenceInsight { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "{} for another {}: {}", + self.peer, + OptDisplay::new(self.remaining.as_ref(), "(expired)"), + self.justification + ) + } +} + +impl Display for NetworkInsights { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { if !self.network_ca { f.write_str("Public ")?; } else { @@ -334,50 +225,40 @@ impl Display for NetworkInsights { } writeln!( f, - "node {} @ {:?} 
(syncing: {})", - self.our_id, self.public_addr, self.is_syncing - )?; - writeln!( - f, - "active era: {} unspent_bandwidth_allowance_bytes: {}", - self.net_active_era, - OptDisplay::new(self.unspent_bandwidth_allowance_bytes, "inactive"), - )?; - let active = self - .privileged_active_outgoing_nodes - .as_ref() - .map(HashSet::iter) - .map(DisplayIter::new); - writeln!( - f, - "privileged active: {}", - OptDisplay::new(active, "inactive") - )?; - let upcoming = self - .privileged_upcoming_outgoing_nodes - .as_ref() - .map(HashSet::iter) - .map(DisplayIter::new); - writeln!( - f, - "privileged upcoming: {}", - OptDisplay::new(upcoming, "inactive") + "node {} @ {}", + self.our_id, + OptDisplay::new(self.public_addr, "no listen addr") )?; - f.write_str("outgoing connections:\n")?; - writeln!(f, "address uf state")?; - for (addr, outgoing) in &self.outgoing_connections { - write!(f, "{:23} {:5} ", addr, outgoing.unforgettable,)?; - outgoing.state.fmt_time_relative(now, f)?; - f.write_str("\n")?; + write!(f, "in {} (according to networking), ", self.net_active_era)?; + + match self.consensus_public_key.as_ref() { + Some(pub_key) => writeln!(f, "consensus pubkey {}", pub_key)?, + None => f.write_str("no consensus key\n")?, + } + + f.write_str("\naddress book:\n")?; + + for addr in &self.address_book { + write!(f, "{} ", addr)?; + } + + f.write_str("\n\ndo-not-call:\n")?; + + for dnc in &self.do_not_call_list { + writeln!(f, "{}", dnc)?; + } + + f.write_str("\nroutes:\n")?; + + for route in &self.active_routes { + writeln!(f, "{}", route)?; } - f.write_str("connection symmetries:\n")?; - writeln!(f, "peer ID symmetry")?; - for (peer_id, symmetry) in &self.connection_symmetries { - write!(f, "{:10} ", peer_id)?; - symmetry.fmt_time_relative(now, f)?; - f.write_str("\n")?; + f.write_str("\nblocklist:\n")?; + + for sentence in &self.blocked { + writeln!(f, "{}", sentence)?; } Ok(()) diff --git a/node/src/components/network/limiter.rs b/node/src/components/network/limiter.rs 
deleted file mode 100644 index fcba95d2af..0000000000 --- a/node/src/components/network/limiter.rs +++ /dev/null @@ -1,550 +0,0 @@ -//! Resource limiters -//! -//! Resource limiters restrict the usable amount of a resource through slowing down the request rate -//! by making each user request an allowance first. - -use std::{ - collections::{HashMap, HashSet}, - sync::{Arc, RwLock}, - time::{Duration, Instant}, -}; - -use prometheus::Counter; -use tokio::{runtime::Handle, sync::Mutex, task}; -use tracing::{error, trace, warn}; - -use casper_types::{EraId, PublicKey}; - -use crate::types::{NodeId, ValidatorMatrix}; - -/// Amount of resource allowed to buffer in `Limiter`. -const STORED_BUFFER_SECS: Duration = Duration::from_secs(2); - -/// A limiter dividing resources into two classes based on their validator status. -/// -/// Any consumer of a specific resource is expected to call `create_handle` for every peer and use -/// the returned handle to request a access to a resource. -/// -/// Imposes a limit on non-validator resources while not limiting active validator resources at all. -#[derive(Debug)] -pub(super) struct Limiter { - /// Shared data across all handles. - data: Arc, - /// Set of active and upcoming validators shared across all handles. - validator_matrix: ValidatorMatrix, -} - -impl Limiter { - /// Creates a new class based limiter. - /// - /// Starts the background worker task as well. - pub(super) fn new( - resources_per_second: u32, - wait_time_sec: Counter, - validator_matrix: ValidatorMatrix, - ) -> Self { - Limiter { - data: Arc::new(LimiterData::new(resources_per_second, wait_time_sec)), - validator_matrix, - } - } - - /// Create a handle for a connection using the given peer and optional consensus key. 
- pub(super) fn create_handle( - &self, - peer_id: NodeId, - consensus_key: Option, - ) -> LimiterHandle { - if let Some(public_key) = consensus_key.as_ref().cloned() { - match self.data.connected_validators.write() { - Ok(mut connected_validators) => { - let _ = connected_validators.insert(peer_id, public_key); - } - Err(_) => { - error!( - "could not update connected validator data set of limiter, lock poisoned" - ); - } - } - } - LimiterHandle { - data: self.data.clone(), - validator_matrix: self.validator_matrix.clone(), - consumer_id: ConsumerId { - _peer_id: peer_id, - consensus_key, - }, - } - } - - pub(super) fn remove_connected_validator(&self, peer_id: &NodeId) { - match self.data.connected_validators.write() { - Ok(mut connected_validators) => { - let _ = connected_validators.remove(peer_id); - } - Err(_) => { - error!( - "could not remove connected validator from data set of limiter, lock poisoned" - ); - } - } - } - - pub(super) fn is_validator_in_era(&self, era: EraId, peer_id: &NodeId) -> bool { - let public_key = match self.data.connected_validators.read() { - Ok(connected_validators) => match connected_validators.get(peer_id) { - None => return false, - Some(public_key) => public_key.clone(), - }, - Err(_) => { - error!("could not read from connected_validators of limiter, lock poisoned"); - return false; - } - }; - - match self.validator_matrix.is_validator_in_era(era, &public_key) { - None => { - warn!(%era, "missing validator weights for given era"); - false - } - Some(is_validator) => is_validator, - } - } - - pub(super) fn debug_inspect_unspent_allowance(&self) -> Option { - Some(task::block_in_place(move || { - Handle::current().block_on(async move { self.data.resources.lock().await.available }) - })) - } - - pub(super) fn debug_inspect_validators( - &self, - current_era: &EraId, - ) -> Option<(HashSet, HashSet)> { - Some(( - self.validator_keys_for_era(current_era), - self.validator_keys_for_era(¤t_era.successor()), - )) - } - - fn 
validator_keys_for_era(&self, era: &EraId) -> HashSet { - self.validator_matrix - .validator_weights(*era) - .map(|validator_weights| validator_weights.validator_public_keys().cloned().collect()) - .unwrap_or_default() - } -} - -/// The limiter's state. -#[derive(Debug)] -struct LimiterData { - /// Number of resource units to allow for non-validators per second. - resources_per_second: u32, - /// A mapping from node IDs to public keys of validators to which we have an outgoing - /// connection. - connected_validators: RwLock>, - /// Information about available resources. - resources: Mutex, - /// Total time spent waiting. - wait_time_sec: Counter, -} - -/// Resource data. -#[derive(Debug)] -struct ResourceData { - /// How many resource units are buffered. - /// - /// May go negative in the case of a deficit. - available: i64, - /// Last time resource data was refilled. - last_refill: Instant, -} - -impl LimiterData { - /// Creates a new set of class based limiter data. - /// - /// Initial resources will be initialized to 0, with the last refill set to the current time. - fn new(resources_per_second: u32, wait_time_sec: Counter) -> Self { - LimiterData { - resources_per_second, - connected_validators: Default::default(), - resources: Mutex::new(ResourceData { - available: 0, - last_refill: Instant::now(), - }), - wait_time_sec, - } - } -} - -/// Peer class for the `Limiter`. -enum PeerClass { - /// A validator. - Validator, - /// Unclassified/low-priority peer. - NonValidator, -} - -/// A per-peer handle for `Limiter`. -#[derive(Debug)] -pub(super) struct LimiterHandle { - /// Data shared between handles and limiter. - data: Arc, - /// Set of active and upcoming validators. - validator_matrix: ValidatorMatrix, - /// Consumer ID for the sender holding this handle. - consumer_id: ConsumerId, -} - -impl LimiterHandle { - /// Waits until the requester is allocated `amount` additional resources. 
- pub(super) async fn request_allowance(&self, amount: u32) { - // As a first step, determine the peer class by checking if our id is in the validator set. - - if self.validator_matrix.is_empty() { - // It is likely that we have not been initialized, thus no node is getting the - // reserved resources. In this case, do not limit at all. - trace!("empty set of validators, not limiting resources at all"); - - return; - } - - let peer_class = if let Some(ref public_key) = self.consumer_id.consensus_key { - if self - .validator_matrix - .is_active_or_upcoming_validator(public_key) - { - PeerClass::Validator - } else { - PeerClass::NonValidator - } - } else { - PeerClass::NonValidator - }; - - match peer_class { - PeerClass::Validator => { - // No limit imposed on validators. - } - PeerClass::NonValidator => { - if self.data.resources_per_second == 0 { - return; - } - - let max_stored_resource = ((self.data.resources_per_second as f64) - * STORED_BUFFER_SECS.as_secs_f64()) - as u32; - - // We are a low-priority sender. Obtain a lock on the resources and wait an - // appropriate amount of time to fill them up. - { - let mut resources = self.data.resources.lock().await; - - while resources.available < 0 { - // Determine time delta since last refill. - let now = Instant::now(); - let elapsed = now - resources.last_refill; - resources.last_refill = now; - - // Add appropriate amount of resources, capped at `max_stored_bytes`. We - // are still maintaining the lock here to avoid issues with other - // low-priority requestors. - resources.available += ((elapsed.as_nanos() - * self.data.resources_per_second as u128) - / 1_000_000_000) as i64; - resources.available = resources.available.min(max_stored_resource as i64); - - // If we do not have enough resources available, sleep until we do. 
- if resources.available < 0 { - let estimated_time_remaining = Duration::from_millis( - (-resources.available) as u64 * 1000 - / self.data.resources_per_second as u64, - ); - - // Note: This sleep call is the reason we are using a tokio mutex - // instead of a regular `std` one, as we are holding it across the - // await point here. - tokio::time::sleep(estimated_time_remaining).await; - self.data - .wait_time_sec - .inc_by(estimated_time_remaining.as_secs_f64()); - } - } - - // Subtract the amount. If available resources go negative as a result, it - // is the next sender's problem. - resources.available -= amount as i64; - } - } - } - } -} - -/// An identity for a consumer. -#[derive(Debug)] -struct ConsumerId { - /// The peer's ID. - _peer_id: NodeId, - /// The remote node's public consensus key. - consensus_key: Option, -} - -#[cfg(test)] -mod tests { - use std::{sync::Arc, time::Duration}; - - use casper_types::{EraId, SecretKey}; - use num_rational::Ratio; - use prometheus::Counter; - use tokio::time::Instant; - - use super::{Limiter, NodeId, PublicKey}; - use crate::{testing::init_logging, types::ValidatorMatrix}; - - /// Something that happens almost immediately, with some allowance for test jitter. - const SHORT_TIME: Duration = Duration::from_millis(250); - - /// Creates a new counter for testing. - fn new_wait_time_sec() -> Counter { - Counter::new("test_time_waiting", "wait time counter used in tests") - .expect("could not create new counter") - } - - #[tokio::test] - async fn unlimited_limiter_is_unlimited() { - let mut rng = crate::new_rng(); - - // We insert one unrelated active validator to avoid triggering the automatic disabling of - // the limiter in case there are no active validators. - let validator_matrix = - ValidatorMatrix::new_with_validator(Arc::new(SecretKey::random(&mut rng))); - let limiter = Limiter::new(0, new_wait_time_sec(), validator_matrix); - - // Try with non-validators or unknown nodes. 
- let handles = vec![ - limiter.create_handle(NodeId::random(&mut rng), Some(PublicKey::random(&mut rng))), - limiter.create_handle(NodeId::random(&mut rng), None), - ]; - - for handle in handles { - let start = Instant::now(); - handle.request_allowance(0).await; - handle.request_allowance(u32::MAX).await; - handle.request_allowance(1).await; - assert!(start.elapsed() < SHORT_TIME); - } - } - - #[tokio::test] - async fn active_validator_is_unlimited() { - let mut rng = crate::new_rng(); - - let secret_key = SecretKey::random(&mut rng); - let consensus_key = PublicKey::from(&secret_key); - let validator_matrix = ValidatorMatrix::new_with_validator(Arc::new(secret_key)); - let limiter = Limiter::new(1_000, new_wait_time_sec(), validator_matrix); - - let handle = limiter.create_handle(NodeId::random(&mut rng), Some(consensus_key)); - - let start = Instant::now(); - handle.request_allowance(0).await; - handle.request_allowance(u32::MAX).await; - handle.request_allowance(1).await; - assert!(start.elapsed() < SHORT_TIME); - } - - #[tokio::test] - async fn inactive_validator_limited() { - let rng = &mut crate::new_rng(); - - // We insert one unrelated active validator to avoid triggering the automatic disabling of - // the limiter in case there are no active validators. - let validator_matrix = - ValidatorMatrix::new_with_validator(Arc::new(SecretKey::random(rng))); - let peers = [ - (NodeId::random(rng), Some(PublicKey::random(rng))), - (NodeId::random(rng), None), - ]; - - let limiter = Limiter::new(1_000, new_wait_time_sec(), validator_matrix); - - for (peer, maybe_public_key) in peers { - let start = Instant::now(); - let handle = limiter.create_handle(peer, maybe_public_key); - - // Send 9_0001 bytes, we expect this to take roughly 15 seconds. 
- handle.request_allowance(1000).await; - handle.request_allowance(1000).await; - handle.request_allowance(1000).await; - handle.request_allowance(2000).await; - handle.request_allowance(4000).await; - handle.request_allowance(1).await; - let elapsed = start.elapsed(); - - assert!( - elapsed >= Duration::from_secs(9), - "{}s", - elapsed.as_secs_f64() - ); - assert!( - elapsed <= Duration::from_secs(10), - "{}s", - elapsed.as_secs_f64() - ); - } - } - - #[tokio::test] - async fn nonvalidators_parallel_limited() { - let mut rng = crate::new_rng(); - - let wait_metric = new_wait_time_sec(); - - // We insert one unrelated active validator to avoid triggering the automatic disabling of - // the limiter in case there are no active validators. - let validator_matrix = - ValidatorMatrix::new_with_validator(Arc::new(SecretKey::random(&mut rng))); - let limiter = Limiter::new(1_000, wait_metric.clone(), validator_matrix); - - let start = Instant::now(); - - // Parallel test, 5 non-validators sharing 1000 bytes per second. Each sends 1001 bytes, so - // total time is expected to be just over 5 seconds. - let join_handles = (0..5) - .map(|_| { - limiter.create_handle(NodeId::random(&mut rng), Some(PublicKey::random(&mut rng))) - }) - .map(|handle| { - tokio::spawn(async move { - handle.request_allowance(500).await; - handle.request_allowance(150).await; - handle.request_allowance(350).await; - handle.request_allowance(1).await; - }) - }); - - for join_handle in join_handles { - join_handle.await.expect("could not join task"); - } - - let elapsed = start.elapsed(); - assert!(elapsed >= Duration::from_secs(5)); - assert!(elapsed <= Duration::from_secs(6)); - - // Ensure metrics recorded the correct number of seconds. - assert!( - wait_metric.get() <= 6.0, - "wait metric is too large: {}", - wait_metric.get() - ); - - // Note: The limiting will not apply to all data, so it should be slightly below 5 seconds. 
- assert!( - wait_metric.get() >= 4.5, - "wait metric is too small: {}", - wait_metric.get() - ); - } - - #[tokio::test] - async fn inactive_validators_unlimited_when_no_validators_known() { - init_logging(); - - let mut rng = crate::new_rng(); - - let secret_key = SecretKey::random(&mut rng); - let consensus_key = PublicKey::from(&secret_key); - let wait_metric = new_wait_time_sec(); - let limiter = Limiter::new( - 1_000, - wait_metric.clone(), - ValidatorMatrix::new( - Ratio::new(1, 3), - None, - EraId::from(0), - Arc::new(secret_key), - consensus_key.clone(), - 2, - ), - ); - - // Try with non-validators or unknown nodes. - let handles = vec![ - limiter.create_handle(NodeId::random(&mut rng), Some(PublicKey::random(&mut rng))), - limiter.create_handle(NodeId::random(&mut rng), None), - ]; - - for handle in handles { - let start = Instant::now(); - - // Send 9_0001 bytes, should now finish instantly. - handle.request_allowance(1000).await; - handle.request_allowance(1000).await; - handle.request_allowance(1000).await; - handle.request_allowance(2000).await; - handle.request_allowance(4000).await; - handle.request_allowance(1).await; - assert!(start.elapsed() < SHORT_TIME); - } - - // There should have been no time spent waiting. - assert!( - wait_metric.get() < SHORT_TIME.as_secs_f64(), - "wait_metric is too large: {}", - wait_metric.get() - ); - } - - /// Regression test for #2929. 
- #[tokio::test] - async fn throttling_of_non_validators_does_not_affect_validators() { - init_logging(); - - let mut rng = crate::new_rng(); - - let secret_key = SecretKey::random(&mut rng); - let consensus_key = PublicKey::from(&secret_key); - let validator_matrix = ValidatorMatrix::new_with_validator(Arc::new(secret_key)); - let limiter = Limiter::new(1_000, new_wait_time_sec(), validator_matrix); - - let non_validator_handle = limiter.create_handle(NodeId::random(&mut rng), None); - let validator_handle = limiter.create_handle(NodeId::random(&mut rng), Some(consensus_key)); - - // We request a large resource at once using a non-validator handle. At the same time, - // validator requests should be still served, even while waiting for the long-delayed - // request still blocking. - let start = Instant::now(); - let background_nv_request = tokio::spawn(async move { - non_validator_handle.request_allowance(5000).await; - non_validator_handle.request_allowance(5000).await; - - Instant::now() - }); - - // Allow for a little bit of time to pass to ensure the background task is running. 
- tokio::time::sleep(Duration::from_secs(1)).await; - - validator_handle.request_allowance(10000).await; - validator_handle.request_allowance(10000).await; - - let v_finished = Instant::now(); - - let nv_finished = background_nv_request - .await - .expect("failed to join background nv task"); - - let nv_completed = nv_finished.duration_since(start); - assert!( - nv_completed >= Duration::from_millis(4500), - "non-validator did not delay sufficiently: {:?}", - nv_completed - ); - - let v_completed = v_finished.duration_since(start); - assert!( - v_completed <= Duration::from_millis(1500), - "validator did not finish quickly enough: {:?}", - v_completed - ); - } -} diff --git a/node/src/components/network/message.rs b/node/src/components/network/message.rs index 02fdb30dce..d73840d57e 100644 --- a/node/src/components/network/message.rs +++ b/node/src/components/network/message.rs @@ -5,28 +5,20 @@ use std::{ }; use datasize::DataSize; -use futures::future::BoxFuture; +use juliet::ChannelId; use serde::{ de::{DeserializeOwned, Error as SerdeError}, Deserialize, Deserializer, Serialize, Serializer, }; -use strum::EnumDiscriminants; +use strum::{Display, EnumCount, EnumIter, FromRepr}; use casper_hashing::Digest; #[cfg(test)] use casper_types::testing::TestRng; use casper_types::{crypto, AsymmetricType, ProtocolVersion, PublicKey, SecretKey, Signature}; -use super::{counting_format::ConnectionId, health::Nonce, BincodeFormat}; -use crate::{ - effect::EffectBuilder, - protocol, - types::{Chainspec, NodeId}, - utils::{ - opt_display::OptDisplay, - specimen::{Cache, LargestSpecimen, SizeEstimator}, - }, -}; +use super::{connection_id::ConnectionId, Ticket}; +use crate::{types::NodeId, utils::opt_display::OptDisplay}; /// The default protocol version to use in absence of one in the protocol version field. 
#[inline] @@ -34,10 +26,10 @@ fn default_protocol_version() -> ProtocolVersion { ProtocolVersion::V1_0_0 } -#[derive(Clone, Debug, Deserialize, Serialize, EnumDiscriminants)] -#[strum_discriminants(derive(strum::EnumIter))] +#[derive(Clone, Debug, Deserialize, Serialize)] #[allow(clippy::large_enum_variant)] pub(crate) enum Message

{ + // TODO: Remove. Handshake { /// Network we are connected to. network_name: String, @@ -49,34 +41,20 @@ pub(crate) enum Message

{ /// A self-signed certificate indicating validator status. #[serde(default)] consensus_certificate: Option, - /// True if the node is syncing. - #[serde(default)] - is_syncing: bool, /// Hash of the chainspec the node is running. #[serde(default)] chainspec_hash: Option, }, - /// A ping request. - Ping { - /// The nonce to be returned with the pong. - nonce: Nonce, - }, - /// A pong response. - Pong { - /// Nonce to match pong to ping. - nonce: Nonce, - }, Payload(P), } impl Message

{ /// Classifies a message based on its payload. #[inline] + #[allow(dead_code)] // TODO: Re-add, once decision is made whether to keep message classes. pub(super) fn classify(&self) -> MessageKind { match self { - Message::Handshake { .. } | Message::Ping { .. } | Message::Pong { .. } => { - MessageKind::Protocol - } + Message::Handshake { .. } => MessageKind::Protocol, Message::Payload(payload) => payload.message_kind(), } } @@ -85,63 +63,36 @@ impl Message

{ #[inline] pub(super) fn is_low_priority(&self) -> bool { match self { - Message::Handshake { .. } | Message::Ping { .. } | Message::Pong { .. } => false, + Message::Handshake { .. } => false, Message::Payload(payload) => payload.is_low_priority(), } } - /// Returns the incoming resource estimate of the payload. - #[inline] - pub(super) fn payload_incoming_resource_estimate(&self, weights: &EstimatorWeights) -> u32 { - match self { - Message::Handshake { .. } => 0, - // Ping and Pong have a hardcoded weights. Since every ping will result in a pong being - // sent as a reply, it has a higher weight. - Message::Ping { .. } => 2, - Message::Pong { .. } => 1, - Message::Payload(payload) => payload.incoming_resource_estimate(weights), - } - } - - /// Returns whether or not the payload is unsafe for syncing node consumption. - #[inline] - pub(super) fn payload_is_unsafe_for_syncing_nodes(&self) -> bool { - match self { - Message::Handshake { .. } | Message::Ping { .. } | Message::Pong { .. } => false, - Message::Payload(payload) => payload.is_unsafe_for_syncing_peers(), - } - } - - /// Attempts to create a demand-event from this message. - /// - /// Succeeds if the outer message contains a payload that can be converted into a demand. - pub(super) fn try_into_demand( - self, - effect_builder: EffectBuilder, - sender: NodeId, - ) -> Result<(REv, BoxFuture<'static, Option

>), Box> - where - REv: FromIncoming

+ Send, - { + /// Determine which channel this message should be sent on. + pub(super) fn get_channel(&self) -> Channel { match self { - Message::Handshake { .. } | Message::Ping { .. } | Message::Pong { .. } => { - Err(self.into()) - } - Message::Payload(payload) => { - // Note: For now, the wrapping/unwrap of the payload is a bit unfortunate here. - REv::try_demand_from_incoming(effect_builder, sender, payload) - .map_err(|err| Message::Payload(err).into()) - } + Message::Handshake { .. } => Channel::Network, + Message::Payload(payload) => payload.get_channel(), } } } /// A pair of secret keys used by consensus. -pub(super) struct NodeKeyPair { +#[derive(Clone, DataSize)] +pub(crate) struct NodeKeyPair { secret_key: Arc, public_key: PublicKey, } +impl Debug for NodeKeyPair { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("NodeKeyPair") + .field("secret_key", &"..") + .field("public_key", &self.public_key) + .finish() + } +} + impl NodeKeyPair { /// Creates a new key pair for consensus signing. pub(super) fn new(key_pair: (Arc, PublicKey)) -> Self { @@ -155,6 +106,11 @@ impl NodeKeyPair { fn sign>(&self, value: T) -> Signature { crypto::sign(value, &self.secret_key, &self.public_key) } + + /// Returns a reference to the public key of this key pair. + pub(super) fn public_key(&self) -> &PublicKey { + &self.public_key + } } /// Certificate used to indicate that the peer is a validator using the specified public key. @@ -291,22 +247,19 @@ impl Display for Message

{ public_addr, protocol_version, consensus_certificate, - is_syncing, chainspec_hash, } => { write!( f, - "handshake: {}, public addr: {}, protocol_version: {}, consensus_certificate: {}, is_syncing: {}, chainspec_hash: {}", + "handshake: {}, public addr: {}, protocol_version: {}, consensus_certificate: {}, chainspec_hash: {}", network_name, public_addr, protocol_version, OptDisplay::new(consensus_certificate.as_ref(), "none"), - is_syncing, + OptDisplay::new(chainspec_hash.as_ref(), "none") ) } - Message::Ping { nonce } => write!(f, "ping({})", nonce), - Message::Pong { nonce } => write!(f, "pong({})", nonce), Message::Payload(payload) => write!(f, "payload: {}", payload), } } @@ -314,6 +267,7 @@ impl Display for Message

{ /// A classification system for networking messages. #[derive(Copy, Clone, Debug)] +#[allow(dead_code)] // TODO: Re-add, once decision is made whether or not to keep message classes. pub(crate) enum MessageKind { /// Non-payload messages, like handshakes. Protocol, @@ -354,269 +308,96 @@ impl Display for MessageKind { } } +/// Multiplexed channel identifier used across a single connection. +/// +/// Channels are separated mainly to avoid deadlocking issues where two nodes request a large +/// amount of items from each other simultaneously, with responses being queued behind requests, +/// whilst the latter are buffered due to backpressure. +/// +/// Further separation is done to improve quality of service of certain subsystems, e.g. to +/// guarantee that consensus is not impaired by the transfer of large trie nodes. +#[derive( + Copy, Clone, Debug, Display, Eq, EnumCount, EnumIter, FromRepr, PartialEq, Ord, PartialOrd, +)] +#[repr(u8)] +pub enum Channel { + /// Networking layer messages, handshakes and ping/pong. + Network = 0, + /// Data solely used for syncing being requested. + /// + /// We separate sync data (e.g. trie nodes) requests from regular ("data") requests since the + /// former are not required for a validating node to make progress on consensus, thus + /// separating these can improve latency. + SyncDataRequests = 1, + /// Sync data requests being answered. + /// + /// Responses are separated from requests to ensure liveness (see [`Channel`] documentation). + SyncDataResponses = 2, + /// Requests for data used during regular validator operation. + DataRequests = 3, + /// Responses for data used during regular validator operation. + DataResponses = 4, + /// Consensus-level messages, like finality signature announcements and consensus messages. + Consensus = 5, + /// Regular gossip announcements and responses (e.g. for deploys and blocks). 
+ BulkGossip = 6, +} + +impl Channel { + #[inline(always)] + pub(crate) fn into_channel_id(self) -> ChannelId { + ChannelId::new(self as u8) + } + + /// Returns the name suitable for metrics. + pub(crate) fn metrics_name(&self) -> &'static str { + match self { + Channel::Network => "network", + Channel::SyncDataRequests => "sync_data_requests", + Channel::SyncDataResponses => "sync_data_responses", + Channel::DataRequests => "data_requests", + Channel::DataResponses => "data_responses", + Channel::Consensus => "consensus", + Channel::BulkGossip => "bulk_gossip", + } + } +} + /// Network message payload. /// /// Payloads are what is transferred across the network outside of control messages from the /// networking component itself. pub(crate) trait Payload: - Serialize + DeserializeOwned + Clone + Debug + Display + Send + Sync + 'static + Serialize + DeserializeOwned + Clone + Debug + Display + Send + Sync + Unpin + 'static { /// Classifies the payload based on its contents. fn message_kind(&self) -> MessageKind; - /// The penalty for resource usage of a message to be applied when processed as incoming. - fn incoming_resource_estimate(&self, _weights: &EstimatorWeights) -> u32; - /// Determines if the payload should be considered low priority. fn is_low_priority(&self) -> bool { false } - /// Indicates a message is not safe to send to a syncing node. - /// - /// This functionality should be removed once multiplexed networking lands. - fn is_unsafe_for_syncing_peers(&self) -> bool; + /// Determine which channel a message is supposed to be sent/received on. + fn get_channel(&self) -> Channel; } /// Network message conversion support. pub(crate) trait FromIncoming

{ /// Creates a new value from a received payload. - fn from_incoming(sender: NodeId, payload: P) -> Self; - - /// Tries to convert a payload into a demand. - /// - /// This function can optionally be called before `from_incoming` to attempt to convert an - /// incoming payload into a potential demand. - - // TODO: Replace both this and `from_incoming` with a single function that returns an - // appropriate `Either`. - fn try_demand_from_incoming( - _effect_builder: EffectBuilder, - _sender: NodeId, - payload: P, - ) -> Result<(Self, BoxFuture<'static, Option

>), P> - where - Self: Sized + Send, - { - Err(payload) - } -} -/// A generic configuration for payload weights. -/// -/// Implementors of `Payload` are free to interpret this as they see fit. -/// -/// The default implementation sets all weights to zero. -#[derive(DataSize, Debug, Default, Clone, Deserialize, Serialize)] -pub struct EstimatorWeights { - pub consensus: u32, - pub block_gossip: u32, - pub deploy_gossip: u32, - pub finality_signature_gossip: u32, - pub address_gossip: u32, - pub finality_signature_broadcasts: u32, - pub deploy_requests: u32, - pub deploy_responses: u32, - pub legacy_deploy_requests: u32, - pub legacy_deploy_responses: u32, - pub block_requests: u32, - pub block_responses: u32, - pub block_header_requests: u32, - pub block_header_responses: u32, - pub trie_requests: u32, - pub trie_responses: u32, - pub finality_signature_requests: u32, - pub finality_signature_responses: u32, - pub sync_leap_requests: u32, - pub sync_leap_responses: u32, - pub approvals_hashes_requests: u32, - pub approvals_hashes_responses: u32, - pub execution_results_requests: u32, - pub execution_results_responses: u32, -} - -mod specimen_support { - use std::iter; - - use serde::Serialize; - - use crate::utils::specimen::{ - largest_variant, Cache, LargestSpecimen, SizeEstimator, HIGHEST_UNICODE_CODEPOINT, - }; - - use super::{ConsensusCertificate, Message, MessageDiscriminants}; - - impl

LargestSpecimen for Message

- where - P: Serialize + LargestSpecimen, - { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - let largest_network_name = estimator.parameter("network_name_limit"); - - largest_variant::( - estimator, - |variant| match variant { - MessageDiscriminants::Handshake => Message::Handshake { - network_name: iter::repeat(HIGHEST_UNICODE_CODEPOINT) - .take(largest_network_name) - .collect(), - public_addr: LargestSpecimen::largest_specimen(estimator, cache), - protocol_version: LargestSpecimen::largest_specimen(estimator, cache), - consensus_certificate: LargestSpecimen::largest_specimen(estimator, cache), - is_syncing: LargestSpecimen::largest_specimen(estimator, cache), - chainspec_hash: LargestSpecimen::largest_specimen(estimator, cache), - }, - MessageDiscriminants::Ping => Message::Ping { - nonce: LargestSpecimen::largest_specimen(estimator, cache), - }, - MessageDiscriminants::Pong => Message::Pong { - nonce: LargestSpecimen::largest_specimen(estimator, cache), - }, - MessageDiscriminants::Payload => { - Message::Payload(LargestSpecimen::largest_specimen(estimator, cache)) - } - }, - ) - } - } - - impl LargestSpecimen for ConsensusCertificate { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - ConsensusCertificate { - public_key: LargestSpecimen::largest_specimen(estimator, cache), - signature: LargestSpecimen::largest_specimen(estimator, cache), - } - } - } -} - -/// An estimator that uses the serialized network representation as a measure of size. -#[derive(Clone, Debug)] -pub(crate) struct NetworkMessageEstimator<'a> { - /// The chainspec to retrieve estimation values from. - chainspec: &'a Chainspec, -} - -impl<'a> NetworkMessageEstimator<'a> { - /// Creates a new network message estimator. - pub(crate) fn new(chainspec: &'a Chainspec) -> Self { - Self { chainspec } - } - - /// Returns a parameter by name as `i64`. 
- fn get_parameter(&self, name: &'static str) -> Option { - Some(match name { - // The name limit will be larger than the actual name, so it is a safe upper bound. - "network_name_limit" => self.chainspec.network_config.name.len() as i64, - // These limits are making deploys bigger than they actually are, since many items - // have both a `contract_name` and an `entry_point`. We accept 2X as an upper bound. - "contract_name_limit" => self.chainspec.deploy_config.max_deploy_size as i64, - "entry_point_limit" => self.chainspec.deploy_config.max_deploy_size as i64, - "recent_era_count" => { - (self.chainspec.core_config.unbonding_delay - - self.chainspec.core_config.auction_delay) as i64 - } - "validator_count" => self.chainspec.core_config.validator_slots as i64, - "minimum_era_height" => self.chainspec.core_config.minimum_era_height as i64, - "era_duration_ms" => self.chainspec.core_config.era_duration.millis() as i64, - "minimum_round_length_ms" => self - .chainspec - .core_config - .minimum_block_time - .millis() - .max(1) as i64, - "max_deploy_size" => self.chainspec.deploy_config.max_deploy_size as i64, - "approvals_hashes" => { - (self.chainspec.deploy_config.block_max_deploy_count - + self.chainspec.deploy_config.block_max_transfer_count) as i64 - } - "max_deploys_per_block" => self.chainspec.deploy_config.block_max_deploy_count as i64, - "max_transfers_per_block" => { - self.chainspec.deploy_config.block_max_transfer_count as i64 - } - "average_approvals_per_deploy_in_block" => { - let max_total_deploys = (self.chainspec.deploy_config.block_max_deploy_count - + self.chainspec.deploy_config.block_max_transfer_count) - as i64; - - // Note: The +1 is to overestimate, as depending on the serialization format chosen, - // spreading out the approvals can increase or decrease the size. For - // example, in a length-prefixed encoding, putting them all in one may result - // in a smaller size if variable size integer encoding it used. 
In a format - // using separators without trailing separators (e.g. commas in JSON), - // spreading out will reduce the total number of bytes. - ((self.chainspec.deploy_config.block_max_approval_count as i64 + max_total_deploys - - 1) - / max_total_deploys) - .max(0) - + 1 - } - "max_accusations_per_block" => self.chainspec.core_config.validator_slots as i64, - // `RADIX` from EE. - "max_pointer_per_node" => 255, - // Endorsements are currently hard-disabled (via code). If ever re-enabled, this - // parameter should ideally be removed entirely. - "endorsements_enabled" => 0, - _ => return None, - }) - } -} - -/// Encoding helper function. -/// -/// Encodes a message in the same manner the network component would before sending it. -fn serialize_net_message(data: &T) -> Vec -where - T: Serialize, -{ - BincodeFormat::default() - .serialize_arbitrary(data) - .expect("did not expect serialization to fail") -} - -/// Creates a serialized specimen of the largest possible networking message. -pub(crate) fn generate_largest_message(chainspec: &Chainspec) -> Message { - let estimator = &NetworkMessageEstimator::new(chainspec); - let cache = &mut Cache::default(); - - Message::largest_specimen(estimator, cache) -} - -pub(crate) fn generate_largest_serialized_message(chainspec: &Chainspec) -> Vec { - serialize_net_message(&generate_largest_message(chainspec)) -} - -impl<'a> SizeEstimator for NetworkMessageEstimator<'a> { - fn estimate(&self, val: &T) -> usize { - serialize_net_message(&val).len() - } - - fn parameter>(&self, name: &'static str) -> T { - let value = self - .get_parameter(name) - .unwrap_or_else(|| panic!("missing parameter \"{}\" for specimen estimation", name)); - - T::try_from(value).unwrap_or_else(|_| { - panic!( - "Failed to convert the parameter `{name}` of value `{value}` to the type `{}`", - core::any::type_name::() - ) - }) - } + fn from_incoming(sender: NodeId, payload: P, ticket: Ticket) -> Self; } #[cfg(test)] // We use a variety of weird names in 
these tests. #[allow(non_camel_case_types)] mod tests { - use std::{net::SocketAddr, pin::Pin}; + use std::net::SocketAddr; - use assert_matches::assert_matches; - use bytes::BytesMut; use casper_types::ProtocolVersion; use serde::{de::DeserializeOwned, Deserialize, Serialize}; - use tokio_serde::{Deserializer, Serializer}; - use crate::{components::network::message_pack_format::MessagePackFormat, protocol}; + use crate::{components::network::handshake, protocol}; use super::*; @@ -700,22 +481,12 @@ mod tests { /// Serialize a message using the standard serialization method for handshakes. fn serialize_message(msg: &M) -> Vec { - let mut serializer = MessagePackFormat; - - Pin::new(&mut serializer) - .serialize(&msg) - .expect("handshake serialization failed") - .into_iter() - .collect() + handshake::serialize(msg).expect("handshake serialization failed") } /// Deserialize a message using the standard deserialization method for handshakes. fn deserialize_message(serialized: &[u8]) -> M { - let mut deserializer = MessagePackFormat; - - Pin::new(&mut deserializer) - .deserialize(&BytesMut::from(serialized)) - .expect("message deserialization failed") + handshake::deserialize(serialized).expect("message deserialization failed") } /// Given a message `from` of type `F`, serializes it, then deserializes it as `T`. 
@@ -766,7 +537,6 @@ mod tests { public_addr: ([12, 34, 56, 78], 12346).into(), protocol_version: ProtocolVersion::from_parts(5, 6, 7), consensus_certificate: Some(ConsensusCertificate::random(&mut rng)), - is_syncing: false, chainspec_hash: Some(Digest::hash("example-chainspec")), }; @@ -800,7 +570,6 @@ mod tests { public_addr, protocol_version, consensus_certificate, - is_syncing, chainspec_hash, } = modern_handshake { @@ -808,7 +577,6 @@ mod tests { assert_eq!(public_addr, ([12, 34, 56, 78], 12346).into()); assert_eq!(protocol_version, ProtocolVersion::V1_0_0); assert!(consensus_certificate.is_none()); - assert!(!is_syncing); assert!(chainspec_hash.is_none()) } else { panic!("did not expect modern handshake to deserialize to anything but") @@ -824,16 +592,13 @@ mod tests { public_addr, protocol_version, consensus_certificate, - is_syncing, chainspec_hash, } = modern_handshake { - assert!(!is_syncing); assert_eq!(network_name, "serialization-test"); assert_eq!(public_addr, ([12, 34, 56, 78], 12346).into()); assert_eq!(protocol_version, ProtocolVersion::V1_0_0); assert!(consensus_certificate.is_none()); - assert!(!is_syncing); assert!(chainspec_hash.is_none()) } else { panic!("did not expect modern handshake to deserialize to anything but") @@ -849,14 +614,12 @@ mod tests { public_addr, protocol_version, consensus_certificate, - is_syncing, chainspec_hash, } = modern_handshake { assert_eq!(network_name, "example-handshake"); assert_eq!(public_addr, ([12, 34, 56, 78], 12346).into()); assert_eq!(protocol_version, ProtocolVersion::from_parts(1, 4, 2)); - assert!(!is_syncing); let ConsensusCertificate { public_key, signature, @@ -877,7 +640,6 @@ mod tests { ) .unwrap() ); - assert!(!is_syncing); assert!(chainspec_hash.is_none()) } else { panic!("did not expect modern handshake to deserialize to anything but") @@ -893,11 +655,9 @@ mod tests { public_addr, protocol_version, consensus_certificate, - is_syncing, chainspec_hash, } = modern_handshake { - 
assert!(!is_syncing); assert_eq!(network_name, "example-handshake"); assert_eq!(public_addr, ([12, 34, 56, 78], 12346).into()); assert_eq!(protocol_version, ProtocolVersion::from_parts(1, 4, 3)); @@ -921,7 +681,6 @@ mod tests { ) .unwrap() ); - assert!(!is_syncing); assert!(chainspec_hash.is_none()) } else { panic!("did not expect modern handshake to deserialize to anything but") @@ -953,22 +712,10 @@ mod tests { } #[test] - fn assert_the_largest_specimen_type_and_size() { - let (chainspec, _) = crate::utils::Loadable::from_resources("production"); - let specimen = generate_largest_message(&chainspec); - - assert_matches!( - specimen, - Message::Payload(protocol::Message::GetResponse { .. }), - "the type of the largest possible network message based on the production chainspec has changed" - ); - - let serialized = serialize_net_message(&specimen); - - assert_eq!( - serialized.len(), - 8_388_736, - "the size of the largest possible network message based on the production chainspec has changed" - ); + fn channels_enum_does_not_have_holes() { + for idx in 0..Channel::COUNT { + let result = Channel::from_repr(idx as u8); + result.expect("must not have holes in channel enum"); + } } } diff --git a/node/src/components/network/message_pack_format.rs b/node/src/components/network/message_pack_format.rs deleted file mode 100644 index 27a9ee2457..0000000000 --- a/node/src/components/network/message_pack_format.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! Message pack wire format encoder. -//! -//! This module is used to pin the correct version of message pack used throughout the codebase to -//! our network decoder via `Cargo.toml`; using `tokio_serde::MessagePack` would instead tie it -//! to the dependency specified in `tokio_serde`'s `Cargo.toml`. - -use std::{ - io::{self, Cursor}, - pin::Pin, -}; - -use bytes::{Bytes, BytesMut}; -use serde::{Deserialize, Serialize}; -use tokio_serde::{Deserializer, Serializer}; - -/// msgpack encoder/decoder for messages. 
-#[derive(Debug)] -pub struct MessagePackFormat; - -impl Serializer for MessagePackFormat -where - M: Serialize, -{ - // Note: We cast to `io::Error` because of the `Codec::Error: Into` - // requirement. - type Error = io::Error; - - #[inline] - fn serialize(self: Pin<&mut Self>, item: &M) -> Result { - rmp_serde::to_vec(item) - .map(Into::into) - .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) - } -} - -impl Deserializer for MessagePackFormat -where - for<'de> M: Deserialize<'de>, -{ - type Error = io::Error; - - #[inline] - fn deserialize(self: Pin<&mut Self>, src: &BytesMut) -> Result { - rmp_serde::from_read(Cursor::new(src)) - .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) - } -} diff --git a/node/src/components/network/metrics.rs b/node/src/components/network/metrics.rs index a407b6885a..f1bc427f09 100644 --- a/node/src/components/network/metrics.rs +++ b/node/src/components/network/metrics.rs @@ -1,408 +1,471 @@ -use std::sync::Weak; +use prometheus::{IntCounter, IntGauge, Opts, Registry}; -use prometheus::{Counter, IntCounter, IntGauge, Registry}; -use tracing::debug; +use crate::utils::registered_metric::{DeprecatedMetric, RegisteredMetric, RegistryExt}; -use super::{outgoing::OutgoingMetrics, MessageKind}; -use crate::unregister_metric; +use super::{Channel, PerChannel}; + +#[derive(Debug)] +pub(super) struct ChannelMetrics { + /// The number of requests made by this node on the given channel. + request_out_count: RegisteredMetric, + /// The total sum of payload bytes of requests made by this node on the given channel. + request_out_bytes: RegisteredMetric, + /// The number of responses received by this node on the given channel. + response_in_count: RegisteredMetric, + /// The total sum of payload bytes of responses received by this node on the given channel. + response_in_bytes: RegisteredMetric, + /// The number of requests received by this node on the given channel. 
+ request_in_count: RegisteredMetric, + /// The total sum of payload bytes of requests received by this node on the given channel. + request_in_bytes: RegisteredMetric, + /// The number of responses sent by this node on the given channel. + response_out_count: RegisteredMetric, + /// The total sum of payload bytes of responses sent by this node on the given channel. + response_out_bytes: RegisteredMetric, + /// The number of send failures. + pub(super) send_failures: RegisteredMetric, +} + +impl ChannelMetrics { + /// Constructs a new set of channel metrics for a given channel. + fn new(channel: Channel, registry: &Registry) -> Result { + let mk_opts = + |name, help| Opts::new(name, help).const_label("channel", channel.metrics_name()); + + let request_out_count = registry + .new_int_counter_opts(mk_opts("net_request_out_count", "number of requests sent"))?; + + let request_out_bytes = registry.new_int_counter_opts(mk_opts( + "net_request_out_bytes", + "payload total of requests sent", + ))?; + let response_in_count = registry.new_int_counter_opts(mk_opts( + "net_response_in_count", + "number of responses received", + ))?; + let response_in_bytes = registry.new_int_counter_opts(mk_opts( + "net_response_in_bytes", + "payload total of responses received", + ))?; + let request_in_count = registry.new_int_counter_opts(mk_opts( + "net_request_in_count", + "number of requests received", + ))?; + let request_in_bytes = registry.new_int_counter_opts(mk_opts( + "net_request_in_bytes", + "payload total of requests received", + ))?; + let response_out_count = registry.new_int_counter_opts(mk_opts( + "net_response_out_count", + "number of responses sent", + ))?; + let response_out_bytes = registry.new_int_counter_opts(mk_opts( + "net_response_out_bytes", + "payload total of responses sent", + ))?; + let send_failures = registry.new_int_counter_opts(mk_opts( + "net_send_failures", + "number of directly detected send failures", + ))?; + + Ok(Self { + request_out_count, + 
request_out_bytes, + response_in_count, + response_in_bytes, + request_in_count, + request_in_bytes, + response_out_count, + response_out_bytes, + send_failures, + }) + } + + /// Updates the channel metrics upon receiving an incoming request. + #[inline(always)] + pub(super) fn update_from_incoming_request(&self, payload_len: u64) { + self.request_in_count.inc(); + self.request_in_bytes.inc_by(payload_len); + } + + /// Updates the channel metrics upon having scheduled an outgoing request. + #[inline(always)] + pub(super) fn update_from_outgoing_request(&self, payload_len: u64) { + self.request_out_count.inc(); + self.request_out_bytes.inc_by(payload_len); + } + + /// Updates the channel metrics upon receiving a response to a request. + #[inline(always)] + pub(super) fn update_from_received_response(&self, payload_len: u64) { + self.response_in_count.inc(); + self.response_in_bytes.inc_by(payload_len); + } + + /// Updates the channel metrics upon having sent a response to an incoming request. + #[inline(always)] + pub(super) fn update_from_sent_response(&self, payload_len: u64) { + self.response_out_count.inc(); + self.response_out_bytes.inc_by(payload_len); + } +} /// Network-type agnostic networking metrics. #[derive(Debug)] +#[allow(dead_code)] // TODO: Remove this once deprecated metrics are removed. pub(super) struct Metrics { - /// How often a request was made by a component to broadcast. - pub(super) broadcast_requests: IntCounter, - /// How often a request to send a message directly to a peer was made. - pub(super) direct_message_requests: IntCounter, - /// Number of messages still waiting to be sent out (broadcast and direct). - pub(super) queued_messages: IntGauge, + /// Number of broadcasts attempted. + pub(super) broadcast_requests: RegisteredMetric, + /// Number of gossips sent. + pub(super) gossip_requests: RegisteredMetric, + /// Number of directly sent messages. + pub(super) direct_message_requests: RegisteredMetric, /// Number of connected peers. 
- pub(super) peers: IntGauge, - + pub(super) peers: RegisteredMetric, + /// How many additional messages have been buffered outside of the juliet stack. + pub(super) overflow_buffer_count: RegisteredMetric, + /// How many additional payload bytes have been buffered outside of the juliet stack. + pub(super) overflow_buffer_bytes: RegisteredMetric, + /// Per-channel metrics. + pub(super) channel_metrics: PerChannel, + + // *** Deprecated metrics below *** + /// Number of messages still waiting to be sent out (broadcast and direct). + pub(super) queued_messages: DeprecatedMetric, /// Count of outgoing messages that are protocol overhead. - pub(super) out_count_protocol: IntCounter, + pub(super) out_count_protocol: DeprecatedMetric, /// Count of outgoing messages with consensus payload. - pub(super) out_count_consensus: IntCounter, + pub(super) out_count_consensus: DeprecatedMetric, /// Count of outgoing messages with deploy gossiper payload. - pub(super) out_count_deploy_gossip: IntCounter, - pub(super) out_count_block_gossip: IntCounter, - pub(super) out_count_finality_signature_gossip: IntCounter, + pub(super) out_count_deploy_gossip: DeprecatedMetric, + pub(super) out_count_block_gossip: DeprecatedMetric, + pub(super) out_count_finality_signature_gossip: DeprecatedMetric, /// Count of outgoing messages with address gossiper payload. - pub(super) out_count_address_gossip: IntCounter, + pub(super) out_count_address_gossip: DeprecatedMetric, /// Count of outgoing messages with deploy request/response payload. - pub(super) out_count_deploy_transfer: IntCounter, + pub(super) out_count_deploy_transfer: DeprecatedMetric, /// Count of outgoing messages with block request/response payload. - pub(super) out_count_block_transfer: IntCounter, + pub(super) out_count_block_transfer: DeprecatedMetric, /// Count of outgoing messages with trie request/response payload. 
- pub(super) out_count_trie_transfer: IntCounter, + pub(super) out_count_trie_transfer: DeprecatedMetric, /// Count of outgoing messages with other payload. - pub(super) out_count_other: IntCounter, - + pub(super) out_count_other: DeprecatedMetric, /// Volume in bytes of outgoing messages that are protocol overhead. - pub(super) out_bytes_protocol: IntCounter, + pub(super) out_bytes_protocol: DeprecatedMetric, /// Volume in bytes of outgoing messages with consensus payload. - pub(super) out_bytes_consensus: IntCounter, + pub(super) out_bytes_consensus: DeprecatedMetric, /// Volume in bytes of outgoing messages with deploy gossiper payload. - pub(super) out_bytes_deploy_gossip: IntCounter, - pub(super) out_bytes_block_gossip: IntCounter, - pub(super) out_bytes_finality_signature_gossip: IntCounter, + pub(super) out_bytes_deploy_gossip: DeprecatedMetric, + /// Volume in bytes of outgoing messages with block gossiper payload. + pub(super) out_bytes_block_gossip: DeprecatedMetric, + /// Volume in bytes of outgoing messages with finality signature payload. + pub(super) out_bytes_finality_signature_gossip: DeprecatedMetric, /// Volume in bytes of outgoing messages with address gossiper payload. - pub(super) out_bytes_address_gossip: IntCounter, + pub(super) out_bytes_address_gossip: DeprecatedMetric, /// Volume in bytes of outgoing messages with deploy request/response payload. - pub(super) out_bytes_deploy_transfer: IntCounter, + pub(super) out_bytes_deploy_transfer: DeprecatedMetric, /// Volume in bytes of outgoing messages with block request/response payload. - pub(super) out_bytes_block_transfer: IntCounter, + pub(super) out_bytes_block_transfer: DeprecatedMetric, /// Volume in bytes of outgoing messages with block request/response payload. - pub(super) out_bytes_trie_transfer: IntCounter, + pub(super) out_bytes_trie_transfer: DeprecatedMetric, /// Volume in bytes of outgoing messages with other payload. 
- pub(super) out_bytes_other: IntCounter, - + pub(super) out_bytes_other: DeprecatedMetric, /// Number of outgoing connections in connecting state. - pub(super) out_state_connecting: IntGauge, + pub(super) out_state_connecting: DeprecatedMetric, /// Number of outgoing connections in waiting state. - pub(super) out_state_waiting: IntGauge, + pub(super) out_state_waiting: DeprecatedMetric, /// Number of outgoing connections in connected state. - pub(super) out_state_connected: IntGauge, + pub(super) out_state_connected: DeprecatedMetric, /// Number of outgoing connections in blocked state. - pub(super) out_state_blocked: IntGauge, + pub(super) out_state_blocked: DeprecatedMetric, /// Number of outgoing connections in loopback state. - pub(super) out_state_loopback: IntGauge, - + pub(super) out_state_loopback: DeprecatedMetric, /// Volume in bytes of incoming messages that are protocol overhead. - pub(super) in_bytes_protocol: IntCounter, + pub(super) in_bytes_protocol: DeprecatedMetric, /// Volume in bytes of incoming messages with consensus payload. - pub(super) in_bytes_consensus: IntCounter, + pub(super) in_bytes_consensus: DeprecatedMetric, /// Volume in bytes of incoming messages with deploy gossiper payload. - pub(super) in_bytes_deploy_gossip: IntCounter, - pub(super) in_bytes_block_gossip: IntCounter, - pub(super) in_bytes_finality_signature_gossip: IntCounter, + pub(super) in_bytes_deploy_gossip: DeprecatedMetric, + /// Volume in bytes of incoming messages with block gossiper payload. + pub(super) in_bytes_block_gossip: DeprecatedMetric, + /// Volume in bytes of incoming messages with finality signature gossiper payload. + pub(super) in_bytes_finality_signature_gossip: DeprecatedMetric, /// Volume in bytes of incoming messages with address gossiper payload. - pub(super) in_bytes_address_gossip: IntCounter, + pub(super) in_bytes_address_gossip: DeprecatedMetric, /// Volume in bytes of incoming messages with deploy request/response payload. 
- pub(super) in_bytes_deploy_transfer: IntCounter, + pub(super) in_bytes_deploy_transfer: DeprecatedMetric, /// Volume in bytes of incoming messages with block request/response payload. - pub(super) in_bytes_block_transfer: IntCounter, + pub(super) in_bytes_block_transfer: DeprecatedMetric, /// Volume in bytes of incoming messages with block request/response payload. - pub(super) in_bytes_trie_transfer: IntCounter, + pub(super) in_bytes_trie_transfer: DeprecatedMetric, /// Volume in bytes of incoming messages with other payload. - pub(super) in_bytes_other: IntCounter, - + pub(super) in_bytes_other: DeprecatedMetric, /// Count of incoming messages that are protocol overhead. - pub(super) in_count_protocol: IntCounter, + pub(super) in_count_protocol: DeprecatedMetric, /// Count of incoming messages with consensus payload. - pub(super) in_count_consensus: IntCounter, + pub(super) in_count_consensus: DeprecatedMetric, /// Count of incoming messages with deploy gossiper payload. - pub(super) in_count_deploy_gossip: IntCounter, - pub(super) in_count_block_gossip: IntCounter, - pub(super) in_count_finality_signature_gossip: IntCounter, + pub(super) in_count_deploy_gossip: DeprecatedMetric, + /// Count of incoming messages with block gossiper payload. + pub(super) in_count_block_gossip: DeprecatedMetric, + /// Count of incoming messages with finality signature gossiper payload. + pub(super) in_count_finality_signature_gossip: DeprecatedMetric, /// Count of incoming messages with address gossiper payload. - pub(super) in_count_address_gossip: IntCounter, + pub(super) in_count_address_gossip: DeprecatedMetric, /// Count of incoming messages with deploy request/response payload. - pub(super) in_count_deploy_transfer: IntCounter, + pub(super) in_count_deploy_transfer: DeprecatedMetric, /// Count of incoming messages with block request/response payload. 
- pub(super) in_count_block_transfer: IntCounter, + pub(super) in_count_block_transfer: DeprecatedMetric, /// Count of incoming messages with trie request/response payload. - pub(super) in_count_trie_transfer: IntCounter, + pub(super) in_count_trie_transfer: DeprecatedMetric, /// Count of incoming messages with other payload. - pub(super) in_count_other: IntCounter, - + pub(super) in_count_other: DeprecatedMetric, /// Number of trie requests accepted for processing. - pub(super) requests_for_trie_accepted: IntCounter, + pub(super) requests_for_trie_accepted: DeprecatedMetric, /// Number of trie requests finished (successful or unsuccessful). - pub(super) requests_for_trie_finished: IntCounter, - + pub(super) requests_for_trie_finished: DeprecatedMetric, /// Total time spent delaying outgoing traffic to non-validators due to limiter, in seconds. - pub(super) accumulated_outgoing_limiter_delay: Counter, - /// Total time spent delaying incoming traffic from non-validators due to limiter, in seconds. - pub(super) accumulated_incoming_limiter_delay: Counter, - - /// Registry instance. - registry: Registry, + pub(super) accumulated_outgoing_limiter_delay: DeprecatedMetric, } impl Metrics { /// Creates a new instance of networking metrics. 
pub(super) fn new(registry: &Registry) -> Result { let broadcast_requests = - IntCounter::new("net_broadcast_requests", "number of broadcasting requests")?; - let direct_message_requests = IntCounter::new( + registry.new_int_counter("net_broadcast_requests", "number of broadcasts attempted")?; + let gossip_requests = + registry.new_int_counter("net_gossip_requests", "number of gossips sent")?; + let direct_message_requests = registry.new_int_counter( "net_direct_message_requests", "number of requests to send a message directly to a peer", )?; - let queued_messages = IntGauge::new( + + let peers = registry.new_int_gauge("peers", "number of connected peers")?; + + let overflow_buffer_count = registry.new_int_gauge( + "net_overflow_buffer_count", + "count of outgoing messages buffered outside network stack", + )?; + let overflow_buffer_bytes = registry.new_int_gauge( + "net_overflow_buffer_bytes", + "payload byte sum of outgoing messages buffered outside network stack", + )?; + let channel_metrics = + PerChannel::try_init_with(|channel| ChannelMetrics::new(channel, registry))?; + + // *** Deprecated metrics below *** + let queued_messages = registry.new_deprecated( "net_queued_direct_messages", "number of messages waiting to be sent out", )?; - let peers = IntGauge::new("peers", "number of connected peers")?; - - let out_count_protocol = IntCounter::new( + let out_count_protocol = registry.new_deprecated( "net_out_count_protocol", "count of outgoing messages that are protocol overhead", )?; - let out_count_consensus = IntCounter::new( + let out_count_consensus = registry.new_deprecated( "net_out_count_consensus", "count of outgoing messages with consensus payload", )?; - let out_count_deploy_gossip = IntCounter::new( + let out_count_deploy_gossip = registry.new_deprecated( "net_out_count_deploy_gossip", "count of outgoing messages with deploy gossiper payload", )?; - let out_count_block_gossip = IntCounter::new( + let out_count_block_gossip = registry.new_deprecated( 
"net_out_count_block_gossip", "count of outgoing messages with block gossiper payload", )?; - let out_count_finality_signature_gossip = IntCounter::new( + let out_count_finality_signature_gossip = registry.new_deprecated( "net_out_count_finality_signature_gossip", "count of outgoing messages with finality signature gossiper payload", )?; - let out_count_address_gossip = IntCounter::new( + let out_count_address_gossip = registry.new_deprecated( "net_out_count_address_gossip", "count of outgoing messages with address gossiper payload", )?; - let out_count_deploy_transfer = IntCounter::new( + let out_count_deploy_transfer = registry.new_deprecated( "net_out_count_deploy_transfer", "count of outgoing messages with deploy request/response payload", )?; - let out_count_block_transfer = IntCounter::new( + let out_count_block_transfer = registry.new_deprecated( "net_out_count_block_transfer", "count of outgoing messages with block request/response payload", )?; - let out_count_trie_transfer = IntCounter::new( + let out_count_trie_transfer = registry.new_deprecated( "net_out_count_trie_transfer", "count of outgoing messages with trie payloads", )?; - let out_count_other = IntCounter::new( + let out_count_other = registry.new_deprecated( "net_out_count_other", "count of outgoing messages with other payload", )?; - let out_bytes_protocol = IntCounter::new( + let out_bytes_protocol = registry.new_deprecated( "net_out_bytes_protocol", "volume in bytes of outgoing messages that are protocol overhead", )?; - let out_bytes_consensus = IntCounter::new( + let out_bytes_consensus = registry.new_deprecated( "net_out_bytes_consensus", "volume in bytes of outgoing messages with consensus payload", )?; - let out_bytes_deploy_gossip = IntCounter::new( + let out_bytes_deploy_gossip = registry.new_deprecated( "net_out_bytes_deploy_gossip", "volume in bytes of outgoing messages with deploy gossiper payload", )?; - let out_bytes_block_gossip = IntCounter::new( + let out_bytes_block_gossip = 
registry.new_deprecated( "net_out_bytes_block_gossip", "volume in bytes of outgoing messages with block gossiper payload", )?; - let out_bytes_finality_signature_gossip = IntCounter::new( + let out_bytes_finality_signature_gossip = registry.new_deprecated( "net_out_bytes_finality_signature_gossip", "volume in bytes of outgoing messages with finality signature gossiper payload", )?; - let out_bytes_address_gossip = IntCounter::new( + let out_bytes_address_gossip = registry.new_deprecated( "net_out_bytes_address_gossip", "volume in bytes of outgoing messages with address gossiper payload", )?; - let out_bytes_deploy_transfer = IntCounter::new( + let out_bytes_deploy_transfer = registry.new_deprecated( "net_out_bytes_deploy_transfer", "volume in bytes of outgoing messages with deploy request/response payload", )?; - let out_bytes_block_transfer = IntCounter::new( + let out_bytes_block_transfer = registry.new_deprecated( "net_out_bytes_block_transfer", "volume in bytes of outgoing messages with block request/response payload", )?; - let out_bytes_trie_transfer = IntCounter::new( + let out_bytes_trie_transfer = registry.new_deprecated( "net_out_bytes_trie_transfer", "volume in bytes of outgoing messages with trie payloads", )?; - let out_bytes_other = IntCounter::new( + let out_bytes_other = registry.new_deprecated( "net_out_bytes_other", "volume in bytes of outgoing messages with other payload", )?; - let out_state_connecting = IntGauge::new( + let out_state_connecting = registry.new_deprecated( "out_state_connecting", "number of connections in the connecting state", )?; - let out_state_waiting = IntGauge::new( + let out_state_waiting = registry.new_deprecated( "out_state_waiting", "number of connections in the waiting state", )?; - let out_state_connected = IntGauge::new( + let out_state_connected = registry.new_deprecated( "out_state_connected", "number of connections in the connected state", )?; - let out_state_blocked = IntGauge::new( + let out_state_blocked = 
registry.new_deprecated( "out_state_blocked", "number of connections in the blocked state", )?; - let out_state_loopback = IntGauge::new( + let out_state_loopback = registry.new_deprecated( "out_state_loopback", "number of connections in the loopback state", )?; - let in_count_protocol = IntCounter::new( + let in_count_protocol = registry.new_deprecated( "net_in_count_protocol", "count of incoming messages that are protocol overhead", )?; - let in_count_consensus = IntCounter::new( + let in_count_consensus = registry.new_deprecated( "net_in_count_consensus", "count of incoming messages with consensus payload", )?; - let in_count_deploy_gossip = IntCounter::new( + let in_count_deploy_gossip = registry.new_deprecated( "net_in_count_deploy_gossip", "count of incoming messages with deploy gossiper payload", )?; - let in_count_block_gossip = IntCounter::new( + let in_count_block_gossip = registry.new_deprecated( "net_in_count_block_gossip", "count of incoming messages with block gossiper payload", )?; - let in_count_finality_signature_gossip = IntCounter::new( + let in_count_finality_signature_gossip = registry.new_deprecated( "net_in_count_finality_signature_gossip", "count of incoming messages with finality signature gossiper payload", )?; - let in_count_address_gossip = IntCounter::new( + let in_count_address_gossip = registry.new_deprecated( "net_in_count_address_gossip", "count of incoming messages with address gossiper payload", )?; - let in_count_deploy_transfer = IntCounter::new( + let in_count_deploy_transfer = registry.new_deprecated( "net_in_count_deploy_transfer", "count of incoming messages with deploy request/response payload", )?; - let in_count_block_transfer = IntCounter::new( + let in_count_block_transfer = registry.new_deprecated( "net_in_count_block_transfer", "count of incoming messages with block request/response payload", )?; - let in_count_trie_transfer = IntCounter::new( + let in_count_trie_transfer = registry.new_deprecated( 
"net_in_count_trie_transfer", "count of incoming messages with trie payloads", )?; - let in_count_other = IntCounter::new( + let in_count_other = registry.new_deprecated( "net_in_count_other", "count of incoming messages with other payload", )?; - let in_bytes_protocol = IntCounter::new( + let in_bytes_protocol = registry.new_deprecated( "net_in_bytes_protocol", "volume in bytes of incoming messages that are protocol overhead", )?; - let in_bytes_consensus = IntCounter::new( + let in_bytes_consensus = registry.new_deprecated( "net_in_bytes_consensus", "volume in bytes of incoming messages with consensus payload", )?; - let in_bytes_deploy_gossip = IntCounter::new( + let in_bytes_deploy_gossip = registry.new_deprecated( "net_in_bytes_deploy_gossip", "volume in bytes of incoming messages with deploy gossiper payload", )?; - let in_bytes_block_gossip = IntCounter::new( + let in_bytes_block_gossip = registry.new_deprecated( "net_in_bytes_block_gossip", "volume in bytes of incoming messages with block gossiper payload", )?; - let in_bytes_finality_signature_gossip = IntCounter::new( + let in_bytes_finality_signature_gossip = registry.new_deprecated( "net_in_bytes_finality_signature_gossip", "volume in bytes of incoming messages with finality signature gossiper payload", )?; - let in_bytes_address_gossip = IntCounter::new( + let in_bytes_address_gossip = registry.new_deprecated( "net_in_bytes_address_gossip", "volume in bytes of incoming messages with address gossiper payload", )?; - let in_bytes_deploy_transfer = IntCounter::new( + let in_bytes_deploy_transfer = registry.new_deprecated( "net_in_bytes_deploy_transfer", "volume in bytes of incoming messages with deploy request/response payload", )?; - let in_bytes_block_transfer = IntCounter::new( + let in_bytes_block_transfer = registry.new_deprecated( "net_in_bytes_block_transfer", "volume in bytes of incoming messages with block request/response payload", )?; - let in_bytes_trie_transfer = IntCounter::new( + let 
in_bytes_trie_transfer = registry.new_deprecated( "net_in_bytes_trie_transfer", "volume in bytes of incoming messages with trie payloads", )?; - let in_bytes_other = IntCounter::new( + let in_bytes_other = registry.new_deprecated( "net_in_bytes_other", "volume in bytes of incoming messages with other payload", )?; - let requests_for_trie_accepted = IntCounter::new( + let requests_for_trie_accepted = registry.new_deprecated( "requests_for_trie_accepted", "number of trie requests accepted for processing", )?; - let requests_for_trie_finished = IntCounter::new( + let requests_for_trie_finished = registry.new_deprecated( "requests_for_trie_finished", "number of trie requests finished, successful or not", )?; - let accumulated_outgoing_limiter_delay = Counter::new( + let accumulated_outgoing_limiter_delay = registry.new_deprecated( "accumulated_outgoing_limiter_delay", "seconds spent delaying outgoing traffic to non-validators due to limiter, in seconds", )?; - let accumulated_incoming_limiter_delay = Counter::new( - "accumulated_incoming_limiter_delay", - "seconds spent delaying incoming traffic from non-validators due to limiter, in seconds." 
- )?; - - registry.register(Box::new(broadcast_requests.clone()))?; - registry.register(Box::new(direct_message_requests.clone()))?; - registry.register(Box::new(queued_messages.clone()))?; - registry.register(Box::new(peers.clone()))?; - - registry.register(Box::new(out_count_protocol.clone()))?; - registry.register(Box::new(out_count_consensus.clone()))?; - registry.register(Box::new(out_count_deploy_gossip.clone()))?; - registry.register(Box::new(out_count_block_gossip.clone()))?; - registry.register(Box::new(out_count_finality_signature_gossip.clone()))?; - registry.register(Box::new(out_count_address_gossip.clone()))?; - registry.register(Box::new(out_count_deploy_transfer.clone()))?; - registry.register(Box::new(out_count_block_transfer.clone()))?; - registry.register(Box::new(out_count_trie_transfer.clone()))?; - registry.register(Box::new(out_count_other.clone()))?; - - registry.register(Box::new(out_bytes_protocol.clone()))?; - registry.register(Box::new(out_bytes_consensus.clone()))?; - registry.register(Box::new(out_bytes_deploy_gossip.clone()))?; - registry.register(Box::new(out_bytes_block_gossip.clone()))?; - registry.register(Box::new(out_bytes_finality_signature_gossip.clone()))?; - registry.register(Box::new(out_bytes_address_gossip.clone()))?; - registry.register(Box::new(out_bytes_deploy_transfer.clone()))?; - registry.register(Box::new(out_bytes_block_transfer.clone()))?; - registry.register(Box::new(out_bytes_trie_transfer.clone()))?; - registry.register(Box::new(out_bytes_other.clone()))?; - - registry.register(Box::new(out_state_connecting.clone()))?; - registry.register(Box::new(out_state_waiting.clone()))?; - registry.register(Box::new(out_state_connected.clone()))?; - registry.register(Box::new(out_state_blocked.clone()))?; - registry.register(Box::new(out_state_loopback.clone()))?; - - registry.register(Box::new(in_count_protocol.clone()))?; - registry.register(Box::new(in_count_consensus.clone()))?; - 
registry.register(Box::new(in_count_deploy_gossip.clone()))?; - registry.register(Box::new(in_count_block_gossip.clone()))?; - registry.register(Box::new(in_count_finality_signature_gossip.clone()))?; - registry.register(Box::new(in_count_address_gossip.clone()))?; - registry.register(Box::new(in_count_deploy_transfer.clone()))?; - registry.register(Box::new(in_count_block_transfer.clone()))?; - registry.register(Box::new(in_count_trie_transfer.clone()))?; - registry.register(Box::new(in_count_other.clone()))?; - - registry.register(Box::new(in_bytes_protocol.clone()))?; - registry.register(Box::new(in_bytes_consensus.clone()))?; - registry.register(Box::new(in_bytes_deploy_gossip.clone()))?; - registry.register(Box::new(in_bytes_block_gossip.clone()))?; - registry.register(Box::new(in_bytes_finality_signature_gossip.clone()))?; - registry.register(Box::new(in_bytes_address_gossip.clone()))?; - registry.register(Box::new(in_bytes_deploy_transfer.clone()))?; - registry.register(Box::new(in_bytes_block_transfer.clone()))?; - registry.register(Box::new(in_bytes_trie_transfer.clone()))?; - registry.register(Box::new(in_bytes_other.clone()))?; - - registry.register(Box::new(requests_for_trie_accepted.clone()))?; - registry.register(Box::new(requests_for_trie_finished.clone()))?; - - registry.register(Box::new(accumulated_outgoing_limiter_delay.clone()))?; - registry.register(Box::new(accumulated_incoming_limiter_delay.clone()))?; Ok(Metrics { broadcast_requests, + gossip_requests, direct_message_requests, - queued_messages, + overflow_buffer_count, + overflow_buffer_bytes, peers, + channel_metrics, + queued_messages, out_count_protocol, out_count_consensus, out_count_deploy_gossip, @@ -451,202 +514,6 @@ impl Metrics { requests_for_trie_accepted, requests_for_trie_finished, accumulated_outgoing_limiter_delay, - accumulated_incoming_limiter_delay, - registry: registry.clone(), }) } - - /// Records an outgoing payload. 
- pub(crate) fn record_payload_out(this: &Weak, kind: MessageKind, size: u64) { - if let Some(metrics) = this.upgrade() { - match kind { - MessageKind::Protocol => { - metrics.out_bytes_protocol.inc_by(size); - metrics.out_count_protocol.inc(); - } - MessageKind::Consensus => { - metrics.out_bytes_consensus.inc_by(size); - metrics.out_count_consensus.inc(); - } - MessageKind::DeployGossip => { - metrics.out_bytes_deploy_gossip.inc_by(size); - metrics.out_count_deploy_gossip.inc(); - } - MessageKind::BlockGossip => { - metrics.out_bytes_block_gossip.inc_by(size); - metrics.out_count_block_gossip.inc() - } - MessageKind::FinalitySignatureGossip => { - metrics.out_bytes_finality_signature_gossip.inc_by(size); - metrics.out_count_finality_signature_gossip.inc() - } - MessageKind::AddressGossip => { - metrics.out_bytes_address_gossip.inc_by(size); - metrics.out_count_address_gossip.inc(); - } - MessageKind::DeployTransfer => { - metrics.out_bytes_deploy_transfer.inc_by(size); - metrics.out_count_deploy_transfer.inc(); - } - MessageKind::BlockTransfer => { - metrics.out_bytes_block_transfer.inc_by(size); - metrics.out_count_block_transfer.inc(); - } - MessageKind::TrieTransfer => { - metrics.out_bytes_trie_transfer.inc_by(size); - metrics.out_count_trie_transfer.inc(); - } - MessageKind::Other => { - metrics.out_bytes_other.inc_by(size); - metrics.out_count_other.inc(); - } - } - } else { - debug!("not recording metrics, component already shut down"); - } - } - - /// Records an incoming payload. 
- pub(crate) fn record_payload_in(this: &Weak, kind: MessageKind, size: u64) { - if let Some(metrics) = this.upgrade() { - match kind { - MessageKind::Protocol => { - metrics.in_bytes_protocol.inc_by(size); - metrics.in_count_protocol.inc(); - } - MessageKind::Consensus => { - metrics.in_bytes_consensus.inc_by(size); - metrics.in_count_consensus.inc(); - } - MessageKind::DeployGossip => { - metrics.in_bytes_deploy_gossip.inc_by(size); - metrics.in_count_deploy_gossip.inc(); - } - MessageKind::BlockGossip => { - metrics.in_bytes_block_gossip.inc_by(size); - metrics.in_count_block_gossip.inc(); - } - MessageKind::FinalitySignatureGossip => { - metrics.in_bytes_finality_signature_gossip.inc_by(size); - metrics.in_count_finality_signature_gossip.inc(); - } - MessageKind::AddressGossip => { - metrics.in_bytes_address_gossip.inc_by(size); - metrics.in_count_address_gossip.inc(); - } - MessageKind::DeployTransfer => { - metrics.in_bytes_deploy_transfer.inc_by(size); - metrics.in_count_deploy_transfer.inc(); - } - MessageKind::BlockTransfer => { - metrics.in_bytes_block_transfer.inc_by(size); - metrics.in_count_block_transfer.inc(); - } - MessageKind::TrieTransfer => { - metrics.in_bytes_trie_transfer.inc_by(size); - metrics.in_count_trie_transfer.inc(); - } - MessageKind::Other => { - metrics.in_bytes_other.inc_by(size); - metrics.in_count_other.inc(); - } - } - } else { - debug!("not recording metrics, component already shut down"); - } - } - - /// Creates a set of outgoing metrics that is connected to this set of metrics. - pub(super) fn create_outgoing_metrics(&self) -> OutgoingMetrics { - OutgoingMetrics { - out_state_connecting: self.out_state_connecting.clone(), - out_state_waiting: self.out_state_waiting.clone(), - out_state_connected: self.out_state_connected.clone(), - out_state_blocked: self.out_state_blocked.clone(), - out_state_loopback: self.out_state_loopback.clone(), - } - } - - /// Records that a trie request has been started. 
- pub(super) fn record_trie_request_start(this: &Weak) { - if let Some(metrics) = this.upgrade() { - metrics.requests_for_trie_accepted.inc(); - } else { - debug!("not recording metrics, component already shut down"); - } - } - - /// Records that a trie request has ended. - pub(super) fn record_trie_request_end(this: &Weak) { - if let Some(metrics) = this.upgrade() { - metrics.requests_for_trie_finished.inc(); - } else { - debug!("not recording metrics, component already shut down"); - } - } -} - -impl Drop for Metrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.broadcast_requests); - unregister_metric!(self.registry, self.direct_message_requests); - unregister_metric!(self.registry, self.queued_messages); - unregister_metric!(self.registry, self.peers); - - unregister_metric!(self.registry, self.out_count_protocol); - unregister_metric!(self.registry, self.out_count_consensus); - unregister_metric!(self.registry, self.out_count_deploy_gossip); - unregister_metric!(self.registry, self.out_count_block_gossip); - unregister_metric!(self.registry, self.out_count_finality_signature_gossip); - unregister_metric!(self.registry, self.out_count_address_gossip); - unregister_metric!(self.registry, self.out_count_deploy_transfer); - unregister_metric!(self.registry, self.out_count_block_transfer); - unregister_metric!(self.registry, self.out_count_trie_transfer); - unregister_metric!(self.registry, self.out_count_other); - - unregister_metric!(self.registry, self.out_bytes_protocol); - unregister_metric!(self.registry, self.out_bytes_consensus); - unregister_metric!(self.registry, self.out_bytes_deploy_gossip); - unregister_metric!(self.registry, self.out_bytes_block_gossip); - unregister_metric!(self.registry, self.out_bytes_finality_signature_gossip); - unregister_metric!(self.registry, self.out_bytes_address_gossip); - unregister_metric!(self.registry, self.out_bytes_deploy_transfer); - unregister_metric!(self.registry, 
self.out_bytes_block_transfer); - unregister_metric!(self.registry, self.out_bytes_trie_transfer); - unregister_metric!(self.registry, self.out_bytes_other); - - unregister_metric!(self.registry, self.out_state_connecting); - unregister_metric!(self.registry, self.out_state_waiting); - unregister_metric!(self.registry, self.out_state_connected); - unregister_metric!(self.registry, self.out_state_blocked); - unregister_metric!(self.registry, self.out_state_loopback); - - unregister_metric!(self.registry, self.in_count_protocol); - unregister_metric!(self.registry, self.in_count_consensus); - unregister_metric!(self.registry, self.in_count_deploy_gossip); - unregister_metric!(self.registry, self.in_count_block_gossip); - unregister_metric!(self.registry, self.in_count_finality_signature_gossip); - unregister_metric!(self.registry, self.in_count_address_gossip); - unregister_metric!(self.registry, self.in_count_deploy_transfer); - unregister_metric!(self.registry, self.in_count_block_transfer); - unregister_metric!(self.registry, self.in_count_trie_transfer); - unregister_metric!(self.registry, self.in_count_other); - - unregister_metric!(self.registry, self.in_bytes_protocol); - unregister_metric!(self.registry, self.in_bytes_consensus); - unregister_metric!(self.registry, self.in_bytes_deploy_gossip); - unregister_metric!(self.registry, self.in_bytes_block_gossip); - unregister_metric!(self.registry, self.in_bytes_finality_signature_gossip); - unregister_metric!(self.registry, self.in_bytes_address_gossip); - unregister_metric!(self.registry, self.in_bytes_deploy_transfer); - unregister_metric!(self.registry, self.in_bytes_block_transfer); - unregister_metric!(self.registry, self.in_bytes_trie_transfer); - unregister_metric!(self.registry, self.in_bytes_other); - - unregister_metric!(self.registry, self.requests_for_trie_accepted); - unregister_metric!(self.registry, self.requests_for_trie_finished); - - unregister_metric!(self.registry, 
self.accumulated_outgoing_limiter_delay); - unregister_metric!(self.registry, self.accumulated_incoming_limiter_delay); - } } diff --git a/node/src/components/network/outgoing.rs b/node/src/components/network/outgoing.rs deleted file mode 100644 index 871a968a48..0000000000 --- a/node/src/components/network/outgoing.rs +++ /dev/null @@ -1,1828 +0,0 @@ -//! Management of outgoing connections. -//! -//! This module implements outgoing connection management, decoupled from the underlying transport -//! or any higher-level level parts. It encapsulates the reconnection and blocklisting logic on the -//! `SocketAddr` level. -//! -//! # Basic structure -//! -//! Core of this module is the `OutgoingManager`, which supports the following functionality: -//! -//! * Handed a `SocketAddr`s via the `learn_addr` function, it will permanently maintain a -//! connection to the given address, only giving up if retry thresholds are exceeded, after which -//! it will be forgotten. -//! * `block_addr` and `redeem_addr` can be used to maintain a `SocketAddr`-keyed block list. -//! * `OutgoingManager` maintains an internal routing table. The `get_route` function can be used to -//! retrieve a "route" (typically a `sync::channel` accepting network messages) to a remote peer -//! by `NodeId`. -//! -//! # Requirements -//! -//! `OutgoingManager` is decoupled from the underlying protocol, all of its interactions are -//! performed through [`DialRequest`] and [`DialOutcome`]s. This frees the `OutgoingManager` from -//! having to worry about protocol specifics. -//! -//! Three conditions not expressed in code must be fulfilled for the `OutgoingManager` to function: -//! -//! * The `Dialer` is expected to produce `DialOutcomes` for every dial [`DialRequest::Dial`] -//! eventually. These must be forwarded to the `OutgoingManager` via the `handle_dial_outcome` -//! function. -//! * The `perform_housekeeping` method must be called periodically to give the `OutgoingManager` a -//! 
chance to initiate reconnections and collect garbage. -//! * When a connection is dropped, the connection manager must be notified via -//! `handle_connection_drop`. -//! -//! # Lifecycle -//! -//! The following chart illustrates the lifecycle of an outgoing connection. -//! -//! ```text -//! forget (after n tries) -//! ┌────────────────────────────────────┐ -//! │ learn ▼ -//! │ ┌────────────── unknown/forgotten -//! │ │ (implicit state) -//! │ │ -//! │ │ │ -//! │ │ │ block -//! │ │ │ -//! │ │ │ -//! │ │ ▼ -//! ┌────┴────┐ │ ┌─────────┐ -//! │ │ fail │ block │ │ -//! │ Waiting │◄───────┐ │ ┌─────►│ Blocked │◄──────────┐ -//! ┌───┤ │ │ │ │ │ │ │ -//! │ └────┬────┘ │ │ │ └────┬────┘ │ -//! │ block │ │ │ │ │ │ -//! │ │ timeout │ ▼ │ │ redeem, │ -//! │ │ ┌────┴─────┴───┐ │ block timeout │ -//! │ │ │ │ │ │ -//! │ └───────►│ Connecting │◄──────┘ │ -//! │ │ │ │ -//! │ └─────┬────┬───┘ │ -//! │ │ ▲ │ │ -//! │ success │ │ │ detect │ -//! │ │ │ │ ┌──────────┐ │ -//! │ ┌───────────┐ │ │ │ │ │ │ -//! │ │ │◄────────┘ │ │ │ Loopback │ │ -//! │ │ Connected │ │ └─────►│ │ │ -//! │ │ │ dropped/ │ └──────────┘ │ -//! │ └─────┬─────┴───────────┘ │ -//! │ │ timeout │ -//! │ │ block │ -//! └───────┴─────────────────────────────────────────────────┘ -//! ``` -//! -//! # Timeouts/safety -//! -//! The `sweep` transition for connections usually does not happen during normal operations. Three -//! causes are typical for it: -//! -//! * A configured TCP timeout above [`OutgoingConfig::sweep_timeout`]. -//! * Very slow responses from remote peers (similar to a Slowloris-attack) -//! * Faulty handling by the driver of the [`OutgoingManager`], i.e. the outside component. -//! -//! Should a dial attempt exceed a certain timeout, it is considered failed and put into the waiting -//! state again. -//! -//! If a conflict (multiple successful dial results) occurs, the more recent connection takes -//! precedence over the previous one. This prevents problems when a notification of a terminated -//! 
connection is overtaken by the new connection announcement. - -// Clippy has a lot of false positives due to `span.clone()`-closures. -#![allow(clippy::redundant_clone)] - -use std::{ - collections::{hash_map::Entry, HashMap}, - error::Error, - fmt::{self, Debug, Display, Formatter}, - mem, - net::SocketAddr, - time::{Duration, Instant}, -}; - -use datasize::DataSize; - -use prometheus::IntGauge; -use rand::Rng; -use tracing::{debug, error, error_span, field::Empty, info, trace, warn, Span}; - -use super::{ - blocklist::BlocklistJustification, - display_error, - health::{ConnectionHealth, HealthCheckOutcome, HealthConfig, Nonce, TaggedTimestamp}, - NodeId, -}; - -/// An outgoing connection/address in various states. -#[derive(DataSize, Debug)] -pub struct Outgoing -where - H: DataSize, - E: DataSize, -{ - /// Whether or not the address is unforgettable, see `learn_addr` for details. - pub(super) is_unforgettable: bool, - /// The current state the connection/address is in. - pub(super) state: OutgoingState, -} - -/// Active state for a connection/address. -#[derive(DataSize, Debug)] -pub(crate) enum OutgoingState -where - H: DataSize, - E: DataSize, -{ - /// The outgoing address has been known for the first time and we are currently connecting. - Connecting { - /// Number of attempts that failed, so far. - failures_so_far: u8, - /// Time when the connection attempt was instantiated. - since: Instant, - }, - /// The connection has failed at least one connection attempt and is waiting for a retry. - Waiting { - /// Number of attempts that failed, so far. - failures_so_far: u8, - /// The most recent connection error. - /// - /// If not given, the connection was put into a `Waiting` state due to a sweep timeout. - error: Option, - /// The precise moment when the last connection attempt failed. - last_failure: Instant, - }, - /// An established outgoing connection. - Connected { - /// The peers remote ID. 
- peer_id: NodeId, - /// Handle to a communication channel that can be used to send data to the peer. - /// - /// Can be a channel to decouple sending, or even a direct connection handle. - handle: H, - /// Health of the connection. - health: ConnectionHealth, - }, - /// The address was blocked and will not be retried. - Blocked { - /// Since when the block took effect. - since: Instant, - /// The justification given for blocking. - justification: BlocklistJustification, - }, - /// The address is owned by ourselves and will not be tried again. - Loopback, -} - -impl Display for OutgoingState -where - H: DataSize, - E: DataSize, -{ - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - OutgoingState::Connecting { - failures_so_far, .. - } => { - write!(f, "connecting({})", failures_so_far) - } - OutgoingState::Waiting { - failures_so_far, .. - } => write!(f, "waiting({})", failures_so_far), - OutgoingState::Connected { .. } => write!(f, "connected"), - OutgoingState::Blocked { .. } => write!(f, "blocked"), - OutgoingState::Loopback => write!(f, "loopback"), - } - } -} - -/// The result of dialing `SocketAddr`. -#[derive(Debug)] -pub enum DialOutcome { - /// A connection was successfully established. - Successful { - /// The address dialed. - addr: SocketAddr, - /// A handle to send data down the connection. - handle: H, - /// The remote peer's authenticated node ID. - node_id: NodeId, - /// The moment the connection was established. - when: Instant, - }, - /// The connection attempt failed. - Failed { - /// The address dialed. - addr: SocketAddr, - /// The error encountered while dialing. - error: E, - /// The moment the connection attempt failed. - when: Instant, - }, - /// The connection was aborted, because the remote peer turned out to be a loopback. - Loopback { - /// The address used to connect. - addr: SocketAddr, - }, -} - -impl DialOutcome { - /// Retrieves the socket address from the `DialOutcome`. 
- fn addr(&self) -> SocketAddr { - match self { - DialOutcome::Successful { addr, .. } => *addr, - DialOutcome::Failed { addr, .. } => *addr, - DialOutcome::Loopback { addr, .. } => *addr, - } - } -} - -/// A request made for dialing. -#[derive(Clone, Debug)] -#[must_use] -pub(crate) enum DialRequest { - /// Attempt to connect to the outgoing socket address. - /// - /// For every time this request is emitted, there must be a corresponding call to - /// `handle_dial_outcome` eventually. - /// - /// Any logging of connection issues should be done in the context of `span` for better log - /// output. - Dial { addr: SocketAddr, span: Span }, - - /// Disconnects a potentially existing connection. - /// - /// Used when a peer has been blocked or should be disconnected for other reasons. Note that - /// this request can immediately be followed by a connection request, as in the case of a ping - /// timeout. - Disconnect { handle: H, span: Span }, - - /// Send a ping to a peer. - SendPing { - peer_id: NodeId, - nonce: Nonce, - span: Span, - }, -} - -impl Display for DialRequest -where - H: Display, -{ - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - DialRequest::Dial { addr, .. } => { - write!(f, "dial: {}", addr) - } - DialRequest::Disconnect { handle, .. } => { - write!(f, "disconnect: {}", handle) - } - DialRequest::SendPing { peer_id, nonce, .. } => { - write!(f, "ping[{}]: {}", nonce, peer_id) - } - } - } -} - -#[derive(DataSize, Debug)] -/// Connection settings for the outgoing connection manager. -pub struct OutgoingConfig { - /// The maximum number of attempts before giving up and forgetting an address, if permitted. - pub(crate) retry_attempts: u8, - /// The basic time slot for exponential backoff when reconnecting. - pub(crate) base_timeout: Duration, - /// Time until an outgoing address is unblocked. - pub(crate) unblock_after: Duration, - /// Safety timeout, after which a connection is no longer expected to finish dialing. 
- pub(crate) sweep_timeout: Duration, - /// Health check configuration. - pub(crate) health: HealthConfig, -} - -impl OutgoingConfig { - /// Calculates the backoff time. - /// - /// `failed_attempts` (n) is the number of previous attempts *before* the current failure (thus - /// starting at 0). The backoff time will be double for each attempt. - fn calc_backoff(&self, failed_attempts: u8) -> Duration { - 2u32.pow(failed_attempts as u32) * self.base_timeout - } -} - -/// Manager of outbound connections. -/// -/// See the module documentation for usage suggestions. -#[derive(DataSize, Debug)] -pub struct OutgoingManager -where - H: DataSize, - E: DataSize, -{ - /// Outgoing connections subsystem configuration. - config: OutgoingConfig, - /// Mapping of address to their current connection state. - pub(super) outgoing: HashMap>, - /// Routing table. - /// - /// Contains a mapping from node IDs to connected socket addresses. A missing entry means that - /// the destination is not connected. - routes: HashMap, - /// A set of outgoing metrics. - #[data_size(skip)] - metrics: OutgoingMetrics, -} - -/// A set of metrics used by the outgoing component. -#[derive(Clone, Debug)] -pub(super) struct OutgoingMetrics { - /// Number of outgoing connections in connecting state. - pub(super) out_state_connecting: IntGauge, - /// Number of outgoing connections in waiting state. - pub(super) out_state_waiting: IntGauge, - /// Number of outgoing connections in connected state. - pub(super) out_state_connected: IntGauge, - /// Number of outgoing connections in blocked state. - pub(super) out_state_blocked: IntGauge, - /// Number of outgoing connections in loopback state. - pub(super) out_state_loopback: IntGauge, -} - -// Note: We only implement `Default` here for use in testing with `OutgoingManager::new`. 
-#[cfg(test)] -impl Default for OutgoingMetrics { - fn default() -> Self { - Self { - out_state_connecting: IntGauge::new( - "out_state_connecting", - "internal out_state_connecting", - ) - .unwrap(), - out_state_waiting: IntGauge::new("out_state_waiting", "internal out_state_waiting") - .unwrap(), - out_state_connected: IntGauge::new( - "out_state_connected", - "internal out_state_connected", - ) - .unwrap(), - out_state_blocked: IntGauge::new("out_state_blocked", "internal out_state_blocked") - .unwrap(), - out_state_loopback: IntGauge::new("out_state_loopback", "internal loopback").unwrap(), - } - } -} - -impl OutgoingManager -where - H: DataSize, - E: DataSize, -{ - /// Creates a new outgoing manager with a set of metrics that is not connected to any registry. - #[cfg(test)] - #[inline] - pub(super) fn new(config: OutgoingConfig) -> Self { - Self::with_metrics(config, Default::default()) - } - - /// Creates a new outgoing manager with an already existing set of metrics. - pub(super) fn with_metrics(config: OutgoingConfig, metrics: OutgoingMetrics) -> Self { - Self { - config, - outgoing: Default::default(), - routes: Default::default(), - metrics, - } - } - - /// Returns a reference to the internal metrics. - #[cfg(test)] - fn metrics(&self) -> &OutgoingMetrics { - &self.metrics - } -} - -/// Creates a logging span for a specific connection. -#[inline] -fn make_span(addr: SocketAddr, outgoing: Option<&Outgoing>) -> Span -where - H: DataSize, - E: DataSize, -{ - // Note: The jury is still out on whether we want to create a single span per connection and - // cache it, or create a new one (with the same connection ID) each time this is called. The - // advantage of the former is external tools have it easier correlating all related - // information, while the drawback is not being able to change the parent span link, which - // might be awkward. - - if let Some(outgoing) = outgoing { - match outgoing.state { - OutgoingState::Connected { peer_id, .. 
} => { - error_span!("outgoing", %addr, state=%outgoing.state, %peer_id, consensus_key=Empty) - } - _ => { - error_span!("outgoing", %addr, state=%outgoing.state, peer_id=Empty, consensus_key=Empty) - } - } - } else { - error_span!("outgoing", %addr, state = "-") - } -} - -impl OutgoingManager -where - H: DataSize + Clone, - E: DataSize + Error, -{ - /// Changes the state of an outgoing connection. - /// - /// Will trigger an update of the routing table if necessary. Does not emit any other - /// side-effects. - /// - /// Returns the new state, as well as any residual handle. - fn change_outgoing_state( - &mut self, - addr: SocketAddr, - mut new_state: OutgoingState, - ) -> (&mut Outgoing, Option) { - let (prev_state, new_outgoing) = match self.outgoing.entry(addr) { - Entry::Vacant(vacant) => { - let inserted = vacant.insert(Outgoing { - state: new_state, - is_unforgettable: false, - }); - - (None, inserted) - } - - Entry::Occupied(occupied) => { - let prev = occupied.into_mut(); - - mem::swap(&mut prev.state, &mut new_state); - - // `new_state` and `prev.state` are swapped now. - (Some(new_state), prev) - } - }; - - // Update the routing table. - match (&prev_state, &new_outgoing.state) { - (Some(OutgoingState::Connected { .. }), OutgoingState::Connected { .. }) => { - trace!("route unchanged, already connected"); - } - - // Dropping from connected to any other state requires clearing the route. - (Some(OutgoingState::Connected { peer_id, .. }), _) => { - debug!(%peer_id, "route removed"); - self.routes.remove(peer_id); - } - - // Otherwise we have established a new route. - (_, OutgoingState::Connected { peer_id, .. }) => { - debug!(%peer_id, "route added"); - self.routes.insert(*peer_id, addr); - } - - _ => { - trace!("route unchanged"); - } - } - - // Update the metrics, decreasing the count of the state that was left, while increasing - // the new state. Note that this will lead to a non-atomic dec/inc if the previous state - // was the same as before. 
- match prev_state { - Some(OutgoingState::Blocked { .. }) => self.metrics.out_state_blocked.dec(), - Some(OutgoingState::Connected { .. }) => self.metrics.out_state_connected.dec(), - Some(OutgoingState::Connecting { .. }) => self.metrics.out_state_connecting.dec(), - Some(OutgoingState::Loopback) => self.metrics.out_state_loopback.dec(), - Some(OutgoingState::Waiting { .. }) => self.metrics.out_state_waiting.dec(), - None => { - // Nothing to do, there was no previous state. - } - } - - match new_outgoing.state { - OutgoingState::Blocked { .. } => self.metrics.out_state_blocked.inc(), - OutgoingState::Connected { .. } => self.metrics.out_state_connected.inc(), - OutgoingState::Connecting { .. } => self.metrics.out_state_connecting.inc(), - OutgoingState::Loopback => self.metrics.out_state_loopback.inc(), - OutgoingState::Waiting { .. } => self.metrics.out_state_waiting.inc(), - } - - // Finally, deconstruct the previous state in case we need to preserve the handle. - let handle = if let Some(OutgoingState::Connected { handle, .. }) = prev_state { - Some(handle) - } else { - None - }; - - (new_outgoing, handle) - } - - /// Retrieves the address by peer. - pub(crate) fn get_addr(&self, peer_id: NodeId) -> Option { - self.routes.get(&peer_id).copied() - } - - /// Retrieves a handle to a peer. - /// - /// Primary function to send data to peers; clients retrieve a handle to it which can then - /// be used to send data. - pub(crate) fn get_route(&self, peer_id: NodeId) -> Option<&H> { - let outgoing = self.outgoing.get(self.routes.get(&peer_id)?)?; - - if let OutgoingState::Connected { ref handle, .. } = outgoing.state { - Some(handle) - } else { - None - } - } - - /// Iterates over all connected peer IDs. - pub(crate) fn connected_peers(&'_ self) -> impl Iterator + '_ { - self.routes.keys().cloned() - } - - /// Notify about a potentially new address that has been discovered. 
- /// - /// Immediately triggers the connection process to said address if it was not known before. - /// - /// A connection marked `unforgettable` will never be evicted but reset instead when it exceeds - /// the retry limit. - pub(crate) fn learn_addr( - &mut self, - addr: SocketAddr, - unforgettable: bool, - now: Instant, - ) -> Option> { - let span = make_span(addr, self.outgoing.get(&addr)); - span.clone() - .in_scope(move || match self.outgoing.entry(addr) { - Entry::Occupied(_) => { - trace!("ignoring already known address"); - None - } - Entry::Vacant(_vacant) => { - info!("connecting to newly learned address"); - let (outgoing, _) = self.change_outgoing_state( - addr, - OutgoingState::Connecting { - failures_so_far: 0, - since: now, - }, - ); - if outgoing.is_unforgettable != unforgettable { - outgoing.is_unforgettable = unforgettable; - debug!(unforgettable, "marked"); - } - Some(DialRequest::Dial { addr, span }) - } - }) - } - - /// Blocks an address. - /// - /// Causes any current connection to the address to be terminated and future ones prohibited. - pub(crate) fn block_addr( - &mut self, - addr: SocketAddr, - now: Instant, - justification: BlocklistJustification, - ) -> Option> { - let span = make_span(addr, self.outgoing.get(&addr)); - - span.clone() - .in_scope(move || match self.outgoing.entry(addr) { - Entry::Vacant(_vacant) => { - info!("unknown address blocked"); - self.change_outgoing_state( - addr, - OutgoingState::Blocked { - since: now, - justification, - }, - ); - None - } - // TODO: Check what happens on close on our end, i.e. can we distinguish in logs - // between a closed connection on our end vs one that failed? - Entry::Occupied(occupied) => match occupied.get().state { - OutgoingState::Blocked { .. } => { - debug!("address already blocked"); - None - } - OutgoingState::Loopback => { - warn!("loopback address block ignored"); - None - } - OutgoingState::Connected { ref handle, .. 
} => { - info!("connected address blocked, disconnecting"); - let handle = handle.clone(); - self.change_outgoing_state( - addr, - OutgoingState::Blocked { - since: now, - justification, - }, - ); - Some(DialRequest::Disconnect { span, handle }) - } - OutgoingState::Waiting { .. } | OutgoingState::Connecting { .. } => { - info!("address blocked"); - self.change_outgoing_state( - addr, - OutgoingState::Blocked { - since: now, - justification, - }, - ); - None - } - }, - }) - } - - /// Checks if an address is blocked. - #[cfg(test)] - pub(crate) fn is_blocked(&self, addr: SocketAddr) -> bool { - match self.outgoing.get(&addr) { - Some(outgoing) => matches!(outgoing.state, OutgoingState::Blocked { .. }), - None => false, - } - } - - /// Removes an address from the block list. - /// - /// Does nothing if the address was not blocked. - // This function is currently not in use by `network` itself. - #[allow(dead_code)] - pub(crate) fn redeem_addr(&mut self, addr: SocketAddr, now: Instant) -> Option> { - let span = make_span(addr, self.outgoing.get(&addr)); - span.clone() - .in_scope(move || match self.outgoing.entry(addr) { - Entry::Vacant(_) => { - debug!("unknown address redeemed"); - None - } - Entry::Occupied(occupied) => match occupied.get().state { - OutgoingState::Blocked { .. } => { - self.change_outgoing_state( - addr, - OutgoingState::Connecting { - failures_so_far: 0, - since: now, - }, - ); - Some(DialRequest::Dial { addr, span }) - } - _ => { - debug!("address redemption ignored, not blocked"); - None - } - }, - }) - } - - /// Records a pong being received. - pub(super) fn record_pong(&mut self, peer_id: NodeId, pong: TaggedTimestamp) -> bool { - let addr = if let Some(addr) = self.routes.get(&peer_id) { - *addr - } else { - debug!(%peer_id, nonce=%pong.nonce(), "ignoring pong received from peer without route"); - return false; - }; - - if let Some(outgoing) = self.outgoing.get_mut(&addr) { - if let OutgoingState::Connected { ref mut health, .. 
} = outgoing.state { - health.record_pong(&self.config.health, pong) - } else { - debug!(%peer_id, nonce=%pong.nonce(), "ignoring pong received from peer that is not in connected state"); - false - } - } else { - debug!(%peer_id, nonce=%pong.nonce(), "ignoring pong received from peer without route"); - false - } - } - - /// Performs housekeeping like reconnection or unblocking peers. - /// - /// This function must periodically be called. A good interval is every second. - pub(super) fn perform_housekeeping( - &mut self, - rng: &mut R, - now: Instant, - ) -> Vec> { - let mut to_forget = Vec::new(); - let mut to_fail = Vec::new(); - let mut to_ping_timeout = Vec::new(); - let mut to_reconnect = Vec::new(); - let mut to_ping = Vec::new(); - - for (&addr, outgoing) in self.outgoing.iter_mut() { - // Note: `Span::in_scope` is no longer serviceable here due to borrow limitations. - let _span_guard = make_span(addr, Some(outgoing)).entered(); - - match outgoing.state { - // Decide whether to attempt reconnecting a failed-waiting address. - OutgoingState::Waiting { - failures_so_far, - last_failure, - .. - } => { - if failures_so_far > self.config.retry_attempts { - if outgoing.is_unforgettable { - // Unforgettable addresses simply have their timer reset. - info!("unforgettable address reset"); - - to_reconnect.push((addr, 0)); - } else { - // Address had too many attempts at reconnection, we will forget - // it after exiting this closure. - to_forget.push(addr); - - info!("address forgotten"); - } - } else { - // The address has not exceeded the limit, so check if it is due. - let due = last_failure + self.config.calc_backoff(failures_so_far); - if now >= due { - debug!(attempts = failures_so_far, "address reconnecting"); - - to_reconnect.push((addr, failures_so_far)); - } - } - } - - OutgoingState::Blocked { since, .. 
} => { - if now >= since + self.config.unblock_after { - info!("address unblocked"); - - to_reconnect.push((addr, 0)); - } - } - - OutgoingState::Connecting { - since, - failures_so_far, - } => { - let timeout = since + self.config.sweep_timeout; - if now >= timeout { - // The outer component has not called us with a `DialOutcome` in a - // reasonable amount of time. This should happen very rarely, ideally - // never. - warn!("address timed out connecting, was swept"); - - // Count the timeout as a failure against the connection. - to_fail.push((addr, failures_so_far + 1)); - } - } - OutgoingState::Connected { - peer_id, - ref mut health, - .. - } => { - // Check if we need to send a ping, or give up and disconnect. - let health_outcome = health.update_health(rng, &self.config.health, now); - - match health_outcome { - HealthCheckOutcome::DoNothing => { - // Nothing to do. - } - HealthCheckOutcome::SendPing(nonce) => { - trace!(%nonce, "sending ping"); - to_ping.push((peer_id, addr, nonce)); - } - HealthCheckOutcome::GiveUp => { - info!("disconnecting after ping retries were exhausted"); - to_ping_timeout.push(addr); - } - } - } - OutgoingState::Loopback => { - // Entry is ignored. Not outputting any `trace` because this is log spam even at - // the `trace` level. - } - } - } - - // Remove all addresses marked for forgetting. - to_forget.into_iter().for_each(|addr| { - self.outgoing.remove(&addr); - }); - - // Fail connections that are taking way too long to connect. - to_fail.into_iter().for_each(|(addr, failures_so_far)| { - let span = make_span(addr, self.outgoing.get(&addr)); - - span.in_scope(|| { - self.change_outgoing_state( - addr, - OutgoingState::Waiting { - failures_so_far, - error: None, - last_failure: now, - }, - ) - }); - }); - - let mut dial_requests = Vec::new(); - - // Request disconnection from failed pings. 
- for addr in to_ping_timeout { - let span = make_span(addr, self.outgoing.get(&addr)); - - let (_, opt_handle) = span.clone().in_scope(|| { - self.change_outgoing_state( - addr, - OutgoingState::Connecting { - failures_so_far: 0, - since: now, - }, - ) - }); - - if let Some(handle) = opt_handle { - dial_requests.push(DialRequest::Disconnect { - handle, - span: span.clone(), - }); - } else { - error!("did not expect connection under ping timeout to not have a residual connection handle. this is a bug"); - } - dial_requests.push(DialRequest::Dial { addr, span }); - } - - // Reconnect others. - dial_requests.extend(to_reconnect.into_iter().map(|(addr, failures_so_far)| { - let span = make_span(addr, self.outgoing.get(&addr)); - - span.clone().in_scope(|| { - self.change_outgoing_state( - addr, - OutgoingState::Connecting { - failures_so_far, - since: now, - }, - ) - }); - - DialRequest::Dial { addr, span } - })); - - // Finally, schedule pings. - dial_requests.extend(to_ping.into_iter().map(|(peer_id, addr, nonce)| { - let span = make_span(addr, self.outgoing.get(&addr)); - DialRequest::SendPing { - peer_id, - nonce, - span, - } - })); - - dial_requests - } - - /// Handles the outcome of a dialing attempt. - /// - /// Note that reconnects will earliest happen on the next `perform_housekeeping` call. - pub(crate) fn handle_dial_outcome( - &mut self, - dial_outcome: DialOutcome, - ) -> Option> { - let addr = dial_outcome.addr(); - let span = make_span(addr, self.outgoing.get(&addr)); - - span.clone().in_scope(move || match dial_outcome { - DialOutcome::Successful { - addr, - handle, - node_id, - when - } => { - info!("established outgoing connection"); - - if let Some(Outgoing{ - state: OutgoingState::Blocked { .. }, .. - }) = self.outgoing.get(&addr) { - // If we connected to a blocked address, do not go into connected, but stay - // blocked instead. - Some(DialRequest::Disconnect{ - handle, span - }) - } else { - // Otherwise, just record the connected state. 
- self.change_outgoing_state( - addr, - OutgoingState::Connected { - peer_id: node_id, - handle, - health: ConnectionHealth::new(when), - }, - ); - None - } - } - - DialOutcome::Failed { addr, error, when } => { - info!(err = display_error(&error), "outgoing connection failed"); - - if let Some(outgoing) = self.outgoing.get(&addr) { - match outgoing.state { - OutgoingState::Connecting { failures_so_far,.. } => { - self.change_outgoing_state( - addr, - OutgoingState::Waiting { - failures_so_far: failures_so_far + 1, - error: Some(error), - last_failure: when, - }, - ); - None - } - OutgoingState::Blocked { .. } => { - debug!("failed dial outcome after block ignored"); - - // We do not set the connection to "waiting" if an out-of-order failed - // connection arrives, but continue to honor the blocking. - None - } - OutgoingState::Waiting { .. } | - OutgoingState::Connected { .. } | - OutgoingState::Loopback => { - warn!( - "processing dial outcome on a connection that was not marked as connecting or blocked" - ); - - None - } - } - } else { - warn!("processing dial outcome non-existent connection"); - - // If the connection does not exist, do not introduce it! - None - } - } - DialOutcome::Loopback { addr } => { - info!("found loopback address"); - self.change_outgoing_state(addr, OutgoingState::Loopback); - None - } - }) - } - - /// Notifies the connection manager about a dropped connection. - /// - /// This will usually result in an immediate reconnection. - pub(crate) fn handle_connection_drop( - &mut self, - addr: SocketAddr, - now: Instant, - ) -> Option> { - let span = make_span(addr, self.outgoing.get(&addr)); - - span.clone().in_scope(move || { - if let Some(outgoing) = self.outgoing.get(&addr) { - match outgoing.state { - OutgoingState::Waiting { .. } - | OutgoingState::Loopback - | OutgoingState::Connecting { .. } => { - // We should, under normal circumstances, not receive drop notifications for - // any of these. 
Connection failures are handled by the dialer. - warn!("unexpected drop notification"); - None - } - OutgoingState::Connected { .. } => { - // Drop the handle, immediately initiate a reconnection. - self.change_outgoing_state( - addr, - OutgoingState::Connecting { - failures_so_far: 0, - since: now, - }, - ); - Some(DialRequest::Dial { addr, span }) - } - OutgoingState::Blocked { .. } => { - // Blocked addresses ignore connection drops. - debug!("received drop notification for blocked connection"); - None - } - } - } else { - warn!("received connection drop notification for unknown connection"); - None - } - }) - } -} - -#[cfg(test)] -mod tests { - use std::{net::SocketAddr, time::Duration}; - - use assert_matches::assert_matches; - use datasize::DataSize; - use rand::Rng; - use thiserror::Error; - - use super::{DialOutcome, DialRequest, NodeId, OutgoingConfig, OutgoingManager}; - use crate::{ - components::network::{ - blocklist::BlocklistJustification, - health::{HealthConfig, TaggedTimestamp}, - }, - testing::{init_logging, test_clock::TestClock}, - }; - - /// Error for test dialer. - /// - /// Tracks a configurable id for the error. - #[derive(DataSize, Debug, Error)] - #[error("test dialer error({})", id)] - struct TestDialerError { - id: u32, - } - - /// Setup an outgoing configuration for testing. - fn test_config() -> OutgoingConfig { - OutgoingConfig { - retry_attempts: 3, - base_timeout: Duration::from_secs(1), - unblock_after: Duration::from_secs(60), - sweep_timeout: Duration::from_secs(45), - health: HealthConfig::test_config(), - } - } - - /// Helper function that checks if a given dial request actually dials the expected address. - fn dials<'a, H, T>(expected: SocketAddr, requests: T) -> bool - where - T: IntoIterator> + 'a, - H: 'a, - { - for req in requests.into_iter() { - if let DialRequest::Dial { addr, .. 
} = req { - if *addr == expected { - return true; - } - } - } - - false - } - - /// Helper function that checks if a given dial request actually disconnects the expected - /// address. - fn disconnects<'a, H, T>(expected: H, requests: T) -> bool - where - T: IntoIterator> + 'a, - H: 'a + PartialEq, - { - for req in requests.into_iter() { - if let DialRequest::Disconnect { handle, .. } = req { - if *handle == expected { - return true; - } - } - } - - false - } - - #[test] - fn successful_lifecycle() { - init_logging(); - - let mut rng = crate::new_rng(); - let mut clock = TestClock::new(); - - let addr_a: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - let id_a = NodeId::random(&mut rng); - - let mut manager = OutgoingManager::::new(test_config()); - - // We begin by learning a single, regular address, triggering a dial request. - assert!(dials( - addr_a, - &manager.learn_addr(addr_a, false, clock.now()) - )); - assert_eq!(manager.metrics().out_state_connecting.get(), 1); - - // Our first connection attempt fails. The connection should now be in waiting state, but - // not reconnect, since the minimum delay is 2 seconds (2*base_timeout). - assert!(manager - .handle_dial_outcome(DialOutcome::Failed { - addr: addr_a, - error: TestDialerError { id: 1 }, - when: clock.now(), - },) - .is_none()); - assert_eq!(manager.metrics().out_state_connecting.get(), 0); - assert_eq!(manager.metrics().out_state_waiting.get(), 1); - - // Performing housekeeping multiple times should not make a difference. - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - // Advancing the clock will trigger a reconnection on the next housekeeping. 
- clock.advance_time(2_000); - assert!(dials( - addr_a, - &manager.perform_housekeeping(&mut rng, clock.now()) - )); - assert_eq!(manager.metrics().out_state_connecting.get(), 1); - assert_eq!(manager.metrics().out_state_waiting.get(), 0); - - // This time the connection succeeds. - assert!(manager - .handle_dial_outcome(DialOutcome::Successful { - addr: addr_a, - handle: 99, - node_id: id_a, - when: clock.now(), - },) - .is_none()); - assert_eq!(manager.metrics().out_state_connecting.get(), 0); - assert_eq!(manager.metrics().out_state_connected.get(), 1); - - // The routing table should have been updated and should return the handle. - assert_eq!(manager.get_route(id_a), Some(&99)); - assert_eq!(manager.get_addr(id_a), Some(addr_a)); - - // Time passes, and our connection drops. Reconnecting should be immediate. - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - clock.advance_time(20_000); - assert!(dials( - addr_a, - &manager.handle_connection_drop(addr_a, clock.now()) - )); - assert_eq!(manager.metrics().out_state_connecting.get(), 1); - assert_eq!(manager.metrics().out_state_waiting.get(), 0); - - // The route should have been cleared. - assert!(manager.get_route(id_a).is_none()); - assert!(manager.get_addr(id_a).is_none()); - - // Reconnection is already in progress, so we do not expect another request on housekeeping. - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - } - - #[test] - fn connections_forgotten_after_too_many_tries() { - init_logging(); - - let mut rng = crate::new_rng(); - let mut clock = TestClock::new(); - - let addr_a: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - // Address `addr_b` will be a known address. - let addr_b: SocketAddr = "5.6.7.8:5678".parse().unwrap(); - - let mut manager = OutgoingManager::::new(test_config()); - - // First, attempt to connect. Tests are set to 3 retries after 2, 4 and 8 seconds. 
- assert!(dials( - addr_a, - &manager.learn_addr(addr_a, false, clock.now()) - )); - assert!(dials( - addr_b, - &manager.learn_addr(addr_b, true, clock.now()) - )); - - // Fail the first connection attempts, not triggering a retry (timeout not reached yet). - assert!(manager - .handle_dial_outcome(DialOutcome::Failed { - addr: addr_a, - error: TestDialerError { id: 10 }, - when: clock.now(), - },) - .is_none()); - assert!(manager - .handle_dial_outcome(DialOutcome::Failed { - addr: addr_b, - error: TestDialerError { id: 11 }, - when: clock.now(), - },) - .is_none()); - - // Learning the address again should not cause a reconnection. - assert!(manager.learn_addr(addr_a, false, clock.now()).is_none()); - assert!(manager.learn_addr(addr_b, false, clock.now()).is_none()); - - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - assert!(manager.learn_addr(addr_a, false, clock.now()).is_none()); - assert!(manager.learn_addr(addr_b, false, clock.now()).is_none()); - - // After 1.999 seconds, reconnection should still be delayed. - clock.advance_time(1_999); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - // Adding 0.001 seconds finally is enough to reconnect. - clock.advance_time(1); - let requests = manager.perform_housekeeping(&mut rng, clock.now()); - assert!(dials(addr_a, &requests)); - assert!(dials(addr_b, &requests)); - - // Waiting for more than the reconnection delay should not be harmful or change - // anything, as we are currently connecting. - clock.advance_time(6_000); - - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - // Fail the connection again, wait 3.999 seconds, expecting no reconnection. 
- assert!(manager - .handle_dial_outcome(DialOutcome::Failed { - addr: addr_a, - error: TestDialerError { id: 40 }, - when: clock.now(), - },) - .is_none()); - assert!(manager - .handle_dial_outcome(DialOutcome::Failed { - addr: addr_b, - error: TestDialerError { id: 41 }, - when: clock.now(), - },) - .is_none()); - - clock.advance_time(3_999); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - // Adding 0.001 seconds finally again pushes us over the threshold. - clock.advance_time(1); - let requests = manager.perform_housekeeping(&mut rng, clock.now()); - assert!(dials(addr_a, &requests)); - assert!(dials(addr_b, &requests)); - - // Fail the connection quickly. - clock.advance_time(25); - assert!(manager - .handle_dial_outcome(DialOutcome::Failed { - addr: addr_a, - error: TestDialerError { id: 10 }, - when: clock.now(), - },) - .is_none()); - assert!(manager - .handle_dial_outcome(DialOutcome::Failed { - addr: addr_b, - error: TestDialerError { id: 10 }, - when: clock.now(), - },) - .is_none()); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - // The last attempt should happen 8 seconds after the error, not the last attempt. - clock.advance_time(7_999); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - clock.advance_time(1); - let requests = manager.perform_housekeeping(&mut rng, clock.now()); - assert!(dials(addr_a, &requests)); - assert!(dials(addr_b, &requests)); - - // Fail the last attempt. No more reconnections should be happening. - assert!(manager - .handle_dial_outcome(DialOutcome::Failed { - addr: addr_a, - error: TestDialerError { id: 10 }, - when: clock.now(), - },) - .is_none()); - assert!(manager - .handle_dial_outcome(DialOutcome::Failed { - addr: addr_b, - error: TestDialerError { id: 10 }, - when: clock.now(), - },) - .is_none()); - - // Only the unforgettable address should be reconnecting. 
- let requests = manager.perform_housekeeping(&mut rng, clock.now()); - assert!(!dials(addr_a, &requests)); - assert!(dials(addr_b, &requests)); - - // But not `addr_a`, even after a long wait. - clock.advance_time(1_000_000_000); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - } - - #[test] - fn blocking_works() { - init_logging(); - - let mut rng = crate::new_rng(); - let mut clock = TestClock::new(); - - let addr_a: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - // We use `addr_b` as an unforgettable address, which does not mean it cannot be blocked! - let addr_b: SocketAddr = "5.6.7.8:5678".parse().unwrap(); - let addr_c: SocketAddr = "9.0.1.2:9012".parse().unwrap(); - let id_a = NodeId::random(&mut rng); - let id_b = NodeId::random(&mut rng); - let id_c = NodeId::random(&mut rng); - - let mut manager = OutgoingManager::::new(test_config()); - - // Block `addr_a` from the start. - assert!(manager - .block_addr( - addr_a, - clock.now(), - BlocklistJustification::MissingChainspecHash - ) - .is_none()); - - // Learning both `addr_a` and `addr_b` should only trigger a connection to `addr_b` now. - assert!(manager.learn_addr(addr_a, false, clock.now()).is_none()); - assert!(dials( - addr_b, - &manager.learn_addr(addr_b, true, clock.now()) - )); - - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - // Fifteen seconds later we succeed in connecting to `addr_b`. - clock.advance_time(15_000); - assert!(manager - .handle_dial_outcome(DialOutcome::Successful { - addr: addr_b, - handle: 101, - node_id: id_b, - when: clock.now(), - },) - .is_none()); - assert_eq!(manager.get_route(id_b), Some(&101)); - - // Invariant through housekeeping. - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - assert_eq!(manager.get_route(id_b), Some(&101)); - - // Another fifteen seconds later, we block `addr_b`. 
- clock.advance_time(15_000); - assert!(disconnects( - 101, - &manager.block_addr( - addr_b, - clock.now(), - BlocklistJustification::MissingChainspecHash - ) - )); - - // `addr_c` will be blocked during the connection phase. - assert!(dials( - addr_c, - &manager.learn_addr(addr_c, false, clock.now()) - )); - assert!(manager - .block_addr( - addr_c, - clock.now(), - BlocklistJustification::MissingChainspecHash - ) - .is_none()); - - // We are still expect to provide a dial outcome, but afterwards, there should be no - // route to C and an immediate disconnection should be queued. - assert!(disconnects( - 42, - &manager.handle_dial_outcome(DialOutcome::Successful { - addr: addr_c, - handle: 42, - node_id: id_c, - when: clock.now(), - },) - )); - - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - assert!(manager.get_route(id_c).is_none()); - - // At this point, we have blocked all three addresses. 30 seconds later, the first one is - // unblocked due to the block timing out. - - clock.advance_time(30_000); - assert!(dials( - addr_a, - &manager.perform_housekeeping(&mut rng, clock.now()) - )); - - // Fifteen seconds later, B and C are still blocked, but we redeem B early. - clock.advance_time(15_000); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - assert!(dials(addr_b, &manager.redeem_addr(addr_b, clock.now()))); - - // Succeed both connections, and ensure we have routes to both. 
- assert!(manager - .handle_dial_outcome(DialOutcome::Successful { - addr: addr_b, - handle: 77, - node_id: id_b, - when: clock.now(), - },) - .is_none()); - assert!(manager - .handle_dial_outcome(DialOutcome::Successful { - addr: addr_a, - handle: 66, - node_id: id_a, - when: clock.now(), - },) - .is_none()); - - assert_eq!(manager.get_route(id_a), Some(&66)); - assert_eq!(manager.get_route(id_b), Some(&77)); - } - - #[test] - fn loopback_handled_correctly() { - init_logging(); - - let mut rng = crate::new_rng(); - let mut clock = TestClock::new(); - - let loopback_addr: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - - let mut manager = OutgoingManager::::new(test_config()); - - // Loopback addresses are connected to only once, and then marked as loopback forever. - assert!(dials( - loopback_addr, - &manager.learn_addr(loopback_addr, false, clock.now()) - )); - - assert!(manager - .handle_dial_outcome(DialOutcome::Loopback { - addr: loopback_addr, - },) - .is_none()); - - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - // Learning loopbacks again should not trigger another connection - assert!(manager - .learn_addr(loopback_addr, false, clock.now()) - .is_none()); - - // Blocking loopbacks does not result in a block, since regular blocks would clear after - // some time. 
- assert!(manager - .block_addr( - loopback_addr, - clock.now(), - BlocklistJustification::MissingChainspecHash - ) - .is_none()); - - clock.advance_time(1_000_000_000); - - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - } - - #[test] - fn connected_peers_works() { - init_logging(); - - let mut rng = crate::new_rng(); - let clock = TestClock::new(); - - let addr_a: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - let addr_b: SocketAddr = "5.6.7.8:5678".parse().unwrap(); - - let id_a = NodeId::random(&mut rng); - let id_b = NodeId::random(&mut rng); - - let mut manager = OutgoingManager::::new(test_config()); - - manager.learn_addr(addr_a, false, clock.now()); - manager.learn_addr(addr_b, true, clock.now()); - - manager.handle_dial_outcome(DialOutcome::Successful { - addr: addr_a, - handle: 22, - node_id: id_a, - when: clock.now(), - }); - manager.handle_dial_outcome(DialOutcome::Successful { - addr: addr_b, - handle: 33, - node_id: id_b, - when: clock.now(), - }); - - let mut peer_ids: Vec<_> = manager.connected_peers().collect(); - let mut expected = vec![id_a, id_b]; - - peer_ids.sort(); - expected.sort(); - - assert_eq!(peer_ids, expected); - } - - #[test] - fn sweeping_works() { - init_logging(); - - let mut rng = crate::new_rng(); - let mut clock = TestClock::new(); - - let addr_a: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - - let id_a = NodeId::random(&mut rng); - - let mut manager = OutgoingManager::::new(test_config()); - - // Trigger a new connection via learning an address. - assert!(dials( - addr_a, - &manager.learn_addr(addr_a, false, clock.now()) - )); - - // We now let enough time pass to cause the connection to be considered failed aborted. - // No effects are expected at this point. - clock.advance_time(50_000); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - // The connection will now experience a regular failure. 
Since this is the first connection - // failure, it should reconnect after 2 seconds. - clock.advance_time(2_000); - assert!(dials( - addr_a, - &manager.perform_housekeeping(&mut rng, clock.now()) - )); - - // We now simulate the second connection (`handle: 2`) succeeding first, after 1 second. - clock.advance_time(1_000); - assert!(manager - .handle_dial_outcome(DialOutcome::Successful { - addr: addr_a, - handle: 2, - node_id: id_a, - when: clock.now(), - }) - .is_none()); - - // A route should now be established. - assert_eq!(manager.get_route(id_a), Some(&2)); - - // More time passes and the first connection attempt finally finishes. - clock.advance_time(30_000); - assert!(manager - .handle_dial_outcome(DialOutcome::Successful { - addr: addr_a, - handle: 1, - node_id: id_a, - when: clock.now(), - }) - .is_none()); - - // We now expect to be connected through the first connection (see documentation). - assert_eq!(manager.get_route(id_a), Some(&1)); - } - - #[test] - fn blocking_not_overridden_by_racing_failed_connections() { - init_logging(); - - let mut rng = crate::new_rng(); - let mut clock = TestClock::new(); - - let addr_a: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - - let mut manager = OutgoingManager::::new(test_config()); - - assert!(!manager.is_blocked(addr_a)); - - // Block `addr_a` from the start. - assert!(manager - .block_addr( - addr_a, - clock.now(), - BlocklistJustification::MissingChainspecHash - ) - .is_none()); - assert!(manager.is_blocked(addr_a)); - - clock.advance_time(60); - - // Receive an "illegal" dial outcome, even though we did not dial. - assert!(manager - .handle_dial_outcome(DialOutcome::Failed { - addr: addr_a, - error: TestDialerError { id: 12345 }, - - /// The moment the connection attempt failed. - when: clock.now(), - }) - .is_none()); - - // The failed connection should _not_ have reset the block! 
- assert!(manager.is_blocked(addr_a)); - clock.advance_time(60); - assert!(manager.is_blocked(addr_a)); - - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - assert!(manager.is_blocked(addr_a)); - } - - #[test] - fn emits_and_accepts_pings() { - init_logging(); - - let mut rng = crate::new_rng(); - let mut clock = TestClock::new(); - - let addr: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - let id = NodeId::random(&mut rng); - - // Setup a connection and put it into the connected state. - let mut manager = OutgoingManager::::new(test_config()); - - // Trigger a new connection via learning an address. - assert!(dials(addr, &manager.learn_addr(addr, false, clock.now()))); - - assert!(manager - .handle_dial_outcome(DialOutcome::Successful { - addr, - handle: 1, - node_id: id, - when: clock.now(), - }) - .is_none()); - - // Initial housekeeping should do nothing. - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - // Go through 50 pings, which should be happening every 5 seconds. - for _ in 0..50 { - clock.advance(Duration::from_secs(3)); - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - clock.advance(Duration::from_secs(2)); - - let (_first_nonce, peer_id) = assert_matches!( - manager - .perform_housekeeping(&mut rng, clock.now()) - .as_slice(), - &[DialRequest::SendPing { nonce, peer_id, .. }] => (nonce, peer_id) - ); - assert_eq!(peer_id, id); - - // After a second, nothing should have changed. - assert!(manager - .perform_housekeeping(&mut rng, clock.now()) - .is_empty()); - - clock.advance(Duration::from_secs(1)); - // Waiting another second (two in total) should trigger another ping. - clock.advance(Duration::from_secs(1)); - - let (second_nonce, peer_id) = assert_matches!( - manager - .perform_housekeeping(&mut rng, clock.now()) - .as_slice(), - &[DialRequest::SendPing { nonce, peer_id, .. }] => (nonce, peer_id) - ); - - // Ensure the ID is correct. 
- assert_eq!(peer_id, id); - - // Pong arrives 1 second later. - clock.advance(Duration::from_secs(1)); - - // We now feed back the ping with the correct nonce. This should not result in a ban. - assert!(!manager.record_pong( - peer_id, - TaggedTimestamp::from_parts(clock.now(), second_nonce), - )); - - // This resets the "cycle", the next ping is due in 5 seconds. - } - - // Now we are going to miss 4 pings in a row and expect a disconnect. - clock.advance(Duration::from_secs(5)); - assert_matches!( - manager - .perform_housekeeping(&mut rng, clock.now()) - .as_slice(), - &[DialRequest::SendPing { .. }] - ); - clock.advance(Duration::from_secs(2)); - assert_matches!( - manager - .perform_housekeeping(&mut rng, clock.now()) - .as_slice(), - &[DialRequest::SendPing { .. }] - ); - clock.advance(Duration::from_secs(2)); - assert_matches!( - manager - .perform_housekeeping(&mut rng, clock.now()) - .as_slice(), - &[DialRequest::SendPing { .. }] - ); - clock.advance(Duration::from_secs(2)); - assert_matches!( - manager - .perform_housekeeping(&mut rng, clock.now()) - .as_slice(), - &[DialRequest::SendPing { .. }] - ); - - // This results in a disconnect, followed by a reconnect. - clock.advance(Duration::from_secs(2)); - let dial_addr = assert_matches!( - manager - .perform_housekeeping(&mut rng, clock.now()) - .as_slice(), - &[DialRequest::Disconnect { .. }, DialRequest::Dial { addr, .. }] => addr - ); - - assert_eq!(dial_addr, addr); - } - - #[test] - fn indicates_issue_when_excessive_pongs_are_encountered() { - let mut rng = crate::new_rng(); - let mut clock = TestClock::new(); - - let addr: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - let id = NodeId::random(&mut rng); - - // Ensure we have one connected node. 
- let mut manager = OutgoingManager::::new(test_config()); - - assert!(dials(addr, &manager.learn_addr(addr, false, clock.now()))); - assert!(manager - .handle_dial_outcome(DialOutcome::Successful { - addr, - handle: 1, - node_id: id, - when: clock.now(), - }) - .is_none()); - - clock.advance(Duration::from_millis(50)); - - // We can now receive excessive pongs. - assert!(!manager.record_pong(id, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!manager.record_pong(id, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!manager.record_pong(id, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!manager.record_pong(id, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!manager.record_pong(id, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(!manager.record_pong(id, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - assert!(manager.record_pong(id, TaggedTimestamp::from_parts(clock.now(), rng.gen()))); - } -} diff --git a/node/src/components/network/per_channel.rs b/node/src/components/network/per_channel.rs new file mode 100644 index 0000000000..0d7eed10fa --- /dev/null +++ b/node/src/components/network/per_channel.rs @@ -0,0 +1,173 @@ +//! `PerChannel` allows to hold some configuration for every channel. It has +//! convenience methods allowing to cover common operations. +//! +//! For example, `buffer_size: PerChannel` allows to associate a buffer +//! size of type `usize` to every channel. + +use std::convert::Infallible; + +use casper_types::bytesrepr::{self, FromBytes, ToBytes}; +use datasize::DataSize; +use serde::{Deserialize, Serialize}; + +use super::Channel; +use crate::utils::UnwrapInfallible; + +/// Allows to hold some data for every channel used in the node. 
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, DataSize, Serialize, Deserialize)] +pub struct PerChannel { + network: T, + sync_data_request: T, + sync_data_responses: T, + data_requests: T, + data_responses: T, + consensus: T, + bulk_gossip: T, +} + +impl PerChannel { + /// Returns data value stored for the given channel. + #[inline(always)] + pub const fn get(&self, channel: Channel) -> &T { + match channel { + Channel::Network => &self.network, + Channel::SyncDataRequests => &self.sync_data_request, + Channel::SyncDataResponses => &self.sync_data_responses, + Channel::DataRequests => &self.data_requests, + Channel::DataResponses => &self.data_responses, + Channel::Consensus => &self.consensus, + Channel::BulkGossip => &self.bulk_gossip, + } + } + + /// Creates a new `PerChannel` from the original one by applying the given function. + pub fn map(self, mut f: impl FnMut(Channel, T) -> U) -> PerChannel { + PerChannel { + network: f(Channel::Network, self.network), + sync_data_request: f(Channel::SyncDataRequests, self.sync_data_request), + sync_data_responses: f(Channel::SyncDataResponses, self.sync_data_responses), + data_requests: f(Channel::DataRequests, self.data_requests), + data_responses: f(Channel::DataResponses, self.data_responses), + consensus: f(Channel::Consensus, self.consensus), + bulk_gossip: f(Channel::BulkGossip, self.bulk_gossip), + } + } + + /// Fill the fields for all the channels with a value generated from the given closure. + #[inline(always)] + pub fn init_with(mut initializer: impl FnMut(Channel) -> T) -> Self { + Self::try_init_with::(|channel| Ok(initializer(channel))).unwrap_infallible() + } + + /// Fill the fields for all the channels with a value generated from the given closure, reducing + /// to a single result.. 
+ pub fn try_init_with( + mut initializer: impl FnMut(Channel) -> Result, + ) -> Result { + Ok(PerChannel { + network: initializer(Channel::Network)?, + sync_data_request: initializer(Channel::SyncDataRequests)?, + sync_data_responses: initializer(Channel::SyncDataResponses)?, + data_requests: initializer(Channel::DataRequests)?, + data_responses: initializer(Channel::DataResponses)?, + consensus: initializer(Channel::Consensus)?, + bulk_gossip: initializer(Channel::BulkGossip)?, + }) + } +} + +impl IntoIterator for PerChannel { + type Item = (Channel, T); + + type IntoIter = std::array::IntoIter<(Channel, T), 7>; + + fn into_iter(self) -> Self::IntoIter { + let Self { + network, + sync_data_request, + sync_data_responses, + data_requests, + data_responses, + consensus, + bulk_gossip, + } = self; + + [ + (Channel::Network, network), + (Channel::SyncDataRequests, sync_data_request), + (Channel::SyncDataResponses, sync_data_responses), + (Channel::DataRequests, data_requests), + (Channel::DataResponses, data_responses), + (Channel::Consensus, consensus), + (Channel::BulkGossip, bulk_gossip), + ] + .into_iter() + } +} + +impl ToBytes for PerChannel { + fn to_bytes(&self) -> Result, bytesrepr::Error> { + let mut buffer = bytesrepr::allocate_buffer(self)?; + let Self { + network, + sync_data_request, + sync_data_responses, + data_requests, + data_responses, + consensus, + bulk_gossip, + } = self; + + buffer.extend(network.to_bytes()?); + buffer.extend(sync_data_request.to_bytes()?); + buffer.extend(sync_data_responses.to_bytes()?); + buffer.extend(data_requests.to_bytes()?); + buffer.extend(data_responses.to_bytes()?); + buffer.extend(consensus.to_bytes()?); + buffer.extend(bulk_gossip.to_bytes()?); + Ok(buffer) + } + + fn serialized_length(&self) -> usize { + let Self { + network, + sync_data_request, + sync_data_responses, + data_requests, + data_responses, + consensus, + bulk_gossip, + } = self; + + network.serialized_length() + + 
sync_data_request.serialized_length() + + sync_data_responses.serialized_length() + + data_requests.serialized_length() + + data_responses.serialized_length() + + consensus.serialized_length() + + bulk_gossip.serialized_length() + } +} + +impl FromBytes for PerChannel { + fn from_bytes(bytes: &[u8]) -> Result<(Self, &[u8]), bytesrepr::Error> { + let (network, bytes) = FromBytes::from_bytes(bytes)?; + let (sync_data_request, bytes) = FromBytes::from_bytes(bytes)?; + let (sync_data_responses, bytes) = FromBytes::from_bytes(bytes)?; + let (data_requests, bytes) = FromBytes::from_bytes(bytes)?; + let (data_responses, bytes) = FromBytes::from_bytes(bytes)?; + let (consensus, bytes) = FromBytes::from_bytes(bytes)?; + let (bulk_gossip, bytes) = FromBytes::from_bytes(bytes)?; + + let config = Self { + network, + sync_data_request, + sync_data_responses, + data_requests, + data_responses, + consensus, + bulk_gossip, + }; + Ok((config, bytes)) + } +} diff --git a/node/src/components/network/symmetry.rs b/node/src/components/network/symmetry.rs deleted file mode 100644 index 37433fd24a..0000000000 --- a/node/src/components/network/symmetry.rs +++ /dev/null @@ -1,300 +0,0 @@ -//! Connection symmetry management. -//! -//! Tracks the state of connections, which may be uni- or bi-directional, depending on whether a -//! peer has connected back to us. Asymmetric connections are usually removed periodically. - -use std::{collections::BTreeSet, mem, net::SocketAddr, time::Instant}; - -use datasize::DataSize; -use tracing::{debug, warn}; - -/// Describes whether a connection is uni- or bi-directional. -#[derive(DataSize, Debug, Default)] -pub(super) enum ConnectionSymmetry { - /// We have only seen an incoming connection. - IncomingOnly { - /// Time this connection remained incoming only. - since: Instant, - /// The outgoing address of the peer that is connected to us. - peer_addrs: BTreeSet, - }, - /// We have only seen an outgoing connection. 
- OutgoingOnly { - /// Time this connection remained outgoing only. - since: Instant, - }, - /// The connection is fully symmetric. - Symmetric { - /// The outgoing address on the peer that is connected to us. - peer_addrs: BTreeSet, - }, - /// The connection is invalid/missing and should be removed. - #[default] - Gone, -} - -impl ConnectionSymmetry { - /// A new incoming connection has been registered. - /// - /// Returns true, if the connection achieved symmetry with this change. - pub(super) fn add_incoming(&mut self, peer_addr: SocketAddr, since: Instant) -> bool { - match self { - ConnectionSymmetry::IncomingOnly { - ref mut peer_addrs, .. - } => { - // Already incoming connection, just add it to the pile. - peer_addrs.insert(peer_addr); - debug!( - total_incoming_count = peer_addrs.len(), - "added additional incoming connection on non-symmetric" - ); - false - } - ConnectionSymmetry::OutgoingOnly { .. } => { - // Outgoing graduates to Symmetric when we receive an incoming connection. - let mut peer_addrs = BTreeSet::new(); - peer_addrs.insert(peer_addr); - *self = ConnectionSymmetry::Symmetric { peer_addrs }; - debug!("added incoming connection, now symmetric"); - true - } - ConnectionSymmetry::Symmetric { peer_addrs } => { - // Just record an additional incoming connection. - peer_addrs.insert(peer_addr); - debug!( - total_incoming_count = peer_addrs.len(), - "added additional incoming connection on symmetric" - ); - false - } - ConnectionSymmetry::Gone => { - let mut peer_addrs = BTreeSet::new(); - peer_addrs.insert(peer_addr); - *self = ConnectionSymmetry::IncomingOnly { peer_addrs, since }; - debug!("added incoming connection, now incoming only"); - false - } - } - } - - /// An incoming address has been removed. - /// - /// Returns `false` if the `ConnectionSymmetry` should be removed after this. - pub(super) fn remove_incoming(&mut self, peer_addr: SocketAddr, now: Instant) -> bool { - match self { - ConnectionSymmetry::IncomingOnly { peer_addrs, .. 
} => { - // Remove the incoming connection, warn if it didn't exist. - if !peer_addrs.remove(&peer_addr) { - warn!("tried to remove non-existent incoming connection from symmetry"); - } - - // Indicate removal if this was the last incoming connection. - if peer_addrs.is_empty() { - *self = ConnectionSymmetry::Gone; - debug!("removed incoming connection, now gone"); - - false - } else { - debug!( - total_incoming_count = peer_addrs.len(), - "removed incoming connection, still has remaining incoming" - ); - - true - } - } - ConnectionSymmetry::OutgoingOnly { .. } => { - warn!("cannot remove incoming connection from outgoing-only"); - true - } - ConnectionSymmetry::Symmetric { peer_addrs } => { - if !peer_addrs.remove(&peer_addr) { - warn!("tried to remove non-existent symmetric connection from symmetry"); - } - if peer_addrs.is_empty() { - *self = ConnectionSymmetry::OutgoingOnly { since: now }; - debug!("removed incoming connection, now incoming-only"); - } - true - } - ConnectionSymmetry::Gone => { - // This is just an error. - warn!("removing incoming connection from already gone symmetry"); - false - } - } - } - - /// Marks a connection as having an outgoing connection. - /// - /// Returns true, if the connection achieved symmetry with this change. - pub(super) fn mark_outgoing(&mut self, now: Instant) -> bool { - match self { - ConnectionSymmetry::IncomingOnly { peer_addrs, .. } => { - // Connection is now complete. - debug!("incoming connection marked outgoing, now complete"); - *self = ConnectionSymmetry::Symmetric { - peer_addrs: mem::take(peer_addrs), - }; - true - } - ConnectionSymmetry::OutgoingOnly { .. } => { - warn!("outgoing connection marked outgoing"); - false - } - ConnectionSymmetry::Symmetric { .. 
} => { - warn!("symmetric connection marked outgoing"); - false - } - ConnectionSymmetry::Gone => { - *self = ConnectionSymmetry::OutgoingOnly { since: now }; - debug!("absent connection marked outgoing"); - false - } - } - } - - /// Unmarks a connection as having an outgoing connection. - /// - /// Returns `false` if the `ConnectionSymmetry` should be removed after this. - pub(super) fn unmark_outgoing(&mut self, now: Instant) -> bool { - match self { - ConnectionSymmetry::IncomingOnly { .. } => { - warn!("incoming-only unmarked outgoing"); - true - } - ConnectionSymmetry::OutgoingOnly { .. } => { - // With neither incoming, nor outgoing connections, the symmetry is finally gone. - *self = ConnectionSymmetry::Gone; - debug!("outgoing connection unmarked, now gone"); - - false - } - ConnectionSymmetry::Symmetric { peer_addrs } => { - *self = ConnectionSymmetry::IncomingOnly { - peer_addrs: mem::take(peer_addrs), - since: now, - }; - debug!("symmetric connection unmarked, now outgoing only"); - - true - } - ConnectionSymmetry::Gone => { - warn!("gone marked outgoing"); - false - } - } - } - - /// Returns the set of incoming addresses, if any. - pub(super) fn incoming_addrs(&self) -> Option<&BTreeSet> { - match self { - ConnectionSymmetry::IncomingOnly { peer_addrs, .. } - | ConnectionSymmetry::Symmetric { peer_addrs, .. } => Some(peer_addrs), - ConnectionSymmetry::OutgoingOnly { .. } | ConnectionSymmetry::Gone => None, - } - } -} - -#[cfg(test)] -mod tests { - use std::{ - collections::BTreeSet, - net::SocketAddr, - time::{Duration, Instant}, - }; - - use crate::testing::test_clock::TestClock; - - use super::ConnectionSymmetry; - - /// Indicates whether or not a connection should be cleaned up. - fn should_be_reaped( - connection_symmetry: &ConnectionSymmetry, - now: Instant, - max_time_asymmetric: Duration, - ) -> bool { - match connection_symmetry { - ConnectionSymmetry::IncomingOnly { since, .. 
} => now >= *since + max_time_asymmetric, - ConnectionSymmetry::OutgoingOnly { since } => now >= *since + max_time_asymmetric, - ConnectionSymmetry::Symmetric { .. } => false, - ConnectionSymmetry::Gone => true, - } - } - - #[test] - fn symmetry_successful_lifecycles() { - let mut clock = TestClock::new(); - - let max_time_asymmetric = Duration::from_secs(240); - let peer_addr: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - - let mut sym = ConnectionSymmetry::default(); - - // Symmetries that have just been initialized are always reaped instantly. - assert!(should_be_reaped(&sym, clock.now(), max_time_asymmetric)); - - // Adding an incoming address. - sym.add_incoming(peer_addr, clock.now()); - assert!(!should_be_reaped(&sym, clock.now(), max_time_asymmetric)); - - // Add an outgoing address. - clock.advance(Duration::from_secs(20)); - sym.mark_outgoing(clock.now()); - - // The connection will now never be reaped, as it is symmetrical. - clock.advance(Duration::from_secs(1_000_000)); - assert!(!should_be_reaped(&sym, clock.now(), max_time_asymmetric)); - } - - #[test] - fn symmetry_lifecycle_reaps_incoming_only() { - let mut clock = TestClock::new(); - - let max_time_asymmetric = Duration::from_secs(240); - let peer_addr: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - let peer_addr2: SocketAddr = "1.2.3.4:1234".parse().unwrap(); - - let mut sym = ConnectionSymmetry::default(); - - // Adding an incoming address prevents it from being reaped. - sym.add_incoming(peer_addr, clock.now()); - assert!(!should_be_reaped(&sym, clock.now(), max_time_asymmetric)); - - // Adding another incoming address does not change the timeout. - clock.advance(Duration::from_secs(120)); - sym.add_incoming(peer_addr2, clock.now()); - assert!(!should_be_reaped(&sym, clock.now(), max_time_asymmetric)); - - // We also expected `peer_addr` and `peer_addr2` to be the incoming addresses now. 
- let mut expected = BTreeSet::new(); - expected.insert(peer_addr); - expected.insert(peer_addr2); - assert_eq!(sym.incoming_addrs(), Some(&expected)); - - // After 240 seconds since the first incoming connection, we finally are due reaping. - clock.advance(Duration::from_secs(120)); - assert!(should_be_reaped(&sym, clock.now(), max_time_asymmetric)); - } - - #[test] - fn symmetry_lifecycle_reaps_outgoing_only() { - let mut clock = TestClock::new(); - - let max_time_asymmetric = Duration::from_secs(240); - - let mut sym = ConnectionSymmetry::default(); - - // Mark as outgoing, to prevent reaping. - sym.mark_outgoing(clock.now()); - assert!(!should_be_reaped(&sym, clock.now(), max_time_asymmetric)); - - // Marking as outgoing again is usually an error, but should not affect the timeout. - clock.advance(Duration::from_secs(120)); - assert!(!should_be_reaped(&sym, clock.now(), max_time_asymmetric)); - - // After 240 seconds we finally are reaping. - clock.advance(Duration::from_secs(120)); - assert!(should_be_reaped(&sym, clock.now(), max_time_asymmetric)); - } -} diff --git a/node/src/components/network/tasks.rs b/node/src/components/network/tasks.rs deleted file mode 100644 index 671c2a11f5..0000000000 --- a/node/src/components/network/tasks.rs +++ /dev/null @@ -1,845 +0,0 @@ -//! Tasks run by the component. 
- -use std::{ - error::Error as StdError, - fmt::Display, - io, - net::SocketAddr, - pin::Pin, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, Weak, - }, - time::Duration, -}; - -use bincode::Options; -use futures::{ - future::{self, Either}, - stream::{SplitSink, SplitStream}, - Future, SinkExt, StreamExt, -}; -use openssl::{ - pkey::{PKey, Private}, - ssl::Ssl, - x509::X509, -}; -use prometheus::IntGauge; -use rand::Rng; -use serde::{de::DeserializeOwned, Deserialize, Serialize}; -use tokio::{ - net::TcpStream, - sync::{mpsc::UnboundedReceiver, watch, Semaphore}, -}; -use tokio_openssl::SslStream; -use tokio_serde::{Deserializer, Serializer}; -use tracing::{ - debug, error, error_span, - field::{self, Empty}, - info, trace, warn, Instrument, Span, -}; - -use casper_types::{ProtocolVersion, PublicKey, TimeDiff}; - -use super::{ - chain_info::ChainInfo, - counting_format::{ConnectionId, Role}, - error::{ConnectionError, IoError}, - event::{IncomingConnection, OutgoingConnection}, - full_transport, - limiter::LimiterHandle, - message::NodeKeyPair, - message_pack_format::MessagePackFormat, - EstimatorWeights, Event, FramedTransport, FullTransport, Identity, Message, Metrics, Payload, - Transport, -}; -use crate::{ - components::network::{framed_transport, BincodeFormat, Config, FromIncoming}, - effect::{ - announcements::PeerBehaviorAnnouncement, requests::NetworkRequest, AutoClosingResponder, - EffectBuilder, - }, - reactor::{EventQueueHandle, QueueKind}, - tls::{self, TlsCert, ValidationError}, - types::NodeId, - utils::display_error, -}; - -/// An item on the internal outgoing message queue. -/// -/// Contains a reference counted message and an optional responder to call once the message has been -/// successfully handed over to the kernel for sending. -pub(super) type MessageQueueItem

= (Arc>, Option>); - -/// The outcome of the handshake process. -struct HandshakeOutcome { - /// A framed transport for peer. - framed_transport: FramedTransport, - /// Public address advertised by the peer. - public_addr: SocketAddr, - /// The public key the peer is validating with, if any. - peer_consensus_public_key: Option, - /// Holds the information whether the remote node is syncing. - is_peer_syncing: bool, -} - -/// Low-level TLS connection function. -/// -/// Performs the actual TCP+TLS connection setup. -async fn tls_connect( - context: &NetworkContext, - peer_addr: SocketAddr, -) -> Result<(NodeId, Transport), ConnectionError> -where - REv: 'static, -{ - let stream = TcpStream::connect(peer_addr) - .await - .map_err(ConnectionError::TcpConnection)?; - - stream - .set_nodelay(true) - .map_err(ConnectionError::TcpNoDelay)?; - - let mut transport = tls::create_tls_connector(context.our_cert.as_x509(), &context.secret_key) - .and_then(|connector| connector.configure()) - .and_then(|mut config| { - config.set_verify_hostname(false); - config.into_ssl("this-will-not-be-checked.example.com") - }) - .and_then(|ssl| SslStream::new(ssl, stream)) - .map_err(ConnectionError::TlsInitialization)?; - - SslStream::connect(Pin::new(&mut transport)) - .await - .map_err(ConnectionError::TlsHandshake)?; - - let peer_cert = transport - .ssl() - .peer_certificate() - .ok_or(ConnectionError::NoPeerCertificate)?; - - let validated_peer_cert = context - .validate_peer_cert(peer_cert) - .map_err(ConnectionError::PeerCertificateInvalid)?; - - let peer_id = NodeId::from(validated_peer_cert.public_key_fingerprint()); - - Ok((peer_id, transport)) -} - -/// Initiates a TLS connection to a remote address. -pub(super) async fn connect_outgoing( - context: Arc>, - peer_addr: SocketAddr, -) -> OutgoingConnection

-where - REv: 'static, - P: Payload, -{ - let (peer_id, transport) = match tls_connect(&context, peer_addr).await { - Ok(value) => value, - Err(error) => return OutgoingConnection::FailedEarly { peer_addr, error }, - }; - - // Register the `peer_id` on the [`Span`]. - Span::current().record("peer_id", &field::display(peer_id)); - - if peer_id == context.our_id { - info!("incoming loopback connection"); - return OutgoingConnection::Loopback { peer_addr }; - } - - debug!("Outgoing TLS connection established"); - - // Setup connection id and framed transport. - let connection_id = ConnectionId::from_connection(transport.ssl(), context.our_id, peer_id); - let framed_transport = framed_transport(transport, context.chain_info.maximum_net_message_size); - - // Negotiate the handshake, concluding the incoming connection process. - match negotiate_handshake::(&context, framed_transport, connection_id).await { - Ok(HandshakeOutcome { - framed_transport, - public_addr, - peer_consensus_public_key, - is_peer_syncing: is_syncing, - }) => { - if let Some(ref public_key) = peer_consensus_public_key { - Span::current().record("consensus_key", &field::display(public_key)); - } - - if public_addr != peer_addr { - // We don't need the `public_addr`, as we already connected, but warn anyway. - warn!(%public_addr, %peer_addr, "peer advertises a different public address than what we connected to"); - } - - // Setup full framed transport, then close down receiving end of the transport. - let full_transport = full_transport::

( - context.net_metrics.clone(), - connection_id, - framed_transport, - Role::Dialer, - ); - let (sink, _stream) = full_transport.split(); - - OutgoingConnection::Established { - peer_addr, - peer_id, - peer_consensus_public_key, - sink, - is_syncing, - } - } - Err(error) => OutgoingConnection::Failed { - peer_addr, - peer_id, - error, - }, - } -} - -/// A context holding all relevant information for networking communication shared across tasks. -pub(crate) struct NetworkContext -where - REv: 'static, -{ - /// The handle to the reactor's event queue, used by incoming message handlers to put events - /// onto the queue. - event_queue: Option>, - /// Our own [`NodeId`]. - our_id: NodeId, - /// TLS certificate associated with this node's identity. - our_cert: Arc, - /// TLS certificate authority associated with this node's identity. - network_ca: Option>, - /// Secret key associated with `our_cert`. - secret_key: Arc>, - /// Weak reference to the networking metrics shared by all sender/receiver tasks. - net_metrics: Weak, - /// Chain info extract from chainspec. - chain_info: ChainInfo, - /// Optional set of signing keys, to identify as a node during handshake. - node_key_pair: Option, - /// Our own public listening address. - public_addr: Option, - /// Timeout for handshake completion. - handshake_timeout: TimeDiff, - /// Weights to estimate payloads with. - payload_weights: EstimatorWeights, - /// The protocol version at which (or under) tarpitting is enabled. - tarpit_version_threshold: Option, - /// If tarpitting is enabled, duration for which connections should be kept open. - tarpit_duration: TimeDiff, - /// The chance, expressed as a number between 0.0 and 1.0, of triggering the tarpit. - tarpit_chance: f32, - /// Maximum number of demands allowed to be running at once. If 0, no limit is enforced. - max_in_flight_demands: usize, - /// Flag indicating whether this node is syncing. 
- is_syncing: AtomicBool, -} - -impl NetworkContext { - pub(super) fn new( - cfg: Config, - our_identity: Identity, - node_key_pair: Option, - chain_info: ChainInfo, - net_metrics: &Arc, - ) -> Self { - // Set the demand max from configuration, regarding `0` as "unlimited". - let max_in_flight_demands = if cfg.max_in_flight_demands == 0 { - usize::MAX - } else { - cfg.max_in_flight_demands as usize - }; - - let Identity { - secret_key, - tls_certificate, - network_ca, - } = our_identity; - let our_id = NodeId::from(tls_certificate.public_key_fingerprint()); - - NetworkContext { - our_id, - public_addr: None, - event_queue: None, - our_cert: tls_certificate, - network_ca, - secret_key, - net_metrics: Arc::downgrade(net_metrics), - chain_info, - node_key_pair, - handshake_timeout: cfg.handshake_timeout, - payload_weights: cfg.estimator_weights.clone(), - tarpit_version_threshold: cfg.tarpit_version_threshold, - tarpit_duration: cfg.tarpit_duration, - tarpit_chance: cfg.tarpit_chance, - max_in_flight_demands, - is_syncing: AtomicBool::new(false), - } - } - - pub(super) fn initialize( - &mut self, - our_public_addr: SocketAddr, - event_queue: EventQueueHandle, - ) { - self.public_addr = Some(our_public_addr); - self.event_queue = Some(event_queue); - } - - /// Our own [`NodeId`]. - pub(super) fn our_id(&self) -> NodeId { - self.our_id - } - - /// Our own public listening address. - pub(super) fn public_addr(&self) -> Option { - self.public_addr - } - - /// Chain info extract from chainspec. 
- pub(super) fn chain_info(&self) -> &ChainInfo { - &self.chain_info - } - - pub(crate) fn validate_peer_cert(&self, peer_cert: X509) -> Result { - match &self.network_ca { - Some(ca_cert) => tls::validate_cert_with_authority(peer_cert, ca_cert), - None => tls::validate_self_signed_cert(peer_cert), - } - } - - pub(crate) fn network_ca(&self) -> Option<&Arc> { - self.network_ca.as_ref() - } - - pub(crate) fn is_syncing(&self) -> &AtomicBool { - &self.is_syncing - } -} - -/// Handles an incoming connection. -/// -/// Sets up a TLS stream and performs the protocol handshake. -async fn handle_incoming( - context: Arc>, - stream: TcpStream, - peer_addr: SocketAddr, -) -> IncomingConnection

-where - REv: From> + 'static, - P: Payload, - for<'de> P: Serialize + Deserialize<'de>, - for<'de> Message

: Serialize + Deserialize<'de>, -{ - let (peer_id, transport) = match server_setup_tls(&context, stream).await { - Ok(value) => value, - Err(error) => { - return IncomingConnection::FailedEarly { peer_addr, error }; - } - }; - - // Register the `peer_id` on the [`Span`] for logging the ID from here on out. - Span::current().record("peer_id", &field::display(peer_id)); - - if peer_id == context.our_id { - info!("incoming loopback connection"); - return IncomingConnection::Loopback; - } - - debug!("Incoming TLS connection established"); - - // Setup connection id and framed transport. - let connection_id = ConnectionId::from_connection(transport.ssl(), context.our_id, peer_id); - let framed_transport = framed_transport(transport, context.chain_info.maximum_net_message_size); - - // Negotiate the handshake, concluding the incoming connection process. - match negotiate_handshake::(&context, framed_transport, connection_id).await { - Ok(HandshakeOutcome { - framed_transport, - public_addr, - peer_consensus_public_key, - is_peer_syncing: _, - }) => { - if let Some(ref public_key) = peer_consensus_public_key { - Span::current().record("consensus_key", &field::display(public_key)); - } - - // Establish full transport and close the receiving end. - let full_transport = full_transport::

( - context.net_metrics.clone(), - connection_id, - framed_transport, - Role::Listener, - ); - - let (_sink, stream) = full_transport.split(); - - IncomingConnection::Established { - peer_addr, - public_addr, - peer_id, - peer_consensus_public_key, - stream, - } - } - Err(error) => IncomingConnection::Failed { - peer_addr, - peer_id, - error, - }, - } -} - -/// Server-side TLS setup. -/// -/// This function groups the TLS setup into a convenient function, enabling the `?` operator. -pub(super) async fn server_setup_tls( - context: &NetworkContext, - stream: TcpStream, -) -> Result<(NodeId, Transport), ConnectionError> { - let mut tls_stream = tls::create_tls_acceptor( - context.our_cert.as_x509().as_ref(), - context.secret_key.as_ref(), - ) - .and_then(|ssl_acceptor| Ssl::new(ssl_acceptor.context())) - .and_then(|ssl| SslStream::new(ssl, stream)) - .map_err(ConnectionError::TlsInitialization)?; - - SslStream::accept(Pin::new(&mut tls_stream)) - .await - .map_err(ConnectionError::TlsHandshake)?; - - // We can now verify the certificate. - let peer_cert = tls_stream - .ssl() - .peer_certificate() - .ok_or(ConnectionError::NoPeerCertificate)?; - - let validated_peer_cert = context - .validate_peer_cert(peer_cert) - .map_err(ConnectionError::PeerCertificateInvalid)?; - - Ok(( - NodeId::from(validated_peer_cert.public_key_fingerprint()), - tls_stream, - )) -} - -/// Performs an IO-operation that can time out. -async fn io_timeout(duration: Duration, future: F) -> Result> -where - F: Future>, - E: StdError + 'static, -{ - tokio::time::timeout(duration, future) - .await - .map_err(|_elapsed| IoError::Timeout)? - .map_err(IoError::Error) -} - -/// Performs an IO-operation that can time out or result in a closed connection. 
-async fn io_opt_timeout(duration: Duration, future: F) -> Result> -where - F: Future>>, - E: StdError + 'static, -{ - let item = tokio::time::timeout(duration, future) - .await - .map_err(|_elapsed| IoError::Timeout)?; - - match item { - Some(Ok(value)) => Ok(value), - Some(Err(err)) => Err(IoError::Error(err)), - None => Err(IoError::UnexpectedEof), - } -} - -/// Negotiates a handshake between two peers. -async fn negotiate_handshake( - context: &NetworkContext, - framed: FramedTransport, - connection_id: ConnectionId, -) -> Result -where - P: Payload, -{ - let mut encoder = MessagePackFormat; - - // Manually encode a handshake. - let handshake_message = context.chain_info.create_handshake::

( - context.public_addr.expect("component not initialized"), - context.node_key_pair.as_ref(), - connection_id, - context.is_syncing.load(Ordering::SeqCst), - ); - - let serialized_handshake_message = Pin::new(&mut encoder) - .serialize(&Arc::new(handshake_message)) - .map_err(ConnectionError::CouldNotEncodeOurHandshake)?; - - // To ensure we are not dead-locking, we split the framed transport here and send the handshake - // in a background task before awaiting one ourselves. This ensures we can make progress - // regardless of the size of the outgoing handshake. - let (mut sink, mut stream) = framed.split(); - - let handshake_send = tokio::spawn(io_timeout(context.handshake_timeout.into(), async move { - sink.send(serialized_handshake_message).await?; - Ok(sink) - })); - - // The remote's message should be a handshake, but can technically be any message. We receive, - // deserialize and check it. - let remote_message_raw = io_opt_timeout(context.handshake_timeout.into(), stream.next()) - .await - .map_err(ConnectionError::HandshakeRecv)?; - - // Ensure the handshake was sent correctly. - let sink = handshake_send - .await - .map_err(ConnectionError::HandshakeSenderCrashed)? - .map_err(ConnectionError::HandshakeSend)?; - - let remote_message: Message

= Pin::new(&mut encoder) - .deserialize(&remote_message_raw) - .map_err(ConnectionError::InvalidRemoteHandshakeMessage)?; - - if let Message::Handshake { - network_name, - public_addr, - protocol_version, - consensus_certificate, - is_syncing, - chainspec_hash, - } = remote_message - { - debug!(%protocol_version, "handshake received"); - - // The handshake was valid, we can check the network name. - if network_name != context.chain_info.network_name { - return Err(ConnectionError::WrongNetwork(network_name)); - } - - // If there is a version mismatch, we treat it as a connection error. We do not ban peers - // for this error, but instead rely on exponential backoff, as bans would result in issues - // during upgrades where nodes may have a legitimate reason for differing versions. - // - // Since we are not using SemVer for versioning, we cannot make any assumptions about - // compatibility, so we allow only exact version matches. - if protocol_version != context.chain_info.protocol_version { - if let Some(threshold) = context.tarpit_version_threshold { - if protocol_version <= threshold { - let mut rng = crate::new_rng(); - - if rng.gen_bool(context.tarpit_chance as f64) { - // If tarpitting is enabled, we hold open the connection for a specific - // amount of time, to reduce load on other nodes and keep them from - // reconnecting. - info!(duration=?context.tarpit_duration, "randomly tarpitting node"); - tokio::time::sleep(Duration::from(context.tarpit_duration)).await; - } else { - debug!(p = context.tarpit_chance, "randomly not tarpitting node"); - } - } - } - return Err(ConnectionError::IncompatibleVersion(protocol_version)); - } - - // We check the chainspec hash to ensure peer is using the same chainspec as us. - // The remote message should always have a chainspec hash at this point since - // we checked the protocol version previously. 
- let peer_chainspec_hash = chainspec_hash.ok_or(ConnectionError::MissingChainspecHash)?; - if peer_chainspec_hash != context.chain_info.chainspec_hash { - return Err(ConnectionError::WrongChainspecHash(peer_chainspec_hash)); - } - - let peer_consensus_public_key = consensus_certificate - .map(|cert| { - cert.validate(connection_id) - .map_err(ConnectionError::InvalidConsensusCertificate) - }) - .transpose()?; - - let framed_transport = sink - .reunite(stream) - .map_err(|_| ConnectionError::FailedToReuniteHandshakeSinkAndStream)?; - - Ok(HandshakeOutcome { - framed_transport, - public_addr, - peer_consensus_public_key, - is_peer_syncing: is_syncing, - }) - } else { - // Received a non-handshake, this is an error. - Err(ConnectionError::DidNotSendHandshake) - } -} - -/// Runs the server core acceptor loop. -pub(super) async fn server( - context: Arc>, - listener: tokio::net::TcpListener, - mut shutdown_receiver: watch::Receiver<()>, -) where - REv: From> + Send, - P: Payload, -{ - // The server task is a bit tricky, since it has to wait on incoming connections while at the - // same time shut down if the networking component is dropped, otherwise the TCP socket will - // stay open, preventing reuse. - - // We first create a future that never terminates, handling incoming connections: - let accept_connections = async { - let event_queue = context.event_queue.expect("component not initialized"); - loop { - // We handle accept errors here, since they can be caused by a temporary resource - // shortage or the remote side closing the connection while it is waiting in - // the queue. - match listener.accept().await { - Ok((stream, peer_addr)) => { - // The span setup here is used throughout the entire lifetime of the connection. 
- let span = - error_span!("incoming", %peer_addr, peer_id=Empty, consensus_key=Empty); - - let context = context.clone(); - let handler_span = span.clone(); - tokio::spawn( - async move { - let incoming = - handle_incoming(context.clone(), stream, peer_addr).await; - event_queue - .schedule( - Event::IncomingConnection { - incoming: Box::new(incoming), - span, - }, - QueueKind::NetworkIncoming, - ) - .await; - } - .instrument(handler_span), - ); - } - - // TODO: Handle resource errors gracefully. - // In general, two kinds of errors occur here: Local resource exhaustion, - // which should be handled by waiting a few milliseconds, or remote connection - // errors, which can be dropped immediately. - // - // The code in its current state will consume 100% CPU if local resource - // exhaustion happens, as no distinction is made and no delay introduced. - Err(ref err) => { - warn!(%context.our_id, err=display_error(err), "dropping incoming connection during accept") - } - } - } - }; - - let shutdown_messages = async move { while shutdown_receiver.changed().await.is_ok() {} }; - - // Now we can wait for either the `shutdown` channel's remote end to do be dropped or the - // infinite loop to terminate, which never happens. - match future::select(Box::pin(shutdown_messages), Box::pin(accept_connections)).await { - Either::Left(_) => info!( - %context.our_id, - "shutting down socket, no longer accepting incoming connections" - ), - Either::Right(_) => unreachable!(), - } -} - -/// Network message reader. -/// -/// Schedules all received messages until the stream is closed or an error occurs. -pub(super) async fn message_reader( - context: Arc>, - mut stream: SplitStream>, - limiter: LimiterHandle, - mut close_incoming_receiver: watch::Receiver<()>, - peer_id: NodeId, - span: Span, -) -> io::Result<()> -where - P: DeserializeOwned + Send + Display + Payload, - REv: From> - + FromIncoming

- + From> - + From - + Send, -{ - let demands_in_flight = Arc::new(Semaphore::new(context.max_in_flight_demands)); - let event_queue = context.event_queue.expect("component not initialized"); - - let read_messages = async move { - while let Some(msg_result) = stream.next().await { - match msg_result { - Ok(msg) => { - trace!(%msg, "message received"); - - let effect_builder = EffectBuilder::new(event_queue); - - match msg.try_into_demand(effect_builder, peer_id) { - Ok((event, wait_for_response)) => { - // Note: For now, demands bypass the limiter, as we expect the - // backpressure to handle this instead. - - // Acquire a permit. If we are handling too many demands at this - // time, this will block, halting the processing of new message, - // thus letting the peer they have reached their maximum allowance. - let in_flight = demands_in_flight - .clone() - .acquire_owned() - .await - // Note: Since the semaphore is reference counted, it must - // explicitly be closed for acquisition to fail, which we - // never do. If this happens, there is a bug in the code; - // we exit with an error and close the connection. - .map_err(|_| { - io::Error::new( - io::ErrorKind::Other, - "demand limiter semaphore closed unexpectedly", - ) - })?; - - Metrics::record_trie_request_start(&context.net_metrics); - - let net_metrics = context.net_metrics.clone(); - // Spawn a future that will eventually send the returned message. It - // will essentially buffer the response. - tokio::spawn(async move { - if let Some(payload) = wait_for_response.await { - // Send message and await its return. `send_message` should - // only return when the message has been buffered, if the - // peer is not accepting data, we will block here until the - // send buffer has sufficient room. - effect_builder.send_message(peer_id, payload).await; - - // Note: We could short-circuit the event queue here and - // directly insert into the outgoing message queue, - // which may be potential performance improvement. 
- } - - // Missing else: The handler of the demand did not deem it - // worthy a response. Just drop it. - - // After we have either successfully buffered the message for - // sending, failed to do so or did not have a message to send - // out, we consider the request handled and free up the permit. - Metrics::record_trie_request_end(&net_metrics); - drop(in_flight); - }); - - // Schedule the created event. - event_queue - .schedule::(event, QueueKind::NetworkDemand) - .await; - } - Err(msg) => { - // We've received a non-demand message. Ensure we have the proper amount - // of resources, then push it to the reactor. - limiter - .request_allowance( - msg.payload_incoming_resource_estimate( - &context.payload_weights, - ), - ) - .await; - - let queue_kind = if msg.is_low_priority() { - QueueKind::NetworkLowPriority - } else { - QueueKind::NetworkIncoming - }; - - event_queue - .schedule( - Event::IncomingMessage { - peer_id: Box::new(peer_id), - msg, - span: span.clone(), - }, - queue_kind, - ) - .await; - } - } - } - Err(err) => { - warn!( - err = display_error(&err), - "receiving message failed, closing connection" - ); - return Err(err); - } - } - } - Ok(()) - }; - - let shutdown_messages = async move { while close_incoming_receiver.changed().await.is_ok() {} }; - - // Now we can wait for either the `shutdown` channel's remote end to do be dropped or the - // while loop to terminate. - match future::select(Box::pin(shutdown_messages), Box::pin(read_messages)).await { - Either::Left(_) => info!("shutting down incoming connection message reader"), - Either::Right(_) => (), - } - - Ok(()) -} - -/// Network message sender. -/// -/// Reads from a channel and sends all messages, until the stream is closed or an error occurs. -pub(super) async fn message_sender

( - mut queue: UnboundedReceiver>, - mut sink: SplitSink, Arc>>, - limiter: LimiterHandle, - counter: IntGauge, -) where - P: Payload, -{ - while let Some((message, opt_responder)) = queue.recv().await { - counter.dec(); - - let estimated_wire_size = match BincodeFormat::default().0.serialized_size(&*message) { - Ok(size) => size as u32, - Err(error) => { - error!( - error = display_error(&error), - "failed to get serialized size of outgoing message, closing outgoing connection" - ); - break; - } - }; - limiter.request_allowance(estimated_wire_size).await; - - let mut outcome = sink.send(message).await; - - // Notify via responder that the message has been buffered by the kernel. - if let Some(auto_closing_responder) = opt_responder { - // Since someone is interested in the message, flush the socket to ensure it was sent. - outcome = outcome.and(sink.flush().await); - auto_closing_responder.respond(()).await; - } - - // We simply error-out if the sink fails, it means that our connection broke. - if let Err(ref err) = outcome { - info!( - err = display_error(err), - "message send failed, closing outgoing connection" - ); - - // To ensure, metrics are up to date, we close the queue and drain it. 
- queue.close(); - while queue.recv().await.is_some() { - counter.dec(); - } - - break; - }; - } -} diff --git a/node/src/components/network/tests.rs b/node/src/components/network/tests.rs index a30a432587..20d0b8f395 100644 --- a/node/src/components/network/tests.rs +++ b/node/src/components/network/tests.rs @@ -22,7 +22,7 @@ use casper_types::SecretKey; use super::{ chain_info::ChainInfo, Config, Event as NetworkEvent, FromIncoming, GossipedAddress, Identity, - MessageKind, Network, Payload, + MessageKind, Network, Payload, Ticket, }; use crate::{ components::{ @@ -123,11 +123,12 @@ impl From for Event { } impl FromIncoming for Event { - fn from_incoming(sender: NodeId, payload: Message) -> Self { + fn from_incoming(sender: NodeId, payload: Message, ticket: Ticket) -> Self { match payload { Message::AddressGossiper(message) => Event::AddressGossiperIncoming(GossiperIncoming { sender, message: Box::new(message), + ticket, }), } } @@ -159,12 +160,8 @@ impl Payload for Message { } } - fn incoming_resource_estimate(&self, _weights: &super::EstimatorWeights) -> u32 { - 0 - } - - fn is_unsafe_for_syncing_peers(&self) -> bool { - false + fn get_channel(&self) -> super::Channel { + super::Channel::Network } } @@ -173,7 +170,7 @@ impl Payload for Message { /// Runs a single network. #[derive(Debug)] struct TestReactor { - net: Network, + net: Network, address_gossiper: Gossiper<{ GossipedAddress::ID_IS_COMPLETE_ITEM }, GossipedAddress>, } @@ -207,7 +204,7 @@ impl Reactor for TestReactor { registry, )?; - net.start_initialization(); + as InitializedComponent>::start_initialization(&mut net); let effects = smallvec![async { smallvec![Event::Net(NetworkEvent::Initialize)] }.boxed()]; Ok(( @@ -294,7 +291,7 @@ impl Finalize for TestReactor { /// Checks whether or not a given network with potentially blocked nodes is completely connected. fn network_is_complete( blocklist: &HashSet, - nodes: &HashMap>>, + nodes: &HashMap>>>, ) -> bool { // Collect expected nodes. 
let expected: HashSet<_> = nodes @@ -305,7 +302,6 @@ fn network_is_complete( for (node_id, node) in nodes { let net = &node.reactor().inner().net; - // TODO: Ensure the connections are symmetrical. let peers: HashSet<_> = net.peers().into_keys().collect(); let mut missing = expected.difference(&peers); @@ -446,6 +442,10 @@ async fn check_varying_size_network_connects() { // Try with a few predefined sets of network sizes. for &number_of_nodes in &[2u16, 3, 5, 9, 15] { + info!( + number_of_nodes, + "begin varying size network connection test" + ); let timeout = Duration::from_secs(3 * number_of_nodes as u64); let mut net = TestingNetwork::new(); @@ -487,6 +487,11 @@ async fn check_varying_size_network_connects() { // This test will run multiple times, so ensure we cleanup all ports. net.finalize().await; + + info!( + number_of_nodes, + "finished varying size network connection test" + ); } } diff --git a/node/src/components/network/transport.rs b/node/src/components/network/transport.rs new file mode 100644 index 0000000000..b098248b30 --- /dev/null +++ b/node/src/components/network/transport.rs @@ -0,0 +1,388 @@ +//! Low-level network transport configuration. +//! +//! The low-level transport is built on top of an existing TLS stream, handling all multiplexing. It +//! is based on a configuration of the Juliet protocol implemented in the `juliet` crate. 
+ +use std::{ + marker::PhantomData, + pin::Pin, + sync::{Arc, Weak}, +}; + +use casper_types::PublicKey; +use datasize::DataSize; +use juliet::rpc::IncomingRequest; +use openssl::ssl::Ssl; +use strum::EnumCount; +use tokio::net::TcpStream; +use tokio_openssl::SslStream; +use tracing::{error, trace, Span}; + +use crate::{ + components::network::{deserialize_network_message, Message}, + reactor::{EventQueueHandle, QueueKind}, + tls, + types::{chainspec::JulietConfig, NodeId, ValidatorMatrix}, + utils::{rate_limited::rate_limited, LockedLineWriter}, +}; + +use super::{ + chain_info::ChainInfo, + conman::{ProtocolHandler, ProtocolHandshakeOutcome}, + error::{ConnectionError, MessageReceiverError}, + handshake::HandshakeConfiguration, + metrics::Metrics, + Channel, Config, Event, FromIncoming, Identity, Payload, PerChannel, Transport, +}; + +/// Creates a new RPC builder with Juliet configuration as specified in the config and chainspec. +/// +/// The resulting `RpcBuilder` can be reused for multiple connections. 
+pub(super) fn create_rpc_builder( + juliet_config: &PerChannel, + config: &Config, + chain_info: &ChainInfo, +) -> juliet::rpc::RpcBuilder<{ Channel::COUNT }> { + let protocol = juliet_config.into_iter().fold( + juliet::protocol::ProtocolBuilder::new().max_frame_size(chain_info.maximum_frame_size), + |protocol, (channel, juliet_config)| { + protocol.channel_config(channel.into_channel_id(), juliet_config.into()) + }, + ); + + // If buffer_size is not specified, `in_flight_limit * 2` is used: + let buffer_size = config.send_buffer_size.map(|channel, maybe_buffer_size| { + maybe_buffer_size.unwrap_or((2 * juliet_config.get(channel).in_flight_limit).into()) + }); + + let io_core = buffer_size.into_iter().fold( + juliet::io::IoCoreBuilder::new(protocol).error_timeout(config.error_timeout.into()), + |io_core, (channel, buffer_size)| { + io_core.buffer_size(channel.into_channel_id(), buffer_size) + }, + ); + + juliet::rpc::RpcBuilder::new(io_core) + // We currently disable bubble timeouts due to not having enough data on whether nodes can + // process data fast enough in all cases. For now, we just warn. + .with_bubble_timeouts(config.bubble_timeouts) + .with_default_timeout(config.ack_timeout.into()) +} + +/// Adapter for incoming Juliet requests. +/// +/// At this time the node does not take full advantage of the Juliet RPC capabilities, relying on +/// its older message+ACK based model introduced with `muxink`. In this model, every message is only +/// acknowledged, with no request-response association being done. The ACK indicates that the peer +/// is free to send another message. +/// +/// The [`Ticket`] type is used to track the processing of an incoming message or its resulting +/// operations; it should dropped once the resources for doing so have been spent, but no earlier. +/// +/// Dropping it will cause an "ACK", which in the Juliet transport's case is an empty response, to +/// be sent. 
Cancellations or responses with actual payloads are not used at this time. +#[derive(DataSize, Debug)] +pub(crate) struct Ticket { + /// The underlying request. + #[data_size(skip)] + opt_request: Option>, + /// A weak reference to the networking metrics. + #[data_size(skip)] + net_metrics: Weak, +} + +impl Ticket { + /// Creates a new ticket from a given juliet RPC request. + #[inline(always)] + pub(super) fn from_rpc_request( + net_metrics: Weak, + incoming_request: IncomingRequest, + ) -> Self { + Ticket { + opt_request: Some(Box::new(incoming_request)), + net_metrics, + } + } + + /// Creates a new dummy ticket for testing. + /// + /// Unlike [`standin`], it is perfectly fine to use in testing. + #[cfg(test)] + #[inline(always)] + pub(crate) fn create_dummy() -> Self { + Self::stub() + } + + /// Creates a new ticket that does nothing. + /// + /// This method indicates a "hole" in the pass-through chain of `Ticket`s and its usage should + /// ultimately be removed. + #[inline(always)] + pub(crate) fn stub() -> Self { + Ticket { + opt_request: None, + net_metrics: Weak::new(), + } + } +} + +impl Drop for Ticket { + #[inline(always)] + fn drop(&mut self) { + // Currently, we simply send a request confirmation in the for of an `ACK`. 
+ if let Some(incoming_request) = self.opt_request.take() { + if let Some(net_metrics) = self.net_metrics.upgrade() { + if let Some(channel) = Channel::from_repr(incoming_request.channel().get()) { + let cm = net_metrics.channel_metrics.get(channel); + cm.update_from_sent_response(0); + } else { + rate_limited!(FAILED_TO_RECONSTRUCT_CHANNEL_ID, |dropped| error!( + req_channel = incoming_request.channel().get(), + dropped, "should never failed to reconstruct channel from incoming request" + )); + } + } + + incoming_request.respond(None); + } + } +} + +pub(super) struct TransportHandler { + event_queue: EventQueueHandle, + identity: Identity, + handshake_configuration: HandshakeConfiguration, + keylog: Option, + net_metrics: Arc, + validator_matrix: ValidatorMatrix, + _payload: PhantomData

, +} + +impl TransportHandler +where + REv: 'static, +{ + pub(super) fn new( + event_queue: EventQueueHandle, + identity: Identity, + handshake_configuration: HandshakeConfiguration, + keylog: Option, + net_metrics: Arc, + validator_matrix: ValidatorMatrix, + ) -> Self { + Self { + event_queue, + identity, + handshake_configuration, + keylog, + net_metrics, + validator_matrix, + _payload: PhantomData, + } + } + + /// Finish the transport setup after the TLS connection has been negotiated. + async fn finish_setting_up( + &self, + peer_id: NodeId, + transport: Transport, + ) -> Result { + let handshake_outcome = self + .handshake_configuration + .negotiate_handshake(transport) + .await?; + + Ok(ProtocolHandshakeOutcome { + peer_id, + handshake_outcome, + }) + } +} + +#[async_trait::async_trait] +impl ProtocolHandler for TransportHandler +where + REv: From> + FromIncoming

+ Send + 'static, + P: Payload, +{ + #[inline(always)] + async fn setup_incoming( + &self, + stream: TcpStream, + ) -> Result { + let (peer_id, transport) = + server_setup_tls(&self.identity, stream, self.keylog.clone()).await?; + + self.finish_setting_up(peer_id, transport).await + } + + #[inline(always)] + async fn setup_outgoing( + &self, + stream: TcpStream, + ) -> Result { + let (peer_id, transport) = tls_connect(&self.identity, stream, self.keylog.clone()).await?; + + self.finish_setting_up(peer_id, transport).await + } + + #[inline(always)] + async fn handle_incoming_request( + &self, + peer: NodeId, + consensus_key: Option<&PublicKey>, + request: IncomingRequest, + ) -> Result<(), String> { + self.do_handle_incoming_request(peer, consensus_key, request) + .await + .map_err(|err| err.to_string()) + } +} + +impl TransportHandler +where + REv: From> + FromIncoming

+ Send + 'static, + P: Payload, +{ + async fn do_handle_incoming_request( + &self, + peer: NodeId, + consensus_key: Option<&PublicKey>, + request: IncomingRequest, + ) -> Result<(), MessageReceiverError> { + let channel = Channel::from_repr(request.channel().get()) + .ok_or_else(|| MessageReceiverError::InvalidChannel(request.channel().get()))?; + let payload = request + .payload() + .as_ref() + .ok_or_else(|| MessageReceiverError::EmptyRequest)?; + + let msg: Message

= deserialize_network_message(payload) + .map_err(MessageReceiverError::DeserializationError)?; + + trace!(%msg, %channel, "message received"); + + // Ensure the peer did not try to sneak in a message on a different channel. + let msg_channel = msg.get_channel(); + if msg_channel != channel { + return Err(MessageReceiverError::WrongChannel { + got: msg_channel, + expected: channel, + }); + } + + let validator_status = consensus_key + .map(|key| self.validator_matrix.is_active_or_upcoming_validator(key)) + .unwrap_or(false); + + let queue_kind = if validator_status { + QueueKind::MessageValidator + } else if msg.is_low_priority() { + QueueKind::MessageLowPriority + } else { + QueueKind::MessageIncoming + }; + + // Update metrics. + self.net_metrics + .channel_metrics + .get(channel) + .update_from_incoming_request(payload.len() as u64); + + self.event_queue + .schedule::>( + Event::IncomingMessage { + peer_id: Box::new(peer), + msg: Box::new(msg), + span: Span::current(), + ticket: Ticket::from_rpc_request(Arc::downgrade(&self.net_metrics), request), + }, + queue_kind, + ) + .await; + + Ok(()) + } +} + +/// Server-side TLS setup. +/// +/// This function groups the TLS setup into a convenient function, enabling the `?` operator. +pub(super) async fn server_setup_tls( + identity: &Identity, + stream: TcpStream, + keylog: Option, +) -> Result<(NodeId, Transport), ConnectionError> { + let mut tls_stream = tls::create_tls_acceptor( + identity.tls_certificate.as_x509().as_ref(), + identity.secret_key.as_ref(), + keylog, + ) + .and_then(|ssl_acceptor| Ssl::new(ssl_acceptor.context())) + .and_then(|ssl| SslStream::new(ssl, stream)) + .map_err(ConnectionError::TlsInitialization)?; + + SslStream::accept(Pin::new(&mut tls_stream)) + .await + .map_err(ConnectionError::TlsHandshake)?; + + // We can now verify the certificate. 
+ let peer_cert = tls_stream + .ssl() + .peer_certificate() + .ok_or(ConnectionError::NoPeerCertificate)?; + + let validated_peer_cert = identity + .validate_peer_cert(peer_cert) + .map_err(ConnectionError::PeerCertificateInvalid)?; + + Ok(( + NodeId::from(validated_peer_cert.public_key_fingerprint()), + tls_stream, + )) +} + +/// Low-level TLS connection function. +/// +/// Performs the actual TCP+TLS connection setup. +async fn tls_connect( + identity: &Identity, + stream: TcpStream, + keylog: Option, +) -> Result<(NodeId, Transport), ConnectionError> { + // TODO: Timeout eventually if the connection gets stuck? + + stream + .set_nodelay(true) + .map_err(ConnectionError::TcpNoDelay)?; + + let mut transport = tls::create_tls_connector( + identity.tls_certificate.as_x509(), + &identity.secret_key, + keylog, + ) + .and_then(|connector| connector.configure()) + .and_then(|mut config| { + config.set_verify_hostname(false); + config.into_ssl("this-will-not-be-checked.example.com") + }) + .and_then(|ssl| SslStream::new(ssl, stream)) + .map_err(ConnectionError::TlsInitialization)?; + + SslStream::connect(Pin::new(&mut transport)) + .await + .map_err(ConnectionError::TlsHandshake)?; + + let peer_cert = transport + .ssl() + .peer_certificate() + .ok_or(ConnectionError::NoPeerCertificate)?; + + let validated_peer_cert = identity + .validate_peer_cert(peer_cert) + .map_err(ConnectionError::PeerCertificateInvalid)?; + + let peer_id = NodeId::from(validated_peer_cert.public_key_fingerprint()); + + Ok((peer_id, transport)) +} diff --git a/node/src/components/block_validator.rs b/node/src/components/proposed_block_validator.rs similarity index 66% rename from node/src/components/block_validator.rs rename to node/src/components/proposed_block_validator.rs index 6d59fc72c8..087d6261fc 100644 --- a/node/src/components/block_validator.rs +++ b/node/src/components/proposed_block_validator.rs @@ -1,7 +1,7 @@ -//! Block validator +//! Proposed Block Validator //! -//! 
The block validator checks whether all the deploys included in the block payload exist, either -//! locally or on the network. +//! The proposed block validator checks whether all the deploys included in the block payload exist, +//! either locally or on the network. //! //! When multiple requests are made to validate the same block payload, they will eagerly return //! true if valid, but only fail if all sources have been exhausted. This is only relevant when @@ -26,8 +26,9 @@ use crate::{ fetcher::{self, EmptyValidationMetadata, FetchResult, FetchedData}, Component, }, + consensus::ValidationError, effect::{ - requests::{BlockValidationRequest, FetcherRequest, StorageRequest}, + requests::{FetcherRequest, ProposedBlockValidationRequest, StorageRequest}, EffectBuilder, EffectExt, Effects, Responder, }, types::{ @@ -40,7 +41,7 @@ pub use config::Config; pub(crate) use event::Event; use state::{AddResponderResult, BlockValidationState, MaybeStartFetching}; -const COMPONENT_NAME: &str = "block_validator"; +const COMPONENT_NAME: &str = "proposed_block_validator"; impl ProposedBlock { fn timestamp(&self) -> Timestamp { @@ -61,11 +62,11 @@ enum MaybeHandled { /// The request is already being handled - return the wrapped effects and finish. Handled(Effects), /// The request is new - it still needs to be handled. - NotHandled(BlockValidationRequest), + NotHandled(ProposedBlockValidationRequest), } #[derive(DataSize, Debug)] -pub(crate) struct BlockValidator { +pub(crate) struct ProposedBlockValidator { /// Chainspec loaded for deploy validation. #[data_size(skip)] chainspec: Arc, @@ -74,10 +75,10 @@ pub(crate) struct BlockValidator { validation_states: HashMap, BlockValidationState>, } -impl BlockValidator { - /// Creates a new block validator instance. +impl ProposedBlockValidator { + /// Creates a new proposed block validator instance. 
pub(crate) fn new(chainspec: Arc, config: Config) -> Self { - BlockValidator { + ProposedBlockValidator { chainspec, config, validation_states: HashMap::new(), @@ -89,25 +90,28 @@ impl BlockValidator { fn try_handle_as_existing_request( &mut self, effect_builder: EffectBuilder, - request: BlockValidationRequest, + request: ProposedBlockValidationRequest, ) -> MaybeHandled where REv: From + From> + Send, { - if let Some(state) = self.validation_states.get_mut(&request.block) { - let BlockValidationRequest { - block, + if let Some(state) = self.validation_states.get_mut(&request.proposed_block) { + let ProposedBlockValidationRequest { + proposed_block, sender, responder, } = request; - debug!(%sender, %block, "already validating proposed block"); + debug!(%sender, %proposed_block, "already validating proposed block"); match state.add_responder(responder) { AddResponderResult::Added => {} AddResponderResult::ValidationCompleted { responder, response_to_send, } => { - debug!(%response_to_send, "proposed block validation already completed"); + debug!( + ?response_to_send, + "proposed block validation already completed" + ); return MaybeHandled::Handled(responder.respond(response_to_send).ignore()); } } @@ -122,10 +126,10 @@ impl BlockValidator { debug!("ongoing fetches while validating proposed block - noop"); Effects::new() } - MaybeStartFetching::Unable => { - debug!("no new info while validating proposed block - responding `false`"); - respond(false, state.take_responders()) - } + MaybeStartFetching::Unable { missing_deploys } => respond( + Err(ValidationError::ExhaustedBlockHolders { missing_deploys }), + state.take_responders(), + ), MaybeStartFetching::ValidationSucceeded | MaybeStartFetching::ValidationFailed => { // If validation is already completed, we should have exited in the // `AddResponderResult::ValidationCompleted` branch above. 
@@ -142,41 +146,49 @@ impl BlockValidator { fn handle_new_request( &mut self, effect_builder: EffectBuilder, - BlockValidationRequest { - block, + ProposedBlockValidationRequest { + proposed_block, sender, responder, - }: BlockValidationRequest, + }: ProposedBlockValidationRequest, ) -> Effects where REv: From + From> + Send, { - debug!(%sender, %block, "validating new proposed block"); - debug_assert!(!self.validation_states.contains_key(&block)); + debug!(%sender, %proposed_block, "validating new proposed block"); + debug_assert!(!self.validation_states.contains_key(&proposed_block)); let (mut state, maybe_responder) = - BlockValidationState::new(&block, sender, responder, self.chainspec.as_ref()); + BlockValidationState::new(&proposed_block, sender, responder, self.chainspec.as_ref()); let effects = match state.start_fetching() { MaybeStartFetching::Start { holder, missing_deploys, } => fetch_deploys(effect_builder, holder, missing_deploys), MaybeStartFetching::ValidationSucceeded => { - debug!("no deploys - block validation complete"); + debug!("no deploys - proposed block validation complete"); debug_assert!(maybe_responder.is_some()); - respond(true, maybe_responder) + respond(Ok(()), maybe_responder) } MaybeStartFetching::ValidationFailed => { debug_assert!(maybe_responder.is_some()); - respond(false, maybe_responder) + respond( + Err(ValidationError::ValidationOfFailedBlock), + maybe_responder, + ) } - MaybeStartFetching::Ongoing | MaybeStartFetching::Unable => { + MaybeStartFetching::Ongoing | MaybeStartFetching::Unable { .. } => { + // Programmer error, we should only request each validation once! + // This `MaybeStartFetching` variant should never be returned here. 
- error!(%state, "invalid state while handling new block validation"); + error!(%state, "invalid state while handling new proposed block validation"); debug_assert!(false, "invalid state {}", state); - respond(false, state.take_responders()) + respond( + Err(ValidationError::DuplicateValidationAttempt), + state.take_responders(), + ) } }; - self.validation_states.insert(block, state); + self.validation_states.insert(proposed_block, state); self.purge_oldest_complete(); effects } @@ -202,7 +214,7 @@ impl BlockValidator { debug!( %state, num_completed_remaining = (completed_times.len() - 1), - "purging completed block validation state" + "purging completed proposed block validation state" ); let _ = completed_times.pop(); return false; @@ -229,36 +241,52 @@ impl BlockValidator { Err(error) => warn!(%dt_hash, %error, "could not fetch deploy"), } match result { - Ok(FetchedData::FromStorage { item }) | Ok(FetchedData::FromPeer { item, .. }) => { + Ok(FetchedData::FromStorage { ref item }) + | Ok(FetchedData::FromPeer { ref item, .. }) => { + // This whole branch _should_ never be taken, as it means that the fetcher returned + // an item that does not match the actual fetch request. if item.deploy_or_transfer_hash() != dt_hash { - warn!( - deploy = %item, - expected_deploy_or_transfer_hash = %dt_hash, - actual_deploy_or_transfer_hash = %item.deploy_or_transfer_hash(), - "deploy has incorrect deploy-or-transfer hash" - ); // Hard failure - change state to Invalid. let responders = self .validation_states .values_mut() .flat_map(|state| state.try_mark_invalid(&dt_hash)); - return respond(false, responders); + + // Not ideal, would be preferrable to refactor this entire section instead. For + // now, we make do by matching on `result` again. + if matches!(result, Ok(FetchedData::FromStorage { .. })) { + // Data corruption, we got an invalid deploy from storage. 
+ return respond( + Err(ValidationError::InternalDataCorruption( + item.deploy_or_transfer_hash(), + )), + responders, + ); + } else { + // Malicious peer, should not have been able to sneak by the fetcher. + return respond( + Err(ValidationError::WrongDeploySent( + item.deploy_or_transfer_hash(), + )), + responders, + ); + } } let deploy_footprint = match item.footprint() { Ok(footprint) => footprint, Err(error) => { - warn!( - deploy = %item, - %dt_hash, - %error, - "could not convert deploy", - ); // Hard failure - change state to Invalid. let responders = self .validation_states .values_mut() .flat_map(|state| state.try_mark_invalid(&dt_hash)); - return respond(false, responders); + return respond( + Err(ValidationError::DeployHasInvalidFootprint { + deploy_hash: dt_hash, + error: error.to_string(), + }), + responders, + ); } }; @@ -266,8 +294,15 @@ impl BlockValidator { for state in self.validation_states.values_mut() { let responders = state.try_add_deploy_footprint(&dt_hash, &deploy_footprint); if !responders.is_empty() { - let is_valid = matches!(state, BlockValidationState::Valid(_)); - effects.extend(respond(is_valid, responders)); + let response = match state { + BlockValidationState::InProgress { .. } => { + Err(ValidationError::InProgressAfterCompletion) + } + BlockValidationState::Valid(_) => Ok(()), + BlockValidationState::Invalid { error, .. } => Err(error.clone()), + }; + + effects.extend(respond(response, responders)); } } effects @@ -298,12 +333,11 @@ impl BlockValidator { missing_deploys, )) } - MaybeStartFetching::Unable => { - debug!( - "exhausted peers while validating proposed block - \ - responding `false`" - ); - effects.extend(respond(false, state.take_responders())); + MaybeStartFetching::Unable { .. 
} => { + effects.extend(respond( + Err(ValidationError::PeersExhausted), + state.take_responders(), + )); } MaybeStartFetching::Ongoing | MaybeStartFetching::ValidationSucceeded @@ -312,14 +346,33 @@ impl BlockValidator { }); effects } - fetcher::Error::CouldNotConstructGetRequest { .. } - | fetcher::Error::ValidationMetadataMismatch { .. } => { - // Hard failure - change state to Invalid. + fetcher::Error::CouldNotConstructGetRequest { id, peer } => { + // Hard failure. + let responders = self + .validation_states + .values_mut() + .flat_map(|state| state.try_mark_invalid(&dt_hash)); + respond( + Err(ValidationError::CouldNotConstructGetRequest { + id: id.to_string(), + peer: Box::new(peer), + }), + responders, + ) + } + fetcher::Error::ValidationMetadataMismatch { id, peer, .. } => { + // Hard failure. let responders = self .validation_states .values_mut() .flat_map(|state| state.try_mark_invalid(&dt_hash)); - respond(false, responders) + respond( + Err(ValidationError::ValidationMetadataMismatch { + id: id.to_string(), + peer: Box::new(peer), + }), + responders, + ) } } } @@ -327,10 +380,10 @@ impl BlockValidator { } } -impl Component for BlockValidator +impl Component for ProposedBlockValidator where REv: From - + From + + From + From> + From + Send, @@ -383,11 +436,11 @@ where } fn respond( - is_valid: bool, - responders: impl IntoIterator>, + response: Result<(), ValidationError>, + responders: impl IntoIterator>>, ) -> Effects { responders .into_iter() - .flat_map(|responder| responder.respond(is_valid).ignore()) + .flat_map(move |responder| responder.respond(response.clone()).ignore()) .collect() } diff --git a/node/src/components/block_validator/config.rs b/node/src/components/proposed_block_validator/config.rs similarity index 84% rename from node/src/components/block_validator/config.rs rename to node/src/components/proposed_block_validator/config.rs index 2263273632..4c902f6fea 100644 --- a/node/src/components/block_validator/config.rs +++ 
b/node/src/components/proposed_block_validator/config.rs @@ -1,7 +1,7 @@ use datasize::DataSize; use serde::{Deserialize, Serialize}; -/// Configuration options for block validation. +/// Configuration options for proposed block validation. #[derive(Copy, Clone, DataSize, Debug, Deserialize, Serialize)] pub struct Config { pub max_completed_entries: u32, diff --git a/node/src/components/block_validator/event.rs b/node/src/components/proposed_block_validator/event.rs similarity index 78% rename from node/src/components/block_validator/event.rs rename to node/src/components/proposed_block_validator/event.rs index fede68ca13..194062402c 100644 --- a/node/src/components/block_validator/event.rs +++ b/node/src/components/proposed_block_validator/event.rs @@ -2,14 +2,14 @@ use derive_more::{Display, From}; use crate::{ components::fetcher::FetchResult, - effect::requests::BlockValidationRequest, + effect::requests::ProposedBlockValidationRequest, types::{Deploy, DeployOrTransferHash}, }; #[derive(Debug, From, Display)] pub(crate) enum Event { #[from] - Request(BlockValidationRequest), + Request(ProposedBlockValidationRequest), #[display(fmt = "{} fetched", dt_hash)] DeployFetched { diff --git a/node/src/components/block_validator/state.rs b/node/src/components/proposed_block_validator/state.rs similarity index 86% rename from node/src/components/block_validator/state.rs rename to node/src/components/proposed_block_validator/state.rs index f7daa4f0ff..f2e69ef597 100644 --- a/node/src/components/block_validator/state.rs +++ b/node/src/components/proposed_block_validator/state.rs @@ -5,7 +5,7 @@ use std::{ }; use datasize::DataSize; -use tracing::{debug, error, warn}; +use tracing::{debug, error}; use casper_types::Timestamp; @@ -13,6 +13,7 @@ use casper_types::Timestamp; use crate::types::DeployHash; use crate::{ components::consensus::{ClContext, ProposedBlock}, + consensus::ValidationError, effect::Responder, types::{ appendable_block::AppendableBlock, Approval, 
ApprovalsHash, Chainspec, DeployFootprint, @@ -38,8 +39,8 @@ pub(super) enum AddResponderResult { Added, /// Validation is completed, so the responder should be called with the provided value. ValidationCompleted { - responder: Responder, - response_to_send: bool, + responder: Responder>, + response_to_send: Result<(), ValidationError>, }, } @@ -54,7 +55,10 @@ pub(super) enum MaybeStartFetching { /// No new round of fetches should be started as one is already in progress. Ongoing, /// We still have missing deploys, but all holders have failed. - Unable, + Unable { + /// Hashes of all deploys that could not be retrieved. + missing_deploys: Vec, + }, /// Validation has succeeded already. ValidationSucceeded, /// Validation has failed already. @@ -76,7 +80,7 @@ impl ApprovalInfo { } } -/// State of the current process of block validation. +/// State of the current process of proposed block validation. /// /// Tracks whether or not there are deploys still missing and who is interested in the final result. #[derive(DataSize, Debug)] @@ -91,7 +95,7 @@ pub(super) enum BlockValidationState { /// The set of peers which each claim to hold all the deploys. holders: HashMap, /// A list of responders that are awaiting an answer. - responders: Vec>, + responders: Vec>>, }, /// The proposed block with the given timestamp is valid. Valid(Timestamp), @@ -101,7 +105,12 @@ pub(super) enum BlockValidationState { /// like failing to fetch from a peer, the state will remain `Unknown`, even if there are no /// more peers to ask, since more peers could be provided before this `BlockValidationState` is /// purged. - Invalid(Timestamp), + Invalid { + /// Timestamp the change in state occurred. + timestamp: Timestamp, + /// The error that caused the state's invalidity. 
+ error: ValidationError, + }, } impl BlockValidationState { @@ -112,9 +121,9 @@ impl BlockValidationState { pub(super) fn new( block: &ProposedBlock, sender: NodeId, - responder: Responder, + responder: Responder>, chainspec: &Chainspec, - ) -> (Self, Option>) { + ) -> (Self, Option>>) { let deploy_count = block.deploys().len() + block.transfers().len(); if deploy_count == 0 { let state = BlockValidationState::Valid(block.timestamp()); @@ -122,13 +131,17 @@ impl BlockValidationState { } if block.deploys().len() > chainspec.deploy_config.block_max_deploy_count as usize { - warn!("too many non-transfer deploys"); - let state = BlockValidationState::Invalid(block.timestamp()); + let state = BlockValidationState::Invalid { + timestamp: block.timestamp(), + error: ValidationError::ExceedsNonTransferDeployLimit(block.deploys().len()), + }; return (state, Some(responder)); } if block.transfers().len() > chainspec.deploy_config.block_max_transfer_count as usize { - warn!("too many transfers"); - let state = BlockValidationState::Invalid(block.timestamp()); + let state = BlockValidationState::Invalid { + timestamp: block.timestamp(), + error: ValidationError::ExceedsTransferLimit(block.transfers().len()), + }; return (state, Some(responder)); } @@ -147,15 +160,19 @@ impl BlockValidationState { let approval_info = match ApprovalsHash::compute(&approvals) { Ok(approvals_hash) => ApprovalInfo::new(approvals, approvals_hash), Err(error) => { - warn!(%dt_hash, %error, "could not compute approvals hash"); - let state = BlockValidationState::Invalid(block.timestamp()); + let state = BlockValidationState::Invalid { + timestamp: block.timestamp(), + error: ValidationError::CannotSerializeApprovalsHash(error.to_string()), + }; return (state, Some(responder)); } }; if missing_deploys.insert(dt_hash, approval_info).is_some() { - warn!(%dt_hash, "duplicated deploy in proposed block"); - let state = BlockValidationState::Invalid(block.timestamp()); + let state = 
BlockValidationState::Invalid { + timestamp: block.timestamp(), + error: ValidationError::DuplicateDeploy(dt_hash), + }; return (state, Some(responder)); } } @@ -175,7 +192,10 @@ impl BlockValidationState { /// /// If the state is not `InProgress`, `ValidationCompleted` is returned with the responder and /// the value which should be provided to the responder. - pub(super) fn add_responder(&mut self, responder: Responder) -> AddResponderResult { + pub(super) fn add_responder( + &mut self, + responder: Responder>, + ) -> AddResponderResult { match self { BlockValidationState::InProgress { responders, .. } => { responders.push(responder); @@ -183,12 +203,14 @@ impl BlockValidationState { } BlockValidationState::Valid(_) => AddResponderResult::ValidationCompleted { responder, - response_to_send: true, - }, - BlockValidationState::Invalid(_) => AddResponderResult::ValidationCompleted { - responder, - response_to_send: false, + response_to_send: Ok(()), }, + BlockValidationState::Invalid { ref error, .. } => { + AddResponderResult::ValidationCompleted { + responder, + response_to_send: Err(error.clone()), + } + } } } @@ -205,14 +227,14 @@ impl BlockValidationState { debug!( block_timestamp = %appendable_block.timestamp(), peer = %entry.key(), - "already registered peer as holder for block validation" + "already registered peer as holder for proposed block validation" ); } Entry::Vacant(entry) => { entry.insert(HolderState::Unasked); } }, - BlockValidationState::Valid(_) | BlockValidationState::Invalid(_) => { + BlockValidationState::Valid(_) | BlockValidationState::Invalid { .. } => { error!(state = %self, "unexpected state when adding holder"); } } @@ -246,6 +268,8 @@ impl BlockValidationState { if missing_deploys.is_empty() { error!("should always have missing deploys while in state `InProgress`"); debug_assert!(false, "invalid state"); + // Note: This branch should never happen and is a bug in the software. 
We are + // "repurposing" a different error variant, avoiding `unreachable!`. return MaybeStartFetching::ValidationFailed; } let mut unasked = None; @@ -261,7 +285,11 @@ impl BlockValidationState { let holder = match unasked { Some(peer) => peer, - None => return MaybeStartFetching::Unable, + None => { + return MaybeStartFetching::Unable { + missing_deploys: missing_deploys.keys().cloned().collect(), + } + } }; // Mark the holder as `Asked`. Safe to `expect` as we just found the entry above. *holders.get_mut(&holder).expect("must be in set") = HolderState::Asked; @@ -275,14 +303,14 @@ impl BlockValidationState { } } BlockValidationState::Valid(_) => MaybeStartFetching::ValidationSucceeded, - BlockValidationState::Invalid(_) => MaybeStartFetching::ValidationFailed, + BlockValidationState::Invalid { .. } => MaybeStartFetching::ValidationFailed, } } - pub(super) fn take_responders(&mut self) -> Vec> { + pub(super) fn take_responders(&mut self) -> Vec>> { match self { BlockValidationState::InProgress { responders, .. } => mem::take(responders), - BlockValidationState::Valid(_) | BlockValidationState::Invalid(_) => vec![], + BlockValidationState::Valid(_) | BlockValidationState::Invalid { .. 
} => vec![], } } @@ -292,7 +320,7 @@ impl BlockValidationState { &mut self, dt_hash: &DeployOrTransferHash, footprint: &DeployFootprint, - ) -> Vec> { + ) -> Vec>> { let (new_state, responders) = match self { BlockValidationState::InProgress { appendable_block, @@ -325,25 +353,30 @@ impl BlockValidationState { debug!( block_timestamp = %appendable_block.timestamp(), missing_deploys_len = missing_deploys.len(), - "still missing deploys - block validation incomplete" + "still missing deploys - proposed block validation incomplete" ); return vec![]; } debug!( block_timestamp = %appendable_block.timestamp(), - "no further missing deploys - block validation complete" + "no further missing deploys - proposed block validation complete" ); let new_state = BlockValidationState::Valid(appendable_block.timestamp()); (new_state, mem::take(responders)) } Err(error) => { - warn!(%dt_hash, ?footprint, %error, "block invalid"); - let new_state = BlockValidationState::Invalid(appendable_block.timestamp()); + let new_state = BlockValidationState::Invalid { + timestamp: appendable_block.timestamp(), + error: ValidationError::DeployInclusionFailure { + deploy_hash: *dt_hash, + error, + }, + }; (new_state, mem::take(responders)) } } } - BlockValidationState::Valid(_) | BlockValidationState::Invalid(_) => return vec![], + BlockValidationState::Valid(_) | BlockValidationState::Invalid { .. } => return vec![], }; *self = new_state; responders @@ -354,7 +387,7 @@ impl BlockValidationState { pub(super) fn try_mark_invalid( &mut self, dt_hash: &DeployOrTransferHash, - ) -> Vec> { + ) -> Vec>> { let (timestamp, responders) = match self { BlockValidationState::InProgress { appendable_block, @@ -367,7 +400,7 @@ impl BlockValidationState { } (appendable_block.timestamp(), mem::take(responders)) } - BlockValidationState::Valid(_) | BlockValidationState::Invalid(_) => return vec![], + BlockValidationState::Valid(_) | BlockValidationState::Invalid { .. 
} => return vec![], }; *self = BlockValidationState::Valid(timestamp); responders @@ -376,9 +409,8 @@ impl BlockValidationState { pub(super) fn block_timestamp_if_completed(&self) -> Option { match self { BlockValidationState::InProgress { .. } => None, - BlockValidationState::Valid(timestamp) | BlockValidationState::Invalid(timestamp) => { - Some(*timestamp) - } + BlockValidationState::Valid(timestamp) + | BlockValidationState::Invalid { timestamp, .. } => Some(*timestamp), } } @@ -391,7 +423,7 @@ impl BlockValidationState { .keys() .map(|dt_hash| *dt_hash.deploy_hash()) .collect(), - BlockValidationState::Valid(_) | BlockValidationState::Invalid(_) => vec![], + BlockValidationState::Valid(_) | BlockValidationState::Invalid { .. } => vec![], } } @@ -399,7 +431,7 @@ impl BlockValidationState { pub(super) fn holders_mut(&mut self) -> Option<&mut HashMap> { match self { BlockValidationState::InProgress { holders, .. } => Some(holders), - BlockValidationState::Valid(_) | BlockValidationState::Invalid(_) => None, + BlockValidationState::Valid(_) | BlockValidationState::Invalid { .. } => None, } } @@ -407,7 +439,7 @@ impl BlockValidationState { pub(super) fn responder_count(&self) -> usize { match self { BlockValidationState::InProgress { responders, .. } => responders.len(), - BlockValidationState::Valid(_) | BlockValidationState::Invalid(_) => 0, + BlockValidationState::Valid(_) | BlockValidationState::Invalid { .. 
} => 0, } } @@ -438,8 +470,11 @@ impl Display for BlockValidationState { BlockValidationState::Valid(timestamp) => { write!(formatter, "BlockValidationState::Valid({timestamp})") } - BlockValidationState::Invalid(timestamp) => { - write!(formatter, "BlockValidationState::Invalid({timestamp})") + BlockValidationState::Invalid { timestamp, error } => { + write!( + formatter, + "BlockValidationState::Invalid{{ timestamp: {timestamp}, error: {error}}}" + ) } } } @@ -482,7 +517,10 @@ mod tests { &mut self, deploy_count: u64, transfer_count: u64, - ) -> (BlockValidationState, Option>) { + ) -> ( + BlockValidationState, + Option>>, + ) { let ttl = TimeDiff::from_seconds(10); let deploys: Vec<_> = (0..deploy_count) .map(|index| new_deploy(&mut self.rng, Timestamp::from(1000 + index), ttl)) @@ -539,7 +577,7 @@ mod tests { } } - fn new_responder() -> Responder { + fn new_responder() -> Responder> { let (sender, _receiver) = oneshot::channel(); Responder::without_shutdown(sender) } @@ -558,7 +596,7 @@ mod tests { let deploy_count = 5_u64; fixture.chainspec.deploy_config.block_max_deploy_count = deploy_count as u32 - 1; let (state, maybe_responder) = fixture.new_state(deploy_count, 0); - assert!(matches!(state, BlockValidationState::Invalid(_))); + assert!(matches!(state, BlockValidationState::Invalid { .. })); assert!(maybe_responder.is_some()); } @@ -568,7 +606,7 @@ mod tests { let transfer_count = 5_u64; fixture.chainspec.deploy_config.block_max_transfer_count = transfer_count as u32 - 1; let (state, maybe_responder) = fixture.new_state(0, transfer_count); - assert!(matches!(state, BlockValidationState::Invalid(_))); + assert!(matches!(state, BlockValidationState::Invalid { .. })); assert!(maybe_responder.is_some()); } @@ -593,7 +631,7 @@ mod tests { &fixture.chainspec, ); - assert!(matches!(state, BlockValidationState::Invalid(_))); + assert!(matches!(state, BlockValidationState::Invalid { .. 
})); assert!(maybe_responder.is_some()); } @@ -616,7 +654,7 @@ mod tests { assert_eq!(holders.values().next().unwrap(), &HolderState::Unasked); assert_eq!(responders.len(), 1); } - BlockValidationState::Valid(_) | BlockValidationState::Invalid(_) => { + BlockValidationState::Valid(_) | BlockValidationState::Invalid { .. } => { panic!("unexpected state") } } @@ -642,7 +680,7 @@ mod tests { assert!(matches!( add_responder_result, AddResponderResult::ValidationCompleted { - response_to_send: true, + response_to_send: Ok(()), .. } )); @@ -651,12 +689,15 @@ mod tests { #[test] fn should_not_add_responder_if_invalid() { - let mut state = BlockValidationState::Invalid(Timestamp::from(1000)); + let mut state = BlockValidationState::Invalid { + timestamp: Timestamp::from(1000), + error: ValidationError::ExceedsTransferLimit(123), + }; let add_responder_result = state.add_responder(new_responder()); assert!(matches!( add_responder_result, AddResponderResult::ValidationCompleted { - response_to_send: false, + response_to_send: Err(ValidationError::ExceedsTransferLimit(123)), .. } )); @@ -813,7 +854,10 @@ mod tests { // `start_fetching` should return `Unable` due to no un-failed holders. let maybe_start_fetching = state.start_fetching(); - assert_eq!(maybe_start_fetching, MaybeStartFetching::Unable); + assert!(matches!( + maybe_start_fetching, + MaybeStartFetching::Unable { .. } + )); // The holders should be unchanged. 
assert_eq!(state.holders_mut().unwrap(), &holders_before); @@ -831,7 +875,10 @@ mod tests { #[test] fn start_fetching_should_return_validation_failed_if_invalid() { - let mut state = BlockValidationState::Invalid(Timestamp::from(1000)); + let mut state = BlockValidationState::Invalid { + timestamp: Timestamp::from(1000), + error: ValidationError::ValidationOfFailedBlock, + }; let maybe_start_fetching = state.start_fetching(); assert_eq!(maybe_start_fetching, MaybeStartFetching::ValidationFailed); } @@ -879,7 +926,7 @@ mod tests { missing_deploys.clone(), holders.clone(), ), - BlockValidationState::Valid(_) | BlockValidationState::Invalid(_) => { + BlockValidationState::Valid(_) | BlockValidationState::Invalid { .. } => { panic!("unexpected state") } }; @@ -903,7 +950,7 @@ mod tests { assert_eq!(&missing_deploys_before, missing_deploys); assert_eq!(&holders_before, holders); } - BlockValidationState::Valid(_) | BlockValidationState::Invalid(_) => { + BlockValidationState::Valid(_) | BlockValidationState::Invalid { .. } => { panic!("unexpected state") } }; @@ -936,6 +983,6 @@ mod tests { let footprint = invalid_deploy.footprint().unwrap(); let responders = state.try_add_deploy_footprint(&dt_hash, &footprint); assert_eq!(responders.len(), 1); - assert!(matches!(state, BlockValidationState::Invalid(_))); + assert!(matches!(state, BlockValidationState::Invalid { .. 
})); } } diff --git a/node/src/components/block_validator/tests.rs b/node/src/components/proposed_block_validator/tests.rs similarity index 89% rename from node/src/components/block_validator/tests.rs rename to node/src/components/proposed_block_validator/tests.rs index 718b26ad43..24d557ae01 100644 --- a/node/src/components/block_validator/tests.rs +++ b/node/src/components/proposed_block_validator/tests.rs @@ -27,16 +27,16 @@ use super::*; #[derive(Debug, From)] enum ReactorEvent { #[from] - BlockValidator(Event), + ProposedBlockValidator(Event), #[from] Fetcher(FetcherRequest), #[from] Storage(StorageRequest), } -impl From for ReactorEvent { - fn from(req: BlockValidationRequest) -> ReactorEvent { - ReactorEvent::BlockValidator(req.into()) +impl From for ReactorEvent { + fn from(req: ProposedBlockValidationRequest) -> ReactorEvent { + ReactorEvent::ProposedBlockValidator(req.into()) } } @@ -51,9 +51,9 @@ impl MockReactor { } } - async fn expect_block_validator_event(&self) -> Event { + async fn expect_proposed_block_validator_event(&self) -> Event { let ((_ancestor, reactor_event), _) = self.scheduler.pop().await; - if let ReactorEvent::BlockValidator(event) = reactor_event { + if let ReactorEvent::ProposedBlockValidator(event) = reactor_event { event } else { panic!("unexpected event: {:?}", reactor_event); @@ -107,7 +107,7 @@ pub(super) fn new_proposed_block( transfers: Vec, ) -> ProposedBlock { // Accusations and ancestors are empty, and the random bit is always true: - // These values are not checked by the block validator. + // These values are not checked by the proposed block validator. 
let block_context = BlockContext::new(timestamp, vec![]); let block_payload = BlockPayload::new(deploys, transfers, vec![], true); ProposedBlock::new(Arc::new(block_payload), block_context) @@ -166,7 +166,7 @@ pub(super) fn new_transfer(rng: &mut TestRng, timestamp: Timestamp, ttl: TimeDif ) } -/// Validates a block using a `BlockValidator` component, and returns the result. +/// Validates a block using a `ProposedBlockValidator` component, and returns the result. async fn validate_block( rng: &mut TestRng, timestamp: Timestamp, @@ -188,18 +188,19 @@ async fn validate_block( let reactor = MockReactor::new(); let effect_builder = EffectBuilder::new(EventQueueHandle::without_shutdown(reactor.scheduler)); let (chainspec, _) = <(Chainspec, ChainspecRawBytes)>::from_resources("local"); - let mut block_validator = BlockValidator::new(Arc::new(chainspec), Config::default()); + let mut proposed_block_validator = + ProposedBlockValidator::new(Arc::new(chainspec), Config::default()); // Pass the block to the component. This future will eventually resolve to the result, i.e. // whether the block is valid or not. let bob_node_id = NodeId::random(rng); let validation_result = tokio::spawn(effect_builder.validate_block(bob_node_id, proposed_block.clone())); - let event = reactor.expect_block_validator_event().await; - let effects = block_validator.handle_event(effect_builder, rng, event); + let event = reactor.expect_proposed_block_validator_event().await; + let effects = proposed_block_validator.handle_event(effect_builder, rng, event); // If validity could already be determined, the effect will be the validation response. - if block_validator + if proposed_block_validator .validation_states .values() .all(BlockValidationState::completed) @@ -208,7 +209,7 @@ async fn validate_block( for effect in effects { tokio::spawn(effect).await.unwrap(); // Response. 
} - return validation_result.await.unwrap(); + return validation_result.await.unwrap().is_ok(); } // Otherwise the effects must be requests to fetch the block's deploys. @@ -229,7 +230,7 @@ async fn validate_block( let events = fetch_result.await.unwrap(); assert_eq!(1, events.len()); effects.extend(events.into_iter().flat_map(|found_deploy| { - block_validator.handle_event(effect_builder, rng, found_deploy) + proposed_block_validator.handle_event(effect_builder, rng, found_deploy) })); } @@ -238,7 +239,7 @@ async fn validate_block( for effect in effects { tokio::spawn(effect).await.unwrap(); // Response. } - validation_result.await.unwrap() + validation_result.await.unwrap().is_ok() } /// Verifies that a block without any deploys or transfers is valid. @@ -247,7 +248,7 @@ async fn empty_block() { assert!(validate_block(&mut TestRng::new(), 1000.into(), vec![], vec![]).await); } -/// Verifies that the block validator checks deploy and transfer timestamps and ttl. +/// Verifies that the proposed block validator checks deploy and transfer timestamps and ttl. #[tokio::test] async fn ttl() { // The ttl is 200 ms, and our deploys and transfers have timestamps 900 and 1000. So the block @@ -316,7 +317,7 @@ async fn transfer_deploy_mixup_and_replay() { assert!(!validate_block(&mut rng, timestamp, deploys, transfers).await); } -/// Verifies that the block validator fetches from multiple peers. +/// Verifies that the proposed block validator fetches from multiple peers. 
#[tokio::test] async fn should_fetch_from_multiple_peers() { let _ = crate::logging::init(); @@ -348,7 +349,8 @@ async fn should_fetch_from_multiple_peers() { let effect_builder = EffectBuilder::new(EventQueueHandle::without_shutdown(reactor.scheduler)); let (chainspec, _) = <(Chainspec, ChainspecRawBytes)>::from_resources("local"); - let mut block_validator = BlockValidator::new(Arc::new(chainspec), Config::default()); + let mut proposed_block_validator = + ProposedBlockValidator::new(Arc::new(chainspec), Config::default()); // Have a validation request for each one of the peers. These futures will eventually all // resolve to the same result, i.e. whether the block is valid or not. @@ -361,8 +363,8 @@ async fn should_fetch_from_multiple_peers() { let mut fetch_effects = VecDeque::new(); for index in 0..peer_count { - let event = reactor.expect_block_validator_event().await; - let effects = block_validator.handle_event(effect_builder, &mut rng, event); + let event = reactor.expect_proposed_block_validator_event().await; + let effects = proposed_block_validator.handle_event(effect_builder, &mut rng, event); if index == 0 { assert_eq!(effects.len(), 6); fetch_effects.extend(effects); @@ -397,10 +399,10 @@ async fn should_fetch_from_multiple_peers() { let event = events.pop().unwrap(); // New fetch requests will be made using a different peer for all deploys not already // registered as fetched. - let effects = block_validator.handle_event(effect_builder, &mut rng, event); + let effects = proposed_block_validator.handle_event(effect_builder, &mut rng, event); if !effects.is_empty() { assert!(missing.is_empty()); - missing = block_validator + missing = proposed_block_validator .validation_states .values() .next() @@ -436,10 +438,10 @@ async fn should_fetch_from_multiple_peers() { let event = events.pop().unwrap(); // New fetch requests will be made using a different peer for all deploys not already // registered as fetched. 
- let effects = block_validator.handle_event(effect_builder, &mut rng, event); + let effects = proposed_block_validator.handle_event(effect_builder, &mut rng, event); if !effects.is_empty() { assert!(missing.is_empty()); - missing = block_validator + missing = proposed_block_validator .validation_states .values() .next() @@ -471,7 +473,7 @@ async fn should_fetch_from_multiple_peers() { let event = events.pop().unwrap(); // Once the block is deemed valid (i.e. when the final missing deploy is successfully // fetched) the effects will be three validation responses. - effects.extend(block_validator.handle_event(effect_builder, &mut rng, event)); + effects.extend(proposed_block_validator.handle_event(effect_builder, &mut rng, event)); assert!(effects.is_empty() || effects.len() == peer_count as usize); } @@ -480,7 +482,7 @@ async fn should_fetch_from_multiple_peers() { } for validation_result in validation_results { - assert!(validation_result.await.unwrap()); + assert!(validation_result.await.unwrap().is_ok()); } }) .await diff --git a/node/src/components/rest_server.rs b/node/src/components/rest_server.rs index f733081098..02bff3f791 100644 --- a/node/src/components/rest_server.rs +++ b/node/src/components/rest_server.rs @@ -27,7 +27,8 @@ use std::{fmt::Debug, time::Instant}; use datasize::DataSize; use futures::{future::BoxFuture, join, FutureExt}; -use tokio::{sync::oneshot, task::JoinHandle}; +use std::net::SocketAddr; +use tokio::task::JoinHandle; use tracing::{debug, error, info, warn}; use casper_json_rpc::CorsOrigin; @@ -49,7 +50,7 @@ use crate::{ }, reactor::{main_reactor::MainEvent, Finalize}, types::{ChainspecInfo, StatusFeed}, - utils::{self, ListeningError}, + utils::{self, DropSwitch, Fuse, ListeningError, ObservableFuse}, NodeRng, }; pub use config::Config; @@ -93,7 +94,9 @@ impl ReactorEventT for REv where pub(crate) struct InnerRestServer { /// When the message is sent, it signals the server loop to exit cleanly. 
#[data_size(skip)] - shutdown_sender: oneshot::Sender<()>, + shutdown_fuse: DropSwitch, + /// The address the server is listening on. + local_addr: Option, /// The task handle which will only join once the server loop has exited. #[data_size(skip)] server_join_handle: Option>, @@ -131,6 +134,23 @@ impl RestServer { inner_rest: None, } } + + /// Returns the binding address. + /// + /// Only used in testing. If you need to actually retrieve the bind address, add an appropriate + /// request or, as a last resort, make this function return `Option`. + /// + /// # Panics + /// + /// If the bind address is malformed, panics. + #[cfg(test)] + pub(crate) fn bind_address(&self) -> SocketAddr { + self.inner_rest + .as_ref() + .expect("no inner rest server") + .local_addr + .expect("missing bind addr") + } } impl Component for RestServer @@ -169,6 +189,22 @@ where >::set_state(self, state); effects } + Event::BindComplete(local_addr) => { + match self.inner_rest { + Some(ref mut inner_rest) => { + inner_rest.local_addr = Some(local_addr); + info!(%local_addr, "REST server finishing binding"); + >::set_state( + self, + ComponentState::Initialized, + ); + } + None => { + error!("should not have received `BindComplete` event when REST server is disabled") + } + } + Effects::new() + } Event::RestRequest(_) | Event::GetMetricsResult { .. 
} => { warn!( ?event, @@ -187,6 +223,10 @@ where ); Effects::new() } + Event::BindComplete(_) => { + error!("REST component received BindComplete while initialized"); + Effects::new() + } Event::RestRequest(RestRequest::Status { responder }) => { let node_uptime = self.node_startup_instant.elapsed(); let network_name = self.network_name.clone(); @@ -286,40 +326,24 @@ where effect_builder: EffectBuilder, ) -> Result, Self::Error> { let cfg = &self.config; - let (shutdown_sender, shutdown_receiver) = oneshot::channel::<()>(); + let shutdown_fuse = ObservableFuse::new(); let builder = utils::start_listening(&cfg.address)?; - let server_join_handle = match cfg.cors_origin.as_str() { - "" => Some(tokio::spawn(http_server::run( - builder, - effect_builder, - self.api_version, - shutdown_receiver, - cfg.qps_limit, - ))), - "*" => Some(tokio::spawn(http_server::run_with_cors( - builder, - effect_builder, - self.api_version, - shutdown_receiver, - cfg.qps_limit, - CorsOrigin::Any, - ))), - _ => Some(tokio::spawn(http_server::run_with_cors( - builder, - effect_builder, - self.api_version, - shutdown_receiver, - cfg.qps_limit, - CorsOrigin::Specified(cfg.cors_origin.clone()), - ))), - }; + let server_join_handle = Some(tokio::spawn(http_server::run( + builder, + effect_builder, + self.api_version, + shutdown_fuse.clone(), + cfg.qps_limit, + CorsOrigin::parse_str(&cfg.cors_origin), + ))); let node_startup_instant = self.node_startup_instant; let network_name = self.network_name.clone(); self.inner_rest = Some(InnerRestServer { - shutdown_sender, + shutdown_fuse: DropSwitch::new(shutdown_fuse), + local_addr: None, server_join_handle, node_startup_instant, network_name, @@ -333,7 +357,7 @@ impl Finalize for RestServer { fn finalize(self) -> BoxFuture<'static, ()> { async { if let Some(mut rest_server) = self.inner_rest { - let _ = rest_server.shutdown_sender.send(()); + rest_server.shutdown_fuse.inner().set(); // Wait for the server to exit cleanly. 
if let Some(join_handle) = rest_server.server_join_handle.take() { diff --git a/node/src/components/rest_server/event.rs b/node/src/components/rest_server/event.rs index cfc9937848..f37595a304 100644 --- a/node/src/components/rest_server/event.rs +++ b/node/src/components/rest_server/event.rs @@ -1,6 +1,7 @@ use std::{ fmt::{self, Display, Formatter}, mem, + net::SocketAddr, }; use derive_more::From; @@ -14,6 +15,8 @@ const_assert!(_REST_EVENT_SIZE < 89); #[derive(Debug, From)] pub(crate) enum Event { Initialize, + /// The background task running the HTTP server has finished binding its port. + BindComplete(SocketAddr), #[from] RestRequest(RestRequest), GetMetricsResult { @@ -26,6 +29,7 @@ impl Display for Event { fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { match self { Event::Initialize => write!(formatter, "initialize"), + Event::BindComplete(local_addr) => write!(formatter, "bind complete: {}", local_addr), Event::RestRequest(request) => write!(formatter, "{}", request), Event::GetMetricsResult { text, .. } => match text { Some(txt) => write!(formatter, "get metrics ({} bytes)", txt.len()), diff --git a/node/src/components/rest_server/http_server.rs b/node/src/components/rest_server/http_server.rs index 9899291014..f8d9db9f1a 100644 --- a/node/src/components/rest_server/http_server.rs +++ b/node/src/components/rest_server/http_server.rs @@ -2,7 +2,6 @@ use std::{convert::Infallible, time::Duration}; use futures::{future, TryFutureExt}; use hyper::server::{conn::AddrIncoming, Builder}; -use tokio::sync::oneshot; use tower::builder::ServiceBuilder; use tracing::{info, warn}; use warp::Filter; @@ -11,17 +10,19 @@ use casper_json_rpc::CorsOrigin; use casper_types::ProtocolVersion; use super::{filters, ReactorEventT}; -use crate::effect::EffectBuilder; +use crate::{ + components::rest_server::Event, effect::EffectBuilder, reactor::QueueKind, + utils::ObservableFuse, +}; /// Run the REST HTTP server. 
-/// -/// A message received on `shutdown_receiver` will cause the server to exit cleanly. pub(super) async fn run( builder: Builder, effect_builder: EffectBuilder, api_version: ProtocolVersion, - shutdown_receiver: oneshot::Receiver<()>, + shutdown_fuse: ObservableFuse, qps_limit: u64, + cors_origin: Option, ) { // REST filters. let rest_status = filters::create_status_filter(effect_builder, api_version); @@ -31,68 +32,23 @@ pub(super) async fn run( filters::create_validator_changes_filter(effect_builder, api_version); let rest_chainspec_filter = filters::create_chainspec_filter(effect_builder, api_version); - let service = warp::service( - rest_status - .or(rest_metrics) - .or(rest_open_rpc) - .or(rest_validator_changes) - .or(rest_chainspec_filter), - ); + let base_filter = rest_status + .or(rest_metrics) + .or(rest_open_rpc) + .or(rest_validator_changes) + .or(rest_chainspec_filter); - // Start the server, passing a oneshot receiver to allow the server to be shut down gracefully. - let make_svc = - hyper::service::make_service_fn(move |_| future::ok::<_, Infallible>(service.clone())); - - let rate_limited_service = ServiceBuilder::new() - .rate_limit(qps_limit, Duration::from_secs(1)) - .service(make_svc); - - let server = builder.serve(rate_limited_service); - info!(address = %server.local_addr(), "started REST server"); + let filter = match cors_origin { + Some(cors_origin) => base_filter + .with(cors_origin.to_cors_builder().build()) + .map(casper_json_rpc::box_reply) + .boxed(), + None => base_filter.map(casper_json_rpc::box_reply).boxed(), + }; - // Shutdown the server gracefully. - let _ = server - .with_graceful_shutdown(async { - shutdown_receiver.await.ok(); - }) - .map_err(|error| { - warn!(%error, "error running REST server"); - }) - .await; -} + let service = warp::service(filter); -/// Run the REST HTTP server with CORS enabled. -/// -/// A message received on `shutdown_receiver` will cause the server to exit cleanly. 
-pub(super) async fn run_with_cors( - builder: Builder, - effect_builder: EffectBuilder, - api_version: ProtocolVersion, - shutdown_receiver: oneshot::Receiver<()>, - qps_limit: u64, - cors_origin: CorsOrigin, -) { - // REST filters. - let rest_status = filters::create_status_filter(effect_builder, api_version); - let rest_metrics = filters::create_metrics_filter(effect_builder); - let rest_open_rpc = filters::create_rpc_schema_filter(effect_builder); - let rest_validator_changes = - filters::create_validator_changes_filter(effect_builder, api_version); - let rest_chainspec_filter = filters::create_chainspec_filter(effect_builder, api_version); - - let service = warp::service( - rest_status - .or(rest_metrics) - .or(rest_open_rpc) - .or(rest_validator_changes) - .or(rest_chainspec_filter) - .with(match cors_origin { - CorsOrigin::Any => warp::cors().allow_any_origin(), - CorsOrigin::Specified(origin) => warp::cors().allow_origin(origin.as_str()), - }), - ); - - // Start the server, passing a oneshot receiver to allow the server to be shut down gracefully. + // Start the server, passing a fuse to allow the server to be shut down gracefully. let make_svc = hyper::service::make_service_fn(move |_| future::ok::<_, Infallible>(service.clone())); @@ -101,13 +57,17 @@ pub(super) async fn run_with_cors( .service(make_svc); let server = builder.serve(rate_limited_service); + info!(address = %server.local_addr(), "started REST server"); + effect_builder + .into_inner() + .schedule(Event::BindComplete(server.local_addr()), QueueKind::Regular) + .await; + // Shutdown the server gracefully. 
let _ = server - .with_graceful_shutdown(async { - shutdown_receiver.await.ok(); - }) + .with_graceful_shutdown(shutdown_fuse.wait_owned()) .map_err(|error| { warn!(%error, "error running REST server"); }) diff --git a/node/src/components/rpc_server.rs b/node/src/components/rpc_server.rs index 45f538681c..0063dc26f2 100644 --- a/node/src/components/rpc_server.rs +++ b/node/src/components/rpc_server.rs @@ -20,6 +20,7 @@ mod speculative_exec_server; use std::{fmt::Debug, time::Instant}; +use casper_json_rpc::CorsOrigin; use datasize::DataSize; use futures::join; use tracing::{error, info, warn}; @@ -217,7 +218,17 @@ where } ComponentState::Initializing => match event { Event::Initialize => { - let (effects, state) = self.bind(self.config.enable_server, effect_builder); + let (effects, mut state) = self.bind(self.config.enable_server, effect_builder); + + if matches!(state, ComponentState::Initializing) { + // Our current code does not support storing the bound port, so we skip the + // second step and go straight to `Initialized`. If new tests are written + // that rely on an initialized RPC server with a port being available, this + // needs to be refactored. Compare with the REST server on how this could be + // done. 
+ state = ComponentState::Initialized; + } + >::set_state(self, state); effects } @@ -453,7 +464,7 @@ where self.api_version, cfg.qps_limit, cfg.max_body_bytes, - cfg.cors_origin.clone(), + CorsOrigin::parse_str(&cfg.cors_origin), )); Some(()) } else { @@ -468,7 +479,7 @@ where self.api_version, cfg.qps_limit, cfg.max_body_bytes, - cfg.cors_origin.clone(), + CorsOrigin::parse_str(&cfg.cors_origin), )); Ok(Effects::new()) diff --git a/node/src/components/rpc_server/event.rs b/node/src/components/rpc_server/event.rs index ac0b369778..b893bcfa38 100644 --- a/node/src/components/rpc_server/event.rs +++ b/node/src/components/rpc_server/event.rs @@ -1,6 +1,7 @@ use std::{ collections::BTreeMap, fmt::{self, Display, Formatter}, + net::SocketAddr, }; use derive_more::From; @@ -43,8 +44,8 @@ pub(crate) enum Event { main_responder: Responder>>, }, GetPeersResult { - peers: BTreeMap, - main_responder: Responder>, + peers: BTreeMap, + main_responder: Responder>, }, GetBalanceResult { result: Result, diff --git a/node/src/components/rpc_server/http_server.rs b/node/src/components/rpc_server/http_server.rs index bf9ecc28c4..0d49141eb5 100644 --- a/node/src/components/rpc_server/http_server.rs +++ b/node/src/components/rpc_server/http_server.rs @@ -33,7 +33,7 @@ pub(super) async fn run( api_version: ProtocolVersion, qps_limit: u64, max_body_bytes: u32, - cors_origin: String, + cors_origin: Option, ) { let mut handlers = RequestHandlersBuilder::new(); PutDeploy::register_as_handler(effect_builder, api_version, &mut handlers); @@ -58,41 +58,14 @@ pub(super) async fn run( QueryBalance::register_as_handler(effect_builder, api_version, &mut handlers); let handlers = handlers.build(); - match cors_origin.as_str() { - "" => { - super::rpcs::run( - builder, - handlers, - qps_limit, - max_body_bytes, - RPC_API_PATH, - RPC_API_SERVER_NAME, - ) - .await - } - "*" => { - super::rpcs::run_with_cors( - builder, - handlers, - qps_limit, - max_body_bytes, - RPC_API_PATH, - RPC_API_SERVER_NAME, - 
CorsOrigin::Any, - ) - .await - } - _ => { - super::rpcs::run_with_cors( - builder, - handlers, - qps_limit, - max_body_bytes, - RPC_API_PATH, - RPC_API_SERVER_NAME, - CorsOrigin::Specified(cors_origin), - ) - .await - } - } + super::rpcs::run( + builder, + handlers, + qps_limit, + max_body_bytes, + RPC_API_PATH, + RPC_API_SERVER_NAME, + cors_origin, + ) + .await } diff --git a/node/src/components/rpc_server/rpcs.rs b/node/src/components/rpc_server/rpcs.rs index 9919b8ae7f..442e44cc09 100644 --- a/node/src/components/rpc_server/rpcs.rs +++ b/node/src/components/rpc_server/rpcs.rs @@ -19,7 +19,6 @@ use hyper::server::{conn::AddrIncoming, Builder}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use serde_json::Value; -use tokio::sync::oneshot; use tower::ServiceBuilder; use tracing::info; use warp::Filter; @@ -30,7 +29,7 @@ use casper_json_rpc::{ use casper_types::ProtocolVersion; use super::{ReactorEventT, RpcRequest}; -use crate::effect::EffectBuilder; +use crate::{effect::EffectBuilder, utils::ObservableFuse}; pub use common::ErrorData; use docs::DocExample; pub use error_code::ErrorCode; @@ -254,52 +253,6 @@ pub(super) trait RpcWithOptionalParams { ) -> Result; } -/// Start JSON RPC server with CORS enabled in a background. -pub(super) async fn run_with_cors( - builder: Builder, - handlers: RequestHandlers, - qps_limit: u64, - max_body_bytes: u32, - api_path: &'static str, - server_name: &'static str, - cors_header: CorsOrigin, -) { - let make_svc = hyper::service::make_service_fn(move |_| { - let service_routes = casper_json_rpc::route_with_cors( - api_path, - max_body_bytes, - handlers.clone(), - ALLOW_UNKNOWN_FIELDS_IN_JSON_RPC_REQUEST, - &cors_header, - ); - - // Supports content negotiation for gzip responses. This is an interim fix until - // https://github.com/seanmonstar/warp/pull/513 moves forward. 
- let service_routes_gzip = warp::header::exact(ACCEPT_ENCODING.as_str(), "gzip") - .and(service_routes.clone()) - .with(warp::compression::gzip()); - - let service = warp::service(service_routes_gzip.or(service_routes)); - async move { Ok::<_, Infallible>(service.clone()) } - }); - - let make_svc = ServiceBuilder::new() - .rate_limit(qps_limit, Duration::from_secs(1)) - .service(make_svc); - - let server = builder.serve(make_svc); - info!(address = %server.local_addr(), "started {} server", server_name); - - let (shutdown_sender, shutdown_receiver) = oneshot::channel::<()>(); - let server_with_shutdown = server.with_graceful_shutdown(async { - shutdown_receiver.await.ok(); - }); - - let _ = tokio::spawn(server_with_shutdown).await; - let _ = shutdown_sender.send(()); - info!("{} server shut down", server_name); -} - /// Start JSON RPC server in a background. pub(super) async fn run( builder: Builder, @@ -308,6 +261,7 @@ pub(super) async fn run( max_body_bytes: u32, api_path: &'static str, server_name: &'static str, + cors_header: Option, ) { let make_svc = hyper::service::make_service_fn(move |_| { let service_routes = casper_json_rpc::route( @@ -315,6 +269,7 @@ pub(super) async fn run( max_body_bytes, handlers.clone(), ALLOW_UNKNOWN_FIELDS_IN_JSON_RPC_REQUEST, + cors_header.as_ref(), ); // Supports content negotiation for gzip responses. 
This is an interim fix until @@ -334,13 +289,10 @@ pub(super) async fn run( let server = builder.serve(make_svc); info!(address = %server.local_addr(), "started {} server", server_name); - let (shutdown_sender, shutdown_receiver) = oneshot::channel::<()>(); - let server_with_shutdown = server.with_graceful_shutdown(async { - shutdown_receiver.await.ok(); - }); + let shutdown_fuse = ObservableFuse::new(); + let server_with_shutdown = server.with_graceful_shutdown(shutdown_fuse.clone().wait_owned()); let _ = tokio::spawn(server_with_shutdown).await; - let _ = shutdown_sender.send(()); info!("{} server shut down", server_name); } diff --git a/node/src/components/rpc_server/speculative_exec_server.rs b/node/src/components/rpc_server/speculative_exec_server.rs index 002f8761ac..02cc239e75 100644 --- a/node/src/components/rpc_server/speculative_exec_server.rs +++ b/node/src/components/rpc_server/speculative_exec_server.rs @@ -21,47 +21,20 @@ pub(super) async fn run( api_version: ProtocolVersion, qps_limit: u64, max_body_bytes: u32, - cors_origin: String, + cors_origin: Option, ) { let mut handlers = RequestHandlersBuilder::new(); SpeculativeExec::register_as_handler(effect_builder, api_version, &mut handlers); let handlers = handlers.build(); - match cors_origin.as_str() { - "" => { - super::rpcs::run( - builder, - handlers, - qps_limit, - max_body_bytes, - SPECULATIVE_EXEC_API_PATH, - SPECULATIVE_EXEC_SERVER_NAME, - ) - .await; - } - "*" => { - super::rpcs::run_with_cors( - builder, - handlers, - qps_limit, - max_body_bytes, - SPECULATIVE_EXEC_API_PATH, - SPECULATIVE_EXEC_SERVER_NAME, - CorsOrigin::Any, - ) - .await - } - _ => { - super::rpcs::run_with_cors( - builder, - handlers, - qps_limit, - max_body_bytes, - SPECULATIVE_EXEC_API_PATH, - SPECULATIVE_EXEC_SERVER_NAME, - CorsOrigin::Specified(cors_origin), - ) - .await - } - } + super::rpcs::run( + builder, + handlers, + qps_limit, + max_body_bytes, + SPECULATIVE_EXEC_API_PATH, + SPECULATIVE_EXEC_SERVER_NAME, + 
cors_origin, + ) + .await; } diff --git a/node/src/components/storage.rs b/node/src/components/storage.rs index 3f692f99aa..c0c2120fd1 100644 --- a/node/src/components/storage.rs +++ b/node/src/components/storage.rs @@ -49,7 +49,6 @@ use std::{ io::ErrorKind, mem, path::{Path, PathBuf}, - rc::Rc, sync::Arc, }; @@ -108,6 +107,8 @@ use lmdb_ext::{BytesreprError, LmdbExtError, TransactionExt, WriteTransactionExt use metrics::Metrics; use object_pool::ObjectPool; +use super::network::Ticket; + const COMPONENT_NAME: &str = "storage"; /// Filename for the LMDB database created by the Storage component. @@ -164,7 +165,7 @@ pub struct Storage { root: PathBuf, /// Environment holding LMDB databases. #[data_size(skip)] - env: Rc, + env: Arc, /// The block header database. #[data_size(skip)] block_header_db: Database, @@ -311,13 +312,14 @@ where ) -> Effects { let result = match event { Event::StorageRequest(req) => self.handle_storage_request(*req), - Event::NetRequestIncoming(ref incoming) => { - match self.handle_net_request_incoming::(effect_builder, incoming) { + Event::NetRequestIncoming(incoming) => { + let sender = incoming.sender; + match self.handle_net_request_incoming::(effect_builder, *incoming) { Ok(effects) => Ok(effects), Err(GetRequestError::Fatal(fatal_error)) => Err(fatal_error), Err(ref other_err) => { warn!( - sender=%incoming.sender, + %sender, err=display_error(other_err), "error handling net request" ); @@ -498,7 +500,7 @@ impl Storage { let mut component = Self { root, - env: Rc::new(env), + env: Arc::new(env), block_header_db, block_body_db, block_metadata_db, @@ -644,7 +646,7 @@ impl Storage { fn handle_net_request_incoming( &mut self, effect_builder: EffectBuilder, - incoming: &NetRequestIncoming, + incoming: NetRequestIncoming, ) -> Result, GetRequestError> where REv: From> + Send, @@ -663,7 +665,9 @@ impl Storage { incoming.message.tag(), serialized_item, ); - return Ok(effect_builder.send_message(incoming.sender, found).ignore()); + return 
Ok(effect_builder + .send_message_and_drop_ticket(incoming.sender, found, incoming.ticket) + .ignore()); } } @@ -678,6 +682,7 @@ impl Storage { incoming.sender, serialized_id, fetch_response, + incoming.ticket, )?) } NetRequest::LegacyDeploy(ref serialized_id) => { @@ -692,6 +697,7 @@ impl Storage { incoming.sender, serialized_id, fetch_response, + incoming.ticket, )?) } NetRequest::Block(ref serialized_id) => { @@ -704,6 +710,7 @@ impl Storage { incoming.sender, serialized_id, fetch_response, + incoming.ticket, )?) } NetRequest::BlockHeader(ref serialized_id) => { @@ -718,6 +725,7 @@ impl Storage { incoming.sender, serialized_id, fetch_response, + incoming.ticket, )?) } NetRequest::FinalitySignature(ref serialized_id) => { @@ -743,6 +751,7 @@ impl Storage { incoming.sender, serialized_id, fetch_response, + incoming.ticket, )?) } NetRequest::SyncLeap(ref serialized_id) => { @@ -754,6 +763,7 @@ impl Storage { incoming.sender, serialized_id, fetch_response, + incoming.ticket, )?) } NetRequest::ApprovalsHashes(ref serialized_id) => { @@ -766,6 +776,7 @@ impl Storage { incoming.sender, serialized_id, fetch_response, + incoming.ticket, )?) } NetRequest::BlockExecutionResults(ref serialized_id) => { @@ -778,6 +789,7 @@ impl Storage { incoming.sender, serialized_id, fetch_response, + incoming.ticket, )?) 
} } @@ -799,7 +811,7 @@ impl Storage { approvals_hashes, responder, } => { - let env = Rc::clone(&self.env); + let env = Arc::clone(&self.env); let mut txn = env.begin_rw_txn()?; let result = self.write_approvals_hashes(&mut txn, &approvals_hashes)?; txn.commit()?; @@ -922,7 +934,7 @@ impl Storage { execution_results, responder, } => { - let env = Rc::clone(&self.env); + let env = Arc::clone(&self.env); let mut txn = env.begin_rw_txn()?; self.write_execution_results(&mut txn, &block_hash, execution_results)?; txn.commit()?; @@ -1268,7 +1280,7 @@ impl Storage { approvals_hashes: &ApprovalsHashes, execution_results: HashMap, ) -> Result { - let env = Rc::clone(&self.env); + let env = Arc::clone(&self.env); let mut txn = env.begin_rw_txn()?; let wrote = self.write_validated_block(&mut txn, block)?; if !wrote { @@ -1435,7 +1447,7 @@ impl Storage { pub fn write_block(&mut self, block: &Block) -> Result { // Validate the block prior to inserting it into the database block.verify()?; - let env = Rc::clone(&self.env); + let env = Arc::clone(&self.env); let mut txn = env.begin_rw_txn()?; let wrote = self.write_validated_block(&mut txn, block)?; if wrote { @@ -1453,7 +1465,7 @@ impl Storage { pub fn write_complete_block(&mut self, block: &Block) -> Result { // Validate the block prior to inserting it into the database block.verify()?; - let env = Rc::clone(&self.env); + let env = Arc::clone(&self.env); let mut txn = env.begin_rw_txn()?; let wrote = self.write_validated_block(&mut txn, block)?; if wrote { @@ -2447,6 +2459,7 @@ impl Storage { sender: NodeId, serialized_id: &[u8], fetch_response: FetchResponse, + ticket: Ticket, ) -> Result, FatalStorageError> where REv: From> + Send, @@ -2463,7 +2476,9 @@ impl Storage { } let message = Message::new_get_response_from_serialized(::TAG, shared); - Ok(effect_builder.send_message(sender, message).ignore()) + Ok(effect_builder + .send_message_and_drop_ticket(sender, message, ticket) + .ignore()) } /// Returns `true` if the storage 
should attempt to return a block. Depending on the diff --git a/node/src/components/storage/metrics.rs b/node/src/components/storage/metrics.rs index b6ee022b65..4c0f7f816d 100644 --- a/node/src/components/storage/metrics.rs +++ b/node/src/components/storage/metrics.rs @@ -1,6 +1,6 @@ use prometheus::{self, IntGauge, Registry}; -use crate::unregister_metric; +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; const CHAIN_HEIGHT_NAME: &str = "chain_height"; const CHAIN_HEIGHT_HELP: &str = "highest complete block (DEPRECATED)"; @@ -17,38 +17,24 @@ const LOWEST_AVAILABLE_BLOCK_HELP: &str = #[derive(Debug)] pub struct Metrics { // deprecated - replaced by `highest_available_block` - pub(super) chain_height: IntGauge, - pub(super) highest_available_block: IntGauge, - pub(super) lowest_available_block: IntGauge, - registry: Registry, + pub(super) chain_height: RegisteredMetric, + pub(super) highest_available_block: RegisteredMetric, + pub(super) lowest_available_block: RegisteredMetric, } impl Metrics { /// Constructor of metrics which creates and registers metrics objects for use. 
pub(super) fn new(registry: &Registry) -> Result { - let chain_height = IntGauge::new(CHAIN_HEIGHT_NAME, CHAIN_HEIGHT_HELP)?; + let chain_height = registry.new_int_gauge(CHAIN_HEIGHT_NAME, CHAIN_HEIGHT_HELP)?; let highest_available_block = - IntGauge::new(HIGHEST_AVAILABLE_BLOCK_NAME, HIGHEST_AVAILABLE_BLOCK_HELP)?; + registry.new_int_gauge(HIGHEST_AVAILABLE_BLOCK_NAME, HIGHEST_AVAILABLE_BLOCK_HELP)?; let lowest_available_block = - IntGauge::new(LOWEST_AVAILABLE_BLOCK_NAME, LOWEST_AVAILABLE_BLOCK_HELP)?; - - registry.register(Box::new(chain_height.clone()))?; - registry.register(Box::new(highest_available_block.clone()))?; - registry.register(Box::new(lowest_available_block.clone()))?; + registry.new_int_gauge(LOWEST_AVAILABLE_BLOCK_NAME, LOWEST_AVAILABLE_BLOCK_HELP)?; Ok(Metrics { chain_height, highest_available_block, lowest_available_block, - registry: registry.clone(), }) } } - -impl Drop for Metrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.chain_height); - unregister_metric!(self.registry, self.highest_available_block); - unregister_metric!(self.registry, self.lowest_available_block); - } -} diff --git a/node/src/components/sync_leaper/metrics.rs b/node/src/components/sync_leaper/metrics.rs index 04443d493a..f64fabda88 100644 --- a/node/src/components/sync_leaper/metrics.rs +++ b/node/src/components/sync_leaper/metrics.rs @@ -1,6 +1,6 @@ use prometheus::{Histogram, IntCounter, Registry}; -use crate::{unregister_metric, utils}; +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; const SYNC_LEAP_DURATION_NAME: &str = "sync_leap_duration_seconds"; const SYNC_LEAP_DURATION_HELP: &str = "duration (in sec) to perform a successful sync leap"; @@ -15,15 +15,13 @@ const LINEAR_BUCKET_COUNT: usize = 4; #[derive(Debug)] pub(super) struct Metrics { /// Time duration to perform a sync leap. 
- pub(super) sync_leap_duration: Histogram, + pub(super) sync_leap_duration: RegisteredMetric, /// Number of successful sync leap responses that were received from peers. - pub(super) sync_leap_fetched_from_peer: IntCounter, + pub(super) sync_leap_fetched_from_peer: RegisteredMetric, /// Number of requests that were rejected by peers. - pub(super) sync_leap_rejected_by_peer: IntCounter, + pub(super) sync_leap_rejected_by_peer: RegisteredMetric, /// Number of requests that couldn't be fetched from peers. - pub(super) sync_leap_cant_fetch: IntCounter, - - registry: Registry, + pub(super) sync_leap_cant_fetch: RegisteredMetric, } impl Metrics { @@ -35,26 +33,21 @@ impl Metrics { LINEAR_BUCKET_COUNT, )?; - let sync_leap_fetched_from_peer = IntCounter::new( + let sync_leap_fetched_from_peer = registry.new_int_counter( "sync_leap_fetched_from_peer_total".to_string(), "number of successful sync leap responses that were received from peers".to_string(), )?; - let sync_leap_rejected_by_peer = IntCounter::new( + let sync_leap_rejected_by_peer = registry.new_int_counter( "sync_leap_rejected_by_peer_total".to_string(), "number of sync leap requests that were rejected by peers".to_string(), )?; - let sync_leap_cant_fetch = IntCounter::new( + let sync_leap_cant_fetch = registry.new_int_counter( "sync_leap_cant_fetch_total".to_string(), "number of sync leap requests that couldn't be fetched from peers".to_string(), )?; - registry.register(Box::new(sync_leap_fetched_from_peer.clone()))?; - registry.register(Box::new(sync_leap_rejected_by_peer.clone()))?; - registry.register(Box::new(sync_leap_cant_fetch.clone()))?; - Ok(Metrics { - sync_leap_duration: utils::register_histogram_metric( - registry, + sync_leap_duration: registry.new_histogram( SYNC_LEAP_DURATION_NAME, SYNC_LEAP_DURATION_HELP, buckets, @@ -62,16 +55,6 @@ impl Metrics { sync_leap_fetched_from_peer, sync_leap_rejected_by_peer, sync_leap_cant_fetch, - registry: registry.clone(), }) } } - -impl Drop for Metrics { - fn 
drop(&mut self) { - unregister_metric!(self.registry, self.sync_leap_duration); - unregister_metric!(self.registry, self.sync_leap_cant_fetch); - unregister_metric!(self.registry, self.sync_leap_fetched_from_peer); - unregister_metric!(self.registry, self.sync_leap_rejected_by_peer); - } -} diff --git a/node/src/dead_metrics.rs b/node/src/dead_metrics.rs new file mode 100644 index 0000000000..0ece6a7451 --- /dev/null +++ b/node/src/dead_metrics.rs @@ -0,0 +1,42 @@ +//! This file contains metrics that have been retired, but are kept around for now to avoid breaking +//! changes to downstream consumers of said metrics. + +use prometheus::{IntCounter, Registry}; + +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; + +/// Metrics that are never updated. +#[derive(Debug)] +#[allow(dead_code)] +pub(super) struct DeadMetrics { + scheduler_queue_network_low_priority_count: RegisteredMetric, + scheduler_queue_network_demands_count: RegisteredMetric, + accumulated_incoming_limiter_delay: RegisteredMetric, + scheduler_queue_network_incoming_count: RegisteredMetric, +} + +impl DeadMetrics { + /// Creates a new instance of the dead metrics. 
+ pub(super) fn new(registry: &Registry) -> Result { + let scheduler_queue_network_low_priority_count = registry.new_int_counter( + "scheduler_queue_network_low_priority_count", + "retired metric", + )?; + + let scheduler_queue_network_demands_count = + registry.new_int_counter("scheduler_queue_network_demands_count", "retired metric")?; + + let accumulated_incoming_limiter_delay = + registry.new_int_counter("accumulated_incoming_limiter_delay", "retired metric")?; + + let scheduler_queue_network_incoming_count = + registry.new_int_counter("scheduler_queue_network_incoming_count", "retired metric")?; + + Ok(DeadMetrics { + scheduler_queue_network_low_priority_count, + scheduler_queue_network_demands_count, + accumulated_incoming_limiter_delay, + scheduler_queue_network_incoming_count, + }) + } +} diff --git a/node/src/effect.rs b/node/src/effect.rs index d40dad4445..39c7e7780c 100644 --- a/node/src/effect.rs +++ b/node/src/effect.rs @@ -102,6 +102,7 @@ use std::{ fmt::{self, Debug, Display, Formatter}, future::Future, mem, + net::SocketAddr, sync::Arc, time::{Duration, Instant}, }; @@ -141,9 +142,10 @@ use crate::{ diagnostics_port::StopAtSpec, fetcher::{FetchItem, FetchResult}, gossiper::GossipItem, - network::{blocklist::BlocklistJustification, FromIncoming, NetworkInsights}, + network::{blocklist::BlocklistJustification, FromIncoming, NetworkInsights, Ticket}, upgrade_watcher::NextUpgrade, }, + consensus::ValidationError, contract_runtime::SpeculativeExecutionState, failpoints::FailpointActivation, reactor::{main_reactor::ReactorState, EventQueueHandle, QueueKind}, @@ -155,7 +157,7 @@ use crate::{ FinalitySignatureId, FinalizedApprovals, FinalizedBlock, LegacyDeploy, MetaBlock, MetaBlockState, NodeId, TrieOrChunk, TrieOrChunkId, }, - utils::{fmt_limit::FmtLimit, SharedFlag, Source}, + utils::{fmt_limit::FmtLimit, SharedFuse, Source}, }; use announcements::{ BlockAccumulatorAnnouncement, ConsensusAnnouncement, ContractRuntimeAnnouncement, @@ -167,10 +169,11 @@ 
use announcements::{ use diagnostics_port::DumpConsensusStateRequest; use requests::{ AcceptDeployRequest, BeginGossipRequest, BlockAccumulatorRequest, BlockSynchronizerRequest, - BlockValidationRequest, ChainspecRawBytesRequest, ConsensusRequest, ContractRuntimeRequest, - DeployBufferRequest, FetcherRequest, MakeBlockExecutableRequest, MarkBlockCompletedRequest, - MetricsRequest, NetworkInfoRequest, NetworkRequest, ReactorStatusRequest, SetNodeStopRequest, - StorageRequest, SyncGlobalStateRequest, TrieAccumulatorRequest, UpgradeWatcherRequest, + ChainspecRawBytesRequest, ConsensusRequest, ContractRuntimeRequest, DeployBufferRequest, + FetcherRequest, MakeBlockExecutableRequest, MarkBlockCompletedRequest, MetricsRequest, + NetworkInfoRequest, NetworkRequest, ProposedBlockValidationRequest, ReactorStatusRequest, + SetNodeStopRequest, StorageRequest, SyncGlobalStateRequest, TrieAccumulatorRequest, + UpgradeWatcherRequest, }; /// A resource that will never be available, thus trying to acquire it will wait forever. @@ -191,11 +194,18 @@ pub(crate) type Effects = Multiple>; pub(crate) type Multiple = SmallVec<[T; 2]>; /// The type of peers that should receive the gossip message. +/// +/// The selection process is as follows: +/// +/// 1. From all peers +/// 2. exclude those explicitly specified to be excluded +/// 3. construct subsequences according to [`GossipTarget`] +/// 4. then select desired number of peers. #[derive(Debug, Serialize, PartialEq, Eq, Hash, Copy, Clone, DataSize)] pub(crate) enum GossipTarget { - /// Both validators and non validators. + /// Alternate between validators and non-validators. Mixed(EraId), - /// All peers. + /// A random subset of all connected peers. All, } @@ -215,7 +225,7 @@ pub(crate) struct Responder { /// Sender through which the response ultimately should be sent. sender: Option>, /// Reactor flag indicating shutdown. 
- is_shutting_down: SharedFlag, + is_shutting_down: SharedFuse, } /// A responder that will automatically send a `None` on drop. @@ -247,20 +257,11 @@ impl AutoClosingResponder { pub(crate) async fn respond(self, data: T) { self.into_inner().respond(Some(data)).await } - - /// Send `None` to the origin of the request. - pub(crate) async fn respond_none(self) { - self.into_inner().respond(None).await - } } impl Drop for AutoClosingResponder { fn drop(&mut self) { if let Some(sender) = self.0.sender.take() { - debug!( - sending_value = %self.0, - "responding None by dropping auto-close responder" - ); // We still haven't answered, send an answer. if let Err(_unsent_value) = sender.send(None) { debug!( @@ -275,7 +276,7 @@ impl Drop for AutoClosingResponder { impl Responder { /// Creates a new `Responder`. #[inline] - fn new(sender: oneshot::Sender, is_shutting_down: SharedFlag) -> Self { + fn new(sender: oneshot::Sender, is_shutting_down: SharedFuse) -> Self { Responder { sender: Some(sender), is_shutting_down, @@ -289,7 +290,7 @@ impl Responder { #[cfg(test)] #[inline] pub(crate) fn without_shutdown(sender: oneshot::Sender) -> Self { - Responder::new(sender, SharedFlag::global_shared()) + Responder::new(sender, SharedFuse::global_shared()) } } @@ -672,8 +673,20 @@ impl EffectBuilder { /// Sends a network message. /// - /// The message is queued and sent, but no delivery guaranteed. Will return after the message - /// has been buffered in the outgoing kernel buffer and thus is subject to backpressure. + /// The message is queued and sent, without any delivery guarantees. Will return after the + /// message has been buffered by the networking stack and is thus subject to backpressure + /// from the receiving peer. + /// + /// If the message cannot be buffered immediately, `send_message` will wait until there is room + /// in the networking layer's buffer available. 
This means that messages will be buffered + /// outside the networking component without any limit, when this method is used. The calling + /// component is responsible for ensuring that not too many instances of `send_message` are + /// awaited at any one point in time. + /// + /// If the peer is not reachable, the message will be discarded. + /// + /// See `try_send_message` for a method that does not buffer messages outside networking if + /// buffers are full, but discards them instead. pub(crate) async fn send_message

(self, dest: NodeId, payload: P) where REv: From>, @@ -682,32 +695,62 @@ impl EffectBuilder { |responder| NetworkRequest::SendMessage { dest: Box::new(dest), payload: Box::new(payload), - respond_after_queueing: false, - auto_closing_responder: AutoClosingResponder::from_opt_responder(responder), + message_queued_responder: Some(AutoClosingResponder::from_opt_responder(responder)), }, QueueKind::Network, ) .await; + + // Note: It does not matter to use whether `Some()` (indicating buffering) or `None` + // (indicating a lost message) was returned, since we do not guarantee anything about + // delivery. + } + + /// Sends a network message, drops a ticket upon successful sending. + /// + /// Similar to `send_message`, except a [`Ticket`] is passed as well, which will be dropped as + /// soon as `send_message` returns (but no earlier). + pub(crate) async fn send_message_and_drop_ticket

( + self, + dest: NodeId, + payload: P, + ticket: Ticket, + ) where + REv: From>, + { + self.send_message(dest, payload).await; + + drop(ticket); } - /// Enqueues a network message. + /// Sends a network message with best effort. + /// + /// The message is queued in "fire-and-forget" fashion, there is no guarantee that the peer will + /// receive it. It may also be dropped if the outbound message queue for the specific peer is + /// full as well, instead of backpressure being propagated. /// - /// The message is queued in "fire-and-forget" fashion, there is no guarantee that the peer - /// will receive it. Returns as soon as the message is queued inside the networking component. - pub(crate) async fn enqueue_message

(self, dest: NodeId, payload: P) + /// Returns immediately. If called at extreme rates, this function may blow up the event queue, + /// since messages are only discarded once they have made their way to a networking component, + /// while this method returns earlier. + /// + /// A more heavyweight message sending function is available in `send_message`. + pub(crate) async fn try_send_message

(self, dest: NodeId, payload: P) where REv: From>, { - self.make_request( - |responder| NetworkRequest::SendMessage { - dest: Box::new(dest), - payload: Box::new(payload), - respond_after_queueing: true, - auto_closing_responder: AutoClosingResponder::from_opt_responder(responder), - }, - QueueKind::Network, - ) - .await; + // Note: Since we do not expect any response to our request, we can avoid spawning an extra + // task awaiting the responder. + + self.event_queue + .schedule( + NetworkRequest::SendMessage { + dest: Box::new(dest), + payload: Box::new(payload), + message_queued_responder: None, + }, + QueueKind::Network, + ) + .await } /// Broadcasts a network message to validator peers in the given era. @@ -741,7 +784,7 @@ impl EffectBuilder { gossip_target: GossipTarget, count: usize, exclude: HashSet, - ) -> HashSet + ) -> Vec where REv: From>, P: Send, @@ -773,7 +816,7 @@ impl EffectBuilder { } /// Gets a map of the current network peers to their socket addresses. - pub(crate) async fn network_peers(self) -> BTreeMap + pub(crate) async fn network_peers(self) -> BTreeMap where REv: From, { @@ -810,15 +853,15 @@ impl EffectBuilder { } /// Announces an incoming network message. - pub(crate) async fn announce_incoming

(self, sender: NodeId, payload: P) + pub(crate) async fn announce_incoming

(self, sender: NodeId, payload: P, ticket: Ticket) where - REv: FromIncoming

, + REv: FromIncoming

+ From> + Send, + P: 'static + Send, { + let reactor_event = >::from_incoming(sender, payload, ticket); + self.event_queue - .schedule( - >::from_incoming(sender, payload), - QueueKind::NetworkIncoming, - ) + .schedule::(reactor_event, QueueKind::MessageIncoming) .await } @@ -842,16 +885,23 @@ impl EffectBuilder { /// Announces that a gossiper has received a full item, where the item's ID is NOT the complete /// item. + /// + /// The associated [`Ticket`] is the ticket from the message received containing the item. pub(crate) async fn announce_item_body_received_via_gossip( self, item: Box, sender: NodeId, + ticket: Ticket, ) where REv: From>, { self.event_queue .schedule( - GossiperAnnouncement::NewItemBody { item, sender }, + GossiperAnnouncement::NewItemBody { + item, + sender, + ticket, + }, QueueKind::Gossip, ) .await; @@ -1764,14 +1814,14 @@ impl EffectBuilder { pub(crate) async fn validate_block( self, sender: NodeId, - block: ProposedBlock, - ) -> bool + proposed_block: ProposedBlock, + ) -> Result<(), ValidationError> where - REv: From, + REv: From, { self.make_request( - |responder| BlockValidationRequest { - block, + |responder| ProposedBlockValidationRequest { + proposed_block, sender, responder, }, diff --git a/node/src/effect/announcements.rs b/node/src/effect/announcements.rs index c149e60c32..4397bada7d 100644 --- a/node/src/effect/announcements.rs +++ b/node/src/effect/announcements.rs @@ -22,7 +22,7 @@ use crate::{ diagnostics_port::FileSerializer, fetcher::FetchItem, gossiper::GossipItem, - network::blocklist::BlocklistJustification, + network::{blocklist::BlocklistJustification, Ticket}, upgrade_watcher::NextUpgrade, }, effect::Responder, @@ -310,7 +310,11 @@ pub(crate) enum GossiperAnnouncement { NewCompleteItem(T::Id), /// A new item has been received where the item's ID is NOT the complete item. 
- NewItemBody { item: Box, sender: NodeId }, + NewItemBody { + item: Box, + sender: NodeId, + ticket: Ticket, + }, /// Finished gossiping about the indicated item. FinishedGossiping(T::Id), @@ -323,7 +327,11 @@ impl Display for GossiperAnnouncement { write!(f, "new gossiped item {} from sender {}", item_id, sender) } GossiperAnnouncement::NewCompleteItem(item) => write!(f, "new complete item {}", item), - GossiperAnnouncement::NewItemBody { item, sender } => { + GossiperAnnouncement::NewItemBody { + item, + sender, + ticket: _, + } => { write!(f, "new item body {} from {}", item.gossip_id(), sender) } GossiperAnnouncement::FinishedGossiping(item_id) => { diff --git a/node/src/effect/incoming.rs b/node/src/effect/incoming.rs index 8f6857b16c..cf9e9bad6d 100644 --- a/node/src/effect/incoming.rs +++ b/node/src/effect/incoming.rs @@ -11,18 +11,24 @@ use datasize::DataSize; use serde::Serialize; use crate::{ - components::{consensus, fetcher::Tag, gossiper}, - protocol::Message, + components::{consensus, fetcher::Tag, gossiper, network::Ticket}, types::{FinalitySignature, NodeId, TrieOrChunkIdDisplay}, }; -use super::AutoClosingResponder; - -/// An envelope for an incoming message, attaching a sender address. +/// An envelope for an incoming message, attaching a sender address and a backpressure ticket. #[derive(DataSize, Debug, Serialize)] pub struct MessageIncoming { + /// Sender of the incoming message. pub(crate) sender: NodeId, + /// Actual message, deserialized. pub(crate) message: Box, + /// A ticket representing the "work" for processing the incoming message. + /// + /// Only drop this once no more resources are consumed, as doing so will signal the peer to + /// start sending another message. If a response is generated, consider using + /// [`crate::effect::EffectBuilder::send_message_and_drop_ticket`]. 
+ #[serde(skip)] + pub(crate) ticket: Ticket, } impl Display for MessageIncoming @@ -34,29 +40,13 @@ where } } -/// An envelope for an incoming demand, attaching a sender address and responder. -#[derive(DataSize, Debug, Serialize)] -pub struct DemandIncoming { - /// The sender from which the demand originated. - pub(crate) sender: NodeId, - /// The wrapped demand. - pub(crate) request_msg: Box, - /// Responder to send the answer down through. - pub(crate) auto_closing_responder: AutoClosingResponder, -} - -impl Display for DemandIncoming -where - M: Display, -{ - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "demand from {}: {}", self.sender, self.request_msg) - } -} - /// A new consensus message arrived. pub(crate) type ConsensusMessageIncoming = MessageIncoming; +/// A request for consensus protocol data that should be answered. +pub(crate) type ConsensusRequestMessageIncoming = + MessageIncoming; + /// A new message from a gossiper arrived. pub(crate) type GossiperIncoming = MessageIncoming>; @@ -69,12 +59,6 @@ pub(crate) type NetResponseIncoming = MessageIncoming; /// A new message requesting a trie arrived. pub(crate) type TrieRequestIncoming = MessageIncoming; -/// A demand for a trie that should be answered. -pub(crate) type TrieDemand = DemandIncoming; - -/// A demand for consensus protocol data that should be answered. -pub(crate) type ConsensusDemand = DemandIncoming; - /// A new message responding to a trie request arrived. 
pub(crate) type TrieResponseIncoming = MessageIncoming; diff --git a/node/src/effect/requests.rs b/node/src/effect/requests.rs index 3bb87cbced..bb04e86bdd 100644 --- a/node/src/effect/requests.rs +++ b/node/src/effect/requests.rs @@ -7,6 +7,7 @@ use std::{ collections::{BTreeMap, HashMap, HashSet}, fmt::{self, Debug, Display, Formatter}, mem, + net::SocketAddr, sync::Arc, }; @@ -46,6 +47,7 @@ use crate::{ network::NetworkInsights, upgrade_watcher::NextUpgrade, }, + consensus::ValidationError, contract_runtime::{ContractRuntimeError, SpeculativeExecutionState}, effect::{AutoClosingResponder, Responder}, reactor::main_reactor::ReactorState, @@ -97,12 +99,9 @@ pub(crate) enum NetworkRequest

{ dest: Box, /// Message payload. payload: Box

, - /// If `true`, the responder will be called early after the message has been queued, not - /// waiting until it has passed to the kernel. - respond_after_queueing: bool, /// Responder to be called when the message has been *buffered for sending*. #[serde(skip_serializing)] - auto_closing_responder: AutoClosingResponder<()>, + message_queued_responder: Option>, }, /// Send a message on the network to validator peers in the given era. ValidatorBroadcast { @@ -127,7 +126,7 @@ pub(crate) enum NetworkRequest

{ exclude: HashSet, /// Responder to be called when all messages are queued. #[serde(skip_serializing)] - auto_closing_responder: AutoClosingResponder>, + auto_closing_responder: AutoClosingResponder>, }, } @@ -143,13 +142,11 @@ impl

NetworkRequest

{ NetworkRequest::SendMessage { dest, payload, - respond_after_queueing, - auto_closing_responder, + message_queued_responder, } => NetworkRequest::SendMessage { dest, payload: Box::new(wrap_payload(*payload)), - respond_after_queueing, - auto_closing_responder, + message_queued_responder, }, NetworkRequest::ValidatorBroadcast { payload, @@ -175,6 +172,19 @@ impl

NetworkRequest

{ }, } } + + /// Returns the message's payload. + /// + /// This is typically used for filtering payloads in tests. + #[cfg(test)] + #[inline(always)] + pub(crate) fn payload(&self) -> &P { + match self { + NetworkRequest::SendMessage { payload, .. } => payload, + NetworkRequest::ValidatorBroadcast { payload, .. } => payload, + NetworkRequest::Gossip { payload, .. } => payload, + } + } } impl

Display for NetworkRequest

@@ -200,11 +210,11 @@ pub(crate) enum NetworkInfoRequest { /// Get incoming and outgoing peers. Peers { /// Responder to be called with all connected peers. - /// Responds with a map from [NodeId]s to a socket address, represented as a string. - responder: Responder>, + responder: Responder>, }, /// Get up to `count` fully-connected peers in random order. FullyConnectedPeers { + /// Responder to be called with all connected in random order peers. count: usize, /// Responder to be called with the peers. responder: Responder>, @@ -748,7 +758,7 @@ pub(crate) enum RpcRequest { /// Return the connected peers. GetPeers { /// Responder to call with the result. - responder: Responder>, + responder: Responder>, }, /// Return string formatted status or `None` if an error occurred. GetStatus { @@ -1045,24 +1055,28 @@ impl Display for SyncGlobalStateRequest { } } -/// A block validator request. +/// A proposed block validator request. #[derive(Debug)] #[must_use] -pub(crate) struct BlockValidationRequest { - /// The block to be validated. - pub(crate) block: ProposedBlock, +pub(crate) struct ProposedBlockValidationRequest { + /// The proposed block to be validated. + pub(crate) proposed_block: ProposedBlock, /// The sender of the block, which will be asked to provide all missing deploys. pub(crate) sender: NodeId, /// Responder to call with the result. /// /// Indicates whether or not validation was successful. - pub(crate) responder: Responder, + pub(crate) responder: Responder>, } -impl Display for BlockValidationRequest { +impl Display for ProposedBlockValidationRequest { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - let BlockValidationRequest { block, sender, .. } = self; - write!(f, "validate block {} from {}", block, sender) + let ProposedBlockValidationRequest { + proposed_block, + sender, + .. 
+ } = self; + write!(f, "validate {} from {}", proposed_block, sender) } } diff --git a/node/src/lib.rs b/node/src/lib.rs index 3bbd416507..14ecaebd88 100644 --- a/node/src/lib.rs +++ b/node/src/lib.rs @@ -26,6 +26,7 @@ pub mod cli; pub(crate) mod components; mod config_migration; mod data_migration; +mod dead_metrics; pub(crate) mod effect; #[cfg_attr(not(feature = "failpoints"), path = "failpoints_disabled.rs")] pub(crate) mod failpoints; @@ -54,7 +55,6 @@ use tracing::warn; pub(crate) use components::{ block_accumulator::Config as BlockAccumulatorConfig, block_synchronizer::Config as BlockSynchronizerConfig, - block_validator::Config as BlockValidatorConfig, consensus::Config as ConsensusConfig, contract_runtime::Config as ContractRuntimeConfig, deploy_acceptor::Config as DeployAcceptorConfig, @@ -64,6 +64,7 @@ pub(crate) use components::{ fetcher::Config as FetcherConfig, gossiper::Config as GossipConfig, network::Config as NetworkConfig, + proposed_block_validator::Config as ProposedBlockValidatorConfig, rest_server::Config as RestServerConfig, rpc_server::{Config as RpcServerConfig, SpeculativeExecConfig}, upgrade_watcher::Config as UpgradeWatcherConfig, diff --git a/node/src/logging.rs b/node/src/logging.rs index b73f4300c9..c99f3a4e30 100644 --- a/node/src/logging.rs +++ b/node/src/logging.rs @@ -72,7 +72,7 @@ impl LoggingConfig { /// Logging output format. /// /// Defaults to "text"". -#[derive(Clone, DataSize, Debug, Deserialize, Serialize, Default)] +#[derive(Clone, Copy, DataSize, Debug, Deserialize, Serialize, Default)] #[serde(rename_all = "lowercase")] pub enum LoggingFormat { /// Text format. @@ -260,7 +260,17 @@ where /// See `init_params` for details. #[cfg(test)] pub fn init() -> anyhow::Result<()> { - init_with_config(&Default::default()) + let mut cfg = LoggingConfig::default(); + + // The `NODE_TEST_LOG` environment variable can be used to specify JSON output when testing. 
+ match env::var("NODE_TEST_LOG") { + Ok(s) if s == "json" => { + cfg.format = LoggingFormat::Json; + } + _ => (), + } + + init_with_config(&cfg) } /// A handle for reloading the logger. diff --git a/node/src/protocol.rs b/node/src/protocol.rs index 189ce64357..18f86daa0e 100644 --- a/node/src/protocol.rs +++ b/node/src/protocol.rs @@ -7,37 +7,31 @@ use std::{ use derive_more::From; use fmt::Debug; -use futures::{future::BoxFuture, FutureExt}; use hex_fmt::HexFmt; use serde::{Deserialize, Serialize}; -use strum::EnumDiscriminants; use crate::{ components::{ consensus, fetcher::{FetchItem, FetchResponse, Tag}, gossiper, - network::{EstimatorWeights, FromIncoming, GossipedAddress, MessageKind, Payload}, + network::{Channel, FromIncoming, GossipedAddress, MessageKind, Payload, Ticket}, }, - effect::{ - incoming::{ - ConsensusDemand, ConsensusMessageIncoming, FinalitySignatureIncoming, GossiperIncoming, - NetRequest, NetRequestIncoming, NetResponse, NetResponseIncoming, TrieDemand, - TrieRequest, TrieRequestIncoming, TrieResponse, TrieResponseIncoming, - }, - AutoClosingResponder, EffectBuilder, + effect::incoming::{ + ConsensusMessageIncoming, ConsensusRequestMessageIncoming, FinalitySignatureIncoming, + GossiperIncoming, NetRequest, NetRequestIncoming, NetResponse, NetResponseIncoming, + TrieRequest, TrieRequestIncoming, TrieResponse, TrieResponseIncoming, }, types::{Block, Deploy, FinalitySignature, NodeId}, }; /// Reactor message. -#[derive(Clone, From, Serialize, Deserialize, EnumDiscriminants)] -#[strum_discriminants(derive(strum::EnumIter))] +#[derive(Clone, From, Serialize, Deserialize)] pub(crate) enum Message { /// Consensus component message. #[from] Consensus(consensus::ConsensusMessage), - /// Consensus component demand. + /// Consensus component request. #[from] ConsensusRequest(consensus::ConsensusRequestMessage), /// Block gossiper component message. 
@@ -112,53 +106,44 @@ impl Payload for Message { } #[inline] - fn incoming_resource_estimate(&self, weights: &EstimatorWeights) -> u32 { + fn get_channel(&self) -> Channel { match self { - Message::Consensus(_) => weights.consensus, - Message::ConsensusRequest(_) => weights.consensus, - Message::BlockGossiper(_) => weights.block_gossip, - Message::DeployGossiper(_) => weights.deploy_gossip, - Message::FinalitySignatureGossiper(_) => weights.finality_signature_gossip, - Message::AddressGossiper(_) => weights.address_gossip, - Message::GetRequest { tag, .. } => match tag { - Tag::Deploy => weights.deploy_requests, - Tag::LegacyDeploy => weights.legacy_deploy_requests, - Tag::Block => weights.block_requests, - Tag::BlockHeader => weights.block_header_requests, - Tag::TrieOrChunk => weights.trie_requests, - Tag::FinalitySignature => weights.finality_signature_requests, - Tag::SyncLeap => weights.sync_leap_requests, - Tag::ApprovalsHashes => weights.approvals_hashes_requests, - Tag::BlockExecutionResults => weights.execution_results_requests, + Message::Consensus(_) => Channel::Consensus, + Message::DeployGossiper(_) => Channel::BulkGossip, + Message::AddressGossiper(_) => Channel::BulkGossip, + Message::GetRequest { + tag, + serialized_id: _, + } => match tag { + Tag::Deploy => Channel::DataRequests, + Tag::LegacyDeploy => Channel::SyncDataRequests, + Tag::Block => Channel::SyncDataRequests, + Tag::BlockHeader => Channel::SyncDataRequests, + Tag::TrieOrChunk => Channel::SyncDataRequests, + Tag::FinalitySignature => Channel::DataRequests, + Tag::SyncLeap => Channel::SyncDataRequests, + Tag::ApprovalsHashes => Channel::SyncDataRequests, + Tag::BlockExecutionResults => Channel::SyncDataRequests, }, - Message::GetResponse { tag, .. 
} => match tag { - Tag::Deploy => weights.deploy_responses, - Tag::LegacyDeploy => weights.legacy_deploy_responses, - Tag::Block => weights.block_responses, - Tag::BlockHeader => weights.block_header_responses, - Tag::TrieOrChunk => weights.trie_responses, - Tag::FinalitySignature => weights.finality_signature_responses, - Tag::SyncLeap => weights.sync_leap_responses, - Tag::ApprovalsHashes => weights.approvals_hashes_responses, - Tag::BlockExecutionResults => weights.execution_results_responses, + Message::GetResponse { + tag, + serialized_item: _, + } => match tag { + // TODO: Verify which responses are for sync data. + Tag::Deploy => Channel::DataResponses, + Tag::LegacyDeploy => Channel::SyncDataResponses, + Tag::Block => Channel::SyncDataResponses, + Tag::BlockHeader => Channel::SyncDataResponses, + Tag::TrieOrChunk => Channel::SyncDataResponses, + Tag::FinalitySignature => Channel::DataResponses, + Tag::SyncLeap => Channel::SyncDataResponses, + Tag::ApprovalsHashes => Channel::SyncDataResponses, + Tag::BlockExecutionResults => Channel::SyncDataResponses, }, - Message::FinalitySignature(_) => weights.finality_signature_broadcasts, - } - } - - fn is_unsafe_for_syncing_peers(&self) -> bool { - match self { - Message::Consensus(_) => false, - Message::ConsensusRequest(_) => false, - Message::BlockGossiper(_) => false, - Message::DeployGossiper(_) => false, - Message::FinalitySignatureGossiper(_) => false, - Message::AddressGossiper(_) => false, - // Trie requests can deadlock between syncing nodes. - Message::GetRequest { tag, .. } if *tag == Tag::TrieOrChunk => true, - Message::GetRequest { .. } => false, - Message::GetResponse { .. 
} => false, - Message::FinalitySignature(_) => false, + Message::FinalitySignature(_) => Channel::Consensus, + Message::ConsensusRequest(_) => Channel::Consensus, + Message::BlockGossiper(_) => Channel::BulkGossip, + Message::FinalitySignatureGossiper(_) => Channel::BulkGossip, } } } @@ -223,49 +208,6 @@ impl Debug for Message { } } } -mod specimen_support { - use crate::utils::specimen::{ - largest_get_request, largest_get_response, largest_variant, Cache, LargestSpecimen, - SizeEstimator, - }; - - use super::{Message, MessageDiscriminants}; - - impl LargestSpecimen for Message { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - largest_variant::( - estimator, - |variant| match variant { - MessageDiscriminants::Consensus => { - Message::Consensus(LargestSpecimen::largest_specimen(estimator, cache)) - } - MessageDiscriminants::ConsensusRequest => Message::ConsensusRequest( - LargestSpecimen::largest_specimen(estimator, cache), - ), - MessageDiscriminants::BlockGossiper => { - Message::BlockGossiper(LargestSpecimen::largest_specimen(estimator, cache)) - } - MessageDiscriminants::DeployGossiper => { - Message::DeployGossiper(LargestSpecimen::largest_specimen(estimator, cache)) - } - MessageDiscriminants::FinalitySignatureGossiper => { - Message::FinalitySignatureGossiper(LargestSpecimen::largest_specimen( - estimator, cache, - )) - } - MessageDiscriminants::AddressGossiper => Message::AddressGossiper( - LargestSpecimen::largest_specimen(estimator, cache), - ), - MessageDiscriminants::GetRequest => largest_get_request(estimator, cache), - MessageDiscriminants::GetResponse => largest_get_response(estimator, cache), - MessageDiscriminants::FinalitySignature => Message::FinalitySignature( - LargestSpecimen::largest_specimen(estimator, cache), - ), - }, - ) - } - } -} impl Display for Message { fn fmt(&self, f: &mut Formatter) -> fmt::Result { @@ -297,7 +239,7 @@ impl Display for Message { impl FromIncoming for REv where REv: From - + From + + From + 
From> + From> + From> @@ -305,85 +247,106 @@ where + From + From + From - + From + From + From, { - fn from_incoming(sender: NodeId, payload: Message) -> Self { + fn from_incoming(sender: NodeId, payload: Message, ticket: Ticket) -> Self { match payload { Message::Consensus(message) => ConsensusMessageIncoming { sender, message: Box::new(message), + ticket, } .into(), - Message::ConsensusRequest(_message) => { - // TODO: Remove this once from_incoming and try_demand_from_incoming are unified. - unreachable!("called from_incoming with a consensus request") + Message::ConsensusRequest(message) => ConsensusRequestMessageIncoming { + sender, + message: Box::new(message), + ticket, } + .into(), Message::BlockGossiper(message) => GossiperIncoming { sender, message: Box::new(message), + ticket, } .into(), Message::DeployGossiper(message) => GossiperIncoming { sender, + message: Box::new(message), + ticket, } .into(), Message::FinalitySignatureGossiper(message) => GossiperIncoming { sender, message: Box::new(message), + ticket, } .into(), Message::AddressGossiper(message) => GossiperIncoming { sender, + message: Box::new(message), + ticket, } .into(), Message::GetRequest { tag, serialized_id } => match tag { Tag::Deploy => NetRequestIncoming { sender, message: Box::new(NetRequest::Deploy(serialized_id)), + ticket, } .into(), Tag::LegacyDeploy => NetRequestIncoming { sender, + message: Box::new(NetRequest::LegacyDeploy(serialized_id)), + ticket, } .into(), Tag::Block => NetRequestIncoming { sender, message: Box::new(NetRequest::Block(serialized_id)), + ticket, } .into(), Tag::BlockHeader => NetRequestIncoming { sender, message: Box::new(NetRequest::BlockHeader(serialized_id)), + ticket, } .into(), Tag::TrieOrChunk => TrieRequestIncoming { sender, message: Box::new(TrieRequest(serialized_id)), + ticket, } .into(), Tag::FinalitySignature => NetRequestIncoming { sender, + message: Box::new(NetRequest::FinalitySignature(serialized_id)), + ticket, } .into(), Tag::SyncLeap => 
NetRequestIncoming { sender, message: Box::new(NetRequest::SyncLeap(serialized_id)), + ticket, } .into(), Tag::ApprovalsHashes => NetRequestIncoming { sender, + message: Box::new(NetRequest::ApprovalsHashes(serialized_id)), + ticket, } .into(), Tag::BlockExecutionResults => NetRequestIncoming { sender, + message: Box::new(NetRequest::BlockExecutionResults(serialized_id)), + ticket, } .into(), }, @@ -394,87 +357,68 @@ where Tag::Deploy => NetResponseIncoming { sender, message: Box::new(NetResponse::Deploy(serialized_item)), + ticket, } .into(), Tag::LegacyDeploy => NetResponseIncoming { sender, + message: Box::new(NetResponse::LegacyDeploy(serialized_item)), + ticket, } .into(), Tag::Block => NetResponseIncoming { sender, message: Box::new(NetResponse::Block(serialized_item)), + ticket, } .into(), Tag::BlockHeader => NetResponseIncoming { sender, + message: Box::new(NetResponse::BlockHeader(serialized_item)), + ticket, } .into(), Tag::TrieOrChunk => TrieResponseIncoming { sender, + message: Box::new(TrieResponse(serialized_item.to_vec())), + ticket, } .into(), Tag::FinalitySignature => NetResponseIncoming { sender, + message: Box::new(NetResponse::FinalitySignature(serialized_item)), + ticket, } .into(), Tag::SyncLeap => NetResponseIncoming { sender, message: Box::new(NetResponse::SyncLeap(serialized_item)), + ticket, } .into(), Tag::ApprovalsHashes => NetResponseIncoming { sender, message: Box::new(NetResponse::ApprovalsHashes(serialized_item)), + ticket, } .into(), Tag::BlockExecutionResults => NetResponseIncoming { sender, message: Box::new(NetResponse::BlockExecutionResults(serialized_item)), + ticket, } .into(), }, - Message::FinalitySignature(message) => { - FinalitySignatureIncoming { sender, message }.into() - } - } - } - - fn try_demand_from_incoming( - effect_builder: EffectBuilder, - sender: NodeId, - payload: Message, - ) -> Result<(Self, BoxFuture<'static, Option>), Message> - where - Self: Sized + Send, - { - match payload { - Message::GetRequest { - 
tag: Tag::TrieOrChunk, - serialized_id, - } => { - let (ev, fut) = effect_builder.create_request_parts(move |responder| TrieDemand { - sender, - request_msg: Box::new(TrieRequest(serialized_id)), - auto_closing_responder: AutoClosingResponder::from_opt_responder(responder), - }); - - Ok((ev, fut.boxed())) - } - Message::ConsensusRequest(request_msg) => { - let (ev, fut) = - effect_builder.create_request_parts(move |responder| ConsensusDemand { - sender, - request_msg: Box::new(request_msg), - auto_closing_responder: AutoClosingResponder::from_opt_responder(responder), - }); - - Ok((ev, fut.boxed())) + Message::FinalitySignature(message) => FinalitySignatureIncoming { + sender, + message, + ticket, } - _ => Err(payload), + .into(), } } } diff --git a/node/src/reactor.rs b/node/src/reactor.rs index 0d9bd25e2a..aad3549349 100644 --- a/node/src/reactor.rs +++ b/node/src/reactor.rs @@ -34,12 +34,10 @@ pub(crate) mod main_reactor; mod queue_kind; use std::{ - any, collections::HashMap, env, fmt::{Debug, Display}, io::Write, - mem, num::NonZeroU64, str::FromStr, sync::{atomic::Ordering, Arc}, @@ -51,7 +49,7 @@ use erased_serde::Serialize as ErasedSerialize; use fake_instant::FakeClock; use futures::{future::BoxFuture, FutureExt}; use once_cell::sync::Lazy; -use prometheus::{self, Histogram, HistogramOpts, IntCounter, IntGauge, Registry}; +use prometheus::{self, Histogram, IntCounter, IntGauge, Registry}; use quanta::{Clock, IntoNanoseconds}; use serde::Serialize; use signal_hook::consts::signal::{SIGINT, SIGQUIT, SIGTERM}; @@ -60,6 +58,8 @@ use tokio::time::{Duration, Instant}; use tracing::{debug_span, error, info, instrument, trace, warn, Span}; use tracing_futures::Instrument; +#[cfg(test)] +use crate::components::ComponentState; #[cfg(test)] use casper_types::testing::TestRng; @@ -72,7 +72,7 @@ use crate::{ components::{ block_accumulator, deploy_acceptor, fetcher::{self, FetchItem}, - network::{blocklist::BlocklistJustification, Identity as NetworkIdentity}, + 
network::{blocklist::BlocklistJustification, Identity as NetworkIdentity, Ticket}, }, effect::{ announcements::{ControlAnnouncement, PeerBehaviorAnnouncement, QueueDumpFormat}, @@ -85,8 +85,11 @@ use crate::{ ChainspecRawBytes, Deploy, ExitCode, FinalitySignature, LegacyDeploy, NodeId, SyncLeap, TrieOrChunk, }, - unregister_metric, - utils::{self, SharedFlag, WeightedRoundRobin}, + utils::{ + self, + registered_metric::{RegisteredMetric, RegistryExt}, + Fuse, SharedFuse, WeightedRoundRobin, + }, NodeRng, TERMINATION_REQUESTED, }; pub(crate) use queue_kind::QueueKind; @@ -184,7 +187,7 @@ where /// A reference to the scheduler of the event queue. scheduler: &'static Scheduler, /// Flag indicating whether or not the reactor processing this event queue is shutting down. - is_shutting_down: SharedFlag, + is_shutting_down: SharedFuse, } // Implement `Clone` and `Copy` manually, as `derive` will make it depend on `R` and `Ev` otherwise. @@ -197,7 +200,7 @@ impl Copy for EventQueueHandle {} impl EventQueueHandle { /// Creates a new event queue handle. - pub(crate) fn new(scheduler: &'static Scheduler, is_shutting_down: SharedFlag) -> Self { + pub(crate) fn new(scheduler: &'static Scheduler, is_shutting_down: SharedFuse) -> Self { EventQueueHandle { scheduler, is_shutting_down, @@ -209,7 +212,7 @@ impl EventQueueHandle { /// This method is used in tests, where we are never disabling shutdown warnings anyway. #[cfg(test)] pub(crate) fn without_shutdown(scheduler: &'static Scheduler) -> Self { - EventQueueHandle::new(scheduler, SharedFlag::global_shared()) + EventQueueHandle::new(scheduler, SharedFuse::global_shared()) } /// Schedule an event on a specific queue. @@ -242,7 +245,7 @@ impl EventQueueHandle { } /// Returns whether the associated reactor is currently shutting down. 
- pub(crate) fn shutdown_flag(&self) -> SharedFlag { + pub(crate) fn shutdown_flag(&self) -> SharedFuse { self.is_shutting_down } } @@ -296,6 +299,15 @@ pub(crate) trait Reactor: Sized { /// Instructs the reactor to update performance metrics, if any. fn update_metrics(&mut self, _event_queue_handle: EventQueueHandle) {} + /// Returns the state of a named components. + /// + /// May return `None` if the component cannot be found, or if the reactor does not support + /// querying component states. + #[cfg(test)] + fn get_component_state(&self, _name: &str) -> Option<&ComponentState> { + None + } + /// Activate/deactivate a failpoint. fn activate_failpoint(&mut self, _activation: &FailpointActivation) { // Default is to ignore the failpoint. If failpoint support is enabled for a reactor, route @@ -376,41 +388,37 @@ where clock: Clock, /// Flag indicating the reactor is being shut down. - is_shutting_down: SharedFlag, + is_shutting_down: SharedFuse, } /// Metric data for the Runner #[derive(Debug)] struct RunnerMetrics { /// Total number of events processed. - events: IntCounter, + events: RegisteredMetric, /// Histogram of how long it took to dispatch an event. - event_dispatch_duration: Histogram, + event_dispatch_duration: RegisteredMetric, /// Total allocated RAM in bytes, as reported by stats_alloc. - allocated_ram_bytes: IntGauge, + allocated_ram_bytes: RegisteredMetric, /// Total consumed RAM in bytes, as reported by sys-info. - consumed_ram_bytes: IntGauge, + consumed_ram_bytes: RegisteredMetric, /// Total system RAM in bytes, as reported by sys-info. - total_ram_bytes: IntGauge, - /// Handle to the metrics registry, in case we need to unregister. - registry: Registry, + total_ram_bytes: RegisteredMetric, } impl RunnerMetrics { /// Create and register new runner metrics. 
fn new(registry: &Registry) -> Result { - let events = IntCounter::new( + let events = registry.new_int_counter( "runner_events", "running total count of events handled by this reactor", )?; // Create an event dispatch histogram, putting extra emphasis on the area between 1-10 us. - let event_dispatch_duration = Histogram::with_opts( - HistogramOpts::new( - "event_dispatch_duration", - "time in nanoseconds to dispatch an event", - ) - .buckets(vec![ + let event_dispatch_duration = registry.new_histogram( + "event_dispatch_duration", + "time in nanoseconds to dispatch an event", + vec![ 100.0, 500.0, 1_000.0, @@ -430,25 +438,19 @@ impl RunnerMetrics { 1_000_000.0, 2_000_000.0, 5_000_000.0, - ]), + ], )?; let allocated_ram_bytes = - IntGauge::new("allocated_ram_bytes", "total allocated ram in bytes")?; + registry.new_int_gauge("allocated_ram_bytes", "total allocated ram in bytes")?; let consumed_ram_bytes = - IntGauge::new("consumed_ram_bytes", "total consumed ram in bytes")?; - let total_ram_bytes = IntGauge::new("total_ram_bytes", "total system ram in bytes")?; - - registry.register(Box::new(events.clone()))?; - registry.register(Box::new(event_dispatch_duration.clone()))?; - registry.register(Box::new(allocated_ram_bytes.clone()))?; - registry.register(Box::new(consumed_ram_bytes.clone()))?; - registry.register(Box::new(total_ram_bytes.clone()))?; + registry.new_int_gauge("consumed_ram_bytes", "total consumed ram in bytes")?; + let total_ram_bytes = + registry.new_int_gauge("total_ram_bytes", "total system ram in bytes")?; Ok(RunnerMetrics { events, event_dispatch_duration, - registry: registry.clone(), allocated_ram_bytes, consumed_ram_bytes, total_ram_bytes, @@ -456,16 +458,6 @@ impl RunnerMetrics { } } -impl Drop for RunnerMetrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.events); - unregister_metric!(self.registry, self.event_dispatch_duration); - unregister_metric!(self.registry, self.allocated_ram_bytes); - 
unregister_metric!(self.registry, self.consumed_ram_bytes); - unregister_metric!(self.registry, self.total_ram_bytes); - } -} - impl Runner where R: Reactor, @@ -489,18 +481,6 @@ where ) -> Result { adjust_open_files_limit(); - let event_size = mem::size_of::(); - - // Check if the event is of a reasonable size. This only emits a runtime warning at startup - // right now, since storage size of events is not an issue per se, but copying might be - // expensive if events get too large. - if event_size > 16 * mem::size_of::() { - warn!( - %event_size, type_name = ?any::type_name::(), - "large event size, consider reducing it or boxing" - ); - } - let event_queue_dump_threshold = env::var("CL_EVENT_QUEUE_DUMP_THRESHOLD").map_or(None, |s| s.parse::().ok()); @@ -508,7 +488,7 @@ where QueueKind::weights(), event_queue_dump_threshold, )); - let is_shutting_down = SharedFlag::new(); + let is_shutting_down = SharedFuse::new(); let event_queue = EventQueueHandle::new(scheduler, is_shutting_down); let (reactor, initial_effects) = R::new( cfg, @@ -520,7 +500,7 @@ where rng, )?; - info!( + trace!( "Reactor: with_metrics has: {} initial_effects", initial_effects.len() ); @@ -976,6 +956,7 @@ fn handle_fetch_response( rng: &mut NodeRng, sender: NodeId, serialized_item: &[u8], + ticket: Ticket, ) -> Effects<::Event> where I: FetchItem, @@ -984,14 +965,20 @@ where { match fetcher::Event::::from_get_response_serialized_item(sender, serialized_item) { Some(fetcher_event) => { - Reactor::dispatch_event(reactor, effect_builder, rng, fetcher_event.into()) + let effects = + Reactor::dispatch_event(reactor, effect_builder, rng, fetcher_event.into()); + + // We have processed the response, drop the ticket before evaluating effects. + drop(ticket); + + effects } None => effect_builder .announce_block_peer_with_justification( sender, BlocklistJustification::SentBadItem { tag: I::TAG }, ) - .ignore(), + .ignore(), // Implicitly drops `ticket`. 
} } @@ -1001,6 +988,7 @@ fn handle_get_response( rng: &mut NodeRng, sender: NodeId, message: Box, + ticket: Ticket, ) -> Effects<::Event> where R: Reactor, @@ -1024,6 +1012,7 @@ where rng, sender, serialized_item, + ticket, ), NetResponse::LegacyDeploy(ref serialized_item) => handle_fetch_response::( reactor, @@ -1031,16 +1020,23 @@ where rng, sender, serialized_item, + ticket, + ), + NetResponse::Block(ref serialized_item) => handle_fetch_response::( + reactor, + effect_builder, + rng, + sender, + serialized_item, + ticket, ), - NetResponse::Block(ref serialized_item) => { - handle_fetch_response::(reactor, effect_builder, rng, sender, serialized_item) - } NetResponse::BlockHeader(ref serialized_item) => handle_fetch_response::( reactor, effect_builder, rng, sender, serialized_item, + ticket, ), NetResponse::FinalitySignature(ref serialized_item) => { handle_fetch_response::( @@ -1049,6 +1045,7 @@ where rng, sender, serialized_item, + ticket, ) } NetResponse::SyncLeap(ref serialized_item) => handle_fetch_response::( @@ -1057,6 +1054,7 @@ where rng, sender, serialized_item, + ticket, ), NetResponse::ApprovalsHashes(ref serialized_item) => { handle_fetch_response::( @@ -1065,6 +1063,7 @@ where rng, sender, serialized_item, + ticket, ) } NetResponse::BlockExecutionResults(ref serialized_item) => { @@ -1074,6 +1073,7 @@ where rng, sender, serialized_item, + ticket, ) } } diff --git a/node/src/reactor/event_queue_metrics.rs b/node/src/reactor/event_queue_metrics.rs index a9971bff59..cf1cbc5f01 100644 --- a/node/src/reactor/event_queue_metrics.rs +++ b/node/src/reactor/event_queue_metrics.rs @@ -2,22 +2,20 @@ use std::collections::HashMap; use itertools::Itertools; use prometheus::{self, IntGauge, Registry}; -use tracing::{debug, error}; +use tracing::debug; use crate::{ reactor::{EventQueueHandle, QueueKind}, - unregister_metric, + utils::registered_metric::{RegisteredMetric, RegistryExt}, }; /// Metrics for event queue sizes. 
#[derive(Debug)] pub(super) struct EventQueueMetrics { /// Per queue kind gauges that measure number of event in the queue. - event_queue_gauges: HashMap, + event_queue_gauges: HashMap>, /// Total events count. - event_total: IntGauge, - /// Instance of registry to unregister from when being dropped. - registry: Registry, + event_total: RegisteredMetric, } impl EventQueueMetrics { @@ -26,31 +24,29 @@ impl EventQueueMetrics { registry: Registry, event_queue_handle: EventQueueHandle, ) -> Result { - let mut event_queue_gauges: HashMap = HashMap::new(); + let mut event_queue_gauges = HashMap::new(); for queue_kind in event_queue_handle.event_queues_counts().keys() { let key = format!("scheduler_queue_{}_count", queue_kind.metrics_name()); - let queue_event_counter = IntGauge::new( + let queue_event_counter = registry.new_int_gauge( key, format!( "current number of events in the reactor {} queue", queue_kind.metrics_name() ), )?; - registry.register(Box::new(queue_event_counter.clone()))?; + let result = event_queue_gauges.insert(*queue_kind, queue_event_counter); assert!(result.is_none(), "Map keys should not be overwritten."); } - let event_total = IntGauge::new( + let event_total = registry.new_int_gauge( "scheduler_queue_total_count", "current total number of events in all reactor queues", )?; - registry.register(Box::new(event_total.clone()))?; Ok(EventQueueMetrics { event_queue_gauges, event_total, - registry, }) } @@ -81,16 +77,3 @@ impl EventQueueMetrics { debug!(%total, %event_counts, "Collected new set of event queue sizes metrics.") } } - -impl Drop for EventQueueMetrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.event_total); - self.event_queue_gauges - .iter() - .for_each(|(key, queue_gauge)| { - self.registry - .unregister(Box::new(queue_gauge.clone())) - .unwrap_or_else(|_| error!("unregistering {} failed: was not registered", key)) - }); - } -} diff --git a/node/src/reactor/main_reactor.rs b/node/src/reactor/main_reactor.rs index 
63d92fb54d..00bfef3abf 100644 --- a/node/src/reactor/main_reactor.rs +++ b/node/src/reactor/main_reactor.rs @@ -27,13 +27,10 @@ use tracing::{debug, error, info, warn}; use casper_types::{EraId, PublicKey, TimeDiff, Timestamp, U512}; -#[cfg(test)] -use crate::testing::network::NetworkedReactor; use crate::{ components::{ block_accumulator::{self, BlockAccumulator}, block_synchronizer::{self, BlockSynchronizer}, - block_validator::{self, BlockValidator}, consensus::{self, EraSupervisor}, contract_runtime::ContractRuntime, deploy_acceptor::{self, DeployAcceptor}, @@ -42,7 +39,8 @@ use crate::{ event_stream_server::{self, EventStreamServer}, gossiper::{self, GossipItem, Gossiper}, metrics::Metrics, - network::{self, GossipedAddress, Identity as NetworkIdentity, Network}, + network::{self, GossipedAddress, Identity as NetworkIdentity, Network, Ticket}, + proposed_block_validator::{self, ProposedBlockValidator}, rest_server::RestServer, rpc_server::RpcServer, shutdown_trigger::{self, ShutdownTrigger}, @@ -51,6 +49,7 @@ use crate::{ upgrade_watcher::{self, UpgradeWatcher}, Component, ValidatorBoundComponent, }, + dead_metrics::DeadMetrics, effect::{ announcements::{ BlockAccumulatorAnnouncement, ConsensusAnnouncement, ContractRuntimeAnnouncement, @@ -79,6 +78,11 @@ use crate::{ utils::{Source, WithDir}, NodeRng, }; +#[cfg(test)] +use crate::{ + components::{ComponentState, InitializedComponent}, + testing::network::NetworkedReactor, +}; pub use config::Config; pub(crate) use error::Error; pub(crate) use event::MainEvent; @@ -141,11 +145,11 @@ pub(crate) struct MainReactor { event_stream_server: EventStreamServer, diagnostics_port: DiagnosticsPort, shutdown_trigger: ShutdownTrigger, - net: Network, + net: Network, consensus: EraSupervisor, // block handling - block_validator: BlockValidator, + proposed_block_validator: ProposedBlockValidator, block_accumulator: BlockAccumulator, block_synchronizer: BlockSynchronizer, @@ -171,6 +175,9 @@ pub(crate) struct MainReactor { 
memory_metrics: MemoryMetrics, #[data_size(skip)] event_queue_metrics: EventQueueMetrics, + #[data_size(skip)] + #[allow(dead_code)] + dead_metrics: DeadMetrics, // ambient settings / data / load-bearing config validator_matrix: ValidatorMatrix, @@ -382,9 +389,11 @@ impl reactor::Reactor for MainReactor { self.storage .handle_event(effect_builder, rng, incoming.into()), ), - MainEvent::NetworkPeerProvidingData(NetResponseIncoming { sender, message }) => { - reactor::handle_get_response(self, effect_builder, rng, sender, message) - } + MainEvent::NetworkPeerProvidingData(NetResponseIncoming { + sender, + message, + ticket, + }) => reactor::handle_get_response(self, effect_builder, rng, sender, message, ticket), MainEvent::AddressGossiper(event) => reactor::wrap_effects( MainEvent::AddressGossiper, self.address_gossiper @@ -423,10 +432,10 @@ impl reactor::Reactor for MainReactor { self.consensus .handle_event(effect_builder, rng, incoming.into()), ), - MainEvent::ConsensusDemand(demand) => reactor::wrap_effects( + MainEvent::ConsensusRequestMessageIncoming(request_message) => reactor::wrap_effects( MainEvent::Consensus, self.consensus - .handle_event(effect_builder, rng, demand.into()), + .handle_event(effect_builder, rng, request_message.into()), ), MainEvent::ConsensusAnnouncement(consensus_announcement) => { match consensus_announcement { @@ -457,15 +466,15 @@ impl reactor::Reactor for MainReactor { } // BLOCKS - MainEvent::BlockValidator(event) => reactor::wrap_effects( - MainEvent::BlockValidator, - self.block_validator + MainEvent::ProposedBlockValidator(event) => reactor::wrap_effects( + MainEvent::ProposedBlockValidator, + self.proposed_block_validator .handle_event(effect_builder, rng, event), ), - MainEvent::BlockValidatorRequest(req) => self.dispatch_event( + MainEvent::ProposedBlockValidatorRequest(req) => self.dispatch_event( effect_builder, rng, - MainEvent::BlockValidator(block_validator::Event::from(req)), + 
MainEvent::ProposedBlockValidator(proposed_block_validator::Event::from(req)), ), MainEvent::BlockAccumulator(event) => reactor::wrap_effects( MainEvent::BlockAccumulator, @@ -551,6 +560,7 @@ impl reactor::Reactor for MainReactor { MainEvent::BlockGossiperAnnouncement(GossiperAnnouncement::NewItemBody { item, sender, + ticket, }) => reactor::wrap_effects( MainEvent::BlockAccumulator, self.block_accumulator.handle_event( @@ -559,6 +569,7 @@ impl reactor::Reactor for MainReactor { block_accumulator::Event::ReceivedBlock { block: Arc::new(*item), sender, + ticket, }, ), ), @@ -574,6 +585,7 @@ impl reactor::Reactor for MainReactor { block_accumulator::Event::ReceivedBlock { block, sender: peer, + ticket: Ticket::stub(), }, ), ) @@ -593,6 +605,7 @@ impl reactor::Reactor for MainReactor { let block_accumulator_event = block_accumulator::Event::ReceivedFinalitySignature { finality_signature, sender, + ticket: incoming.ticket, }; reactor::wrap_effects( MainEvent::BlockAccumulator, @@ -637,7 +650,11 @@ impl reactor::Reactor for MainReactor { Effects::new() } MainEvent::FinalitySignatureGossiperAnnouncement( - GossiperAnnouncement::NewItemBody { item, sender }, + GossiperAnnouncement::NewItemBody { + item, + sender, + ticket, + }, ) => reactor::wrap_effects( MainEvent::BlockAccumulator, self.block_accumulator.handle_event( @@ -646,6 +663,7 @@ impl reactor::Reactor for MainReactor { block_accumulator::Event::ReceivedFinalitySignature { finality_signature: item, sender, + ticket, }, ), ), @@ -669,6 +687,7 @@ impl reactor::Reactor for MainReactor { block_accumulator::Event::ReceivedFinalitySignature { finality_signature, sender: peer, + ticket: Ticket::stub(), }, ), ), @@ -693,6 +712,7 @@ impl reactor::Reactor for MainReactor { deploy, source, maybe_responder: Some(responder), + ticket: Ticket::stub(), }; reactor::wrap_effects( MainEvent::DeployAcceptor, @@ -776,6 +796,7 @@ impl reactor::Reactor for MainReactor { 
MainEvent::DeployGossiperAnnouncement(GossiperAnnouncement::NewItemBody { item, sender, + ticket, }) => reactor::wrap_effects( MainEvent::DeployAcceptor, self.deploy_acceptor.handle_event( @@ -785,6 +806,7 @@ impl reactor::Reactor for MainReactor { deploy: Arc::new(*item), source: Source::PeerGossiped(sender), maybe_responder: None, + ticket, }, ), ), @@ -855,20 +877,18 @@ impl reactor::Reactor for MainReactor { self.contract_runtime .handle_event(effect_builder, rng, req.into()), ), - MainEvent::TrieDemand(demand) => reactor::wrap_effects( - MainEvent::ContractRuntime, - self.contract_runtime - .handle_event(effect_builder, rng, demand.into()), + MainEvent::TrieResponseIncoming(TrieResponseIncoming { + sender, + message, + ticket, + }) => reactor::handle_fetch_response::( + self, + effect_builder, + rng, + sender, + &message.0, + ticket, ), - MainEvent::TrieResponseIncoming(TrieResponseIncoming { sender, message }) => { - reactor::handle_fetch_response::( - self, - effect_builder, - rng, - sender, - &message.0, - ) - } // STORAGE MainEvent::Storage(event) => reactor::wrap_effects( @@ -1003,6 +1023,7 @@ impl reactor::Reactor for MainReactor { let metrics = Metrics::new(registry.clone()); let memory_metrics = MemoryMetrics::new(registry.clone())?; let event_queue_metrics = EventQueueMetrics::new(registry.clone(), event_queue)?; + let dead_metrics = DeadMetrics::new(registry)?; let protocol_version = chainspec.protocol_config.version; let prevent_validator_shutdown = config.value().node.prevent_validator_shutdown; @@ -1153,7 +1174,8 @@ impl reactor::Reactor for MainReactor { validator_matrix.clone(), registry, )?; - let block_validator = BlockValidator::new(Arc::clone(&chainspec), config.block_validator); + let proposed_block_validator = + ProposedBlockValidator::new(Arc::clone(&chainspec), config.proposed_block_validator); let upgrade_watcher = UpgradeWatcher::new(chainspec.as_ref(), config.upgrade_watcher, &root_dir)?; let deploy_acceptor = @@ -1182,7 +1204,7 @@ 
impl reactor::Reactor for MainReactor { sync_leaper, deploy_buffer, consensus, - block_validator, + proposed_block_validator, block_accumulator, block_synchronizer, diagnostics_port, @@ -1191,6 +1213,7 @@ impl reactor::Reactor for MainReactor { metrics, memory_metrics, event_queue_metrics, + dead_metrics, state: ReactorState::Initialize {}, attempts: 0, @@ -1232,6 +1255,27 @@ impl reactor::Reactor for MainReactor { .record_event_queue_counts(&event_queue_handle) } + #[cfg(test)] + fn get_component_state(&self, name: &str) -> Option<&ComponentState> { + match name { + "diagnostics_port" => Some( + >::state(&self.diagnostics_port), + ), + "event_stream_server" => Some( + >::state( + &self.event_stream_server, + ), + ), + "rest_server" => Some(>::state( + &self.rest_server, + )), + "rpc_server" => Some(>::state( + &self.rpc_server, + )), + _ => None, + } + } + fn activate_failpoint(&mut self, activation: &FailpointActivation) { if activation.key().starts_with("consensus") { >::activate_failpoint( @@ -1264,6 +1308,10 @@ impl MainReactor { self.block_synchronizer .handle_validators(effect_builder, rng), )); + effects.extend(reactor::wrap_effects( + MainEvent::Network, + self.net.handle_validators(effect_builder, rng), + )); effects } diff --git a/node/src/reactor/main_reactor/catch_up.rs b/node/src/reactor/main_reactor/catch_up.rs index b37d7d380a..124fba9377 100644 --- a/node/src/reactor/main_reactor/catch_up.rs +++ b/node/src/reactor/main_reactor/catch_up.rs @@ -321,7 +321,7 @@ impl MainReactor { block_hash: BlockHash, ) -> CatchUpInstruction { // we get a random sampling of peers to ask. 
- let peers_to_ask = self.net.fully_connected_peers_random( + let peers_to_ask = self.net.connected_peers_random( rng, self.chainspec.core_config.simultaneous_peer_requests as usize, ); diff --git a/node/src/reactor/main_reactor/config.rs b/node/src/reactor/main_reactor/config.rs index 9ab528d9a2..f25eb3c2b1 100644 --- a/node/src/reactor/main_reactor/config.rs +++ b/node/src/reactor/main_reactor/config.rs @@ -5,9 +5,9 @@ use tracing::error; use crate::{ logging::LoggingConfig, types::{Chainspec, NodeConfig}, - BlockAccumulatorConfig, BlockSynchronizerConfig, BlockValidatorConfig, ConsensusConfig, - ContractRuntimeConfig, DeployAcceptorConfig, DeployBufferConfig, DiagnosticsPortConfig, - EventStreamServerConfig, FetcherConfig, GossipConfig, NetworkConfig, RestServerConfig, + BlockAccumulatorConfig, BlockSynchronizerConfig, ConsensusConfig, ContractRuntimeConfig, + DeployAcceptorConfig, DeployBufferConfig, DiagnosticsPortConfig, EventStreamServerConfig, + FetcherConfig, GossipConfig, NetworkConfig, ProposedBlockValidatorConfig, RestServerConfig, RpcServerConfig, SpeculativeExecConfig, StorageConfig, UpgradeWatcherConfig, }; @@ -50,8 +50,8 @@ pub struct Config { pub block_accumulator: BlockAccumulatorConfig, /// Config values for the block synchronizer. pub block_synchronizer: BlockSynchronizerConfig, - /// Config values for the block validator. - pub block_validator: BlockValidatorConfig, + /// Config values for the proposed block validator. + pub proposed_block_validator: ProposedBlockValidatorConfig, /// Config values for the upgrade watcher. 
pub upgrade_watcher: UpgradeWatcherConfig, } diff --git a/node/src/reactor/main_reactor/control.rs b/node/src/reactor/main_reactor/control.rs index f0731d6f84..6828859ef7 100644 --- a/node/src/reactor/main_reactor/control.rs +++ b/node/src/reactor/main_reactor/control.rs @@ -60,7 +60,7 @@ impl MainReactor { match self.initialize_next_component(effect_builder) { Some(effects) => (initialization_logic_default_delay.into(), effects), None => { - if false == self.net.has_sufficient_fully_connected_peers() { + if false == self.net.has_sufficient_connected_peers() { info!("Initialize: awaiting sufficient fully-connected peers"); return (initialization_logic_default_delay.into(), Effects::new()); } diff --git a/node/src/reactor/main_reactor/event.rs b/node/src/reactor/main_reactor/event.rs index a2d02fd9e7..9ae44abc35 100644 --- a/node/src/reactor/main_reactor/event.rs +++ b/node/src/reactor/main_reactor/event.rs @@ -13,10 +13,11 @@ use crate::{ components::{ block_accumulator, block_synchronizer::{self, GlobalStateSynchronizerEvent, TrieAccumulatorEvent}, - block_validator, consensus, contract_runtime, deploy_acceptor, deploy_buffer, - diagnostics_port, event_stream_server, fetcher, gossiper, + consensus, contract_runtime, deploy_acceptor, deploy_buffer, diagnostics_port, + event_stream_server, fetcher, gossiper, network::{self, GossipedAddress}, - rest_server, rpc_server, shutdown_trigger, storage, sync_leaper, upgrade_watcher, + proposed_block_validator, rest_server, rpc_server, shutdown_trigger, storage, sync_leaper, + upgrade_watcher, }, effect::{ announcements::{ @@ -28,18 +29,18 @@ use crate::{ }, diagnostics_port::DumpConsensusStateRequest, incoming::{ - ConsensusDemand, ConsensusMessageIncoming, FinalitySignatureIncoming, GossiperIncoming, - NetRequestIncoming, NetResponseIncoming, TrieDemand, TrieRequestIncoming, + ConsensusMessageIncoming, ConsensusRequestMessageIncoming, FinalitySignatureIncoming, + GossiperIncoming, NetRequestIncoming, NetResponseIncoming, 
TrieRequestIncoming, TrieResponseIncoming, }, requests::{ AcceptDeployRequest, BeginGossipRequest, BlockAccumulatorRequest, - BlockSynchronizerRequest, BlockValidationRequest, ChainspecRawBytesRequest, - ConsensusRequest, ContractRuntimeRequest, DeployBufferRequest, FetcherRequest, + BlockSynchronizerRequest, ChainspecRawBytesRequest, ConsensusRequest, + ContractRuntimeRequest, DeployBufferRequest, FetcherRequest, MakeBlockExecutableRequest, MarkBlockCompletedRequest, MetricsRequest, - NetworkInfoRequest, NetworkRequest, ReactorStatusRequest, RestRequest, RpcRequest, - SetNodeStopRequest, StorageRequest, SyncGlobalStateRequest, TrieAccumulatorRequest, - UpgradeWatcherRequest, + NetworkInfoRequest, NetworkRequest, ProposedBlockValidationRequest, + ReactorStatusRequest, RestRequest, RpcRequest, SetNodeStopRequest, StorageRequest, + SyncGlobalStateRequest, TrieAccumulatorRequest, UpgradeWatcherRequest, }, }, protocol::Message, @@ -121,7 +122,7 @@ pub(crate) enum MainEvent { #[from] ConsensusMessageIncoming(ConsensusMessageIncoming), #[from] - ConsensusDemand(ConsensusDemand), + ConsensusRequestMessageIncoming(ConsensusRequestMessageIncoming), #[from] ConsensusAnnouncement(#[serde(skip_serializing)] ConsensusAnnouncement), #[from] @@ -129,9 +130,9 @@ pub(crate) enum MainEvent { #[from] BlockHeaderFetcherRequest(#[serde(skip_serializing)] FetcherRequest), #[from] - BlockValidator(#[serde(skip_serializing)] block_validator::Event), + ProposedBlockValidator(#[serde(skip_serializing)] proposed_block_validator::Event), #[from] - BlockValidatorRequest(#[serde(skip_serializing)] BlockValidationRequest), + ProposedBlockValidatorRequest(#[serde(skip_serializing)] ProposedBlockValidationRequest), #[from] BlockAccumulator(#[serde(skip_serializing)] block_accumulator::Event), #[from] @@ -229,8 +230,6 @@ pub(crate) enum MainEvent { #[from] TrieRequestIncoming(TrieRequestIncoming), #[from] - TrieDemand(TrieDemand), - #[from] TrieResponseIncoming(TrieResponseIncoming), #[from] 
Storage(storage::Event), @@ -282,7 +281,7 @@ impl ReactorEvent for MainEvent { MainEvent::DeployGossiper(_) => "DeployGossiper", MainEvent::FinalitySignatureGossiper(_) => "FinalitySignatureGossiper", MainEvent::AddressGossiper(_) => "AddressGossiper", - MainEvent::BlockValidator(_) => "BlockValidator", + MainEvent::ProposedBlockValidator(_) => "ProposedBlockValidator", MainEvent::ContractRuntimeRequest(_) => "ContractRuntimeRequest", MainEvent::BlockHeaderFetcher(_) => "BlockHeaderFetcher", MainEvent::TrieOrChunkFetcher(_) => "TrieOrChunkFetcher", @@ -307,7 +306,7 @@ impl ReactorEvent for MainEvent { MainEvent::SyncLeapFetcherRequest(_) => "SyncLeapFetcherRequest", MainEvent::ApprovalsHashesFetcherRequest(_) => "ApprovalsHashesFetcherRequest", MainEvent::DeployBufferRequest(_) => "DeployBufferRequest", - MainEvent::BlockValidatorRequest(_) => "BlockValidatorRequest", + MainEvent::ProposedBlockValidatorRequest(_) => "ProposedBlockValidatorRequest", MainEvent::MetricsRequest(_) => "MetricsRequest", MainEvent::ChainspecRawBytesRequest(_) => "ChainspecRawBytesRequest", MainEvent::UpgradeWatcherRequest(_) => "UpgradeWatcherRequest", @@ -329,14 +328,13 @@ impl ReactorEvent for MainEvent { } MainEvent::AddressGossiperCrank(_) => "BeginAddressGossipRequest", MainEvent::ConsensusMessageIncoming(_) => "ConsensusMessageIncoming", - MainEvent::ConsensusDemand(_) => "ConsensusDemand", + MainEvent::ConsensusRequestMessageIncoming(_) => "ConsensusRequestMessageIncoming", MainEvent::DeployGossiperIncoming(_) => "DeployGossiperIncoming", MainEvent::FinalitySignatureGossiperIncoming(_) => "FinalitySignatureGossiperIncoming", MainEvent::AddressGossiperIncoming(_) => "AddressGossiperIncoming", MainEvent::NetworkPeerRequestingData(_) => "NetRequestIncoming", MainEvent::NetworkPeerProvidingData(_) => "NetResponseIncoming", MainEvent::TrieRequestIncoming(_) => "TrieRequestIncoming", - MainEvent::TrieDemand(_) => "TrieDemand", MainEvent::TrieResponseIncoming(_) => "TrieResponseIncoming", 
MainEvent::FinalitySignatureIncoming(_) => "FinalitySignatureIncoming", MainEvent::ContractRuntime(_) => "ContractRuntime", @@ -393,7 +391,9 @@ impl Display for MainEvent { MainEvent::ContractRuntimeRequest(event) => { write!(f, "contract runtime request: {:?}", event) } - MainEvent::BlockValidator(event) => write!(f, "block validator: {}", event), + MainEvent::ProposedBlockValidator(event) => { + write!(f, "proposed block validator: {}", event) + } MainEvent::BlockHeaderFetcher(event) => { write!(f, "block header fetcher: {}", event) } @@ -477,8 +477,8 @@ impl Display for MainEvent { MainEvent::DeployBufferRequest(req) => { write!(f, "deploy buffer request: {}", req) } - MainEvent::BlockValidatorRequest(req) => { - write!(f, "block validator request: {}", req) + MainEvent::ProposedBlockValidatorRequest(req) => { + write!(f, "proposed block validator request: {}", req) } MainEvent::MetricsRequest(req) => write!(f, "metrics request: {}", req), MainEvent::ControlAnnouncement(ctrl_ann) => write!(f, "control: {}", ctrl_ann), @@ -517,14 +517,13 @@ impl Display for MainEvent { write!(f, "finality signature fetcher announcement: {}", ann) } MainEvent::ConsensusMessageIncoming(inner) => Display::fmt(inner, f), - MainEvent::ConsensusDemand(inner) => Display::fmt(inner, f), + MainEvent::ConsensusRequestMessageIncoming(inner) => Display::fmt(inner, f), MainEvent::DeployGossiperIncoming(inner) => Display::fmt(inner, f), MainEvent::FinalitySignatureGossiperIncoming(inner) => Display::fmt(inner, f), MainEvent::AddressGossiperIncoming(inner) => Display::fmt(inner, f), MainEvent::NetworkPeerRequestingData(inner) => Display::fmt(inner, f), MainEvent::NetworkPeerProvidingData(inner) => Display::fmt(inner, f), MainEvent::TrieRequestIncoming(inner) => Display::fmt(inner, f), - MainEvent::TrieDemand(inner) => Display::fmt(inner, f), MainEvent::TrieResponseIncoming(inner) => Display::fmt(inner, f), MainEvent::FinalitySignatureIncoming(inner) => Display::fmt(inner, f), 
MainEvent::ContractRuntime(inner) => Display::fmt(inner, f), diff --git a/node/src/reactor/main_reactor/keep_up.rs b/node/src/reactor/main_reactor/keep_up.rs index 2239a6a8df..1ae3a23597 100644 --- a/node/src/reactor/main_reactor/keep_up.rs +++ b/node/src/reactor/main_reactor/keep_up.rs @@ -483,7 +483,7 @@ impl MainReactor { offset: Duration, ) -> KeepUpInstruction { // we get a random sampling of peers to ask. - let peers_to_ask = self.net.fully_connected_peers_random( + let peers_to_ask = self.net.connected_peers_random( rng, self.chainspec.core_config.simultaneous_peer_requests as usize, ); @@ -559,7 +559,7 @@ impl MainReactor { // it is possible that we may get a random sampling that do not have the data // we need, but the synchronizer should (eventually) detect that and ask for // more peers via the NeedNext behavior. - let peers_to_ask = self.net.fully_connected_peers_random( + let peers_to_ask = self.net.connected_peers_random( rng, self.chainspec.core_config.simultaneous_peer_requests as usize, ); diff --git a/node/src/reactor/main_reactor/memory_metrics.rs b/node/src/reactor/main_reactor/memory_metrics.rs index 6aafd47436..aa1e60d869 100644 --- a/node/src/reactor/main_reactor/memory_metrics.rs +++ b/node/src/reactor/main_reactor/memory_metrics.rs @@ -1,135 +1,110 @@ use datasize::DataSize; -use prometheus::{self, Histogram, HistogramOpts, IntGauge, Registry}; +use prometheus::{self, Histogram, IntGauge, Registry}; use tracing::debug; use super::MainReactor; -use crate::unregister_metric; +use crate::utils::registered_metric::{RegisteredMetric, RegistryExt}; /// Metrics for estimated heap memory usage for the main reactor. 
#[derive(Debug)] pub(super) struct MemoryMetrics { - mem_total: IntGauge, - mem_metrics: IntGauge, - mem_net: IntGauge, - mem_address_gossiper: IntGauge, - mem_storage: IntGauge, - mem_contract_runtime: IntGauge, - mem_rpc_server: IntGauge, - mem_rest_server: IntGauge, - mem_event_stream_server: IntGauge, - mem_consensus: IntGauge, - mem_deploy_gossiper: IntGauge, - mem_finality_signature_gossiper: IntGauge, - mem_block_gossiper: IntGauge, - mem_deploy_buffer: IntGauge, - mem_block_validator: IntGauge, - mem_sync_leaper: IntGauge, - mem_deploy_acceptor: IntGauge, - mem_block_synchronizer: IntGauge, - mem_block_accumulator: IntGauge, - mem_fetchers: IntGauge, - mem_diagnostics_port: IntGauge, - mem_upgrade_watcher: IntGauge, + mem_total: RegisteredMetric, + mem_metrics: RegisteredMetric, + mem_net: RegisteredMetric, + mem_address_gossiper: RegisteredMetric, + mem_storage: RegisteredMetric, + mem_contract_runtime: RegisteredMetric, + mem_rpc_server: RegisteredMetric, + mem_rest_server: RegisteredMetric, + mem_event_stream_server: RegisteredMetric, + mem_consensus: RegisteredMetric, + mem_deploy_gossiper: RegisteredMetric, + mem_finality_signature_gossiper: RegisteredMetric, + mem_block_gossiper: RegisteredMetric, + mem_deploy_buffer: RegisteredMetric, + mem_proposed_block_validator: RegisteredMetric, + mem_sync_leaper: RegisteredMetric, + mem_deploy_acceptor: RegisteredMetric, + mem_block_synchronizer: RegisteredMetric, + mem_block_accumulator: RegisteredMetric, + mem_fetchers: RegisteredMetric, + mem_diagnostics_port: RegisteredMetric, + mem_upgrade_watcher: RegisteredMetric, /// Histogram detailing how long it took to measure memory usage. - mem_estimator_runtime_s: Histogram, - registry: Registry, + mem_estimator_runtime_s: RegisteredMetric, } impl MemoryMetrics { /// Initializes a new set of memory metrics. 
pub(super) fn new(registry: Registry) -> Result { - let mem_total = IntGauge::new("mem_total", "total memory usage in bytes")?; - let mem_metrics = IntGauge::new("mem_metrics", "metrics memory usage in bytes")?; - let mem_net = IntGauge::new("mem_net", "network memory usage in bytes")?; - let mem_address_gossiper = IntGauge::new( + let mem_total = registry.new_int_gauge("mem_total", "total memory usage in bytes")?; + let mem_metrics = registry.new_int_gauge("mem_metrics", "metrics memory usage in bytes")?; + let mem_net = registry.new_int_gauge("mem_net", "network memory usage in bytes")?; + let mem_address_gossiper = registry.new_int_gauge( "mem_address_gossiper", "address_gossiper memory usage in bytes", )?; - let mem_storage = IntGauge::new("mem_storage", "storage memory usage in bytes")?; - let mem_contract_runtime = IntGauge::new( + let mem_storage = registry.new_int_gauge("mem_storage", "storage memory usage in bytes")?; + let mem_contract_runtime = registry.new_int_gauge( "mem_contract_runtime", "contract runtime memory usage in bytes", )?; - let mem_rpc_server = IntGauge::new("mem_rpc_server", "rpc server memory usage in bytes")?; + let mem_rpc_server = + registry.new_int_gauge("mem_rpc_server", "rpc server memory usage in bytes")?; let mem_rest_server = - IntGauge::new("mem_rest_server", "rest server memory usage in bytes")?; - let mem_event_stream_server = IntGauge::new( + registry.new_int_gauge("mem_rest_server", "rest server memory usage in bytes")?; + let mem_event_stream_server = registry.new_int_gauge( "mem_event_stream_server", "event stream server memory usage in bytes", )?; - let mem_consensus = IntGauge::new("mem_consensus", "consensus memory usage in bytes")?; - let mem_fetchers = IntGauge::new("mem_fetchers", "combined fetcher memory usage in bytes")?; - let mem_deploy_gossiper = IntGauge::new( + let mem_consensus = + registry.new_int_gauge("mem_consensus", "consensus memory usage in bytes")?; + let mem_fetchers = + 
registry.new_int_gauge("mem_fetchers", "combined fetcher memory usage in bytes")?; + let mem_deploy_gossiper = registry.new_int_gauge( "mem_deploy_gossiper", "deploy gossiper memory usage in bytes", )?; - let mem_finality_signature_gossiper = IntGauge::new( + let mem_finality_signature_gossiper = registry.new_int_gauge( "mem_finality_signature_gossiper", "finality signature gossiper memory usage in bytes", )?; let mem_block_gossiper = - IntGauge::new("mem_block_gossiper", "block gossiper memory usage in bytes")?; + registry.new_int_gauge("mem_block_gossiper", "block gossiper memory usage in bytes")?; let mem_deploy_buffer = - IntGauge::new("mem_deploy_buffer", "deploy buffer memory usage in bytes")?; - let mem_block_validator = IntGauge::new( + registry.new_int_gauge("mem_deploy_buffer", "deploy buffer memory usage in bytes")?; + let mem_proposed_block_validator = registry.new_int_gauge( "mem_block_validator", - "block validator memory usage in bytes", + "proposed block validator memory usage in bytes", )?; let mem_sync_leaper = - IntGauge::new("mem_sync_leaper", "sync leaper memory usage in bytes")?; - let mem_deploy_acceptor = IntGauge::new( + registry.new_int_gauge("mem_sync_leaper", "sync leaper memory usage in bytes")?; + let mem_deploy_acceptor = registry.new_int_gauge( "mem_deploy_acceptor", "deploy acceptor memory usage in bytes", )?; - let mem_block_synchronizer = IntGauge::new( + let mem_block_synchronizer = registry.new_int_gauge( "mem_block_synchronizer", "block synchronizer memory usage in bytes", )?; - let mem_block_accumulator = IntGauge::new( + let mem_block_accumulator = registry.new_int_gauge( "mem_block_accumulator", "block accumulator memory usage in bytes", )?; - let mem_diagnostics_port = IntGauge::new( + let mem_diagnostics_port = registry.new_int_gauge( "mem_diagnostics_port", "diagnostics port memory usage in bytes", )?; - let mem_upgrade_watcher = IntGauge::new( + let mem_upgrade_watcher = registry.new_int_gauge( "mem_upgrade_watcher", 
"upgrade watcher memory usage in bytes", )?; - let mem_estimator_runtime_s = Histogram::with_opts( - HistogramOpts::new( - "mem_estimator_runtime_s", - "time in seconds to estimate memory usage", - ) - // Create buckets from one nanosecond to eight seconds. - .buckets(prometheus::exponential_buckets(0.000_000_004, 2.0, 32)?), + let mem_estimator_runtime_s = registry.new_histogram( + "mem_estimator_runtime_s", + "time in seconds to estimate memory usage", + prometheus::exponential_buckets(0.000_000_004, 2.0, 32)?, )?; - registry.register(Box::new(mem_total.clone()))?; - registry.register(Box::new(mem_metrics.clone()))?; - registry.register(Box::new(mem_net.clone()))?; - registry.register(Box::new(mem_address_gossiper.clone()))?; - registry.register(Box::new(mem_storage.clone()))?; - registry.register(Box::new(mem_contract_runtime.clone()))?; - registry.register(Box::new(mem_rpc_server.clone()))?; - registry.register(Box::new(mem_rest_server.clone()))?; - registry.register(Box::new(mem_event_stream_server.clone()))?; - registry.register(Box::new(mem_consensus.clone()))?; - registry.register(Box::new(mem_fetchers.clone()))?; - registry.register(Box::new(mem_deploy_gossiper.clone()))?; - registry.register(Box::new(mem_finality_signature_gossiper.clone()))?; - registry.register(Box::new(mem_block_gossiper.clone()))?; - registry.register(Box::new(mem_deploy_buffer.clone()))?; - registry.register(Box::new(mem_block_validator.clone()))?; - registry.register(Box::new(mem_sync_leaper.clone()))?; - registry.register(Box::new(mem_deploy_acceptor.clone()))?; - registry.register(Box::new(mem_block_synchronizer.clone()))?; - registry.register(Box::new(mem_block_accumulator.clone()))?; - registry.register(Box::new(mem_diagnostics_port.clone()))?; - registry.register(Box::new(mem_upgrade_watcher.clone()))?; - registry.register(Box::new(mem_estimator_runtime_s.clone()))?; - Ok(MemoryMetrics { mem_total, mem_metrics, @@ -146,7 +121,7 @@ impl MemoryMetrics { 
mem_finality_signature_gossiper, mem_block_gossiper, mem_deploy_buffer, - mem_block_validator, + mem_proposed_block_validator, mem_sync_leaper, mem_deploy_acceptor, mem_block_synchronizer, @@ -154,7 +129,6 @@ impl MemoryMetrics { mem_diagnostics_port, mem_upgrade_watcher, mem_estimator_runtime_s, - registry, }) } @@ -177,7 +151,7 @@ impl MemoryMetrics { reactor.finality_signature_gossiper.estimate_heap_size() as i64; let block_gossiper = reactor.block_gossiper.estimate_heap_size() as i64; let deploy_buffer = reactor.deploy_buffer.estimate_heap_size() as i64; - let block_validator = reactor.block_validator.estimate_heap_size() as i64; + let proposed_block_validator = reactor.proposed_block_validator.estimate_heap_size() as i64; let sync_leaper = reactor.sync_leaper.estimate_heap_size() as i64; let deploy_acceptor = reactor.deploy_acceptor.estimate_heap_size() as i64; let block_synchronizer = reactor.block_synchronizer.estimate_heap_size() as i64; @@ -199,7 +173,7 @@ impl MemoryMetrics { + finality_signature_gossiper + block_gossiper + deploy_buffer - + block_validator + + proposed_block_validator + sync_leaper + deploy_acceptor + block_synchronizer @@ -221,7 +195,8 @@ impl MemoryMetrics { .set(finality_signature_gossiper); self.mem_block_gossiper.set(block_gossiper); self.mem_deploy_buffer.set(deploy_buffer); - self.mem_block_validator.set(block_validator); + self.mem_proposed_block_validator + .set(proposed_block_validator); self.mem_sync_leaper.set(sync_leaper); self.mem_deploy_acceptor.set(deploy_acceptor); self.mem_block_synchronizer.set(block_synchronizer); @@ -251,7 +226,7 @@ impl MemoryMetrics { %finality_signature_gossiper, %block_gossiper, %deploy_buffer, - %block_validator, + %proposed_block_validator, %sync_leaper, %deploy_acceptor, %block_synchronizer, @@ -261,32 +236,3 @@ impl MemoryMetrics { "Collected new set of memory metrics."); } } - -impl Drop for MemoryMetrics { - fn drop(&mut self) { - unregister_metric!(self.registry, self.mem_total); - 
unregister_metric!(self.registry, self.mem_metrics); - unregister_metric!(self.registry, self.mem_estimator_runtime_s); - - unregister_metric!(self.registry, self.mem_net); - unregister_metric!(self.registry, self.mem_address_gossiper); - unregister_metric!(self.registry, self.mem_storage); - unregister_metric!(self.registry, self.mem_contract_runtime); - unregister_metric!(self.registry, self.mem_rpc_server); - unregister_metric!(self.registry, self.mem_rest_server); - unregister_metric!(self.registry, self.mem_event_stream_server); - unregister_metric!(self.registry, self.mem_consensus); - unregister_metric!(self.registry, self.mem_fetchers); - unregister_metric!(self.registry, self.mem_deploy_gossiper); - unregister_metric!(self.registry, self.mem_finality_signature_gossiper); - unregister_metric!(self.registry, self.mem_block_gossiper); - unregister_metric!(self.registry, self.mem_deploy_buffer); - unregister_metric!(self.registry, self.mem_block_validator); - unregister_metric!(self.registry, self.mem_sync_leaper); - unregister_metric!(self.registry, self.mem_deploy_acceptor); - unregister_metric!(self.registry, self.mem_block_synchronizer); - unregister_metric!(self.registry, self.mem_block_accumulator); - unregister_metric!(self.registry, self.mem_diagnostics_port); - unregister_metric!(self.registry, self.mem_upgrade_watcher); - } -} diff --git a/node/src/reactor/main_reactor/tests.rs b/node/src/reactor/main_reactor/tests.rs index f4815d443c..a02fce23d9 100644 --- a/node/src/reactor/main_reactor/tests.rs +++ b/node/src/reactor/main_reactor/tests.rs @@ -1,4 +1,11 @@ -use std::{collections::BTreeMap, iter, net::SocketAddr, str::FromStr, sync::Arc, time::Duration}; +use std::{ + collections::{BTreeMap, HashSet}, + fs, iter, + net::SocketAddr, + str::FromStr, + sync::Arc, + time::Duration, +}; use either::Either; use num::Zero; @@ -22,12 +29,9 @@ use crate::{ }, gossiper, network, storage, upgrade_watcher::NextUpgrade, + ComponentState, }, - effect::{ - 
incoming::ConsensusMessageIncoming, - requests::{ContractRuntimeRequest, NetworkRequest}, - EffectExt, - }, + effect::{incoming::ConsensusMessageIncoming, requests::ContractRuntimeRequest, EffectExt}, protocol::Message, reactor::{ main_reactor::{Config, MainEvent, MainReactor, ReactorState}, @@ -41,7 +45,7 @@ use crate::{ ActivationPoint, AvailableBlockRange, Block, BlockHash, BlockHeader, BlockPayload, Chainspec, ChainspecRawBytes, Deploy, ExitCode, NodeId, SyncHandling, }, - utils::{External, Loadable, Source, RESOURCES_PATH}, + utils::{extract_metric_names, External, Fuse, Loadable, Source, RESOURCES_PATH}, WithDir, }; @@ -207,6 +211,12 @@ impl TestFixture { fixture } + /// Access the environments RNG. + #[inline(always)] + pub fn rng_mut(&mut self) -> &mut TestRng { + &mut self.rng + } + /// Returns the highest complete block from node 0. /// /// Panics if there is no such block. @@ -436,6 +446,18 @@ impl TestFixture { .await; } } + + #[inline(always)] + pub fn network_mut(&mut self) -> &mut TestingNetwork> { + &mut self.network + } + + pub fn run_until_stopped( + self, + rng: TestRng, + ) -> impl futures::Future>, TestRng)> { + self.network.crank_until_stopped(rng) + } } /// Given a block height and a node id, returns a predicate to check if the lowest available block @@ -726,19 +748,26 @@ async fn run_equivocator_network() { if is_ping(&event) { return Either::Left(time::sleep((min_round_len * 30).into()).event(move |_| event)); } + + // Filter out all incoming and outgoing consensus message traffic. let now = Timestamp::now(); match &event { - MainEvent::ConsensusMessageIncoming(_) => {} - MainEvent::NetworkRequest( - NetworkRequest::SendMessage { payload, .. } - | NetworkRequest::ValidatorBroadcast { payload, .. } - | NetworkRequest::Gossip { payload, .. }, - ) if matches!(**payload, Message::Consensus(_)) => {} - _ => return Either::Right(event), + MainEvent::ConsensusMessageIncoming(_) + | MainEvent::ConsensusRequestMessageIncoming(_) => { + // delayed. 
+ } + MainEvent::NetworkRequest(req) if matches!(req.payload(), Message::Consensus(_)) => { + // delayed + } + _ => { + return Either::Right(event); + } }; let first_message_time = *maybe_first_message_time.get_or_insert(now); if now < first_message_time + min_round_len * 3 { - return Either::Left(time::sleep(min_round_len.into()).event(move |_| event)); + return Either::Left( + time::sleep(Duration::from(min_round_len) * 3).event(move |_| event), + ); } Either::Right(event) }); @@ -751,6 +780,7 @@ async fn run_equivocator_network() { Either::Right(event) }); + assert!(alice_reactors.next().is_none()); drop(alice_reactors); let era_count = 4; @@ -1132,6 +1162,122 @@ async fn empty_block_validation_regression() { } } +#[tokio::test] +#[ignore] // Disabled until fixed, after the issue with `TestFixture` and multiple `TestRng`s was fixed. +async fn all_metrics_from_1_5_are_present() { + testing::init_logging(); + + let mut fixture = TestFixture::new( + InitialStakes::AllEqual { + count: 4, + stake: 100, + }, + None, + ) + .await; + let mut rng = fixture.rng_mut().create_child(); + + let net = fixture.network_mut(); + + net.settle_on_component_state( + &mut rng, + "rest_server", + &ComponentState::Initialized, + Duration::from_secs(59), + ) + .await; + + // Get the node ID. + let node_id = *net.nodes().keys().next().unwrap(); + + let rest_addr = net.nodes()[&node_id] + .main_reactor() + .rest_server + .bind_address(); + + // We let the entire network run in the background, until our request completes. 
+ let finish_cranking = fixture.run_until_stopped(rng); + + let metrics_response = reqwest::Client::builder() + .build() + .expect("failed to build client") + .get(format!("http://localhost:{}/metrics", rest_addr.port())) + .timeout(Duration::from_secs(2)) + .send() + .await + .expect("request failed") + .error_for_status() + .expect("error response on metrics request") + .text() + .await + .expect("error retrieving text on metrics request"); + + let (_net, _rng) = finish_cranking.await; + + let actual = extract_metric_names(&metrics_response); + let raw_1_5 = fs::read_to_string(RESOURCES_PATH.join("metrics-1.5.txt")) + .expect("could not read 1.5 metrics snapshot"); + let metrics_1_5 = extract_metric_names(&raw_1_5); + + let missing: HashSet<_> = metrics_1_5.difference(&actual).collect(); + assert!( + missing.is_empty(), + "missing 1.5 metrics in current metrics set: {:?}", + missing + ); +} + +#[tokio::test] +#[ignore] // Disabled, until the issue with `TestFixture` and multiple `TestRng`s is fixed. +async fn port_bound_components_report_ready() { + testing::init_logging(); + + let mut rng = crate::new_rng(); + + let mut fixture = TestFixture::new( + InitialStakes::AllEqual { + count: 4, + stake: 100, + }, + None, + ) + .await; + let net = fixture.network_mut(); + + // Ensure all `PortBoundComponent` implementors report readiness eventually. 
+ net.settle_on_component_state( + &mut rng, + "rest_server", + &ComponentState::Initialized, + Duration::from_secs(10), + ) + .await; + + net.settle_on_component_state( + &mut rng, + "rpc_server", + &ComponentState::Initialized, + Duration::from_secs(10), + ) + .await; + + net.settle_on_component_state( + &mut rng, + "event_stream_server", + &ComponentState::Initialized, + Duration::from_secs(10), + ) + .await; + + net.settle_on_component_state( + &mut rng, + "diagnostics_port", + &ComponentState::Initialized, + Duration::from_secs(10), + ) + .await; +} + #[tokio::test] async fn network_should_recover_from_stall() { // Set up a network with three nodes. diff --git a/node/src/reactor/queue_kind.rs b/node/src/reactor/queue_kind.rs index 628ccc0ee6..03ac062c0b 100644 --- a/node/src/reactor/queue_kind.rs +++ b/node/src/reactor/queue_kind.rs @@ -4,7 +4,7 @@ //! round-robin manner. This way, events are only competing for time within one queue, non-congested //! queues can always assume to be speedily processed. -use std::{fmt::Display, num::NonZeroUsize}; +use std::num::NonZeroUsize; use enum_iterator::IntoEnumIterator; use serde::Serialize; @@ -13,19 +13,30 @@ use serde::Serialize; /// /// Priorities are ordered from lowest to highest. #[derive( - Copy, Clone, Debug, Eq, PartialEq, Hash, IntoEnumIterator, PartialOrd, Ord, Serialize, Default, + Copy, + Clone, + Debug, + strum::Display, + Eq, + PartialEq, + Hash, + IntoEnumIterator, + PartialOrd, + Ord, + Serialize, + Default, )] pub enum QueueKind { /// Control messages for the runtime itself. Control, - /// Network events that were initiated outside of this node. + /// Incoming message events that were initiated outside of this node. /// - /// Their load may vary and grouping them together in one queue aides DoS protection. - NetworkIncoming, - /// Network events that are low priority. - NetworkLowPriority, - /// Network events demand a resource directly. 
- NetworkDemand, + /// Their load may vary and grouping them together in one queue aids DoS protection. + MessageIncoming, + /// Incoming messages that are low priority. + MessageLowPriority, + /// Incoming messages from validators. + MessageValidator, /// Network events that were initiated by the local node, such as outgoing messages. Network, /// NetworkInfo events. @@ -60,31 +71,6 @@ pub enum QueueKind { Api, } -impl Display for QueueKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let str_value = match self { - QueueKind::Control => "Control", - QueueKind::NetworkIncoming => "NetworkIncoming", - QueueKind::NetworkLowPriority => "NetworkLowPriority", - QueueKind::NetworkDemand => "NetworkDemand", - QueueKind::Network => "Network", - QueueKind::NetworkInfo => "NetworkInfo", - QueueKind::Fetch => "Fetch", - QueueKind::Regular => "Regular", - QueueKind::Gossip => "Gossip", - QueueKind::FromStorage => "FromStorage", - QueueKind::ToStorage => "ToStorage", - QueueKind::ContractRuntime => "ContractRuntime", - QueueKind::SyncGlobalState => "SyncGlobalState", - QueueKind::FinalitySignature => "FinalitySignature", - QueueKind::Consensus => "Consensus", - QueueKind::Validation => "Validation", - QueueKind::Api => "Api", - }; - write!(f, "{}", str_value) - } -} - impl QueueKind { /// Returns the weight of a specific queue. /// @@ -92,10 +78,10 @@ impl QueueKind { /// each event processing round. 
fn weight(self) -> NonZeroUsize { NonZeroUsize::new(match self { - QueueKind::NetworkLowPriority => 1, + QueueKind::MessageLowPriority => 1, QueueKind::NetworkInfo => 2, - QueueKind::NetworkDemand => 2, - QueueKind::NetworkIncoming => 8, + QueueKind::MessageIncoming => 4, + QueueKind::MessageValidator => 8, QueueKind::Network => 4, QueueKind::Regular => 4, QueueKind::Fetch => 4, @@ -124,9 +110,9 @@ impl QueueKind { pub(crate) fn metrics_name(&self) -> &str { match self { QueueKind::Control => "control", - QueueKind::NetworkIncoming => "network_incoming", - QueueKind::NetworkDemand => "network_demands", - QueueKind::NetworkLowPriority => "network_low_priority", + QueueKind::MessageIncoming => "message_incoming", + QueueKind::MessageLowPriority => "message_low_priority", + QueueKind::MessageValidator => "message_validator", QueueKind::Network => "network", QueueKind::NetworkInfo => "network_info", QueueKind::SyncGlobalState => "sync_global_state", diff --git a/node/src/testing/condition_check_reactor.rs b/node/src/testing/condition_check_reactor.rs index af2e0a0fb2..017f26ecb8 100644 --- a/node/src/testing/condition_check_reactor.rs +++ b/node/src/testing/condition_check_reactor.rs @@ -103,6 +103,10 @@ impl Reactor for ConditionCheckReactor { } self.reactor.dispatch_event(effect_builder, rng, event) } + + fn get_component_state(&self, name: &str) -> Option<&crate::components::ComponentState> { + self.inner().get_component_state(name) + } } impl Finalize for ConditionCheckReactor { diff --git a/node/src/testing/fake_deploy_acceptor.rs b/node/src/testing/fake_deploy_acceptor.rs index 5cef41dcb1..10d21c47b7 100644 --- a/node/src/testing/fake_deploy_acceptor.rs +++ b/node/src/testing/fake_deploy_acceptor.rs @@ -119,6 +119,7 @@ impl Component for FakeDeployAcceptor { deploy, source, maybe_responder, + ticket: _, // not handled in tests. 
} => self.accept(effect_builder, deploy, source, maybe_responder), Event::PutToStorageResult { event_metadata, diff --git a/node/src/testing/filter_reactor.rs b/node/src/testing/filter_reactor.rs index 091040bcbf..c9a068cac9 100644 --- a/node/src/testing/filter_reactor.rs +++ b/node/src/testing/filter_reactor.rs @@ -84,6 +84,10 @@ impl Reactor for FilterReactor { Either::Right(event) => self.reactor.dispatch_event(effect_builder, rng, event), } } + + fn get_component_state(&self, name: &str) -> Option<&crate::components::ComponentState> { + self.inner().get_component_state(name) + } } impl Finalize for FilterReactor { diff --git a/node/src/testing/network.rs b/node/src/testing/network.rs index 320acdcd09..ee89f1d318 100644 --- a/node/src/testing/network.rs +++ b/node/src/testing/network.rs @@ -4,7 +4,10 @@ use std::{ collections::{hash_map::Entry, HashMap}, fmt::Debug, mem, - sync::Arc, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, time::Duration, }; @@ -12,13 +15,14 @@ use fake_instant::FakeClock as Instant; use futures::future::{BoxFuture, FutureExt}; use serde::Serialize; use tokio::time::{self, error::Elapsed}; -use tracing::{debug, error_span}; +use tracing::{debug, error_span, field, Span}; use tracing_futures::Instrument; use casper_types::testing::TestRng; use super::ConditionCheckReactor; use crate::{ + components::ComponentState, effect::{EffectBuilder, Effects}, reactor::{Finalize, Reactor, Runner, TryCrankOutcome}, tls::KeyFingerprint, @@ -30,7 +34,7 @@ use crate::{ /// Type alias for set of nodes inside a network. /// /// Provided as a convenience for writing condition functions for `settle_on` and friends. -pub(crate) type Nodes = HashMap>>; +pub(crate) type Nodes = HashMap>>>; /// A reactor with networking functionality. /// @@ -60,7 +64,9 @@ const POLL_INTERVAL: Duration = Duration::from_millis(10); #[derive(Debug, Default)] pub(crate) struct TestingNetwork { /// Current network. 
- nodes: HashMap>>, + nodes: HashMap>>>, + /// Mapping of node IDs to spans. + spans: HashMap, } impl TestingNetwork @@ -69,7 +75,7 @@ where R::Config: Default, ::Error: Debug, R::Event: Serialize, - R::Error: From, + R::Error: From + Send, { /// Creates a new networking node on the network using the default root node port. /// @@ -99,12 +105,13 @@ impl TestingNetwork where R: Reactor + NetworkedReactor, R::Event: Serialize, - R::Error: From + From, + R::Error: From + From + Send, { /// Creates a new network. pub(crate) fn new() -> Self { TestingNetwork { nodes: HashMap::new(), + spans: HashMap::new(), } } @@ -141,10 +148,17 @@ where chainspec_raw_bytes: Arc, rng: &'b mut NodeRng, ) -> Result<(NodeId, &mut Runner>), R::Error> { - let runner: Runner> = - Runner::new(cfg, chainspec, chainspec_raw_bytes, rng).await?; + let node_idx = self.nodes.len(); + let span = error_span!("node", node_idx, node_id = field::Empty); + let runner: Box>> = Box::new( + Runner::new(cfg, chainspec, chainspec_raw_bytes, rng) + .instrument(span.clone()) + .await?, + ); let node_id = runner.reactor().node_id(); + span.record("node_id", field::display(node_id)); + self.spans.insert(node_id, span.clone()); let node_ref = match self.nodes.entry(node_id) { Entry::Occupied(_) => { @@ -162,7 +176,7 @@ where pub(crate) fn remove_node( &mut self, node_id: &NodeId, - ) -> Option>> { + ) -> Option>>> { self.nodes.remove(node_id) } @@ -170,10 +184,9 @@ where pub(crate) async fn crank(&mut self, node_id: &NodeId, rng: &mut TestRng) -> TryCrankOutcome { let runner = self.nodes.get_mut(node_id).expect("should find node"); let node_id = runner.reactor().node_id(); - runner - .try_crank(rng) - .instrument(error_span!("crank", node_id = %node_id)) - .await + let span = self.spans.get(&node_id).expect("should find span"); + + runner.try_crank(rng).instrument(span.clone()).await } /// Crank only the specified runner until `condition` is true or until `within` has elapsed. 
@@ -204,11 +217,9 @@ where let mut event_count = 0; for node in self.nodes.values_mut() { let node_id = node.reactor().node_id(); - match node - .try_crank(rng) - .instrument(error_span!("crank", node_id = %node_id)) - .await - { + let span = self.spans.get(&node_id).expect("span disappeared").clone(); + + match node.try_crank(rng).instrument(span).await { TryCrankOutcome::NoEventsToProcess => (), TryCrankOutcome::ProcessedAnEvent => event_count += 1, TryCrankOutcome::ShouldExit(exit_code) => { @@ -346,6 +357,10 @@ where /// Panics if the `condition` is not reached inside of `within`, or if any node returns an exit /// code. /// + /// If the `condition` is not reached inside of `within`, panics. + // Note: `track_caller` will not have an effect until + // is fixed. + // #[track_caller] /// To settle on an exit code, use `settle_on_exit` instead. pub(crate) async fn settle_on(&mut self, rng: &mut TestRng, condition: F, within: Duration) where @@ -361,6 +376,7 @@ where }) } + // #[track_caller] async fn settle_on_indefinitely(&mut self, rng: &mut TestRng, condition: F) where F: Fn(&Nodes) -> bool, @@ -394,6 +410,64 @@ where .unwrap_or_else(|_| panic!("network did not settle on condition within {:?}", within)) } + /// Keeps cranking the network until every reactor's specified component is in the given state. + /// + /// # Panics + /// + /// Panics if any reactor returns `None` on its [`Reactor::get_component_state()`] call. + pub(crate) async fn settle_on_component_state( + &mut self, + rng: &mut TestRng, + name: &str, + state: &ComponentState, + timeout: Duration, + ) { + self.settle_on( + rng, + |net| { + net.values() + .all(|runner| match runner.reactor().get_component_state(name) { + Some(actual_state) => actual_state == state, + None => panic!("unknown or unsupported component: {}", name), + }) + }, + timeout, + ) + .await; + } + + /// Starts a background process that will crank all nodes until stopped. 
+ /// + /// Returns a future that will, once polled, stop all cranking and return the network and the + /// random number generator. Note that the stop command will be sent as soon as the returned + /// future is polled (awaited), but no sooner. + pub(crate) fn crank_until_stopped( + mut self, + mut rng: TestRng, + ) -> impl futures::Future + where + R: Send + 'static, + { + let stop = Arc::new(AtomicBool::new(false)); + let handle = tokio::spawn({ + let stop = stop.clone(); + async move { + while !stop.load(Ordering::Relaxed) { + if self.crank_all(&mut rng).await == 0 { + time::sleep(POLL_INTERVAL).await; + }; + } + (self, rng) + } + }); + + async move { + // Trigger the background process stop. + stop.store(true, Ordering::Relaxed); + handle.await.expect("failed to join background crank") + } + } + async fn settle_on_exit_indefinitely(&mut self, rng: &mut TestRng, expected: ExitCode) { let mut exited_as_expected = 0; loop { @@ -435,12 +509,14 @@ where } /// Returns the internal map of nodes. - pub(crate) fn nodes(&self) -> &HashMap>> { + pub(crate) fn nodes(&self) -> &HashMap>>> { &self.nodes } /// Returns the internal map of nodes, mutable. - pub(crate) fn nodes_mut(&mut self) -> &mut HashMap>> { + pub(crate) fn nodes_mut( + &mut self, + ) -> &mut HashMap>>> { &mut self.nodes } @@ -448,7 +524,7 @@ where pub(crate) fn runners_mut( &mut self, ) -> impl Iterator>> { - self.nodes.values_mut() + self.nodes.values_mut().map(|bx| &mut **bx) } /// Returns an iterator over all reactors, mutable. @@ -481,7 +557,7 @@ impl Finalize for TestingNetwork where R: Finalize + NetworkedReactor + Reactor + Send + 'static, R::Event: Serialize + Send + Sync, - R::Error: From, + R::Error: From + Send, { fn finalize(self) -> BoxFuture<'static, ()> { // We support finalizing networks where the reactor itself can be finalized. 
diff --git a/node/src/tls.rs b/node/src/tls.rs index cad3f18468..81cb27a040 100644 --- a/node/src/tls.rs +++ b/node/src/tls.rs @@ -55,6 +55,8 @@ use rand::{ use serde::{Deserialize, Serialize}; use thiserror::Error; +use crate::utils::LockedLineWriter; + // This is inside a private module so that the generated `BigArray` does not form part of this // crate's public API, and hence also doesn't appear in the rustdocs. mod big_array { @@ -215,6 +217,7 @@ impl TlsCert { } /// Returns the public key fingerprint. + #[inline(always)] pub(crate) fn public_key_fingerprint(&self) -> KeyFingerprint { self.key_fingerprint } @@ -320,9 +323,10 @@ pub fn generate_node_cert() -> SslResult<(X509, PKey)> { pub(crate) fn create_tls_acceptor( cert: &X509Ref, private_key: &PKeyRef, + keylog: Option, ) -> SslResult { let mut builder = SslAcceptor::mozilla_modern_v5(SslMethod::tls_server())?; - set_context_options(&mut builder, cert, private_key)?; + set_context_options(&mut builder, cert, private_key, keylog)?; Ok(builder.build()) } @@ -334,9 +338,10 @@ pub(crate) fn create_tls_acceptor( pub(crate) fn create_tls_connector( cert: &X509Ref, private_key: &PKeyRef, + keylog: Option, ) -> SslResult { let mut builder = SslConnector::builder(SslMethod::tls_client())?; - set_context_options(&mut builder, cert, private_key)?; + set_context_options(&mut builder, cert, private_key, keylog)?; Ok(builder.build()) } @@ -348,6 +353,7 @@ fn set_context_options( ctx: &mut SslContextBuilder, cert: &X509Ref, private_key: &PKeyRef, + keylog: Option, ) -> SslResult<()> { ctx.set_min_proto_version(Some(SslVersion::TLS1_3))?; @@ -361,6 +367,14 @@ fn set_context_options( // handshake has completed. 
ctx.set_verify_callback(SslVerifyMode::PEER, |_, _| true); + if let Some(writer) = keylog { + ctx.set_keylog_callback(move |_ssl_ref, str| { + let mut line = str.to_owned(); + line.push('\n'); + writer.write_line(&line); + }); + } + Ok(()) } diff --git a/node/src/types/appendable_block.rs b/node/src/types/appendable_block.rs index 72da119c5b..ae29108e11 100644 --- a/node/src/types/appendable_block.rs +++ b/node/src/types/appendable_block.rs @@ -6,6 +6,7 @@ use std::{ use casper_types::{Gas, PublicKey, TimeDiff, Timestamp}; use datasize::DataSize; use num_traits::Zero; +use serde::Serialize; use thiserror::Error; use crate::types::{ @@ -15,8 +16,8 @@ use crate::types::{ const NO_LEEWAY: TimeDiff = TimeDiff::from_millis(0); -#[derive(Debug, Error)] -pub(crate) enum AddError { +#[derive(Copy, Clone, DataSize, Debug, Error, Serialize)] +pub enum AddError { #[error("would exceed maximum transfer count per block")] TransferCount, #[error("would exceed maximum deploy count per block")] diff --git a/node/src/types/block.rs b/node/src/types/block.rs index 13b08b00fa..bbe0fcd3ac 100644 --- a/node/src/types/block.rs +++ b/node/src/types/block.rs @@ -827,120 +827,6 @@ pub struct BlockHeader { block_hash: OnceCell, } -pub(crate) mod specimen_support { - use crate::utils::specimen::{ - btree_map_distinct_from_prop, Cache, LargestSpecimen, SizeEstimator, - }; - - use super::{ - BlockExecutionResultsOrChunk, BlockExecutionResultsOrChunkId, BlockHeader, - BlockHeaderWithMetadata, BlockSignatures, EraEnd, - }; - use once_cell::sync::OnceCell; - - /// A wrapper around `BlockHeader` that implements `LargestSpecimen` without including the era - /// end. - pub(crate) struct BlockHeaderWithoutEraEnd(BlockHeader); - - impl BlockHeaderWithoutEraEnd { - /// Unwraps the inner `BlockHeader`. 
- pub(crate) fn into_inner(self) -> BlockHeader { - self.0 - } - } - - impl LargestSpecimen for BlockHeaderWithoutEraEnd { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - BlockHeaderWithoutEraEnd(BlockHeader { - parent_hash: LargestSpecimen::largest_specimen(estimator, cache), - state_root_hash: LargestSpecimen::largest_specimen(estimator, cache), - body_hash: LargestSpecimen::largest_specimen(estimator, cache), - random_bit: LargestSpecimen::largest_specimen(estimator, cache), - accumulated_seed: LargestSpecimen::largest_specimen(estimator, cache), - era_end: None, - timestamp: LargestSpecimen::largest_specimen(estimator, cache), - era_id: LargestSpecimen::largest_specimen(estimator, cache), - height: LargestSpecimen::largest_specimen(estimator, cache), - protocol_version: LargestSpecimen::largest_specimen(estimator, cache), - block_hash: OnceCell::with_value(LargestSpecimen::largest_specimen( - estimator, cache, - )), - }) - } - } - - impl LargestSpecimen for BlockHeader { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - BlockHeader { - parent_hash: LargestSpecimen::largest_specimen(estimator, cache), - state_root_hash: LargestSpecimen::largest_specimen(estimator, cache), - body_hash: LargestSpecimen::largest_specimen(estimator, cache), - random_bit: LargestSpecimen::largest_specimen(estimator, cache), - accumulated_seed: LargestSpecimen::largest_specimen(estimator, cache), - era_end: LargestSpecimen::largest_specimen(estimator, cache), - timestamp: LargestSpecimen::largest_specimen(estimator, cache), - era_id: LargestSpecimen::largest_specimen(estimator, cache), - height: LargestSpecimen::largest_specimen(estimator, cache), - protocol_version: LargestSpecimen::largest_specimen(estimator, cache), - block_hash: OnceCell::with_value(LargestSpecimen::largest_specimen( - estimator, cache, - )), - } - } - } - - impl LargestSpecimen for EraEnd { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - EraEnd { - 
era_report: LargestSpecimen::largest_specimen(estimator, cache), - next_era_validator_weights: btree_map_distinct_from_prop( - estimator, - "validator_count", - cache, - ), - } - } - } - - impl LargestSpecimen for BlockExecutionResultsOrChunkId { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - BlockExecutionResultsOrChunkId { - chunk_index: u64::MAX, - block_hash: LargestSpecimen::largest_specimen(estimator, cache), - } - } - } - - impl LargestSpecimen for BlockHeaderWithMetadata { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - BlockHeaderWithMetadata { - block_header: LargestSpecimen::largest_specimen(estimator, cache), - block_signatures: LargestSpecimen::largest_specimen(estimator, cache), - } - } - } - - impl LargestSpecimen for BlockSignatures { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - BlockSignatures { - block_hash: LargestSpecimen::largest_specimen(estimator, cache), - era_id: LargestSpecimen::largest_specimen(estimator, cache), - proofs: btree_map_distinct_from_prop(estimator, "validator_count", cache), - } - } - } - - impl LargestSpecimen for BlockExecutionResultsOrChunk { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - BlockExecutionResultsOrChunk { - block_hash: LargestSpecimen::largest_specimen(estimator, cache), - value: LargestSpecimen::largest_specimen(estimator, cache), - is_valid: OnceCell::with_value(Ok(true)), - } - } - } -} - impl BlockHeader { /// The parent block's hash. 
pub fn parent_hash(&self) -> &BlockHash { diff --git a/node/src/types/block/approvals_hashes.rs b/node/src/types/block/approvals_hashes.rs index 78423be79c..7ed379966e 100644 --- a/node/src/types/block/approvals_hashes.rs +++ b/node/src/types/block/approvals_hashes.rs @@ -168,80 +168,3 @@ pub(crate) enum ApprovalsHashesValidationError { value_in_proof: Digest, }, } - -mod specimen_support { - use crate::{ - contract_runtime::{APPROVALS_CHECKSUM_NAME, EXECUTION_RESULTS_CHECKSUM_NAME}, - utils::specimen::{ - largest_variant, vec_of_largest_specimen, vec_prop_specimen, Cache, LargestSpecimen, - SizeEstimator, - }, - }; - - use super::ApprovalsHashes; - use casper_execution_engine::storage::trie::{ - merkle_proof::{TrieMerkleProof, TrieMerkleProofStep}, - Pointer, - }; - use casper_hashing::Digest; - use casper_types::{bytesrepr::Bytes, CLValue, Key, StoredValue}; - use once_cell::sync::OnceCell; - use std::collections::BTreeMap; - - impl LargestSpecimen for ApprovalsHashes { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - let data = { - let mut map = BTreeMap::new(); - map.insert( - APPROVALS_CHECKSUM_NAME, - Digest::largest_specimen(estimator, cache), - ); - map.insert( - EXECUTION_RESULTS_CHECKSUM_NAME, - Digest::largest_specimen(estimator, cache), - ); - map - }; - let merkle_proof_approvals = TrieMerkleProof::new( - Key::ChecksumRegistry, - StoredValue::CLValue(CLValue::from_t(data).expect("a correct cl value")), - // 2^64/2^13 = 2^51, so 51 items: - vec_of_largest_specimen(estimator, 51, cache).into(), - ); - ApprovalsHashes { - block_hash: LargestSpecimen::largest_specimen(estimator, cache), - approvals_hashes: vec_prop_specimen(estimator, "approvals_hashes", cache), - merkle_proof_approvals, - is_verified: OnceCell::with_value(Ok(())), // Not serialized, so we do not care - } - } - } - - impl LargestSpecimen for TrieMerkleProofStep { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - #[derive(strum::EnumIter)] - enum 
TrieMerkleProofStepDiscriminants { - Node, - Extension, - } - - largest_variant(estimator, |variant| match variant { - TrieMerkleProofStepDiscriminants::Node => TrieMerkleProofStep::Node { - hole_index: u8::MAX, - indexed_pointers_with_hole: vec![ - ( - u8::MAX, - Pointer::LeafPointer(LargestSpecimen::largest_specimen( - estimator, cache - )) - ); - estimator.parameter("max_pointer_per_node") - ], - }, - TrieMerkleProofStepDiscriminants::Extension => TrieMerkleProofStep::Extension { - affix: Bytes::from(vec![u8::MAX; Key::max_serialized_length()]), - }, - }) - } - } -} diff --git a/node/src/types/block/meta_block/merge_mismatch_error.rs b/node/src/types/block/meta_block/merge_mismatch_error.rs index a2de312222..345e6e59d2 100644 --- a/node/src/types/block/meta_block/merge_mismatch_error.rs +++ b/node/src/types/block/meta_block/merge_mismatch_error.rs @@ -1,7 +1,7 @@ use thiserror::Error; use tracing::error; -#[derive(Error, Debug)] +#[derive(Clone, Copy, Error, Debug)] pub(crate) enum MergeMismatchError { #[error("block mismatch when merging meta blocks")] Block, diff --git a/node/src/types/chainspec.rs b/node/src/types/chainspec.rs index c058ea1981..08e1f9cf30 100644 --- a/node/src/types/chainspec.rs +++ b/node/src/types/chainspec.rs @@ -45,17 +45,14 @@ pub use self::{ error::Error, global_state_update::GlobalStateUpdate, highway_config::{HighwayConfig, PerformanceMeterConfig}, - network_config::NetworkConfig, + network_config::{JulietConfig, NetworkConfig}, protocol_config::ProtocolConfig, }; -use crate::{components::network::generate_largest_serialized_message, utils::Loadable}; +use crate::utils::Loadable; /// The name of the chainspec file on disk. pub const CHAINSPEC_FILENAME: &str = "chainspec.toml"; -// Additional overhead accounted for (eg. lower level networking packet encapsulation). 
-const CHAINSPEC_NETWORK_MESSAGE_SAFETY_MARGIN: usize = 256; - /// A collection of configuration settings describing the state of the system at genesis and after /// upgrades to basic system functionality occurring after genesis. #[derive(DataSize, PartialEq, Eq, Serialize, Debug)] @@ -94,18 +91,6 @@ impl Chainspec { #[tracing::instrument(ret, level = "info", skip(self), fields(hash=%self.hash()))] pub fn is_valid(&self) -> bool { info!("begin chainspec validation"); - // Ensure the size of the largest message generated under these chainspec settings does not - // exceed the configured message size limit. - let serialized = generate_largest_serialized_message(self); - - if serialized.len() + CHAINSPEC_NETWORK_MESSAGE_SAFETY_MARGIN - > self.network_config.maximum_net_message_size as usize - { - warn!(calculated_length=serialized.len(), configured_maximum=self.network_config.maximum_net_message_size, - "config value [network][maximum_net_message_size] is too small to accomodate the maximum message size", - ); - return false; - } if self.core_config.unbonding_delay <= self.core_config.auction_delay { warn!( diff --git a/node/src/types/chainspec/network_config.rs b/node/src/types/chainspec/network_config.rs index 547ff743a2..c879a0d269 100644 --- a/node/src/types/chainspec/network_config.rs +++ b/node/src/types/chainspec/network_config.rs @@ -1,12 +1,15 @@ use datasize::DataSize; +use juliet::ChannelConfiguration; #[cfg(test)] use rand::Rng; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use casper_types::bytesrepr::{self, FromBytes, ToBytes}; #[cfg(test)] use casper_types::testing::TestRng; +use crate::components::network::PerChannel; + use super::AccountsConfig; /// Configuration values associated with the network. @@ -14,26 +17,56 @@ use super::AccountsConfig; pub struct NetworkConfig { /// The network name. pub name: String, - /// The maximum size of an accepted network message, in bytes. 
- pub maximum_net_message_size: u32, + /// The maximum size of an accepted handshake network message, in bytes. + pub maximum_handshake_message_size: u32, + /// The maximum frame size for network transport. + pub maximum_frame_size: u32, /// Validator accounts specified in the chainspec. // Note: `accounts_config` must be the last field on this struct due to issues in the TOML // crate - see . pub accounts_config: AccountsConfig, + /// Low level configuration. + pub networking_config: PerChannel, +} + +/// Low-level configuration for the Juliet crate. +#[derive(Debug, Clone, Copy, PartialEq, Eq, DataSize, Serialize, Deserialize)] +pub struct JulietConfig { + /// Sets a limit for channels. + pub in_flight_limit: u16, // order of magnitude: 10-50 + /// The maximum size of a request payload on this channel. + pub maximum_request_payload_size: u32, + /// The maximum size of a response payload on this channel. + pub maximum_response_payload_size: u32, +} + +impl Default for PerChannel { + fn default() -> Self { + //TODO figure out the right values: + PerChannel::init_with(|_| JulietConfig { + in_flight_limit: 25, + maximum_request_payload_size: 24 * 1024 * 1024, + maximum_response_payload_size: 0, + }) + } } #[cfg(test)] impl NetworkConfig { - /// Generates a random instance using a `TestRng`. + /// Generates a random instance for fuzzy testing using a `TestRng`. 
pub fn random(rng: &mut TestRng) -> Self { let name = rng.gen::().to_string(); - let maximum_net_message_size = 4 + rng.gen_range(0..4); + let maximum_handshake_message_size = 4 + rng.gen_range(0..4); + let maximum_frame_size = 16 + rng.gen_range(0..16); let accounts_config = AccountsConfig::random(rng); + let networking_config = PerChannel::init_with(|_| JulietConfig::random(rng)); NetworkConfig { name, - maximum_net_message_size, + maximum_handshake_message_size, accounts_config, + networking_config, + maximum_frame_size, } } } @@ -41,33 +74,126 @@ impl NetworkConfig { impl ToBytes for NetworkConfig { fn to_bytes(&self) -> Result, bytesrepr::Error> { let mut buffer = bytesrepr::allocate_buffer(self)?; - buffer.extend(self.name.to_bytes()?); - buffer.extend(self.accounts_config.to_bytes()?); - buffer.extend(self.maximum_net_message_size.to_bytes()?); + let Self { + name, + maximum_handshake_message_size, + accounts_config, + networking_config, + maximum_frame_size, + } = self; + + buffer.extend(name.to_bytes()?); + buffer.extend(maximum_handshake_message_size.to_bytes()?); + buffer.extend(accounts_config.to_bytes()?); + buffer.extend(networking_config.to_bytes()?); + buffer.extend(maximum_frame_size.to_bytes()?); Ok(buffer) } fn serialized_length(&self) -> usize { - self.name.serialized_length() - + self.accounts_config.serialized_length() - + self.maximum_net_message_size.serialized_length() + let Self { + name, + maximum_handshake_message_size, + accounts_config, + networking_config, + maximum_frame_size, + } = self; + + name.serialized_length() + + maximum_handshake_message_size.serialized_length() + + accounts_config.serialized_length() + + networking_config.serialized_length() + + maximum_frame_size.serialized_length() } } impl FromBytes for NetworkConfig { fn from_bytes(bytes: &[u8]) -> Result<(Self, &[u8]), bytesrepr::Error> { let (name, remainder) = String::from_bytes(bytes)?; + let (maximum_handshake_message_size, remainder) = 
FromBytes::from_bytes(remainder)?; let (accounts_config, remainder) = FromBytes::from_bytes(remainder)?; - let (maximum_net_message_size, remainder) = FromBytes::from_bytes(remainder)?; + let (networking_config, remainder) = FromBytes::from_bytes(remainder)?; + let (maximum_frame_size, remainder) = FromBytes::from_bytes(remainder)?; + let config = NetworkConfig { name, - maximum_net_message_size, + maximum_handshake_message_size, accounts_config, + networking_config, + maximum_frame_size, }; Ok((config, remainder)) } } +#[cfg(test)] +impl JulietConfig { + /// Generates a random instance using a `TestRng`. + pub fn random(rng: &mut TestRng) -> Self { + let in_flight_limit = rng.gen_range(2..50); + let maximum_request_payload_size = rng.gen_range(1024 * 1024..24 * 1024 * 1024); + let maximum_response_payload_size = rng.gen_range(0..32); + + Self { + in_flight_limit, + maximum_request_payload_size, + maximum_response_payload_size, + } + } +} + +impl ToBytes for JulietConfig { + fn to_bytes(&self) -> Result, bytesrepr::Error> { + let mut buffer = bytesrepr::allocate_buffer(self)?; + let Self { + in_flight_limit, + maximum_request_payload_size, + maximum_response_payload_size, + } = self; + + buffer.extend(in_flight_limit.to_bytes()?); + buffer.extend(maximum_request_payload_size.to_bytes()?); + buffer.extend(maximum_response_payload_size.to_bytes()?); + Ok(buffer) + } + + fn serialized_length(&self) -> usize { + let Self { + in_flight_limit, + maximum_request_payload_size, + maximum_response_payload_size, + } = self; + + in_flight_limit.serialized_length() + + maximum_request_payload_size.serialized_length() + + maximum_response_payload_size.serialized_length() + } +} + +impl FromBytes for JulietConfig { + fn from_bytes(bytes: &[u8]) -> Result<(Self, &[u8]), bytesrepr::Error> { + let (in_flight_limit, remainder) = FromBytes::from_bytes(bytes)?; + let (maximum_request_payload_size, remainder) = FromBytes::from_bytes(remainder)?; + let (maximum_response_payload_size, 
remainder) = FromBytes::from_bytes(remainder)?; + + let config = Self { + in_flight_limit, + maximum_request_payload_size, + maximum_response_payload_size, + }; + Ok((config, remainder)) + } +} + +impl From for ChannelConfiguration { + fn from(juliet_config: JulietConfig) -> Self { + ChannelConfiguration::new() + .with_request_limit(juliet_config.in_flight_limit) + .with_max_request_payload_size(juliet_config.maximum_request_payload_size) + .with_max_response_payload_size(juliet_config.maximum_response_payload_size) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/node/src/types/chainspec/parse_toml.rs b/node/src/types/chainspec/parse_toml.rs index 5851433bb8..c2927eb148 100644 --- a/node/src/types/chainspec/parse_toml.rs +++ b/node/src/types/chainspec/parse_toml.rs @@ -13,10 +13,12 @@ use serde::{Deserialize, Serialize}; use casper_execution_engine::shared::{system_config::SystemConfig, wasm_config::WasmConfig}; use casper_types::{bytesrepr::Bytes, file_utils, ProtocolVersion}; +use crate::components::network::PerChannel; + use super::{ accounts_config::AccountsConfig, global_state_update::GlobalStateUpdateConfig, ActivationPoint, Chainspec, ChainspecRawBytes, CoreConfig, DeployConfig, Error, GlobalStateUpdate, - HighwayConfig, NetworkConfig, ProtocolConfig, + HighwayConfig, JulietConfig, NetworkConfig, ProtocolConfig, }; #[derive(PartialEq, Eq, Serialize, Deserialize, Debug)] @@ -24,7 +26,9 @@ use super::{ #[serde(deny_unknown_fields)] struct TomlNetwork { name: String, - maximum_net_message_size: u32, + maximum_handshake_message_size: u32, + maximum_frame_size: u32, + networking_config: PerChannel, } #[derive(PartialEq, Eq, Serialize, Deserialize, Debug)] @@ -59,7 +63,9 @@ impl From<&Chainspec> for TomlChainspec { }; let network = TomlNetwork { name: chainspec.network_config.name.clone(), - maximum_net_message_size: chainspec.network_config.maximum_net_message_size, + maximum_handshake_message_size: 
chainspec.network_config.maximum_handshake_message_size, + networking_config: chainspec.network_config.networking_config, + maximum_frame_size: chainspec.network_config.maximum_frame_size, }; let core = chainspec.core_config.clone(); @@ -98,7 +104,9 @@ pub(super) fn parse_toml>( let network_config = NetworkConfig { name: toml_chainspec.network.name, accounts_config, - maximum_net_message_size: toml_chainspec.network.maximum_net_message_size, + maximum_handshake_message_size: toml_chainspec.network.maximum_handshake_message_size, + networking_config: toml_chainspec.network.networking_config, + maximum_frame_size: toml_chainspec.network.maximum_frame_size, }; // global_state_update.toml must live in the same directory as chainspec.toml. diff --git a/node/src/types/deploy/approval.rs b/node/src/types/deploy/approval.rs index 46db58f654..2ad1d090a8 100644 --- a/node/src/types/deploy/approval.rs +++ b/node/src/types/deploy/approval.rs @@ -84,38 +84,3 @@ impl FromBytes for Approval { Ok((approval, remainder)) } } - -mod specimen_support { - use std::collections::BTreeSet; - - use casper_types::PublicKey; - - use crate::utils::specimen::{Cache, LargeUniqueSequence, LargestSpecimen, SizeEstimator}; - - use super::Approval; - - impl LargestSpecimen for Approval { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - Approval { - signer: LargestSpecimen::largest_specimen(estimator, cache), - signature: LargestSpecimen::largest_specimen(estimator, cache), - } - } - } - - impl LargeUniqueSequence for Approval - where - Self: Sized + Ord, - E: SizeEstimator, - { - fn large_unique_sequence(estimator: &E, count: usize, cache: &mut Cache) -> BTreeSet { - PublicKey::large_unique_sequence(estimator, count, cache) - .into_iter() - .map(|public_key| Approval { - signer: public_key, - signature: LargestSpecimen::largest_specimen(estimator, cache), - }) - .collect() - } - } -} diff --git a/node/src/types/deploy/deploy_hash.rs b/node/src/types/deploy/deploy_hash.rs index 
b380c3d11e..15d830991c 100644 --- a/node/src/types/deploy/deploy_hash.rs +++ b/node/src/types/deploy/deploy_hash.rs @@ -106,18 +106,6 @@ impl FromBytes for DeployHash { } } -mod specimen_support { - use crate::utils::specimen::{Cache, LargestSpecimen, SizeEstimator}; - - use super::DeployHash; - - impl LargestSpecimen for DeployHash { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - DeployHash::new(LargestSpecimen::largest_specimen(estimator, cache)) - } - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/node/src/types/deploy/error.rs b/node/src/types/deploy/error.rs index f70ea6d676..6f64484038 100644 --- a/node/src/types/deploy/error.rs +++ b/node/src/types/deploy/error.rs @@ -156,7 +156,7 @@ pub enum Error { /// Error while decoding from JSON. #[error("decoding from JSON: {0}")] - DecodeFromJson(Box), + DecodeFromJson(Box), /// Failed to get "amount" from `payment()`'s runtime args. #[error("invalid payment: missing \"amount\" arg")] diff --git a/node/src/types/deploy/legacy_deploy.rs b/node/src/types/deploy/legacy_deploy.rs index 047df7a22d..79ffce877e 100644 --- a/node/src/types/deploy/legacy_deploy.rs +++ b/node/src/types/deploy/legacy_deploy.rs @@ -76,15 +76,3 @@ mod tests { bytesrepr::test_serialization_roundtrip(&legacy_deploy); } } - -mod specimen_support { - use crate::utils::specimen::{Cache, LargestSpecimen, SizeEstimator}; - - use super::LegacyDeploy; - - impl LargestSpecimen for LegacyDeploy { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - LegacyDeploy(LargestSpecimen::largest_specimen(estimator, cache)) - } - } -} diff --git a/node/src/types/node_id.rs b/node/src/types/node_id.rs index ad0f197ce6..db8ad05cb5 100644 --- a/node/src/types/node_id.rs +++ b/node/src/types/node_id.rs @@ -96,6 +96,7 @@ impl Display for NodeId { } impl From for NodeId { + #[inline(always)] fn from(id: KeyFingerprint) -> Self { NodeId(id) } diff --git a/node/src/types/peers_map.rs b/node/src/types/peers_map.rs index 
2c5d045010..9373f2ff69 100644 --- a/node/src/types/peers_map.rs +++ b/node/src/types/peers_map.rs @@ -1,7 +1,7 @@ // TODO - remove once schemars stops causing warning. #![allow(clippy::field_reassign_with_default)] -use std::collections::BTreeMap; +use std::{collections::BTreeMap, net::SocketAddr}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -15,7 +15,7 @@ pub struct PeerEntry { /// Node id. pub node_id: String, /// Node address. - pub address: String, + pub address: SocketAddr, } /// Map of peer IDs to network addresses. @@ -30,8 +30,8 @@ impl PeersMap { } } -impl From> for PeersMap { - fn from(input: BTreeMap) -> Self { +impl From> for PeersMap { + fn from(input: BTreeMap) -> Self { let ret = input .into_iter() .map(|(node_id, address)| PeerEntry { diff --git a/node/src/types/status_feed.rs b/node/src/types/status_feed.rs index 32fdf2e2c6..d20835cef9 100644 --- a/node/src/types/status_feed.rs +++ b/node/src/types/status_feed.rs @@ -41,7 +41,7 @@ static GET_STATUS_RESULT: Lazy = Lazy::new(|| { let node_id = NodeId::doc_example(); let socket_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 54321); let mut peers = BTreeMap::new(); - peers.insert(*node_id, socket_addr.to_string()); + peers.insert(*node_id, socket_addr); let status_feed = StatusFeed { last_added_block: Some(Block::doc_example().clone()), peers, @@ -88,7 +88,7 @@ pub struct StatusFeed { /// The last block added to the chain. pub last_added_block: Option, /// The peer nodes which are connected to this node. - pub peers: BTreeMap, + pub peers: BTreeMap, /// The chainspec info for this node. pub chainspec_info: ChainspecInfo, /// Our public signing key. 
@@ -115,7 +115,7 @@ impl StatusFeed { #[allow(clippy::too_many_arguments)] pub(crate) fn new( last_added_block: Option, - peers: BTreeMap, + peers: BTreeMap, chainspec_info: ChainspecInfo, consensus_status: Option<(PublicKey, Option)>, node_uptime: Duration, diff --git a/node/src/types/sync_leap.rs b/node/src/types/sync_leap.rs index 45ee9658f5..0a8f78dac7 100644 --- a/node/src/types/sync_leap.rs +++ b/node/src/types/sync_leap.rs @@ -388,55 +388,6 @@ impl FetchItem for SyncLeap { } } -mod specimen_support { - use crate::{ - types::block::specimen_support::BlockHeaderWithoutEraEnd, - utils::specimen::{ - estimator_max_rounds_per_era, vec_of_largest_specimen, vec_prop_specimen, Cache, - LargestSpecimen, SizeEstimator, - }, - }; - - use super::{SyncLeap, SyncLeapIdentifier}; - - impl LargestSpecimen for SyncLeap { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - // Will at most contain as many blocks as a single era. And how many blocks can - // there be in an era is determined by the chainspec: it's the - // maximum of minimum_era_height and era_duration / minimum_block_time - let count = estimator_max_rounds_per_era(estimator).saturating_sub(1); - - let non_switch_block_ancestors: Vec = - vec_of_largest_specimen(estimator, count, cache); - - let mut trusted_ancestor_headers = - vec![LargestSpecimen::largest_specimen(estimator, cache)]; - trusted_ancestor_headers.extend( - non_switch_block_ancestors - .into_iter() - .map(BlockHeaderWithoutEraEnd::into_inner), - ); - - let signed_block_headers = vec_prop_specimen(estimator, "recent_era_count", cache); - SyncLeap { - trusted_ancestor_only: LargestSpecimen::largest_specimen(estimator, cache), - trusted_block_header: LargestSpecimen::largest_specimen(estimator, cache), - trusted_ancestor_headers, - signed_block_headers, - } - } - } - - impl LargestSpecimen for SyncLeapIdentifier { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - SyncLeapIdentifier { - block_hash: 
LargestSpecimen::largest_specimen(estimator, cache), - trusted_ancestor_only: true, - } - } - } -} - #[cfg(test)] mod tests { // The `FetchItem::::validate()` function can potentially return the diff --git a/node/src/types/validator_matrix.rs b/node/src/types/validator_matrix.rs index 5040296c16..7e0fee3de6 100644 --- a/node/src/types/validator_matrix.rs +++ b/node/src/types/validator_matrix.rs @@ -217,10 +217,6 @@ impl ValidatorMatrix { self.finality_threshold_fraction } - pub(crate) fn is_empty(&self) -> bool { - self.read_inner().is_empty() - } - /// Returns whether `pub_key` is the ID of a validator in this era, or `None` if the validator /// information for that era is missing. pub(crate) fn is_validator_in_era( @@ -240,21 +236,13 @@ impl ValidatorMatrix { } } - pub(crate) fn public_signing_key(&self) -> &PublicKey { - &self.public_signing_key - } - - /// Returns whether `pub_key` is the ID of a validator in this era, or `None` if the validator - /// information for that era is missing. - pub(crate) fn is_self_validator_in_era(&self, era_id: EraId) -> Option { - self.is_validator_in_era(era_id, &self.public_signing_key) - } - /// Determine if the active validator is in a current or upcoming set of active validators. + /// + /// This function may produce false positives, as it works backwards from the highest known + /// era. Depending on the current network state, this may be an upcoming or active era, the + /// previous era validators may be positively identified by this function. #[inline] pub(crate) fn is_active_or_upcoming_validator(&self, public_key: &PublicKey) -> bool { - // This function is potentially expensive and could be memoized, with the cache being - // invalidated when the max value of the `BTreeMap` changes. self.read_inner() .values() .rev() @@ -262,6 +250,36 @@ impl ValidatorMatrix { .any(|validator_weights| validator_weights.is_validator(public_key)) } + /// Returns the public keys of all validators in a given era. 
+ /// + /// Will return `None` if the era is not known. + pub(crate) fn era_validators(&self, era_id: EraId) -> Option> { + if let Some(ref chainspec_validators) = self.chainspec_validators { + if era_id == self.chainspec_activation_era { + return Some(chainspec_validators.keys().cloned().collect()); + } + } + + Some( + self.read_inner() + .get(&era_id)? + .validator_weights + .keys() + .cloned() + .collect(), + ) + } + + pub(crate) fn public_signing_key(&self) -> &PublicKey { + &self.public_signing_key + } + + /// Returns whether `pub_key` is the ID of a validator in this era, or `None` if the validator + /// information for that era is missing. + pub(crate) fn is_self_validator_in_era(&self, era_id: EraId) -> Option { + self.is_validator_in_era(era_id, &self.public_signing_key) + } + pub(crate) fn create_finality_signature( &self, block_header: &BlockHeader, diff --git a/node/src/types/value_or_chunk.rs b/node/src/types/value_or_chunk.rs index 9363855d01..bffba5d55d 100644 --- a/node/src/types/value_or_chunk.rs +++ b/node/src/types/value_or_chunk.rs @@ -266,25 +266,3 @@ mod tests { assert_eq!(input, retrieved_bytes); } } - -mod specimen_support { - use crate::utils::specimen::{Cache, LargestSpecimen, SizeEstimator}; - - use super::{TrieOrChunkId, ValueOrChunk}; - - impl LargestSpecimen for TrieOrChunkId { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - TrieOrChunkId( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } - } - - impl LargestSpecimen for ValueOrChunk { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - // By definition, the chunk is always the largest (8MiB): - ValueOrChunk::ChunkWithProof(LargestSpecimen::largest_specimen(estimator, cache)) - } - } -} diff --git a/node/src/utils.rs b/node/src/utils.rs index 07e4c5f379..5c279989e9 100644 --- a/node/src/utils.rs +++ b/node/src/utils.rs @@ -6,34 +6,35 @@ mod display_error; pub(crate) mod ds; mod 
external; pub(crate) mod fmt_limit; +mod fuse; pub(crate) mod opt_display; +pub(crate) mod rate_limited; +pub(crate) mod registered_metric; #[cfg(target_os = "linux")] pub(crate) mod rlimit; pub(crate) mod round_robin; -pub(crate) mod specimen; pub(crate) mod umask; pub mod work_queue; use std::{ any, cell::RefCell, + convert::Infallible, fmt::{self, Debug, Display, Formatter}, - io, + fs::File, + io::{self, Write}, net::{SocketAddr, ToSocketAddrs}, - ops::{Add, BitXorAssign, Div}, + ops::{Add, Div}, path::{Path, PathBuf}, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, - time::{Duration, Instant, SystemTime}, + sync::{Arc, Mutex}, + time::Duration, }; use datasize::DataSize; +use fs2::FileExt; +use futures::future::Either; use hyper::server::{conn::AddrIncoming, Builder, Server}; -#[cfg(test)] -use once_cell::sync::Lazy; -use prometheus::{self, Histogram, HistogramOpts, Registry}; + use serde::Serialize; use thiserror::Error; use tracing::{error, warn}; @@ -44,7 +45,10 @@ pub(crate) use display_error::display_error; #[cfg(test)] pub(crate) use external::RESOURCES_PATH; pub use external::{External, LoadError, Loadable}; +pub(crate) use fuse::{DropSwitch, Fuse, ObservableFuse, SharedFuse}; pub(crate) use round_robin::WeightedRoundRobin; +#[cfg(test)] +pub(crate) use tests::extract_metric_names; /// DNS resolution error. #[derive(Debug, Error)] @@ -104,6 +108,8 @@ impl FlattenResult for Result, E> { } /// Parses a network address from a string, with DNS resolution. +/// +/// Only resolves to IPv4 addresses, IPv6 addresses are filtered out. pub(crate) fn resolve_address(address: &str) -> Result { address .to_socket_addrs() @@ -111,7 +117,7 @@ pub(crate) fn resolve_address(address: &str) -> Result(value: T) -> &'static T { Box::leak(Box::new(value)) } -/// A flag shared across multiple subsystem. -#[derive(Copy, Clone, DataSize, Debug)] -pub(crate) struct SharedFlag(&'static AtomicBool); - -impl SharedFlag { - /// Creates a new shared flag. 
- /// - /// The flag is initially not set. - pub(crate) fn new() -> Self { - SharedFlag(leak(AtomicBool::new(false))) - } - - /// Checks whether the flag is set. - pub(crate) fn is_set(self) -> bool { - self.0.load(Ordering::SeqCst) - } - - /// Set the flag. - pub(crate) fn set(self) { - self.0.store(true, Ordering::SeqCst) - } - - /// Returns a shared instance of the flag for testing. - /// - /// The returned flag should **never** have `set` be called upon it. - #[cfg(test)] - pub(crate) fn global_shared() -> Self { - static SHARED_FLAG: Lazy = Lazy::new(SharedFlag::new); - - *SHARED_FLAG - } -} - -impl Default for SharedFlag { - fn default() -> Self { - Self::new() - } -} - /// A display-helper that shows iterators display joined by ",". #[derive(Debug)] pub(crate) struct DisplayIter(RefCell>); @@ -340,48 +307,6 @@ where (numerator + denominator / T::from(2)) / denominator } -/// Creates a prometheus Histogram and registers it. -pub(crate) fn register_histogram_metric( - registry: &Registry, - metric_name: &str, - metric_help: &str, - buckets: Vec, -) -> Result { - let histogram_opts = HistogramOpts::new(metric_name, metric_help).buckets(buckets); - let histogram = Histogram::with_opts(histogram_opts)?; - registry.register(Box::new(histogram.clone()))?; - Ok(histogram) -} - -/// Unregisters a metric from the Prometheus registry. -#[macro_export] -macro_rules! unregister_metric { - ($registry:expr, $metric:expr) => { - $registry - .unregister(Box::new($metric.clone())) - .unwrap_or_else(|_| { - tracing::error!( - "unregistering {} failed: was not registered", - stringify!($metric) - ) - }); - }; -} - -/// XORs two byte sequences. -/// -/// # Panics -/// -/// Panics if `lhs` and `rhs` are not of equal length. -#[inline] -pub(crate) fn xor(lhs: &mut [u8], rhs: &[u8]) { - // Implementing SIMD support is left as an exercise for the reader. 
- assert_eq!(lhs.len(), rhs.len(), "xor inputs should have equal length"); - lhs.iter_mut() - .zip(rhs.iter()) - .for_each(|(sb, &cb)| sb.bitxor_assign(cb)); -} - /// Wait until all strong references for a particular arc have been dropped. /// /// Downgrades and immediately drops the `Arc`, keeping only a weak reference. The reference will @@ -421,63 +346,109 @@ pub(crate) async fn wait_for_arc_drop( false } -/// An anchor for converting an `Instant` into a wall-clock (`SystemTime`) time. -#[derive(Copy, Clone, Debug)] -pub(crate) struct TimeAnchor { - /// The reference instant used for conversion. - now: Instant, - /// The reference wall-clock timestamp used for conversion. - wall_clock_now: SystemTime, -} +/// A thread-safe wrapper around a file that writes chunks. +/// +/// A chunk can (but needn't) be a line. The writer guarantees it will be written to the wrapped +/// file, even if other threads are attempting to write chunks at the same time. +#[derive(Clone)] +pub(crate) struct LockedLineWriter(Arc>); -impl TimeAnchor { - /// Creates a new time anchor. +impl LockedLineWriter { + /// Creates a new `LockedLineWriter`. /// - /// Will take a sample of the monotonic clock and the current time and store it in the anchor. - pub(crate) fn now() -> Self { - TimeAnchor { - now: Instant::now(), - wall_clock_now: SystemTime::now(), - } + /// This function does not panic - if any error occurs, it will be logged and ignored. + pub(crate) fn new(file: File) -> Self { + LockedLineWriter(Arc::new(Mutex::new(file))) } - /// Converts a point in time from the monotonic clock to wall clock time, using this anchor. - #[inline] - pub(crate) fn convert(&self, then: Instant) -> SystemTime { - if then > self.now { - self.wall_clock_now + then.duration_since(self.now) - } else { - self.wall_clock_now - self.now.duration_since(then) + /// Writes a chunk to the wrapped file. 
+ pub(crate) fn write_line(&self, line: &str) { + match self.0.lock() { + Ok(mut guard) => { + // Acquire a lock on the file. This ensures we do not garble output when multiple + // nodes are writing to the same file. + if let Err(err) = guard.lock_exclusive() { + warn!(%line, %err, "could not acquire file lock, not writing line"); + return; + } + + if let Err(err) = guard.write_all(line.as_bytes()) { + warn!(%line, %err, "could not finish writing line"); + } + + if let Err(err) = guard.unlock() { + warn!(%err, "failed to release file lock in locked line writer, ignored"); + } + } + Err(_) => { + error!(%line, "line writer lock poisoned, lost line"); + } } } } -#[cfg(test)] -mod tests { - use std::{sync::Arc, time::Duration}; +/// Discard secondary data from a value. +pub(crate) trait Peel { + /// What is left after discarding the wrapping. + type Inner; + + /// Discard "uninteresting" data. + fn peel(self) -> Self::Inner; +} - use crate::utils::SharedFlag; +impl Peel for Either<(A, G), (B, F)> { + type Inner = Either; - use super::{wait_for_arc_drop, xor}; + fn peel(self) -> Self::Inner { + match self { + Either::Left((v, _)) => Either::Left(v), + Either::Right((v, _)) => Either::Right(v), + } + } +} - #[test] - fn xor_works() { - let mut lhs = [0x43, 0x53, 0xf2, 0x2f, 0xa9, 0x70, 0xfb, 0xf4]; - let rhs = [0x04, 0x0b, 0x5c, 0xa1, 0xef, 0x11, 0x12, 0x23]; - let xor_result = [0x47, 0x58, 0xae, 0x8e, 0x46, 0x61, 0xe9, 0xd7]; +/// Helper trait to unwrap `Result` to `T`. 
+pub(crate) trait UnwrapInfallible { + type Output; + + fn unwrap_infallible(self) -> Self::Output; +} - xor(&mut lhs, &rhs); +impl UnwrapInfallible for Result { + type Output = T; - assert_eq!(lhs, xor_result); + #[inline] + fn unwrap_infallible(self) -> Self::Output { + match self { + Ok(val) => val, + Err(_) => unreachable!(), + } } +} - #[test] - #[should_panic(expected = "equal length")] - fn xor_panics_on_uneven_inputs() { - let mut lhs = [0x43, 0x53, 0xf2, 0x2f, 0xa9, 0x70, 0xfb, 0xf4]; - let rhs = [0x04, 0x0b, 0x5c, 0xa1, 0xef, 0x11]; +#[cfg(test)] +mod tests { + use std::{collections::HashSet, net::SocketAddr, sync::Arc, time::Duration}; + + use crate::utils::resolve_address; + + use super::wait_for_arc_drop; - xor(&mut lhs, &rhs); + /// Extracts the names of all metrics contained in a prometheus-formatted metrics snapshot. + + pub(crate) fn extract_metric_names(raw: &str) -> HashSet<&str> { + raw.lines() + .filter_map(|line| { + let trimmed = line.trim(); + if trimmed.is_empty() || trimmed.starts_with('#') { + None + } else { + let (full_id, _) = trimmed.split_once(' ')?; + let id = full_id.split_once('{').map(|v| v.0).unwrap_or(full_id); + Some(id) + } + }) + .collect() } #[tokio::test] @@ -516,20 +487,47 @@ mod tests { } #[test] - fn shared_flag_sanity_check() { - let flag = SharedFlag::new(); - let copied = flag; - - assert!(!flag.is_set()); - assert!(!copied.is_set()); - assert!(!flag.is_set()); - assert!(!copied.is_set()); + fn can_parse_metrics() { + let sample = r#" + chain_height 0 + # HELP consensus_current_era the current era in consensus + # TYPE consensus_current_era gauge + consensus_current_era 0 + # HELP consumed_ram_bytes total consumed ram in bytes + # TYPE consumed_ram_bytes gauge + consumed_ram_bytes 0 + # HELP contract_runtime_apply_commit time in seconds to commit the execution effects of a contract + # TYPE contract_runtime_apply_commit histogram + contract_runtime_apply_commit_bucket{le="0.01"} 0 + 
contract_runtime_apply_commit_bucket{le="0.02"} 0 + contract_runtime_apply_commit_bucket{le="0.04"} 0 + contract_runtime_apply_commit_bucket{le="0.08"} 0 + contract_runtime_apply_commit_bucket{le="0.16"} 0 + "#; + + let extracted = extract_metric_names(sample); + + let mut expected = HashSet::new(); + expected.insert("chain_height"); + expected.insert("consensus_current_era"); + expected.insert("consumed_ram_bytes"); + expected.insert("contract_runtime_apply_commit_bucket"); + + assert_eq!(extracted, expected); + } - flag.set(); + #[test] + fn resolve_address_rejects_ipv6() { + let raw = "2b02:c307:2042:360::1:0"; + assert!(resolve_address(raw).is_err()); + } - assert!(flag.is_set()); - assert!(copied.is_set()); - assert!(flag.is_set()); - assert!(copied.is_set()); + #[test] + fn resolve_address_accepts_ipv4() { + let raw = "1.2.3.4:567"; + assert_eq!( + resolve_address(raw).expect("failed to resolve ipv4"), + SocketAddr::from(([1, 2, 3, 4], 567)) + ); } } diff --git a/node/src/utils/external.rs b/node/src/utils/external.rs index 6a18f5cdbb..dddfef4e84 100644 --- a/node/src/utils/external.rs +++ b/node/src/utils/external.rs @@ -49,7 +49,6 @@ pub enum External { /// Value that should be loaded from an external path. Path(PathBuf), /// The value has not been specified, but a default has been requested. - #[serde(skip)] #[default] Missing, } @@ -94,10 +93,11 @@ pub trait Loadable: Sized { /// Load a test-only instance from the local path. 
#[cfg(test)] fn from_resources>(rel_path: P) -> Self { - Self::from_path(RESOURCES_PATH.join(rel_path.as_ref())).unwrap_or_else(|error| { + let full_path = RESOURCES_PATH.join(rel_path.as_ref()); + Self::from_path(&full_path).unwrap_or_else(|error| { panic!( "could not load resources from {}: {}", - rel_path.as_ref().display(), + full_path.display(), error ) }) diff --git a/node/src/utils/fuse.rs b/node/src/utils/fuse.rs new file mode 100644 index 0000000000..6cf31ac806 --- /dev/null +++ b/node/src/utils/fuse.rs @@ -0,0 +1,261 @@ +/// Fuses of various kinds. +/// +/// A fuse is a boolean flag that can only be set once, but checked any number of times. +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; + +use datasize::DataSize; +use futures::{ + future::{self, Either}, + pin_mut, Future, +}; +use tokio::sync::Notify; + +use super::leak; + +/// A one-time settable boolean flag. +pub(crate) trait Fuse { + /// Trigger the fuse. + fn set(&self); +} + +/// A set-once-only flag shared across multiple subsystems. +#[derive(Copy, Clone, DataSize, Debug)] +pub(crate) struct SharedFuse(&'static AtomicBool); + +impl SharedFuse { + /// Creates a new shared fuse. + /// + /// The fuse is initially not set. + pub(crate) fn new() -> Self { + SharedFuse(leak(AtomicBool::new(false))) + } + + /// Checks whether the fuse is set. + pub(crate) fn is_set(self) -> bool { + self.0.load(Ordering::SeqCst) + } + + /// Returns a shared instance of the fuse for testing. + /// + /// The returned fuse should **never** have `set` be called upon it, since there is only one + /// instance globally. + #[cfg(test)] + pub(crate) fn global_shared() -> Self { + use once_cell::sync::Lazy; + + static SHARED_FUSE: Lazy = Lazy::new(SharedFuse::new); + + *SHARED_FUSE + } +} + +impl Fuse for SharedFuse { + fn set(&self) { + self.0.store(true, Ordering::SeqCst) + } +} + +impl Default for SharedFuse { + fn default() -> Self { + Self::new() + } +} + +/// A shared fuse that can be observed for change.
+/// +/// It is similar to a condition var, except it can only be set once and will immediately return +/// if it was previously set. +#[derive(DataSize, Clone, Debug)] +pub(crate) struct ObservableFuse(Arc); + +impl ObservableFuse { + /// Creates a new sticky fuse. + /// + /// The fuse will start out as not set. + pub(crate) fn new() -> Self { + ObservableFuse(Arc::new(ObservableFuseInner { + fuse: AtomicBool::new(false), + notify: Notify::new(), + })) + } +} + +/// Inner implementation of the `ObservableFuse`. +#[derive(DataSize, Debug)] +struct ObservableFuseInner { + /// The fuse to trigger. + #[data_size(skip)] + fuse: AtomicBool, + /// Notification that the fuse has been triggered. + #[data_size(skip)] + notify: Notify, +} + +impl ObservableFuse { + /// Waits for the fuse to be triggered. + /// + /// If the fuse is already set, returns immediately, otherwise waits for the notification. + /// + /// The future returned by this function is safe to cancel. + pub(crate) async fn wait(&self) { + // Note: We will catch all notifications from the point on where `notified()` is called, so + // we first construct the future, then check the fuse. Any notification sent while we + // were loading will be caught in the `notified.await`. + let notified = self.0.notify.notified(); + + if self.0.fuse.load(Ordering::SeqCst) { + return; + } + + notified.await; + } + + /// Owned wait function. + /// + /// Like wait, but owns `self`, thus it can be called and passed around with a static lifetime. + pub(crate) async fn wait_owned(self) { + self.wait().await; + } + + /// Runs a given future with a cancellation switch. + /// + /// Similar to [`tokio::time::timeout`], except instead of a duration, the cancellation of the + /// future depends on the given observable fuse.
+ pub(crate) async fn cancellable(self, f: F) -> Result + where + F: Future, + { + let wait = self.wait_owned(); + + pin_mut!(wait); + pin_mut!(f); + + match future::select(wait, f).await { + Either::Left(((), _)) => Err(Cancelled), + Either::Right((rv, _)) => Ok(rv), + } + } + + /// Convenience method to spawn a cancellable future. + /// + /// Uses the [`tokio::spawn`] function to spawn `f` wrapped in `ObservableFuse::cancellable`. + /// + /// Note that the join handle and return value of the future are lost; if you need access to + /// these, use `cancellable` directly. + #[inline(always)] + pub(crate) fn spawn(&self, f: F) + where + F: Future + Send + 'static, + { + tokio::spawn(self.clone().cancellable(async { + f.await; + })); + } +} + +/// A future has been cancelled. +#[derive(Copy, Clone, Debug)] +pub struct Cancelled; + +impl Fuse for ObservableFuse { + fn set(&self) { + self.0.fuse.store(true, Ordering::SeqCst); + self.0.notify.notify_waiters(); + } +} + +/// A wrapper for a fuse that will cause it to be set when dropped. +// Note: Do not implement/derive `Clone` for `DropSwitch`, as this is a massive footgun. Creating a +// new instance explicitly is safer, as it avoids unintentionally triggering the entire switch from +// after having created it on the stack and passed on a clone instance. +#[derive(DataSize, Debug)] +pub(crate) struct DropSwitch(T) +where + T: Fuse; + +impl DropSwitch +where + T: Fuse, +{ + /// Creates a new drop switch around a fuse. + pub(crate) fn new(fuse: T) -> Self { + DropSwitch(fuse) + } + + /// Access the wrapped fuse.
+ pub(crate) fn inner(&self) -> &T { + &self.0 + } +} + +impl Drop for DropSwitch +where + T: Fuse, +{ + fn drop(&mut self) { + self.0.set() + } +} + +#[cfg(test)] +mod tests { + use futures::FutureExt; + + use crate::utils::Fuse; + + use super::{DropSwitch, ObservableFuse, SharedFuse}; + + #[test] + fn shared_fuse_sanity_check() { + let fuse = SharedFuse::new(); + let copied = fuse; + + assert!(!fuse.is_set()); + assert!(!copied.is_set()); + assert!(!fuse.is_set()); + assert!(!copied.is_set()); + + fuse.set(); + + assert!(fuse.is_set()); + assert!(copied.is_set()); + assert!(fuse.is_set()); + assert!(copied.is_set()); + } + + #[test] + fn observable_fuse_sanity_check() { + let fuse = ObservableFuse::new(); + assert!(fuse.wait().now_or_never().is_none()); + + fuse.set(); + + // Should finish immediately due to the fuse being set. + assert!(fuse.wait().now_or_never().is_some()); + } + + #[test] + fn observable_fuse_drop_switch_check() { + let fuse = ObservableFuse::new(); + assert!(fuse.wait().now_or_never().is_none()); + + let drop_switch = DropSwitch::new(fuse.clone()); + assert!(fuse.wait().now_or_never().is_none()); + + drop(drop_switch); + assert!(fuse.wait().now_or_never().is_some()); + } + + #[test] + fn observable_fuse_race_condition_check() { + let fuse = ObservableFuse::new(); + assert!(fuse.wait().now_or_never().is_none()); + + let waiting = fuse.wait(); + fuse.set(); + assert!(waiting.now_or_never().is_some()); + } +} diff --git a/node/src/utils/rate_limited.rs b/node/src/utils/rate_limited.rs new file mode 100644 index 0000000000..aca1f9d3e7 --- /dev/null +++ b/node/src/utils/rate_limited.rs @@ -0,0 +1,233 @@ +//! Rate limiting for log messages. +//! +//! Implements the `rate_limited!` macro which can be used to ensure that a log message does not +//! spam the logs if triggered many times in a row. See its documentation for details. + +// Note: This module uses 64 bit microseconds, so it is only usable for a few hundred thousand years. +// Code accordingly.
+ +use std::{ + sync::atomic::{AtomicU64, Ordering}, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; + +use tokio::sync::Semaphore; + +/// Default interval to add tickets in. +pub(crate) const DEFAULT_REFRESH_INTERVAL: Duration = Duration::from_secs(60); + +/// Default count to add to tickets after interval has passed. +pub(crate) const DEFAULT_REFRESH_COUNT: usize = 100; + +/// Macro for rate limiting log message (and other things). +/// +/// Every rate limiter needs a unique identifier, which is used to create a static variable holding +/// the count and time of last update. +/// +/// Every call of this macro will result, on average, in the load of two atomics in the success +/// path, three in the failure case, with the latter potentially doing additional work. Overall, it +/// is fairly cheap to call. +/// +/// Associated with each call (and defaulting to `DEFAULT_REFRESH_INTERVAL` and +/// `DEFAULT_REFRESH_COUNT`) is an interval and a refresh count. Whenever the macro is called, it +/// will see if messages are available, if this is not the case, it will top up the count by `count` +/// if at least the interval has passed since the last top-up. +/// +/// ## Example usage +/// +/// The `rate_limited!` macro expects at least two arguments, the identifier described above, and a +/// function taking a single `usize` argument that will be called to make the actual log message. +/// The argument is the number of times this call has been skipped since the last time it was +/// called. +/// +/// ```ignore +/// rate_limited!( +/// CONNECTION_THRESHOLD_EXCEEDED, +/// |count| warn!(count, "exceeded connection threshold") +/// ); +/// ``` +/// +/// The macro can alternatively be called with a specific count-per: +/// +/// ```ignore +/// rate_limited!( +/// CONNECTION_THRESHOLD_EXCEEDED, +/// 20, +/// Duration::from_secs(30), +/// |count| warn!(count, "exceeded connection threshold") +/// ); +/// ``` +/// +/// The example above limits to 20 executions per 30 seconds.
+ +macro_rules! rate_limited { + ($key:ident, $action:expr) => { + rate_limited!( + $key, + $crate::utils::rate_limited::DEFAULT_REFRESH_COUNT, + $crate::utils::rate_limited::DEFAULT_REFRESH_INTERVAL, + $action + ); + }; + ($key:ident, $count:expr, $per:expr, $action:expr) => { + static $key: $crate::utils::rate_limited::RateLimited = + $crate::utils::rate_limited::RateLimited::new(); + + #[allow(clippy::redundant_closure_call)] + if let Some(skipped) = $key.acquire($count, $per) { + $action(skipped); + } + }; +} +pub(crate) use rate_limited; + +/// Helper struct for the `rate_limited!` macro. +/// +/// There is usually little use in constructing these directly. +#[derive(Debug)] +pub(crate) struct RateLimited { + /// The count indicating how many messages are remaining. + remaining: Semaphore, + /// How many were skipped in the meantime. + skipped: AtomicU64, + /// The last time `remaining` was topped up. + last_refresh_us: AtomicU64, +} + +/// Returns the current time in microseconds. +#[inline(always)] +fn now_micros() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_micros() as u64) + .unwrap_or_default() +} + +impl RateLimited { + /// Constructs a new once-per instance. + #[inline(always)] + pub(crate) const fn new() -> Self { + Self { + remaining: Semaphore::const_new(0), + skipped: AtomicU64::new(0), + last_refresh_us: AtomicU64::new(0), + } + } + + /// Checks if there are tickets available. + /// + /// Returns `Some` on success with the count of skipped items that now has been reset to 0. Will + /// add tickets if `per` has passed since the last top-up. + pub(crate) fn acquire(&self, count: usize, per: Duration) -> Option { + if count == 0 { + return None; + } + + if let Some(rv) = self.consume_permit() { + return Some(rv); + } + + // We failed to acquire a ticket. Check if we can refill tickets. 
+ let interval = per.as_micros() as u64; + + let now = now_micros(); + let last_refresh = self.last_refresh_us.load(Ordering::Relaxed); + if last_refresh + interval > now { + // No dice, not enough time has passed. Indicate we skipped our output and return. + self.skipped.fetch_add(1, Ordering::Relaxed); + + return None; + } + + // Enough time has passed! Let's see if we won the race for the next refresh. + if self + .last_refresh_us + .compare_exchange(last_refresh, now, Ordering::Relaxed, Ordering::Relaxed) + .is_ok() + { + // We won! Add tickets. + self.remaining.add_permits(count); + } + + // Regardless, tickets have been added at this point. Try one more time before giving up. + if let Some(rv) = self.consume_permit() { + Some(rv) + } else { + self.skipped.fetch_add(1, Ordering::Relaxed); + None + } + } + + /// Consume a permit from the counter/semaphore. + /// + /// Will reset skip count to 0 on success, and return the number of skipped calls. + #[inline(always)] + pub(crate) fn consume_permit(&self) -> Option { + let permit = self.remaining.try_acquire().ok()?; + + permit.forget(); + Some(self.skipped.swap(0, Ordering::Relaxed)) + } +} + +#[cfg(test)] +mod tests { + use std::{ + sync::atomic::{AtomicUsize, Ordering}, + thread, + time::Duration, + }; + + #[test] + fn rate_limited_is_rate_limited() { + let counter = AtomicUsize::new(0); + + let run = || { + rate_limited!( + RATE_LIMITED_IS_RATE_LIMITED_TEST, + 1, + Duration::from_secs(60), + |dropped| { + counter.fetch_add(1, Ordering::Relaxed); + assert_eq!(dropped, 0); + } + ); + }; + + for _ in 0..10 { + run(); + } + + // We expect one call in the default configuration. 
+ assert_eq!(counter.load(Ordering::Relaxed), 1); + } + + #[test] + fn rate_limiting_refreshes_properly() { + let mut drop_counts = Vec::new(); + + let run = |dc: &mut Vec| { + rate_limited!( + RATE_LIMITED_IS_RATE_LIMITED_TEST, + 2, + Duration::from_secs(1), + |dropped| { + dc.push(dropped); + } + ); + }; + + for _ in 0..5 { + run(&mut drop_counts); + } + assert_eq!(&[0, 0], drop_counts.as_slice()); + + // Sleep long enough for the counter to refresh. + thread::sleep(Duration::from_secs(1)); + + for _ in 0..5 { + run(&mut drop_counts); + } + assert_eq!(&[0, 0, 3, 0], drop_counts.as_slice()); + } +} diff --git a/node/src/utils/registered_metric.rs b/node/src/utils/registered_metric.rs new file mode 100644 index 0000000000..2feadb638c --- /dev/null +++ b/node/src/utils/registered_metric.rs @@ -0,0 +1,260 @@ +//! Self registering and deregistering metrics support. + +use prometheus::{ + core::{Atomic, Collector, GenericCounter, GenericGauge}, + Counter, Gauge, Histogram, HistogramOpts, HistogramTimer, IntCounter, IntGauge, Opts, Registry, +}; + +/// A metric wrapper that will deregister the metric from a given registry on drop. +#[derive(Debug)] +pub(crate) struct RegisteredMetric +where + T: Collector + 'static, +{ + metric: Option>, + registry: Registry, +} + +/// A metric that has been deprecated, but is kept around for backwards API compatibility. +#[derive(Debug)] +pub(crate) struct DeprecatedMetric(RegisteredMetric); + +impl DeprecatedMetric { + /// Creates a new deprecated metric. + #[inline(always)] + pub(crate) fn new, S2: Into>( + registry: Registry, + name: S1, + help: S2, + ) -> Result { + Ok(DeprecatedMetric(registry.new_int_counter(name, help)?)) + } +} + +impl RegisteredMetric +where + T: Collector + 'static, +{ + /// Creates a new self-deregistering metric. 
+ pub(crate) fn new(registry: Registry, metric: T) -> Result + where + T: Clone, + { + let boxed_metric = Box::new(metric); + registry.register(boxed_metric.clone())?; + + Ok(RegisteredMetric { + metric: Some(boxed_metric), + registry, + }) + } + + /// Returns a reference to the wrapped metric. + #[inline] + pub(crate) fn inner(&self) -> &T { + self.metric.as_ref().expect("metric disappeared") + } +} + +impl

RegisteredMetric> +where + P: Atomic, +{ + /// Increments the counter. + #[inline] + pub(crate) fn inc(&self) { + self.inner().inc() + } + + /// Increments the counter by set amount. + #[inline] + pub(crate) fn inc_by(&self, v: P::T) { + self.inner().inc_by(v) + } +} + +impl

RegisteredMetric> +where + P: Atomic, +{ + /// Decrements the gauge. + #[inline] + pub(crate) fn dec(&self) { + self.inner().dec() + } + + /// Decrements the gauge by set amount. + #[inline] + pub(crate) fn sub(&self, v: P::T) { + self.inner().sub(v) + } + + /// Returns the gauge value. + #[cfg(test)] + #[inline] + pub(crate) fn get(&self) -> P::T { + self.inner().get() + } + + /// Increments the gauge. + #[inline] + pub(crate) fn inc(&self) { + self.inner().inc() + } + + /// Increments the gauge by set amount. + #[inline] + pub(crate) fn add(&self, v: P::T) { + self.inner().add(v) + } + + /// Sets the gauge value. + #[inline] + pub(crate) fn set(&self, v: P::T) { + self.inner().set(v) + } +} + +impl RegisteredMetric { + /// Observes a given value. + #[inline] + pub(crate) fn observe(&self, v: f64) { + self.inner().observe(v) + } + + /// Creates a new histogram timer. + #[inline] + pub(crate) fn start_timer(&self) -> HistogramTimer { + self.inner().start_timer() + } +} + +impl Drop for RegisteredMetric +where + T: Collector + 'static, +{ + fn drop(&mut self) { + if let Some(boxed_metric) = self.metric.take() { + let desc = boxed_metric + .desc() + .first() + .map(|desc| desc.fq_name.clone()) + .unwrap_or_default(); + self.registry.unregister(boxed_metric).unwrap_or_else(|_| { + tracing::error!("unregistering {} failed: was not registered", desc) + }) + } + } +} + +/// Extension trait for [`Registry`] instances. +pub(crate) trait RegistryExt { + /// Creates a new [`Counter`] registered to this registry. + fn new_counter, S2: Into>( + &self, + name: S1, + help: S2, + ) -> Result, prometheus::Error>; + + /// Creates a new [`Histogram`] registered to this registry. + fn new_histogram, S2: Into>( + &self, + name: S1, + help: S2, + buckets: Vec, + ) -> Result, prometheus::Error>; + + /// Creates a new [`Gauge`] registered to this registry. 
+ fn new_gauge, S2: Into>( + &self, + name: S1, + help: S2, + ) -> Result, prometheus::Error>; + + /// Creates a new [`IntCounter`] registered to this registry. + fn new_int_counter, S2: Into>( + &self, + name: S1, + help: S2, + ) -> Result, prometheus::Error>; + + /// Creates a new [`IntCounter`] from options. + fn new_int_counter_opts( + &self, + opts: Opts, + ) -> Result, prometheus::Error>; + + /// Creates a new [`IntGauge`] registered to this registry. + fn new_int_gauge, S2: Into>( + &self, + name: S1, + help: S2, + ) -> Result, prometheus::Error>; + + /// Creates a new deprecated metric, registered to this registry. + fn new_deprecated, S2: Into>( + &self, + name: S1, + help: S2, + ) -> Result; +} + +impl RegistryExt for Registry { + fn new_counter, S2: Into>( + &self, + name: S1, + help: S2, + ) -> Result, prometheus::Error> { + RegisteredMetric::new(self.clone(), Counter::new(name, help)?) + } + + fn new_gauge, S2: Into>( + &self, + name: S1, + help: S2, + ) -> Result, prometheus::Error> { + RegisteredMetric::new(self.clone(), Gauge::new(name, help)?) + } + + fn new_histogram, S2: Into>( + &self, + name: S1, + help: S2, + buckets: Vec, + ) -> Result, prometheus::Error> { + let histogram_opts = HistogramOpts::new(name, help).buckets(buckets); + + RegisteredMetric::new(self.clone(), Histogram::with_opts(histogram_opts)?) + } + + fn new_int_counter, S2: Into>( + &self, + name: S1, + help: S2, + ) -> Result, prometheus::Error> { + RegisteredMetric::new(self.clone(), IntCounter::new(name, help)?) + } + + fn new_int_counter_opts( + &self, + opts: Opts, + ) -> Result, prometheus::Error> { + RegisteredMetric::new(self.clone(), IntCounter::with_opts(opts)?) + } + + fn new_int_gauge, S2: Into>( + &self, + name: S1, + help: S2, + ) -> Result, prometheus::Error> { + RegisteredMetric::new(self.clone(), IntGauge::new(name, help)?) 
+ } + fn new_deprecated, S2: Into>( + &self, + name: S1, + help: S2, + ) -> Result { + let help = format!("(DEPRECATED) {}", help.into()); + DeprecatedMetric::new(self.clone(), name, help) + } +} diff --git a/node/src/utils/specimen.rs b/node/src/utils/specimen.rs deleted file mode 100644 index c59122c5e1..0000000000 --- a/node/src/utils/specimen.rs +++ /dev/null @@ -1,870 +0,0 @@ -//! Specimen support. -//! -//! Structs implementing the specimen trait allow for specific sample instances being created, such -//! as the biggest possible. - -use std::{ - any::{Any, TypeId}, - collections::{BTreeMap, BTreeSet, HashMap}, - convert::{TryFrom, TryInto}, - iter::FromIterator, - net::{Ipv6Addr, SocketAddr, SocketAddrV6}, - sync::Arc, -}; - -use casper_execution_engine::core::engine_state::ExecutableDeployItem; -use casper_hashing::{ChunkWithProof, Digest}; -use casper_types::{ - bytesrepr::Bytes, - crypto::{sign, PublicKey, Signature}, - AsymmetricType, ContractPackageHash, EraId, ProtocolVersion, RuntimeArgs, SecretKey, SemVer, - TimeDiff, Timestamp, KEY_HASH_LENGTH, U512, -}; -use either::Either; -use serde::Serialize; -use strum::{EnumIter, IntoEnumIterator}; - -use crate::{ - components::{ - consensus::{max_rounds_per_era, utils::ValidatorMap, EraReport}, - fetcher::Tag, - }, - protocol::Message, - types::{ - ApprovalsHash, ApprovalsHashes, Block, BlockExecutionResultsOrChunk, BlockHash, - BlockHeader, BlockPayload, Deploy, DeployHashWithApprovals, DeployId, FinalitySignature, - FinalitySignatureId, FinalizedBlock, LegacyDeploy, SyncLeap, TrieOrChunk, - }, -}; - -/// The largest valid unicode codepoint that can be encoded to UTF-8. -pub(crate) const HIGHEST_UNICODE_CODEPOINT: char = '\u{10FFFF}'; - -/// A cache used for memoization, typically on a single estimator. -#[derive(Debug, Default)] -pub(crate) struct Cache { - /// A map of items that have been hashed. Indexed by type. - items: HashMap>>, -} - -impl Cache { - /// Retrieves a potentially memoized instance. 
- pub(crate) fn get(&mut self) -> Option<&T> { - self.get_all::() - .get(0) - .map(|box_any| box_any.downcast_ref::().expect("cache corrupted")) - } - - /// Sets the memoized instance if not already set. - /// - /// Returns a reference to the memoized instance. Note that this may be an instance other than - /// the passed in `item`, if the cache entry was not empty before/ - pub(crate) fn set(&mut self, item: T) -> &T { - let items = self.get_all::(); - if items.is_empty() { - let boxed_item: Box = Box::new(item); - items.push(boxed_item); - } - self.get::().expect("should not be empty") - } - - /// Get or insert the vector storing item instances. - fn get_all(&mut self) -> &mut Vec> { - self.items.entry(TypeId::of::()).or_default() - } -} - -/// Given a specific type instance, estimates its serialized size. -pub(crate) trait SizeEstimator { - /// Estimate the serialized size of a value. - fn estimate(&self, val: &T) -> usize; - - /// Requires a parameter. - /// - /// Parameters indicate potential specimens which values to expect, e.g. a maximum number of - /// items configured for a specific collection. - /// - /// ## Panics - /// - /// - If the named parameter is not set, panics. - /// - If `T` is of an invalid type. - fn parameter>(&self, name: &'static str) -> T; - - /// Require a parameter, cast into a boolean. - /// - /// See [`parameter`] for details. Will return `false` if the stored value is `0`, - /// otherwise `true`. - /// - /// This method exists because `bool` does not implement `TryFrom`. - /// - /// ## Panics - /// - /// Same as [`parameter`]. - fn parameter_bool(&self, name: &'static str) -> bool { - self.parameter::(name) != 0 - } -} - -/// Supports returning a maximum size specimen. -/// -/// "Maximum size" refers to the instance that uses the highest amount of memory and is also most -/// likely to have the largest representation when serialized. 
-pub(crate) trait LargestSpecimen: Sized { - /// Returns the largest possible specimen for this type. - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self; -} - -/// Supports generating a unique sequence of specimen that are as large as possible. -pub(crate) trait LargeUniqueSequence -where - Self: Sized + Ord, - E: SizeEstimator, -{ - /// Create a new sequence of the largest possible unique specimens. - /// - /// Note that multiple calls to this function will return overlapping sequences. - // Note: This functions returns a materialized sequence instead of a generator to avoid - // complications with borrowing `E`. - fn large_unique_sequence(estimator: &E, count: usize, cache: &mut Cache) -> BTreeSet; -} - -/// Produces the largest variant of a specific `enum` using an estimator and a generation function. -pub(crate) fn largest_variant(estimator: &E, generator: F) -> T -where - T: Serialize, - D: IntoEnumIterator, - E: SizeEstimator, - F: FnMut(D) -> T, -{ - D::iter() - .map(generator) - .max_by_key(|candidate| estimator.estimate(candidate)) - .expect("should have at least one candidate") -} - -/// Generates a vec of a given size filled with the largest specimen. -pub(crate) fn vec_of_largest_specimen( - estimator: &E, - count: usize, - cache: &mut Cache, -) -> Vec { - let mut vec = Vec::new(); - for _ in 0..count { - vec.push(LargestSpecimen::largest_specimen(estimator, cache)); - } - vec -} - -/// Generates a vec of the largest specimen, with a size from a property. -pub(crate) fn vec_prop_specimen( - estimator: &E, - parameter_name: &'static str, - cache: &mut Cache, -) -> Vec { - let mut count = estimator.parameter(parameter_name); - if count < 0 { - count = 0; - } - - vec_of_largest_specimen(estimator, count as usize, cache) -} - -/// Generates a `BTreeMap` with the size taken from a property. -/// -/// Keys are generated uniquely using `LargeUniqueSequence`, while values will be largest specimen. 
-pub(crate) fn btree_map_distinct_from_prop( - estimator: &E, - parameter_name: &'static str, - cache: &mut Cache, -) -> BTreeMap -where - V: LargestSpecimen, - K: Ord + LargeUniqueSequence + Sized, - E: SizeEstimator, -{ - let mut count = estimator.parameter(parameter_name); - if count < 0 { - count = 0; - } - - K::large_unique_sequence(estimator, count as usize, cache) - .into_iter() - .map(|key| (key, LargestSpecimen::largest_specimen(estimator, cache))) - .collect() -} - -/// Generates a `BTreeSet` with the size taken from a property. -/// -/// Value are generated uniquely using `LargeUniqueSequence`. -pub(crate) fn btree_set_distinct_from_prop( - estimator: &E, - parameter_name: &'static str, - cache: &mut Cache, -) -> BTreeSet -where - T: Ord + LargeUniqueSequence + Sized, - E: SizeEstimator, -{ - let mut count = estimator.parameter(parameter_name); - if count < 0 { - count = 0; - } - - T::large_unique_sequence(estimator, count as usize, cache) -} - -/// Generates a `BTreeSet` with a given amount of items. -/// -/// Value are generated uniquely using `LargeUniqueSequence`. 
-pub(crate) fn btree_set_distinct( - estimator: &E, - count: usize, - cache: &mut Cache, -) -> BTreeSet -where - T: Ord + LargeUniqueSequence + Sized, - E: SizeEstimator, -{ - T::large_unique_sequence(estimator, count, cache) -} - -impl LargestSpecimen for SocketAddr { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - SocketAddr::V6(SocketAddrV6::largest_specimen(estimator, cache)) - } -} - -impl LargestSpecimen for SocketAddrV6 { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - SocketAddrV6::new( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } -} - -impl LargestSpecimen for Ipv6Addr { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - // Leading zeros get shorted, ensure there are none in the address. - Ipv6Addr::new( - 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, - ) - } -} - -impl LargestSpecimen for bool { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - true - } -} - -impl LargestSpecimen for u8 { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - u8::MAX - } -} - -impl LargestSpecimen for u16 { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - u16::MAX - } -} - -impl LargestSpecimen for u32 { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - u32::MAX - } -} - -impl LargestSpecimen for u64 { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - u64::MAX - } -} - -impl LargestSpecimen for u128 { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - u128::MAX - } -} - -impl LargestSpecimen for [T; N] { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - [LargestSpecimen::largest_specimen(estimator, cache); N] - } -} - -impl LargestSpecimen for Option -where - T: LargestSpecimen, -{ - fn 
largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - Some(LargestSpecimen::largest_specimen(estimator, cache)) - } -} - -impl LargestSpecimen for Box -where - T: LargestSpecimen, -{ - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - Box::new(LargestSpecimen::largest_specimen(estimator, cache)) - } -} - -impl LargestSpecimen for Arc -where - T: LargestSpecimen, -{ - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - Arc::new(LargestSpecimen::largest_specimen(estimator, cache)) - } -} - -impl LargestSpecimen for (T1, T2) -where - T1: LargestSpecimen, - T2: LargestSpecimen, -{ - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - ( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } -} - -impl LargestSpecimen for (T1, T2, T3) -where - T1: LargestSpecimen, - T2: LargestSpecimen, - T3: LargestSpecimen, -{ - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - ( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } -} - -// Various third party crates. - -impl LargestSpecimen for Either -where - L: LargestSpecimen + Serialize, - R: LargestSpecimen + Serialize, -{ - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - let l = L::largest_specimen(estimator, cache); - let r = R::largest_specimen(estimator, cache); - - if estimator.estimate(&l) >= estimator.estimate(&r) { - Either::Left(l) - } else { - Either::Right(r) - } - } -} - -// impls for `casper_types`, which is technically a foreign crate -- so we put them here. 
-impl LargestSpecimen for ProtocolVersion { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - ProtocolVersion::new(LargestSpecimen::largest_specimen(estimator, cache)) - } -} - -impl LargestSpecimen for SemVer { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - SemVer { - major: LargestSpecimen::largest_specimen(estimator, cache), - minor: LargestSpecimen::largest_specimen(estimator, cache), - patch: LargestSpecimen::largest_specimen(estimator, cache), - } - } -} - -impl LargestSpecimen for PublicKey { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - PublicKey::large_unique_sequence(estimator, 1, cache) - .into_iter() - .next() - .unwrap() - } -} - -// Dummy implementation to replace the buggy real one below: -impl LargeUniqueSequence for PublicKey -where - E: SizeEstimator, -{ - fn large_unique_sequence(estimator: &E, count: usize, cache: &mut Cache) -> BTreeSet { - let data_vec = cache.get_all::(); - - /// Generates a secret key from a fixed, numbered seed. - fn generate_key(estimator: &E, seed: usize) -> PublicKey { - // Like `Signature`, we do not wish to pollute the types crate here. - #[derive(Copy, Clone, Debug, EnumIter)] - enum PublicKeyDiscriminants { - System, - Ed25519, - Secp256k1, - } - largest_variant::(estimator, |variant| { - // We take advantage of two things here: - // - // 1. The required seed bytes for Ed25519 and Secp256k1 are both the same length of - // 32 bytes. - // 2. While Secp256k1 does not allow the most trivial seed bytes of 0x00..0001, a - // a hash function output seems to satisfy it, and our current hashing scheme - // also output 32 bytes. 
- let seed_bytes = Digest::hash(seed.to_be_bytes()).value(); - - match variant { - PublicKeyDiscriminants::System => PublicKey::system(), - PublicKeyDiscriminants::Ed25519 => { - let ed25519_sec = SecretKey::ed25519_from_bytes(seed_bytes) - .expect("unable to create ed25519 key from seed bytes"); - PublicKey::from(&ed25519_sec) - } - PublicKeyDiscriminants::Secp256k1 => { - let secp256k1_sec = SecretKey::secp256k1_from_bytes(seed_bytes) - .expect("unable to create secp256k1 key from seed bytes"); - PublicKey::from(&secp256k1_sec) - } - } - }) - } - - while data_vec.len() < count { - let seed = data_vec.len(); - let key = generate_key(estimator, seed); - data_vec.push(Box::new(key)); - } - - debug_assert!(data_vec.len() >= count); - let output_set: BTreeSet = data_vec[..count] - .iter() - .map(|item| item.downcast_ref::().expect("cache corrupted")) - .cloned() - .collect(); - debug_assert_eq!(output_set.len(), count); - - output_set - } -} - -impl LargeUniqueSequence for Digest -where - E: SizeEstimator, -{ - fn large_unique_sequence(_estimator: &E, count: usize, _cache: &mut Cache) -> BTreeSet { - (0..count).map(|n| Digest::hash(n.to_ne_bytes())).collect() - } -} - -impl LargestSpecimen for Signature { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - if let Some(item) = cache.get::() { - return *item; - } - - // Note: We do not use strum generated discriminator enums for the signature, as we do not - // want to make `strum` a direct dependency of `casper-types`, to keep its size down. 
- #[derive(Debug, Copy, Clone, EnumIter)] - enum SignatureDiscriminants { - System, - Ed25519, - Secp256k1, - } - - *cache.set(largest_variant::( - estimator, - |variant| match variant { - SignatureDiscriminants::System => Signature::system(), - SignatureDiscriminants::Ed25519 => { - let ed25519_sec = &SecretKey::generate_ed25519().expect("a correct secret"); - - sign([0_u8], ed25519_sec, &ed25519_sec.into()) - } - SignatureDiscriminants::Secp256k1 => { - let secp256k1_sec = &SecretKey::generate_secp256k1().expect("a correct secret"); - - sign([0_u8], secp256k1_sec, &secp256k1_sec.into()) - } - }, - )) - } -} - -impl LargestSpecimen for EraId { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - EraId::new(LargestSpecimen::largest_specimen(estimator, cache)) - } -} - -impl LargestSpecimen for Timestamp { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - const MAX_TIMESTAMP_HUMAN_READABLE: u64 = 253_402_300_799; - Timestamp::from(MAX_TIMESTAMP_HUMAN_READABLE) - } -} - -impl LargestSpecimen for TimeDiff { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - TimeDiff::from_millis(LargestSpecimen::largest_specimen(estimator, cache)) - } -} - -impl LargestSpecimen for Block { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - Block::new( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - Some(btree_map_distinct_from_prop( - estimator, - "validator_count", - cache, - )), - LargestSpecimen::largest_specimen(estimator, cache), - ) - .expect("did not expect largest specimen creation of block to fail") - } -} - -impl LargestSpecimen for FinalizedBlock { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - FinalizedBlock::new( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, 
cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } -} - -impl LargestSpecimen for FinalitySignature { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - FinalitySignature::new( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } -} - -impl LargestSpecimen for FinalitySignatureId { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - FinalitySignatureId { - block_hash: LargestSpecimen::largest_specimen(estimator, cache), - era_id: LargestSpecimen::largest_specimen(estimator, cache), - public_key: LargestSpecimen::largest_specimen(estimator, cache), - } - } -} - -impl LargestSpecimen for EraReport { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - EraReport { - equivocators: vec_prop_specimen(estimator, "validator_count", cache), - rewards: btree_map_distinct_from_prop(estimator, "validator_count", cache), - inactive_validators: vec_prop_specimen(estimator, "validator_count", cache), - } - } -} - -impl LargestSpecimen for BlockHash { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - BlockHash::new(LargestSpecimen::largest_specimen(estimator, cache)) - } -} - -// impls for `casper_hashing`, which is technically a foreign crate -- so we put them here. -impl LargestSpecimen for Digest { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - // Hashes are fixed size by definition, so any value will do. 
- Digest::hash("") - } -} - -impl LargestSpecimen for BlockPayload { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - // We cannot just use the standard largest specimen for `DeployHashWithApprovals`, as this - // would cause a quadratic increase in deploys. Instead, we generate one large deploy that - // contains the number of approvals if they are spread out across the block. - - let large_deploy = Deploy::largest_specimen(estimator, cache).with_approvals( - btree_set_distinct_from_prop(estimator, "average_approvals_per_deploy_in_block", cache), - ); - let large_deploy_hash_with_approvals = DeployHashWithApprovals::from(&large_deploy); - - let deploys = vec![ - large_deploy_hash_with_approvals.clone(); - estimator.parameter::("max_deploys_per_block") - ]; - let transfers = vec![ - large_deploy_hash_with_approvals; - estimator.parameter::("max_transfers_per_block") - ]; - - BlockPayload::new( - deploys, - transfers, - vec_prop_specimen(estimator, "max_accusations_per_block", cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } -} - -impl LargestSpecimen for DeployHashWithApprovals { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - // Note: This is an upper bound, the actual value is lower. We are keeping the order of - // magnitude intact though. - let max_items = estimator.parameter::("max_deploys_per_block") - + estimator.parameter::("max_transfers_per_block"); - DeployHashWithApprovals::new( - LargestSpecimen::largest_specimen(estimator, cache), - btree_set_distinct(estimator, max_items, cache), - ) - } -} - -impl LargestSpecimen for Deploy { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - // Note: Deploys have a maximum size enforced on their serialized representation. A deploy - // generated here is guaranteed to exceed this maximum size due to the session code - // being this maximum size already (see the [`LargestSpecimen`] implementation of - // [`ExecutableDeployItem`]). 
For this reason, we leave `dependencies` and `payment` - // small. - Deploy::new( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - Default::default(), // See note. - largest_chain_name(estimator), - LargestSpecimen::largest_specimen(estimator, cache), - ExecutableDeployItem::Transfer { - args: Default::default(), // See note. - }, - &LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } -} - -impl LargestSpecimen for DeployId { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - DeployId::new( - LargestSpecimen::largest_specimen(estimator, cache), - LargestSpecimen::largest_specimen(estimator, cache), - ) - } -} - -impl LargestSpecimen for ApprovalsHash { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - ApprovalsHash::compute(&Default::default()).expect("empty approvals hash should compute") - } -} - -// EE impls -impl LargestSpecimen for ExecutableDeployItem { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - // `module_bytes` already blows this up to the maximum deploy size, so we use this variant - // as the largest always and don't need to fill in any args. - // - // However, this does not hold true for all encoding schemes: An inefficient encoding can - // easily, via `RuntimeArgs`, result in a much larger encoded size, e.g. when encoding an - // array of 1-byte elements in a format that uses string quoting and a delimiter to seperate - // elements. - // - // We compromise by not supporting encodings this inefficient and add 10 * a 32-bit integer - // as a safety margin for tags and length prefixes. 
- let max_size_with_margin = - estimator.parameter::("max_deploy_size").max(0) as usize + 10 * 4; - - ExecutableDeployItem::ModuleBytes { - module_bytes: Bytes::from(vec_of_largest_specimen( - estimator, - max_size_with_margin, - cache, - )), - args: RuntimeArgs::new(), - } - } -} - -impl LargestSpecimen for U512 { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - U512::max_value() - } -} - -impl LargestSpecimen for ContractPackageHash { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - ContractPackageHash::new( - [LargestSpecimen::largest_specimen(estimator, cache); KEY_HASH_LENGTH], - ) - } -} - -impl LargestSpecimen for ChunkWithProof { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - ChunkWithProof::new(&[0xFF; Self::CHUNK_SIZE_BYTES], 0) - .expect("the chunk to be correctly created") - } -} - -impl LargestSpecimen for SecretKey { - fn largest_specimen(_estimator: &E, _cache: &mut Cache) -> Self { - SecretKey::ed25519_from_bytes([u8::MAX; 32]).expect("valid secret key bytes") - } -} - -impl LargestSpecimen for ValidatorMap { - fn largest_specimen(estimator: &E, cache: &mut Cache) -> Self { - let max_validators = estimator.parameter("validator_count"); - - ValidatorMap::from_iter( - std::iter::repeat_with(|| LargestSpecimen::largest_specimen(estimator, cache)) - .take(max_validators), - ) - } -} - -/// Returns the largest `Message::GetRequest`. 
-pub(crate) fn largest_get_request(estimator: &E, cache: &mut Cache) -> Message { - largest_variant::(estimator, |variant| { - match variant { - Tag::Deploy => Message::new_get_request::(&LargestSpecimen::largest_specimen( - estimator, cache, - )), - Tag::LegacyDeploy => Message::new_get_request::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::Block => Message::new_get_request::(&LargestSpecimen::largest_specimen( - estimator, cache, - )), - Tag::BlockHeader => Message::new_get_request::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::TrieOrChunk => Message::new_get_request::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::FinalitySignature => Message::new_get_request::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::SyncLeap => Message::new_get_request::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::ApprovalsHashes => Message::new_get_request::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::BlockExecutionResults => Message::new_get_request::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - } - .expect("did not expect new_get_request from largest deploy to fail") - }) -} - -/// Returns the largest `Message::GetResponse`. 
-pub(crate) fn largest_get_response(estimator: &E, cache: &mut Cache) -> Message { - largest_variant::(estimator, |variant| { - match variant { - Tag::Deploy => Message::new_get_response::(&LargestSpecimen::largest_specimen( - estimator, cache, - )), - Tag::LegacyDeploy => Message::new_get_response::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::Block => Message::new_get_response::(&LargestSpecimen::largest_specimen( - estimator, cache, - )), - Tag::BlockHeader => Message::new_get_response::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::TrieOrChunk => Message::new_get_response::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::FinalitySignature => Message::new_get_response::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::SyncLeap => Message::new_get_response::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::ApprovalsHashes => Message::new_get_response::( - &LargestSpecimen::largest_specimen(estimator, cache), - ), - Tag::BlockExecutionResults => { - Message::new_get_response::( - &LargestSpecimen::largest_specimen(estimator, cache), - ) - } - } - .expect("did not expect new_get_response from largest deploy to fail") - }) -} - -/// Returns the largest string allowed for a chain name. -fn largest_chain_name(estimator: &E) -> String { - string_max_characters(estimator.parameter("network_name_limit")) -} - -/// Returns a string with `len`s characters of the largest possible size. -fn string_max_characters(max_char: usize) -> String { - std::iter::repeat(HIGHEST_UNICODE_CODEPOINT) - .take(max_char) - .collect() -} - -/// Returns the max rounds per era with the specimen parameters. -/// -/// See the [`max_rounds_per_era`] function. 
-pub(crate) fn estimator_max_rounds_per_era(estimator: &impl SizeEstimator) -> usize { - let minimum_era_height = estimator.parameter("minimum_era_height"); - let era_duration_ms = TimeDiff::from_millis(estimator.parameter("era_duration_ms")); - let minimum_round_length_ms = - TimeDiff::from_millis(estimator.parameter("minimum_round_length_ms")); - - max_rounds_per_era(minimum_era_height, era_duration_ms, minimum_round_length_ms) - .try_into() - .expect("to be a valid `usize`") -} - -#[cfg(test)] -mod tests { - use super::Cache; - - #[test] - fn memoization_cache_simple() { - let mut cache = Cache::default(); - - assert!(cache.get::().is_none()); - assert!(cache.get::().is_none()); - - cache.set::(1234); - assert_eq!(cache.get::(), Some(&1234)); - - cache.set::("a string is not copy".to_owned()); - assert_eq!( - cache.get::().map(String::as_str), - Some("a string is not copy") - ); - assert_eq!(cache.get::(), Some(&1234)); - - cache.set::("this should not overwrite".to_owned()); - assert_eq!( - cache.get::().map(String::as_str), - Some("a string is not copy") - ); - } -} diff --git a/resources/local/chainspec.toml.in b/resources/local/chainspec.toml.in index a7339c5361..1782ace02a 100644 --- a/resources/local/chainspec.toml.in +++ b/resources/local/chainspec.toml.in @@ -18,9 +18,20 @@ activation_point = '${TIMESTAMP}' # contributing to the seeding of the pseudo-random number generator used in contract-runtime for computing genesis # post-state hash. name = 'casper-example' -# The maximum size of an acceptable networking message in bytes. Any message larger than this will +# The maximum size of an acceptable handshake message in bytes. Any handshake larger than this will # be rejected at the networking level. -maximum_net_message_size = 25_165_824 +maximum_handshake_message_size = 1_048_576 +# The maximum frame size for network transport. 
+maximum_frame_size = 4096 + +[network.networking_config] +network = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +sync_data_request = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +sync_data_responses = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +data_requests = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +data_responses = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +consensus = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +bulk_gossip = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } [core] # Era duration. @@ -233,7 +244,7 @@ provision_contract_user_group_uref = { cost = 200, arguments = [0, 0, 0, 0, 0] } put_key = { cost = 38_000, arguments = [0, 1_100, 0, 0] } read_host_buffer = { cost = 3_500, arguments = [0, 310, 0] } read_value = { cost = 6_000, arguments = [0, 0, 0] } -read_value_local = { cost = 5_500, arguments = [0, 590, 0] } +dictionary_get = { cost = 5_500, arguments = [0, 590, 0] } remove_associated_key = { cost = 4_200, arguments = [0, 0] } remove_contract_user_group = { cost = 200, arguments = [0, 0, 0, 0] } remove_contract_user_group_urefs = { cost = 200, arguments = [0, 0, 0, 0, 0, 0] } @@ -246,7 +257,7 @@ transfer_from_purse_to_purse = { cost = 82_000, arguments = [0, 0, 0, 0, 0, 0, 0 transfer_to_account = { cost = 2_500_000_000, arguments = [0, 0, 0, 0, 0, 0, 0] } update_associated_key = { cost = 4_200, arguments = [0, 0, 0] } write = { cost = 14_000, arguments = [0, 0, 0, 980] } -write_local = { cost = 9_500, arguments = [0, 1_800, 0, 520] } +dictionary_put = { cost = 9_500, arguments = [0, 1_800, 0, 520] } [system_costs] wasmless_transfer_cost = 
100_000_000 @@ -272,6 +283,7 @@ mint = 2_500_000_000 reduce_total_supply = 10_000 create = 2_500_000_000 balance = 10_000 +burn = 10_000 transfer = 10_000 read_base_round_reward = 10_000 mint_into_existing_purse = 2_500_000_000 diff --git a/resources/local/config.toml b/resources/local/config.toml index de15d0a729..b4dd1c119f 100644 --- a/resources/local/config.toml +++ b/resources/local/config.toml @@ -189,6 +189,15 @@ bind_address = '0.0.0.0:34553' # one connection. known_addresses = ['127.0.0.1:34553'] +# TLS keylog location +# +# If set, the node will write all keys generated during all TLS connections to the given file path. +# This option is intended for debugging only, do NOT enable this on production systems. +# +# The specified location will be appended to, even across node restarts, so it may grow large if +# unattended. +# keylog_path = "/path/to/keylog" + # Minimum number of fully-connected peers to consider network component initialized. min_peers_for_initialization = 3 @@ -199,54 +208,30 @@ gossip_interval = '30 seconds' # more than the expected time required for initial connections to complete. initial_gossip_delay = '5 seconds' -# How long a connection is allowed to be stuck as pending before it is abandoned. -max_addr_pending_time = '1 minute' - # Maximum time allowed for a connection handshake between two nodes to be completed. Connections # exceeding this threshold are considered unlikely to be healthy or even malicious and thus # terminated. handshake_timeout = '20 seconds' -# Maximum number of incoming connections per unique peer allowed. If the limit is hit, additional -# connections will be rejected. A value of `0` means unlimited. -max_incoming_peer_connections = 3 - -# The maximum total of upstream bandwidth in bytes per second allocated to non-validating peers. -# A value of `0` means unlimited. -max_outgoing_byte_rate_non_validators = 0 +# Timeout before giving up on a peer. 
If a peer exceeds this time limit for acknowledging or +# responding to a received message, it is considered unresponsive and the connection severed. +ack_timeout = '30sec' -# The maximum allowed total impact of requests from non-validating peers per second answered. -# A value of `0` means unlimited. -max_incoming_message_rate_non_validators = 0 - -# Maximum number of requests for data from a single peer that are allowed be buffered. A value of -# `0` means unlimited. -max_in_flight_demands = 50 +# How long peers remain blocked after they get blocklisted. +blocklist_retain_duration = '1 minute' -# Version threshold to enable tarpit for. -# -# When set to a version (the value may be `null` to disable the feature), any peer that reports a -# protocol version equal or below the threshold will be rejected only after holding open the -# connection for a specific (`tarpit_duration`) amount of time. -# -# This option makes most sense to enable on known nodes with addresses where legacy nodes that are -# still in operation are connecting to, as these older versions will only attempt to reconnect to -# other nodes once they have exhausted their set of known nodes. -tarpit_version_threshold = '1.2.1' +# Whether or not to consider a connection stuck after a single request times out, causing a termination and reconnection. +# It is recommended to set this to `true` unless network connectivity issues are being troubleshot. +bubble_timeouts = true -# How long to hold connections to trapped legacy nodes. -tarpit_duration = '10 minutes' +# The maximum time a peer is allowed to take to receive a fatal error. +error_timeout = '10 seconds' -# The probability [0.0, 1.0] of this node trapping a legacy node. -# -# Since older nodes will only reconnect if all their options are exhausted, it is sufficient for a -# single known node to hold open a connection to prevent the node from reconnecting. 
This should be -# set to `1/n` or higher, with `n` being the number of known nodes expected in the configuration of -# legacy nodes running this software. -tarpit_chance = 0.2 +# Whether to restrict broadcasts of values most likely only relevant for validators to only those. +use_validator_broadcast = true -# How long peers remain blocked after they get blocklisted. -blocklist_retain_duration = '1 minute' +# Whether to enable the use of optimized gossip peer selection for a subset of items. +use_mixed_gossip = false # Identity of a node # @@ -257,44 +242,44 @@ blocklist_retain_duration = '1 minute' # secret_key = "local_node.pem" # ca_certificate = "ca_cert.pem" -# Weights for impact estimation of incoming messages, used in combination with -# `max_incoming_message_rate_non_validators`. -# -# Any weight set to 0 means that the category of traffic is exempt from throttling. -[network.estimator_weights] -consensus = 0 -block_gossip = 1 -deploy_gossip = 0 -finality_signature_gossip = 1 -address_gossip = 0 -finality_signature_broadcasts = 0 -deploy_requests = 1 -deploy_responses = 0 -legacy_deploy_requests = 1 -legacy_deploy_responses = 0 -block_requests = 1 -block_responses = 0 -block_header_requests = 1 -block_header_responses = 0 -trie_requests = 1 -trie_responses = 0 -finality_signature_requests = 1 -finality_signature_responses = 0 -sync_leap_requests = 1 -sync_leap_responses = 0 -approvals_hashes_requests = 1 -approvals_hashes_responses = 0 -execution_results_requests = 1 -execution_results_responses = 0 -# Identity of a node -# -# When this section is not specified, an identity will be generated when the node process starts with a self-signed certifcate. -# This option makes sense for some private chains where for security reasons joining new nodes is restricted. 
-# [network.identity] -# tls_certificate = "local_node_cert.pem" -# secret_key = "local_node.pem" -# ca_certificate = "ca_cert.pem" +# ================================================ +# Configuration options for the connection manager +# ================================================ +[network.conman] + +# The timeout for a single underlying TCP connection to be established. +tcp_connect_timeout = '10 seconds' + +# Maximum time allowed for TLS setup and handshaking to proceed. +setup_timeout = '10 seconds' + +# How many times to reattempt a connection. +tcp_connect_attempts = 8 + +# Base delay for the backoff, grows exponentially until `tcp_connect_attempts` maxes out. +tcp_connect_base_backoff = '1 second' + +# How long to back off from reconnecting to an address after a failure that indicates a +# significant problem. +significant_error_backoff = '60 seconds' + +# How long to back off from reconnecting to an address if the error is likely not going to +# change for a long time. +permanent_error_backoff = '10 minutes' + +# How long to wait before reconnecting when a successful outgoing connection is lost. +successful_reconnect_delay = '1 second' + +# The minimum time a connection must have successfully served data to not be seen as flaky. +flaky_connection_threshold = '1 minute' + +# Number of incoming connections before refusing to accept any new ones. +max_incoming_connections = 10000 + +# Number of outgoing connections before stopping to connect to learned addresses. 
+max_outgoing_connections = 10000 + # ================================================== # Configuration options for the JSON-RPC HTTP server @@ -543,10 +528,10 @@ disconnect_dishonest_peers_interval = '10 seconds' latch_reset_interval = '5 seconds' -# ============================================= -# Configuration options for the block validator -# ============================================= -[block_validator] +# ====================================================== +# Configuration options for the proposed block validator +# ====================================================== +[proposed_block_validator] # Maximum number of completed entries to retain. # diff --git a/resources/metrics-1.5.txt b/resources/metrics-1.5.txt new file mode 100644 index 0000000000..7c7525443f --- /dev/null +++ b/resources/metrics-1.5.txt @@ -0,0 +1,808 @@ +# HELP accumulated_incoming_limiter_delay seconds spent delaying incoming traffic from non-validators due to limiter, in seconds. +# TYPE accumulated_incoming_limiter_delay counter +accumulated_incoming_limiter_delay 0 +# HELP accumulated_outgoing_limiter_delay seconds spent delaying outgoing traffic to non-validators due to limiter, in seconds +# TYPE accumulated_outgoing_limiter_delay counter +accumulated_outgoing_limiter_delay 0 +# HELP address_gossiper_items_received number of items received by the address_gossiper +# TYPE address_gossiper_items_received counter +address_gossiper_items_received 3 +# HELP address_gossiper_table_items_current number of items in the gossip table of address_gossiper in state current +# TYPE address_gossiper_table_items_current gauge +address_gossiper_table_items_current 0 +# HELP address_gossiper_table_items_finished number of items in the gossip table of address_gossiper in state finished +# TYPE address_gossiper_table_items_finished gauge +address_gossiper_table_items_finished 1 +# HELP address_gossiper_times_gossiped number of times the address_gossiper sent gossip requests to peers +# TYPE 
address_gossiper_times_gossiped counter +address_gossiper_times_gossiped 0 +# HELP address_gossiper_times_ran_out_of_peers number of times the address_gossiper ran out of peers and had to pause +# TYPE address_gossiper_times_ran_out_of_peers counter +address_gossiper_times_ran_out_of_peers 3 +# HELP allocated_ram_bytes total allocated ram in bytes +# TYPE allocated_ram_bytes gauge +allocated_ram_bytes 0 +# HELP amount_of_blocks the number of blocks finalized so far +# TYPE amount_of_blocks gauge +amount_of_blocks 0 +# HELP approvals_hashes_fetch_total number of approvals_hashes all fetch requests made +# TYPE approvals_hashes_fetch_total counter +approvals_hashes_fetch_total 0 +# HELP approvals_hashes_found_in_storage number of fetch requests that found approvals_hashes in local storage +# TYPE approvals_hashes_found_in_storage counter +approvals_hashes_found_in_storage 0 +# HELP approvals_hashes_found_on_peer number of fetch requests that fetched approvals_hashes from peer +# TYPE approvals_hashes_found_on_peer counter +approvals_hashes_found_on_peer 0 +# HELP approvals_hashes_timeouts number of approvals_hashes fetch requests that timed out +# TYPE approvals_hashes_timeouts counter +approvals_hashes_timeouts 0 +# HELP block_accumulator_block_acceptors number of block acceptors in the Block Accumulator +# TYPE block_accumulator_block_acceptors gauge +block_accumulator_block_acceptors 0 +# HELP block_accumulator_known_child_blocks number of blocks received by the Block Accumulator for which we know the hash of the child block +# TYPE block_accumulator_known_child_blocks gauge +block_accumulator_known_child_blocks 0 +# HELP block_execution_results_or_chunk_fetcher_fetch_total number of block_execution_results_or_chunk_fetcher all fetch requests made +# TYPE block_execution_results_or_chunk_fetcher_fetch_total counter +block_execution_results_or_chunk_fetcher_fetch_total 0 +# HELP block_execution_results_or_chunk_fetcher_found_in_storage number of fetch requests that 
found block_execution_results_or_chunk_fetcher in local storage +# TYPE block_execution_results_or_chunk_fetcher_found_in_storage counter +block_execution_results_or_chunk_fetcher_found_in_storage 0 +# HELP block_execution_results_or_chunk_fetcher_found_on_peer number of fetch requests that fetched block_execution_results_or_chunk_fetcher from peer +# TYPE block_execution_results_or_chunk_fetcher_found_on_peer counter +block_execution_results_or_chunk_fetcher_found_on_peer 0 +# HELP block_execution_results_or_chunk_fetcher_timeouts number of block_execution_results_or_chunk_fetcher fetch requests that timed out +# TYPE block_execution_results_or_chunk_fetcher_timeouts counter +block_execution_results_or_chunk_fetcher_timeouts 0 +# HELP block_fetch_total number of block all fetch requests made +# TYPE block_fetch_total counter +block_fetch_total 0 +# HELP block_found_in_storage number of fetch requests that found block in local storage +# TYPE block_found_in_storage counter +block_found_in_storage 0 +# HELP block_found_on_peer number of fetch requests that fetched block from peer +# TYPE block_found_on_peer counter +block_found_on_peer 0 +# HELP block_gossiper_items_received number of items received by the block_gossiper +# TYPE block_gossiper_items_received counter +block_gossiper_items_received 0 +# HELP block_gossiper_table_items_current number of items in the gossip table of block_gossiper in state current +# TYPE block_gossiper_table_items_current gauge +block_gossiper_table_items_current 0 +# HELP block_gossiper_table_items_finished number of items in the gossip table of block_gossiper in state finished +# TYPE block_gossiper_table_items_finished gauge +block_gossiper_table_items_finished 0 +# HELP block_gossiper_times_gossiped number of times the block_gossiper sent gossip requests to peers +# TYPE block_gossiper_times_gossiped counter +block_gossiper_times_gossiped 0 +# HELP block_gossiper_times_ran_out_of_peers number of times the block_gossiper ran out of 
peers and had to pause +# TYPE block_gossiper_times_ran_out_of_peers counter +block_gossiper_times_ran_out_of_peers 0 +# HELP block_header_fetch_total number of block_header all fetch requests made +# TYPE block_header_fetch_total counter +block_header_fetch_total 0 +# HELP block_header_found_in_storage number of fetch requests that found block_header in local storage +# TYPE block_header_found_in_storage counter +block_header_found_in_storage 0 +# HELP block_header_found_on_peer number of fetch requests that fetched block_header from peer +# TYPE block_header_found_on_peer counter +block_header_found_on_peer 0 +# HELP block_header_timeouts number of block_header fetch requests that timed out +# TYPE block_header_timeouts counter +block_header_timeouts 0 +# HELP block_timeouts number of block fetch requests that timed out +# TYPE block_timeouts counter +block_timeouts 0 +# HELP chain_height highest complete block (DEPRECATED) +# TYPE chain_height gauge +chain_height 0 +# HELP consensus_current_era the current era in consensus +# TYPE consensus_current_era gauge +consensus_current_era 0 +# HELP consumed_ram_bytes total consumed ram in bytes +# TYPE consumed_ram_bytes gauge +consumed_ram_bytes 0 +# HELP contract_runtime_apply_commit time in seconds to commit the execution effects of a contract +# TYPE contract_runtime_apply_commit histogram +contract_runtime_apply_commit_bucket{le="0.01"} 0 +contract_runtime_apply_commit_bucket{le="0.02"} 0 +contract_runtime_apply_commit_bucket{le="0.04"} 0 +contract_runtime_apply_commit_bucket{le="0.08"} 0 +contract_runtime_apply_commit_bucket{le="0.16"} 0 +contract_runtime_apply_commit_bucket{le="0.32"} 0 +contract_runtime_apply_commit_bucket{le="0.64"} 0 +contract_runtime_apply_commit_bucket{le="1.28"} 0 +contract_runtime_apply_commit_bucket{le="2.56"} 0 +contract_runtime_apply_commit_bucket{le="5.12"} 0 +contract_runtime_apply_commit_bucket{le="+Inf"} 0 +contract_runtime_apply_commit_sum 0 +contract_runtime_apply_commit_count 0 
+# HELP contract_runtime_commit_step time in seconds to commit the step at era end +# TYPE contract_runtime_commit_step histogram +contract_runtime_commit_step_bucket{le="0.01"} 0 +contract_runtime_commit_step_bucket{le="0.02"} 0 +contract_runtime_commit_step_bucket{le="0.04"} 0 +contract_runtime_commit_step_bucket{le="0.08"} 0 +contract_runtime_commit_step_bucket{le="0.16"} 0 +contract_runtime_commit_step_bucket{le="0.32"} 0 +contract_runtime_commit_step_bucket{le="0.64"} 0 +contract_runtime_commit_step_bucket{le="1.28"} 0 +contract_runtime_commit_step_bucket{le="2.56"} 0 +contract_runtime_commit_step_bucket{le="5.12"} 0 +contract_runtime_commit_step_bucket{le="+Inf"} 0 +contract_runtime_commit_step_sum 0 +contract_runtime_commit_step_count 0 +# HELP contract_runtime_commit_upgrade time in seconds to commit an upgrade +# TYPE contract_runtime_commit_upgrade histogram +contract_runtime_commit_upgrade_bucket{le="0.01"} 0 +contract_runtime_commit_upgrade_bucket{le="0.02"} 0 +contract_runtime_commit_upgrade_bucket{le="0.04"} 0 +contract_runtime_commit_upgrade_bucket{le="0.08"} 0 +contract_runtime_commit_upgrade_bucket{le="0.16"} 0 +contract_runtime_commit_upgrade_bucket{le="0.32"} 0 +contract_runtime_commit_upgrade_bucket{le="0.64"} 0 +contract_runtime_commit_upgrade_bucket{le="1.28"} 0 +contract_runtime_commit_upgrade_bucket{le="2.56"} 0 +contract_runtime_commit_upgrade_bucket{le="5.12"} 0 +contract_runtime_commit_upgrade_bucket{le="+Inf"} 0 +contract_runtime_commit_upgrade_sum 0 +contract_runtime_commit_upgrade_count 0 +# HELP contract_runtime_execute_block time in seconds to execute all deploys in a block +# TYPE contract_runtime_execute_block histogram +contract_runtime_execute_block_bucket{le="0.01"} 0 +contract_runtime_execute_block_bucket{le="0.02"} 0 +contract_runtime_execute_block_bucket{le="0.04"} 0 +contract_runtime_execute_block_bucket{le="0.08"} 0 +contract_runtime_execute_block_bucket{le="0.16"} 0 +contract_runtime_execute_block_bucket{le="0.32"} 0 
+contract_runtime_execute_block_bucket{le="0.64"} 0 +contract_runtime_execute_block_bucket{le="1.28"} 0 +contract_runtime_execute_block_bucket{le="2.56"} 0 +contract_runtime_execute_block_bucket{le="5.12"} 0 +contract_runtime_execute_block_bucket{le="+Inf"} 0 +contract_runtime_execute_block_sum 0 +contract_runtime_execute_block_count 0 +# HELP contract_runtime_get_balance time in seconds to get the balance of a purse from global state +# TYPE contract_runtime_get_balance histogram +contract_runtime_get_balance_bucket{le="0.01"} 0 +contract_runtime_get_balance_bucket{le="0.02"} 0 +contract_runtime_get_balance_bucket{le="0.04"} 0 +contract_runtime_get_balance_bucket{le="0.08"} 0 +contract_runtime_get_balance_bucket{le="0.16"} 0 +contract_runtime_get_balance_bucket{le="0.32"} 0 +contract_runtime_get_balance_bucket{le="0.64"} 0 +contract_runtime_get_balance_bucket{le="1.28"} 0 +contract_runtime_get_balance_bucket{le="2.56"} 0 +contract_runtime_get_balance_bucket{le="5.12"} 0 +contract_runtime_get_balance_bucket{le="+Inf"} 0 +contract_runtime_get_balance_sum 0 +contract_runtime_get_balance_count 0 +# HELP contract_runtime_get_bids time in seconds to get bids from global state +# TYPE contract_runtime_get_bids histogram +contract_runtime_get_bids_bucket{le="0.01"} 0 +contract_runtime_get_bids_bucket{le="0.02"} 0 +contract_runtime_get_bids_bucket{le="0.04"} 0 +contract_runtime_get_bids_bucket{le="0.08"} 0 +contract_runtime_get_bids_bucket{le="0.16"} 0 +contract_runtime_get_bids_bucket{le="0.32"} 0 +contract_runtime_get_bids_bucket{le="0.64"} 0 +contract_runtime_get_bids_bucket{le="1.28"} 0 +contract_runtime_get_bids_bucket{le="2.56"} 0 +contract_runtime_get_bids_bucket{le="5.12"} 0 +contract_runtime_get_bids_bucket{le="+Inf"} 0 +contract_runtime_get_bids_sum 0 +contract_runtime_get_bids_count 0 +# HELP contract_runtime_get_era_validators time in seconds to get validators for a given era from global state +# TYPE contract_runtime_get_era_validators histogram 
+contract_runtime_get_era_validators_bucket{le="0.01"} 0 +contract_runtime_get_era_validators_bucket{le="0.02"} 0 +contract_runtime_get_era_validators_bucket{le="0.04"} 0 +contract_runtime_get_era_validators_bucket{le="0.08"} 0 +contract_runtime_get_era_validators_bucket{le="0.16"} 0 +contract_runtime_get_era_validators_bucket{le="0.32"} 0 +contract_runtime_get_era_validators_bucket{le="0.64"} 0 +contract_runtime_get_era_validators_bucket{le="1.28"} 0 +contract_runtime_get_era_validators_bucket{le="2.56"} 0 +contract_runtime_get_era_validators_bucket{le="5.12"} 0 +contract_runtime_get_era_validators_bucket{le="+Inf"} 0 +contract_runtime_get_era_validators_sum 0 +contract_runtime_get_era_validators_count 0 +# HELP contract_runtime_get_trie time in seconds to get a trie +# TYPE contract_runtime_get_trie histogram +contract_runtime_get_trie_bucket{le="0.001"} 0 +contract_runtime_get_trie_bucket{le="0.002"} 0 +contract_runtime_get_trie_bucket{le="0.004"} 0 +contract_runtime_get_trie_bucket{le="0.008"} 0 +contract_runtime_get_trie_bucket{le="0.016"} 0 +contract_runtime_get_trie_bucket{le="0.032"} 0 +contract_runtime_get_trie_bucket{le="0.064"} 0 +contract_runtime_get_trie_bucket{le="0.128"} 0 +contract_runtime_get_trie_bucket{le="0.256"} 0 +contract_runtime_get_trie_bucket{le="0.512"} 0 +contract_runtime_get_trie_bucket{le="+Inf"} 0 +contract_runtime_get_trie_sum 0 +contract_runtime_get_trie_count 0 +# HELP contract_runtime_latest_commit_step duration in seconds of latest commit step at era end +# TYPE contract_runtime_latest_commit_step gauge +contract_runtime_latest_commit_step 0 +# HELP contract_runtime_put_trie time in seconds to put a trie +# TYPE contract_runtime_put_trie histogram +contract_runtime_put_trie_bucket{le="0.001"} 0 +contract_runtime_put_trie_bucket{le="0.002"} 0 +contract_runtime_put_trie_bucket{le="0.004"} 0 +contract_runtime_put_trie_bucket{le="0.008"} 0 +contract_runtime_put_trie_bucket{le="0.016"} 0 +contract_runtime_put_trie_bucket{le="0.032"} 0 
+contract_runtime_put_trie_bucket{le="0.064"} 0 +contract_runtime_put_trie_bucket{le="0.128"} 0 +contract_runtime_put_trie_bucket{le="0.256"} 0 +contract_runtime_put_trie_bucket{le="0.512"} 0 +contract_runtime_put_trie_bucket{le="+Inf"} 0 +contract_runtime_put_trie_sum 0 +contract_runtime_put_trie_count 0 +# HELP contract_runtime_run_execute time in seconds to execute but not commit a contract +# TYPE contract_runtime_run_execute histogram +contract_runtime_run_execute_bucket{le="0.01"} 0 +contract_runtime_run_execute_bucket{le="0.02"} 0 +contract_runtime_run_execute_bucket{le="0.04"} 0 +contract_runtime_run_execute_bucket{le="0.08"} 0 +contract_runtime_run_execute_bucket{le="0.16"} 0 +contract_runtime_run_execute_bucket{le="0.32"} 0 +contract_runtime_run_execute_bucket{le="0.64"} 0 +contract_runtime_run_execute_bucket{le="1.28"} 0 +contract_runtime_run_execute_bucket{le="2.56"} 0 +contract_runtime_run_execute_bucket{le="5.12"} 0 +contract_runtime_run_execute_bucket{le="+Inf"} 0 +contract_runtime_run_execute_sum 0 +contract_runtime_run_execute_count 0 +# HELP contract_runtime_run_query time in seconds to run a query in global state +# TYPE contract_runtime_run_query histogram +contract_runtime_run_query_bucket{le="0.01"} 0 +contract_runtime_run_query_bucket{le="0.02"} 0 +contract_runtime_run_query_bucket{le="0.04"} 0 +contract_runtime_run_query_bucket{le="0.08"} 0 +contract_runtime_run_query_bucket{le="0.16"} 0 +contract_runtime_run_query_bucket{le="0.32"} 0 +contract_runtime_run_query_bucket{le="0.64"} 0 +contract_runtime_run_query_bucket{le="1.28"} 0 +contract_runtime_run_query_bucket{le="2.56"} 0 +contract_runtime_run_query_bucket{le="5.12"} 0 +contract_runtime_run_query_bucket{le="+Inf"} 0 +contract_runtime_run_query_sum 0 +contract_runtime_run_query_count 0 +# HELP deploy_acceptor_accepted_deploy time in seconds to accept a deploy in the deploy acceptor +# TYPE deploy_acceptor_accepted_deploy histogram +deploy_acceptor_accepted_deploy_bucket{le="10"} 0 
+deploy_acceptor_accepted_deploy_bucket{le="20"} 0 +deploy_acceptor_accepted_deploy_bucket{le="40"} 0 +deploy_acceptor_accepted_deploy_bucket{le="80"} 0 +deploy_acceptor_accepted_deploy_bucket{le="160"} 0 +deploy_acceptor_accepted_deploy_bucket{le="320"} 0 +deploy_acceptor_accepted_deploy_bucket{le="640"} 0 +deploy_acceptor_accepted_deploy_bucket{le="1280"} 0 +deploy_acceptor_accepted_deploy_bucket{le="2560"} 0 +deploy_acceptor_accepted_deploy_bucket{le="5120"} 0 +deploy_acceptor_accepted_deploy_bucket{le="+Inf"} 0 +deploy_acceptor_accepted_deploy_sum 0 +deploy_acceptor_accepted_deploy_count 0 +# HELP deploy_acceptor_rejected_deploy time in seconds to reject a deploy in the deploy acceptor +# TYPE deploy_acceptor_rejected_deploy histogram +deploy_acceptor_rejected_deploy_bucket{le="10"} 0 +deploy_acceptor_rejected_deploy_bucket{le="20"} 0 +deploy_acceptor_rejected_deploy_bucket{le="40"} 0 +deploy_acceptor_rejected_deploy_bucket{le="80"} 0 +deploy_acceptor_rejected_deploy_bucket{le="160"} 0 +deploy_acceptor_rejected_deploy_bucket{le="320"} 0 +deploy_acceptor_rejected_deploy_bucket{le="640"} 0 +deploy_acceptor_rejected_deploy_bucket{le="1280"} 0 +deploy_acceptor_rejected_deploy_bucket{le="2560"} 0 +deploy_acceptor_rejected_deploy_bucket{le="5120"} 0 +deploy_acceptor_rejected_deploy_bucket{le="+Inf"} 0 +deploy_acceptor_rejected_deploy_sum 0 +deploy_acceptor_rejected_deploy_count 0 +# HELP deploy_buffer_dead_deploys number of deploys that should not be included in future proposals. +# TYPE deploy_buffer_dead_deploys gauge +deploy_buffer_dead_deploys 0 +# HELP deploy_buffer_held_deploys number of deploys included in in-flight proposed blocks. +# TYPE deploy_buffer_held_deploys gauge +deploy_buffer_held_deploys 0 +# HELP deploy_buffer_total_deploys total number of deploys contained in the deploy buffer. 
+# TYPE deploy_buffer_total_deploys gauge +deploy_buffer_total_deploys 0 +# HELP deploy_fetch_total number of deploy all fetch requests made +# TYPE deploy_fetch_total counter +deploy_fetch_total 0 +# HELP deploy_found_in_storage number of fetch requests that found deploy in local storage +# TYPE deploy_found_in_storage counter +deploy_found_in_storage 0 +# HELP deploy_found_on_peer number of fetch requests that fetched deploy from peer +# TYPE deploy_found_on_peer counter +deploy_found_on_peer 0 +# HELP deploy_gossiper_items_received number of items received by the deploy_gossiper +# TYPE deploy_gossiper_items_received counter +deploy_gossiper_items_received 0 +# HELP deploy_gossiper_table_items_current number of items in the gossip table of deploy_gossiper in state current +# TYPE deploy_gossiper_table_items_current gauge +deploy_gossiper_table_items_current 0 +# HELP deploy_gossiper_table_items_finished number of items in the gossip table of deploy_gossiper in state finished +# TYPE deploy_gossiper_table_items_finished gauge +deploy_gossiper_table_items_finished 0 +# HELP deploy_gossiper_times_gossiped number of times the deploy_gossiper sent gossip requests to peers +# TYPE deploy_gossiper_times_gossiped counter +deploy_gossiper_times_gossiped 0 +# HELP deploy_gossiper_times_ran_out_of_peers number of times the deploy_gossiper ran out of peers and had to pause +# TYPE deploy_gossiper_times_ran_out_of_peers counter +deploy_gossiper_times_ran_out_of_peers 0 +# HELP deploy_timeouts number of deploy fetch requests that timed out +# TYPE deploy_timeouts counter +deploy_timeouts 0 +# HELP event_dispatch_duration time in nanoseconds to dispatch an event +# TYPE event_dispatch_duration histogram +event_dispatch_duration_bucket{le="100"} 0 +event_dispatch_duration_bucket{le="500"} 0 +event_dispatch_duration_bucket{le="1000"} 0 +event_dispatch_duration_bucket{le="5000"} 4 +event_dispatch_duration_bucket{le="10000"} 4 +event_dispatch_duration_bucket{le="20000"} 4 
+event_dispatch_duration_bucket{le="50000"} 9 +event_dispatch_duration_bucket{le="100000"} 20 +event_dispatch_duration_bucket{le="200000"} 45 +event_dispatch_duration_bucket{le="300000"} 78 +event_dispatch_duration_bucket{le="400000"} 126 +event_dispatch_duration_bucket{le="500000"} 200 +event_dispatch_duration_bucket{le="600000"} 247 +event_dispatch_duration_bucket{le="700000"} 271 +event_dispatch_duration_bucket{le="800000"} 274 +event_dispatch_duration_bucket{le="900000"} 276 +event_dispatch_duration_bucket{le="1000000"} 281 +event_dispatch_duration_bucket{le="2000000"} 305 +event_dispatch_duration_bucket{le="5000000"} 315 +event_dispatch_duration_bucket{le="+Inf"} 316 +event_dispatch_duration_sum 183686355 +event_dispatch_duration_count 316 +# HELP execution_queue_size number of blocks that are currently enqueued and waiting for execution +# TYPE execution_queue_size gauge +execution_queue_size 0 +# HELP finality_signature_fetcher_fetch_total number of finality_signature_fetcher all fetch requests made +# TYPE finality_signature_fetcher_fetch_total counter +finality_signature_fetcher_fetch_total 0 +# HELP finality_signature_fetcher_found_in_storage number of fetch requests that found finality_signature_fetcher in local storage +# TYPE finality_signature_fetcher_found_in_storage counter +finality_signature_fetcher_found_in_storage 0 +# HELP finality_signature_fetcher_found_on_peer number of fetch requests that fetched finality_signature_fetcher from peer +# TYPE finality_signature_fetcher_found_on_peer counter +finality_signature_fetcher_found_on_peer 0 +# HELP finality_signature_fetcher_timeouts number of finality_signature_fetcher fetch requests that timed out +# TYPE finality_signature_fetcher_timeouts counter +finality_signature_fetcher_timeouts 0 +# HELP finality_signature_gossiper_items_received number of items received by the finality_signature_gossiper +# TYPE finality_signature_gossiper_items_received counter +finality_signature_gossiper_items_received 
0 +# HELP finality_signature_gossiper_table_items_current number of items in the gossip table of finality_signature_gossiper in state current +# TYPE finality_signature_gossiper_table_items_current gauge +finality_signature_gossiper_table_items_current 0 +# HELP finality_signature_gossiper_table_items_finished number of items in the gossip table of finality_signature_gossiper in state finished +# TYPE finality_signature_gossiper_table_items_finished gauge +finality_signature_gossiper_table_items_finished 0 +# HELP finality_signature_gossiper_times_gossiped number of times the finality_signature_gossiper sent gossip requests to peers +# TYPE finality_signature_gossiper_times_gossiped counter +finality_signature_gossiper_times_gossiped 0 +# HELP finality_signature_gossiper_times_ran_out_of_peers number of times the finality_signature_gossiper ran out of peers and had to pause +# TYPE finality_signature_gossiper_times_ran_out_of_peers counter +finality_signature_gossiper_times_ran_out_of_peers 0 +# HELP finalization_time the amount of time, in milliseconds, between proposal and finalization of the latest finalized block +# TYPE finalization_time gauge +finalization_time 0 +# HELP forward_block_sync_duration_seconds duration (in sec) to synchronize a forward block +# TYPE forward_block_sync_duration_seconds histogram +forward_block_sync_duration_seconds_bucket{le="0.05"} 0 +forward_block_sync_duration_seconds_bucket{le="0.08750000000000001"} 0 +forward_block_sync_duration_seconds_bucket{le="0.153125"} 0 +forward_block_sync_duration_seconds_bucket{le="0.26796875000000003"} 0 +forward_block_sync_duration_seconds_bucket{le="0.46894531250000004"} 0 +forward_block_sync_duration_seconds_bucket{le="0.8206542968750001"} 0 +forward_block_sync_duration_seconds_bucket{le="1.4361450195312502"} 0 +forward_block_sync_duration_seconds_bucket{le="2.513253784179688"} 0 +forward_block_sync_duration_seconds_bucket{le="4.398194122314454"} 0 
+forward_block_sync_duration_seconds_bucket{le="7.696839714050294"} 0 +forward_block_sync_duration_seconds_bucket{le="+Inf"} 0 +forward_block_sync_duration_seconds_sum 0 +forward_block_sync_duration_seconds_count 0 +# HELP highest_available_block_height highest height of the available block range (the highest contiguous chain of complete blocks) +# TYPE highest_available_block_height gauge +highest_available_block_height 0 +# HELP historical_block_sync_duration_seconds duration (in sec) to synchronize a historical block +# TYPE historical_block_sync_duration_seconds histogram +historical_block_sync_duration_seconds_bucket{le="0.05"} 0 +historical_block_sync_duration_seconds_bucket{le="0.08750000000000001"} 0 +historical_block_sync_duration_seconds_bucket{le="0.153125"} 0 +historical_block_sync_duration_seconds_bucket{le="0.26796875000000003"} 0 +historical_block_sync_duration_seconds_bucket{le="0.46894531250000004"} 0 +historical_block_sync_duration_seconds_bucket{le="0.8206542968750001"} 0 +historical_block_sync_duration_seconds_bucket{le="1.4361450195312502"} 0 +historical_block_sync_duration_seconds_bucket{le="2.513253784179688"} 0 +historical_block_sync_duration_seconds_bucket{le="4.398194122314454"} 0 +historical_block_sync_duration_seconds_bucket{le="7.696839714050294"} 0 +historical_block_sync_duration_seconds_bucket{le="+Inf"} 0 +historical_block_sync_duration_seconds_sum 0 +historical_block_sync_duration_seconds_count 0 +# HELP legacy_deploy_fetch_total number of legacy_deploy all fetch requests made +# TYPE legacy_deploy_fetch_total counter +legacy_deploy_fetch_total 0 +# HELP legacy_deploy_found_in_storage number of fetch requests that found legacy_deploy in local storage +# TYPE legacy_deploy_found_in_storage counter +legacy_deploy_found_in_storage 0 +# HELP legacy_deploy_found_on_peer number of fetch requests that fetched legacy_deploy from peer +# TYPE legacy_deploy_found_on_peer counter +legacy_deploy_found_on_peer 0 +# HELP legacy_deploy_timeouts 
number of legacy_deploy fetch requests that timed out +# TYPE legacy_deploy_timeouts counter +legacy_deploy_timeouts 0 +# HELP lowest_available_block_height lowest height of the available block range (the highest contiguous chain of complete blocks) +# TYPE lowest_available_block_height gauge +lowest_available_block_height 0 +# HELP mem_address_gossiper address_gossiper memory usage in bytes +# TYPE mem_address_gossiper gauge +mem_address_gossiper 0 +# HELP mem_block_accumulator block accumulator memory usage in bytes +# TYPE mem_block_accumulator gauge +mem_block_accumulator 0 +# HELP mem_block_gossiper block gossiper memory usage in bytes +# TYPE mem_block_gossiper gauge +mem_block_gossiper 0 +# HELP mem_block_synchronizer block synchronizer memory usage in bytes +# TYPE mem_block_synchronizer gauge +mem_block_synchronizer 0 +# HELP mem_block_validator block validator memory usage in bytes +# TYPE mem_block_validator gauge +mem_block_validator 0 +# HELP mem_consensus consensus memory usage in bytes +# TYPE mem_consensus gauge +mem_consensus 0 +# HELP mem_contract_runtime contract runtime memory usage in bytes +# TYPE mem_contract_runtime gauge +mem_contract_runtime 0 +# HELP mem_deploy_acceptor deploy acceptor memory usage in bytes +# TYPE mem_deploy_acceptor gauge +mem_deploy_acceptor 0 +# HELP mem_deploy_buffer deploy buffer memory usage in bytes +# TYPE mem_deploy_buffer gauge +mem_deploy_buffer 0 +# HELP mem_deploy_gossiper deploy gossiper memory usage in bytes +# TYPE mem_deploy_gossiper gauge +mem_deploy_gossiper 0 +# HELP mem_diagnostics_port diagnostics port memory usage in bytes +# TYPE mem_diagnostics_port gauge +mem_diagnostics_port 0 +# HELP mem_estimator_runtime_s time in seconds to estimate memory usage +# TYPE mem_estimator_runtime_s histogram +mem_estimator_runtime_s_bucket{le="0.000000004"} 0 +mem_estimator_runtime_s_bucket{le="0.000000008"} 0 +mem_estimator_runtime_s_bucket{le="0.000000016"} 0 +mem_estimator_runtime_s_bucket{le="0.000000032"} 0 
+mem_estimator_runtime_s_bucket{le="0.000000064"} 0 +mem_estimator_runtime_s_bucket{le="0.000000128"} 0 +mem_estimator_runtime_s_bucket{le="0.000000256"} 0 +mem_estimator_runtime_s_bucket{le="0.000000512"} 0 +mem_estimator_runtime_s_bucket{le="0.000001024"} 0 +mem_estimator_runtime_s_bucket{le="0.000002048"} 0 +mem_estimator_runtime_s_bucket{le="0.000004096"} 0 +mem_estimator_runtime_s_bucket{le="0.000008192"} 0 +mem_estimator_runtime_s_bucket{le="0.000016384"} 0 +mem_estimator_runtime_s_bucket{le="0.000032768"} 0 +mem_estimator_runtime_s_bucket{le="0.000065536"} 0 +mem_estimator_runtime_s_bucket{le="0.000131072"} 0 +mem_estimator_runtime_s_bucket{le="0.000262144"} 0 +mem_estimator_runtime_s_bucket{le="0.000524288"} 0 +mem_estimator_runtime_s_bucket{le="0.001048576"} 0 +mem_estimator_runtime_s_bucket{le="0.002097152"} 0 +mem_estimator_runtime_s_bucket{le="0.004194304"} 0 +mem_estimator_runtime_s_bucket{le="0.008388608"} 0 +mem_estimator_runtime_s_bucket{le="0.016777216"} 0 +mem_estimator_runtime_s_bucket{le="0.033554432"} 0 +mem_estimator_runtime_s_bucket{le="0.067108864"} 0 +mem_estimator_runtime_s_bucket{le="0.134217728"} 0 +mem_estimator_runtime_s_bucket{le="0.268435456"} 0 +mem_estimator_runtime_s_bucket{le="0.536870912"} 0 +mem_estimator_runtime_s_bucket{le="1.073741824"} 0 +mem_estimator_runtime_s_bucket{le="2.147483648"} 0 +mem_estimator_runtime_s_bucket{le="4.294967296"} 0 +mem_estimator_runtime_s_bucket{le="8.589934592"} 0 +mem_estimator_runtime_s_bucket{le="+Inf"} 0 +mem_estimator_runtime_s_sum 0 +mem_estimator_runtime_s_count 0 +# HELP mem_event_stream_server event stream server memory usage in bytes +# TYPE mem_event_stream_server gauge +mem_event_stream_server 0 +# HELP mem_fetchers combined fetcher memory usage in bytes +# TYPE mem_fetchers gauge +mem_fetchers 0 +# HELP mem_finality_signature_gossiper finality signature gossiper memory usage in bytes +# TYPE mem_finality_signature_gossiper gauge +mem_finality_signature_gossiper 0 +# HELP mem_metrics 
metrics memory usage in bytes +# TYPE mem_metrics gauge +mem_metrics 0 +# HELP mem_net network memory usage in bytes +# TYPE mem_net gauge +mem_net 0 +# HELP mem_rest_server rest server memory usage in bytes +# TYPE mem_rest_server gauge +mem_rest_server 0 +# HELP mem_rpc_server rpc server memory usage in bytes +# TYPE mem_rpc_server gauge +mem_rpc_server 0 +# HELP mem_storage storage memory usage in bytes +# TYPE mem_storage gauge +mem_storage 0 +# HELP mem_sync_leaper sync leaper memory usage in bytes +# TYPE mem_sync_leaper gauge +mem_sync_leaper 0 +# HELP mem_total total memory usage in bytes +# TYPE mem_total gauge +mem_total 0 +# HELP mem_upgrade_watcher upgrade watcher memory usage in bytes +# TYPE mem_upgrade_watcher gauge +mem_upgrade_watcher 0 +# HELP net_broadcast_requests number of broadcasting requests +# TYPE net_broadcast_requests counter +net_broadcast_requests 0 +# HELP net_direct_message_requests number of requests to send a message directly to a peer +# TYPE net_direct_message_requests counter +net_direct_message_requests 0 +# HELP net_in_bytes_address_gossip volume in bytes of incoming messages with address gossiper payload +# TYPE net_in_bytes_address_gossip counter +net_in_bytes_address_gossip 0 +# HELP net_in_bytes_block_gossip volume in bytes of incoming messages with block gossiper payload +# TYPE net_in_bytes_block_gossip counter +net_in_bytes_block_gossip 0 +# HELP net_in_bytes_block_transfer volume in bytes of incoming messages with block request/response payload +# TYPE net_in_bytes_block_transfer counter +net_in_bytes_block_transfer 0 +# HELP net_in_bytes_consensus volume in bytes of incoming messages with consensus payload +# TYPE net_in_bytes_consensus counter +net_in_bytes_consensus 0 +# HELP net_in_bytes_deploy_gossip volume in bytes of incoming messages with deploy gossiper payload +# TYPE net_in_bytes_deploy_gossip counter +net_in_bytes_deploy_gossip 0 +# HELP net_in_bytes_deploy_transfer volume in bytes of incoming messages with 
deploy request/response payload +# TYPE net_in_bytes_deploy_transfer counter +net_in_bytes_deploy_transfer 0 +# HELP net_in_bytes_finality_signature_gossip volume in bytes of incoming messages with finality signature gossiper payload +# TYPE net_in_bytes_finality_signature_gossip counter +net_in_bytes_finality_signature_gossip 0 +# HELP net_in_bytes_other volume in bytes of incoming messages with other payload +# TYPE net_in_bytes_other counter +net_in_bytes_other 0 +# HELP net_in_bytes_protocol volume in bytes of incoming messages that are protocol overhead +# TYPE net_in_bytes_protocol counter +net_in_bytes_protocol 0 +# HELP net_in_bytes_trie_transfer volume in bytes of incoming messages with trie payloads +# TYPE net_in_bytes_trie_transfer counter +net_in_bytes_trie_transfer 0 +# HELP net_in_count_address_gossip count of incoming messages with address gossiper payload +# TYPE net_in_count_address_gossip counter +net_in_count_address_gossip 0 +# HELP net_in_count_block_gossip count of incoming messages with block gossiper payload +# TYPE net_in_count_block_gossip counter +net_in_count_block_gossip 0 +# HELP net_in_count_block_transfer count of incoming messages with block request/response payload +# TYPE net_in_count_block_transfer counter +net_in_count_block_transfer 0 +# HELP net_in_count_consensus count of incoming messages with consensus payload +# TYPE net_in_count_consensus counter +net_in_count_consensus 0 +# HELP net_in_count_deploy_gossip count of incoming messages with deploy gossiper payload +# TYPE net_in_count_deploy_gossip counter +net_in_count_deploy_gossip 0 +# HELP net_in_count_deploy_transfer count of incoming messages with deploy request/response payload +# TYPE net_in_count_deploy_transfer counter +net_in_count_deploy_transfer 0 +# HELP net_in_count_finality_signature_gossip count of incoming messages with finality signature gossiper payload +# TYPE net_in_count_finality_signature_gossip counter +net_in_count_finality_signature_gossip 0 +# 
HELP net_in_count_other count of incoming messages with other payload +# TYPE net_in_count_other counter +net_in_count_other 0 +# HELP net_in_count_protocol count of incoming messages that are protocol overhead +# TYPE net_in_count_protocol counter +net_in_count_protocol 0 +# HELP net_in_count_trie_transfer count of incoming messages with trie payloads +# TYPE net_in_count_trie_transfer counter +net_in_count_trie_transfer 0 +# HELP net_out_bytes_address_gossip volume in bytes of outgoing messages with address gossiper payload +# TYPE net_out_bytes_address_gossip counter +net_out_bytes_address_gossip 0 +# HELP net_out_bytes_block_gossip volume in bytes of outgoing messages with block gossiper payload +# TYPE net_out_bytes_block_gossip counter +net_out_bytes_block_gossip 0 +# HELP net_out_bytes_block_transfer volume in bytes of outgoing messages with block request/response payload +# TYPE net_out_bytes_block_transfer counter +net_out_bytes_block_transfer 0 +# HELP net_out_bytes_consensus volume in bytes of outgoing messages with consensus payload +# TYPE net_out_bytes_consensus counter +net_out_bytes_consensus 0 +# HELP net_out_bytes_deploy_gossip volume in bytes of outgoing messages with deploy gossiper payload +# TYPE net_out_bytes_deploy_gossip counter +net_out_bytes_deploy_gossip 0 +# HELP net_out_bytes_deploy_transfer volume in bytes of outgoing messages with deploy request/response payload +# TYPE net_out_bytes_deploy_transfer counter +net_out_bytes_deploy_transfer 0 +# HELP net_out_bytes_finality_signature_gossip volume in bytes of outgoing messages with finality signature gossiper payload +# TYPE net_out_bytes_finality_signature_gossip counter +net_out_bytes_finality_signature_gossip 0 +# HELP net_out_bytes_other volume in bytes of outgoing messages with other payload +# TYPE net_out_bytes_other counter +net_out_bytes_other 0 +# HELP net_out_bytes_protocol volume in bytes of outgoing messages that are protocol overhead +# TYPE net_out_bytes_protocol counter 
+net_out_bytes_protocol 0 +# HELP net_out_bytes_trie_transfer volume in bytes of outgoing messages with trie payloads +# TYPE net_out_bytes_trie_transfer counter +net_out_bytes_trie_transfer 0 +# HELP net_out_count_address_gossip count of outgoing messages with address gossiper payload +# TYPE net_out_count_address_gossip counter +net_out_count_address_gossip 0 +# HELP net_out_count_block_gossip count of outgoing messages with block gossiper payload +# TYPE net_out_count_block_gossip counter +net_out_count_block_gossip 0 +# HELP net_out_count_block_transfer count of outgoing messages with block request/response payload +# TYPE net_out_count_block_transfer counter +net_out_count_block_transfer 0 +# HELP net_out_count_consensus count of outgoing messages with consensus payload +# TYPE net_out_count_consensus counter +net_out_count_consensus 0 +# HELP net_out_count_deploy_gossip count of outgoing messages with deploy gossiper payload +# TYPE net_out_count_deploy_gossip counter +net_out_count_deploy_gossip 0 +# HELP net_out_count_deploy_transfer count of outgoing messages with deploy request/response payload +# TYPE net_out_count_deploy_transfer counter +net_out_count_deploy_transfer 0 +# HELP net_out_count_finality_signature_gossip count of outgoing messages with finality signature gossiper payload +# TYPE net_out_count_finality_signature_gossip counter +net_out_count_finality_signature_gossip 0 +# HELP net_out_count_other count of outgoing messages with other payload +# TYPE net_out_count_other counter +net_out_count_other 0 +# HELP net_out_count_protocol count of outgoing messages that are protocol overhead +# TYPE net_out_count_protocol counter +net_out_count_protocol 0 +# HELP net_out_count_trie_transfer count of outgoing messages with trie payloads +# TYPE net_out_count_trie_transfer counter +net_out_count_trie_transfer 0 +# HELP net_queued_direct_messages number of messages waiting to be sent out +# TYPE net_queued_direct_messages gauge 
+net_queued_direct_messages 0 +# HELP out_state_blocked number of connections in the blocked state +# TYPE out_state_blocked gauge +out_state_blocked 2 +# HELP out_state_connected number of connections in the connected state +# TYPE out_state_connected gauge +out_state_connected 0 +# HELP out_state_connecting number of connections in the connecting state +# TYPE out_state_connecting gauge +out_state_connecting 0 +# HELP out_state_loopback number of connections in the loopback state +# TYPE out_state_loopback gauge +out_state_loopback 1 +# HELP out_state_waiting number of connections in the waiting state +# TYPE out_state_waiting gauge +out_state_waiting 0 +# HELP peers number of connected peers +# TYPE peers gauge +peers 0 +# HELP requests_for_trie_accepted number of trie requests accepted for processing +# TYPE requests_for_trie_accepted counter +requests_for_trie_accepted 0 +# HELP requests_for_trie_finished number of trie requests finished, successful or not +# TYPE requests_for_trie_finished counter +requests_for_trie_finished 0 +# HELP runner_events running total count of events handled by this reactor +# TYPE runner_events counter +runner_events 317 +# HELP scheduler_queue_api_count current number of events in the reactor api queue +# TYPE scheduler_queue_api_count gauge +scheduler_queue_api_count 0 +# HELP scheduler_queue_consensus_count current number of events in the reactor consensus queue +# TYPE scheduler_queue_consensus_count gauge +scheduler_queue_consensus_count 0 +# HELP scheduler_queue_contract_runtime_count current number of events in the reactor contract_runtime queue +# TYPE scheduler_queue_contract_runtime_count gauge +scheduler_queue_contract_runtime_count 0 +# HELP scheduler_queue_control_count current number of events in the reactor control queue +# TYPE scheduler_queue_control_count gauge +scheduler_queue_control_count 0 +# HELP scheduler_queue_fetch_count current number of events in the reactor fetch queue +# TYPE 
scheduler_queue_fetch_count gauge +scheduler_queue_fetch_count 0 +# HELP scheduler_queue_finality_signature_count current number of events in the reactor finality_signature queue +# TYPE scheduler_queue_finality_signature_count gauge +scheduler_queue_finality_signature_count 0 +# HELP scheduler_queue_from_storage_count current number of events in the reactor from_storage queue +# TYPE scheduler_queue_from_storage_count gauge +scheduler_queue_from_storage_count 0 +# HELP scheduler_queue_gossip_count current number of events in the reactor gossip queue +# TYPE scheduler_queue_gossip_count gauge +scheduler_queue_gossip_count 0 +# HELP scheduler_queue_network_count current number of events in the reactor network queue +# TYPE scheduler_queue_network_count gauge +scheduler_queue_network_count 0 +# HELP scheduler_queue_network_demands_count current number of events in the reactor network_demands queue +# TYPE scheduler_queue_network_demands_count gauge +scheduler_queue_network_demands_count 0 +# HELP scheduler_queue_network_incoming_count current number of events in the reactor network_incoming queue +# TYPE scheduler_queue_network_incoming_count gauge +scheduler_queue_network_incoming_count 0 +# HELP scheduler_queue_network_info_count current number of events in the reactor network_info queue +# TYPE scheduler_queue_network_info_count gauge +scheduler_queue_network_info_count 0 +# HELP scheduler_queue_network_low_priority_count current number of events in the reactor network_low_priority queue +# TYPE scheduler_queue_network_low_priority_count gauge +scheduler_queue_network_low_priority_count 0 +# HELP scheduler_queue_regular_count current number of events in the reactor regular queue +# TYPE scheduler_queue_regular_count gauge +scheduler_queue_regular_count 0 +# HELP scheduler_queue_sync_global_state_count current number of events in the reactor sync_global_state queue +# TYPE scheduler_queue_sync_global_state_count gauge +scheduler_queue_sync_global_state_count 0 +# 
HELP scheduler_queue_to_storage_count current number of events in the reactor to_storage queue +# TYPE scheduler_queue_to_storage_count gauge +scheduler_queue_to_storage_count 0 +# HELP scheduler_queue_total_count current total number of events in all reactor queues +# TYPE scheduler_queue_total_count gauge +scheduler_queue_total_count 0 +# HELP scheduler_queue_validation_count current number of events in the reactor validation queue +# TYPE scheduler_queue_validation_count gauge +scheduler_queue_validation_count 0 +# HELP sync_leap_cant_fetch_total number of sync leap requests that couldn't be fetched from peers +# TYPE sync_leap_cant_fetch_total counter +sync_leap_cant_fetch_total 0 +# HELP sync_leap_duration_seconds duration (in sec) to perform a successful sync leap +# TYPE sync_leap_duration_seconds histogram +sync_leap_duration_seconds_bucket{le="1"} 0 +sync_leap_duration_seconds_bucket{le="2"} 0 +sync_leap_duration_seconds_bucket{le="3"} 0 +sync_leap_duration_seconds_bucket{le="4"} 0 +sync_leap_duration_seconds_bucket{le="+Inf"} 0 +sync_leap_duration_seconds_sum 0 +sync_leap_duration_seconds_count 0 +# HELP sync_leap_fetched_from_peer_total number of successful sync leap responses that were received from peers +# TYPE sync_leap_fetched_from_peer_total counter +sync_leap_fetched_from_peer_total 0 +# HELP sync_leap_fetcher_fetch_total number of sync_leap_fetcher all fetch requests made +# TYPE sync_leap_fetcher_fetch_total counter +sync_leap_fetcher_fetch_total 0 +# HELP sync_leap_fetcher_found_in_storage number of fetch requests that found sync_leap_fetcher in local storage +# TYPE sync_leap_fetcher_found_in_storage counter +sync_leap_fetcher_found_in_storage 0 +# HELP sync_leap_fetcher_found_on_peer number of fetch requests that fetched sync_leap_fetcher from peer +# TYPE sync_leap_fetcher_found_on_peer counter +sync_leap_fetcher_found_on_peer 0 +# HELP sync_leap_fetcher_timeouts number of sync_leap_fetcher fetch requests that timed out +# TYPE 
sync_leap_fetcher_timeouts counter +sync_leap_fetcher_timeouts 0 +# HELP sync_leap_rejected_by_peer_total number of sync leap requests that were rejected by peers +# TYPE sync_leap_rejected_by_peer_total counter +sync_leap_rejected_by_peer_total 0 +# HELP time_of_last_block_payload timestamp of the most recently accepted block payload +# TYPE time_of_last_block_payload gauge +time_of_last_block_payload 0 +# HELP time_of_last_finalized_block timestamp of the most recently finalized block +# TYPE time_of_last_finalized_block gauge +time_of_last_finalized_block 0 +# HELP total_ram_bytes total system ram in bytes +# TYPE total_ram_bytes gauge +total_ram_bytes 0 +# HELP trie_or_chunk_fetch_total number of trie_or_chunk all fetch requests made +# TYPE trie_or_chunk_fetch_total counter +trie_or_chunk_fetch_total 0 +# HELP trie_or_chunk_found_in_storage number of fetch requests that found trie_or_chunk in local storage +# TYPE trie_or_chunk_found_in_storage counter +trie_or_chunk_found_in_storage 0 +# HELP trie_or_chunk_found_on_peer number of fetch requests that fetched trie_or_chunk from peer +# TYPE trie_or_chunk_found_on_peer counter +trie_or_chunk_found_on_peer 0 +# HELP trie_or_chunk_timeouts number of trie_or_chunk fetch requests that timed out +# TYPE trie_or_chunk_timeouts counter +trie_or_chunk_timeouts 0 diff --git a/resources/production/chainspec.toml b/resources/production/chainspec.toml index 1656c69fe4..af2ba0b8c6 100644 --- a/resources/production/chainspec.toml +++ b/resources/production/chainspec.toml @@ -18,9 +18,20 @@ activation_point = 11000 # contributing to the seeding of the pseudo-random number generator used in contract-runtime for computing genesis # post-state hash. name = 'casper' -# The maximum size of an acceptable networking message in bytes. Any message larger than this will +# The maximum size of an acceptable handshake message in bytes. Any handshake larger than this will # be rejected at the networking level. 
-maximum_net_message_size = 25_165_824 +maximum_handshake_message_size = 1_048_576 +# The maximum frame size for network transport. +maximum_frame_size = 4096 + +[network.networking_config] +network = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +sync_data_request = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +sync_data_responses = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +data_requests = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +data_responses = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +consensus = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } +bulk_gossip = { in_flight_limit = 25, maximum_request_payload_size = 25_165_824, maximum_response_payload_size = 0 } [core] # Era duration. 
@@ -243,7 +254,7 @@ provision_contract_user_group_uref = { cost = 200, arguments = [0, 0, 0, 0, 0] } put_key = { cost = 100_000_000, arguments = [0, 120_000, 0, 120_000] } read_host_buffer = { cost = 3_500, arguments = [0, 310, 0] } read_value = { cost = 60_000, arguments = [0, 120_000, 0] } -read_value_local = { cost = 5_500, arguments = [0, 590, 0] } +dictionary_get = { cost = 5_500, arguments = [0, 590, 0] } remove_associated_key = { cost = 4_200, arguments = [0, 0] } remove_contract_user_group = { cost = 200, arguments = [0, 0, 0, 0] } remove_contract_user_group_urefs = { cost = 200, arguments = [0, 0, 0, 0, 0, 120_000] } @@ -256,7 +267,7 @@ transfer_from_purse_to_purse = { cost = 82_000_000, arguments = [0, 0, 0, 0, 0, transfer_to_account = { cost = 2_500_000_000, arguments = [0, 0, 0, 0, 0, 0, 0] } update_associated_key = { cost = 4_200, arguments = [0, 0, 0] } write = { cost = 14_000, arguments = [0, 0, 0, 980] } -write_local = { cost = 9_500, arguments = [0, 1_800, 0, 520] } +dictionary_put = { cost = 9_500, arguments = [0, 1_800, 0, 520] } enable_contract_version = { cost = 200, arguments = [0, 0, 0, 0] } [system_costs] @@ -283,6 +294,7 @@ mint = 2_500_000_000 reduce_total_supply = 10_000 create = 2_500_000_000 balance = 10_000 +burn = 10_000 transfer = 10_000 read_base_round_reward = 10_000 mint_into_existing_purse = 2_500_000_000 diff --git a/resources/production/config-example.toml b/resources/production/config-example.toml index ee1f5222f6..c73c04e11c 100644 --- a/resources/production/config-example.toml +++ b/resources/production/config-example.toml @@ -189,6 +189,15 @@ bind_address = '0.0.0.0:35000' # one connection. 
known_addresses = ['168.119.137.143:35000','47.251.14.254:35000','47.242.53.164:35000','46.101.61.107:35000','47.88.87.63:35000','35.152.42.229:35000','206.189.47.102:35000','134.209.243.124:35000','148.251.190.103:35000','167.172.32.44:35000','165.22.252.48:35000','18.219.70.138:35000','3.225.191.9:35000','3.221.194.62:35000','101.36.120.117:35000','54.151.24.120:35000','148.251.135.60:35000','18.188.103.230:35000','54.215.53.35:35000','88.99.95.7:35000','99.81.225.72:35000','52.207.122.179:35000','3.135.134.105:35000','62.171.135.101:35000','139.162.132.144:35000','63.33.251.206:35000','135.181.165.110:35000','135.181.134.57:35000','94.130.107.198:35000','54.180.220.20:35000','188.40.83.254:35000','157.90.131.121:35000','134.209.110.11:35000','168.119.69.6:35000','45.76.251.225:35000','168.119.209.31:35000','31.7.207.16:35000','209.145.60.74:35000','54.252.66.23:35000','134.209.16.172:35000','178.238.235.196:35000','18.217.20.213:35000','3.14.161.135:35000','3.12.207.193:35000','3.12.207.193:35000'] +# TLS keylog location +# +# If set, the node will write all keys generated during all TLS connections to the given file path. +# This option is intended for debugging only, do NOT enable this on production systems. +# +# The specified location will be appended to, even across node restarts, so it may grow large if +# unattended. +# keylog_path = "/path/to/keylog" + # Minimum number of fully-connected peers to consider network component initialized. min_peers_for_initialization = 3 @@ -199,93 +208,30 @@ gossip_interval = '120 seconds' # more than the expected time required for initial connections to complete. initial_gossip_delay = '5 seconds' -# How long a connection is allowed to be stuck as pending before it is abandoned. -max_addr_pending_time = '1 minute' - # Maximum time allowed for a connection handshake between two nodes to be completed. Connections # exceeding this threshold are considered unlikely to be healthy or even malicious and thus # terminated. 
handshake_timeout = '20 seconds' -# Maximum number of incoming connections per unique peer allowed. If the limit is hit, additional -# connections will be rejected. A value of `0` means unlimited. -max_incoming_peer_connections = 3 - -# The maximum total of upstream bandwidth in bytes per second allocated to non-validating peers. -# A value of `0` means unlimited. -max_outgoing_byte_rate_non_validators = 6553600 - -# The maximum allowed total impact of requests from non-validating peers per second answered. -# A value of `0` means unlimited. -max_incoming_message_rate_non_validators = 3000 - -# Maximum number of requests for data from a single peer that are allowed be buffered. A value of -# `0` means unlimited. -max_in_flight_demands = 50 - -# Version threshold to enable tarpit for. -# -# When set to a version (the value may be `null` to disable the feature), any peer that reports a -# protocol version equal or below the threshold will be rejected only after holding open the -# connection for a specific (`tarpit_duration`) amount of time. -# -# This option makes most sense to enable on known nodes with addresses where legacy nodes that are -# still in operation are connecting to, as these older versions will only attempt to reconnect to -# other nodes once they have exhausted their set of known nodes. -tarpit_version_threshold = '1.2.1' - -# How long to hold connections to trapped legacy nodes. -tarpit_duration = '10 minutes' - -# The probability [0.0, 1.0] of this node trapping a legacy node. -# -# Since older nodes will only reconnect if all their options are exhausted, it is sufficient for a -# single known node to hold open a connection to prevent the node from reconnecting. This should be -# set to `1/n` or higher, with `n` being the number of known nodes expected in the configuration of -# legacy nodes running this software. -tarpit_chance = 0.2 +# Timeout before giving up on a peer. 
If a peer exceeds this time limit for acknowledging or +# responding to a received message, it is considered unresponsive and the connection severed. +ack_timeout = '30sec' # How long peers remain blocked after they get blocklisted. blocklist_retain_duration = '10 minutes' -# Identity of a node -# -# When this section is not specified, an identity will be generated when the node process starts with a self-signed certifcate. -# This option makes sense for some private chains where for security reasons joining new nodes is restricted. -# [network.identity] -# tls_certificate = "node_cert.pem" -# secret_key = "node.pem" -# ca_certificate = "ca_cert.pem" +# Whether or not to consider a connection stuck after a single request times out, causing a termination and reconnection. +# It is recommended to set this to `true` unless network connectivity issues are being troubleshot. +bubble_timeouts = true -# Weights for impact estimation of incoming messages, used in combination with -# `max_incoming_message_rate_non_validators`. -# -# Any weight set to 0 means that the category of traffic is exempt from throttling. -[network.estimator_weights] -consensus = 0 -block_gossip = 1 -deploy_gossip = 0 -finality_signature_gossip = 1 -address_gossip = 0 -finality_signature_broadcasts = 0 -deploy_requests = 1 -deploy_responses = 0 -legacy_deploy_requests = 1 -legacy_deploy_responses = 0 -block_requests = 1 -block_responses = 0 -block_header_requests = 1 -block_header_responses = 0 -trie_requests = 1 -trie_responses = 0 -finality_signature_requests = 1 -finality_signature_responses = 0 -sync_leap_requests = 1 -sync_leap_responses = 0 -approvals_hashes_requests = 1 -approvals_hashes_responses = 0 -execution_results_requests = 1 -execution_results_responses = 0 +# The maximum time a peer is allowed to take to receive a fatal error. +error_timeout = '10 seconds' + +# Whether to restrict broadcasts of values most likely only relevant for validators to only those. 
+use_validator_broadcast = true + +# Whether to enable the use of optimized gossip peer selection for a subset of items. +use_mixed_gossip = false # Identity of a node # @@ -296,6 +242,45 @@ execution_results_responses = 0 # secret_key = "local_node.pem" # ca_certificate = "ca_cert.pem" + +# ================================================ +# Configuration options for the connection manager +# ================================================ +[network.conman] + +# The timeout for a single underlying TCP connection to be established. +tcp_connect_timeout = '10 seconds' + +# Maximum time allowed for TLS setup and handshaking to proceed. +setup_timeout = '10 seconds' + +# How often to reattempt a connection. +tcp_connect_attempts = 8 + +# Base delay for the backoff, grows exponentially until `tcp_connect_attempts` maxes out. +tcp_connect_base_backoff = '1 second' + +# How long to back off from reconnecting to an address after a failure that indicates a +# significant problem. +significant_error_backoff = '60 seconds' + +# How long to back off from reconnecting to an address if the error is likely not going to +# change for a long time. +permanent_error_backoff = '10 minutes' + +# How long to wait before reconnecting when a successful outgoing connection is lost. +successful_reconnect_delay = '1 second' + +# The minimum time a connection must have successfully served data to not be seen as flaky. +flaky_connection_threshold = '1 minute' + +# Number of incoming connections before refusing to accept any new ones. +max_incoming_connections = 10000 + +# Number of outgoing connections before stopping to connect to learned addresses. 
+max_outgoing_connections = 10000 + + # ================================================== # Configuration options for the JSON-RPC HTTP server # ================================================== @@ -543,10 +528,10 @@ disconnect_dishonest_peers_interval = '10 seconds' latch_reset_interval = '5 seconds' -# ============================================= -# Configuration options for the block validator -# ============================================= -[block_validator] +# ====================================================== +# Configuration options for the proposed block validator +# ====================================================== +[proposed_block_validator] # Maximum number of completed entries to retain. # diff --git a/resources/test/rpc_schema_hashing.json b/resources/test/rpc_schema_hashing.json index 4cc9b224c5..919ad9018c 100644 --- a/resources/test/rpc_schema_hashing.json +++ b/resources/test/rpc_schema_hashing.json @@ -2479,7 +2479,8 @@ "Read", "Write", "Add", - "NoOp" + "NoOp", + "Delete" ] }, "TransformEntry": { @@ -2514,7 +2515,8 @@ "Identity", "WriteContractWasm", "WriteContract", - "WriteContractPackage" + "WriteContractPackage", + "Prune" ] }, { diff --git a/resources/test/sse_data_schema.json b/resources/test/sse_data_schema.json index 7a7b305793..6901b2b9ea 100644 --- a/resources/test/sse_data_schema.json +++ b/resources/test/sse_data_schema.json @@ -1234,7 +1234,8 @@ "Read", "Write", "Add", - "NoOp" + "NoOp", + "Delete" ] }, "TransformEntry": { @@ -1269,7 +1270,8 @@ "Identity", "WriteContractWasm", "WriteContract", - "WriteContractPackage" + "WriteContractPackage", + "Prune" ] }, { diff --git a/resources/test/valid/0_9_0/chainspec.toml b/resources/test/valid/0_9_0/chainspec.toml index 597c493d09..31946df79c 100644 --- a/resources/test/valid/0_9_0/chainspec.toml +++ b/resources/test/valid/0_9_0/chainspec.toml @@ -126,7 +126,7 @@ provision_contract_user_group_uref = { cost = 124, arguments = [0,1,2,3,4] } put_key = { cost = 125, arguments = [0, 
1, 2, 3] } read_host_buffer = { cost = 126, arguments = [0, 1, 2] } read_value = { cost = 127, arguments = [0, 1, 0] } -read_value_local = { cost = 128, arguments = [0, 1, 0] } +dictionary_get = { cost = 128, arguments = [0, 1, 0] } remove_associated_key = { cost = 129, arguments = [0, 1] } remove_contract_user_group = { cost = 130, arguments = [0, 1, 2, 3] } remove_contract_user_group_urefs = { cost = 131, arguments = [0,1,2,3,4,5] } @@ -139,7 +139,7 @@ transfer_from_purse_to_purse = { cost = 137, arguments = [0, 1, 2, 3, 4, 5, 6, 7 transfer_to_account = { cost = 138, arguments = [0, 1, 2, 3, 4, 5, 6] } update_associated_key = { cost = 139, arguments = [0, 1, 2] } write = { cost = 140, arguments = [0, 1, 0, 2] } -write_local = { cost = 141, arguments = [0, 1, 2, 3] } +dictionary_put = { cost = 141, arguments = [0, 1, 2, 3] } enable_contract_version = { cost = 142, arguments = [0, 1, 2, 3] } [system_costs] diff --git a/resources/test/valid/0_9_0_unordered/chainspec.toml b/resources/test/valid/0_9_0_unordered/chainspec.toml index e922307476..5e62721938 100644 --- a/resources/test/valid/0_9_0_unordered/chainspec.toml +++ b/resources/test/valid/0_9_0_unordered/chainspec.toml @@ -124,7 +124,7 @@ provision_contract_user_group_uref = { cost = 124, arguments = [0,1,2,3,4] } put_key = { cost = 125, arguments = [0, 1, 2, 3] } read_host_buffer = { cost = 126, arguments = [0, 1, 2] } read_value = { cost = 127, arguments = [0, 1, 0] } -read_value_local = { cost = 128, arguments = [0, 1, 0] } +dictionary_get = { cost = 128, arguments = [0, 1, 0] } remove_associated_key = { cost = 129, arguments = [0, 1] } remove_contract_user_group = { cost = 130, arguments = [0, 1, 2, 3] } remove_contract_user_group_urefs = { cost = 131, arguments = [0,1,2,3,4,5] } @@ -137,7 +137,7 @@ transfer_from_purse_to_purse = { cost = 137, arguments = [0, 1, 2, 3, 4, 5, 6, 7 transfer_to_account = { cost = 138, arguments = [0, 1, 2, 3, 4, 5, 6] } update_associated_key = { cost = 139, arguments = [0, 1, 2] 
} write = { cost = 140, arguments = [0, 1, 0, 2] } -write_local = { cost = 141, arguments = [0, 1, 2, 3] } +dictionary_put = { cost = 141, arguments = [0, 1, 2, 3] } enable_contract_version = { cost = 142, arguments = [0, 1, 2, 3] } [system_costs] diff --git a/resources/test/valid/1_0_0/chainspec.toml b/resources/test/valid/1_0_0/chainspec.toml index c40c8671ee..37e9c6dd57 100644 --- a/resources/test/valid/1_0_0/chainspec.toml +++ b/resources/test/valid/1_0_0/chainspec.toml @@ -127,7 +127,7 @@ put_key = { cost = 125, arguments = [0, 1, 2, 3] } random_bytes = { cost = 123, arguments = [0, 1] } read_host_buffer = { cost = 126, arguments = [0, 1, 2] } read_value = { cost = 127, arguments = [0, 1, 0] } -read_value_local = { cost = 128, arguments = [0, 1, 0] } +dictionary_get = { cost = 128, arguments = [0, 1, 0] } remove_associated_key = { cost = 129, arguments = [0, 1] } remove_contract_user_group = { cost = 130, arguments = [0, 1, 2, 3] } remove_contract_user_group_urefs = { cost = 131, arguments = [0,1,2,3,4,5] } @@ -140,7 +140,7 @@ transfer_from_purse_to_purse = { cost = 137, arguments = [0, 1, 2, 3, 4, 5, 6, 7 transfer_to_account = { cost = 138, arguments = [0, 1, 2, 3, 4, 5, 6] } update_associated_key = { cost = 139, arguments = [0, 1, 2] } write = { cost = 140, arguments = [0, 1, 0, 2] } -write_local = { cost = 141, arguments = [0, 1, 2, 3] } +dictionary_put = { cost = 141, arguments = [0, 1, 2, 3] } enable_contract_version = { cost = 142, arguments = [0, 1, 2, 3] } [system_costs] diff --git a/smart_contracts/contracts/client/burn/Cargo.toml b/smart_contracts/contracts/client/burn/Cargo.toml new file mode 100644 index 0000000000..f9949db688 --- /dev/null +++ b/smart_contracts/contracts/client/burn/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "burn" +version = "0.1.0" +authors = ["Igor Bunar ", "Jan Hoffmann "] +edition = "2021" + +[[bin]] +name = "burn" +path = "src/main.rs" +bench = false +doctest = false +test = false + +[dependencies] +casper-contract = { 
path = "../../../contract" } +casper-types = { path = "../../../../types" } diff --git a/smart_contracts/contracts/client/burn/src/main.rs b/smart_contracts/contracts/client/burn/src/main.rs new file mode 100644 index 0000000000..9d63257d08 --- /dev/null +++ b/smart_contracts/contracts/client/burn/src/main.rs @@ -0,0 +1,91 @@ +#![no_std] +#![no_main] + +extern crate alloc; +use alloc::{string::String, vec::Vec}; + +use casper_contract::{ + contract_api::{account, alloc_bytes, runtime, system}, + ext_ffi, + unwrap_or_revert::UnwrapOrRevert, +}; +use casper_types::{ + api_error, bytesrepr, runtime_args, system::mint, ApiError, Key, RuntimeArgs, URef, U512, +}; + +const ARG_PURSE_NAME: &str = "purse_name"; + +fn burn(uref: URef, amount: U512) -> Result<(), mint::Error> { + let contract_hash = system::get_mint(); + let args = runtime_args! { + mint::ARG_PURSE => uref, + mint::ARG_AMOUNT => amount, + }; + runtime::call_contract(contract_hash, mint::METHOD_BURN, args) +} + +#[no_mangle] +pub extern "C" fn call() { + let purse_uref = match get_named_arg_option::(ARG_PURSE_NAME) { + Some(name) => { + // if a key was provided and there is no value under it we revert + // to prevent user from accidentaly burning tokens from the main purse + // eg. 
if they make a typo + let Some(Key::URef(purse_uref)) = runtime::get_key(&name) else { + runtime::revert(ApiError::InvalidPurseName) + }; + purse_uref + } + None => account::get_main_purse(), + }; + let amount: U512 = runtime::get_named_arg(mint::ARG_AMOUNT); + + burn(purse_uref, amount).unwrap_or_revert(); +} + +fn get_named_arg_size(name: &str) -> Option { + let mut arg_size: usize = 0; + let ret = unsafe { + ext_ffi::casper_get_named_arg_size( + name.as_bytes().as_ptr(), + name.len(), + &mut arg_size as *mut usize, + ) + }; + match api_error::result_from(ret) { + Ok(_) => Some(arg_size), + Err(ApiError::MissingArgument) => None, + Err(e) => runtime::revert(e), + } +} + +fn get_named_arg_option(name: &str) -> Option { + let arg_size = get_named_arg_size(name).unwrap_or_revert_with(ApiError::MissingArgument); + let arg_bytes = if arg_size > 0 { + let res = { + let data_non_null_ptr = alloc_bytes(arg_size); + let ret = unsafe { + ext_ffi::casper_get_named_arg( + name.as_bytes().as_ptr(), + name.len(), + data_non_null_ptr.as_ptr(), + arg_size, + ) + }; + let data = + unsafe { Vec::from_raw_parts(data_non_null_ptr.as_ptr(), arg_size, arg_size) }; + if ret != 0 { + return None; + } + data + }; + res + } else { + // Avoids allocation with 0 bytes and a call to get_named_arg + Vec::new() + }; + + let deserialized_data = + bytesrepr::deserialize(arg_bytes).unwrap_or_revert_with(ApiError::InvalidArgument); + Some(deserialized_data) +} diff --git a/types/src/checksummed_hex.rs b/types/src/checksummed_hex.rs index 2b7aa19307..165acd3a4f 100644 --- a/types/src/checksummed_hex.rs +++ b/types/src/checksummed_hex.rs @@ -169,8 +169,8 @@ mod tests { #[proptest] fn hex_roundtrip(input: Vec) { prop_assert_eq!( - input.clone(), - decode(encode_iter(&input).collect::()).expect("Failed to decode input.") + &input, + &decode(encode_iter(&input).collect::()).expect("Failed to decode input.") ); } diff --git a/types/src/crypto/asymmetric_key/tests.rs 
b/types/src/crypto/asymmetric_key/tests.rs index 545b8dad00..be7132da86 100644 --- a/types/src/crypto/asymmetric_key/tests.rs +++ b/types/src/crypto/asymmetric_key/tests.rs @@ -198,6 +198,7 @@ fn hash(data: &T) -> u64 { } fn check_ord_and_hash(low: T, high: T) { + #[allow(clippy::redundant_clone)] let low_copy = low.clone(); assert_eq!(hash(&low), hash(&low_copy)); diff --git a/types/src/execution_result.rs b/types/src/execution_result.rs index cc73d9ec91..87788fc94c 100644 --- a/types/src/execution_result.rs +++ b/types/src/execution_result.rs @@ -63,6 +63,7 @@ enum OpTag { Write = 1, Add = 2, NoOp = 3, + Delete = 4, } impl TryFrom for OpTag { @@ -95,6 +96,7 @@ enum TransformTag { AddKeys = 16, Failure = 17, WriteUnbonding = 18, + Prune = 19, } impl TryFrom for TransformTag { @@ -438,6 +440,8 @@ pub enum OpKind { Add, /// An operation which has no effect. NoOp, + /// A delete operation. + Delete, } impl OpKind { @@ -447,6 +451,7 @@ impl OpKind { OpKind::Write => OpTag::Write, OpKind::Add => OpTag::Add, OpKind::NoOp => OpTag::NoOp, + OpKind::Delete => OpTag::Delete, } } } @@ -471,6 +476,7 @@ impl FromBytes for OpKind { OpTag::Write => Ok((OpKind::Write, remainder)), OpTag::Add => Ok((OpKind::Add, remainder)), OpTag::NoOp => Ok((OpKind::NoOp, remainder)), + OpTag::Delete => Ok((OpKind::Delete, remainder)), } } } @@ -554,6 +560,8 @@ pub enum Transform { Failure(String), /// Writes the given Unbonding to global state. WriteUnbonding(Vec), + /// Prunes a key. 
+ Prune, } impl Transform { @@ -578,6 +586,7 @@ impl Transform { Transform::AddKeys(_) => TransformTag::AddKeys, Transform::Failure(_) => TransformTag::Failure, Transform::WriteUnbonding(_) => TransformTag::WriteUnbonding, + Transform::Prune => TransformTag::Prune, } } } @@ -638,6 +647,7 @@ impl ToBytes for Transform { Transform::WriteUnbonding(value) => { buffer.extend(value.to_bytes()?); } + Transform::Prune => {} } Ok(buffer) } @@ -663,6 +673,7 @@ impl ToBytes for Transform { Transform::WriteBid(value) => value.serialized_length(), Transform::WriteWithdraw(value) => value.serialized_length(), Transform::WriteUnbonding(value) => value.serialized_length(), + Transform::Prune => 0, }; U8_SERIALIZED_LENGTH + body_len } @@ -738,6 +749,7 @@ impl FromBytes for Transform { as FromBytes>::from_bytes(remainder)?; Ok((Transform::WriteUnbonding(unbonding_purses), remainder)) } + TransformTag::Prune => Ok((Transform::Prune, remainder)), } } } @@ -745,7 +757,7 @@ impl FromBytes for Transform { impl Distribution for Standard { fn sample(&self, rng: &mut R) -> Transform { // TODO - include WriteDeployInfo and WriteTransfer as options - match rng.gen_range(0..13) { + match rng.gen_range(0..14) { 0 => Transform::Identity, 1 => Transform::WriteCLValue(CLValue::from_t(true).unwrap()), 2 => Transform::WriteAccount(AccountHash::new(rng.gen())), @@ -768,6 +780,7 @@ impl Distribution for Standard { Transform::AddKeys(named_keys) } 12 => Transform::Failure(rng.gen::().to_string()), + 13 => Transform::Prune, _ => unreachable!(), } } diff --git a/types/src/key.rs b/types/src/key.rs index c9ca44a061..6e32544f05 100644 --- a/types/src/key.rs +++ b/types/src/key.rs @@ -575,6 +575,16 @@ impl Key { } false } + + /// Returns a reference to the inner [`AccountHash`] if `self` is of type + /// [`Key::Withdraw`], otherwise returns `None`. 
+ pub fn as_withdraw(&self) -> Option<&AccountHash> { + if let Self::Withdraw(v) = self { + Some(v) + } else { + None + } + } } impl Display for Key { diff --git a/types/src/system/mint/constants.rs b/types/src/system/mint/constants.rs index cffada448e..b49ab5c94f 100644 --- a/types/src/system/mint/constants.rs +++ b/types/src/system/mint/constants.rs @@ -17,6 +17,8 @@ pub const ARG_ROUND_SEIGNIORAGE_RATE: &str = "round_seigniorage_rate"; pub const METHOD_MINT: &str = "mint"; /// Named constant for method `reduce_total_supply`. pub const METHOD_REDUCE_TOTAL_SUPPLY: &str = "reduce_total_supply"; +/// Named constant for method `burn`. +pub const METHOD_BURN: &str = "burn"; /// Named constant for (synthetic) method `create` pub const METHOD_CREATE: &str = "create"; /// Named constant for method `balance`. diff --git a/types/src/system/mint/entry_points.rs b/types/src/system/mint/entry_points.rs index bbc82c2097..e348f23bec 100644 --- a/types/src/system/mint/entry_points.rs +++ b/types/src/system/mint/entry_points.rs @@ -3,7 +3,7 @@ use alloc::boxed::Box; use crate::{ contracts::Parameters, system::mint::{ - ARG_AMOUNT, ARG_ID, ARG_PURSE, ARG_SOURCE, ARG_TARGET, ARG_TO, METHOD_BALANCE, + ARG_AMOUNT, ARG_ID, ARG_PURSE, ARG_SOURCE, ARG_TARGET, ARG_TO, METHOD_BALANCE, METHOD_BURN, METHOD_CREATE, METHOD_MINT, METHOD_MINT_INTO_EXISTING_PURSE, METHOD_READ_BASE_ROUND_REWARD, METHOD_REDUCE_TOTAL_SUPPLY, METHOD_TRANSFER, }, @@ -38,6 +38,21 @@ pub fn mint_entry_points() -> EntryPoints { ); entry_points.add_entry_point(entry_point); + let entry_point = EntryPoint::new( + METHOD_BURN, + vec![ + Parameter::new(ARG_PURSE, CLType::URef), + Parameter::new(ARG_AMOUNT, CLType::U512), + ], + CLType::Result { + ok: Box::new(CLType::Unit), + err: Box::new(CLType::U8), + }, + EntryPointAccess::Public, + EntryPointType::Contract, + ); + entry_points.add_entry_point(entry_point); + let entry_point = EntryPoint::new( METHOD_CREATE, Parameters::new(), diff --git a/types/src/system/mint/error.rs 
b/types/src/system/mint/error.rs index db327a4057..dc03989c8e 100644 --- a/types/src/system/mint/error.rs +++ b/types/src/system/mint/error.rs @@ -154,6 +154,12 @@ pub enum Error { /// assert_eq!(22, Error::DisabledUnrestrictedTransfers as u8); DisabledUnrestrictedTransfers = 22, + /// Attempt to access a record using forged permissions. + /// ``` + /// # use casper_types::system::mint::Error; + /// assert_eq!(23, Error::ForgedReference as u8); + ForgedReference = 23, + #[cfg(test)] #[doc(hidden)] Sentinel, @@ -209,6 +215,7 @@ impl TryFrom for Error { d if d == Error::DisabledUnrestrictedTransfers as u8 => { Ok(Error::DisabledUnrestrictedTransfers) } + d if d == Error::ForgedReference as u8 => Ok(Error::ForgedReference), _ => Err(TryFromU8ForError(())), } } @@ -269,6 +276,7 @@ impl Display for Error { Error::DisabledUnrestrictedTransfers => { formatter.write_str("Disabled unrestricted transfers") } + Error::ForgedReference => formatter.write_str("Forged reference"), #[cfg(test)] Error::Sentinel => formatter.write_str("Sentinel error"), } diff --git a/types/src/testing.rs b/types/src/testing.rs index 8dbcb131d7..9bbb0e2b7c 100644 --- a/types/src/testing.rs +++ b/types/src/testing.rs @@ -83,6 +83,15 @@ impl TestRng { *flag.borrow_mut() = true; }); } + + /// Creates a child RNG. + /// + /// The resulting RNG is seeded from `self` deterministically. + pub fn create_child(&mut self) -> Self { + let seed = self.gen(); + let rng = Pcg64Mcg::from_seed(seed); + TestRng { seed, rng } + } } impl Default for TestRng { diff --git a/utils/nctl/sh/assets/compile.sh b/utils/nctl/sh/assets/compile.sh index ed61e8f5b0..82077a3363 100644 --- a/utils/nctl/sh/assets/compile.sh +++ b/utils/nctl/sh/assets/compile.sh @@ -6,6 +6,11 @@ # NCTL - path to nctl home directory. ######################################## +if [ "$NCTL_SKIP_COMPILATION" = "true" ]; then + echo "skipping nctl-compile as requested"; + return; +fi + unset OPTIND #clean OPTIND envvar, otherwise getopts can break. 
COMPILE_MODE="release" #default compile mode to release. diff --git a/utils/nctl/sh/assets/setup_shared.sh b/utils/nctl/sh/assets/setup_shared.sh index 905da4fa53..47cdfeecaf 100644 --- a/utils/nctl/sh/assets/setup_shared.sh +++ b/utils/nctl/sh/assets/setup_shared.sh @@ -411,6 +411,10 @@ function setup_asset_node_configs() SPECULATIVE_EXEC_ADDR=$(grep 'speculative_exec_server' $PATH_TO_CONFIG_FILE || true) # Set node configuration settings. + # Note: To dump TLS keys, add + # "cfg['network']['keylog_path']='$PATH_TO_NET/tlskeys';" + # -- but beware, this will break older nodes configurations. + # TODO: Write conditional include of this configuration setting. SCRIPT=( "import toml;" "cfg=toml.load('$PATH_TO_CONFIG_FILE');" diff --git a/utils/nctl/sh/scenarios/common/itst.sh b/utils/nctl/sh/scenarios/common/itst.sh index 1d209275a7..0095092cd4 100644 --- a/utils/nctl/sh/scenarios/common/itst.sh +++ b/utils/nctl/sh/scenarios/common/itst.sh @@ -39,7 +39,7 @@ function clean_up() { tar -cvzf "${DRONE_BUILD_NUMBER}"_nctl_dump.tar.gz * > /dev/null 2>&1 aws s3 cp ./"${DRONE_BUILD_NUMBER}"_nctl_dump.tar.gz s3://nctl.casperlabs.io/nightly-logs/ > /dev/null 2>&1 log "Download the dump file: curl -O https://s3.us-east-2.amazonaws.com/nctl.casperlabs.io/nightly-logs/${DRONE_BUILD_NUMBER}_nctl_dump.tar.gz" - log "\nextra log lines to push\ndownload instructions above\nserver license expired banner\n" + log "\nextra log lines push\ndownload instructions above\nlicense expired\n" popd fi fi diff --git a/utils/nctl/sh/scenarios/network_soundness.py b/utils/nctl/sh/scenarios/network_soundness.py index 0a03fe0acc..9906026f22 100755 --- a/utils/nctl/sh/scenarios/network_soundness.py +++ b/utils/nctl/sh/scenarios/network_soundness.py @@ -85,24 +85,31 @@ def invoke(command, quiet=False): try: start = time.time() - result = subprocess.check_output([ - '/bin/bash', '-c', + completed = subprocess.run([ + '/usr/bin/env', 'bash', '-c', 'shopt -s expand_aliases\nsource 
$NCTL/activate\n{}'.format( - command, timeout=60) - ]).decode("utf-8").rstrip() + command) + ], timeout=60, capture_output=True) end = time.time() + stdout = completed.stdout.decode("utf-8").rstrip() + stderr = completed.stderr.decode("utf-8").rstrip() elapsed = end - start if elapsed > COMMAND_EXECUTION_TIME_SECS: log("command took {:.2f} seconds to execute: {}".format( end - start, command)) - return result + completed.check_returncode() + return stdout except subprocess.CalledProcessError as err: log("command returned non-zero exit code - this can be a transitory error if the node is temporarily down: {}" .format(err)) + log("command stdout: {}".format(ellipsize(stdout))) + log("command stderr: {}".format(ellipsize(stderr))) return "" except subprocess.TimeoutExpired as err: log("subprocess timeout - this can be a transitory error if the node is temporarily down: {}" .format(err)) + log("command stdout: {}".format(ellipsize(stdout))) + log("command stderr: {}".format(ellipsize(stderr))) return "" finally: invoke_lock.release() @@ -126,7 +133,13 @@ def start_network(): chainspec['deploys']['block_gas_limit'] = huge_deploy_payment_amount toml.dump(chainspec, open(path_to_chainspec, 'w')) - command = "RUST_LOG=debug nctl-start" + path_to_config = "utils/nctl/assets/net-1/nodes/node-{}/config/1_0_0/config.toml".format( + node) + config = toml.load(path_to_config) + config['network']['conman']['permanent_error_backoff'] = "1 second" + toml.dump(config, open(path_to_config, 'w')) + + command = "RUST_LOG=debug,juliet=info nctl-start" invoke(command) @@ -196,9 +209,9 @@ def huge_deploy_sender_thread(count, interval): for i in range(count): random_node = random.randint(1, current_node_count) huge_deploy_path = make_huge_deploy(random_node) - command = "{} send-deploy --input {} --node-address http://{} > /dev/null 2>&1".format( + command = "{} send-deploy -v --input {} --node-address http://{}".format( path_to_client, huge_deploy_path, - 
get_node_rpc_endpoint(random_node)) + get_node_rpc_address(random_node)) invoke(command) log("sent " + str(count) + " huge deploys and sleeping " + @@ -218,12 +231,12 @@ def get_node_metrics_endpoint(node): return -def get_node_rpc_endpoint(node): +def get_node_rpc_address(node): command = "nctl-view-node-ports node={}".format(node) result = invoke(command, True) m = re.match(r'.*RPC @ (\d*).*', result) if m and m.group(1): - return "localhost:{}/rpc/".format(int(m.group(1))) + return "localhost:{}".format(int(m.group(1))) return @@ -359,7 +372,7 @@ def make_huge_deploy(node): if os.path.exists(output): os.remove(output) - command = "{} make-deploy --output {} --chain-name {} --payment-amount {} --ttl {} --secret-key {} --session-path {} > /dev/null 2>&1".format( + command = "{} make-deploy --output {} --chain-name {} --payment-amount {} --ttl {} --secret-key {} --session-path {}".format( path_to_client, output, chain_name, huge_deploy_payment_amount, ttl, secret_key, session_path) invoke(command) @@ -482,6 +495,14 @@ def join_node(current_node_count): return current_node_count +def ellipsize(s, max_length=4096): + if len(s) > max_length: + chunk = int((max_length-5) / 2) + s = s[:chunk] + " ... " + s[-chunk:] + + return s + + path_to_client = invoke("get_path_to_client") start_test() diff --git a/utils/nctl/sh/scenarios/swap_validator_set.sh b/utils/nctl/sh/scenarios/swap_validator_set.sh index cdd2b6d338..404d823c4f 100755 --- a/utils/nctl/sh/scenarios/swap_validator_set.sh +++ b/utils/nctl/sh/scenarios/swap_validator_set.sh @@ -46,6 +46,12 @@ function main() { # 10. Wait auction_delay + 2 log_step "waiting until era 8 where swap should take place" + nctl-await-until-era-n era='7' log='true' + + # We're refreshing the PRE_SWAP_HASH here since 5 eras have passed since we initialized it. + # It will be used later to re-start node 1 in step 18 and we don't want the node to fail + # because the hash was too old. 
+ PRE_SWAP_HASH=$(do_read_lfb_hash 1) nctl-await-until-era-n era='8' log='true' # Since this walks back to first found switch block, keep this immediately after era 8 starts diff --git a/utils/nctl/sh/staging/build.sh b/utils/nctl/sh/staging/build.sh index 3ffd002985..2fac9e4164 100644 --- a/utils/nctl/sh/staging/build.sh +++ b/utils/nctl/sh/staging/build.sh @@ -45,6 +45,12 @@ function _main() ####################################### function set_stage_binaries() { + # Allow for external overriding of binary staging step if necessary. + if [ ! -z $NCTL_OVERRIDE_STAGE_BINARIES ]; then + $NCTL_OVERRIDE_STAGE_BINARIES + return + fi; + local PATH_TO_NODE_SOURCE=${1} local PATH_TO_CLIENT_SOURCE=${2} diff --git a/utils/nctl/sh/staging/set_remote.sh b/utils/nctl/sh/staging/set_remote.sh index b78afdfa2f..be1f490e9e 100644 --- a/utils/nctl/sh/staging/set_remote.sh +++ b/utils/nctl/sh/staging/set_remote.sh @@ -53,6 +53,13 @@ function _main() curl -O "$_BASE_URL/v$PROTOCOL_VERSION/$REMOTE_FILE" > /dev/null 2>&1 fi done + + # Allow external hook for patching the downloaded binaries. + if [ ! -z "${NCTL_PATCH_REMOTE_CMD}" ]; then + $NCTL_PATCH_REMOTE_CMD ./casper-node + $NCTL_PATCH_REMOTE_CMD ./global-state-update-gen + fi + chmod +x ./casper-node chmod +x ./global-state-update-gen if [ "${#PROTOCOL_VERSION}" = '3' ]; then