Skip to content

Commit

Permalink
refactor our test separation and how we run them (near#12458)
Browse files Browse the repository at this point in the history
This fundamentally removes the differentiation between "integration" and
"unit" tests from the test runner perspective, as well as removes the
reliance on the "expensive-tests" compile-time feature.

Instead tests are categorized by the underlying reason for which we had
the separation in the first place -- test execution speed. These are now
annotated with `slow_test_` and `ultraslow_test_` prefixes, and their
execution time is enforced by time limits in `nextest.toml`.

By using `nextest` filters we can cleanly filter out tests from these
three categories to be run in various contexts. For instance, maybe
quick checks locally don't need to run the slow tests every time --
`just nextest` by default now takes just 9 seconds (on my machine) to
run all the tests. Whereas previously (due to e.g. estimator smoke test)
it would take at least a minute.

Then a `just nextest-slow` is added in the rarer cases where one might
want to run the full suite that runs on CI. CI will run these mildly
slower tests for each PR.

Then what remains are the ultraslow tests which only run on nayduck and
can take quite some time to complete. These can now be trivially run
locally as well with `just nextest-all` without necessarily rebuilding
the entire project or "unignoring" tests that are marked as ignored for
other reasons (like for instance because they're just broken.)
  • Loading branch information
nagisa authored Nov 18, 2024
1 parent 6dab9ee commit 31a2ba0
Show file tree
Hide file tree
Showing 63 changed files with 521 additions and 672 deletions.
41 changes: 31 additions & 10 deletions .config/nextest.toml
Original file line number Diff line number Diff line change
@@ -1,24 +1,45 @@
# By default tests should not take more than 5 seconds to execute and will get killed after 15
# seconds.
#
# If that happens, instead of making the period longer here, change the test name to one of the
# next test "speed tiers" by using `slow_test_` or `ultra_slow_test` prefix for the test's name.
[profile.default]
slow-timeout = { period = "60s", terminate-after = 3, grace-period = "0s" }
slow-timeout = { period = "5s", terminate-after = 3, grace-period = "1s" }
default-filter = "not (test(/^(.*::slow_test|slow_test)/) | test(/^(.*::ultra_slow_test|ultra_slow_test)/))"
final-status-level = "slow"

# Tests that take a longer time to execute. `slow_test*` tests are given 1m30s to execute.
[[profile.default.overrides]]
filter = 'test(test_full_estimator)'
slow-timeout = { period = "10m", terminate-after = 3 }
retries = 0
threads-required = 2
filter = 'test(/^(.*::slow_test|slow_test)/)'
slow-timeout = { period = "30s", terminate-after = 3, grace-period = "1s" }

# Tests that take longer than the heat death of the universe of time to execute. Consider making
# the test faster, but if you must `ultra_slow_test*` tests are given 1h to execute. These are
# generally left for nayduck to execute.
[[profile.default.overrides]]
filter = 'test(/^(.*::ultra_slow_test|ultra_slow_test)/)'
slow-timeout = { period = "30m", terminate-after = 2, grace-period = "1s" }


# Unfortunately no support for inheriting profiles yet:
# https://github.com/nextest-rs/nextest/issues/387
[profile.ci]
slow-timeout = { period = "120s", terminate-after = 5 }
slow-timeout = { period = "5s", terminate-after = 5, grace-period = "1s" }
default-filter = "not test(/^(.*::ultra_slow_test|ultra_slow_test)/)"
# Try a few times before failing the whole test suite on a potentially spurious tests.
# The hope is that people will fix the spurious tests as they encounter them locally...
retries = { backoff = "fixed", count = 3, delay = "1s" }
failure-output = "final"
fail-fast = false

# Tests that take a longer time to execute. `slow_test*` tests are given 1m30s to execute.
[[profile.ci.overrides]]
filter = 'test(/^(.*::slow_test|slow_test)/)'
slow-timeout = { period = "30s", terminate-after = 3, grace-period = "1s" }

# Tests that take longer than the heat death of the universe of time to execute. Consider making
# the test faster, but if you must `ultra_slow_test*` tests are given 1h to execute. These are
# generally left for nayduck to execute.
[[profile.ci.overrides]]
filter = 'test(test_full_estimator)'
slow-timeout = { period = "10m", terminate-after = 3 }
retries = 0
threads-required = 2
filter = 'test(/^(.*::ultra_slow_test|ultra_slow_test)/)'
slow-timeout = { period = "30m", terminate-after = 2, grace-period = "1s" }
33 changes: 3 additions & 30 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,16 @@ jobs:
id: linux
os: ubuntu-22.04-16core
type: stable
runs_integ_tests: true
upload_profraws: true
- name: Linux Nightly
id: linux-nightly
os: ubuntu-22.04-16core
type: nightly
runs_integ_tests: true
upload_profraws: true
- name: MacOS
id: macos
os: macos-latest-xlarge
type: stable
runs_integ_tests: false
# TODO: Currently only computing linux coverage, because the MacOS runners
# have files at a different path and thus comes out duplicated.
upload_profraws: false
Expand All @@ -53,18 +50,11 @@ jobs:
tool: just,cargo-nextest,cargo-llvm-cov

# Run the tests:
- run: mkdir -p coverage/profraw/{unit,integration,binaries}
- run: mkdir -p coverage/profraw/{unit,binaries}
# - Run the unit tests, retrieving the coverage information
- run: just codecov-ci "nextest-unit ${{ matrix.type }}"
- run: just codecov-ci "nextest-slow ${{ matrix.type }}"
- run: mv coverage/codecov/{new,unit-${{matrix.id}}}.json
- run: mv coverage/profraw/{new,unit/${{matrix.id}}}.tar.zst
# - Run the integration tests, retrieving the coverage information
- run: just codecov-ci "nextest-integration ${{ matrix.type }}"
if: matrix.runs_integ_tests
- run: mv coverage/codecov/{new,integration-${{matrix.id}}}.json
if: matrix.runs_integ_tests
- run: mv coverage/profraw/{new,integration/${{matrix.id}}}.tar.zst
if: matrix.runs_integ_tests

# Cleanup the target directory, leaving only stuff interesting to llvm-cov, and tarball it
- run: just tar-bins-for-coverage-ci
Expand Down Expand Up @@ -348,8 +338,6 @@ jobs:
include:
- type: unit
profraws: unit
- type: integration
profraws: unit integration
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -423,21 +411,6 @@ jobs:
files: unit-macos.json
fail_ci_if_error: true
flags: unittests,macos
- uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d
with:
files: integration-linux.json
fail_ci_if_error: true
flags: integration-tests,linux
- uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d
with:
files: integration-linux-nightly.json
fail_ci_if_error: true
flags: integration-tests,linux-nightly
# - uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d
# with:
# files: integration-macos.json
# fail_ci_if_error: true
# flags: integration-tests,macos
- uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d
with:
files: py-backward-compat.json
Expand All @@ -463,7 +436,7 @@ jobs:
files: py-upgradability.json
fail_ci_if_error: true
flags: pytests,upgradability,linux

windows_public_libraries_check:
name: "Windows check for building public libraries"
runs-on: "windows-latest"
Expand Down
67 changes: 18 additions & 49 deletions Justfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
# FIXME: some of these tests don't work very well on MacOS at the moment. Should fix
# them at earliest convenience :)
# Also in addition to this, the `nextest-integration` test is currently disabled on macos
with_macos_excludes := if os() == "macos" {
"--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse"
} else {
""
}
# On MacOS, not all structs are collected by `inventory`. Non-incremental build fixes that.
# See https://github.com/dtolnay/inventory/issues/52.
with_macos_incremental := if os() == "macos" {
"CARGO_INCREMENTAL=0"
platform_excludes := if os() == "macos" {
"--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests"
} else if os() == "windows" {
"--exclude node-runtime --exclude runtime-params-estimator --exclude near-network --exclude estimator-warehouse --exclude integration-tests"
} else {
""
}

nightly_flags := "--features nightly,test_features"
public_libraries := "-p near-primitives -p near-crypto -p near-jsonrpc-primitives -p near-chain-configs -p near-primitives-core"

Expand All @@ -39,46 +35,23 @@ test-ci *FLAGS: check-cargo-fmt \
# tests that are as close to CI as possible, but not exactly the same code
test-extra: check-lychee

# all cargo tests, TYPE is "stable" or "nightly"
nextest TYPE *FLAGS: (nextest-unit TYPE FLAGS) (nextest-integration TYPE FLAGS)

# cargo unit tests, TYPE is "stable" or "nightly"
nextest-unit TYPE *FLAGS:
RUSTFLAGS="-D warnings" \
# all cargo tests,
# TYPE is one of "stable", "nightly"
nextest TYPE *FLAGS:
env RUSTFLAGS="-D warnings" \
cargo nextest run \
--locked \
--workspace \
--exclude integration-tests \
--cargo-profile dev-release \
{{ ci_hack_nextest_profile }} \
{{ with_macos_excludes }} \
{{ platform_excludes }} \
{{ if TYPE == "nightly" { nightly_flags } \
else if TYPE == "stable" { "" } \
else { error("TYPE is neighter 'nightly' nor 'stable'") } }} \
{{ FLAGS }}

# cargo integration tests, TYPE is "stable" or "nightly"
[linux]
nextest-integration TYPE *FLAGS:
RUSTFLAGS="-D warnings" \
cargo nextest run \
--locked \
--package integration-tests \
--cargo-profile dev-release \
{{ ci_hack_nextest_profile }} \
{{ if TYPE == "nightly" { nightly_flags } \
else if TYPE == "stable" { "" } \
else { error("TYPE is neither 'nightly' nor 'stable'") } }} \
{{ FLAGS }}
# Note: when re-enabling this on macos, ci.yml will need to be adjusted to report code coverage again
[macos]
nextest-integration TYPE *FLAGS:
@echo "Nextest integration tests are currently disabled on macos!"

[windows]
nextest-integration TYPE *FLAGS:
@echo "Nextest integration tests are currently disabled on windows!"

nextest-slow TYPE *FLAGS: (nextest TYPE "--ignore-default-filter -E 'default() + test(/^(.*::slow_test|slow_test)/)'" FLAGS)
nextest-all TYPE *FLAGS: (nextest TYPE "--ignore-default-filter -E 'all()'" FLAGS)

doctests:
cargo test --doc
Expand Down Expand Up @@ -164,22 +137,18 @@ check-lychee:
else { "Note: 'Too Many Requests' errors are allowed here but not in CI, set GITHUB_TOKEN to check them" } }}

# check tools/protocol-schema-check/res/protocol_schema.toml
check-protocol-schema: install-rustc-nightly
# On MacOS, not all structs are collected by `inventory`. Non-incremental build fixes that.
# See https://github.com/dtolnay/inventory/issues/52.
protocol_schema_env := "CARGO_TARGET_DIR=" + justfile_directory() + "/target/schema-check RUSTC_BOOTSTRAP=1 RUSTFLAGS='--cfg enable_const_type_id' CARGO_INCREMENTAL=0"
check-protocol-schema:
# Below, we *should* have been used `cargo +nightly ...` instead of
# `RUSTC_BOOTSTRAP=1`. However, the env var appears to be more stable.
# `nightly` builds are updated daily and may be broken sometimes, e.g.
# https://github.com/rust-lang/rust/issues/130769.
#
# If there is an issue with the env var, fall back to `cargo +nightly ...`.

# Test that checker is not broken
RUSTC_BOOTSTRAP=1 RUSTFLAGS="--cfg enable_const_type_id" \
cargo test -p protocol-schema-check --profile dev-artifacts

# Run the checker
RUSTC_BOOTSTRAP=1 RUSTFLAGS="--cfg enable_const_type_id" \
{{ with_macos_incremental }} \
cargo run -p protocol-schema-check --profile dev-artifacts
env {{protocol_schema_env}} cargo test -p protocol-schema-check --profile dev-artifacts
env {{protocol_schema_env}} cargo run -p protocol-schema-check --profile dev-artifacts

check_build_public_libraries:
cargo check {{public_libraries}}
3 changes: 1 addition & 2 deletions chain/chain/src/tests/doomslug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,7 @@ fn one_iter(
}

#[test]
#[cfg_attr(not(feature = "expensive_tests"), ignore)]
fn test_fuzzy_doomslug_liveness_and_safety() {
fn ultra_slow_test_fuzzy_doomslug_liveness_and_safety() {
for (time_to_gst_millis, height_goal) in
&[(0, 200), (1000, 200), (10000, 300), (100000, 400), (500000, 500)]
{
Expand Down
27 changes: 10 additions & 17 deletions chain/chain/src/tests/garbage_collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,7 @@ fn test_gc_remove_fork_small() {
}

#[test]
#[cfg_attr(not(feature = "expensive_tests"), ignore)]
fn test_gc_remove_fork_large() {
fn ultra_slow_test_gc_remove_fork_large() {
test_gc_remove_fork_common(20)
}

Expand Down Expand Up @@ -397,8 +396,7 @@ fn test_gc_not_remove_fork_small() {
}

#[test]
#[cfg_attr(not(feature = "expensive_tests"), ignore)]
fn test_gc_not_remove_fork_large() {
fn ultra_slow_test_gc_not_remove_fork_large() {
test_gc_not_remove_fork_common(20)
}

Expand All @@ -416,7 +414,7 @@ fn test_gc_not_remove_longer_fork() {

// This test creates forks from genesis
#[test]
fn test_gc_forks_from_genesis() {
fn slow_test_gc_forks_from_genesis() {
for fork_length in 1..=10 {
let chains = vec![
SimpleChain { from: 0, length: 101, is_removed: false },
Expand Down Expand Up @@ -450,7 +448,7 @@ fn test_gc_forks_from_genesis() {
}

#[test]
fn test_gc_overlap() {
fn slow_test_gc_overlap() {
for max_changes in 1..=20 {
let chains = vec![
SimpleChain { from: 0, length: 101, is_removed: false },
Expand Down Expand Up @@ -483,8 +481,7 @@ fn test_gc_boundaries_small() {
}

#[test]
#[cfg_attr(not(feature = "expensive_tests"), ignore)]
fn test_gc_boundaries_large() {
fn ultra_slow_test_gc_boundaries_large() {
test_gc_boundaries_common(20)
}

Expand All @@ -507,13 +504,12 @@ fn test_gc_random_common(runs: u64) {
}

#[test]
fn test_gc_random_small() {
fn slow_test_gc_random_small() {
test_gc_random_common(3);
}

#[test]
#[cfg_attr(not(feature = "expensive_tests"), ignore)]
fn test_gc_random_large() {
fn ultra_slow_test_gc_random_large() {
test_gc_random_common(25);
}

Expand Down Expand Up @@ -545,8 +541,7 @@ fn test_gc_pine_small() {
}

#[test]
#[cfg_attr(not(feature = "expensive_tests"), ignore)]
fn test_gc_pine() {
fn ultra_slow_test_gc_pine() {
for max_changes in 1..=20 {
let mut chains = vec![SimpleChain { from: 0, length: 101, is_removed: false }];
for i in 1..100 {
Expand Down Expand Up @@ -578,8 +573,7 @@ fn test_gc_star_small() {
}

#[test]
#[cfg_attr(not(feature = "expensive_tests"), ignore)]
fn test_gc_star_large() {
fn ultra_slow_test_gc_star_large() {
test_gc_star_common(20)
}

Expand Down Expand Up @@ -834,9 +828,8 @@ fn test_clear_old_data_fixed_height() {

/// Test that `gc_blocks_limit` works properly
#[test]
#[cfg_attr(not(feature = "expensive_tests"), ignore)]
#[allow(unreachable_code)]
fn test_clear_old_data_too_many_heights() {
fn ultra_slow_test_clear_old_data_too_many_heights() {
// TODO(#10634): panics on `clear_data` -> `clear_resharding_data` ->
// `MockEpochManager::is_next_block_epoch_start` apparently because
// epoch manager is not updated at all. Should we fix it together with
Expand Down
4 changes: 2 additions & 2 deletions chain/client/src/sync/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ mod test {
/// the peer doesn't get banned. (specifically, that the expected height downloaded gets properly
/// adjusted for time passed)
#[test]
fn test_slow_header_sync() {
fn slow_test_slow_header_sync() {
let network_adapter = Arc::new(MockPeerManagerAdapter::default());
let highest_height = 1000;

Expand Down Expand Up @@ -751,7 +751,7 @@ mod test {
}

#[test]
fn test_sync_from_very_behind() {
fn slow_test_sync_from_very_behind() {
let mock_adapter = Arc::new(MockPeerManagerAdapter::default());
let mut header_sync = HeaderSync::new(
Clock::real(),
Expand Down
6 changes: 3 additions & 3 deletions chain/client/src/tests/bug_repros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use near_primitives::block::Block;
use near_primitives::transaction::SignedTransaction;

#[test]
fn repro_1183() {
fn slow_test_repro_1183() {
init_test_logger();
run_actix(async {
let connectors: Arc<RwLock<Vec<ActorHandlesForTesting>>> = Arc::new(RwLock::new(vec![]));
Expand Down Expand Up @@ -169,7 +169,7 @@ fn repro_1183() {
}

#[test]
fn test_sync_from_archival_node() {
fn slow_test_sync_from_archival_node() {
init_test_logger();
let vs = ValidatorSchedule::new().num_shards(4).block_producers_per_epoch(vec![vec![
"test1".parse().unwrap(),
Expand Down Expand Up @@ -279,7 +279,7 @@ fn test_sync_from_archival_node() {
}

#[test]
fn test_long_gap_between_blocks() {
fn slow_test_long_gap_between_blocks() {
init_test_logger();
let vs = ValidatorSchedule::new()
.num_shards(2)
Expand Down
Loading

0 comments on commit 31a2ba0

Please sign in to comment.