diff --git a/beacon_node/beacon_chain/src/migrate.rs b/beacon_node/beacon_chain/src/migrate.rs index f435d6adb6..167345f72c 100644 --- a/beacon_node/beacon_chain/src/migrate.rs +++ b/beacon_node/beacon_chain/src/migrate.rs @@ -11,7 +11,7 @@ use std::sync::{mpsc, Arc}; use std::thread; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use store::hot_cold_store::{migrate_database, HotColdDBError}; -use store::iter::StateRootsIterator; +use store::iter::{BlockRootsIterator, StateRootsIterator}; use store::{DBColumn, Error, HotStateSummary, ItemStore, StoreItem, StoreOp}; pub use store::{HotColdDB, MemoryStore}; use types::{ @@ -514,14 +514,6 @@ impl, Cold: ItemStore> BackgroundMigrator new_finalized_checkpoint.epoch, ); - let mut hot_db_ops = vec![]; - // TODO(hdiff): iterate blocks from finalized block up to old_finalized_slot - let finalized_blocks = vec![]; - Self::prune_non_checkpoint_sync_committee_branches(&finalized_blocks, &mut hot_db_ops); - // if store.config.prune_payloads { - Self::prune_finalized_payloads(&finalized_blocks, &mut hot_db_ops); - // } - debug!( log, "Extra pruning information"; @@ -529,8 +521,6 @@ impl, Cold: ItemStore> BackgroundMigrator format!("{:?}", new_finalized_checkpoint.root), ); - ///// - let summaries_dag = { let state_summaries = store .hot_db @@ -568,26 +558,26 @@ impl, Cold: ItemStore> BackgroundMigrator= old_finalized_slot) + .map_or(true, |(_, slot)| *slot >= old_finalized_slot) }) - .map(|res| res.map(|(_, state_root)| state_root)) - .collect::, _>>()?; + .map(|res| res.map(|(state_root, _)| state_root)) + .collect::, _>>()?; + + // TODO(hdiff): Could re-use the summaries dag + let newly_finalized_blocks = BlockRootsIterator::new(&store, new_finalized_state) + .take_while(|res| { + res.as_ref() + .map_or(true, |(_, slot)| *slot >= old_finalized_slot) + }) + .collect::, _>>()?; // Compute the set of finalized state roots that we must keep to make the dynamic HDiff system // work. - // TODO(hdiff): must not delete *all* finalized hdiffs. Instead, keep - // the more recent diff of each layer including the snapshot. - // Implement a routine somewhere to figure out which diffs should be kept - // Given a start slot, and the current finalized state: - // - iterate each hdiff layer and compute the most recent point <= finalized slot let required_finalized_diff_state_slots = store.hierarchy_hot.closest_layer_points(new_finalized_slot); @@ -601,23 +591,21 @@ impl, Cold: ItemStore> BackgroundMigrator> = abandoned_blocks .into_iter() .map(Into::into) @@ -650,11 +638,13 @@ impl, Cold: ItemStore> BackgroundMigrator, Cold: ItemStore> BackgroundMigrator>, ) { - for (_, block_root) in finalized_blocks { + for (block_root, _) in finalized_blocks { // Delete the execution payload if payload pruning is enabled. At a skipped slot we may // delete the payload for the finalized block itself, but that's OK as we only guarantee // that payloads are present for slots >= the split slot. The payload fetching code is also @@ -677,14 +667,14 @@ impl, Cold: ItemStore> BackgroundMigrator>, ) { let mut epoch_boundary_blocks = HashSet::new(); let mut non_checkpoint_block_roots = HashSet::new(); // Then, iterate states in slot ascending order, as they are stored wrt previous states. - for (slot, block_root) in finalized_blocks { + for (block_root, slot) in finalized_blocks { // At a missed slot, `state_root_iter` will return the block root // from the previous non-missed slot. This ensures that the block root at an // epoch boundary is always a checkpoint block root. We keep track of block roots diff --git a/beacon_node/beacon_chain/src/schema_change/migration_schema_v23.rs b/beacon_node/beacon_chain/src/schema_change/migration_schema_v23.rs index e2d3dc0de2..724f041b05 100644 --- a/beacon_node/beacon_chain/src/schema_change/migration_schema_v23.rs +++ b/beacon_node/beacon_chain/src/schema_change/migration_schema_v23.rs @@ -31,21 +31,15 @@ pub fn upgrade_to_v23( let split = db.get_split_info(); - // TODO(hdiff): sort summaries topologically starting from finalized + let state_summaries_dag = new_dag::(&db)?; + + // Sort summaries by slot so we have their ancestor diffs already stored when we store them. // If the summaries are sorted topologically we can insert them into the DB like if they were a // new state, re-using existing code. As states are likely to be sequential the diff cache // should kick in making the migration more efficient. If we just iterate the column of // summaries we may get distance state of each iteration. - let state_summaries_dag = new_dag::(&db)?; - - // Sort summaries by slot so we have their ancestor diffs already stored when we store them. let summaries_by_slot = state_summaries_dag.summaries_by_slot_ascending(); - // TODO(hdiff): Should prune states associated with summaries that are not descendant of finalized? - - // TODO(hdiff): Must create the finalized diff points in the hot DB if the node checkpoint synced long - // ago. Also consider availability of the finalized state from the hot DB. - // Upgrade all hot DB state summaries to the new type: // - Set all summaries of boundary states as `Snapshot` type // - Set all others are `Replay` pointing to `epoch_boundary_state_root` diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 7d251b4d62..3088b98434 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -1210,18 +1210,36 @@ impl, Cold: ItemStore> HotColdDB ); key_value_batch.push(KeyValueStoreOp::DeleteKey(state_temp_key)); - // Always delete diffs with summary. If a diff must be kept beyond finalization, - // do not issue a DeleteState op at that time. - key_value_batch.push(KeyValueStoreOp::DeleteKey(get_key_for_col( - DBColumn::BeaconStateHotDiff.into(), - state_root.as_slice(), - ))); - - // TODO(hdiff): Review under HDiff - if slot.map_or(true, |slot| slot % E::slots_per_epoch() == 0) { - let state_key = - get_key_for_col(DBColumn::BeaconState.into(), state_root.as_slice()); - key_value_batch.push(KeyValueStoreOp::DeleteKey(state_key)); + if let Some(slot) = slot { + match self.hierarchy_hot.storage_strategy(slot)? { + StorageStrategy::Snapshot => { + // Full state stored in this position + key_value_batch.push(KeyValueStoreOp::DeleteKey(get_key_for_col( + DBColumn::BeaconState.into(), + state_root.as_slice(), + ))); + } + StorageStrategy::DiffFrom(_) => { + // Diff stored in this position + key_value_batch.push(KeyValueStoreOp::DeleteKey(get_key_for_col( + DBColumn::BeaconStateHotDiff.into(), + state_root.as_slice(), + ))); + } + StorageStrategy::ReplayFrom(_) => { + // Nothing else to delete + } + } + } else { + // TODO(hdiff): should attempt to delete everything if slot is not available? + key_value_batch.push(KeyValueStoreOp::DeleteKey(get_key_for_col( + DBColumn::BeaconState.into(), + state_root.as_slice(), + ))); + key_value_batch.push(KeyValueStoreOp::DeleteKey(get_key_for_col( + DBColumn::BeaconStateHotDiff.into(), + state_root.as_slice(), + ))); } }