Skip to content

Commit

Permalink
pageserver: assert that uploads don't modify indexed layers (#10228)
Browse files Browse the repository at this point in the history
## Problem

It's not legal to modify layers that are referenced by the current layer
index. Assert this in the upload queue, as preparation for upload queue
reordering.

Touches #10096.

## Summary of changes

Add a debug assertion that the upload queue does not modify layers
referenced by the current index.

I could be convinced that this should be a plain assertion, but will be
conservative for now.
  • Loading branch information
erikgrinaker authored Jan 3, 2025
1 parent 1393cc6 commit a77e87a
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 6 deletions.
39 changes: 39 additions & 0 deletions pageserver/src/tenant/remote_timeline_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1943,6 +1943,30 @@ impl RemoteTimelineClient {
return;
}

// Assert that we don't modify a layer that's referenced by the current index.
if cfg!(debug_assertions) {
let modified = match &task.op {
UploadOp::UploadLayer(layer, layer_metadata, _) => {
vec![(layer.layer_desc().layer_name(), layer_metadata)]
}
UploadOp::Delete(delete) => {
delete.layers.iter().map(|(n, m)| (n.clone(), m)).collect()
}
// These don't modify layers.
UploadOp::UploadMetadata { .. } => Vec::new(),
UploadOp::Barrier(_) => Vec::new(),
UploadOp::Shutdown => Vec::new(),
};
if let Ok(queue) = self.upload_queue.lock().unwrap().initialized_mut() {
for (ref name, metadata) in modified {
debug_assert!(
!queue.clean.0.references(name, metadata),
"layer {name} modified while referenced by index",
);
}
}
}

let upload_result: anyhow::Result<()> = match &task.op {
UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => {
if let Some(OpType::FlushDeletion) = mode {
Expand Down Expand Up @@ -2509,6 +2533,21 @@ pub fn remote_layer_path(
RemotePath::from_string(&path).expect("Failed to construct path")
}

/// Returns true if a and b have the same layer path within a tenant/timeline. This is essentially
/// remote_layer_path(a) == remote_layer_path(b) without the string allocations.
///
/// TODO: there should be a variant of LayerName for the physical path that contains information
/// about the shard and generation, such that this could be replaced by a simple comparison.
pub fn is_same_remote_layer_path(
aname: &LayerName,
ameta: &LayerFileMetadata,
bname: &LayerName,
bmeta: &LayerFileMetadata,
) -> bool {
// NB: don't assert remote_layer_path(a) == remote_layer_path(b); too expensive even for debug.
aname == bname && ameta.shard == bmeta.shard && ameta.generation == bmeta.generation
}

pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {
RemotePath::from_string(&format!(
"tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PATH}"
Expand Down
21 changes: 15 additions & 6 deletions pageserver/src/tenant/remote_timeline_client/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ use std::collections::HashMap;
use chrono::NaiveDateTime;
use pageserver_api::models::AuxFilePolicy;
use serde::{Deserialize, Serialize};
use utils::id::TimelineId;

use super::is_same_remote_layer_path;
use crate::tenant::metadata::TimelineMetadata;
use crate::tenant::storage_layer::LayerName;
use crate::tenant::timeline::import_pgdata;
use crate::tenant::Generation;
use pageserver_api::shard::ShardIndex;

use utils::id::TimelineId;
use utils::lsn::Lsn;

/// In-memory representation of an `index_part.json` file
Expand Down Expand Up @@ -45,10 +45,8 @@ pub struct IndexPart {
#[serde(skip_serializing_if = "Option::is_none")]
pub import_pgdata: Option<import_pgdata::index_part_format::Root>,

/// Per layer file name metadata, which can be present for a present or missing layer file.
///
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
/// that latest version stores.
/// Layer filenames and metadata. For an index persisted in remote storage, all layers must
/// exist in remote storage.
pub layer_metadata: HashMap<LayerName, LayerFileMetadata>,

/// Because of the trouble of eyeballing the legacy "metadata" field, we copied the
Expand Down Expand Up @@ -143,6 +141,17 @@ impl IndexPart {
pub(crate) fn example() -> Self {
Self::empty(TimelineMetadata::example())
}

/// Returns true if the index contains a reference to the given layer (i.e. file path).
///
/// TODO: there should be a variant of LayerName for the physical remote path that contains
/// information about the shard and generation, to avoid passing in metadata.
pub fn references(&self, name: &LayerName, metadata: &LayerFileMetadata) -> bool {
let Some(index_metadata) = self.layer_metadata.get(name) else {
return false;
};
is_same_remote_layer_path(name, metadata, name, index_metadata)
}
}

/// Metadata gathered for each of the layer files.
Expand Down

1 comment on commit a77e87a

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

7377 tests run: 7014 passed, 1 failed, 362 skipped (full report)


Failures on Postgres 16

  • test_storage_controller_many_tenants[github-actions-selfhosted]: release-x86-64
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_storage_controller_many_tenants[release-pg16-github-actions-selfhosted]"
Flaky tests (2)

Postgres 17

Postgres 16

  • test_physical_replication_config_mismatch_too_many_known_xids: release-arm64

Code coverage* (full report)

  • functions: 31.2% (8410 of 26948 functions)
  • lines: 48.0% (66777 of 139204 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
a77e87a at 2025-01-03T18:32:41.609Z :recycle:

Please sign in to comment.