Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix remote call of s3Cluster function #583

Open
wants to merge 8 commits into
base: project-antalya-24.12.2
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Interpreters/ClusterProxy/executeQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ void executeQuery(
not_optimized_cluster->getName());

read_from_remote->setStepDescription("Read from remote replica");
read_from_remote->setRemoteFunction(is_remote_function);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To me, "setIsRemoteFunction" is slightly more natural.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed

plan->addStep(std::move(read_from_remote));
plan->addInterpreterContext(new_context);
plans.emplace_back(std::move(plan));
Expand Down
5 changes: 4 additions & 1 deletion src/Interpreters/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2702,8 +2702,11 @@ void Context::setCurrentQueryId(const String & query_id)

client_info.current_query_id = query_id_to_set;

if (client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY)
if (client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY
&& (getApplicationType() != ApplicationType::SERVER || client_info.initial_query_id.empty()))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the comment in ClientInfo.h, when query_kind == INITIAL_QUERY,
initial_query_id is equal to the current query ID.
Doesn't that contradict this condition?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, this should be a new query kind, something like REMOTE_INITIAL_QUERY, but that would break backward compatibility at the protocol level.

{
client_info.initial_query_id = client_info.current_query_id;
}
}

void Context::setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType background_operation)
Expand Down
9 changes: 8 additions & 1 deletion src/Processors/QueryPlan/ReadFromRemote.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream
my_main_table = main_table, my_table_func_ptr = table_func_ptr,
my_scalars = scalars, my_external_tables = external_tables,
my_stage = stage, local_delay = shard.local_delay,
add_agg_info, add_totals, add_extremes, async_read, async_query_sending]() mutable
add_agg_info, add_totals, add_extremes, async_read, async_query_sending,
my_is_remote_function = is_remote_function]() mutable
-> QueryPipelineBuilder
{
auto current_settings = my_context->getSettingsRef();
Expand Down Expand Up @@ -221,6 +222,8 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream
{DataTypeUInt32().createColumnConst(1, my_shard.shard_info.shard_num), std::make_shared<DataTypeUInt32>(), "_shard_num"}};
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
std::move(connections), query_string, header, my_context, my_throttler, my_scalars, my_external_tables, my_stage);
remote_query_executor->setRemoteFunction(my_is_remote_function);
remote_query_executor->setShardCount(my_shard_count);

auto pipe = createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending);
QueryPipelineBuilder builder;
Expand Down Expand Up @@ -304,6 +307,8 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
priority_func);
remote_query_executor->setLogger(log);
remote_query_executor->setPoolMode(PoolMode::GET_ONE);
remote_query_executor->setRemoteFunction(is_remote_function);
remote_query_executor->setShardCount(shard_count);

if (!table_func_ptr)
remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table);
Expand All @@ -320,6 +325,8 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
shard.shard_info.pool, query_string, shard.header, context, throttler, scalars, external_tables, stage);
remote_query_executor->setLogger(log);
remote_query_executor->setRemoteFunction(is_remote_function);
remote_query_executor->setShardCount(shard_count);

if (context->canUseTaskBasedParallelReplicas())
{
Expand Down
2 changes: 2 additions & 0 deletions src/Processors/QueryPlan/ReadFromRemote.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class ReadFromRemote final : public ISourceStep

void enableMemoryBoundMerging();
void enforceAggregationInOrder();
/// Records whether this step was created for a remote table function call.
/// The stored flag is later handed to every RemoteQueryExecutor this step creates.
void setRemoteFunction(bool is_remote_function_ = true)
{
    is_remote_function = is_remote_function_;
}

private:
ClusterProxy::SelectStreamFactory::Shards shards;
Expand All @@ -57,6 +58,7 @@ class ReadFromRemote final : public ISourceStep
UInt32 shard_count;
const String cluster_name;
std::optional<GetPriorityForLoadBalancing> priority_func_factory;
bool is_remote_function = false;

void addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard);
void addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard);
Expand Down
11 changes: 10 additions & 1 deletion src/QueryPipeline/RemoteQueryExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,16 @@ void RemoteQueryExecutor::sendQueryUnlocked(ClientInfo::QueryKind query_kind, As

auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings);
ClientInfo modified_client_info = context->getClientInfo();
modified_client_info.query_kind = query_kind;

/// "remote('1.1.1.{1,2}')" is not supported for now.
if (is_remote_function && (shard_count == 1))
{
modified_client_info.setInitialQuery();
modified_client_info.client_name = "ClickHouse server";
modified_client_info.interface = ClientInfo::Interface::TCP;
}
else
modified_client_info.query_kind = query_kind;

if (!duplicated_part_uuids.empty())
connections->sendIgnoredPartUUIDs(duplicated_part_uuids);
Expand Down
7 changes: 7 additions & 0 deletions src/QueryPipeline/RemoteQueryExecutor.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ class RemoteQueryExecutor

void setLogger(LoggerPtr logger) { log = logger; }

/// Records whether the query is sent on behalf of a remote table function.
/// sendQueryUnlocked consults this flag (together with the shard count) when
/// it prepares the client info that accompanies the query.
void setRemoteFunction(bool is_remote_function_ = true)
{
    is_remote_function = is_remote_function_;
}

/// Stores the number of shards taking part in the query.
/// sendQueryUnlocked only applies the remote-function handling when this equals 1.
void setShardCount(UInt32 shard_count_)
{
    shard_count = shard_count_;
}

const Block & getHeader() const { return header; }

IConnections & getConnections() { return *connections; }
Expand Down Expand Up @@ -302,6 +306,9 @@ class RemoteQueryExecutor

bool has_postponed_packet = false;

bool is_remote_function = false;
UInt32 shard_count = 0;

/// Parts uuids, collected from remote replicas
std::vector<UUID> duplicated_part_uuids;

Expand Down
56 changes: 55 additions & 1 deletion tests/integration/test_s3_cluster/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ def test_parallel_distributed_insert_select_with_schema_inference(started_cluste
node.query(
"""
CREATE TABLE parallel_insert_select ON CLUSTER 'first_shard' (a String, b UInt64)
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/insert_select_with_replicated', '{replica}')
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/parallel_insert_select', '{replica}')
ORDER BY (a, b);
"""
)
Expand Down Expand Up @@ -508,3 +508,57 @@ def test_cluster_default_expression(started_cluster):
)

assert result == expected_result


def test_remote_hedged(started_cluster):
node = started_cluster.instances["s0_0_0"]
pure_s3 = node.query(
"""
SELECT * from s3(
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')
ORDER BY (name, value, polygon)
LIMIT 1
"""
)
s3_distributed = node.query(
"""
SELECT * from remote('s0_0_1', s3Cluster(
'cluster_simple',
'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'))
ORDER BY (name, value, polygon)
LIMIT 1
SETTINGS use_hedged_requests=True
"""
)

assert TSV(pure_s3) == TSV(s3_distributed)


def test_remote_no_hedged(started_cluster):
node = started_cluster.instances["s0_0_0"]
pure_s3 = node.query(
"""
SELECT * from s3(
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')
ORDER BY (name, value, polygon)
LIMIT 1
"""
)
s3_distributed = node.query(
"""
SELECT * from remote('s0_0_1', s3Cluster(
'cluster_simple',
'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'))
ORDER BY (name, value, polygon)
LIMIT 1
SETTINGS use_hedged_requests=False
"""
)

assert TSV(pure_s3) == TSV(s3_distributed)
Copy link
Collaborator

@ilejn ilejn Jan 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the PR really s3Cluster specific?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it affects all *Cluster object storage functions. Should I add tests for the others?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure about new tests — maybe. It depends on how likely you think it is that this breaks something that worked before, or accidentally creates a "bridge" that, e.g., bypasses security.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a test for Iceberg, which is the most relevant one for us.