Skip to content

Commit

Permalink
Character set tracking
Browse files Browse the repository at this point in the history
Added current_character_set and set_character_set

close #206
  • Loading branch information
anarthal committed Jan 26, 2024
1 parent c21a05d commit bf2eb59
Show file tree
Hide file tree
Showing 22 changed files with 627 additions and 55 deletions.
148 changes: 146 additions & 2 deletions include/boost/mysql/any_connection.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#ifndef BOOST_MYSQL_ANY_CONNECTION_HPP
#define BOOST_MYSQL_ANY_CONNECTION_HPP

#include <boost/mysql/character_set.hpp>
#include <boost/mysql/connect_params.hpp>
#include <boost/mysql/defaults.hpp>
#include <boost/mysql/diagnostics.hpp>
Expand Down Expand Up @@ -241,6 +242,44 @@ class any_connection
*/
bool backslash_escapes() const noexcept
{
    // Pure forwarder: the value is provided by the connection implementation object.
    return impl_.backslash_escapes();
}

/**
* \brief Returns the character set used by this connection.
* \details
* The MySQL protocol doesn't expose a clean way to track the character set
* used by this connection. The character set can change inadvertently, either through
* SQL queries or by calling \ref reset_connection.
* \n
* Connections attempt to keep track of the current character set
* used by the connection. When the character set is known, this function returns
* a non-null pointer to the character set currently in use. If the character set
* is unknown, returns `nullptr`.
* \n
* The following functions can modify the return value of this function: \n
* \li Prior to connection, the character set is always unknown.
* \li \ref connect and \ref async_connect may set the current character set
* to a known value, depending on the requested collation.
* \li \ref set_character_set and \ref async_set_character_set always
* set the current character set to the passed value.
* \li \ref reset_connection and \ref async_reset_connection always reset the current character
* set to an unknown value.
*
* \par Avoid changing the character set directly
* If you change the connection's character set directly using SQL statements,
* like in `conn.execute("SET NAMES utf8mb4")`, the client has no way to track this change,
* and this function will return incorrect results. If you're using this function, avoid: \n
* \li The `SET NAMES` statement
* \li The `SET CHARACTER SET` statement
* \li Modifying the `character_set_client`, `character_set_connection` and `character_set_results`
* session variables.
*
* \par Exception safety
* No-throw guarantee.
*
* \par Object lifetimes
* This function returns a pointer to the connection's internal storage. It will be valid
* as long as `*this` is alive and valid. The pointed-to object is the connection's tracked
* value, so operations that change the tracked character set may overwrite it.
*/
const character_set* current_character_set() const noexcept { return impl_.current_character_set(); }

/// \copydoc connection::meta_mode
metadata_mode meta_mode() const noexcept
{
    // Pure forwarder: the value is provided by the connection implementation object.
    return impl_.meta_mode();
}

Expand Down Expand Up @@ -278,6 +317,10 @@ class any_connection
* If the server doesn't support it, this function will fail with \ref
* client_errc::server_doesnt_support_ssl.
* \n
* If `params.connection_collation` is within a set of well-known collations, this function
* sets the current character set, such that \ref current_character_set returns a non-null value.
* The default collation (`utf8mb4_general_ci`) is the only one guaranteed to be in the set of well-known
* collations.
*/
void connect(const connect_params& params, error_code& ec, diagnostics& diag)
{
Expand Down Expand Up @@ -798,6 +841,66 @@ class any_connection
);
}

/**
* \brief Sets the connection's character set, as per SET NAMES.
* \details
* Sets the connection's character set by running a
* <a href="https://dev.mysql.com/doc/refman/8.0/en/set-names.html">`SET NAMES`</a>
* SQL statement, using the passed \ref character_set::name as the charset name to set.
* \n
* This function will also update the value returned by \ref current_character_set, so
* prefer using this function over raw SQL statements.
* \n
* If the server was unable to set the character set to the requested value (e.g. because
* the server does not support the requested charset), this function will fail,
* as opposed to how \ref connect behaves when an unsupported collation is passed.
* This is a limitation of MySQL servers.
* \n
* The connection must be established before calling this function, since it
* involves communicating with the server.
*
* \par Object lifetimes
* `charset` will be copied as required, and does not need to be kept alive.
*/
void set_character_set(const character_set& charset, error_code& err, diagnostics& diag)
{
// The params object stores its own copy of charset plus a pointer to diag;
// err is handed to impl_.run to receive the outcome of the operation.
impl_.run(impl_.make_params_set_character_set(charset, diag), err);
}

/// \copydoc set_character_set
void set_character_set(const character_set& charset)
{
error_code err;
diagnostics diag;
set_character_set(charset, err, diag);
detail::throw_on_error_loc(err, diag, BOOST_CURRENT_LOCATION);
}

/**
* \copydoc set_character_set
* \details
* \n
* \par Handler signature
* The handler signature for this operation is `void(boost::mysql::error_code)`.
*/
template <BOOST_ASIO_COMPLETION_TOKEN_FOR(void(::boost::mysql::error_code)) CompletionToken>
auto async_set_character_set(const character_set& charset, CompletionToken&& token)
BOOST_MYSQL_RETURN_TYPE(detail::async_set_character_set_t<CompletionToken&&>)
{
// No diagnostics object was supplied by the caller: delegate to the diagnostics-taking
// overload, passing the connection's shared diagnostics object instead.
return async_set_character_set(charset, impl_.shared_diag(), std::forward<CompletionToken>(token));
}

/// \copydoc async_set_character_set
template <BOOST_ASIO_COMPLETION_TOKEN_FOR(void(::boost::mysql::error_code)) CompletionToken>
auto async_set_character_set(const character_set& charset, diagnostics& diag, CompletionToken&& token)
BOOST_MYSQL_RETURN_TYPE(detail::async_set_character_set_t<CompletionToken&&>)
{
// The params object holds charset by value and diag by pointer; the completion
// token is perfectly forwarded to impl_.async_run.
return impl_.async_run(
impl_.make_params_set_character_set(charset, diag),
std::forward<CompletionToken>(token)
);
}

/// \copydoc connection::ping
void ping(error_code& err, diagnostics& diag)
{
    // Build the ping parameters around diag and hand them, with err, to impl_.run.
    impl_.run(impl_.make_params_ping(diag), err);
}

Expand Down Expand Up @@ -825,7 +928,42 @@ class any_connection
return impl_.async_run(impl_.make_params_ping(diag), std::forward<CompletionToken>(token));
}

/// \copydoc connection::reset_connection
/**
* \brief Resets server-side session state, like variables and prepared statements.
* \details
* Resets all server-side state for the current session:
* \n
* \li Rolls back any active transactions and resets autocommit mode.
* \li Releases all table locks.
* \li Drops all temporary tables.
* \li Resets all session system variables to their default values (including the ones set by `SET
* NAMES`) and clears all user-defined variables.
* \li Closes all prepared statements.
* \n
* A full reference on the affected session state can be found
* <a href="https://dev.mysql.com/doc/c-api/8.0/en/mysql-reset-connection.html">here</a>.
* \n
* \n
* This function will not reset the current physical connection and won't cause re-authentication.
* It is faster than closing and re-opening a connection.
* \n
* The connection must be connected and authenticated before calling this function.
* This function involves communication with the server, and thus may fail.
*
* \par Warning on character sets
* This function will restore the connection's character set and collation **to the server's default**,
* and not to the one specified during connection establishment. Some servers have `latin1` as their
* default character set, which is not usually what you want. Since there is no way to know this
* character set, \ref current_character_set will return `nullptr` after the operation succeeds.
* We recommend always using \ref set_character_set or \ref async_set_character_set after calling this
* function.
* \n
* You can find the character set that your server will use after the reset by running:
* \code
* SELECT @@global.character_set_client, @@global.character_set_connection,
* @@global.character_set_results;
* \endcode
*/
void reset_connection(error_code& err, diagnostics& diag)
{
impl_.run(impl_.make_params_reset_connection(diag), err);
Expand All @@ -840,7 +978,13 @@ class any_connection
detail::throw_on_error_loc(err, diag, BOOST_CURRENT_LOCATION);
}

/// \copydoc connection::async_reset_connection
/**
* \copydoc reset_connection
* \details
* \n
* \par Handler signature
* The handler signature for this operation is `void(boost::mysql::error_code)`.
*/
template <BOOST_ASIO_COMPLETION_TOKEN_FOR(void(::boost::mysql::error_code)) CompletionToken>
auto async_reset_connection(CompletionToken&& token)
BOOST_MYSQL_RETURN_TYPE(detail::async_reset_connection_t<CompletionToken&&>)
Expand Down
10 changes: 10 additions & 0 deletions include/boost/mysql/detail/algo_params.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#ifndef BOOST_MYSQL_DETAIL_ALGO_PARAMS_HPP
#define BOOST_MYSQL_DETAIL_ALGO_PARAMS_HPP

#include <boost/mysql/character_set.hpp>
#include <boost/mysql/diagnostics.hpp>
#include <boost/mysql/handshake_params.hpp>
#include <boost/mysql/rows_view.hpp>
Expand All @@ -23,6 +24,7 @@

namespace boost {
namespace mysql {

namespace detail {

struct connect_algo_params
Expand Down Expand Up @@ -114,6 +116,14 @@ struct reset_connection_algo_params
using result_type = void;
};

// Parameter pack for the set-character-set operation. It is brace-initialized as
// {&diag, charset}, so the member order below must not change.
struct set_character_set_algo_params
{
// Points to the caller-supplied diagnostics object
diagnostics* diag;
// The character set to apply, stored by value
character_set charset;

// Result type associated with this operation (none)
using result_type = void;
};

struct quit_connection_algo_params
{
diagnostics* diag;
Expand Down
15 changes: 15 additions & 0 deletions include/boost/mysql/detail/connection_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ namespace mysql {
template <class... StaticRow>
class static_execution_state;

struct character_set;

namespace detail {

// Forward decl
Expand Down Expand Up @@ -233,6 +235,7 @@ class connection_impl
BOOST_MYSQL_DECL std::vector<field_view>& get_shared_fields() noexcept;
BOOST_MYSQL_DECL bool ssl_active() const noexcept;
BOOST_MYSQL_DECL bool backslash_escapes() const noexcept;
BOOST_MYSQL_DECL const character_set* current_character_set() const noexcept;

// Generic algorithm
template <class AlgoParams, class CompletionToken>
Expand Down Expand Up @@ -429,6 +432,15 @@ class connection_impl
return {&diag, stmt.id()};
}

// Set character set: bundles a pointer to diag and a copy of charset
set_character_set_algo_params make_params_set_character_set(
    const character_set& charset,
    diagnostics& diag
) const noexcept
{
    return set_character_set_algo_params{&diag, charset};
}

// Ping: the params only carry a pointer to the diagnostics object
ping_algo_params make_params_ping(diagnostics& diag) const noexcept
{
    return {&diag};
}

Expand Down Expand Up @@ -495,6 +507,9 @@ using async_prepare_statement_t = async_run_t<prepare_statement_algo_params, Com
// Shorthand for async_run_t instantiated with close_statement_algo_params
template <class CompletionToken>
using async_close_statement_t = async_run_t<close_statement_algo_params, CompletionToken>;

// Shorthand for async_run_t instantiated with set_character_set_algo_params
template <class CompletionToken>
using async_set_character_set_t = async_run_t<set_character_set_algo_params, CompletionToken>;

// Shorthand for async_run_t instantiated with ping_algo_params
template <class CompletionToken>
using async_ping_t = async_run_t<ping_algo_params, CompletionToken>;

Expand Down
6 changes: 6 additions & 0 deletions include/boost/mysql/impl/connection_impl.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,10 @@ boost::mysql::diagnostics& boost::mysql::detail::connection_impl::shared_diag()
return st_->data().shared_diag;
}

const boost::mysql::character_set* boost::mysql::detail::connection_impl::current_character_set(
) const noexcept
{
    // The connection state data decides whether a character set is currently known;
    // its charset_ptr() result is returned unchanged.
    const auto& state_data = st_->data();
    return state_data.charset_ptr();
}

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@
#include <boost/mysql/impl/internal/sansio/read_some_rows.hpp>
#include <boost/mysql/impl/internal/sansio/read_some_rows_dynamic.hpp>
#include <boost/mysql/impl/internal/sansio/reset_connection.hpp>
#include <boost/mysql/impl/internal/sansio/set_character_set.hpp>
#include <boost/mysql/impl/internal/sansio/start_execution.hpp>

#include <boost/asio/coroutine.hpp>
#include <boost/variant2/variant.hpp>

#include <cstddef>
#include <utility>

namespace boost {
namespace mysql {
Expand All @@ -52,6 +52,7 @@ template <> struct get_algo<read_some_rows_algo_params> { using type = read_some
template <> struct get_algo<read_some_rows_dynamic_algo_params> { using type = read_some_rows_dynamic_algo; };
template <> struct get_algo<prepare_statement_algo_params> { using type = prepare_statement_algo; };
template <> struct get_algo<close_statement_algo_params> { using type = close_statement_algo; };
template <> struct get_algo<set_character_set_algo_params> { using type = set_character_set_algo; };
template <> struct get_algo<ping_algo_params> { using type = ping_algo; };
template <> struct get_algo<reset_connection_algo_params> { using type = reset_connection_algo; };
template <> struct get_algo<quit_connection_algo_params> { using type = quit_connection_algo; };
Expand All @@ -71,6 +72,7 @@ class connection_state
read_some_rows_dynamic_algo,
prepare_statement_algo,
close_statement_algo,
set_character_set_algo,
ping_algo,
reset_connection_algo,
quit_connection_algo,
Expand Down
10 changes: 10 additions & 0 deletions include/boost/mysql/impl/internal/sansio/connection_state_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#ifndef BOOST_MYSQL_IMPL_INTERNAL_SANSIO_CONNECTION_STATE_DATA_HPP
#define BOOST_MYSQL_IMPL_INTERNAL_SANSIO_CONNECTION_STATE_DATA_HPP

#include <boost/mysql/character_set.hpp>
#include <boost/mysql/diagnostics.hpp>
#include <boost/mysql/field_view.hpp>
#include <boost/mysql/metadata_mode.hpp>
Expand Down Expand Up @@ -59,13 +60,21 @@ struct connection_state_data
// be disabled using a variable. OK packets include a flag with this info.
bool backslash_escapes{true};

// The current character set, or a default-constructed character set (with all nullptrs) if unknown
character_set current_charset{};

// Reader and writer
message_reader reader;
message_writer writer;

bool ssl_active() const noexcept
{
    // True only while the ssl member is in the active state
    return ssl == ssl_state::active;
}
bool supports_ssl() const noexcept
{
    // Any state other than "unsupported" counts as supporting SSL
    return ssl != ssl_state::unsupported;
}

const character_set* charset_ptr() const noexcept
{
    // An unknown character set is represented by a null name: report it as nullptr
    if (current_charset.name == nullptr)
        return nullptr;

    // Otherwise expose the stored character set itself
    return &current_charset;
}

connection_state_data(std::size_t read_buffer_size, bool transport_supports_ssl = false)
: ssl(transport_supports_ssl ? ssl_state::inactive : ssl_state::unsupported), reader(read_buffer_size)
{
Expand All @@ -82,6 +91,7 @@ struct connection_state_data
if (supports_ssl())
ssl = ssl_state::inactive;
backslash_escapes = true;
current_charset = character_set{};
}
};

Expand Down
24 changes: 24 additions & 0 deletions include/boost/mysql/impl/internal/sansio/handshake.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
#ifndef BOOST_MYSQL_IMPL_INTERNAL_SANSIO_HANDSHAKE_HPP
#define BOOST_MYSQL_IMPL_INTERNAL_SANSIO_HANDSHAKE_HPP

#include <boost/mysql/character_set.hpp>
#include <boost/mysql/diagnostics.hpp>
#include <boost/mysql/error_code.hpp>
#include <boost/mysql/handshake_params.hpp>
#include <boost/mysql/mysql_collations.hpp>

#include <boost/mysql/detail/algo_params.hpp>
#include <boost/mysql/detail/ok_view.hpp>
Expand Down Expand Up @@ -70,6 +72,27 @@ class handshake_algo : public sansio_algorithm, asio::coroutine
auth_response auth_resp_;
std::uint8_t sequence_number_{0};

// Attempts to map the collation_id to a character set. We try to be conservative
// here, since servers will happily accept unknown collation IDs, silently defaulting
// to the server's default character set (often latin1, which is not Unicode).
// Collations outside the lists below yield a default-constructed character_set.
static character_set collation_id_to_charset(std::uint16_t collation_id) noexcept
{
    // utf8mb4 collations that are recognized
    const bool is_utf8mb4 = collation_id == mysql_collations::utf8mb4_bin ||
                            collation_id == mysql_collations::utf8mb4_general_ci;
    if (is_utf8mb4)
        return utf8mb4_charset;

    // latin1 collations that are recognized
    const bool is_latin1 = collation_id == mysql_collations::latin1_german1_ci ||
                           collation_id == mysql_collations::latin1_swedish_ci ||
                           collation_id == mysql_collations::latin1_danish_ci ||
                           collation_id == mysql_collations::latin1_german2_ci ||
                           collation_id == mysql_collations::latin1_bin ||
                           collation_id == mysql_collations::latin1_general_ci ||
                           collation_id == mysql_collations::latin1_general_cs ||
                           collation_id == mysql_collations::latin1_spanish_ci;
    if (is_latin1)
        return latin1_charset;

    // Anything else: no character set is reported
    return character_set{};
}

// Once the handshake is processed, the capabilities are stored in the connection state.
// SSL is used when those capabilities include CLIENT_SSL.
bool use_ssl() const noexcept
{
    return st_->current_capabilities.has(CLIENT_SSL);
}

Expand Down Expand Up @@ -157,6 +180,7 @@ class handshake_algo : public sansio_algorithm, asio::coroutine
{
st_->is_connected = true;
st_->backslash_escapes = ok.backslash_escapes();
st_->current_charset = collation_id_to_charset(hparams_.connection_collation());
}

error_code process_ok()
Expand Down
Loading

0 comments on commit bf2eb59

Please sign in to comment.