Skip to content

Commit

Permalink
Avoid UnicodeDecodeError for non-utf8 QueryEvents (#465)
Browse files Browse the repository at this point in the history
Query strings in QueryEvents that appear in the binlog stream are not
necessarily utf-8 encoded, but the current implementation handles
only utf-8.

This commit adds the `errors="backslashreplace"` kwarg to decode(), to
avoid a runtime error and insert \xNN escape sequences for byte
sequences that are not valid utf-8. It includes a test that generates a
QueryEvent with latin-1 encoding, which fails without the fix.

Co-authored-by: Oliver Seemann <oliver.seemann@olx.com>
  • Loading branch information
oseemann and Oliver Seemann authored Sep 1, 2023
1 parent 73e2eeb commit c2364bb
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 4 deletions.
5 changes: 3 additions & 2 deletions pymysqlreplication/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,9 @@ def __init__(self, from_packet, event_size, table_map, ctl_connection, **kwargs)
self.schema = self.packet.read(self.schema_length)
self.packet.advance(1)

self.query = self.packet.read(event_size - 13 - self.status_vars_length
- self.schema_length - 1).decode("utf-8")
query = self.packet.read(event_size - 13 - self.status_vars_length
- self.schema_length - 1)
self.query = query.decode("utf-8", errors='backslashreplace')
#string[EOF] query

def _dump(self):
Expand Down
4 changes: 2 additions & 2 deletions pymysqlreplication/tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ class PyMySQLReplicationTestCase(base):
def ignoredEvents(self):
return []

def setUp(self):
def setUp(self, charset="utf8"):
# default
self.database = {
"host": os.environ.get("MYSQL_5_7") or "localhost",
"user": "root",
"passwd": "",
"port": 3306,
"use_unicode": True,
"charset": "utf8",
"charset": charset,
"db": "pymysqlreplication_test"
}

Expand Down
18 changes: 18 additions & 0 deletions pymysqlreplication/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,24 @@ def test_rows_query_log_event(self):
event = self.stream.fetchone()
self.assertIsInstance(event, RowsQueryLogEvent)

class TestLatin1(base.PyMySQLReplicationTestCase):
    """Binlog parsing tests that run over a latin1 client connection."""

    def setUp(self):
        """Run the shared fixture setup with the connection charset set to latin1."""
        super().setUp(charset='latin1')

    def test_query_event_latin1(self):
        """
        Ensure query events with a non-utf8 encoded query are parsed without errors.
        """
        self.stream = BinLogStreamReader(self.database, server_id=1024, only_events=[QueryEvent])
        self.execute("CREATE TABLE test_latin1_ÖÆÛ (a INT)")
        self.execute("COMMIT")
        # Sanity check on the fixture: these are the latin-1 bytes of the
        # non-ASCII table-name suffix, which are not valid utf-8.
        self.assertEqual("ÖÆÛ".encode('latin-1'), b'\xd6\xc6\xdb')

        event = self.stream.fetchone()
        self.assertTrue(event.query.startswith("CREATE TABLE test"))
        # Bytes that cannot be decoded as utf-8 are expected to come back as
        # literal \xNN escape sequences (decode(errors='backslashreplace')).
        self.assertEqual(event.query, r"CREATE TABLE test_latin1_\xd6\xc6\xdb (a INT)")


if __name__ == "__main__":
import unittest
Expand Down

0 comments on commit c2364bb

Please sign in to comment.