Skip to content

Commit

Permalink
Add support for using a SOCKS5 proxy to connect to live replica datab…
Browse files Browse the repository at this point in the history
…ase (#25)

* Add support for using a SOCKS5 proxy to connect to live replica databases

* Fix with black
  • Loading branch information
audiodude authored Oct 7, 2023
1 parent 75a35e6 commit 822379a
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 14 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ config.yaml
.python-version
.tox/
*.swp
.vscode
.tool-versions
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ services:
- .:/app
- results:/results
entrypoint: ["celery", "--app", "quarry.web.worker", "worker", ]
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on:
- "db"
- "redis"
Expand Down
20 changes: 15 additions & 5 deletions quarry/default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,22 @@ task_acks_late: True # Tasks are idempotent!
task_track_started: True
worker_prefetch_multiplier: 1 # Tasks can run for a long time

# Run queries against the live Wikimedia replica databases. This requires a
# Toolforge account, so that you can retrieve your credentials from
# ~/replica.my.cnf (and log into Toolforge to establish the SOCKS5 proxy). Leave
# the two lines below commented out if you're not using a SOCKS5 proxy.
# REPLICA_SOCKS5_PROXY_HOST is the address of the host running the Docker
# container, through which the SOCKS5 proxy is reached. On Windows or macOS this
# might be `host.docker.internal`.
# REPLICA_SOCKS5_PROXY_HOST: '172.17.0.1'
# REPLICA_SOCKS5_PROXY_PORT: 1080


# Run queries against a fake wiki database
REPLICA_DOMAIN: ''
REPLICA_HOST: 'mywiki'
REPLICA_DB: 'mywiki_p'
REPLICA_USER: 'repl'
REPLICA_PASSWORD: 'repl'
# Change these 3 lines if you're using the live replicas.
REPLICA_DOMAIN: '' # Change to `analytics.db.svc.wikimedia.cloud` for live replicas
REPLICA_USER: 'repl' # For live replicas, your replica.my.cnf username
REPLICA_PASSWORD: 'repl' # For live replicas, your replica.my.cnf password

REPLICA_PORT: 3306
OUTPUT_PATH_TEMPLATE: '/results/%s/%s/%s.sqlite'
REDIS_HOST: 'redis'
Expand Down
34 changes: 25 additions & 9 deletions quarry/web/replica.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pymysql
import socks


class ReplicaConnectionException(Exception):
Expand All @@ -18,6 +19,7 @@ def _db_name_mangler(self):

if self.dbname == "meta" or self.dbname == "meta_p":
self.database_name = "s7"

self.database_p = "meta_p"
elif self.dbname == "centralauth" or self.dbname == "centralauth_p":
self.database_name = "s7"
Expand Down Expand Up @@ -55,15 +57,29 @@ def connection(self, db):
if self.config["REPLICA_DOMAIN"]
else self.database_name
)
self._replica = pymysql.connect(
host=repl_host,
db=self.database_p,
user=self.config["REPLICA_USER"],
passwd=self.config["REPLICA_PASSWORD"],
port=self.config["REPLICA_PORT"],
charset="utf8",
client_flag=pymysql.constants.CLIENT.MULTI_STATEMENTS,
)
connect_opts = {
"db": self.database_p,
"user": self.config["REPLICA_USER"],
"passwd": self.config["REPLICA_PASSWORD"],
"charset": "utf8",
"client_flag": pymysql.constants.CLIENT.MULTI_STATEMENTS,
}

if not self.config.get("REPLICA_SOCKS5_PROXY_HOST"):
self._replica = pymysql.connect(
host=repl_host, port=self.config["REPLICA_PORT"], **connect_opts
)
else:
self._replica = pymysql.connect(defer_connect=True, **connect_opts)

sock = socks.socksocket()
sock.set_proxy(
socks.SOCKS5,
addr=self.config["REPLICA_SOCKS5_PROXY_HOST"],
port=self.config["REPLICA_SOCKS5_PROXY_PORT"],
)
sock.connect((repl_host, self.config["REPLICA_PORT"]))
self._replica.connect(sock=sock)

@connection.deleter
def connection(self):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ importlib-metadata==4.6.3
zipp==3.5.0
typing-extensions==3.10.0.0
flask_caching==2.0.2
PySocks==1.7.1

0 comments on commit 822379a

Please sign in to comment.