diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..9748e28
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,68 @@
+# syntax = docker/dockerfile:1.4
+
+# CPython base image; PyPy3 is installed alongside it below.
+FROM python:3.12.4-slim-bookworm
+
+USER root
+
+# Build-time only, so apt stays non-interactive without leaking into the
+# environment of running containers.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Populated automatically by BuildKit (amd64, arm64, ...).
+ARG TARGETARCH
+
+# PyPy release to install; both values are used to build the download URL.
+ENV PYPY3_VERSION=7.3.17 \
+    PYPY3_PYTHON_VERSION=3.10
+
+# Install PyPy3 and its pip in a single layer. wget is only needed for the
+# two downloads, so it is purged in the same RUN: removing it in a later
+# layer would not make the image any smaller.
+# NOTE(review): the tarball is not checksum-verified; consider pinning the
+# SHA-256 published on the PyPy download page.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        bzip2 \
+        ca-certificates \
+        wget \
+    && ARCH="${TARGETARCH:-$(dpkg --print-architecture)}" && \
+    case "$ARCH" in \
+        "amd64") ARCH_SUFFIX="linux64" ;; \
+        "arm64") ARCH_SUFFIX="aarch64" ;; \
+        *) echo "Architecture $ARCH not supported." && exit 1 ;; \
+    esac && \
+    echo "Building for $ARCH architecture." && \
+    PYPY3_DOWNLOAD_URL="https://downloads.python.org/pypy/pypy${PYPY3_PYTHON_VERSION}-v${PYPY3_VERSION}-${ARCH_SUFFIX}.tar.bz2" && \
+    echo "Downloading PyPy3 from $PYPY3_DOWNLOAD_URL" && \
+    wget -nv "$PYPY3_DOWNLOAD_URL" -O /tmp/pypy3.tar.bz2 && \
+    mkdir -p /opt/pypy3 && \
+    tar -xjf /tmp/pypy3.tar.bz2 -C /opt/pypy3 --strip-components=1 && \
+    ln -sf /opt/pypy3/bin/pypy3 /usr/local/bin/pypy3 && \
+    ln -sf /opt/pypy3/bin/pypy3 /usr/local/bin/pypy && \
+    wget -nv https://bootstrap.pypa.io/get-pip.py -O /tmp/get-pip.py && \
+    pypy3 /tmp/get-pip.py --no-cache-dir && \
+    ln -sf /opt/pypy3/bin/pip /usr/local/bin/pip-pypy && \
+    pip-pypy install --no-cache-dir --upgrade pip setuptools wheel && \
+    rm -f /tmp/pypy3.tar.bz2 /tmp/get-pip.py && \
+    apt-get purge -y --auto-remove wget && \
+    rm -rf /var/lib/apt/lists/*
+
+# Fail the build early if any interpreter or pip is missing.
+RUN python3 --version && \
+    pypy --version && \
+    pip --version && \
+    pip-pypy --version
+
+# Copy only the dependency manifests (COPY creates /app) so these layers stay
+# cached until a requirements file changes.
+COPY requirements.txt /app/requirements.txt
+COPY requirements-dev.txt /app/requirements-dev.txt
+COPY requirements-pypy.txt /app/requirements-pypy.txt
+
+RUN pip install --no-cache-dir -r /app/requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements-dev.txt
+RUN pip-pypy install --no-cache-dir -r /app/requirements-pypy.txt
+
+# Default to an interactive shell; docker-compose overrides the command.
+CMD ["bash"]
diff --git a/docker-compose-tests.yaml b/docker-compose-tests.yaml
index 77d996f..c84f42c 100644
--- a/docker-compose-tests.yaml
+++ b/docker-compose-tests.yaml
@@ -45,8 +45,10 @@ services:
       - ./test_mariadb.cnf:/etc/mysql/my.cnf:ro  # Adjust path to MariaDB config location if needed
 
   replicator:
-    image: python:3.12.4-slim-bookworm
-    command: bash -c "pip install -r /app/requirements.txt && pip install -r /app/requirements-dev.txt && touch /tmp/ready && tail -f /dev/null"
+    image: mysql_ch_replicator
+    build:
+      context: .
+    command: bash -c "touch /tmp/ready && tail -f /dev/null"
     healthcheck:
       test: [ 'CMD-SHELL', 'test -f /tmp/ready' ]
       interval: 2s
diff --git a/mysql_ch_replicator/main.py b/mysql_ch_replicator/main.py
index 27c9031..e963cee 100755
--- a/mysql_ch_replicator/main.py
+++ b/mysql_ch_replicator/main.py
@@ -7,11 +7,6 @@ import os
 
 
 from .config import Settings
-from .db_replicator import DbReplicator
-from .binlog_replicator import BinlogReplicator
-from .db_optimizer import DbOptimizer
-from .monitoring import Monitoring
-from .runner import Runner
 
 
 def set_logging_config(tags, log_file=None, log_level_str=None):
@@ -50,6 +45,8 @@ def set_logging_config(tags, log_file=None, log_level_str=None):
 
 
 def run_binlog_replicator(args, config: Settings):
+    from .binlog_replicator import BinlogReplicator
+
     if not os.path.exists(config.binlog_replicator.data_dir):
         os.mkdir(config.binlog_replicator.data_dir)
 
@@ -66,6 +63,8 @@ def run_binlog_replicator(args, config: Settings):
 
 
 def run_db_replicator(args, config: Settings):
+    from .db_replicator import DbReplicator
+
     if not args.db:
         raise Exception("need to pass --db argument")
 
@@ -99,6 +98,8 @@ def run_db_replicator(args, config: Settings):
 
 
 def run_db_optimizer(args, config: Settings):
+    from .db_optimizer import DbOptimizer
+
     data_dir = config.binlog_replicator.data_dir
     if not os.path.exists(data_dir):
         os.mkdir(data_dir)
@@ -117,12 +118,16 @@ def run_db_optimizer(args, config: Settings):
 
 
 def run_monitoring(args, config: Settings):
+    from .monitoring import Monitoring
+
     set_logging_config('monitor', log_level_str=config.log_level)
     monitoring = Monitoring(args.db or '', config)
     monitoring.run()
 
 
 def run_all(args, config: Settings):
+    from .runner import Runner
+
     set_logging_config('runner', log_level_str=config.log_level)
     runner = Runner(config, args.wait_initial_replication, args.db)
     runner.run()
diff --git a/requirements-pypy.txt b/requirements-pypy.txt
new file mode 100644
index 0000000..1e60d00
--- /dev/null
+++ b/requirements-pypy.txt
@@ -0,0 +1,4 @@
+pyyaml==6.0.2
+mysql-connector-python==9.1.0
+pymysql==1.1.1
+packaging==24.2
diff --git a/test_mysql_ch_replicator.py b/test_mysql_ch_replicator.py
index 6c02de2..a980483 100644
--- a/test_mysql_ch_replicator.py
+++ b/test_mysql_ch_replicator.py
@@ -1333,12 +1333,22 @@ def test_performance_dbreplicator():
 
     mysql.execute(f'''
     CREATE TABLE {TEST_TABLE_NAME} (
-        id int NOT NULL AUTO_INCREMENT,
-        name varchar(2048),
-        age int,
+        id INT NOT NULL AUTO_INCREMENT,
+        name VARCHAR(2048),
+        age INT,
+        test1 VARCHAR(255) DEFAULT 'azaza12456778',
+        test2 INT DEFAULT 42483,
+        test3 VARCHAR(255) DEFAULT 'qjfsjdfjdfjfdjdfjfdjwhfdf',
+        test4 JSON DEFAULT ('{{"a": 3, "b": "caad", "f": [1, 2, 3]}}'),
+        test5 INT DEFAULT 32,
+        test6 INT DEFAULT 9234,
+        test7 INT DEFAULT 431,
+        test8 INT DEFAULT 121,
+        test9 INT DEFAULT 33,
+        test10 INT DEFAULT 1948,
         PRIMARY KEY (id)
-    );
-    ''')
+        );
+        ''')
 
     binlog_replicator_runner = BinlogReplicatorRunner(cfg_file=config_file)
     binlog_replicator_runner.run()
@@ -1364,7 +1374,7 @@ def _get_last_insert_name():
     base_value = 'a' * 2000
 
     for i in range(num_records):
-        if i % 2000 == 0:
+        if i % 5000 == 0:
             print(f'populated {i} elements')
         mysql.execute(
             f"INSERT INTO {TEST_TABLE_NAME} (name, age) "
@@ -1378,7 +1388,7 @@ def _get_last_insert_name():
     binlog_replicator_runner = BinlogReplicatorRunner(cfg_file=config_file)
     binlog_replicator_runner.run()
 
-    assert_wait(lambda: _get_last_insert_name() == 'TEST_VALUE_FINAL', retry_interval=0.5, max_wait_time=1000)
+    assert_wait(lambda: _get_last_insert_name() == 'TEST_VALUE_FINAL', retry_interval=0.5, max_wait_time=60)
     t2 = time.time()
 
     binlog_replicator_runner.stop()
@@ -1387,9 +1397,34 @@ def _get_last_insert_name():
     rps = num_records / time_delta
 
     print('\n\n')
-    print("*****************************")
+    print("********* INSERTS ************")
     print("records per second:", int(rps))
     print("total time (seconds):", round(time_delta, 2))
     print("*****************************")
     print('\n\n')
 
+    print("removing mysql data")
+    for i in range(num_records):
+        if i % 5000 == 0:
+            print(f'removed {i} elements')
+        mysql.execute(f'DELETE FROM {TEST_TABLE_NAME} WHERE ID = {i}', commit=i % 20 == 0)
+    mysql.execute(f"INSERT INTO {TEST_TABLE_NAME} (name, age) VALUES ('TEST_VALUE_FINAL_2', 0);", commit=True)
+
+    t1 = time.time()
+    binlog_replicator_runner = BinlogReplicatorRunner(cfg_file=config_file)
+    binlog_replicator_runner.run()
+
+    assert_wait(lambda: _get_last_insert_name() == 'TEST_VALUE_FINAL_2', retry_interval=0.5, max_wait_time=60)
+    t2 = time.time()
+
+    binlog_replicator_runner.stop()
+
+    time_delta = t2 - t1
+    rps = num_records / time_delta
+
+    print('\n\n')
+    print("********* DELETES ************")
+    print("records per second:", int(rps))
+    print("total time (seconds):", round(time_delta, 2))
+    print("*****************************")
+    print('\n\n')