Skip to content

Commit

Permalink
Update ci, expand typing
Browse files (browse the repository at this point in the history)
  • Loading branch information
DamianZaremba committed Jul 19, 2022
1 parent 24044ed commit 2380dd2
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 36 deletions.
15 changes: 13 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ jobs:
uses: actions/setup-python@v2
with: { python-version: '3.9' }
- name: Install dependencies
run: pip install pylama
run: pip install tox
- name: Run pylama
run: pylama cbng_trainer
run: tox -e pylama
mypy:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.x
uses: actions/setup-python@v2
with: { python-version: '3.9' }
- name: Install dependencies
run: pip install tox
- name: Run mypy
run: tox -e mypy
24 changes: 19 additions & 5 deletions cbng_trainer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import logging
import sys
from pathlib import PosixPath
from typing import List

import click

Expand All @@ -49,7 +50,12 @@
@click.option('--api-host-wikipedia', default="en.wikipedia.org",
help='Hostname of the wikipedia API')
@click.option('--max-connections', default=5, help='Max connections to use per host')
def cli(ctx, debug, api_host_report, api_host_review, api_host_wikipedia, max_connections):
def cli(ctx: click.Context,
debug: bool,
api_host_report: str,
api_host_review: str,
api_host_wikipedia: str,
max_connections: int) -> None:
logging.basicConfig(level=(logging.DEBUG if debug else logging.INFO),
stream=sys.stderr)
ctx.obj = Settings(max_connections,
Expand All @@ -64,7 +70,11 @@ def cli(ctx, debug, api_host_report, api_host_review, api_host_wikipedia, max_co
multiple=True, type=int)
@click.option('--random-edits', is_flag=True, help='Download random edits')
@click.option('--random-edits-count', default=200, help='Number of random edits to download')
def download_edits(ctx, output, edit_set, random_edits, random_edits_count):
def download_edits(ctx: click.Context,
output: str,
edit_set: List[int],
random_edits: bool,
random_edits_count: int) -> None:
loop = asyncio.get_event_loop()
loop.run_until_complete(dump_edits(ctx.obj,
PosixPath(output),
Expand All @@ -82,7 +92,10 @@ def download_edits(ctx, output, edit_set, random_edits, random_edits_count):
required=True, type=click.Path(True))
@click.option('--release-tag', help='Git release tag',
required=True, default='v1.0.2')
def build_database(ann_input, bayes_input, output, release_tag):
def build_database(ann_input: PosixPath,
bayes_input: PosixPath,
output: PosixPath,
release_tag: str) -> None:
output = PosixPath(output)
core_image = build_docker_image(output, release_tag)
stdout = run_container(core_image,
Expand Down Expand Up @@ -125,7 +138,9 @@ def build_database(ann_input, bayes_input, output, release_tag):
type=click.Path(True))
@click.option('--output', help='Output path', required=False, type=click.Path(True))
@click.option('--release-tag', help='Git release tag', required=True, default='v1.0.2')
def trial_database(input, output, release_tag):
def trial_database(input: PosixPath,
output: PosixPath,
release_tag: str) -> None:
output = PosixPath(output)
core_image = build_docker_image(output, release_tag)

Expand All @@ -147,7 +162,6 @@ def trial_database(input, output, release_tag):
'threshold': plots.THREASHOLD,
'false_positive_rate': plots.FALSE_POSITIVE
}.items():

# Write the plot file out to process
plot_file = trial_path / f'{name}.gnuplot'
with plot_file.open('w') as fh:
Expand Down
23 changes: 14 additions & 9 deletions cbng_trainer/common/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@
import subprocess
import uuid
from pathlib import PosixPath
from typing import List, Tuple, Optional

logger = logging.getLogger(__name__)


def stop_container(name: str):
def stop_container(name: str) -> bool:
logger.info(f'Asking docker to kill {name}')
p = subprocess.Popen([
'docker',
Expand All @@ -41,11 +42,11 @@ def stop_container(name: str):
stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
raise RuntimeError(f'Failed to stop container {name}: {stdout} / {stderr}')
raise RuntimeError(f'Failed to stop container {name}: {stdout!r} / {stderr!r}')
return True


def start_container(image: str, port: int):
def start_container(image: str, port: int) -> str:
container_name = f'cbng-core-{uuid.uuid4()}'
logger.info(f'Asking docker to start {container_name} from {image} using {port}')
p = subprocess.Popen([
Expand All @@ -67,11 +68,15 @@ def start_container(image: str, port: int):
stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
raise RuntimeError(f'Failed to start container {image}: {stdout} / {stderr}')
raise RuntimeError(f'Failed to start container {image}: {stdout!r} / {stderr!r}')
return container_name


def run_container(image: str, volumes, arguments, cwd=None, abort_on_error=True):
def run_container(image: str,
volumes: List[Tuple[str, str]],
arguments: List[str],
cwd: Optional[str] = None,
abort_on_error: bool = True) -> bytes:
logger.info(f'Asking docker to run {image} using {volumes} / {arguments}')

runtime_args = []
Expand All @@ -87,13 +92,13 @@ def run_container(image: str, volumes, arguments, cwd=None, abort_on_error=True)
stdout, stderr = p.communicate()
if p.returncode != 0:
if abort_on_error:
raise RuntimeError(f'Failed to run container {image}: {stdout} / {stderr}')
raise RuntimeError(f'Failed to run container {image}: {stdout!r} / {stderr!r}')
else:
logger.error(f'Failed to run container {image}: {stdout} / {stderr}')
logger.error(f'Failed to run container {image}: {stdout!r} / {stderr!r}')
return stdout


def build_docker_image(path: PosixPath, git_tag: str, include_local_binaries=False):
def build_docker_image(path: PosixPath, git_tag: str, include_local_binaries: bool = False) -> str:
docker_file = '''FROM debian:9
ARG CORE_TAG
WORKDIR /opt/cbng-core
Expand Down Expand Up @@ -165,7 +170,7 @@ def build_docker_image(path: PosixPath, git_tag: str, include_local_binaries=Fal
cwd=path.as_posix())
stdout, stderr = p.communicate()
if p.returncode != 0:
raise RuntimeError(f'Failed to build docker image: {stdout} / {stderr}')
raise RuntimeError(f'Failed to build docker image: {stdout!r} / {stderr!r}')

(path / 'Dockerfile').unlink(True)
return image_tag
2 changes: 1 addition & 1 deletion cbng_trainer/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class Edit:
current_diff: Diff
previous_diff: Diff

def as_xml(self):
def as_xml(self) -> str:
edit = ElementTree.Element('WPEdit')

ElementTree.SubElement(edit, 'EditType').text = 'change'
Expand Down
19 changes: 15 additions & 4 deletions cbng_trainer/comparator/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@

import logging
import socket
from typing import Optional
from xml.etree import ElementTree

from cbng_trainer.common.models import Edit, CoreScore

logger = logging.getLogger(__name__)


async def score_edit_via_core(edit: Edit, port: int):
async def score_edit_via_core(edit: Edit, port: int) -> Optional[CoreScore]:
xml = edit.as_xml()
logger.debug(f'Sending to {port}: {xml}')

Expand All @@ -47,10 +48,20 @@ async def score_edit_via_core(edit: Edit, port: int):
response += data

et = ElementTree.fromstring(response)

edit_id = et.find('./WPEdit/editid')
score = et.find('./WPEdit/score')
think_vandalism = et.find('./WPEdit/think_vandalism')

if not (edit_id and edit_id.text
and score and score.text
and think_vandalism and think_vandalism.text):
return None

cs = CoreScore(
int(et.find('./WPEdit/editid').text),
float(et.find('./WPEdit/score').text),
et.find('./WPEdit/think_vandalism').text == 'true',
int(edit_id.text),
float(score.text),
think_vandalism.text == 'true',
)
logger.info(f'[{edit.id}] returning from {port}: {cs}')
return cs
43 changes: 29 additions & 14 deletions cbng_trainer/trainer/reviewed.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,28 @@
'''
import asyncio
import logging
from pathlib import PosixPath
from random import Random
from typing import Optional, List, AsyncIterator, Tuple

import aiohttp
import aiohttp_retry

from cbng_trainer.common.config import Settings
from cbng_trainer.common.models import ReviewedEdit, User, Page, Diff

logger = logging.getLogger(__name__)
HTTP_HEADERS = {'User-Agent': 'ClueBot NG Trainer/1.0'}


async def fetch_edits(session, settings, include_edit_sets, use_random_edits, random_edits_limit):
async def fetch_edits(session: aiohttp_retry.RetryClient,
settings: Settings,
include_edit_sets: Optional[List[int]],
use_random_edits: bool,
random_edits_limit: int) -> AsyncIterator[Tuple[int, bool]]:
logger.info('Fetching edits from review interface')
async with session.get(f'https://{settings.api_hosts.review}/api/export/trainer.json') as r:
async with session.get(f'https://{settings.api_hosts.review}/api/export/trainer.json',
headers=HTTP_HEADERS) as r:
data = await r.json()

random = Random()
Expand All @@ -50,13 +59,16 @@ async def fetch_edits(session, settings, include_edit_sets, use_random_edits, ra
included_edits += 1


async def build_edit_data(session, settings, edit_id, edit_is_vandalism):
async def build_edit_data(session: aiohttp_retry.RetryClient,
settings: Settings,
edit_id: int,
edit_is_vandalism: bool) -> Optional[ReviewedEdit]:
logger.info(f'Fetching extended edit info for {edit_id}')
async with session.get(f'https://{settings.api_hosts.api}', params={
'action': 'training.data',
'rev_id': edit_id,
'include_text': '1',
}) as r:
}, headers=HTTP_HEADERS) as r:
edit_data = await r.json()

if 'error' in edit_data:
Expand Down Expand Up @@ -103,12 +115,15 @@ async def build_edit_data(session, settings, edit_id, edit_is_vandalism):
)


async def load_edits(settings, include_edit_sets, use_random_edits, random_edits_limit):
async def load_edits(settings: Settings,
include_edit_sets: Optional[List[int]],
use_random_edits: bool,
random_edits_limit: int) -> List[ReviewedEdit]:
async with aiohttp_retry.RetryClient(
timeout=aiohttp.ClientTimeout(total=21600),
connector=aiohttp.TCPConnector(limit_per_host=settings.max_host_connections),
raise_for_status=False,
retry_options=aiohttp_retry.ExponentialRetry(attempts=5),
timeout=aiohttp.ClientTimeout(total=21600),
connector=aiohttp.TCPConnector(limit_per_host=settings.max_host_connections),
raise_for_status=False,
retry_options=aiohttp_retry.ExponentialRetry(attempts=5),
) as session:
edits = await asyncio.gather(*[
build_edit_data(session, settings, edit_id, edit_is_vandalism)
Expand All @@ -120,11 +135,11 @@ async def load_edits(settings, include_edit_sets, use_random_edits, random_edits
return [edit for edit in edits if edit is not None]


async def dump_edits(settings,
target_path,
include_edit_sets,
use_random_edits,
random_edits_limit):
async def dump_edits(settings: Settings,
target_path: PosixPath,
include_edit_sets: Optional[List[int]],
use_random_edits: bool,
random_edits_limit: int) -> None:
edits = await load_edits(settings,
include_edit_sets,
use_random_edits,
Expand Down
5 changes: 5 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
tox==3.25.1
pytest==7.1.2
mypy==0.961
types-PyYAML==6.0.10
types-setuptools==63.2.0
9 changes: 8 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,12 @@ universal=1
[metadata]
version = attr:cbng_trainer.__version__

[pylama]
skip = ve/*,.tox/*

[pylama:pycodestyle]
max_line_length = 100
max_line_length = 120

[mypy]
ignore_missing_imports = True
exclude = (ve/.+$)
12 changes: 12 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[tox]
envlist = pylama,mypy

[testenv:pylama]
deps = -r requirements-dev.txt
-r requirements.txt
commands = pylama .

[testenv:mypy]
deps = -r requirements-dev.txt
-r requirements.txt
commands = mypy --strict --no-warn-return-any .

0 comments on commit 2380dd2

Please sign in to comment.