From 53d6da90dbf3c5408693d5a35d0a336760e9c673 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 28 Sep 2022 16:01:18 +0200 Subject: [PATCH 01/49] Init commit. --- src/eduid/userdb/tests/test_db.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/eduid/userdb/tests/test_db.py b/src/eduid/userdb/tests/test_db.py index 1e281a18d..33fc1c747 100644 --- a/src/eduid/userdb/tests/test_db.py +++ b/src/eduid/userdb/tests/test_db.py @@ -5,7 +5,6 @@ import eduid.userdb.db as db from eduid.userdb.fixtures.users import mocked_user_standard_2, new_unverified_user_example, new_user_example from eduid.userdb.identity import IdentityType -from eduid.userdb.meta import CleanedType from eduid.userdb.testing import MongoTestCase From 1eddece1f6f1f0f508540df66c85e8183920c173 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 28 Sep 2022 17:01:53 +0200 Subject: [PATCH 02/49] Update hashes. --- requirements/test_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test_requirements.txt b/requirements/test_requirements.txt index 89660d620..5d5157adf 100644 --- a/requirements/test_requirements.txt +++ b/requirements/test_requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with python 3.10 +# This file is autogenerated by pip-compile with python 3.9 # To update, run: # # make update_deps From 6fae0d5f733415daad78f9dce84e7ae1002cd6e3 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Fri, 30 Sep 2022 12:31:10 +0200 Subject: [PATCH 03/49] First. --- src/eduid/workers/user_cleaner/app.py | 42 +++++++++++++++++++ .../user_cleaner/testing/test_workers.py | 15 +++++++ src/eduid/workers/user_cleaner/worker_skv.py | 0 3 files changed, 57 insertions(+) create mode 100644 src/eduid/workers/user_cleaner/app.py create mode 100644 src/eduid/workers/user_cleaner/testing/test_workers.py create mode 100644 src/eduid/workers/user_cleaner/worker_skv.py diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py new file mode 100644 index 000000000..40691aceb --- /dev/null +++ b/src/eduid/workers/user_cleaner/app.py @@ -0,0 +1,42 @@ +import signal + +from typing import List +from eduid.userdb.user import User +from pydantic import BaseModel + + +class Shutdown(BaseModel): + now = False + + def __init__(self, **kwargs): + super().__init__(**kwargs) + signal.signal(signal.SIGINT, self.exit_gracefully) + signal.signal(signal.SIGTERM, self.exit_gracefully) + + def exit_gracefully(self): + self.now = True + + +class UserHandling(BaseModel): + users: List[User] + + def __iter__(self): + return iter(self.users) + + def __getitem__(self, item): + return self.users[item] + + def fetch_users(self, cleaning_type: str): + pass + + +class ServiceSKV(Shutdown, UserHandling): + def run(self): + for user in self.users: + if Shutdown.now: + break + pass + + +if __name__ == "__main__": + ServiceSKV().run() diff --git a/src/eduid/workers/user_cleaner/testing/test_workers.py b/src/eduid/workers/user_cleaner/testing/test_workers.py new file mode 100644 index 000000000..71274afce --- /dev/null +++ b/src/eduid/workers/user_cleaner/testing/test_workers.py @@ -0,0 +1,15 @@ +from eduid.common.testing_base import CommonTestCase +from eduid.userdb.fixtures.users import new_user_example + +from eduid.workers.user_cleaner.app import new_service, Service + + +class WorkerTest(CommonTestCase): + def setUp(self, *args, **kwargs): + super().setUp(am_users=[new_user_example]) + + self.service = Service() + self.service.users = [new_user_example] + + def test_worker_skv(self): + self.service.worker_skv() diff --git a/src/eduid/workers/user_cleaner/worker_skv.py b/src/eduid/workers/user_cleaner/worker_skv.py new file mode 100644 index 000000000..e69de29bb From 4ab0f89aa8df3ac058ccbd7e0f87b1604b83e8ea Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 3 Oct 2022 13:36:31 +0200 Subject: [PATCH 04/49] Add first layout. --- src/eduid/workers/user_cleaner/app.py | 49 ++++++++++++++----- src/eduid/workers/user_cleaner/config.py | 5 ++ src/eduid/workers/user_cleaner/run.py | 0 .../user_cleaner/testing/test_workers.py | 17 +++++-- 4 files changed, 54 insertions(+), 17 deletions(-) create mode 100644 src/eduid/workers/user_cleaner/config.py create mode 100644 src/eduid/workers/user_cleaner/run.py diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 40691aceb..54e32ad54 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -1,11 +1,19 @@ +import logging import signal -from typing import List +from typing import List, Optional, Dict + +from eduid.common.config.parsers import load_config +from eduid.userdb import AmDB +from eduid.userdb.meta import CleanedType from eduid.userdb.user import User +from eduid.common.logging import init_logging from pydantic import BaseModel +from eduid.workers.user_cleaner.config import UserCleanerConfig + -class Shutdown(BaseModel): +class Shutdown: now = False def __init__(self, **kwargs): @@ -17,8 +25,19 @@ def exit_gracefully(self): self.now = True -class UserHandling(BaseModel): - users: List[User] +class Cleaner(Shutdown): + users: Optional[List[User]] = None + + def __init__(self, name: str = "user_cleaner", test_config: Optional[Dict] = None): + self.config = load_config(typ=UserCleanerConfig, app_name=name, ns="api", test_config=test_config) + super().__init__() + self.name = name + + self.db = AmDB(db_uri=self.config.mongo_uri) + + self.logger = logging.getLogger(name=name) + init_logging(config=self.config) + self.logger.info(f"Starting {name} app") def __iter__(self): return iter(self.users) @@ -26,17 +45,21 @@ def __iter__(self): def __getitem__(self, item): return self.users[item] - def fetch_users(self, cleaning_type: str): - pass + def fetch_users(self, cleaning_type: CleanedType, limit: int): + self.users = self.db.get_uncleaned_users(cleaned_type=cleaning_type, limit=limit) + + def run_skv(self): + self.fetch_users(CleanedType.SKV, 1) + print("Hej") + +def init_app(name: str = "user_cleaner", test_config: Optional[Dict] = None) -> Cleaner: + app = Cleaner(name=name, test_config=test_config) -class ServiceSKV(Shutdown, UserHandling): - def run(self): - for user in self.users: - if Shutdown.now: - break - pass + app.logger.info("app running...") + return app if __name__ == "__main__": - ServiceSKV().run() + app_skv = init_app() + app_skv.run_skv() diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py new file mode 100644 index 000000000..d19d39b75 --- /dev/null +++ b/src/eduid/workers/user_cleaner/config.py @@ -0,0 +1,5 @@ +from eduid.common.config.base import LoggingConfigMixin, RootConfig + + +class UserCleanerConfig(RootConfig, LoggingConfigMixin): + mongo_uri: str = "" diff --git a/src/eduid/workers/user_cleaner/run.py b/src/eduid/workers/user_cleaner/run.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/eduid/workers/user_cleaner/testing/test_workers.py b/src/eduid/workers/user_cleaner/testing/test_workers.py index 71274afce..1307a745e 100644 --- a/src/eduid/workers/user_cleaner/testing/test_workers.py +++ b/src/eduid/workers/user_cleaner/testing/test_workers.py @@ -1,15 +1,24 @@ +from typing import Dict + from eduid.common.testing_base import CommonTestCase from eduid.userdb.fixtures.users import new_user_example -from eduid.workers.user_cleaner.app import new_service, Service +from eduid.workers.user_cleaner.app import init_app class WorkerTest(CommonTestCase): def setUp(self, *args, **kwargs): super().setUp(am_users=[new_user_example]) - self.service = Service() - self.service.users = [new_user_example] + self.test_config = self._get_config() + + self.app = init_app(name="test_cleaner", test_config=self.test_config) + + def _get_config(self) -> Dict: + config = { + "mongo_uri": self.settings["mongo_uri"], + } + return config def test_worker_skv(self): - self.service.worker_skv() + self.app.run_skv() From fae68385e92be7c2bca3d0348e81a81c4ab9afca Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 14 Nov 2022 11:55:12 +0100 Subject: [PATCH 05/49] Skeleton worker structure. --- src/eduid/userdb/fixtures/users.py | 4 +- src/eduid/userdb/meta.py | 2 +- src/eduid/userdb/userdb.py | 4 +- src/eduid/workers/user_cleaner/app.py | 95 +++++++++++++------ src/eduid/workers/user_cleaner/config.py | 11 +++ .../user_cleaner/testing/test_workers.py | 27 ++++-- src/eduid/workers/user_cleaner/utils/skv.py | 12 +++ 7 files changed, 113 insertions(+), 42 deletions(-) create mode 100644 src/eduid/workers/user_cleaner/utils/skv.py diff --git a/src/eduid/userdb/fixtures/users.py b/src/eduid/userdb/fixtures/users.py index fb900e2b6..82f45a314 100644 --- a/src/eduid/userdb/fixtures/users.py +++ b/src/eduid/userdb/fixtures/users.py @@ -64,7 +64,7 @@ from eduid.userdb.identity import IdentityList from eduid.userdb.locked_identity import LockedIdentityList from eduid.userdb.mail import MailAddressList -from eduid.userdb.meta import CleanedType, Meta +from eduid.userdb.meta import CleanerType, Meta from eduid.userdb.nin import NinList from eduid.userdb.phone import PhoneNumberList from eduid.userdb.signup.user import SignupUser @@ -178,7 +178,7 @@ meta=Meta( version=ObjectId("987654321098765432103210"), cleaned={ - CleanedType.SKV: utc_now(), + CleanerType.SKV: utc_now(), }, ), eppn="hubba-bubba", diff --git a/src/eduid/userdb/meta.py b/src/eduid/userdb/meta.py index a10c83eba..bc1b0166f 100644 --- a/src/eduid/userdb/meta.py +++ b/src/eduid/userdb/meta.py @@ -12,7 +12,7 @@ __author__ = "lundberg" -class CleanedType(str, Enum): +class CleanerType(str, Enum): SKV = "skv" TELE = "tele" LADOK = "ladok" diff --git a/src/eduid/userdb/userdb.py b/src/eduid/userdb/userdb.py index 22607e8c7..156fbada8 100644 --- a/src/eduid/userdb/userdb.py +++ b/src/eduid/userdb/userdb.py @@ -48,7 +48,7 @@ UserOutOfSync, ) from eduid.userdb.identity import IdentityType -from eduid.userdb.meta import CleanedType +from eduid.userdb.meta import CleanerType from eduid.userdb.user import User from eduid.userdb.util import utc_now @@ -108,7 +108,7 @@ def _get_users_by_aggregate(self, match: dict[str, Any], sort: dict[str, Any], l return [self.user_from_dict(data=user) for user in users] def get_uncleaned_verified_users( - self, cleaned_type: CleanedType, identity_type: IdentityType, limit: int + self, cleaned_type: CleanerType, identity_type: IdentityType, limit: int ) -> List[UserVar]: match = { "identities": { diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 54e32ad54..88e15b400 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -1,65 +1,98 @@ +import asyncio import logging import signal -from typing import List, Optional, Dict +from queue import Queue + +from typing import List, Optional, Dict, Union, Any from eduid.common.config.parsers import load_config -from eduid.userdb import AmDB -from eduid.userdb.meta import CleanedType +from eduid.userdb import AmDB, User +from eduid.userdb.identity import IdentityType +from eduid.userdb.meta import CleanerType from eduid.userdb.user import User from eduid.common.logging import init_logging +from eduid.workers.user_cleaner.utils.skv import consume from pydantic import BaseModel from eduid.workers.user_cleaner.config import UserCleanerConfig class Shutdown: - now = False + shutdown_now = False - def __init__(self, **kwargs): - super().__init__(**kwargs) + def __init__(self): + super().__init__() signal.signal(signal.SIGINT, self.exit_gracefully) signal.signal(signal.SIGTERM, self.exit_gracefully) def exit_gracefully(self): - self.now = True - + self.shutdown_now = True -class Cleaner(Shutdown): - users: Optional[List[User]] = None - def __init__(self, name: str = "user_cleaner", test_config: Optional[Dict] = None): - self.config = load_config(typ=UserCleanerConfig, app_name=name, ns="api", test_config=test_config) +class WorkerBase(Shutdown): + def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None): + self.worker_name = str(cleaner_type.value) + self.config = load_config(typ=UserCleanerConfig, app_name=self.worker_name, ns="api", test_config=test_config) super().__init__() - self.name = name - self.db = AmDB(db_uri=self.config.mongo_uri) - self.logger = logging.getLogger(name=name) + self.logger = logging.getLogger(name=self.worker_name) init_logging(config=self.config) - self.logger.info(f"Starting {name} app") - def __iter__(self): - return iter(self.users) + self.user_count = self.config.workers[self.worker_name].user_count + + self.queue = Queue(maxsize=self.user_count) + + if test_config: + self._test_worker_runs = test_config["test_runs"] + + self.logger.info(f"starting worker {cleaner_type}") + + def enqueuing(self, cleaning_type: CleanerType, identity_type: IdentityType, limit: int): + for user in self.db.get_uncleaned_verified_users( + cleaned_type=cleaning_type, identity_type=identity_type, limit=limit + ): + self.queue.put(user) + c = self.queue.qsize() + mura = 1 + + +class SKV(WorkerBase): + def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None): + super().__init__(cleaner_type=cleaner_type.SKV, test_config=test_config) + + def update_name(self, user: User): + print(user.eppn) - def __getitem__(self, item): - return self.users[item] + def run(self): + test_runs = 0 + while not self.shutdown_now: + if self._test_worker_runs > 0: + if self._test_worker_runs <= test_runs: + self.shutdown_now = True + test_runs += 1 - def fetch_users(self, cleaning_type: CleanedType, limit: int): - self.users = self.db.get_uncleaned_users(cleaned_type=cleaning_type, limit=limit) + if self.queue.empty(): + self.enqueuing( + cleaning_type=CleanerType.SKV, + identity_type=IdentityType.NIN, + limit=self.config.workers["skv"].user_count, + ) + user = self.queue.get() - def run_skv(self): - self.fetch_users(CleanedType.SKV, 1) - print("Hej") + self.update_name(user=user) + self.queue.task_done() + c = self.queue.qsize() + mura = 1 -def init_app(name: str = "user_cleaner", test_config: Optional[Dict] = None) -> Cleaner: - app = Cleaner(name=name, test_config=test_config) - app.logger.info("app running...") - return app +def init_skv(test_config: Optional[Dict] = None) -> SKV: + worker = SKV(cleaner_type=CleanerType.SKV, test_config=test_config) + return worker if __name__ == "__main__": - app_skv = init_app() - app_skv.run_skv() + skv = init_skv() + skv.run() diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index d19d39b75..e3cf1320f 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -1,5 +1,16 @@ +from enum import Enum +from typing import Dict + +from pydantic import BaseModel + from eduid.common.config.base import LoggingConfigMixin, RootConfig +from eduid.userdb.meta import CleanerType + + +class WorkerInfo(BaseModel): + user_count: int class UserCleanerConfig(RootConfig, LoggingConfigMixin): mongo_uri: str = "" + workers: Dict[str, WorkerInfo] diff --git a/src/eduid/workers/user_cleaner/testing/test_workers.py b/src/eduid/workers/user_cleaner/testing/test_workers.py index 1307a745e..c940b8cf7 100644 --- a/src/eduid/workers/user_cleaner/testing/test_workers.py +++ b/src/eduid/workers/user_cleaner/testing/test_workers.py @@ -1,24 +1,39 @@ +import time from typing import Dict from eduid.common.testing_base import CommonTestCase -from eduid.userdb.fixtures.users import new_user_example +from eduid.userdb.fixtures.users import new_user_example, new_user_example2, new_unverified_user_example -from eduid.workers.user_cleaner.app import init_app +from eduid.workers.user_cleaner.app import init_skv +from eduid.workers.user_cleaner.config import WorkerInfo class WorkerTest(CommonTestCase): def setUp(self, *args, **kwargs): - super().setUp(am_users=[new_user_example]) + super().setUp(am_users=[new_user_example, new_user_example2, new_unverified_user_example]) self.test_config = self._get_config() - self.app = init_app(name="test_cleaner", test_config=self.test_config) - def _get_config(self) -> Dict: config = { "mongo_uri": self.settings["mongo_uri"], + "test_runs": 4, + "workers": { + "skv": WorkerInfo( + user_count=10, + ), + "ladok": WorkerInfo( + user_count=10, + ), + }, } return config + +class TestSKV(WorkerTest): + def setUp(self, *args, **kwargs): + super().setUp() + self.worker_skv = init_skv(test_config=self.test_config) + def test_worker_skv(self): - self.app.run_skv() + self.worker_skv.run() diff --git a/src/eduid/workers/user_cleaner/utils/skv.py b/src/eduid/workers/user_cleaner/utils/skv.py new file mode 100644 index 000000000..1399da8cb --- /dev/null +++ b/src/eduid/workers/user_cleaner/utils/skv.py @@ -0,0 +1,12 @@ +import logging +import time + +from eduid.userdb import User + + +def consume(user: User, logger: logging): + print("user", user) + time.sleep(5) + f = open("/tmp/cleaner.txt", "a") + f.write(user.eppn) + f.close() From 6ab8095fa2d63d6c47d1ea509dac65301bf4ba00 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 22 Nov 2022 15:32:28 +0100 Subject: [PATCH 06/49] Add navet stuff to worker. --- src/eduid/workers/user_cleaner/app.py | 14 +++++++++++--- src/eduid/workers/user_cleaner/config.py | 5 +++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 88e15b400..3b2043b00 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -7,13 +7,13 @@ from typing import List, Optional, Dict, Union, Any from eduid.common.config.parsers import load_config +from eduid.common.rpc.msg_relay import MsgRelay from eduid.userdb import AmDB, User from eduid.userdb.identity import IdentityType from eduid.userdb.meta import CleanerType from eduid.userdb.user import User from eduid.common.logging import init_logging -from eduid.workers.user_cleaner.utils.skv import consume -from pydantic import BaseModel +from eduid.common.clients.amapi_client.amapi_client import AMAPIClient from eduid.workers.user_cleaner.config import UserCleanerConfig @@ -44,6 +44,12 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None self.queue = Queue(maxsize=self.user_count) + self.msg_relay = MsgRelay(self.config) + + self.amapi_client = AMAPIClient( + amapi_url=self.config.amapi_url, + ) + if test_config: self._test_worker_runs = test_config["test_runs"] @@ -63,7 +69,9 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None super().__init__(cleaner_type=cleaner_type.SKV, test_config=test_config) def update_name(self, user: User): - print(user.eppn) + navet_data = self.msg_relay.get_all_navet_data(nin=user.identities.nin.number) + user.given_name = navet_data.person.name.given_name + user.surname = navet_data.person.name.surname def run(self): test_runs = 0 diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index e3cf1320f..976466074 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -3,7 +3,7 @@ from pydantic import BaseModel -from eduid.common.config.base import LoggingConfigMixin, RootConfig +from eduid.common.config.base import LoggingConfigMixin, RootConfig, MsgConfigMixin from eduid.userdb.meta import CleanerType @@ -11,6 +11,7 @@ class WorkerInfo(BaseModel): user_count: int -class UserCleanerConfig(RootConfig, LoggingConfigMixin): +class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin): mongo_uri: str = "" workers: Dict[str, WorkerInfo] + amapi_url: str From 12cff53a74bcaf113fbf7ccc92095a8be6f0477a Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 29 Nov 2022 14:51:19 +0100 Subject: [PATCH 07/49] user cleaner barebone. --- .../clients/amapi_client/amapi_client.py | 6 +- src/eduid/common/clients/gnap_client/base.py | 5 +- .../common/clients/gnap_client/sync_client.py | 3 +- src/eduid/common/rpc/msg_relay.py | 14 ++- src/eduid/common/utils.py | 36 ++++++ src/eduid/userdb/meta.py | 2 +- src/eduid/webapp/common/api/helpers.py | 37 +----- .../common/api/tests/test_nin_helpers.py | 10 +- src/eduid/webapp/security/helpers.py | 5 +- src/eduid/workers/amapi/app.py | 2 +- src/eduid/workers/amapi/config.py | 5 +- src/eduid/workers/amapi/tests/test_user.py | 2 +- src/eduid/workers/user_cleaner/app.py | 113 +++++++----------- src/eduid/workers/user_cleaner/config.py | 23 ++-- src/eduid/workers/user_cleaner/run.py | 0 .../user_cleaner/testing/test_workers.py | 6 +- src/eduid/workers/user_cleaner/worker_skv.py | 0 src/eduid/workers/user_cleaner/workers/skv.py | 72 +++++++++++ 18 files changed, 199 insertions(+), 142 deletions(-) delete mode 100644 src/eduid/workers/user_cleaner/run.py delete mode 100644 src/eduid/workers/user_cleaner/worker_skv.py create mode 100644 src/eduid/workers/user_cleaner/workers/skv.py diff --git a/src/eduid/common/clients/amapi_client/amapi_client.py b/src/eduid/common/clients/amapi_client/amapi_client.py index d4696b5e1..a945789ee 100644 --- a/src/eduid/common/clients/amapi_client/amapi_client.py +++ b/src/eduid/common/clients/amapi_client/amapi_client.py @@ -19,15 +19,15 @@ class AMAPIClient(GNAPClient): - def __init__(self, amapi_url: str, app, auth_data=GNAPClientAuthData, **kwargs): - super().__init__(auth_data=auth_data, app=app, **kwargs) + def __init__(self, amapi_url: str, auth_data=GNAPClientAuthData, verify_tls: bool = True, **kwargs): + super().__init__(auth_data=auth_data, verify=verify_tls, **kwargs) self.amapi_url = amapi_url def _users_base_url(self) -> str: return urlappend(self.amapi_url, "users") def _put(self, base_path: str, user: str, endpoint: str, body: Any) -> httpx.Response: - return self.put(urlappend(base_path, f"{user}/{endpoint}"), json=body.json()) + return self.put(url=urlappend(base_path, f"{user}/{endpoint}"), content=body.json()) def update_user_email(self, user: str, body: UserUpdateEmailRequest) -> UserUpdateResponse: ret = self._put(base_path=self._users_base_url(), user=user, endpoint="email", body=body) diff --git a/src/eduid/common/clients/gnap_client/base.py b/src/eduid/common/clients/gnap_client/base.py index 15d62494a..1b1eacd28 100644 --- a/src/eduid/common/clients/gnap_client/base.py +++ b/src/eduid/common/clients/gnap_client/base.py @@ -32,7 +32,8 @@ class GNAPClientException(Exception): class GNAPClientAuthData(BaseModel): - authn_server_url: str + auth_server_url: str + auth_server_verify: bool = True key_name: str client_jwk: JWK access: List[Union[str, Access]] = Field(default_factory=list) @@ -52,7 +53,7 @@ class GNAPBearerTokenMixin(ABC): @property def transaction_uri(self) -> str: - return urlappend(self._auth_data.authn_server_url, "transaction") + return urlappend(self._auth_data.auth_server_url, "transaction") def _create_grant_request_jws(self) -> str: req = GrantRequest( diff --git a/src/eduid/common/clients/gnap_client/sync_client.py b/src/eduid/common/clients/gnap_client/sync_client.py index c1be5b26a..b5774fed4 100644 --- a/src/eduid/common/clients/gnap_client/sync_client.py +++ b/src/eduid/common/clients/gnap_client/sync_client.py @@ -18,7 +18,6 @@ def __init__(self, auth_data: GNAPClientAuthData, **kwargs): super().__init__(**kwargs) - self.verify = kwargs.get("verify", True) self._auth_data = auth_data @staticmethod @@ -35,7 +34,7 @@ def _request_bearer_token(self) -> GrantResponse: url=self.transaction_uri, content=data, headers={"Content-Type": "application/jose+json"}, - verify=self.verify, + verify=self._auth_data.auth_server_verify, ) resp.raise_for_status() return GrantResponse.parse_raw(resp.text) diff --git a/src/eduid/common/rpc/msg_relay.py b/src/eduid/common/rpc/msg_relay.py index 2f4b8cb2d..f0433d82e 100644 --- a/src/eduid/common/rpc/msg_relay.py +++ b/src/eduid/common/rpc/msg_relay.py @@ -13,7 +13,6 @@ logger = logging.getLogger(__name__) - TEMPLATES_RELATION = { "mobile-validator": "mobile-confirm", "mobile-reset-password": "mobile-reset-password", @@ -122,17 +121,20 @@ def is_deregistered(self) -> bool: return bool(self.deregistration_information.cause_code or self.deregistration_information.date) -class NavetData(NavetModelConfig): - case_information: CaseInformation = Field(alias="CaseInformation") - person: Person = Field(alias="Person") - - # Used to parse data from get_postal_address class FullPostalAddress(NavetModelConfig): name: Name = Field(default_factory=Name, alias="Name") official_address: OfficialAddress = Field(default_factory=OfficialAddress, alias="OfficialAddress") +class NavetData(NavetModelConfig): + case_information: CaseInformation = Field(alias="CaseInformation") + person: Person = Field(alias="Person") + + def get_full_postal_address(self) -> FullPostalAddress: + return FullPostalAddress(name=self.person.name, official_address=self.person.postal_addresses) + + class MsgRelay(object): """ This is the interface to the RPC task to fetch data from NAVET, and to send SMSs. diff --git a/src/eduid/common/utils.py b/src/eduid/common/utils.py index f8645f4f0..95511ad95 100644 --- a/src/eduid/common/utils.py +++ b/src/eduid/common/utils.py @@ -1,6 +1,12 @@ # -*- coding: utf-8 -*- __author__ = "lundberg" +from eduid.common.rpc.msg_relay import FullPostalAddress +from eduid.userdb.user import TUserSubclass +import logging + +logger = logging.getLogger(__name__) + def urlappend(base: str, path: str) -> str: """ @@ -41,3 +47,33 @@ def removesuffix(s: str, suffix: str) -> str: return s[: -len(suffix)] else: return s[:] + + +def set_user_names_from_official_address(user: TUserSubclass, user_postal_address: FullPostalAddress) -> TUserSubclass: + """ + :param user: Proofing app private userdb user + :param user_postal_address: user postal address + + :returns: User object + """ + user.given_name = user_postal_address.name.given_name + user.surname = user_postal_address.name.surname + if user.given_name is None or user.surname is None: + # please mypy + raise RuntimeError("No given name or surname found in proofing log user postal address") + given_name_marking = user_postal_address.name.given_name_marking + user.display_name = f"{user.given_name} {user.surname}" + if given_name_marking: + _name_index = (int(given_name_marking) // 10) - 1 # ex. "20" -> 1 (second GivenName is real given name) + try: + _given_name = user.given_name.split()[_name_index] + user.display_name = f"{_given_name} {user.surname}" + except IndexError: + # At least occasionally, we've seen GivenName 'Jan-Erik Martin' with GivenNameMarking 30 + pass + logger.info("User names set from official address") + logger.debug( + f"{user_postal_address.name} resulted in given_name: {user.given_name}, " + f"surname: {user.surname} and display_name: {user.display_name}" + ) + return user diff --git a/src/eduid/userdb/meta.py b/src/eduid/userdb/meta.py index bc1b0166f..22d15ed2f 100644 --- a/src/eduid/userdb/meta.py +++ b/src/eduid/userdb/meta.py @@ -13,7 +13,7 @@ class CleanerType(str, Enum): - SKV = "skv" + SKV = "skatteverket" TELE = "tele" LADOK = "ladok" diff --git a/src/eduid/webapp/common/api/helpers.py b/src/eduid/webapp/common/api/helpers.py index 33d6ed738..0453409e2 100644 --- a/src/eduid/webapp/common/api/helpers.py +++ b/src/eduid/webapp/common/api/helpers.py @@ -9,13 +9,13 @@ from eduid.common.misc.timeutil import utc_now from eduid.common.rpc.exceptions import NoNavetData from eduid.common.rpc.msg_relay import DeregisteredCauseCode, DeregistrationInformation, FullPostalAddress +from eduid.common.utils import set_user_names_from_official_address from eduid.userdb import NinIdentity from eduid.userdb.element import ElementKey from eduid.userdb.identity import IdentityType from eduid.userdb.logs.element import ( NinProofingLogElement, TForeignIdProofingLogElementSubclass, - TNinProofingLogElementSubclass, ) from eduid.userdb.proofing import ProofingUser from eduid.userdb.proofing.state import NinProofingState, OidcProofingState @@ -25,38 +25,6 @@ __author__ = "lundberg" -def set_user_names_from_official_address( - user: TUserSubclass, proofing_log_entry: TNinProofingLogElementSubclass -) -> TUserSubclass: - """ - :param user: Proofing app private userdb user - :param proofing_log_entry: Proofing log entry element - - :returns: User object - """ - user.given_name = proofing_log_entry.user_postal_address.name.given_name - user.surname = proofing_log_entry.user_postal_address.name.surname - if user.given_name is None or user.surname is None: - # please mypy - raise RuntimeError("No given name or surname found in proofing log user postal address") - given_name_marking = proofing_log_entry.user_postal_address.name.given_name_marking - user.display_name = f"{user.given_name} {user.surname}" - if given_name_marking: - _name_index = (int(given_name_marking) // 10) - 1 # ex. "20" -> 1 (second GivenName is real given name) - try: - _given_name = user.given_name.split()[_name_index] - user.display_name = f"{_given_name} {user.surname}" - except IndexError: - # At least occasionally, we've seen GivenName 'Jan-Erik Martin' with GivenNameMarking 30 - pass - current_app.logger.info("User names set from official address") - current_app.logger.debug( - f"{proofing_log_entry.user_postal_address.name} resulted in given_name: {user.given_name}, " - f"surname: {user.surname} and display_name: {user.display_name}" - ) - return user - - def set_user_names_from_foreign_id( user: TUserSubclass, proofing_log_entry: TForeignIdProofingLogElementSubclass, display_name: Optional[str] = None ) -> TUserSubclass: @@ -106,7 +74,6 @@ def add_nin_to_user(user: User, proofing_state: NinProofingState, user_type: Typ def add_nin_to_user(user, proofing_state, user_type=ProofingUser): - proofing_user = user_type.from_user(user, current_app.private_userdb) # Add nin to user if not already there if not proofing_user.identities.nin: @@ -197,7 +164,7 @@ def verify_nin_for_user( proofing_user.identities.nin.verified_by = proofing_state.nin.created_by # Update users name - proofing_user = set_user_names_from_official_address(proofing_user, proofing_log_entry) + proofing_user = set_user_names_from_official_address(proofing_user, proofing_log_entry.user_postal_address) # If user was updated successfully continue with logging the proof and saving the user to central db # Send proofing data to the proofing log diff --git a/src/eduid/webapp/common/api/tests/test_nin_helpers.py b/src/eduid/webapp/common/api/tests/test_nin_helpers.py index 88822ee8f..3d6af26cb 100644 --- a/src/eduid/webapp/common/api/tests/test_nin_helpers.py +++ b/src/eduid/webapp/common/api/tests/test_nin_helpers.py @@ -20,9 +20,9 @@ from eduid.webapp.common.api.helpers import ( add_nin_to_user, set_user_names_from_foreign_id, - set_user_names_from_official_address, verify_nin_for_user, ) +from eduid.common.utils import set_user_names_from_official_address from eduid.webapp.common.api.testing import EduidAPITestCase from eduid.webapp.common.session.eduid_session import SessionFactory @@ -280,7 +280,7 @@ def test_set_user_names_from_offical_address_1(self): proofing_version="2018v1", ) with self.app.app_context(): - user = set_user_names_from_official_address(user, proofing_element) + user = set_user_names_from_official_address(user, proofing_element.user_postal_address) assert user.given_name == "Testaren Test" assert user.surname == "Testsson" assert user.display_name == "Test Testsson" @@ -301,7 +301,7 @@ def test_set_user_names_from_offical_address_2(self): proofing_version="2018v1", ) with self.app.app_context(): - user = set_user_names_from_official_address(user, proofing_element) + user = set_user_names_from_official_address(user, proofing_element.user_postal_address) assert user.given_name == "Test" assert user.surname == "Testsson" assert user.display_name == "Test Testsson" @@ -323,7 +323,7 @@ def test_set_user_names_from_offical_address_3(self): proofing_version="2018v1", ) with self.app.app_context(): - user = set_user_names_from_official_address(user, proofing_element) + user = set_user_names_from_official_address(user, proofing_element.user_postal_address) assert user.given_name == "Pippilotta Viktualia Rullgardina Krusmynta Efraimsdotter" assert user.surname == "Långstrump" assert user.display_name == "Rullgardina Långstrump" @@ -343,7 +343,7 @@ def test_set_user_names_from_offical_address_4(self): proofing_version="2018v1", ) with self.app.app_context(): - user = set_user_names_from_official_address(user, proofing_element) + user = set_user_names_from_official_address(user, proofing_element.user_postal_address) assert user.given_name == "Testaren Test" assert user.surname == "Testsson" assert user.display_name == "Testaren Test Testsson" diff --git a/src/eduid/webapp/security/helpers.py b/src/eduid/webapp/security/helpers.py index e3f7a21ca..c823fb0b8 100644 --- a/src/eduid/webapp/security/helpers.py +++ b/src/eduid/webapp/security/helpers.py @@ -11,7 +11,8 @@ from eduid.userdb import NinIdentity from eduid.userdb.logs.element import NameUpdateProofing from eduid.userdb.security import SecurityUser -from eduid.webapp.common.api.helpers import send_mail, set_user_names_from_official_address +from eduid.webapp.common.api.helpers import send_mail +from eduid.common.utils import set_user_names_from_official_address from eduid.webapp.common.api.messages import FluxData, TranslatableMsg, error_response from eduid.webapp.common.authn.utils import generate_password from eduid.webapp.common.session.namespaces import SP_AuthnRequest @@ -189,7 +190,7 @@ def update_user_official_name(security_user: SecurityUser, navet_data: NavetData user_postal_address=user_postal_address, ) # Update user names - security_user = set_user_names_from_official_address(security_user, proofing_log_entry) + security_user = set_user_names_from_official_address(security_user, proofing_log_entry.user_postal_address) # Do not save the user if proofing log write fails if not current_app.proofing_log.save(proofing_log_entry): diff --git a/src/eduid/workers/amapi/app.py b/src/eduid/workers/amapi/app.py index 65edb015b..15ccae5d3 100644 --- a/src/eduid/workers/amapi/app.py +++ b/src/eduid/workers/amapi/app.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Dict, List, Optional, Union +from typing import Dict, Optional from fastapi import FastAPI diff --git a/src/eduid/workers/amapi/config.py b/src/eduid/workers/amapi/config.py index 99dcaf9b8..6823f4f6a 100644 --- a/src/eduid/workers/amapi/config.py +++ b/src/eduid/workers/amapi/config.py @@ -1,11 +1,11 @@ import logging from enum import Enum from pathlib import Path -from typing import List, Optional, Dict, NewType, Annotated +from typing import List, Optional, Dict, NewType, Annotated, Sequence from pydantic import Field, BaseModel, validator, constr -from eduid.common.config.base import LoggingConfigMixin, RootConfig +from eduid.common.config.base import LoggingConfigMixin, RootConfig, LoggingFilters logger = logging.getLogger(__name__) @@ -42,6 +42,7 @@ class AMApiConfig(RootConfig, LoggingConfigMixin): protocol: str = "http" server_name: str = "localhost:8000" log_format: str = "{asctime} | {levelname:7} | {hostname} | {name:35} | {module:10} | {message}" + log_filters: Sequence[LoggingFilters] = Field(default=[LoggingFilters.NAMES]) mongo_uri: str = "" application_root: str = "" keystore_path: Path diff --git a/src/eduid/workers/amapi/tests/test_user.py b/src/eduid/workers/amapi/tests/test_user.py index 09dbead36..932a8d905 100644 --- a/src/eduid/workers/amapi/tests/test_user.py +++ b/src/eduid/workers/amapi/tests/test_user.py @@ -4,7 +4,7 @@ import json import datetime -from typing import Dict, Optional, Any, Mapping, List +from typing import Dict, Optional, Any import pkg_resources from bson import ObjectId diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 3b2043b00..85b114b97 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -1,106 +1,79 @@ -import asyncio import logging import signal +from abc import ABC from queue import Queue -from typing import List, Optional, Dict, Union, Any +from typing import Optional, Dict +from eduid.common.clients.amapi_client.amapi_client import AMAPIClient from eduid.common.config.parsers import load_config from eduid.common.rpc.msg_relay import MsgRelay -from eduid.userdb import AmDB, User +from eduid.userdb import AmDB from eduid.userdb.identity import IdentityType from eduid.userdb.meta import CleanerType -from eduid.userdb.user import User from eduid.common.logging import init_logging -from eduid.common.clients.amapi_client.amapi_client import AMAPIClient from eduid.workers.user_cleaner.config import UserCleanerConfig -class Shutdown: - shutdown_now = False - - def __init__(self): - super().__init__() +class WorkerBase(ABC): + def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None): signal.signal(signal.SIGINT, self.exit_gracefully) signal.signal(signal.SIGTERM, self.exit_gracefully) - def exit_gracefully(self): - self.shutdown_now = True - - -class WorkerBase(Shutdown): - def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None): self.worker_name = str(cleaner_type.value) - self.config = load_config(typ=UserCleanerConfig, app_name=self.worker_name, ns="api", test_config=test_config) - super().__init__() - self.db = AmDB(db_uri=self.config.mongo_uri) + self.config = load_config(typ=UserCleanerConfig, app_name="user_cleaner", ns="worker", test_config=test_config) + super().__init__() self.logger = logging.getLogger(name=self.worker_name) init_logging(config=self.config) + self.logger.info(f"initialize worker {self.worker_name}") + self.db = AmDB(db_uri=self.config.mongo_uri) + + self.shutdown_now = False - self.user_count = self.config.workers[self.worker_name].user_count + self.user_count = self.config.user_count self.queue = Queue(maxsize=self.user_count) + self.made_changes = 0 + self.max_changes = 0 + self.msg_relay = MsgRelay(self.config) self.amapi_client = AMAPIClient( - amapi_url=self.config.amapi_url, + amapi_url=self.config.amapi.url, + auth_data=self.config.gnap_auth_data, ) - if test_config: - self._test_worker_runs = test_config["test_runs"] + self.logger.info(f"starting worker {cleaner_type.value}") - self.logger.info(f"starting worker {cleaner_type}") - - def enqueuing(self, cleaning_type: CleanerType, identity_type: IdentityType, limit: int): - for user in self.db.get_uncleaned_verified_users( - cleaned_type=cleaning_type, identity_type=identity_type, limit=limit - ): - self.queue.put(user) - c = self.queue.qsize() - mura = 1 - - -class SKV(WorkerBase): - def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None): - super().__init__(cleaner_type=cleaner_type.SKV, test_config=test_config) - - def update_name(self, user: User): - navet_data = self.msg_relay.get_all_navet_data(nin=user.identities.nin.number) - user.given_name = navet_data.person.name.given_name - user.surname = navet_data.person.name.surname - - def run(self): - test_runs = 0 - while not self.shutdown_now: - if self._test_worker_runs > 0: - if self._test_worker_runs <= test_runs: - self.shutdown_now = True - test_runs += 1 - - if self.queue.empty(): - self.enqueuing( - cleaning_type=CleanerType.SKV, - identity_type=IdentityType.NIN, - limit=self.config.workers["skv"].user_count, - ) - user = self.queue.get() - - self.update_name(user=user) - - self.queue.task_done() - c = self.queue.qsize() - mura = 1 + def exit_gracefully(self, sig, frame) -> None: + self.logger.info(f"Recevied signal: {sig}, shutting down...") + self.shutdown_now = True + def _is_quota_reached(self) -> bool: + if self.made_changes == 0: + return False + return self.made_changes == self.config.change_quota -def init_skv(test_config: Optional[Dict] = None) -> SKV: - worker = SKV(cleaner_type=CleanerType.SKV, test_config=test_config) - return worker + def _add_to_made_changes(self) -> None: + self.made_changes += 1 + def _populate_max_changes(self): + self.db.db_count() -if __name__ == "__main__": - skv = init_skv() - skv.run() + def enqueuing(self, cleaning_type: CleanerType, identity_type: IdentityType, limit: int): + self.logger.info("Enquing users") + users = self.db.get_uncleaned_verified_users( + cleaned_type=cleaning_type, + identity_type=identity_type, + limit=limit, + ) + if len(users) < 1: + self.logger.warning(f"No users where enqueued") + return + for user in users: + self.logger.info(f"adding: {user.eppn}") + self.queue.put(user) diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index 976466074..e6517f473 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -1,17 +1,22 @@ -from enum import Enum -from typing import Dict +from typing import Sequence -from pydantic import BaseModel +from pydantic import Field, BaseModel -from eduid.common.config.base import LoggingConfigMixin, RootConfig, MsgConfigMixin -from eduid.userdb.meta import CleanerType +from eduid.common.clients.gnap_client.base import GNAPClientAuthData +from eduid.common.config.base import LoggingConfigMixin, RootConfig, MsgConfigMixin, LoggingFilters -class WorkerInfo(BaseModel): - user_count: int +class AmAPIConfig(BaseModel): + url: str + tls_verify: bool class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin): + log_filters: Sequence[LoggingFilters] = Field(default=[LoggingFilters.NAMES]) + log_format: str = "{asctime} | {levelname:7} | {hostname} | {name:35} | {module:10} | {message}" mongo_uri: str = "" - workers: Dict[str, WorkerInfo] - amapi_url: str + debug: bool + user_count: int + change_quota: float + gnap_auth_data: GNAPClientAuthData + amapi: AmAPIConfig diff --git a/src/eduid/workers/user_cleaner/run.py b/src/eduid/workers/user_cleaner/run.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/eduid/workers/user_cleaner/testing/test_workers.py b/src/eduid/workers/user_cleaner/testing/test_workers.py index c940b8cf7..a8627ebb2 100644 --- a/src/eduid/workers/user_cleaner/testing/test_workers.py +++ b/src/eduid/workers/user_cleaner/testing/test_workers.py @@ -5,7 +5,7 @@ from eduid.userdb.fixtures.users import new_user_example, new_user_example2, new_unverified_user_example from eduid.workers.user_cleaner.app import init_skv -from eduid.workers.user_cleaner.config import WorkerInfo +from eduid.workers.user_cleaner.config import WorkerConfig class WorkerTest(CommonTestCase): @@ -19,10 +19,10 @@ def _get_config(self) -> Dict: "mongo_uri": self.settings["mongo_uri"], "test_runs": 4, "workers": { - "skv": WorkerInfo( + "skv": WorkerConfig( user_count=10, ), - "ladok": WorkerInfo( + "ladok": WorkerConfig( user_count=10, ), }, diff --git a/src/eduid/workers/user_cleaner/worker_skv.py b/src/eduid/workers/user_cleaner/worker_skv.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py new file mode 100644 index 000000000..fb3791aea --- /dev/null +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -0,0 +1,72 @@ +from typing import Optional, Dict + +from eduid.common.models.amapi_user import UserUpdateNameRequest +from eduid.common.rpc.msg_relay import NavetData +from eduid.common.utils import set_user_names_from_official_address +from eduid.userdb import User +from eduid.userdb.identity import IdentityType +from eduid.userdb.meta import CleanerType +from eduid.workers.user_cleaner.app import WorkerBase + + +class SKV(WorkerBase): + def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None): + super().__init__(cleaner_type=cleaner_type.SKV, test_config=test_config) + + def update_name(self, user: User, navet_data: NavetData): + self.logger.info(f"eppn: {user.eppn}") + if navet_data.person.name.given_name is None or navet_data.person.name.surname is None: + # self.logger.info(f"No given_name or surname found in navet for eppn: {user.eppn}") + return + if user.given_name == navet_data.person.name.given_name and user.surname == navet_data.person.name.surname: + # self.logger.info(f"No update for names for eppn: {user.eppn}") + return + + # add to quota + updated_user = set_user_names_from_official_address( + user=user, user_postal_address=navet_data.get_full_postal_address() + ) + + self.amapi_client.update_user_name( + user=user.eppn, + body=UserUpdateNameRequest( + reason="SKV_NAME_UPDATE", + source=CleanerType.SKV.value, + given_name=updated_user.given_name, + display_name=updated_user.display_name, + surname=updated_user.surname, + ), + ) + + def run(self): + while not self.shutdown_now: + if self._is_quota_reached(): + self.logger.warning(f"worker skatteverket has reached its change_quota, killing it") + break + if self.queue.empty(): + self.enqueuing( + cleaning_type=CleanerType.SKV, + identity_type=IdentityType.NIN, + limit=self.config.user_count, + ) + user = self.queue.get() + + navet_data = self.msg_relay.get_all_navet_data(nin=user.identities.nin.number) + + self.update_name(user=user, navet_data=navet_data) + + self.queue.task_done() + + +def init_skv_worker(test_config: Optional[Dict] = None) -> SKV: + worker = SKV(cleaner_type=CleanerType.SKV, test_config=test_config) + return worker + + +def start_worker(): + worker = init_skv_worker() + worker.run() + + +if __name__ == "__main__": + start_worker() From 91f82cd88323748b615f4302ddc43a76f6c8d775 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 30 Nov 2022 09:07:56 +0100 Subject: [PATCH 08/49] Add delay and change quota. --- src/eduid/workers/user_cleaner/app.py | 23 ++++++++++++------- src/eduid/workers/user_cleaner/config.py | 1 + src/eduid/workers/user_cleaner/workers/skv.py | 15 ++++++++---- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 85b114b97..0d7c8d0f8 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -33,12 +33,11 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None self.shutdown_now = False - self.user_count = self.config.user_count - - self.queue = Queue(maxsize=self.user_count) + self.queue = Queue(maxsize=self.config.user_count) + self.queue_actual_size = 0 self.made_changes = 0 - self.max_changes = 0 + self.max_changes = 0.0 self.msg_relay = MsgRelay(self.config) @@ -56,16 +55,19 @@ def exit_gracefully(self, sig, frame) -> None: def _is_quota_reached(self) -> bool: if self.made_changes == 0: return False - return self.made_changes == self.config.change_quota + self.logger.debug(f"is_quota_reached:: made_changes: {self.made_changes}, max_changes: {self.max_changes}") + return self.made_changes >= self.max_changes def _add_to_made_changes(self) -> None: self.made_changes += 1 def _populate_max_changes(self): - self.db.db_count() + self.logger.debug( + f"populate_max_changes:: queue_actual_size: {self.queue_actual_size}, change_quota: {self.made_changes}" + ) + self.max_changes = self.queue_actual_size * self.config.change_quota def enqueuing(self, cleaning_type: CleanerType, identity_type: IdentityType, limit: int): - self.logger.info("Enquing users") users = self.db.get_uncleaned_verified_users( cleaned_type=cleaning_type, identity_type=identity_type, @@ -75,5 +77,10 @@ def enqueuing(self, cleaning_type: CleanerType, identity_type: IdentityType, lim self.logger.warning(f"No users where enqueued") return for user in users: - self.logger.info(f"adding: {user.eppn}") self.queue.put(user) + self.logger.info(f"enqueuing user: {user.eppn}") + + self.queue_actual_size = self.queue.qsize() + self._populate_max_changes() + + self.made_changes = 0 diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index e6517f473..707f4d0bb 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -18,5 +18,6 @@ class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin): debug: bool user_count: int change_quota: float + job_delay: float = 1.0 gnap_auth_data: GNAPClientAuthData amapi: AmAPIConfig diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index fb3791aea..0c5892db2 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -1,3 +1,4 @@ +import time from typing import Optional, Dict from eduid.common.models.amapi_user import UserUpdateNameRequest @@ -14,19 +15,23 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None super().__init__(cleaner_type=cleaner_type.SKV, test_config=test_config) def update_name(self, user: User, navet_data: NavetData): - self.logger.info(f"eppn: {user.eppn}") + self.logger.debug(f"number of changes: {self.made_changes} out of {self.max_changes}, {self.queue_actual_size}") + if navet_data.person.name.given_name is None or navet_data.person.name.surname is None: - # self.logger.info(f"No given_name or surname found in navet for eppn: {user.eppn}") + self.logger.info(f"No given_name or surname found in navet for eppn: {user.eppn}") return if user.given_name == navet_data.person.name.given_name and user.surname == navet_data.person.name.surname: - # self.logger.info(f"No update for names for eppn: {user.eppn}") + self.logger.info(f"No update for names for eppn: {user.eppn}") return - # add to quota updated_user = set_user_names_from_official_address( user=user, user_postal_address=navet_data.get_full_postal_address() ) + self._add_to_made_changes() + + self.logger.debug(f"number of changes: {self.made_changes} out of {self.max_changes}, {self.queue_actual_size}") + self.amapi_client.update_user_name( user=user.eppn, body=UserUpdateNameRequest( @@ -55,6 +60,8 @@ def run(self): self.update_name(user=user, navet_data=navet_data) + time.sleep(self.config.job_delay) + self.queue.task_done() From 9dd0d10e31131746d4eace1e6be64270ce623ce3 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Thu, 1 Dec 2022 11:36:41 +0100 Subject: [PATCH 09/49] Rename file. --- .../workers/user_cleaner/testing/test_skv.py | 7 ++++ .../user_cleaner/testing/test_workers.py | 39 ------------------- 2 files changed, 7 insertions(+), 39 deletions(-) create mode 100644 src/eduid/workers/user_cleaner/testing/test_skv.py delete mode 100644 src/eduid/workers/user_cleaner/testing/test_workers.py diff --git a/src/eduid/workers/user_cleaner/testing/test_skv.py b/src/eduid/workers/user_cleaner/testing/test_skv.py new file mode 100644 index 000000000..837b46a8f --- /dev/null +++ b/src/eduid/workers/user_cleaner/testing/test_skv.py @@ -0,0 +1,7 @@ +from eduid.common.testing_base import CommonTestCase +from eduid.userdb.fixtures.users import new_user_example, new_user_example2, new_unverified_user_example + + +class WorkerTest(CommonTestCase): + def setUp(self, *args, **kwargs): + super().setUp(am_users=[new_user_example, new_user_example2, new_unverified_user_example]) diff --git a/src/eduid/workers/user_cleaner/testing/test_workers.py b/src/eduid/workers/user_cleaner/testing/test_workers.py deleted file mode 100644 index a8627ebb2..000000000 --- a/src/eduid/workers/user_cleaner/testing/test_workers.py +++ /dev/null @@ -1,39 +0,0 @@ -import time -from typing import Dict - -from eduid.common.testing_base import CommonTestCase -from eduid.userdb.fixtures.users import new_user_example, new_user_example2, new_unverified_user_example - -from eduid.workers.user_cleaner.app import init_skv -from eduid.workers.user_cleaner.config import WorkerConfig - - -class WorkerTest(CommonTestCase): - def setUp(self, *args, **kwargs): - super().setUp(am_users=[new_user_example, new_user_example2, new_unverified_user_example]) - - self.test_config = self._get_config() - - def _get_config(self) -> Dict: - config = { - "mongo_uri": self.settings["mongo_uri"], - "test_runs": 4, - "workers": { - "skv": WorkerConfig( - user_count=10, - ), - "ladok": WorkerConfig( - user_count=10, - ), - }, - } - return config - - -class TestSKV(WorkerTest): - def setUp(self, *args, **kwargs): - super().setUp() - self.worker_skv = init_skv(test_config=self.test_config) - - def test_worker_skv(self): - self.worker_skv.run() From ce5b3be398c2a0d058bb146a02fd0b38c0e34133 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 5 Dec 2022 13:08:41 +0100 Subject: [PATCH 10/49] Add stats. --- src/eduid/workers/user_cleaner/app.py | 8 ++++++++ src/eduid/workers/user_cleaner/config.py | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 0d7c8d0f8..9bbd2985d 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -13,6 +13,7 @@ from eduid.userdb.identity import IdentityType from eduid.userdb.meta import CleanerType from eduid.common.logging import init_logging +from eduid.common.stats import init_app_stats from eduid.workers.user_cleaner.config import UserCleanerConfig @@ -26,9 +27,16 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None self.config = load_config(typ=UserCleanerConfig, app_name="user_cleaner", ns="worker", test_config=test_config) super().__init__() + + # stats + self.stats = init_app_stats(self.config) + self.stats.count(name="user_cleaner_stats") + + # logging self.logger = logging.getLogger(name=self.worker_name) init_logging(config=self.config) self.logger.info(f"initialize worker {self.worker_name}") + self.db = AmDB(db_uri=self.config.mongo_uri) self.shutdown_now = False diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index 707f4d0bb..2b3a036b5 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -3,7 +3,7 @@ from pydantic import Field, BaseModel from eduid.common.clients.gnap_client.base import GNAPClientAuthData -from eduid.common.config.base import LoggingConfigMixin, RootConfig, MsgConfigMixin, LoggingFilters +from eduid.common.config.base import LoggingConfigMixin, RootConfig, MsgConfigMixin, LoggingFilters, StatsConfigMixin class AmAPIConfig(BaseModel): @@ -11,7 +11,7 @@ class AmAPIConfig(BaseModel): tls_verify: bool -class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin): +class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin, StatsConfigMixin): log_filters: Sequence[LoggingFilters] = Field(default=[LoggingFilters.NAMES]) log_format: str = "{asctime} | {levelname:7} | {hostname} | {name:35} | {module:10} | {message}" mongo_uri: str = "" From 4392f4178bcb1f8d5e1a1c21dbb29a08e22e7404 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 5 Dec 2022 13:23:54 +0100 Subject: [PATCH 11/49] Add init files. --- src/eduid/workers/user_cleaner/__init__.py | 2 ++ src/eduid/workers/user_cleaner/workers/__init__.py | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 src/eduid/workers/user_cleaner/__init__.py create mode 100644 src/eduid/workers/user_cleaner/workers/__init__.py diff --git a/src/eduid/workers/user_cleaner/__init__.py b/src/eduid/workers/user_cleaner/__init__.py new file mode 100644 index 000000000..0379adfa8 --- /dev/null +++ b/src/eduid/workers/user_cleaner/__init__.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +__author__ = "masv" \ No newline at end of file diff --git a/src/eduid/workers/user_cleaner/workers/__init__.py b/src/eduid/workers/user_cleaner/workers/__init__.py new file mode 100644 index 000000000..0379adfa8 --- /dev/null +++ b/src/eduid/workers/user_cleaner/workers/__init__.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +__author__ = "masv" \ No newline at end of file From a72be83adef16f3b75c50079243385327f392833 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 5 Dec 2022 13:25:19 +0100 Subject: [PATCH 12/49] Cleand away unused folder. --- src/eduid/workers/user_cleaner/utils/skv.py | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 src/eduid/workers/user_cleaner/utils/skv.py diff --git a/src/eduid/workers/user_cleaner/utils/skv.py b/src/eduid/workers/user_cleaner/utils/skv.py deleted file mode 100644 index 1399da8cb..000000000 --- a/src/eduid/workers/user_cleaner/utils/skv.py +++ /dev/null @@ -1,12 +0,0 @@ -import logging -import time - -from eduid.userdb import User - - -def consume(user: User, logger: logging): - print("user", user) - time.sleep(5) - f = open("/tmp/cleaner.txt", "a") - f.write(user.eppn) - f.close() From 503bef0d9631138b61ef7f215e0f281652cd01f7 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 6 Dec 2022 13:51:20 +0100 Subject: [PATCH 13/49] Add dry run and change log to debug. --- src/eduid/userdb/meta.py | 3 ++- src/eduid/userdb/user.py | 4 ++- src/eduid/workers/user_cleaner/app.py | 4 +-- src/eduid/workers/user_cleaner/config.py | 1 + src/eduid/workers/user_cleaner/workers/skv.py | 25 +++++++++++-------- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/eduid/userdb/meta.py b/src/eduid/userdb/meta.py index 22d15ed2f..4e84530a9 100644 --- a/src/eduid/userdb/meta.py +++ b/src/eduid/userdb/meta.py @@ -2,7 +2,7 @@ from datetime import datetime from enum import Enum -from typing import Mapping, Optional +from typing import Dict, Mapping, Optional from bson import ObjectId from pydantic import BaseModel, Field @@ -22,6 +22,7 @@ class Meta(BaseModel): version: ObjectId = Field(default_factory=ObjectId) created_ts: datetime = Field(default_factory=utc_now) modified_ts: Optional[datetime] + cleaned: Optional[Dict[CleanerType, datetime]] class Config: arbitrary_types_allowed = True # allow ObjectId as type diff --git a/src/eduid/userdb/user.py b/src/eduid/userdb/user.py index 706ea4262..8b386310c 100644 --- a/src/eduid/userdb/user.py +++ b/src/eduid/userdb/user.py @@ -120,8 +120,10 @@ def check_revoked(cls, values: Dict[str, Any]): @root_validator() def update_meta_modified_ts(cls, values: Dict[str, Any]): - # as we validate on assignment this will run everytime the User is changed + # as we validate on assignment this will run every time the User is changed if values.get("modified_ts"): + if "meta" not in values: + values["meta"] = Meta values["meta"].modified_ts = values["modified_ts"] return values diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 9bbd2985d..0042fd6d0 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -23,7 +23,7 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None signal.signal(signal.SIGINT, self.exit_gracefully) signal.signal(signal.SIGTERM, self.exit_gracefully) - self.worker_name = str(cleaner_type.value) + self.worker_name = str(cleaner_type) self.config = load_config(typ=UserCleanerConfig, app_name="user_cleaner", ns="worker", test_config=test_config) super().__init__() @@ -86,7 +86,7 @@ def enqueuing(self, cleaning_type: CleanerType, identity_type: IdentityType, lim return for user in users: self.queue.put(user) - self.logger.info(f"enqueuing user: {user.eppn}") + self.logger.debug(f"enqueuing user: {user.eppn}") self.queue_actual_size = self.queue.qsize() self._populate_max_changes() diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index 2b3a036b5..4dd3f315b 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -15,6 +15,7 @@ class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin, StatsCon log_filters: Sequence[LoggingFilters] = Field(default=[LoggingFilters.NAMES]) log_format: str = "{asctime} | {levelname:7} | {hostname} | {name:35} | {module:10} | {message}" mongo_uri: str = "" + dry_run: bool = True debug: bool user_count: int change_quota: float diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index 0c5892db2..ba4a5eac8 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -15,7 +15,7 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None super().__init__(cleaner_type=cleaner_type.SKV, test_config=test_config) def update_name(self, user: User, navet_data: NavetData): - self.logger.debug(f"number of changes: {self.made_changes} out of {self.max_changes}, {self.queue_actual_size}") + self.logger.debug(f"number of changes: {self.made_changes} out of max_changes: {self.max_changes}, queue_actual_size: {self.queue_actual_size}") if navet_data.person.name.given_name is None or navet_data.person.name.surname is None: self.logger.info(f"No given_name or surname found in navet for eppn: {user.eppn}") @@ -32,17 +32,22 @@ def update_name(self, user: User, navet_data: NavetData): self.logger.debug(f"number of changes: {self.made_changes} out of {self.max_changes}, {self.queue_actual_size}") - self.amapi_client.update_user_name( - user=user.eppn, - body=UserUpdateNameRequest( - reason="SKV_NAME_UPDATE", - source=CleanerType.SKV.value, - given_name=updated_user.given_name, - display_name=updated_user.display_name, - surname=updated_user.surname, - ), + amapi_client_body =UserUpdateNameRequest( + reason="SKV_NAME_UPDATE", + source=CleanerType.SKV.value, + given_name=updated_user.given_name, + display_name=updated_user.display_name, + surname=updated_user.surname, ) + if self.config.dry_run: + self.logger.debug(f"dry_run: eppn: {user.eppn}, amapi_client_body: {amapi_client_body}") + else: + self.amapi_client.update_user_name( + user=user.eppn, + body=amapi_client_body, + ) + def run(self): while not self.shutdown_now: if self._is_quota_reached(): From e62b015ef376dfa8b8ca1a4c10ea21e81cdf2b94 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 6 Dec 2022 15:23:35 +0100 Subject: [PATCH 14/49] Add teleadress and some spelling. --- .vscode/settings.json | 5 +++++ src/eduid/userdb/meta.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 2206031f7..a464fc580 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,6 +20,7 @@ "authns", "baar", "backchannel", + "bson", "checkpw", "chpass", "csrft", @@ -36,6 +37,7 @@ "jsonify", "jwks", "klass", + "ladok", "lundberg", "masv", "mischttp", @@ -46,6 +48,7 @@ "proofings", "proquint", "PWAUTH", + "pydantic", "pysaml2", "reauthn", "replicaset", @@ -57,6 +60,8 @@ "svipe", "svipeid", "swamid", + "TELE", + "teleadress", "Testsson", "timeutil", "Unmarshal", diff --git a/src/eduid/userdb/meta.py b/src/eduid/userdb/meta.py index 4e84530a9..fac671e5b 100644 --- a/src/eduid/userdb/meta.py +++ b/src/eduid/userdb/meta.py @@ -14,7 +14,7 @@ class CleanerType(str, Enum): SKV = "skatteverket" - TELE = "tele" + TELE = "teleadress" LADOK = "ladok" From 90b0ae704b833c6e451deb779998297157d3e891 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 6 Dec 2022 18:04:46 +0100 Subject: [PATCH 15/49] Add health check. --- src/eduid/common/models/amapi_user.py | 6 +-- src/eduid/workers/user_cleaner/app.py | 19 ++++++++-- src/eduid/workers/user_cleaner/workers/skv.py | 37 ++++++++++--------- 3 files changed, 39 insertions(+), 23 deletions(-) diff --git a/src/eduid/common/models/amapi_user.py b/src/eduid/common/models/amapi_user.py index 08ee79bf0..f19a558aa 100644 --- a/src/eduid/common/models/amapi_user.py +++ b/src/eduid/common/models/amapi_user.py @@ -3,10 +3,10 @@ from typing import Dict, List, Optional from pydantic import BaseModel, Field -from common.misc.timeutil import utc_now +from eduid.common.misc.timeutil import utc_now from eduid.userdb import MailAddress, PhoneNumber -from eduid.userdb.meta import CleanedType, Meta +from eduid.userdb.meta import CleanerType, Meta __author__ = "masv" @@ -55,7 +55,7 @@ class UserUpdateLanguageRequest(UserBaseRequest): language: Optional[str] = None class UserUpdateMetaCleanedRequest(UserBaseRequest): - type: CleanedType + type: CleanerType ts: datetime = Field(default_factory=utc_now) class UserUpdateTerminateRequest(UserBaseRequest): diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 0042fd6d0..27f8c2dc0 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -1,11 +1,14 @@ import logging import signal +import os from abc import ABC from queue import Queue from typing import Optional, Dict +from pathlib import Path + from eduid.common.clients.amapi_client.amapi_client import AMAPIClient from eduid.common.config.parsers import load_config from eduid.common.rpc.msg_relay import MsgRelay @@ -47,6 +50,8 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None self.made_changes = 0 self.max_changes = 0.0 + self.healthy_path = "/tmp/healthy" + self.msg_relay = MsgRelay(self.config) self.amapi_client = AMAPIClient( @@ -54,10 +59,10 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None auth_data=self.config.gnap_auth_data, ) - self.logger.info(f"starting worker {cleaner_type.value}") + self.logger.info(f"Starting worker {cleaner_type.value}") def exit_gracefully(self, sig, frame) -> None: - self.logger.info(f"Recevied signal: {sig}, shutting down...") + self.logger.info(f"Received signal: {sig}, shutting down...") self.shutdown_now = True def _is_quota_reached(self) -> bool: @@ -75,13 +80,21 @@ def _populate_max_changes(self): ) self.max_changes = self.queue_actual_size * self.config.change_quota + + def _make_unhealthy(self) -> None: + os.remove(self.healthy_path) if os.path.exists(self.healthy_path) else None + + def _make_healthy(self) -> None: + Path(self.healthy_path).touch() + + def enqueuing(self, cleaning_type: CleanerType, identity_type: IdentityType, limit: int): users = self.db.get_uncleaned_verified_users( cleaned_type=cleaning_type, identity_type=identity_type, limit=limit, ) - if len(users) < 1: + if len(users) == 0: self.logger.warning(f"No users where enqueued") return for user in users: diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index ba4a5eac8..6dcd606bd 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -51,23 +51,26 @@ def update_name(self, user: User, navet_data: NavetData): def run(self): while not self.shutdown_now: if self._is_quota_reached(): - self.logger.warning(f"worker skatteverket has reached its change_quota, killing it") - break - if self.queue.empty(): - self.enqueuing( - cleaning_type=CleanerType.SKV, - identity_type=IdentityType.NIN, - limit=self.config.user_count, - ) - user = self.queue.get() - - navet_data = self.msg_relay.get_all_navet_data(nin=user.identities.nin.number) - - self.update_name(user=user, navet_data=navet_data) - - time.sleep(self.config.job_delay) - - self.queue.task_done() + self.logger.warning(f"worker skatteverket has reached its change_quota, sleep for 20 seconds") + self._make_unhealthy() + time.sleep(20.0) + else: + self._make_healthy() + if self.queue.empty(): + self.enqueuing( + cleaning_type=CleanerType.SKV, + identity_type=IdentityType.NIN, + limit=self.config.user_count, + ) + user = self.queue.get() + + navet_data = self.msg_relay.get_all_navet_data(nin=user.identities.nin.number) + + self.update_name(user=user, navet_data=navet_data) + + time.sleep(self.config.job_delay) + + self.queue.task_done() def init_skv_worker(test_config: Optional[Dict] = None) -> SKV: From 949032c45d78759b420c31e98d44f6c24f5722f9 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 7 Dec 2022 11:17:48 +0100 Subject: [PATCH 16/49] Add words to vscode. --- .vscode/settings.json | 5 +++++ src/eduid/workers/user_cleaner/workers/skv.py | 1 + 2 files changed, 6 insertions(+) diff --git a/.vscode/settings.json b/.vscode/settings.json index a464fc580..3b8b832a2 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -24,12 +24,15 @@ "checkpw", "chpass", "csrft", + "deepdiff", "eduid", "eidas", "eidplus", "Ekopost", "eppn", + "fastapi", "Freja", + "gnap", "hashname", "hashpw", "httpx", @@ -52,9 +55,11 @@ "pysaml2", "reauthn", "replicaset", + "respx", "scim", "SCIMAPI", "sess", + "SIGTERM", "skatteverket", "Svenska", "svipe", diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index 6dcd606bd..55bdc64ef 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -66,6 +66,7 @@ def run(self): navet_data = self.msg_relay.get_all_navet_data(nin=user.identities.nin.number) + # This won't work when more update-functions is added, due to meta.version will change after each save... self.update_name(user=user, navet_data=navet_data) time.sleep(self.config.job_delay) From 5cc7f84afb2bee2be86be3ff0f686c01d5b1d796 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 12 Dec 2022 15:40:36 +0100 Subject: [PATCH 17/49] Add comments and general sleep method for faster kills of the app. --- src/eduid/workers/user_cleaner/app.py | 33 ++++++++++++++----- src/eduid/workers/user_cleaner/config.py | 2 +- src/eduid/workers/user_cleaner/workers/skv.py | 22 +++++++------ 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 27f8c2dc0..cb5569e7d 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -4,6 +4,7 @@ from abc import ABC from queue import Queue +import time from typing import Optional, Dict @@ -22,12 +23,14 @@ class WorkerBase(ABC): - def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None): + """ + All workers base class, for collective functionality + """ + + def __init__(self, cleaner_type: str, test_config: Optional[Dict] = None): signal.signal(signal.SIGINT, self.exit_gracefully) signal.signal(signal.SIGTERM, self.exit_gracefully) - self.worker_name = str(cleaner_type) - self.config = load_config(typ=UserCleanerConfig, app_name="user_cleaner", ns="worker", test_config=test_config) super().__init__() @@ -36,9 +39,9 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None self.stats.count(name="user_cleaner_stats") # logging - self.logger = logging.getLogger(name=self.worker_name) + self.logger = logging.getLogger(name=cleaner_type) init_logging(config=self.config) - self.logger.info(f"initialize worker {self.worker_name}") + self.logger.info(f"initialize worker {cleaner_type}") self.db = AmDB(db_uri=self.config.mongo_uri) @@ -59,36 +62,50 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None auth_data=self.config.gnap_auth_data, ) - self.logger.info(f"Starting worker {cleaner_type.value}") + self.logger.info(f"Starting worker {cleaner_type}") def exit_gracefully(self, sig, frame) -> None: + # set variable shutdown_now to True when triggered of SIGTERM or SIGINT self.logger.info(f"Received signal: {sig}, shutting down...") self.shutdown_now = True def _is_quota_reached(self) -> bool: + # return true if change quota is reached, else false + # used to prevent a change function to exceed its indented change quota (minimize damage) if self.made_changes == 0: return False self.logger.debug(f"is_quota_reached:: made_changes: {self.made_changes}, max_changes: {self.max_changes}") return self.made_changes >= self.max_changes def _add_to_made_changes(self) -> None: + # helper to add to made_changes when a change has happened. self.made_changes += 1 - def _populate_max_changes(self): + def _populate_max_changes(self) -> None: + # helper to calculate the maximum allowed changes self.logger.debug( f"populate_max_changes:: queue_actual_size: {self.queue_actual_size}, change_quota: {self.made_changes}" ) self.max_changes = self.queue_actual_size * self.config.change_quota - def _make_unhealthy(self) -> None: + # make Docker health status to unhealthy os.remove(self.healthy_path) if os.path.exists(self.healthy_path) else None def _make_healthy(self) -> None: + # make Docker health status to healthy Path(self.healthy_path).touch() + def _sleep(self, sleep_time: int) -> None: + # sleeps, but respects self.shutdown_now variable, + # then we do not need to wait for the sleep to finish in order to kill this app. + for i in range(0, sleep_time, 1): + if self.shutdown_now: + return + time.sleep(sleep_time) def enqueuing(self, cleaning_type: CleanerType, identity_type: IdentityType, limit: int): + # add User objects to queue for further processing users = self.db.get_uncleaned_verified_users( cleaned_type=cleaning_type, identity_type=identity_type, diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index 4dd3f315b..ca361d330 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -19,6 +19,6 @@ class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin, StatsCon debug: bool user_count: int change_quota: float - job_delay: float = 1.0 + job_delay: int = 1 gnap_auth_data: GNAPClientAuthData amapi: AmAPIConfig diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index 55bdc64ef..6df02ad76 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -1,7 +1,7 @@ import time from typing import Optional, Dict -from eduid.common.models.amapi_user import UserUpdateNameRequest +from eduid.common.models.amapi_user import UserUpdateNameRequest, Reason, Source from eduid.common.rpc.msg_relay import NavetData from eduid.common.utils import set_user_names_from_official_address from eduid.userdb import User @@ -15,7 +15,9 @@ def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None super().__init__(cleaner_type=cleaner_type.SKV, test_config=test_config) def update_name(self, user: User, navet_data: NavetData): - self.logger.debug(f"number of changes: {self.made_changes} out of max_changes: {self.max_changes}, queue_actual_size: {self.queue_actual_size}") + self.logger.debug( + f"number of changes: {self.made_changes} out of max_changes: {self.max_changes}, queue_actual_size: {self.queue_actual_size}" + ) if navet_data.person.name.given_name is None or navet_data.person.name.surname is None: self.logger.info(f"No given_name or surname found in navet for eppn: {user.eppn}") @@ -32,12 +34,12 @@ def update_name(self, user: User, navet_data: NavetData): self.logger.debug(f"number of changes: {self.made_changes} out of {self.max_changes}, {self.queue_actual_size}") - amapi_client_body =UserUpdateNameRequest( - reason="SKV_NAME_UPDATE", - source=CleanerType.SKV.value, - given_name=updated_user.given_name, - display_name=updated_user.display_name, - surname=updated_user.surname, + amapi_client_body = UserUpdateNameRequest( + reason=Reason.NAME_CHANGED.value, + source=Source.SKV_NAVET_V2.value, + given_name=updated_user.given_name, + display_name=updated_user.display_name, + surname=updated_user.surname, ) if self.config.dry_run: @@ -53,7 +55,7 @@ def run(self): if self._is_quota_reached(): self.logger.warning(f"worker skatteverket has reached its change_quota, sleep for 20 seconds") self._make_unhealthy() - time.sleep(20.0) + self._sleep(sleep_time=20) else: self._make_healthy() if self.queue.empty(): @@ -69,7 +71,7 @@ def run(self): # This won't work when more update-functions is added, due to meta.version will change after each save... self.update_name(user=user, navet_data=navet_data) - time.sleep(self.config.job_delay) + self._sleep(sleep_time=self.config.job_delay) self.queue.task_done() From 297531a0189d414d28d8f1229b16e09cb6149835 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 12 Dec 2022 16:30:39 +0100 Subject: [PATCH 18/49] Add words to dictionary. --- .vscode/settings.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.vscode/settings.json b/.vscode/settings.json index 555443671..7a0d6dfa5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -17,6 +17,7 @@ "python.envFile": "${workspaceFolder}/.vscode/.env", "cSpell.words": [ "amapi", + "amdb", "authlib", "authns", "baar", @@ -38,6 +39,7 @@ "hashpw", "httpx", "hubba", + "IDNIN", "jsonify", "jwks", "klass", @@ -49,6 +51,8 @@ "navet", "NORDUnet", "oidc", + "orcid", + "prid", "proofings", "proquint", "PWAUTH", @@ -66,6 +70,7 @@ "svipe", "svipeid", "swamid", + "swedenconnect", "TELE", "teleadress", "Testsson", From 393ed719567a9975846f6f3520625bd77d55058d Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 12 Dec 2022 16:32:51 +0100 Subject: [PATCH 19/49] Better syntax. --- src/eduid/workers/user_cleaner/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index cb5569e7d..abf078cc2 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -90,7 +90,8 @@ def _populate_max_changes(self) -> None: def _make_unhealthy(self) -> None: # make Docker health status to unhealthy - os.remove(self.healthy_path) if os.path.exists(self.healthy_path) else None + if os.path.exists(self.healthy_path): + os.remove(self.healthy_path) def _make_healthy(self) -> None: # make Docker health status to healthy From e878033db562f9a418ea97375de19de25800f9b1 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 13 Dec 2022 09:43:54 +0100 Subject: [PATCH 20/49] Add milliseconds sleep. --- src/eduid/workers/user_cleaner/app.py | 6 +++--- src/eduid/workers/user_cleaner/config.py | 7 ++++++- src/eduid/workers/user_cleaner/workers/skv.py | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index abf078cc2..f296358e3 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -97,13 +97,13 @@ def _make_healthy(self) -> None: # make Docker health status to healthy Path(self.healthy_path).touch() - def _sleep(self, sleep_time: int) -> None: + def _sleep(self, milliseconds: int) -> None: # sleeps, but respects self.shutdown_now variable, # then we do not need to wait for the sleep to finish in order to kill this app. - for i in range(0, sleep_time, 1): + for i in range(0, milliseconds, 1): if self.shutdown_now: return - time.sleep(sleep_time) + time.sleep(1 / milliseconds) def enqueuing(self, cleaning_type: CleanerType, identity_type: IdentityType, limit: int): # add User objects to queue for further processing diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index ca361d330..2135d528d 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -18,7 +18,12 @@ class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin, StatsCon dry_run: bool = True debug: bool user_count: int + + # within + cleaning_periodicity: int change_quota: float - job_delay: int = 1 + + # in milliseconds + job_delay: int = 1000 gnap_auth_data: GNAPClientAuthData amapi: AmAPIConfig diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index 6df02ad76..469a6788c 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -55,7 +55,7 @@ def run(self): if self._is_quota_reached(): self.logger.warning(f"worker skatteverket has reached its change_quota, sleep for 20 seconds") self._make_unhealthy() - self._sleep(sleep_time=20) + self._sleep(milliseconds=20000) else: self._make_healthy() if self.queue.empty(): @@ -71,7 +71,7 @@ def run(self): # This won't work when more update-functions is added, due to meta.version will change after each save... self.update_name(user=user, navet_data=navet_data) - self._sleep(sleep_time=self.config.job_delay) + self._sleep(milliseconds=self.config.job_delay) self.queue.task_done() From dce92b80c539f6fdac9753e852b9da4f5f67510f Mon Sep 17 00:00:00 2001 From: masv3971 Date: Fri, 23 Dec 2022 12:36:36 +0100 Subject: [PATCH 21/49] untangle files: cleaner related files. --- src/eduid/workers/user_cleaner/app.py | 101 +++++++++++++----- src/eduid/workers/user_cleaner/config.py | 11 +- .../workers/user_cleaner/testing/test_app.py | 52 +++++++++ .../workers/user_cleaner/testing/test_skv.py | 51 +++++++++ src/eduid/workers/user_cleaner/workers/skv.py | 64 ++++++----- 5 files changed, 224 insertions(+), 55 deletions(-) create mode 100644 src/eduid/workers/user_cleaner/testing/test_app.py diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index f296358e3..b8863d2a7 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -1,3 +1,4 @@ +from dataclasses import Field import logging import signal import os @@ -6,7 +7,8 @@ from queue import Queue import time -from typing import Optional, Dict +from typing import Any, List, Optional, Dict +from pydantic import BaseModel from pathlib import Path @@ -20,6 +22,7 @@ from eduid.common.stats import init_app_stats from eduid.workers.user_cleaner.config import UserCleanerConfig +from eduid.userdb.user import User class WorkerBase(ABC): @@ -27,7 +30,7 @@ class WorkerBase(ABC): All workers base class, for collective functionality """ - def __init__(self, cleaner_type: str, test_config: Optional[Dict] = None): + def __init__(self, cleaner_type: CleanerType, identity_type: IdentityType, test_config: Optional[Dict] = None): signal.signal(signal.SIGINT, self.exit_gracefully) signal.signal(signal.SIGTERM, self.exit_gracefully) @@ -38,16 +41,19 @@ def __init__(self, cleaner_type: str, test_config: Optional[Dict] = None): self.stats = init_app_stats(self.config) self.stats.count(name="user_cleaner_stats") + self.cleaner_type = cleaner_type + self.identity_type = identity_type + # logging - self.logger = logging.getLogger(name=cleaner_type) + self.logger = logging.getLogger(name=self.cleaner_type) init_logging(config=self.config) - self.logger.info(f"initialize worker {cleaner_type}") + self.logger.info(f"initialize worker {self.cleaner_type}") self.db = AmDB(db_uri=self.config.mongo_uri) self.shutdown_now = False - self.queue = Queue(maxsize=self.config.user_count) + self.queue: "Queue[dict[str, Any]]" = Queue(maxsize=0) self.queue_actual_size = 0 self.made_changes = 0 @@ -61,65 +67,112 @@ def __init__(self, cleaner_type: str, test_config: Optional[Dict] = None): amapi_url=self.config.amapi.url, auth_data=self.config.gnap_auth_data, ) + self.execution_delay = self.get_delay_time() self.logger.info(f"Starting worker {cleaner_type}") def exit_gracefully(self, sig, frame) -> None: - # set variable shutdown_now to True when triggered of SIGTERM or SIGINT + """ + Set variable shutdown_now to True when SIGTERM or SIGINT is triggered + """ self.logger.info(f"Received signal: {sig}, shutting down...") self.shutdown_now = True def _is_quota_reached(self) -> bool: - # return true if change quota is reached, else false - # used to prevent a change function to exceed its indented change quota (minimize damage) + """ + Return true if change quota is reached, else false. + Used to prevent a change function to exceed its indented change quota (minimize damage) + """ if self.made_changes == 0: return False self.logger.debug(f"is_quota_reached:: made_changes: {self.made_changes}, max_changes: {self.max_changes}") return self.made_changes >= self.max_changes def _add_to_made_changes(self) -> None: - # helper to add to made_changes when a change has happened. + """ + Helper to add to made_changes after a change. + """ self.made_changes += 1 def _populate_max_changes(self) -> None: - # helper to calculate the maximum allowed changes + """ + Helper to calculate the maximum allowed changes + """ self.logger.debug( f"populate_max_changes:: queue_actual_size: {self.queue_actual_size}, change_quota: {self.made_changes}" ) self.max_changes = self.queue_actual_size * self.config.change_quota def _make_unhealthy(self) -> None: - # make Docker health status to unhealthy + """ + Make Docker health status to unhealthy + """ if os.path.exists(self.healthy_path): os.remove(self.healthy_path) def _make_healthy(self) -> None: - # make Docker health status to healthy + """ + Make Docker health status to healthy + """ Path(self.healthy_path).touch() def _sleep(self, milliseconds: int) -> None: - # sleeps, but respects self.shutdown_now variable, - # then we do not need to wait for the sleep to finish in order to kill this app. - for i in range(0, milliseconds, 1): + """ + Sleep with an eye on self.shutdown_now variable, + then we do not need to wait for the complete sleep duration to finish in order to kill this app. + """ + self.logger.debug(f"Entering sleep cycle {milliseconds}") + index = 0 + while index != milliseconds: + index += 1 if self.shutdown_now: return - time.sleep(1 / milliseconds) + time.sleep(0.001) + + def get_delay_time(self) -> int: + """ + Since a given dataset will differ in size and execution time, a calculated delay time has to be made. + example: time_to_clean_dataset = 10000 milliseconds, user_count = 10, minimum_delay = 1 milliseconds + --> 10000 / (10*1) => 1000 milliseconds/user document + each user document have to sleep for 1000 milliseconds to be done with in time (10000 milliseconds). + """ + user_count = self.db.get_verified_users_count(identity_type=self.identity_type) + time_per_user = self.config.time_to_clean_dataset / (user_count * (self.config.minimum_delay + 1)) + if time_per_user < self.config.minimum_delay: + self.logger.warning(f"execution will be faster then minimum execution time: {self.config.minimum_delay}") + return int(time_per_user) + + def _populate_queue(self, users: List[User]) -> None: + """Populate queue with user representation""" + + for user in users: + queue_object = { + "eppn": user.eppn, + "nin": None, + } + if user.identities.nin is not None: + queue_object["nin"] = user.identities.nin.number + self.queue.put(queue_object) + self.logger.debug(f"enqueuing user: {user.eppn}") + + def enqueuing(self) -> None: + """Add users to queue for further processing""" - def enqueuing(self, cleaning_type: CleanerType, identity_type: IdentityType, limit: int): - # add User objects to queue for further processing users = self.db.get_uncleaned_verified_users( - cleaned_type=cleaning_type, - identity_type=identity_type, - limit=limit, + cleaned_type=self.cleaner_type, + identity_type=self.identity_type, + limit=10000000, ) if len(users) == 0: self.logger.warning(f"No users where enqueued") return - for user in users: - self.queue.put(user) - self.logger.debug(f"enqueuing user: {user.eppn}") + self._populate_queue(users=users) self.queue_actual_size = self.queue.qsize() self._populate_max_changes() self.made_changes = 0 + + +def init_worker_base(cleaner_type: CleanerType, identity_type: IdentityType, test_config: dict[str, Any]) -> WorkerBase: + return WorkerBase(cleaner_type=cleaner_type, identity_type=identity_type, test_config=test_config) diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index 2135d528d..097001109 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -17,13 +17,14 @@ class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin, StatsCon mongo_uri: str = "" dry_run: bool = True debug: bool - user_count: int - # within - cleaning_periodicity: int change_quota: float - # in milliseconds - job_delay: int = 1000 + # amount of time to clean dataset, value in milliseconds + time_to_clean_dataset: int = 2592000000 + + # minimum time to delay each execution in milliseconds + minimum_delay: int = 1000 + gnap_auth_data: GNAPClientAuthData amapi: AmAPIConfig diff --git a/src/eduid/workers/user_cleaner/testing/test_app.py b/src/eduid/workers/user_cleaner/testing/test_app.py new file mode 100644 index 000000000..f8a4aaaaf --- /dev/null +++ b/src/eduid/workers/user_cleaner/testing/test_app.py @@ -0,0 +1,52 @@ +from typing import Any +from eduid.common.testing_base import CommonTestCase +from eduid.userdb.fixtures.users import new_user_example, new_user_example2, new_unverified_user_example +from eduid.workers.user_cleaner.app import init_worker_base +from eduid.userdb.identity import IdentityType +from eduid.userdb.meta import CleanerType + + +class AppTest(CommonTestCase): + def setUp(self, *args, **kwargs): + super().setUp(am_users=[new_user_example, new_user_example2, new_unverified_user_example]) + + self.app = init_worker_base( + cleaner_type=CleanerType.SKV, identity_type=IdentityType.NIN, test_config=self._get_config() + ) + + def _get_config(self) -> dict[str, Any]: + return { + "app_name": "test", + "mongo_uri": self.settings["mongo_uri"], + "minimum_delay": 1000, + "time_to_clean_dataset": 2592000000, + "debug": True, + "change_quota": 2, + "amapi": { + "url": "", + "tls_verify": False, + }, + "celery": {}, + "gnap_auth_data": { + "auth_server_url": "", + "auth_server_verify": True, + "key_name": "", + "client_jwk": {}, + "access": [""], # List[Union[str, Access]] = Field(default_factory=list) + }, + } + + def test_populate_queue(self): + users = [new_user_example, new_user_example2, new_unverified_user_example] + self.app._populate_queue(users=users) + got_user1 = self.app.queue.get() + assert got_user1["eppn"] == new_user_example.eppn + assert got_user1["nin"] == new_user_example.identities.nin.number + + got_user2 = self.app.queue.get() + assert got_user2["eppn"] == new_user_example2.eppn + assert got_user2["nin"] == new_user_example2.identities.nin.number + + def test_get_delay_time(self): + got = self.app.get_delay_time() + assert got == 1294705 diff --git a/src/eduid/workers/user_cleaner/testing/test_skv.py b/src/eduid/workers/user_cleaner/testing/test_skv.py index 837b46a8f..517c2b991 100644 --- a/src/eduid/workers/user_cleaner/testing/test_skv.py +++ b/src/eduid/workers/user_cleaner/testing/test_skv.py @@ -1,7 +1,58 @@ +from typing import Any, Dict from eduid.common.testing_base import CommonTestCase from eduid.userdb.fixtures.users import new_user_example, new_user_example2, new_unverified_user_example +from eduid.workers.user_cleaner.workers.skv import init_skv_worker +from eduid.userdb.identity import IdentityType +from eduid.userdb.meta import CleanerType +from eduid.common.rpc.msg_relay import Name, NavetData, OfficialAddress, Person, PostalAddresses class WorkerTest(CommonTestCase): def setUp(self, *args, **kwargs): super().setUp(am_users=[new_user_example, new_user_example2, new_unverified_user_example]) + + self.skv = init_skv_worker(test_config=self._get_config()) + + def _get_config(self) -> Dict[str, Any]: + return { + "app_name": "test", + "mongo_uri": self.settings["mongo_uri"], + "minimum_delay": 1, + "time_to_clean_dataset": 30, + "debug": True, + "change_quota": 2, + "amapi": { + "url": "", + "tls_verify": False, + }, + "celery": {}, + "gnap_auth_data": { + "auth_server_url": "", + "auth_server_verify": True, + "key_name": "", + "client_jwk": {}, + "access": [""], # List[Union[str, Access]] = Field(default_factory=list) + }, + } + + def test_update_name(self): + queue_user = { + "eppn": new_user_example.eppn, + "nin": new_user_example.identities.nin.number, + } + navet_data = NavetData( + person=Person( + name=Name( + # given_name_marking: Optional[str] = Field(default=None, alias="GivenNameMarking") + given_name="test_given_name", + # middle_name="test_middle_name": Optional[str] = Field(default=None, alias="MiddleName") + surname="test_surname", + ), + ), + ) + self.skv.update_name(queue_user=queue_user, navet_data=navet_data) + + got = self.skv.db.get_user_by_eppn(new_user_example.eppn) + + assert got.given_name == "test_given_name" + assert got.surname == "test_surname" diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index 469a6788c..5c620ad8f 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -1,5 +1,5 @@ import time -from typing import Optional, Dict +from typing import Any, Optional, Dict from eduid.common.models.amapi_user import UserUpdateNameRequest, Reason, Source from eduid.common.rpc.msg_relay import NavetData @@ -11,23 +11,37 @@ class SKV(WorkerBase): - def __init__(self, cleaner_type: CleanerType, test_config: Optional[Dict] = None): - super().__init__(cleaner_type=cleaner_type.SKV, test_config=test_config) + """Worker class for Skatteverket""" + + def __init__(self, test_config: Optional[Dict] = None): + super().__init__(cleaner_type=CleanerType.SKV, identity_type=IdentityType.NIN, test_config=test_config) + + def update_name(self, queue_user: dict[str, Any], navet_data: NavetData) -> None: + """ + Update-function for updating "given name" and "surname" from skatteverket, and creates new "display name" if needed. + """ + + db_user = self.db.get_user_by_eppn(eppn=queue_user["eppn"]) + if db_user is None: + return - def update_name(self, user: User, navet_data: NavetData): self.logger.debug( f"number of changes: {self.made_changes} out of max_changes: {self.max_changes}, queue_actual_size: {self.queue_actual_size}" ) if navet_data.person.name.given_name is None or navet_data.person.name.surname is None: - self.logger.info(f"No given_name or surname found in navet for eppn: {user.eppn}") + self.logger.info(f"No given_name or surname found in navet for eppn: {db_user.eppn}") return - if user.given_name == navet_data.person.name.given_name and user.surname == navet_data.person.name.surname: - self.logger.info(f"No update for names for eppn: {user.eppn}") + + if ( + db_user.given_name == navet_data.person.name.given_name + and db_user.surname == navet_data.person.name.surname + ): + self.logger.info(f"No update for names for eppn: {db_user.eppn}") return updated_user = set_user_names_from_official_address( - user=user, user_postal_address=navet_data.get_full_postal_address() + user=db_user, user_postal_address=navet_data.get_full_postal_address() ) self._add_to_made_changes() @@ -43,45 +57,43 @@ def update_name(self, user: User, navet_data: NavetData): ) if self.config.dry_run: - self.logger.debug(f"dry_run: eppn: {user.eppn}, amapi_client_body: {amapi_client_body}") + self.logger.debug(f"dry_run: eppn: {db_user.eppn}, amapi_client_body: {amapi_client_body}") else: self.amapi_client.update_user_name( - user=user.eppn, + user=db_user.eppn, body=amapi_client_body, ) def run(self): + """skatteverket worker entry point""" while not self.shutdown_now: if self._is_quota_reached(): self.logger.warning(f"worker skatteverket has reached its change_quota, sleep for 20 seconds") self._make_unhealthy() self._sleep(milliseconds=20000) - else: - self._make_healthy() - if self.queue.empty(): - self.enqueuing( - cleaning_type=CleanerType.SKV, - identity_type=IdentityType.NIN, - limit=self.config.user_count, - ) - user = self.queue.get() - navet_data = self.msg_relay.get_all_navet_data(nin=user.identities.nin.number) + self._make_healthy() + if self.queue.empty(): + self.enqueuing() + queue_user = self.queue.get() + + navet_data = self.msg_relay.get_all_navet_data(nin=queue_user["nin"]) - # This won't work when more update-functions is added, due to meta.version will change after each save... - self.update_name(user=user, navet_data=navet_data) + self.update_name(queue_user=queue_user, navet_data=navet_data) - self._sleep(milliseconds=self.config.job_delay) + self.queue.task_done() - self.queue.task_done() + self._sleep(milliseconds=self.execution_delay) -def init_skv_worker(test_config: Optional[Dict] = None) -> SKV: - worker = SKV(cleaner_type=CleanerType.SKV, test_config=test_config) +def init_skv_worker(test_config: Optional[dict[str, Any]] = None) -> SKV: + """Initialize skv (skatteverket) worker""" + worker = SKV(test_config=test_config) return worker def start_worker(): + """Start skv (skatteverket) worker""" worker = init_skv_worker() worker.run() From 38ac3a840fec0a5aecb7d0e68e0db5171adcc177 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Thu, 26 Jan 2023 19:11:57 +0100 Subject: [PATCH 22/49] Add wait, meta and cache. --- .vscode/settings.json | 5 +- src/eduid/userdb/tests/test_user_cleaner.py | 14 ++ src/eduid/userdb/user_cleaner/cache.py | 44 +++++++ src/eduid/userdb/user_cleaner/cachedb.py | 73 +++++++++++ src/eduid/userdb/user_cleaner/meta.py | 27 ++++ src/eduid/userdb/user_cleaner/metadb.py | 30 +++++ src/eduid/workers/user_cleaner/app.py | 121 ++++++++---------- src/eduid/workers/user_cleaner/config.py | 9 +- .../workers/user_cleaner/testing/test_app.py | 21 ++- .../workers/user_cleaner/testing/test_skv.py | 9 +- src/eduid/workers/user_cleaner/workers/skv.py | 63 ++++++--- 11 files changed, 320 insertions(+), 96 deletions(-) create mode 100644 src/eduid/userdb/tests/test_user_cleaner.py create mode 100644 src/eduid/userdb/user_cleaner/cache.py create mode 100644 src/eduid/userdb/user_cleaner/cachedb.py create mode 100644 src/eduid/userdb/user_cleaner/meta.py create mode 100644 src/eduid/userdb/user_cleaner/metadb.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 3ebc6e683..370e38de6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -23,7 +23,7 @@ "baar", "backchannel", "backupdir", - "bson", + "bson", "checkpw", "chpass", "csrft", @@ -49,6 +49,7 @@ "mischttp", "nameid", "navet", + "nins", "NORDUnet", "oidc", "orcid", @@ -87,4 +88,4 @@ "zxcvbn" ], "python.formatting.provider": "black" -} +} \ No newline at end of file diff --git a/src/eduid/userdb/tests/test_user_cleaner.py b/src/eduid/userdb/tests/test_user_cleaner.py new file mode 100644 index 000000000..488b73ab9 --- /dev/null +++ b/src/eduid/userdb/tests/test_user_cleaner.py @@ -0,0 +1,14 @@ +from unittest import TestCase + +from eduid.userdb.testing import MongoTemporaryInstance +from eduid.userdb.user_cleaner.cachedb import CacheDB +from eduid.userdb.user_cleaner.cache import CacheUser + + +class TestUserCleanerCache(TestCase): + def setUp(self): + self.tmp_db = MongoTemporaryInstance.get_instance() + self.user_cleaner_meta_db = CacheDB(db_uri=self.tmp_db.uri, collection="skv_cache") + + def tearDown(self): + self.user_cleaner_meta_db._drop_whole_collection() diff --git a/src/eduid/userdb/user_cleaner/cache.py b/src/eduid/userdb/user_cleaner/cache.py new file mode 100644 index 000000000..41c5b0e53 --- /dev/null +++ b/src/eduid/userdb/user_cleaner/cache.py @@ -0,0 +1,44 @@ +from typing import Any, Mapping, Optional +from pydantic import BaseModel, Field +from datetime import datetime +from eduid.common.misc.timeutil import utc_now + +from eduid.userdb.db import TUserDbDocument +from eduid.userdb.user import User + + +class CacheUser(BaseModel): + eppn: str + created_ts: datetime = Field(default_factory=utc_now) + next_run_ts: Optional[int] = None + + def to_dict(self) -> TUserDbDocument: + """ + Convert Element to a dict in eduid format, that can be used to reconstruct the + Element later. + """ + data = self.dict(exclude_none=True) + + return TUserDbDocument(data) + + @classmethod + def from_dict(cls, data: Mapping[str, Any]) -> "CacheUser": + """Convert a dict to a Element object.""" + return cls(**data) + + def next_run_ts_iso8601(self) -> str: + """ + Convert the next_run_ts to ISO8601 format. + """ + if self.next_run_ts is None: + return "" + return datetime.utcfromtimestamp(self.next_run_ts).strftime("%Y-%m-%d %H:%M:%S") + + def from_user(self, data: User) -> "CacheUser": + """ + Convert a User object to a QueueUser object. + """ + queue_user = CacheUser( + eppn=data.eppn, + ) + return queue_user diff --git a/src/eduid/userdb/user_cleaner/cachedb.py b/src/eduid/userdb/user_cleaner/cachedb.py new file mode 100644 index 000000000..a6f3fd7e7 --- /dev/null +++ b/src/eduid/userdb/user_cleaner/cachedb.py @@ -0,0 +1,73 @@ +from abc import ABC +from time import time +import logging +from eduid.userdb.db import BaseDB, TUserDbDocument +from eduid.userdb.user import User +from eduid.userdb.user_cleaner.cache import CacheUser + +logger = logging.getLogger(__name__) + + +class CacheDB(BaseDB): + """Database class for the user cleaning database cache.""" + + def __init__(self, db_uri: str, collection: str, db_name: str = "eduid_user_cleaner"): + super().__init__(db_uri, db_name, collection) + indexes = {"unique-eppn": {"key": [("eppn", 1)], "unique": True}} + + self.setup_indexes(indexes) + + def save(self, queue_user: CacheUser) -> bool: + """Save a CacheUser object to the database.""" + if self.exists(queue_user.eppn): + logger.debug(f"User {queue_user.eppn} already exists in the queue") + return False + self._coll.insert_one(queue_user.to_dict()) + return True + + def exists(self, eppn: str) -> bool: + """Check if a user exists in the cache.""" + return self._coll.count_documents({"eppn": eppn}) > 0 + + def get_all(self) -> list[CacheUser]: + """Get all users from the cache.""" + res = self._coll.find({}) + return [CacheUser.from_dict(data=doc) for doc in res] + + def count(self) -> int: + """Count the number of users in the queue.""" + return self._coll.count_documents({}) + + def delete(self, eppn: str) -> None: + """delete one user from the cache.""" + self._coll.delete_one({"eppn": eppn}) + + def delete_all(self) -> None: + """Delete all users from the cache.""" + self._coll.delete_many({}) + + def populate( + self, + am_users: list[User], + periodicity: int, + ) -> None: + """Populate cache database with the user from AMDB.""" + + cache_size = len(am_users) + + periodicity_in_seconds = 22 * 60 * 60 * periodicity # strip 2 hours of each day in order to give us some slack + time_constant = int(periodicity_in_seconds / cache_size) + + next_run_ts = int(time()) + (60 * 60) # first process window starts in 1 hour, then the population is done + for am_user in am_users: + next_run_ts += time_constant + queue_user = CacheUser( + eppn=am_user.eppn, + next_run_ts=next_run_ts, + ) + + self.save(queue_user) + + def is_empty(self) -> bool: + """Check if the cache is empty.""" + return self.count() == 0 diff --git a/src/eduid/userdb/user_cleaner/meta.py b/src/eduid/userdb/user_cleaner/meta.py new file mode 100644 index 000000000..0b5d8dc28 --- /dev/null +++ b/src/eduid/userdb/user_cleaner/meta.py @@ -0,0 +1,27 @@ +from typing import Any, Mapping +from datetime import datetime +from pydantic import BaseModel, Field + +from eduid.common.misc.timeutil import utc_now +from eduid.userdb.db import TUserDbDocument +from eduid.userdb.meta import CleanerType + + +class Meta(BaseModel): + periodicity: int + cleaner_type: CleanerType + created_ts: datetime = Field(default_factory=utc_now) + + def to_dict(self) -> TUserDbDocument: + """ + Convert Element to a dict in eduid format, that can be used to reconstruct the + Element later. + """ + data = self.dict(exclude_none=True) + + return TUserDbDocument(data) + + @classmethod + def from_dict(cls, data: Mapping[str, Any]) -> "Meta": + """Convert a dict to a Element object.""" + return cls(**data) diff --git a/src/eduid/userdb/user_cleaner/metadb.py b/src/eduid/userdb/user_cleaner/metadb.py new file mode 100644 index 000000000..94a17d396 --- /dev/null +++ b/src/eduid/userdb/user_cleaner/metadb.py @@ -0,0 +1,30 @@ +from typing import Union +from eduid.userdb.db import BaseDB +from eduid.userdb.user_cleaner.meta import Meta +from eduid.userdb.meta import CleanerType + + +class MetaDB(BaseDB): + """Database class for the meta database.""" + + def __init__(self, db_uri: str, collection: str = "meta", db_name: str = "eduid_user_cleaner"): + super().__init__(db_uri, db_name, collection) + indexes = {"unique-eppn": {"key": [("worker_name", 1)], "unique": True}} + + self.setup_indexes(indexes) + + def save(self, doc: Meta) -> bool: + """Save och replace an existing meta document.""" + res = self._coll.replace_one({"cleaner_type": doc.cleaner_type}, doc.to_dict(), upsert=True) + return res.acknowledged + + def get(self, cleaner_type: CleanerType) -> Union[Meta, None]: + """Get a worker meta from Meta.""" + res = self._coll.find_one({"cleaner_type": cleaner_type}) + if res is None: + return None + return Meta.from_dict(data=res) + + def exist(self, cleaner_type: CleanerType) -> bool: + """Check if a user exists in the cache.""" + return self._coll.count_documents({"cleaner_type": cleaner_type}) > 0 diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index b8863d2a7..255249d77 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -1,4 +1,5 @@ from dataclasses import Field +from datetime import datetime import logging import signal import os @@ -7,8 +8,7 @@ from queue import Queue import time -from typing import Any, List, Optional, Dict -from pydantic import BaseModel +from typing import Any, Mapping, Optional, Union from pathlib import Path @@ -22,17 +22,20 @@ from eduid.common.stats import init_app_stats from eduid.workers.user_cleaner.config import UserCleanerConfig -from eduid.userdb.user import User +from eduid.userdb.user_cleaner.cachedb import CacheDB +from eduid.userdb.user_cleaner.cache import CacheUser +from eduid.userdb.user_cleaner.metadb import MetaDB +from eduid.userdb.user_cleaner.meta import Meta class WorkerBase(ABC): - """ - All workers base class, for collective functionality - """ + """All workers base class, for collective functionality""" - def __init__(self, cleaner_type: CleanerType, identity_type: IdentityType, test_config: Optional[Dict] = None): - signal.signal(signal.SIGINT, self.exit_gracefully) - signal.signal(signal.SIGTERM, self.exit_gracefully) + def __init__( + self, cleaner_type: CleanerType, identity_type: IdentityType, test_config: Optional[Mapping[str, Any]] = None + ): + signal.signal(signal.SIGINT, self._exit_gracefully) + signal.signal(signal.SIGTERM, self._exit_gracefully) self.config = load_config(typ=UserCleanerConfig, app_name="user_cleaner", ns="worker", test_config=test_config) super().__init__() @@ -50,28 +53,27 @@ def __init__(self, cleaner_type: CleanerType, identity_type: IdentityType, test_ self.logger.info(f"initialize worker {self.cleaner_type}") self.db = AmDB(db_uri=self.config.mongo_uri) + self.db_cache: CacheDB + self.db_meta: MetaDB self.shutdown_now = False - self.queue: "Queue[dict[str, Any]]" = Queue(maxsize=0) + self.worker_queue: "Queue[dict[str, Union[str,int]]]" = Queue(maxsize=0) self.queue_actual_size = 0 self.made_changes = 0 self.max_changes = 0.0 - self.healthy_path = "/tmp/healthy" - self.msg_relay = MsgRelay(self.config) self.amapi_client = AMAPIClient( amapi_url=self.config.amapi.url, auth_data=self.config.gnap_auth_data, ) - self.execution_delay = self.get_delay_time() self.logger.info(f"Starting worker {cleaner_type}") - def exit_gracefully(self, sig, frame) -> None: + def _exit_gracefully(self, sig, frame) -> None: """ Set variable shutdown_now to True when SIGTERM or SIGINT is triggered """ @@ -104,71 +106,58 @@ def _populate_max_changes(self) -> None: self.max_changes = self.queue_actual_size * self.config.change_quota def _make_unhealthy(self) -> None: - """ - Make Docker health status to unhealthy - """ - if os.path.exists(self.healthy_path): - os.remove(self.healthy_path) + """Make Docker health status to unhealthy""" + + if os.path.exists(self.config.healthy_path): + os.remove(self.config.healthy_path) def _make_healthy(self) -> None: - """ - Make Docker health status to healthy - """ - Path(self.healthy_path).touch() + """Make Docker health status to healthy""" - def _sleep(self, milliseconds: int) -> None: - """ - Sleep with an eye on self.shutdown_now variable, - then we do not need to wait for the complete sleep duration to finish in order to kill this app. - """ - self.logger.debug(f"Entering sleep cycle {milliseconds}") - index = 0 - while index != milliseconds: - index += 1 + Path(self.config.healthy_path).touch() + + def _sleep(self, seconds: int) -> None: + """Sleep with an eye on shutdown_now""" + count = 0 + while count < seconds * 1000: + count += 1 if self.shutdown_now: + self.logger.debug("Shutdown now, exiting sleep") return time.sleep(0.001) - def get_delay_time(self) -> int: - """ - Since a given dataset will differ in size and execution time, a calculated delay time has to be made. - example: time_to_clean_dataset = 10000 milliseconds, user_count = 10, minimum_delay = 1 milliseconds - --> 10000 / (10*1) => 1000 milliseconds/user document - each user document have to sleep for 1000 milliseconds to be done with in time (10000 milliseconds). - """ - user_count = self.db.get_verified_users_count(identity_type=self.identity_type) - time_per_user = self.config.time_to_clean_dataset / (user_count * (self.config.minimum_delay + 1)) - if time_per_user < self.config.minimum_delay: - self.logger.warning(f"execution will be faster then minimum execution time: {self.config.minimum_delay}") - return int(time_per_user) - - def _populate_queue(self, users: List[User]) -> None: - """Populate queue with user representation""" + def _wait(self, user: CacheUser) -> bool: + """Wait for user to run at next_run_ts with an eye on shutdown_now""" + if user.next_run_ts is None: + self.logger.debug(f"User {user.eppn} has no next_run_ts, skipping wait") + return False + self.logger.debug(f"Waiting for user {user.eppn} to run at {user.next_run_ts_iso8601()}") + while user.next_run_ts > int(time.time()) and not self.shutdown_now: + if self.shutdown_now: + return False + if user.next_run_ts < int(time.time()): + return True + time.sleep(0.01) + return False - for user in users: - queue_object = { - "eppn": user.eppn, - "nin": None, - } - if user.identities.nin is not None: - queue_object["nin"] = user.identities.nin.number - self.queue.put(queue_object) - self.logger.debug(f"enqueuing user: {user.eppn}") + def task_done(self, eppn: str) -> None: + """Mark a task as done""" + self.worker_queue.task_done() + self.db_cache.delete(eppn=eppn) - def enqueuing(self) -> None: - """Add users to queue for further processing""" + def _enqueuing_to_worker_queue(self) -> None: + """Populate worker queue with cached users""" - users = self.db.get_uncleaned_verified_users( - cleaned_type=self.cleaner_type, - identity_type=self.identity_type, - limit=10000000, - ) + users = self.db_cache.get_all() if len(users) == 0: - self.logger.warning(f"No users where enqueued") + self.logger.warning(f"No users where found in cache!") return - self._populate_queue(users=users) - self.queue_actual_size = self.queue.qsize() + for user in users: + self.worker_queue.put(user.to_dict()) + self.logger.debug(f"enqueuing user: {user.eppn}") + + self.queue_actual_size = self.worker_queue.qsize() self._populate_max_changes() self.made_changes = 0 diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index 097001109..ea28fdab5 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -17,14 +17,15 @@ class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin, StatsCon mongo_uri: str = "" dry_run: bool = True debug: bool + healthy_path: str = "/tmp/healthy" change_quota: float - # amount of time to clean dataset, value in milliseconds - time_to_clean_dataset: int = 2592000000 + # amount of time to clean dataset, value in days + periodicity: int = 30 - # minimum time to delay each execution in milliseconds - minimum_delay: int = 1000 + # minimum time to delay each execution in seconds + minimum_delay: int = 1 gnap_auth_data: GNAPClientAuthData amapi: AmAPIConfig diff --git a/src/eduid/workers/user_cleaner/testing/test_app.py b/src/eduid/workers/user_cleaner/testing/test_app.py index f8a4aaaaf..bf54b226d 100644 --- a/src/eduid/workers/user_cleaner/testing/test_app.py +++ b/src/eduid/workers/user_cleaner/testing/test_app.py @@ -1,6 +1,6 @@ from typing import Any from eduid.common.testing_base import CommonTestCase -from eduid.userdb.fixtures.users import new_user_example, new_user_example2, new_unverified_user_example +from eduid.userdb.fixtures.users import UserFixtures from eduid.workers.user_cleaner.app import init_worker_base from eduid.userdb.identity import IdentityType from eduid.userdb.meta import CleanerType @@ -8,7 +8,12 @@ class AppTest(CommonTestCase): def setUp(self, *args, **kwargs): - super().setUp(am_users=[new_user_example, new_user_example2, new_unverified_user_example]) + super().setUp( + am_users=[ + UserFixtures().new_user_example, + UserFixtures().new_unverified_user_example, + ] + ) self.app = init_worker_base( cleaner_type=CleanerType.SKV, identity_type=IdentityType.NIN, test_config=self._get_config() @@ -37,15 +42,17 @@ def _get_config(self) -> dict[str, Any]: } def test_populate_queue(self): - users = [new_user_example, new_user_example2, new_unverified_user_example] + users = [UserFixtures().mocked_user_standard, UserFixtures().new_user_example] self.app._populate_queue(users=users) got_user1 = self.app.queue.get() - assert got_user1["eppn"] == new_user_example.eppn - assert got_user1["nin"] == new_user_example.identities.nin.number + assert got_user1["eppn"] == UserFixtures().mocked_user_standard.eppn + assert got_user1["nin"] == UserFixtures().mocked_user_standard.identities.nin.number got_user2 = self.app.queue.get() - assert got_user2["eppn"] == new_user_example2.eppn - assert got_user2["nin"] == new_user_example2.identities.nin.number + assert got_user2["eppn"] == UserFixtures().new_user_example.eppn # new_user_example2.eppn + assert ( + got_user2["nin"] == UserFixtures().new_user_example.identities.nin.number + ) # new_user_example2.identities.nin.number def test_get_delay_time(self): got = self.app.get_delay_time() diff --git a/src/eduid/workers/user_cleaner/testing/test_skv.py b/src/eduid/workers/user_cleaner/testing/test_skv.py index 517c2b991..a6793d98a 100644 --- a/src/eduid/workers/user_cleaner/testing/test_skv.py +++ b/src/eduid/workers/user_cleaner/testing/test_skv.py @@ -1,6 +1,6 @@ from typing import Any, Dict from eduid.common.testing_base import CommonTestCase -from eduid.userdb.fixtures.users import new_user_example, new_user_example2, new_unverified_user_example +from eduid.userdb.fixtures.users import UserFixtures from eduid.workers.user_cleaner.workers.skv import init_skv_worker from eduid.userdb.identity import IdentityType from eduid.userdb.meta import CleanerType @@ -9,7 +9,12 @@ class WorkerTest(CommonTestCase): def setUp(self, *args, **kwargs): - super().setUp(am_users=[new_user_example, new_user_example2, new_unverified_user_example]) + super().setUp( + am_users=[ + UserFixtures().new_user_example, + UserFixtures().new_unverified_user_example, + ] + ) self.skv = init_skv_worker(test_config=self._get_config()) diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index 5c620ad8f..14a5947c8 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -1,27 +1,32 @@ -import time -from typing import Any, Optional, Dict +from typing import Any, Mapping, Optional +from time import time from eduid.common.models.amapi_user import UserUpdateNameRequest, Reason, Source from eduid.common.rpc.msg_relay import NavetData from eduid.common.utils import set_user_names_from_official_address -from eduid.userdb import User from eduid.userdb.identity import IdentityType from eduid.userdb.meta import CleanerType from eduid.workers.user_cleaner.app import WorkerBase +from eduid.userdb.user_cleaner.cache import CacheUser +from eduid.userdb.user_cleaner.cachedb import CacheDB +from eduid.userdb.user_cleaner.metadb import MetaDB +from eduid.userdb.user_cleaner.meta import Meta class SKV(WorkerBase): """Worker class for Skatteverket""" - def __init__(self, test_config: Optional[Dict] = None): + def __init__(self, test_config: Optional[Mapping[str, Any]] = None): super().__init__(cleaner_type=CleanerType.SKV, identity_type=IdentityType.NIN, test_config=test_config) + self.db_cache = CacheDB(db_uri=self.config.mongo_uri, collection="skv_cache") + self.db_meta = MetaDB(db_uri=self.config.mongo_uri) - def update_name(self, queue_user: dict[str, Any], navet_data: NavetData) -> None: + def update_name(self, queue_user: CacheUser, navet_data: NavetData) -> None: """ Update-function for updating "given name" and "surname" from skatteverket, and creates new "display name" if needed. """ - db_user = self.db.get_user_by_eppn(eppn=queue_user["eppn"]) + db_user = self.db.get_user_by_eppn(eppn=queue_user.eppn) if db_user is None: return @@ -66,27 +71,55 @@ def update_name(self, queue_user: dict[str, Any], navet_data: NavetData) -> None def run(self): """skatteverket worker entry point""" + if not self.db_meta.exist(cleaner_type=self.cleaner_type): + # if self.db_meta.get(self.cleaner_type) is None: + meta = Meta( + periodicity=self.config.periodicity, + cleaner_type=self.cleaner_type, + ) + self.db_meta.save(doc=meta) + + meta = self.db_meta.get(self.cleaner_type) + if meta.periodicity != self.config.periodicity: + meta.periodicity = self.config.periodicity + self.db_meta.save(doc=meta) + + self.db_cache.delete_all() + while not self.shutdown_now: if self._is_quota_reached(): self.logger.warning(f"worker skatteverket has reached its change_quota, sleep for 20 seconds") self._make_unhealthy() - self._sleep(milliseconds=20000) + self._sleep(seconds=20000) self._make_healthy() - if self.queue.empty(): - self.enqueuing() - queue_user = self.queue.get() - navet_data = self.msg_relay.get_all_navet_data(nin=queue_user["nin"]) + if self.db_cache.is_empty(): + users = self.db.get_uncleaned_verified_users( + cleaned_type=self.cleaner_type, + identity_type=self.identity_type, + limit=10000000, + ) + self.db_cache.populate(am_users=users, periodicity=self.config.periodicity) + + if self.worker_queue.empty(): + self._enqueuing_to_worker_queue() + queue_user = CacheUser.from_dict(self.worker_queue.get()) - self.update_name(queue_user=queue_user, navet_data=navet_data) + if self._wait(user=queue_user): + db_user = self.db.get_user_by_eppn(eppn=queue_user.eppn) + if db_user.identities.nin is not None and db_user.identities.nin.is_verified: + navet_data = self.msg_relay.get_all_navet_data(nin=db_user.identities.nin.number) - self.queue.task_done() + # update name if needed against navet. + self.update_name(queue_user=queue_user, navet_data=navet_data) - self._sleep(milliseconds=self.execution_delay) + self.task_done(eppn=queue_user.eppn) + else: + self.worker_queue.put(queue_user.to_dict()) -def init_skv_worker(test_config: Optional[dict[str, Any]] = None) -> SKV: +def init_skv_worker(test_config: Optional[Mapping[str, Any]] = None) -> SKV: """Initialize skv (skatteverket) worker""" worker = SKV(test_config=test_config) return worker From a1a790e8b16747ada5602a1a664204781b8d274d Mon Sep 17 00:00:00 2001 From: masv3971 Date: Sun, 29 Jan 2023 15:38:49 +0100 Subject: [PATCH 23/49] Change auth url due to change. --- src/eduid/userdb/user_cleaner/cache.py | 2 +- src/eduid/webapp/signup/tests/test_app.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eduid/userdb/user_cleaner/cache.py b/src/eduid/userdb/user_cleaner/cache.py index 41c5b0e53..1c5a400db 100644 --- a/src/eduid/userdb/user_cleaner/cache.py +++ b/src/eduid/userdb/user_cleaner/cache.py @@ -36,7 +36,7 @@ def next_run_ts_iso8601(self) -> str: def from_user(self, data: User) -> "CacheUser": """ - Convert a User object to a QueueUser object. + Convert a User object to a CacheUser object. """ queue_user = CacheUser( eppn=data.eppn, diff --git a/src/eduid/webapp/signup/tests/test_app.py b/src/eduid/webapp/signup/tests/test_app.py index 24132e764..004e84338 100644 --- a/src/eduid/webapp/signup/tests/test_app.py +++ b/src/eduid/webapp/signup/tests/test_app.py @@ -81,7 +81,7 @@ def update_config(self, config: dict[str, Any]) -> dict[str, Any]: "environment": "dev", "scim_api_url": "http://localhost/scim/", "gnap_auth_data": { - "authn_server_url": "http://localhost/auth/", + "auth_server_url": "http://localhost/auth/", "key_name": "app_name", "client_jwk": JWK.generate(kid="testkey", kty="EC", size=256).export(as_dict=True), }, From 86aef2c567c50671afc9b1db258dfca81f002fb6 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 30 Jan 2023 10:26:09 +0100 Subject: [PATCH 24/49] Lost method replaced. --- src/eduid/common/rpc/msg_relay.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/eduid/common/rpc/msg_relay.py b/src/eduid/common/rpc/msg_relay.py index c4c44b9a8..582cd05e2 100644 --- a/src/eduid/common/rpc/msg_relay.py +++ b/src/eduid/common/rpc/msg_relay.py @@ -122,17 +122,20 @@ def is_deregistered(self) -> bool: return bool(self.deregistration_information.cause_code or self.deregistration_information.date) -class NavetData(NavetModelConfig): - case_information: CaseInformation = Field(alias="CaseInformation") - person: Person = Field(alias="Person") - - # Used to parse data from get_postal_address class FullPostalAddress(NavetModelConfig): name: Name = Field(default_factory=Name, alias="Name") official_address: OfficialAddress = Field(default_factory=OfficialAddress, alias="OfficialAddress") +class NavetData(NavetModelConfig): + case_information: CaseInformation = Field(alias="CaseInformation") + person: Person = Field(alias="Person") + + def get_full_postal_address(self) -> FullPostalAddress: + return FullPostalAddress(name=self.person.name, official_address=self.person.postal_addresses) + + class MsgRelay: """ This is the interface to the RPC task to fetch data from NAVET, and to send SMSs. From c7fa42fc11e11510ec3a38a16776366664f58d2b Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 30 Jan 2023 10:27:15 +0100 Subject: [PATCH 25/49] Correct comment. --- src/eduid/workers/user_cleaner/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 255249d77..b23671d90 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -146,7 +146,7 @@ def task_done(self, eppn: str) -> None: self.db_cache.delete(eppn=eppn) def _enqueuing_to_worker_queue(self) -> None: - """Populate worker queue with cached users""" + """Populate worker queue with users from db_cache.""" users = self.db_cache.get_all() if len(users) == 0: From ace25cecb91045d5871d517f2b4f9528598e1692 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Mon, 30 Jan 2023 10:28:09 +0100 Subject: [PATCH 26/49] Remove test cases from app, since they should not be in app testing space. --- .../workers/user_cleaner/testing/test_app.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/eduid/workers/user_cleaner/testing/test_app.py b/src/eduid/workers/user_cleaner/testing/test_app.py index bf54b226d..58b3e292a 100644 --- a/src/eduid/workers/user_cleaner/testing/test_app.py +++ b/src/eduid/workers/user_cleaner/testing/test_app.py @@ -40,20 +40,3 @@ def _get_config(self) -> dict[str, Any]: "access": [""], # List[Union[str, Access]] = Field(default_factory=list) }, } - - def test_populate_queue(self): - users = [UserFixtures().mocked_user_standard, UserFixtures().new_user_example] - self.app._populate_queue(users=users) - got_user1 = self.app.queue.get() - assert got_user1["eppn"] == UserFixtures().mocked_user_standard.eppn - assert got_user1["nin"] == UserFixtures().mocked_user_standard.identities.nin.number - - got_user2 = self.app.queue.get() - assert got_user2["eppn"] == UserFixtures().new_user_example.eppn # new_user_example2.eppn - assert ( - got_user2["nin"] == UserFixtures().new_user_example.identities.nin.number - ) # new_user_example2.identities.nin.number - - def test_get_delay_time(self): - got = self.app.get_delay_time() - assert got == 1294705 From d7d29b05b26d8ec33bbc59a894172a45060ef425 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 31 Jan 2023 11:53:12 +0100 Subject: [PATCH 27/49] Add test and add working fastapi client mock. --- .../common/clients/amapi_client/testing.py | 25 +++++-- .../workers/user_cleaner/testing/test_skv.py | 73 +++++++++++-------- 2 files changed, 64 insertions(+), 34 deletions(-) diff --git a/src/eduid/common/clients/amapi_client/testing.py b/src/eduid/common/clients/amapi_client/testing.py index 1ea55fc25..5f499b240 100644 --- a/src/eduid/common/clients/amapi_client/testing.py +++ b/src/eduid/common/clients/amapi_client/testing.py @@ -1,19 +1,34 @@ -from typing import Optional +from typing import Mapping, Optional import respx +from httpx import Response, Request +from eduid.common.models.amapi_user import UserUpdateNameRequest from eduid.common.clients.gnap_client.testing import MockedSyncAuthAPIMixin +from eduid.userdb.userdb import AmDB class MockedAMAPIMixin(MockedSyncAuthAPIMixin): - def start_mock_amapi(self, access_token_value: Optional[str] = None): + def start_mock_amapi(self, central_user_db: Optional[AmDB] = None, access_token_value: Optional[str] = None): self.start_mock_auth_api(access_token_value=access_token_value) - - self.mocked_users = respx.mock(base_url="http://localhost", assert_all_called=False) + self.central_user_db = central_user_db + self.mocked_users = respx.mock(base_url="http://localhost/amapi", assert_all_called=False) put_users_name_route = self.mocked_users.put( url="/users/hubba-bubba/name", name="put_users_name_request", ) - put_users_name_route.pass_through() + put_users_name_route.mock(side_effect=self._save) self.mocked_users.start() self.addCleanup(self.mocked_users.stop) # type: ignore + + def _save(self, request: Request) -> Response: + if self.central_user_db is None: + raise ValueError("save user side affect was called but self.amdb is None") + mock_request = UserUpdateNameRequest.parse_raw(request.content) + + db_user = self.central_user_db.get_user_by_eppn("hubba-bubba") + db_user.given_name = mock_request.given_name + db_user.surname = mock_request.surname + db_user.display_name = mock_request.display_name + self.central_user_db.save(user=db_user) + return Response(200, text='{"status": "true"}') diff --git a/src/eduid/workers/user_cleaner/testing/test_skv.py b/src/eduid/workers/user_cleaner/testing/test_skv.py index a6793d98a..523311d9e 100644 --- a/src/eduid/workers/user_cleaner/testing/test_skv.py +++ b/src/eduid/workers/user_cleaner/testing/test_skv.py @@ -1,13 +1,20 @@ from typing import Any, Dict +from unittest.mock import MagicMock, patch +from eduid.common.clients.amapi_client.testing import MockedAMAPIMixin from eduid.common.testing_base import CommonTestCase from eduid.userdb.fixtures.users import UserFixtures from eduid.workers.user_cleaner.workers.skv import init_skv_worker -from eduid.userdb.identity import IdentityType -from eduid.userdb.meta import CleanerType -from eduid.common.rpc.msg_relay import Name, NavetData, OfficialAddress, Person, PostalAddresses +from eduid.common.rpc.msg_relay import ( + NavetData, +) +import pytest +from jwcrypto.jwk import JWK +from eduid.workers.msg.tasks import MessageSender +from eduid.userdb.user_cleaner.cache import CacheUser -class WorkerTest(CommonTestCase): + +class WorkerTest(CommonTestCase, MockedAMAPIMixin): def setUp(self, *args, **kwargs): super().setUp( am_users=[ @@ -15,6 +22,7 @@ def setUp(self, *args, **kwargs): UserFixtures().new_unverified_user_example, ] ) + self.start_mock_amapi(central_user_db=self.amdb) self.skv = init_skv_worker(test_config=self._get_config()) @@ -25,39 +33,46 @@ def _get_config(self) -> Dict[str, Any]: "minimum_delay": 1, "time_to_clean_dataset": 30, "debug": True, + "dry_run": False, "change_quota": 2, + "celery": {}, "amapi": { - "url": "", + "url": "http://localhost/amapi/", "tls_verify": False, }, - "celery": {}, "gnap_auth_data": { - "auth_server_url": "", - "auth_server_verify": True, - "key_name": "", - "client_jwk": {}, - "access": [""], # List[Union[str, Access]] = Field(default_factory=list) + "auth_server_url": "http://localhost/auth/", + "key_name": "app_name", + "client_jwk": JWK.generate(kid="testkey", kty="EC", size=256).export(as_dict=True), }, } - def test_update_name(self): - queue_user = { - "eppn": new_user_example.eppn, - "nin": new_user_example.identities.nin.number, - } - navet_data = NavetData( - person=Person( - name=Name( - # given_name_marking: Optional[str] = Field(default=None, alias="GivenNameMarking") - given_name="test_given_name", - # middle_name="test_middle_name": Optional[str] = Field(default=None, alias="MiddleName") - surname="test_surname", - ), - ), + @staticmethod + def _get_all_navet_data(): + return NavetData.parse_obj(MessageSender.get_devel_all_navet_data()) + + @patch("eduid.common.rpc.msg_relay.MsgRelay.get_all_navet_data") + def test_update_name(self, mock_get_all_navet_data: MagicMock): + mock_get_all_navet_data.return_value = self._get_all_navet_data() + cache_user = CacheUser( + eppn=UserFixtures().new_user_example.eppn, ) - self.skv.update_name(queue_user=queue_user, navet_data=navet_data) + navet_data = self.skv.msg_relay.get_all_navet_data(nin="197801011234") + self.skv.update_name(queue_user=cache_user, navet_data=navet_data) + + got = self.skv.db.get_user_by_eppn(UserFixtures().new_user_example.eppn) - got = self.skv.db.get_user_by_eppn(new_user_example.eppn) + assert got.given_name == "Testaren Test" + assert got.surname == "Testsson" + + @pytest.mark.skip(reason="Not implemented yet") + def test_populate_queue(self): + self.skv.db_cache.populate( + am_users=[UserFixtures().mocked_user_standard, UserFixtures().new_user_example], periodicity=1 + ) + self.skv._enqueuing_to_worker_queue() + got_user1 = self.skv.worker_queue.get() + assert got_user1["eppn"] == UserFixtures().mocked_user_standard.eppn - assert got.given_name == "test_given_name" - assert got.surname == "test_surname" + got_user2 = self.skv.worker_queue.get() + assert got_user2["eppn"] == UserFixtures().new_user_example.eppn # new_user_example2.eppn From 2df3f6b6290182e642944fc5e2b5444acec2b604 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 31 Jan 2023 11:54:33 +0100 Subject: [PATCH 28/49] Rename queue_user to cache_user in order to bring ORDER. --- src/eduid/workers/user_cleaner/workers/skv.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index 14a5947c8..ec16c9842 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -104,19 +104,19 @@ def run(self): if self.worker_queue.empty(): self._enqueuing_to_worker_queue() - queue_user = CacheUser.from_dict(self.worker_queue.get()) + cache_user = CacheUser.from_dict(self.worker_queue.get()) - if self._wait(user=queue_user): - db_user = self.db.get_user_by_eppn(eppn=queue_user.eppn) + if self._wait(user=cache_user): + db_user = self.db.get_user_by_eppn(eppn=cache_user.eppn) if db_user.identities.nin is not None and db_user.identities.nin.is_verified: navet_data = self.msg_relay.get_all_navet_data(nin=db_user.identities.nin.number) # update name if needed against navet. - self.update_name(queue_user=queue_user, navet_data=navet_data) + self.update_name(queue_user=cache_user, navet_data=navet_data) - self.task_done(eppn=queue_user.eppn) + self.task_done(eppn=cache_user.eppn) else: - self.worker_queue.put(queue_user.to_dict()) + self.worker_queue.put(cache_user.to_dict()) def init_skv_worker(test_config: Optional[Mapping[str, Any]] = None) -> SKV: From f8ab1296d53a060538de0c828c255a9c5a0c8694 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 31 Jan 2023 15:51:11 +0100 Subject: [PATCH 29/49] Use TUserDbDocument instead of dict. --- src/eduid/userdb/userdb.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/eduid/userdb/userdb.py b/src/eduid/userdb/userdb.py index 36f6bd3f9..4ba6968ec 100644 --- a/src/eduid/userdb/userdb.py +++ b/src/eduid/userdb/userdb.py @@ -401,13 +401,14 @@ def save(self, user: User) -> UserSaveResult: # type: ignore[override] return UserSaveResult(success=bool(result)) - def unverify_mail_aliases(self, user_id: ObjectId, mail_aliases: Optional[list[dict[str, Any]]]) -> int: + def unverify_mail_aliases(self, user_id: ObjectId, mail_aliases: Optional[list[TUserDbDocument]]) -> int: + count = 0 if mail_aliases is None: logger.debug(f"No mailAliases to check duplicates against for user {user_id}.") return count # Get the verified mail addresses from attributes - verified_mail_aliases = [alias["email"] for alias in mail_aliases if alias.get("verified") is True] + verified_mail_aliases = [alias["email"] for alias in mail_aliases if alias["verified"] is True] for email in verified_mail_aliases: try: for user in self.get_users_by_mail(email): @@ -431,7 +432,7 @@ def unverify_mail_aliases(self, user_id: ObjectId, mail_aliases: Optional[list[d pass return count - def unverify_phones(self, user_id: ObjectId, phones: list[dict[str, Any]]) -> int: + def unverify_phones(self, user_id: ObjectId, phones: Optional[list[TUserDbDocument]]) -> int: count = 0 if phones is None: logger.debug(f"No phones to check duplicates against for user {user_id}.") From 6f18a76c035c10a0a3bab6aa42eae253faac4245 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 31 Jan 2023 15:52:20 +0100 Subject: [PATCH 30/49] Add routes for amapi_client mock. --- .../common/clients/amapi_client/testing.py | 74 +++++++++++++++++-- 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/src/eduid/common/clients/amapi_client/testing.py b/src/eduid/common/clients/amapi_client/testing.py index 5f499b240..23084a2c9 100644 --- a/src/eduid/common/clients/amapi_client/testing.py +++ b/src/eduid/common/clients/amapi_client/testing.py @@ -2,10 +2,19 @@ import respx from httpx import Response, Request -from eduid.common.models.amapi_user import UserUpdateNameRequest +from eduid.common.misc.timeutil import utc_now +from eduid.common.models.amapi_user import ( + UserUpdateEmailRequest, + UserUpdateLanguageRequest, + UserUpdateNameRequest, + UserUpdatePhoneRequest, +) from eduid.common.clients.gnap_client.testing import MockedSyncAuthAPIMixin from eduid.userdb.userdb import AmDB +from eduid.userdb.mail import MailAddressList +from eduid.userdb.phone import PhoneNumberList +from eduid.userdb.user import User class MockedAMAPIMixin(MockedSyncAuthAPIMixin): @@ -13,22 +22,71 @@ def start_mock_amapi(self, central_user_db: Optional[AmDB] = None, access_token_ self.start_mock_auth_api(access_token_value=access_token_value) self.central_user_db = central_user_db self.mocked_users = respx.mock(base_url="http://localhost/amapi", assert_all_called=False) - put_users_name_route = self.mocked_users.put( - url="/users/hubba-bubba/name", - name="put_users_name_request", + + self.mocked_users.put(url="/users/hubba-bubba/name", name="200_put_name").mock( + side_effect=self._side_effect_update_name + ) + self.mocked_users.put(url="/users/hubba-bubba/email", name="200_put_email").mock( + side_effect=self._side_effect_update_email + ) + self.mocked_users.put(url="/users/hubba-bubba/language", name="200_put_language").mock( + side_effect=self._side_effect_update_language ) - put_users_name_route.mock(side_effect=self._save) + self.mocked_users.start() self.addCleanup(self.mocked_users.stop) # type: ignore - def _save(self, request: Request) -> Response: + def _side_effect_update(self, func_name: str) -> User: if self.central_user_db is None: - raise ValueError("save user side affect was called but self.amdb is None") + raise ValueError(f"side affect '{func_name}' was called but self.amdb is None") + return self.central_user_db.get_user_by_eppn("hubba-bubba") + + def _side_effect_update_name(self, request: Request) -> Response: + db_user = self._side_effect_update(func_name="_side_effect_update_name") + assert self.central_user_db is not None mock_request = UserUpdateNameRequest.parse_raw(request.content) - db_user = self.central_user_db.get_user_by_eppn("hubba-bubba") db_user.given_name = mock_request.given_name db_user.surname = mock_request.surname db_user.display_name = mock_request.display_name self.central_user_db.save(user=db_user) return Response(200, text='{"status": "true"}') + + def _side_effect_update_email(self, request: Request) -> Response: + db_user = self._side_effect_update(func_name="_side_effect_update_email") + assert self.central_user_db is not None + mock_request = UserUpdateEmailRequest.parse_raw(request.content) + + mail_addresses = [mail.to_dict() for mail in mock_request.mail_addresses] + db_user = self.central_user_db.get_user_by_eppn("hubba-bubba") + self.central_user_db.unverify_mail_aliases(user_id=db_user.user_id, mail_aliases=mail_addresses) + + db_user.mail_addresses = MailAddressList(elements=mock_request.mail_addresses) + self.central_user_db.save(user=db_user) + return Response(200, text='{"status": "true"}') + + def _side_effect_update_language(self, request: Request) -> Response: + db_user = self._side_effect_update(func_name="_side_effect_update_language") + assert self.central_user_db is not None + mock_request = UserUpdateLanguageRequest.parse_raw(request.content) + + db_user.language = mock_request.language + self.central_user_db.save(user=db_user) + return Response(200, text='{"status": "true"}') + + def _side_effect_update_phone(self, request: Request) -> Response: + db_user = self._side_effect_update(func_name="_side_effect_update_phone") + assert self.central_user_db is not None + mock_request = UserUpdatePhoneRequest.parse_raw(request.content) + + phones = [phone.to_dict() for phone in mock_request.phone_numbers] + self.central_user_db.unverify_phones(user_id=db_user.user_id, phones=phones) + db_user.phone_numbers = PhoneNumberList(elements=mock_request.phone_numbers) + return Response(200, text='{"status": "true"}') + + def _side_effect_update_terminate(self, request: Request) -> Response: + db_user = self._side_effect_update(func_name="_side_effect_update_terminate") + assert self.central_user_db is not None + + db_user.terminated = utc_now() + return Response(200, text='{"status": "true"}') From 38793396f21ae1f2e53f41e0b5702f06fdce1431 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 31 Jan 2023 16:01:23 +0100 Subject: [PATCH 31/49] Since almost nothing load in app, it's more efficient to test in skv. --- .../workers/user_cleaner/testing/test_app.py | 42 ------------------- 1 file changed, 42 deletions(-) delete mode 100644 src/eduid/workers/user_cleaner/testing/test_app.py diff --git a/src/eduid/workers/user_cleaner/testing/test_app.py b/src/eduid/workers/user_cleaner/testing/test_app.py deleted file mode 100644 index 58b3e292a..000000000 --- a/src/eduid/workers/user_cleaner/testing/test_app.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Any -from eduid.common.testing_base import CommonTestCase -from eduid.userdb.fixtures.users import UserFixtures -from eduid.workers.user_cleaner.app import init_worker_base -from eduid.userdb.identity import IdentityType -from eduid.userdb.meta import CleanerType - - -class AppTest(CommonTestCase): - def setUp(self, *args, **kwargs): - super().setUp( - am_users=[ - UserFixtures().new_user_example, - UserFixtures().new_unverified_user_example, - ] - ) - - self.app = init_worker_base( - cleaner_type=CleanerType.SKV, identity_type=IdentityType.NIN, test_config=self._get_config() - ) - - def _get_config(self) -> dict[str, Any]: - return { - "app_name": "test", - "mongo_uri": self.settings["mongo_uri"], - "minimum_delay": 1000, - "time_to_clean_dataset": 2592000000, - "debug": True, - "change_quota": 2, - "amapi": { - "url": "", - "tls_verify": False, - }, - "celery": {}, - "gnap_auth_data": { - "auth_server_url": "", - "auth_server_verify": True, - "key_name": "", - "client_jwk": {}, - "access": [""], # List[Union[str, Access]] = Field(default_factory=list) - }, - } From d21f3b67dadc3c875a3483218dfeedee73d85a5f Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 31 Jan 2023 17:24:54 +0100 Subject: [PATCH 32/49] Fix typing issue. --- src/eduid/workers/user_cleaner/workers/skv.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index ec16c9842..fccb34de6 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -72,17 +72,17 @@ def update_name(self, queue_user: CacheUser, navet_data: NavetData) -> None: def run(self): """skatteverket worker entry point""" if not self.db_meta.exist(cleaner_type=self.cleaner_type): - # if self.db_meta.get(self.cleaner_type) is None: meta = Meta( periodicity=self.config.periodicity, cleaner_type=self.cleaner_type, ) self.db_meta.save(doc=meta) - meta = self.db_meta.get(self.cleaner_type) - if meta.periodicity != self.config.periodicity: - meta.periodicity = self.config.periodicity - self.db_meta.save(doc=meta) + meta_new = self.db_meta.get(cleaner_type=self.cleaner_type) + assert meta_new is not None + if meta_new.periodicity != self.config.periodicity: + meta_new.periodicity = self.config.periodicity + self.db_meta.save(doc=meta_new) self.db_cache.delete_all() From 87fbf3001a490994a73e9ad21a86ec4eb5e43ede Mon Sep 17 00:00:00 2001 From: masv3971 Date: Tue, 31 Jan 2023 17:30:22 +0100 Subject: [PATCH 33/49] Add queue test. --- src/eduid/workers/user_cleaner/testing/test_skv.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/eduid/workers/user_cleaner/testing/test_skv.py b/src/eduid/workers/user_cleaner/testing/test_skv.py index 523311d9e..50b664c3a 100644 --- a/src/eduid/workers/user_cleaner/testing/test_skv.py +++ b/src/eduid/workers/user_cleaner/testing/test_skv.py @@ -65,14 +65,12 @@ def test_update_name(self, mock_get_all_navet_data: MagicMock): assert got.given_name == "Testaren Test" assert got.surname == "Testsson" - @pytest.mark.skip(reason="Not implemented yet") def test_populate_queue(self): - self.skv.db_cache.populate( - am_users=[UserFixtures().mocked_user_standard, UserFixtures().new_user_example], periodicity=1 - ) + uf = UserFixtures() + self.skv.db_cache.populate(am_users=[uf.new_user_example, uf.new_unverified_user_example], periodicity=1) self.skv._enqueuing_to_worker_queue() got_user1 = self.skv.worker_queue.get() - assert got_user1["eppn"] == UserFixtures().mocked_user_standard.eppn + assert got_user1["eppn"] == uf.new_user_example.eppn got_user2 = self.skv.worker_queue.get() - assert got_user2["eppn"] == UserFixtures().new_user_example.eppn # new_user_example2.eppn + assert got_user2["eppn"] == uf.new_unverified_user_example.eppn From 9bc8af6b23518a1a54c73030217ff1fc46a33fcb Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:06:33 +0100 Subject: [PATCH 34/49] Change to __future__ annotations. --- src/eduid/userdb/user_cleaner/cache.py | 6 ++++-- src/eduid/userdb/user_cleaner/meta.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/eduid/userdb/user_cleaner/cache.py b/src/eduid/userdb/user_cleaner/cache.py index 1c5a400db..8659e88ef 100644 --- a/src/eduid/userdb/user_cleaner/cache.py +++ b/src/eduid/userdb/user_cleaner/cache.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Any, Mapping, Optional from pydantic import BaseModel, Field from datetime import datetime @@ -22,7 +24,7 @@ def to_dict(self) -> TUserDbDocument: return TUserDbDocument(data) @classmethod - def from_dict(cls, data: Mapping[str, Any]) -> "CacheUser": + def from_dict(cls, data: Mapping[str, Any]) -> CacheUser: """Convert a dict to a Element object.""" return cls(**data) @@ -34,7 +36,7 @@ def next_run_ts_iso8601(self) -> str: return "" return datetime.utcfromtimestamp(self.next_run_ts).strftime("%Y-%m-%d %H:%M:%S") - def from_user(self, data: User) -> "CacheUser": + def from_user(self, data: User) -> CacheUser: """ Convert a User object to a CacheUser object. """ diff --git a/src/eduid/userdb/user_cleaner/meta.py b/src/eduid/userdb/user_cleaner/meta.py index 0b5d8dc28..6357c7621 100644 --- a/src/eduid/userdb/user_cleaner/meta.py +++ b/src/eduid/userdb/user_cleaner/meta.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Any, Mapping from datetime import datetime from pydantic import BaseModel, Field @@ -22,6 +24,6 @@ def to_dict(self) -> TUserDbDocument: return TUserDbDocument(data) @classmethod - def from_dict(cls, data: Mapping[str, Any]) -> "Meta": + def from_dict(cls, data: Mapping[str, Any]) -> Meta: """Convert a dict to a Element object.""" return cls(**data) From 606280992ddfcfed24f61794031de92c456cdb46 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:08:43 +0100 Subject: [PATCH 35/49] Change to use isoformat instead of strfrime. --- src/eduid/userdb/user_cleaner/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eduid/userdb/user_cleaner/cache.py b/src/eduid/userdb/user_cleaner/cache.py index 8659e88ef..3ec4d3fe8 100644 --- a/src/eduid/userdb/user_cleaner/cache.py +++ b/src/eduid/userdb/user_cleaner/cache.py @@ -34,7 +34,7 @@ def next_run_ts_iso8601(self) -> str: """ if self.next_run_ts is None: return "" - return datetime.utcfromtimestamp(self.next_run_ts).strftime("%Y-%m-%d %H:%M:%S") + return datetime.utcfromtimestamp(self.next_run_ts).isoformat() def from_user(self, data: User) -> CacheUser: """ From 3b0072a9b466e0065e0a570c9dc8104f8fddfb07 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:11:54 +0100 Subject: [PATCH 36/49] Rename queue to cache due to legacy. --- src/eduid/userdb/user_cleaner/cache.py | 4 ++-- src/eduid/userdb/user_cleaner/cachedb.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/eduid/userdb/user_cleaner/cache.py b/src/eduid/userdb/user_cleaner/cache.py index 3ec4d3fe8..6aeb755ff 100644 --- a/src/eduid/userdb/user_cleaner/cache.py +++ b/src/eduid/userdb/user_cleaner/cache.py @@ -40,7 +40,7 @@ def from_user(self, data: User) -> CacheUser: """ Convert a User object to a CacheUser object. """ - queue_user = CacheUser( + cache_user = CacheUser( eppn=data.eppn, ) - return queue_user + return cache_user diff --git a/src/eduid/userdb/user_cleaner/cachedb.py b/src/eduid/userdb/user_cleaner/cachedb.py index a6f3fd7e7..09199da1f 100644 --- a/src/eduid/userdb/user_cleaner/cachedb.py +++ b/src/eduid/userdb/user_cleaner/cachedb.py @@ -17,12 +17,12 @@ def __init__(self, db_uri: str, collection: str, db_name: str = "eduid_user_clea self.setup_indexes(indexes) - def save(self, queue_user: CacheUser) -> bool: + def save(self, cache_user: CacheUser) -> bool: """Save a CacheUser object to the database.""" - if self.exists(queue_user.eppn): - logger.debug(f"User {queue_user.eppn} already exists in the queue") + if self.exists(cache_user.eppn): + logger.debug(f"User {cache_user.eppn} already exists in the cache.") return False - self._coll.insert_one(queue_user.to_dict()) + self._coll.insert_one(cache_user.to_dict()) return True def exists(self, eppn: str) -> bool: @@ -35,7 +35,7 @@ def get_all(self) -> list[CacheUser]: return [CacheUser.from_dict(data=doc) for doc in res] def count(self) -> int: - """Count the number of users in the queue.""" + """Count the number of users in the cache.""" return self._coll.count_documents({}) def delete(self, eppn: str) -> None: @@ -61,12 +61,12 @@ def populate( next_run_ts = int(time()) + (60 * 60) # first process window starts in 1 hour, then the population is done for am_user in am_users: next_run_ts += time_constant - queue_user = CacheUser( + cache_user = CacheUser( eppn=am_user.eppn, next_run_ts=next_run_ts, ) - self.save(queue_user) + self.save(cache_user) def is_empty(self) -> bool: """Check if the cache is empty.""" From 1a757785d3c6ecadf40b64b4cc417bf5ed56a6fc Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:16:10 +0100 Subject: [PATCH 37/49] Remove mock from self, since it's not used globaly. --- src/eduid/common/clients/amapi_client/testing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/eduid/common/clients/amapi_client/testing.py b/src/eduid/common/clients/amapi_client/testing.py index 23084a2c9..29640abc8 100644 --- a/src/eduid/common/clients/amapi_client/testing.py +++ b/src/eduid/common/clients/amapi_client/testing.py @@ -21,20 +21,20 @@ class MockedAMAPIMixin(MockedSyncAuthAPIMixin): def start_mock_amapi(self, central_user_db: Optional[AmDB] = None, access_token_value: Optional[str] = None): self.start_mock_auth_api(access_token_value=access_token_value) self.central_user_db = central_user_db - self.mocked_users = respx.mock(base_url="http://localhost/amapi", assert_all_called=False) + mocked_users = respx.mock(base_url="http://localhost/amapi", assert_all_called=False) - self.mocked_users.put(url="/users/hubba-bubba/name", name="200_put_name").mock( + mocked_users.put(url="/users/hubba-bubba/name", name="200_put_name").mock( side_effect=self._side_effect_update_name ) - self.mocked_users.put(url="/users/hubba-bubba/email", name="200_put_email").mock( + mocked_users.put(url="/users/hubba-bubba/email", name="200_put_email").mock( side_effect=self._side_effect_update_email ) - self.mocked_users.put(url="/users/hubba-bubba/language", name="200_put_language").mock( + mocked_users.put(url="/users/hubba-bubba/language", name="200_put_language").mock( side_effect=self._side_effect_update_language ) - self.mocked_users.start() - self.addCleanup(self.mocked_users.stop) # type: ignore + mocked_users.start() + self.addCleanup(mocked_users.stop) # type: ignore def _side_effect_update(self, func_name: str) -> User: if self.central_user_db is None: From f270aaacbdb83c4e8cc3e27e7405130906bfb8d3 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:21:03 +0100 Subject: [PATCH 38/49] Add stat collection for each completed task. --- src/eduid/workers/user_cleaner/app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index b23671d90..fbde6af5c 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -144,6 +144,7 @@ def task_done(self, eppn: str) -> None: """Mark a task as done""" self.worker_queue.task_done() self.db_cache.delete(eppn=eppn) + self.stats.count(name="user_cleaner_task_done") def _enqueuing_to_worker_queue(self) -> None: """Populate worker queue with users from db_cache.""" From 35897dbe61d880292222c112c489e4c4f9d023d6 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:22:37 +0100 Subject: [PATCH 39/49] Remove dubble fnutts around Queue type, not necessary. --- src/eduid/workers/user_cleaner/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index fbde6af5c..cc2f49697 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -58,7 +58,7 @@ def __init__( self.shutdown_now = False - self.worker_queue: "Queue[dict[str, Union[str,int]]]" = Queue(maxsize=0) + self.worker_queue: Queue[dict[str, Union[str, int]]] = Queue(maxsize=0) self.queue_actual_size = 0 self.made_changes = 0 From 2bf2ec7ec0f80d704b45176515d324b4debe5022 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:25:54 +0100 Subject: [PATCH 40/49] Remove trivial method and apply its logic directly. --- src/eduid/workers/user_cleaner/app.py | 6 ------ src/eduid/workers/user_cleaner/workers/skv.py | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index cc2f49697..0e0439c14 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -90,12 +90,6 @@ def _is_quota_reached(self) -> bool: self.logger.debug(f"is_quota_reached:: made_changes: {self.made_changes}, max_changes: {self.max_changes}") return self.made_changes >= self.max_changes - def _add_to_made_changes(self) -> None: - """ - Helper to add to made_changes after a change. - """ - self.made_changes += 1 - def _populate_max_changes(self) -> None: """ Helper to calculate the maximum allowed changes diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index fccb34de6..5a317ebbc 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -49,7 +49,7 @@ def update_name(self, queue_user: CacheUser, navet_data: NavetData) -> None: user=db_user, user_postal_address=navet_data.get_full_postal_address() ) - self._add_to_made_changes() + self.made_changes += 1 self.logger.debug(f"number of changes: {self.made_changes} out of {self.max_changes}, {self.queue_actual_size}") From 4ad07787c594ed88d7390b8a075c5482d005c293 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:31:49 +0100 Subject: [PATCH 41/49] Optimize db query. --- src/eduid/userdb/user_cleaner/cachedb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eduid/userdb/user_cleaner/cachedb.py b/src/eduid/userdb/user_cleaner/cachedb.py index 09199da1f..078e7c1de 100644 --- a/src/eduid/userdb/user_cleaner/cachedb.py +++ b/src/eduid/userdb/user_cleaner/cachedb.py @@ -27,7 +27,7 @@ def save(self, cache_user: CacheUser) -> bool: def exists(self, eppn: str) -> bool: """Check if a user exists in the cache.""" - return self._coll.count_documents({"eppn": eppn}) > 0 + return self.db_count(spec={"eppn": eppn}, limit=1) > 0 def get_all(self) -> list[CacheUser]: """Get all users from the cache.""" From 521432c95650557576f70e897bd5573996f3aacf Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:35:19 +0100 Subject: [PATCH 42/49] Use _get_all_docs() istead of _coll.find({}]. --- src/eduid/userdb/user_cleaner/cachedb.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/eduid/userdb/user_cleaner/cachedb.py b/src/eduid/userdb/user_cleaner/cachedb.py index 078e7c1de..07b7c9fa2 100644 --- a/src/eduid/userdb/user_cleaner/cachedb.py +++ b/src/eduid/userdb/user_cleaner/cachedb.py @@ -18,7 +18,7 @@ def __init__(self, db_uri: str, collection: str, db_name: str = "eduid_user_clea self.setup_indexes(indexes) def save(self, cache_user: CacheUser) -> bool: - """Save a CacheUser object to the database.""" + """Save a CacheUser object to cache db.""" if self.exists(cache_user.eppn): logger.debug(f"User {cache_user.eppn} already exists in the cache.") return False @@ -26,12 +26,12 @@ def save(self, cache_user: CacheUser) -> bool: return True def exists(self, eppn: str) -> bool: - """Check if a user exists in the cache.""" + """Check if a user exists in the cache db.""" return self.db_count(spec={"eppn": eppn}, limit=1) > 0 def get_all(self) -> list[CacheUser]: - """Get all users from the cache.""" - res = self._coll.find({}) + """Get all users from the cache db.""" + res = self._get_all_docs() return [CacheUser.from_dict(data=doc) for doc in res] def count(self) -> int: @@ -39,11 +39,11 @@ def count(self) -> int: return self._coll.count_documents({}) def delete(self, eppn: str) -> None: - """delete one user from the cache.""" + """delete one user from the cache db.""" self._coll.delete_one({"eppn": eppn}) def delete_all(self) -> None: - """Delete all users from the cache.""" + """Delete all users from the cache db.""" self._coll.delete_many({}) def populate( From ac5a6cfd1e548b9d88ff08de947b51facb8cd5c2 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:36:57 +0100 Subject: [PATCH 43/49] Use BaseDB drop method. --- src/eduid/userdb/user_cleaner/cachedb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eduid/userdb/user_cleaner/cachedb.py b/src/eduid/userdb/user_cleaner/cachedb.py index 07b7c9fa2..8a39e71e8 100644 --- a/src/eduid/userdb/user_cleaner/cachedb.py +++ b/src/eduid/userdb/user_cleaner/cachedb.py @@ -44,7 +44,7 @@ def delete(self, eppn: str) -> None: def delete_all(self) -> None: """Delete all users from the cache db.""" - self._coll.delete_many({}) + self._drop_whole_collection() def populate( self, From a94bb529f2e9f4f51ec414c875914aee8bd9e095 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 12:41:12 +0100 Subject: [PATCH 44/49] Use BaseDB method instead of _coll. --- src/eduid/userdb/user_cleaner/cachedb.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/eduid/userdb/user_cleaner/cachedb.py b/src/eduid/userdb/user_cleaner/cachedb.py index 8a39e71e8..38a4cbdbe 100644 --- a/src/eduid/userdb/user_cleaner/cachedb.py +++ b/src/eduid/userdb/user_cleaner/cachedb.py @@ -9,7 +9,7 @@ class CacheDB(BaseDB): - """Database class for the user cleaning database cache.""" + """Database class for the user cleaning cache db.""" def __init__(self, db_uri: str, collection: str, db_name: str = "eduid_user_cleaner"): super().__init__(db_uri, db_name, collection) @@ -35,7 +35,7 @@ def get_all(self) -> list[CacheUser]: return [CacheUser.from_dict(data=doc) for doc in res] def count(self) -> int: - """Count the number of users in the cache.""" + """Count the number of users in the cache db.""" return self._coll.count_documents({}) def delete(self, eppn: str) -> None: @@ -51,7 +51,7 @@ def populate( am_users: list[User], periodicity: int, ) -> None: - """Populate cache database with the user from AMDB.""" + """Populate cache db with the user from AMDB.""" cache_size = len(am_users) @@ -69,5 +69,5 @@ def populate( self.save(cache_user) def is_empty(self) -> bool: - """Check if the cache is empty.""" - return self.count() == 0 + """Check if the cache db is empty.""" + return self.db_count(spec={}) == 0 From e8f0a07a57a28c89390ab86901a43ea02d5d0635 Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 14:33:33 +0100 Subject: [PATCH 45/49] Replace int (unix epoch) with datetime. --- src/eduid/userdb/user_cleaner/cache.py | 10 +--------- src/eduid/userdb/user_cleaner/cachedb.py | 18 +++++++++++++----- src/eduid/userdb/user_cleaner/meta.py | 4 ++-- src/eduid/workers/user_cleaner/app.py | 7 ++++--- src/eduid/workers/user_cleaner/config.py | 5 +++-- .../workers/user_cleaner/testing/test_skv.py | 7 ++++++- src/eduid/workers/user_cleaner/workers/skv.py | 4 +++- 7 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/eduid/userdb/user_cleaner/cache.py b/src/eduid/userdb/user_cleaner/cache.py index 6aeb755ff..a316d1f0e 100644 --- a/src/eduid/userdb/user_cleaner/cache.py +++ b/src/eduid/userdb/user_cleaner/cache.py @@ -12,7 +12,7 @@ class CacheUser(BaseModel): eppn: str created_ts: datetime = Field(default_factory=utc_now) - next_run_ts: Optional[int] = None + next_run_ts: Optional[datetime] = None def to_dict(self) -> TUserDbDocument: """ @@ -28,14 +28,6 @@ def from_dict(cls, data: Mapping[str, Any]) -> CacheUser: """Convert a dict to a Element object.""" return cls(**data) - def next_run_ts_iso8601(self) -> str: - """ - Convert the next_run_ts to ISO8601 format. - """ - if self.next_run_ts is None: - return "" - return datetime.utcfromtimestamp(self.next_run_ts).isoformat() - def from_user(self, data: User) -> CacheUser: """ Convert a User object to a CacheUser object. diff --git a/src/eduid/userdb/user_cleaner/cachedb.py b/src/eduid/userdb/user_cleaner/cachedb.py index 38a4cbdbe..9eeb75337 100644 --- a/src/eduid/userdb/user_cleaner/cachedb.py +++ b/src/eduid/userdb/user_cleaner/cachedb.py @@ -1,6 +1,8 @@ from abc import ABC +from datetime import timedelta from time import time import logging +from eduid.common.misc.timeutil import utc_now from eduid.userdb.db import BaseDB, TUserDbDocument from eduid.userdb.user import User from eduid.userdb.user_cleaner.cache import CacheUser @@ -49,16 +51,22 @@ def delete_all(self) -> None: def populate( self, am_users: list[User], - periodicity: int, + periodicity: timedelta, + minimum_delay: timedelta, ) -> None: """Populate cache db with the user from AMDB.""" - cache_size = len(am_users) + number_of_users = len(am_users) - periodicity_in_seconds = 22 * 60 * 60 * periodicity # strip 2 hours of each day in order to give us some slack - time_constant = int(periodicity_in_seconds / cache_size) + # strip 2 hours of each day in order to give us some slack + time_constant = timedelta( + (periodicity - timedelta(hours=2)).total_seconds() / (number_of_users * minimum_delay.total_seconds()) + ) - next_run_ts = int(time()) + (60 * 60) # first process window starts in 1 hour, then the population is done + # first process window starts in 1 hour, then the population is done + next_run_ts = utc_now() + timedelta(hours=1) + + # next_run_ts = int(time()) + (60 * 60) # first process window starts in 1 hour, then the population is done for am_user in am_users: next_run_ts += time_constant cache_user = CacheUser( diff --git a/src/eduid/userdb/user_cleaner/meta.py b/src/eduid/userdb/user_cleaner/meta.py index 6357c7621..1fd5a08a0 100644 --- a/src/eduid/userdb/user_cleaner/meta.py +++ b/src/eduid/userdb/user_cleaner/meta.py @@ -1,7 +1,7 @@ from __future__ import annotations from typing import Any, Mapping -from datetime import datetime +from datetime import datetime, timedelta from pydantic import BaseModel, Field from eduid.common.misc.timeutil import utc_now @@ -10,7 +10,7 @@ class Meta(BaseModel): - periodicity: int + periodicity: timedelta cleaner_type: CleanerType created_ts: datetime = Field(default_factory=utc_now) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 0e0439c14..2f1b2f7dd 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -11,6 +11,7 @@ from typing import Any, Mapping, Optional, Union from pathlib import Path +from eduid.common.misc.timeutil import utc_now from eduid.common.clients.amapi_client.amapi_client import AMAPIClient from eduid.common.config.parsers import load_config @@ -125,11 +126,11 @@ def _wait(self, user: CacheUser) -> bool: if user.next_run_ts is None: self.logger.debug(f"User {user.eppn} has no next_run_ts, skipping wait") return False - self.logger.debug(f"Waiting for user {user.eppn} to run at {user.next_run_ts_iso8601()}") - while user.next_run_ts > int(time.time()) and not self.shutdown_now: + self.logger.debug(f"Waiting for user {user.eppn} to run at {user.next_run_ts}") + while user.next_run_ts > utc_now() and not self.shutdown_now: if self.shutdown_now: return False - if user.next_run_ts < int(time.time()): + if user.next_run_ts < utc_now(): return True time.sleep(0.01) return False diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index ea28fdab5..148ef71e9 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -1,3 +1,4 @@ +from datetime import timedelta from typing import Sequence from pydantic import Field, BaseModel @@ -22,10 +23,10 @@ class UserCleanerConfig(RootConfig, LoggingConfigMixin, MsgConfigMixin, StatsCon change_quota: float # amount of time to clean dataset, value in days - periodicity: int = 30 + periodicity: timedelta = timedelta(days=30) # minimum time to delay each execution in seconds - minimum_delay: int = 1 + minimum_delay: timedelta = timedelta(seconds=1) gnap_auth_data: GNAPClientAuthData amapi: AmAPIConfig diff --git a/src/eduid/workers/user_cleaner/testing/test_skv.py b/src/eduid/workers/user_cleaner/testing/test_skv.py index 50b664c3a..16d0be92a 100644 --- a/src/eduid/workers/user_cleaner/testing/test_skv.py +++ b/src/eduid/workers/user_cleaner/testing/test_skv.py @@ -1,3 +1,4 @@ +from datetime import timedelta from typing import Any, Dict from unittest.mock import MagicMock, patch from eduid.common.clients.amapi_client.testing import MockedAMAPIMixin @@ -67,7 +68,11 @@ def test_update_name(self, mock_get_all_navet_data: MagicMock): def test_populate_queue(self): uf = UserFixtures() - self.skv.db_cache.populate(am_users=[uf.new_user_example, uf.new_unverified_user_example], periodicity=1) + self.skv.db_cache.populate( + am_users=[uf.new_user_example, uf.new_unverified_user_example], + periodicity=timedelta(seconds=10), + minimum_delay=timedelta(seconds=1), + ) self.skv._enqueuing_to_worker_queue() got_user1 = self.skv.worker_queue.get() assert got_user1["eppn"] == uf.new_user_example.eppn diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index 5a317ebbc..99008fce4 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -100,7 +100,9 @@ def run(self): identity_type=self.identity_type, limit=10000000, ) - self.db_cache.populate(am_users=users, periodicity=self.config.periodicity) + self.db_cache.populate( + am_users=users, periodicity=self.config.periodicity, minimum_delay=self.config.minimum_delay + ) if self.worker_queue.empty(): self._enqueuing_to_worker_queue() From 4378c291749afccb4313394a514007787b34daac Mon Sep 17 00:00:00 2001 From: masv3971 Date: Wed, 1 Feb 2023 15:33:52 +0100 Subject: [PATCH 46/49] Remove unused dep. --- src/eduid/workers/user_cleaner/app.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 2f1b2f7dd..4c7177a90 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -1,5 +1,4 @@ from dataclasses import Field -from datetime import datetime import logging import signal import os From 44ffed656e61df50659fdc1336370c485acd01bc Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Wed, 11 Oct 2023 08:43:58 +0000 Subject: [PATCH 47/49] move function to common utils --- src/eduid/common/utils.py | 53 ++++++++++++++++++++++---- src/eduid/webapp/common/api/helpers.py | 43 +-------------------- 2 files changed, 47 insertions(+), 49 deletions(-) diff --git a/src/eduid/common/utils.py b/src/eduid/common/utils.py index 510e12c42..bf97f756d 100644 --- a/src/eduid/common/utils.py +++ b/src/eduid/common/utils.py @@ -4,6 +4,8 @@ from eduid.userdb.user import TUserSubclass import logging +from typing import List, Optional + logger = logging.getLogger(__name__) @@ -48,6 +50,48 @@ def removesuffix(s: str, suffix: str) -> str: return s[:] +def get_marked_given_name(given_name: str, given_name_marking: Optional[str]) -> str: + """ + Given name marking denotes up to two given names, and is used to determine + which of the given names are to be primarily used in addressing a person. + For this purpose, the given_name_marking is two numbers: + indexing starting at 1 + the second can be 0 for only one mark + hyphenated names are counted separately (i.e. Jan-Erik are two separate names) + If they are both marked they should be re-hyphenated + + current version of documentation: + https://www.skatteverket.se/download/18.2cf1b5cd163796a5c8bf20e/1530691773712/AllmanBeskrivning.pdf + + :param given_name: Given name + :param given_name_marking: Given name marking + + :return: Marked given name (Tilltalsnamn) + """ + if not given_name_marking or "00" == given_name_marking: + return given_name + + # cheating with indexing + _given_names: List[Optional[str]] = [None] + for name in given_name.split(): + if "-" in name: + # hyphenated names are counted separately + _given_names.extend(name.split("-")) + else: + _given_names.append(name) + + _optional_marked_names: List[Optional[str]] = [] + for i in given_name_marking: + _optional_marked_names.append(_given_names[int(i)]) + # remove None values + # i.e. 0 index and hyphenated names second part placeholder + _marked_names: List[str] = [name for name in _optional_marked_names if name is not None] + if "-".join(_marked_names) in given_name: + return "-".join(_marked_names) + else: + return " ".join(_marked_names) + + def set_user_names_from_official_address(user: TUserSubclass, user_postal_address: FullPostalAddress) -> TUserSubclass: """ :param user: Proofing app private userdb user @@ -63,13 +107,8 @@ def set_user_names_from_official_address(user: TUserSubclass, user_postal_addres given_name_marking = user_postal_address.name.given_name_marking user.display_name = f"{user.given_name} {user.surname}" if given_name_marking: - _name_index = (int(given_name_marking) // 10) - 1 # ex. "20" -> 1 (second GivenName is real given name) - try: - _given_name = user.given_name.split()[_name_index] - user.display_name = f"{_given_name} {user.surname}" - except IndexError: - # At least occasionally, we've seen GivenName 'Jan-Erik Martin' with GivenNameMarking 30 - pass + _given_name = get_marked_given_name(user.given_name, given_name_marking) + user.display_name = f"{_given_name} {user.surname}" logger.info("User names set from official address") logger.debug( f"{user_postal_address.name} resulted in given_name: {user.given_name}, " diff --git a/src/eduid/webapp/common/api/helpers.py b/src/eduid/webapp/common/api/helpers.py index 6713150c7..b004d4d8e 100644 --- a/src/eduid/webapp/common/api/helpers.py +++ b/src/eduid/webapp/common/api/helpers.py @@ -28,52 +28,11 @@ from eduid.userdb.userdb import UserDB from eduid.webapp.common.api.app import EduIDBaseApp from eduid.webapp.common.api.utils import get_from_current_app, save_and_sync_user +from eduid.common.utils import get_marked_given_name __author__ = "lundberg" -def get_marked_given_name(given_name: str, given_name_marking: Optional[str]) -> str: - """ - Given name marking denotes up to two given names, and is used to determine - which of the given names are to be primarily used in addressing a person. - For this purpose, the given_name_marking is two numbers: - indexing starting at 1 - the second can be 0 for only one mark - hyphenated names are counted separately (i.e. Jan-Erik are two separate names) - If they are both marked they should be re-hyphenated - - current version of documentation: - https://www.skatteverket.se/download/18.2cf1b5cd163796a5c8bf20e/1530691773712/AllmanBeskrivning.pdf - - :param given_name: Given name - :param given_name_marking: Given name marking - - :return: Marked given name (Tilltalsnamn) - """ - if not given_name_marking or "00" == given_name_marking: - return given_name - - # cheating with indexing - _given_names: List[Optional[str]] = [None] - for name in given_name.split(): - if "-" in name: - # hyphenated names are counted separately - _given_names.extend(name.split("-")) - else: - _given_names.append(name) - - _optional_marked_names: List[Optional[str]] = [] - for i in given_name_marking: - _optional_marked_names.append(_given_names[int(i)]) - # remove None values - # i.e. 0 index and hyphenated names second part placeholder - _marked_names: List[str] = [name for name in _optional_marked_names if name is not None] - if "-".join(_marked_names) in given_name: - return "-".join(_marked_names) - else: - return " ".join(_marked_names) - - def set_user_names_from_nin_proofing( user: TUserSubclass, proofing_log_entry: TNinProofingLogElementSubclass, From 05b4e09e9e97eeadcc1f9645aa53395ccd74ea69 Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Wed, 11 Oct 2023 08:51:12 +0000 Subject: [PATCH 48/49] make reformat --- .../common/clients/amapi_client/testing.py | 8 +++---- src/eduid/common/utils.py | 6 ++--- src/eduid/userdb/tests/test_user_cleaner.py | 2 +- src/eduid/userdb/user_cleaner/cache.py | 5 ++-- src/eduid/userdb/user_cleaner/cachedb.py | 3 ++- src/eduid/userdb/user_cleaner/meta.py | 3 ++- src/eduid/userdb/user_cleaner/metadb.py | 3 ++- src/eduid/userdb/userdb.py | 1 - src/eduid/webapp/common/api/helpers.py | 2 +- .../common/api/tests/test_nin_helpers.py | 1 - src/eduid/workers/user_cleaner/__init__.py | 2 +- src/eduid/workers/user_cleaner/app.py | 24 ++++++++----------- src/eduid/workers/user_cleaner/config.py | 4 ++-- .../workers/user_cleaner/testing/test_skv.py | 15 ++++++------ .../workers/user_cleaner/workers/__init__.py | 2 +- src/eduid/workers/user_cleaner/workers/skv.py | 8 +++---- 16 files changed, 43 insertions(+), 46 deletions(-) diff --git a/src/eduid/common/clients/amapi_client/testing.py b/src/eduid/common/clients/amapi_client/testing.py index 29640abc8..5baf3cb4d 100644 --- a/src/eduid/common/clients/amapi_client/testing.py +++ b/src/eduid/common/clients/amapi_client/testing.py @@ -1,7 +1,9 @@ from typing import Mapping, Optional import respx -from httpx import Response, Request +from httpx import Request, Response + +from eduid.common.clients.gnap_client.testing import MockedSyncAuthAPIMixin from eduid.common.misc.timeutil import utc_now from eduid.common.models.amapi_user import ( UserUpdateEmailRequest, @@ -9,12 +11,10 @@ UserUpdateNameRequest, UserUpdatePhoneRequest, ) - -from eduid.common.clients.gnap_client.testing import MockedSyncAuthAPIMixin -from eduid.userdb.userdb import AmDB from eduid.userdb.mail import MailAddressList from eduid.userdb.phone import PhoneNumberList from eduid.userdb.user import User +from eduid.userdb.userdb import AmDB class MockedAMAPIMixin(MockedSyncAuthAPIMixin): diff --git a/src/eduid/common/utils.py b/src/eduid/common/utils.py index bf97f756d..9cd9c710e 100644 --- a/src/eduid/common/utils.py +++ b/src/eduid/common/utils.py @@ -1,11 +1,11 @@ __author__ = "lundberg" -from eduid.common.rpc.msg_relay import FullPostalAddress -from eduid.userdb.user import TUserSubclass import logging - from typing import List, Optional +from eduid.common.rpc.msg_relay import FullPostalAddress +from eduid.userdb.user import TUserSubclass + logger = logging.getLogger(__name__) diff --git a/src/eduid/userdb/tests/test_user_cleaner.py b/src/eduid/userdb/tests/test_user_cleaner.py index 488b73ab9..56e3634df 100644 --- a/src/eduid/userdb/tests/test_user_cleaner.py +++ b/src/eduid/userdb/tests/test_user_cleaner.py @@ -1,8 +1,8 @@ from unittest import TestCase from eduid.userdb.testing import MongoTemporaryInstance -from eduid.userdb.user_cleaner.cachedb import CacheDB from eduid.userdb.user_cleaner.cache import CacheUser +from eduid.userdb.user_cleaner.cachedb import CacheDB class TestUserCleanerCache(TestCase): diff --git a/src/eduid/userdb/user_cleaner/cache.py b/src/eduid/userdb/user_cleaner/cache.py index a316d1f0e..4944f673c 100644 --- a/src/eduid/userdb/user_cleaner/cache.py +++ b/src/eduid/userdb/user_cleaner/cache.py @@ -1,10 +1,11 @@ from __future__ import annotations +from datetime import datetime from typing import Any, Mapping, Optional + from pydantic import BaseModel, Field -from datetime import datetime -from eduid.common.misc.timeutil import utc_now +from eduid.common.misc.timeutil import utc_now from eduid.userdb.db import TUserDbDocument from eduid.userdb.user import User diff --git a/src/eduid/userdb/user_cleaner/cachedb.py b/src/eduid/userdb/user_cleaner/cachedb.py index 9eeb75337..51bb093d7 100644 --- a/src/eduid/userdb/user_cleaner/cachedb.py +++ b/src/eduid/userdb/user_cleaner/cachedb.py @@ -1,7 +1,8 @@ +import logging from abc import ABC from datetime import timedelta from time import time -import logging + from eduid.common.misc.timeutil import utc_now from eduid.userdb.db import BaseDB, TUserDbDocument from eduid.userdb.user import User diff --git a/src/eduid/userdb/user_cleaner/meta.py b/src/eduid/userdb/user_cleaner/meta.py index 1fd5a08a0..bf56f965d 100644 --- a/src/eduid/userdb/user_cleaner/meta.py +++ b/src/eduid/userdb/user_cleaner/meta.py @@ -1,7 +1,8 @@ from __future__ import annotations -from typing import Any, Mapping from datetime import datetime, timedelta +from typing import Any, Mapping + from pydantic import BaseModel, Field from eduid.common.misc.timeutil import utc_now diff --git a/src/eduid/userdb/user_cleaner/metadb.py b/src/eduid/userdb/user_cleaner/metadb.py index 94a17d396..4557a4918 100644 --- a/src/eduid/userdb/user_cleaner/metadb.py +++ b/src/eduid/userdb/user_cleaner/metadb.py @@ -1,7 +1,8 @@ from typing import Union + from eduid.userdb.db import BaseDB -from eduid.userdb.user_cleaner.meta import Meta from eduid.userdb.meta import CleanerType +from eduid.userdb.user_cleaner.meta import Meta class MetaDB(BaseDB): diff --git a/src/eduid/userdb/userdb.py b/src/eduid/userdb/userdb.py index 7f2648a28..7c563903f 100644 --- a/src/eduid/userdb/userdb.py +++ b/src/eduid/userdb/userdb.py @@ -401,7 +401,6 @@ def save(self, user: User) -> UserSaveResult: return UserSaveResult(success=bool(result)) def unverify_mail_aliases(self, user_id: ObjectId, mail_aliases: Optional[list[TUserDbDocument]]) -> int: - count = 0 if mail_aliases is None: logger.debug(f"No mailAliases to check duplicates against for user {user_id}.") diff --git a/src/eduid/webapp/common/api/helpers.py b/src/eduid/webapp/common/api/helpers.py index b004d4d8e..d3fe4467d 100644 --- a/src/eduid/webapp/common/api/helpers.py +++ b/src/eduid/webapp/common/api/helpers.py @@ -8,6 +8,7 @@ from eduid.common.rpc.exceptions import NoNavetData from eduid.common.rpc.mail_relay import MailRelay from eduid.common.rpc.msg_relay import DeregisteredCauseCode, DeregistrationInformation, FullPostalAddress, MsgRelay +from eduid.common.utils import get_marked_given_name from eduid.userdb import NinIdentity from eduid.userdb.element import ElementKey from eduid.userdb.exceptions import LockedIdentityViolation @@ -28,7 +29,6 @@ from eduid.userdb.userdb import UserDB from eduid.webapp.common.api.app import EduIDBaseApp from eduid.webapp.common.api.utils import get_from_current_app, save_and_sync_user -from eduid.common.utils import get_marked_given_name __author__ = "lundberg" diff --git a/src/eduid/webapp/common/api/tests/test_nin_helpers.py b/src/eduid/webapp/common/api/tests/test_nin_helpers.py index 3bb6e056d..8a2bd8ef8 100644 --- a/src/eduid/webapp/common/api/tests/test_nin_helpers.py +++ b/src/eduid/webapp/common/api/tests/test_nin_helpers.py @@ -29,7 +29,6 @@ set_user_names_from_nin_proofing, verify_nin_for_user, ) -from eduid.common.utils import set_user_names_from_official_address from eduid.webapp.common.api.testing import EduidAPITestCase from eduid.webapp.common.session.eduid_session import SessionFactory diff --git a/src/eduid/workers/user_cleaner/__init__.py b/src/eduid/workers/user_cleaner/__init__.py index 0379adfa8..7add9a06f 100644 --- a/src/eduid/workers/user_cleaner/__init__.py +++ b/src/eduid/workers/user_cleaner/__init__.py @@ -1,2 +1,2 @@ # -*- coding: utf-8 -*- -__author__ = "masv" \ No newline at end of file +__author__ = "masv" diff --git a/src/eduid/workers/user_cleaner/app.py b/src/eduid/workers/user_cleaner/app.py index 4c7177a90..325d368a2 100644 --- a/src/eduid/workers/user_cleaner/app.py +++ b/src/eduid/workers/user_cleaner/app.py @@ -1,31 +1,27 @@ -from dataclasses import Field import logging -import signal import os +import signal +import time from abc import ABC - +from dataclasses import Field +from pathlib import Path from queue import Queue -import time - from typing import Any, Mapping, Optional, Union -from pathlib import Path -from eduid.common.misc.timeutil import utc_now - from eduid.common.clients.amapi_client.amapi_client import AMAPIClient from eduid.common.config.parsers import load_config +from eduid.common.logging import init_logging +from eduid.common.misc.timeutil import utc_now from eduid.common.rpc.msg_relay import MsgRelay +from eduid.common.stats import init_app_stats from eduid.userdb import AmDB from eduid.userdb.identity import IdentityType from eduid.userdb.meta import CleanerType -from eduid.common.logging import init_logging -from eduid.common.stats import init_app_stats - -from eduid.workers.user_cleaner.config import UserCleanerConfig -from eduid.userdb.user_cleaner.cachedb import CacheDB from eduid.userdb.user_cleaner.cache import CacheUser -from eduid.userdb.user_cleaner.metadb import MetaDB +from eduid.userdb.user_cleaner.cachedb import CacheDB from eduid.userdb.user_cleaner.meta import Meta +from eduid.userdb.user_cleaner.metadb import MetaDB +from eduid.workers.user_cleaner.config import UserCleanerConfig class WorkerBase(ABC): diff --git a/src/eduid/workers/user_cleaner/config.py b/src/eduid/workers/user_cleaner/config.py index 148ef71e9..3e6dafd5e 100644 --- a/src/eduid/workers/user_cleaner/config.py +++ b/src/eduid/workers/user_cleaner/config.py @@ -1,10 +1,10 @@ from datetime import timedelta from typing import Sequence -from pydantic import Field, BaseModel +from pydantic import BaseModel, Field from eduid.common.clients.gnap_client.base import GNAPClientAuthData -from eduid.common.config.base import LoggingConfigMixin, RootConfig, MsgConfigMixin, LoggingFilters, StatsConfigMixin +from eduid.common.config.base import LoggingConfigMixin, LoggingFilters, MsgConfigMixin, RootConfig, StatsConfigMixin class AmAPIConfig(BaseModel): diff --git a/src/eduid/workers/user_cleaner/testing/test_skv.py b/src/eduid/workers/user_cleaner/testing/test_skv.py index 16d0be92a..95d55e45f 100644 --- a/src/eduid/workers/user_cleaner/testing/test_skv.py +++ b/src/eduid/workers/user_cleaner/testing/test_skv.py @@ -1,18 +1,17 @@ from datetime import timedelta from typing import Any, Dict from unittest.mock import MagicMock, patch -from eduid.common.clients.amapi_client.testing import MockedAMAPIMixin -from eduid.common.testing_base import CommonTestCase -from eduid.userdb.fixtures.users import UserFixtures -from eduid.workers.user_cleaner.workers.skv import init_skv_worker -from eduid.common.rpc.msg_relay import ( - NavetData, -) + import pytest from jwcrypto.jwk import JWK -from eduid.workers.msg.tasks import MessageSender +from eduid.common.clients.amapi_client.testing import MockedAMAPIMixin +from eduid.common.rpc.msg_relay import NavetData +from eduid.common.testing_base import CommonTestCase +from eduid.userdb.fixtures.users import UserFixtures from eduid.userdb.user_cleaner.cache import CacheUser +from eduid.workers.msg.tasks import MessageSender +from eduid.workers.user_cleaner.workers.skv import init_skv_worker class WorkerTest(CommonTestCase, MockedAMAPIMixin): diff --git a/src/eduid/workers/user_cleaner/workers/__init__.py b/src/eduid/workers/user_cleaner/workers/__init__.py index 0379adfa8..7add9a06f 100644 --- a/src/eduid/workers/user_cleaner/workers/__init__.py +++ b/src/eduid/workers/user_cleaner/workers/__init__.py @@ -1,2 +1,2 @@ # -*- coding: utf-8 -*- -__author__ = "masv" \ No newline at end of file +__author__ = "masv" diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index 99008fce4..8e295665d 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -1,16 +1,16 @@ -from typing import Any, Mapping, Optional from time import time +from typing import Any, Mapping, Optional -from eduid.common.models.amapi_user import UserUpdateNameRequest, Reason, Source +from eduid.common.models.amapi_user import Reason, Source, UserUpdateNameRequest from eduid.common.rpc.msg_relay import NavetData from eduid.common.utils import set_user_names_from_official_address from eduid.userdb.identity import IdentityType from eduid.userdb.meta import CleanerType -from eduid.workers.user_cleaner.app import WorkerBase from eduid.userdb.user_cleaner.cache import CacheUser from eduid.userdb.user_cleaner.cachedb import CacheDB -from eduid.userdb.user_cleaner.metadb import MetaDB from eduid.userdb.user_cleaner.meta import Meta +from eduid.userdb.user_cleaner.metadb import MetaDB +from eduid.workers.user_cleaner.app import WorkerBase class SKV(WorkerBase): From b36dbeb2b67d54f974378403e4408d553b843a44 Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Fri, 13 Oct 2023 12:39:38 +0000 Subject: [PATCH 49/49] make the worker startable --- src/eduid/userdb/user_cleaner/cachedb.py | 3 +++ src/eduid/userdb/user_cleaner/meta.py | 6 +++++- src/eduid/userdb/user_cleaner/metadb.py | 13 ++++++++++--- src/eduid/workers/user_cleaner/workers/skv.py | 5 ++++- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/eduid/userdb/user_cleaner/cachedb.py b/src/eduid/userdb/user_cleaner/cachedb.py index 51bb093d7..4470bbc91 100644 --- a/src/eduid/userdb/user_cleaner/cachedb.py +++ b/src/eduid/userdb/user_cleaner/cachedb.py @@ -58,6 +58,9 @@ def populate( """Populate cache db with the user from AMDB.""" number_of_users = len(am_users) + if number_of_users == 0: + logger.info("No users to populate") + return # strip 2 hours of each day in order to give us some slack time_constant = timedelta( diff --git a/src/eduid/userdb/user_cleaner/meta.py b/src/eduid/userdb/user_cleaner/meta.py index bf56f965d..4af57d7dc 100644 --- a/src/eduid/userdb/user_cleaner/meta.py +++ b/src/eduid/userdb/user_cleaner/meta.py @@ -1,5 +1,6 @@ from __future__ import annotations +import copy from datetime import datetime, timedelta from typing import Any, Mapping @@ -21,10 +22,13 @@ def to_dict(self) -> TUserDbDocument: Element later. """ data = self.dict(exclude_none=True) + data["periodicity"] = self.periodicity.total_seconds() return TUserDbDocument(data) @classmethod def from_dict(cls, data: Mapping[str, Any]) -> Meta: """Convert a dict to a Element object.""" - return cls(**data) + _data = copy.deepcopy(dict(data)) + _data["periodicity"] = timedelta(seconds=_data["periodicity"]) + return cls(**_data) diff --git a/src/eduid/userdb/user_cleaner/metadb.py b/src/eduid/userdb/user_cleaner/metadb.py index 4557a4918..3d4b0113a 100644 --- a/src/eduid/userdb/user_cleaner/metadb.py +++ b/src/eduid/userdb/user_cleaner/metadb.py @@ -1,9 +1,12 @@ +import logging from typing import Union from eduid.userdb.db import BaseDB from eduid.userdb.meta import CleanerType from eduid.userdb.user_cleaner.meta import Meta +logger = logging.getLogger(__name__) + class MetaDB(BaseDB): """Database class for the meta database.""" @@ -16,8 +19,12 @@ def __init__(self, db_uri: str, collection: str = "meta", db_name: str = "eduid_ def save(self, doc: Meta) -> bool: """Save och replace an existing meta document.""" - res = self._coll.replace_one({"cleaner_type": doc.cleaner_type}, doc.to_dict(), upsert=True) - return res.acknowledged + try: + res = self._coll.replace_one({"cleaner_type": doc.cleaner_type}, doc.to_dict(), upsert=True) + return res.acknowledged + except Exception as e: + logger.error(f"Failed to save meta document: {e}") + return False def get(self, cleaner_type: CleanerType) -> Union[Meta, None]: """Get a worker meta from Meta.""" @@ -26,6 +33,6 @@ def get(self, cleaner_type: CleanerType) -> Union[Meta, None]: return None return Meta.from_dict(data=res) - def exist(self, cleaner_type: CleanerType) -> bool: + def exists(self, cleaner_type: CleanerType) -> bool: """Check if a user exists in the cache.""" return self._coll.count_documents({"cleaner_type": cleaner_type}) > 0 diff --git a/src/eduid/workers/user_cleaner/workers/skv.py b/src/eduid/workers/user_cleaner/workers/skv.py index 8e295665d..59a79f10f 100644 --- a/src/eduid/workers/user_cleaner/workers/skv.py +++ b/src/eduid/workers/user_cleaner/workers/skv.py @@ -71,7 +71,9 @@ def update_name(self, queue_user: CacheUser, navet_data: NavetData) -> None: def run(self): """skatteverket worker entry point""" - if not self.db_meta.exist(cleaner_type=self.cleaner_type): + self.logger.info("Starting skatteverket worker, getting meta") + if not self.db_meta.exists(cleaner_type=self.cleaner_type): + self.logger.info("meta does not exist, creating") meta = Meta( periodicity=self.config.periodicity, cleaner_type=self.cleaner_type, @@ -130,6 +132,7 @@ def init_skv_worker(test_config: Optional[Mapping[str, Any]] = None) -> SKV: def start_worker(): """Start skv (skatteverket) worker""" worker = init_skv_worker() + worker.logger.info("Starting skv worker") worker.run()