diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 00e8ffca07..290d56bb5c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,8 +20,8 @@ jobs: - name: Install dependencies run: | pip install $(grep -iE "pylint|pycodestyle" analyzer/requirements_py/dev/requirements.txt) - - name: Run tests - run: make pylint pycodestyle + - name: Run pycodestyle & pylint + run: make -k pycodestyle pylint tools: name: Tools (report-converter, etc.) diff --git a/analyzer/codechecker_analyzer/analysis_manager.py b/analyzer/codechecker_analyzer/analysis_manager.py index 6b22ca4231..6f7e8a22dd 100644 --- a/analyzer/codechecker_analyzer/analysis_manager.py +++ b/analyzer/codechecker_analyzer/analysis_manager.py @@ -13,16 +13,15 @@ import shutil import signal import sys -import time import traceback import zipfile from threading import Timer import multiprocess -import psutil from codechecker_common.logger import get_logger +from codechecker_common.process import kill_process_tree from codechecker_common.review_status_handler import ReviewStatusHandler from codechecker_statistics_collector.collectors.special_return_value import \ @@ -341,42 +340,6 @@ def handle_failure( os.remove(plist_file) -def kill_process_tree(parent_pid, recursive=False): - """Stop the process tree try it gracefully first. - - Try to stop the parent and child processes gracefuly - first if they do not stop in time send a kill signal - to every member of the process tree. - - There is a similar function in the web part please - consider to update that in case of changing this. - """ - proc = psutil.Process(parent_pid) - children = proc.children(recursive) - - # Send a SIGTERM (Ctrl-C) to the main process - proc.terminate() - - # If children processes don't stop gracefully in time, - # slaughter them by force. - _, still_alive = psutil.wait_procs(children, timeout=5) - for p in still_alive: - p.kill() - - # Wait until this process is running. - n = 0 - timeout = 10 - while proc.is_running(): - if n > timeout: - LOG.warning("Waiting for process %s to stop has been timed out" - "(timeout = %s)! Process is still running!", - parent_pid, timeout) - break - - time.sleep(1) - n += 1 - - def setup_process_timeout(proc, timeout, failure_callback=None): """ diff --git a/bin/CodeChecker b/bin/CodeChecker index ad820b8a05..261e2312b2 100755 --- a/bin/CodeChecker +++ b/bin/CodeChecker @@ -6,10 +6,10 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # ------------------------------------------------------------------------- - """ Used to kickstart CodeChecker. -Save original environment without modifications. + +Saves original environment without modifications. Used to run the logging in the same env. """ # This is for enabling CodeChecker as a filename (i.e. module name). @@ -25,9 +25,10 @@ import sys import tempfile PROC_PID = None +EXIT_CODE = None -def run_codechecker(checker_env, subcommand=None): +def run_codechecker(checker_env, subcommand=None) -> int: """ Run the CodeChecker. * checker_env - CodeChecker will be run in the checker env. 
@@ -63,11 +64,13 @@ def run_codechecker(checker_env, subcommand=None): global PROC_PID PROC_PID = proc.pid - proc.wait() - sys.exit(proc.returncode) + global EXIT_CODE + EXIT_CODE = proc.wait() + + return EXIT_CODE -def main(subcommand=None): +def main(subcommand=None) -> int: original_env = os.environ.copy() checker_env = original_env @@ -94,30 +97,32 @@ def main(subcommand=None): print('Saving original build environment failed.') print(ex) - def signal_term_handler(signum, _frame): + def signal_handler(signum, _frame): + """ + Forwards the received signal to the CodeChecker subprocess started by + this `main` script. + """ global PROC_PID if PROC_PID and sys.platform != "win32": - os.kill(PROC_PID, signal.SIGINT) - - _remove_tmp() - sys.exit(128 + signum) - - signal.signal(signal.SIGTERM, signal_term_handler) - signal.signal(signal.SIGINT, signal_term_handler) - - def signal_reload_handler(_sig, _frame): - global PROC_PID - if PROC_PID: - os.kill(PROC_PID, signal.SIGHUP) + try: + os.kill(PROC_PID, signum) + except ProcessLookupError: + pass + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) if sys.platform != "win32": - signal.signal(signal.SIGHUP, signal_reload_handler) + signal.signal(signal.SIGHUP, signal_handler) + signal.signal(signal.SIGCHLD, signal_handler) try: - run_codechecker(checker_env, subcommand) + global EXIT_CODE + EXIT_CODE = run_codechecker(checker_env, subcommand) finally: _remove_tmp() + return EXIT_CODE + if __name__ == "__main__": - main(None) + sys.exit(main(None) or 0) diff --git a/codechecker_common/compatibility/multiprocessing.py b/codechecker_common/compatibility/multiprocessing.py index 14ef7ebebe..eaee9a78e7 100644 --- a/codechecker_common/compatibility/multiprocessing.py +++ b/codechecker_common/compatibility/multiprocessing.py @@ -13,8 +13,15 @@ # pylint: disable=no-name-in-module # pylint: disable=unused-import if sys.platform in ["darwin", "win32"]: - from multiprocess import Pool # type: ignore - from multiprocess import cpu_count + from multiprocess import \ + Pool, Process, \ + Queue, \ + Value, \ + cpu_count else: - from concurrent.futures import ProcessPoolExecutor as Pool # type: ignore - from multiprocessing import cpu_count + from concurrent.futures import ProcessPoolExecutor as Pool + from multiprocessing import \ + Process, \ + Queue, \ + Value, \ + cpu_count diff --git a/codechecker_common/logger.py b/codechecker_common/logger.py index 8c860dee6e..35702fb0b8 100644 --- a/codechecker_common/logger.py +++ b/codechecker_common/logger.py @@ -6,16 +6,18 @@ # # ------------------------------------------------------------------------- - import argparse +import datetime import json import logging from logging import config from pathlib import Path import os +import sys +from typing import Optional -# The logging leaves can be accesses without -# importing the logging module in other modules. +# The logging leaves can be accesses without importing the logging module in +# other modules. 
DEBUG = logging.DEBUG INFO = logging.INFO WARNING = logging.WARNING @@ -25,14 +27,24 @@ CMDLINE_LOG_LEVELS = ['info', 'debug_analyzer', 'debug'] -DEBUG_ANALYZER = logging.DEBUG_ANALYZER = 15 # type: ignore +DEBUG_ANALYZER = 15 logging.addLevelName(DEBUG_ANALYZER, 'DEBUG_ANALYZER') +_Levels = {"DEBUG": DEBUG, + "DEBUG_ANALYZER": DEBUG_ANALYZER, + "INFO": INFO, + "WARNING": WARNING, + "ERROR": ERROR, + "CRITICAL": CRITICAL, + "NOTSET": NOTSET, + } + + class CCLogger(logging.Logger): def debug_analyzer(self, msg, *args, **kwargs): - if self.isEnabledFor(logging.DEBUG_ANALYZER): - self._log(logging.DEBUG_ANALYZER, msg, args, **kwargs) + if self.isEnabledFor(DEBUG_ANALYZER): + self._log(DEBUG_ANALYZER, msg, args, **kwargs) logging.setLoggerClass(CCLogger) @@ -113,6 +125,36 @@ def validate_loglvl(log_level): return log_level +def raw_sprint_log(logger: logging.Logger, level: str, message: str) \ + -> Optional[str]: + """ + Formats a raw log `message` using the date format of the specified + `logger`, without actually invoking the logging infrastructure. + """ + if not logger.isEnabledFor(_Levels[level]): + return None + + formatter = logger.handlers[0].formatter if len(logger.handlers) > 0 \ + else None + datefmt = formatter.datefmt if formatter else None + time = datetime.datetime.now().strftime(datefmt) if datefmt \ + else str(datetime.datetime.now()) + + return f"[{validate_loglvl(level)} {time}] - {message}" + + +def signal_log(logger: logging.Logger, level: str, message: str): + """ + Simulates a log output and logs a message within a signal handler, without + triggering a `RuntimeError` due to reentrancy in `print`-like method calls. + """ + formatted = raw_sprint_log(logger, level, message) + if not formatted: + return + + os.write(sys.stderr.fileno(), f"{formatted}\n".encode()) + + class LogCfgServer: """ Initialize a log configuration server for dynamic log configuration. diff --git a/codechecker_common/process.py b/codechecker_common/process.py new file mode 100644 index 0000000000..86da476d2a --- /dev/null +++ b/codechecker_common/process.py @@ -0,0 +1,49 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +import time + +import psutil + +from .logger import get_logger + + +LOG = get_logger("system") + + +def kill_process_tree(parent_pid, recursive=False): + """ + Stop the process tree, gracefully at first. + + Try to stop the parent and child processes gracefuly first. + If they do not stop in time, send a kill signal to every member of the + process tree. + """ + proc = psutil.Process(parent_pid) + children = proc.children(recursive) + + # Send a SIGTERM to the main process. + proc.terminate() + + # If children processes don't stop gracefully in time, slaughter them + # by force. + _, still_alive = psutil.wait_procs(children, timeout=5) + for p in still_alive: + p.kill() + + # Wait until this process is running. + n = 0 + timeout = 10 + while proc.is_running(): + if n > timeout: + LOG.warning("Waiting for process %s to stop has been timed out" + "(timeout = %s)! 
Process is still running!", + parent_pid, timeout) + break + + time.sleep(1) + n += 1 diff --git a/codechecker_common/typehints.py b/codechecker_common/typehints.py new file mode 100644 index 0000000000..642d5ce0ec --- /dev/null +++ b/codechecker_common/typehints.py @@ -0,0 +1,35 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Type hint (`typing`) extensions. +""" +from typing import Any, Protocol, TypeVar + + +_T_contra = TypeVar("_T_contra", contravariant=True) + + +class LTComparable(Protocol[_T_contra]): + def __lt__(self, other: _T_contra, /) -> bool: ... + + +class LEComparable(Protocol[_T_contra]): + def __le__(self, other: _T_contra, /) -> bool: ... + + +class GTComparable(Protocol[_T_contra]): + def __gt__(self, other: _T_contra, /) -> bool: ... + + +class GEComparable(Protocol[_T_contra]): + def __ge__(self, other: _T_contra, /) -> bool: ... + + +class Orderable(LTComparable[Any], LEComparable[Any], + GTComparable[Any], GEComparable[Any], Protocol): + """Type hint for something that supports rich comparison operators.""" diff --git a/codechecker_common/util.py b/codechecker_common/util.py index e389b8d1a0..5827a99b10 100644 --- a/codechecker_common/util.py +++ b/codechecker_common/util.py @@ -8,15 +8,21 @@ """ Util module. """ +import datetime +import hashlib import itertools import json import os -from typing import TextIO +import pathlib +import random +from typing import TextIO, Union import portalocker from codechecker_common.logger import get_logger +from .typehints import Orderable + LOG = get_logger('system') @@ -32,7 +38,7 @@ def arg_match(options, args): return matched_args -def clamp(min_: int, value: int, max_: int) -> int: +def clamp(min_: Orderable, value: Orderable, max_: Orderable) -> Orderable: """Clamps ``value`` such that ``min_ <= value <= max_``.""" if min_ > max_: raise ValueError("min <= max required") @@ -50,7 +56,10 @@ def chunks(iterator, n): yield itertools.chain([first], rest_of_chunk) -def load_json(path: str, default=None, lock=False, display_warning=True): +def load_json(path: Union[str, pathlib.Path], + default=None, + lock=False, + display_warning=True): """ Load the contents of the given file as a JSON and return it's value, or default if the file can't be loaded. @@ -112,3 +121,32 @@ def path_for_fake_root(full_path: str, root_path: str = '/') -> str: def strtobool(value: str) -> bool: """Parse a string value to a boolean.""" return value.lower() in ('y', 'yes', 't', 'true', 'on', '1') + + +def generate_random_token(num_bytes: int = 32) -> str: + """ + Returns a random-generated string usable as a token with `num_bytes` + hexadecimal characters in the output. + """ + prefix = str(os.getpid()).encode() + suffix = str(datetime.datetime.now()).encode() + + hash_value = ''.join( + [hashlib.sha256(prefix + os.urandom(num_bytes * 2) + suffix) + .hexdigest() + for _ in range(0, -(num_bytes // -64))]) + idx = random.randrange(0, len(hash_value) - num_bytes + 1) + return hash_value[idx:(idx + num_bytes)] + + +def format_size(num: float, suffix: str = 'B') -> str: + """ + Pretty print storage units. 
+ Source: http://stackoverflow.com/questions/1094841/ + reusable-library-to-get-human-readable-version-of-file-size + """ + for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi', 'Ri']: + if abs(num) < 1024.0: + return f"{num:3.1f} {unit}{suffix}" + num /= 1024.0 + return f"{num:.1f} Qi{suffix}" diff --git a/docs/web/server_config.md b/docs/web/server_config.md index add9bddcb7..7eb0e4f468 100644 --- a/docs/web/server_config.md +++ b/docs/web/server_config.md @@ -17,7 +17,7 @@ Table of Contents * [Size of the compilation database](#size-of-the-compilation-database) * [Authentication](#authentication) -## Number of worker processes +## Number of API worker processes The `worker_processes` section of the config file controls how many processes will be started on the server to process API requests. @@ -25,6 +25,14 @@ will be started on the server to process API requests. The server needs to be restarted if the value is changed in the config file. +### Number of task worker processes +The `background_worker_processes` section of the config file controls how many +processes will be started on the server to process background jobs. + +*Default value*: Fallback to same amount as `worker_processes`. + +The server needs to be restarted if the value is changed in the config file. + ## Run limitation The `max_run_count` section of the config file controls how many runs can be stored on the server for a product. diff --git a/docs/web/user_guide.md b/docs/web/user_guide.md index 846599b76a..1f9f364353 100644 --- a/docs/web/user_guide.md +++ b/docs/web/user_guide.md @@ -39,6 +39,7 @@ - [Manage product configuration of a server (`products`)](#manage-product-configuration-of-a-server-products) - [Query authorization settings (`permissions`)](#query-authorization-settings-permissions) - [Authenticate to the server (`login`)](#authenticate-to-the-server-login) + - [Server-side task management (`serverside-tasks`)](#server-side-task-management-serverside-tasks) - [Exporting source code suppression to suppress file](#exporting-source-code-suppression-to-suppress-file) - [Export comments and review statuses (`export`)](#export-comments-and-review-statuses-export) - [Import comments and review statuses into Codechecker (`import`)](#import-comments-and-review-statuses-into-codechecker-import) @@ -145,8 +146,9 @@ or via the `CodeChecker cmd` command-line client. ``` usage: CodeChecker server [-h] [-w WORKSPACE] [-f CONFIG_DIRECTORY] - [--host LISTEN_ADDRESS] [-v PORT] [--not-host-only] - [--skip-db-cleanup] [--config CONFIG_FILE] + [--machine-id MACHINE_ID] [--host LISTEN_ADDRESS] + [-v PORT] [--not-host-only] [--skip-db-cleanup] + [--config CONFIG_FILE] [--sqlite SQLITE_FILE | --postgresql] [--dbaddress DBADDRESS] [--dbport DBPORT] [--dbusername DBUSERNAME] [--dbname DBNAME] @@ -172,6 +174,20 @@ optional arguments: specific configuration (such as authentication settings, and TLS/SSL certificates) from. (default: /home//.codechecker) + --machine-id MACHINE_ID + A unique identifier to be used to identify the machine + running subsequent instances of the "same" server + process. This value is only used internally to + maintain normal function and bookkeeping of executed + tasks following an unclean server shutdown, e.g., + after a crash or system-level interference. If + unspecified, defaults to a reasonable default value + that is generated from the computer's hostname, as + reported by the operating system. 
In most scenarios, + there is no need to fine-tune this, except if + subsequent executions of the "same" server is achieved + in distinct environments, e.g., if the server + otherwise is running in a container. --host LISTEN_ADDRESS The IP address or hostname of the server on which it should listen for connections. For IPv6 listening, @@ -464,6 +480,19 @@ optional arguments: is given, the longest match will be removed. You may also use Unix shell-like wildcards (e.g. '/*/jsmith/'). + --detach Runs `store` in fire-and-forget mode: exit immediately + once the server accepted the analysis reports for + storing, without waiting for the server-side data + processing to conclude. Doing this is generally not + recommended, as the client will never be notified of + potential processing failures, and there is no easy way + to wait for the successfully stored results to become + available server-side for potential further processing + (e.g., `CodeChecker cmd diff`). However, using + '--detach' can significantly speed up large-scale + monitoring analyses where access to the results by a + tool is not a goal, such as in the case of non-gating + CI systems. --config CONFIG_FILE Allow the configuration from an explicit configuration file. The values configured in the config file will overwrite the values set in the command line. @@ -1379,6 +1408,295 @@ can be used normally. The password can be saved on the disk. If such "preconfigured" password is not found, the user will be asked, in the command-line, to provide credentials. +#### Server-side task management (`serverside-tasks`) +
+ + $ CodeChecker cmd serverside-tasks --help (click to expand) + + +``` +usage: CodeChecker cmd serverside-tasks [-h] [-t [TOKEN [TOKEN ...]]] + [--await] [--kill] + [--output {plaintext,table,json}] + [--machine-id [MACHINE_ID [MACHINE_ID ...]]] + [--type [TYPE [TYPE ...]]] + [--status [{allocated,enqueued,running,completed,failed,cancelled,dropped} [{allocated,enqueued,running,completed,failed,cancelled,dropped} ...]]] + [--username [USERNAME [USERNAME ...]] + | --no-username] + [--product [PRODUCT [PRODUCT ...]] | + --no-product] + [--enqueued-before TIMESTAMP] + [--enqueued-after TIMESTAMP] + [--started-before TIMESTAMP] + [--started-after TIMESTAMP] + [--finished-before TIMESTAMP] + [--finished-after TIMESTAMP] + [--last-seen-before TIMESTAMP] + [--last-seen-after TIMESTAMP] + [--only-cancelled | --no-cancelled] + [--only-consumed | --no-consumed] + [--url SERVER_URL] + [--verbose {info,debug_analyzer,debug}] + +Query the status of and otherwise filter information for server-side +background tasks executing on a CodeChecker server. In addition, for server +administartors, allows requesting tasks to cancel execution. + +Normally, the querying of a task's status is available only to the following +users: + - The user who caused the creation of the task. + - For tasks that are associated with a specific product, the PRODUCT_ADMIN + users of that product. + - Accounts with SUPERUSER rights (server administrators). + +optional arguments: + -h, --help show this help message and exit + -t [TOKEN [TOKEN ...]], --token [TOKEN [TOKEN ...]] + The identifying token(s) of the task(s) to query. Each + task is associated with a unique token. (default: + None) + --await Instead of querying the status and reporting that, + followed by an exit, block execution of the + 'CodeChecker cmd serverside-tasks' program until the + queried task(s) terminate(s). Makes the CLI's return + code '0' if the task(s) completed successfully, and + non-zero otherwise. If '--kill' is also specified, the + CLI will await the shutdown of the task(s), but will + return '0' if the task(s) were successfully killed as + well. (default: False) + --kill Request the co-operative and graceful termination of + the tasks matching the filter(s) specified. '--kill' + is only available to SUPERUSERs! Note, that this + action only submits a *REQUEST* of termination to the + server, and tasks are free to not support in-progress + kills. Even for tasks that support getting killed, due + to its graceful nature, it might take a considerable + time for the killing to conclude. Killing a task that + has not started RUNNING yet results in it + automatically terminating before it would start. + (default: False) + +output arguments: + --output {plaintext,table,json} + The format of the output to use when showing the + result of the request. (default: plaintext) + +task list filter arguments: + These options can be used to obtain and filter the list of tasks + associated with the 'CodeChecker server' specified by '--url', based on the + various information columns stored for tasks. + + '--token' is usable with the following filters as well. + + Filters with a variable number of options (e.g., '--machine-id A B') will be + in a Boolean OR relation with each other (meaning: machine ID is either "A" + or "B"). + Specifying multiple filters (e.g., '--machine-id A B --username John') will + be considered in a Boolean AND relation (meaning: [machine ID is either "A" or + "B"] and [the task was created by "John"]). 
+ + Listing is only available for the following, privileged users: + - For tasks that are associated with a specific product, the PRODUCT_ADMINs + of that product. + - Server administrators (SUPERUSERs). + + Unprivileged users MUST use only the task's token to query information about + the task. + + + --machine-id [MACHINE_ID [MACHINE_ID ...]] + The IDs of the server instance executing the tasks. + This is an internal identifier set by server + administrators via the 'CodeChecker server' command. + (default: None) + --type [TYPE [TYPE ...]] + The descriptive, but still machine-readable "type" of + the tasks to filter for. (default: None) + --status [{allocated,enqueued,running,completed,failed,cancelled,dropped} [{allocated,enqueued,running,completed,failed,cancelled,dropped} ...]] + The task's execution status(es) in the pipeline. + (default: None) + --username [USERNAME [USERNAME ...]] + The user(s) who executed the action that caused the + tasks' creation. (default: None) + --no-username Filter for tasks without a responsible user that + created them. (default: False) + --product [PRODUCT [PRODUCT ...]] + Filter for tasks that execute in the context of + products specified by the given ENDPOINTs. This query + is only available if you are a PRODUCT_ADMIN of the + specified product(s). (default: None) + --no-product Filter for server-wide tasks (not associated with any + products). This query is only available to SUPERUSERs. + (default: False) + --enqueued-before TIMESTAMP + Filter for tasks that were created BEFORE (or on) the + specified TIMESTAMP, which is given in the format of + 'year:month:day' or + 'year:month:day:hour:minute:second'. If the "time" + part (':hour:minute:second') is not given, 00:00:00 + (midnight) is assumed instead. Timestamps for tasks + are always understood as Coordinated Universal Time + (UTC). (default: None) + --enqueued-after TIMESTAMP + Filter for tasks that were created AFTER (or on) the + specified TIMESTAMP, which is given in the format of + 'year:month:day' or + 'year:month:day:hour:minute:second'. If the "time" + part (':hour:minute:second') is not given, 00:00:00 + (midnight) is assumed instead. Timestamps for tasks + are always understood as Coordinated Universal Time + (UTC). (default: None) + --started-before TIMESTAMP + Filter for tasks that were started execution BEFORE + (or on) the specified TIMESTAMP, which is given in the + format of 'year:month:day' or + 'year:month:day:hour:minute:second'. If the "time" + part (':hour:minute:second') is not given, 00:00:00 + (midnight) is assumed instead. Timestamps for tasks + are always understood as Coordinated Universal Time + (UTC). (default: None) + --started-after TIMESTAMP + Filter for tasks that were started execution AFTER (or + on) the specified TIMESTAMP, which is given in the + format of 'year:month:day' or + 'year:month:day:hour:minute:second'. If the "time" + part (':hour:minute:second') is not given, 00:00:00 + (midnight) is assumed instead. Timestamps for tasks + are always understood as Coordinated Universal Time + (UTC). (default: None) + --finished-before TIMESTAMP + Filter for tasks that concluded execution BEFORE (or + on) the specified TIMESTAMP, which is given in the + format of 'year:month:day' or + 'year:month:day:hour:minute:second'. If the "time" + part (':hour:minute:second') is not given, 00:00:00 + (midnight) is assumed instead. Timestamps for tasks + are always understood as Coordinated Universal Time + (UTC). 
(default: None) + --finished-after TIMESTAMP + Filter for tasks that concluded execution execution + AFTER (or on) the specified TIMESTAMP, which is given + in the format of 'year:month:day' or + 'year:month:day:hour:minute:second'. If the "time" + part (':hour:minute:second') is not given, 00:00:00 + (midnight) is assumed instead. Timestamps for tasks + are always understood as Coordinated Universal Time + (UTC). (default: None) + --last-seen-before TIMESTAMP + Filter for tasks that reported actual forward progress + in its execution ("heartbeat") BEFORE (or on) the + specified TIMESTAMP, which is given in the format of + 'year:month:day' or + 'year:month:day:hour:minute:second'. If the "time" + part (':hour:minute:second') is not given, 00:00:00 + (midnight) is assumed instead. Timestamps for tasks + are always understood as Coordinated Universal Time + (UTC). (default: None) + --last-seen-after TIMESTAMP + Filter for tasks that reported actual forward progress + in its execution ("heartbeat") AFTER (or on) the + specified TIMESTAMP, which is given in the format of + 'year:month:day' or + 'year:month:day:hour:minute:second'. If the "time" + part (':hour:minute:second') is not given, 00:00:00 + (midnight) is assumed instead. Timestamps for tasks + are always understood as Coordinated Universal Time + (UTC). (default: None) + --only-cancelled Show only tasks that received a cancel request from a + SUPERUSER (see '--kill'). (default: False) + --no-cancelled Show only tasks that had not received a cancel request + from a SUPERUSER (see '--kill'). (default: False) + --only-consumed Show only tasks that concluded their execution and the + responsible user (see '--username') "downloaded" this + fact. (default: False) + --no-consumed Show only tasks that concluded their execution but the + responsible user (see '--username') did not "check" on + the task. (default: False) + +common arguments: + --url SERVER_URL The URL of the server to access, in the format of + '[http[s]://]host:port'. (default: localhost:8001) + --verbose {info,debug_analyzer,debug} + Set verbosity level. + +The return code of 'CodeChecker cmd serverside-tasks' is almost always '0', +unless there is an error. +If **EXACTLY** one '--token' is specified in the arguments without the use of +'--await' or '--kill', the return code is based on the current status of the +task, as identified by the token: + - 0: The task completed successfully. + - 1: (Reserved for operational errors.) + - 2: (Reserved for command-line errors.) + - 4: The task failed to complete due to an error during execution. + - 8: The task is still running... + - 16: The task was cancelled by the administrators, or the server was shut + down. +``` +
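+For illustration, a minimal, hypothetical shell snippet (the token `ABCDEF` and the server URL are placeholders) that queries a single task once and branches on the return codes documented above could look like this:
+
+```sh
+# Hypothetical example: query one task by its token (no '--await'), then act
+# on the documented return codes of 'CodeChecker cmd serverside-tasks'.
+CodeChecker cmd serverside-tasks --token ABCDEF --url localhost:8001
+case $? in
+  0)  echo "Task completed successfully." ;;
+  8)  echo "Task is still running." ;;
+  4)  echo "Task failed during execution." >&2; exit 1 ;;
+  16) echo "Task was cancelled or the server was shut down." >&2; exit 1 ;;
+  *)  echo "Operational or command-line error." >&2; exit 1 ;;
+esac
+```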
+
+The `serverside-tasks` subcommand allows users and administrators to query the status of (and, for administrators, to request the cancellation of) **server-side background tasks**.
+These background tasks are created by a limited set of user actions for which it can be beneficial that the user's client does not have to wait for the task's completion.
+A task is always identified by its **token**, which is a randomly generated value.
+This token is presented to the user when appropriate.
+
+##### Querying the status of a single job
+
+The primary purpose of `CodeChecker cmd serverside-tasks` is to query the status of a running task, with the `--token TOKEN` flag, e.g., `CodeChecker cmd serverside-tasks --token ABCDEF`.
+This will return the task's details:
+
+```
+Task 'ABCDEF':
+ - Type: TaskService::DummyTask
+ - Summary: Dummy task for testing purposes
+ - Status: CANCELLED
+ - Enqueued at: 2024-08-19 15:55:34
+ - Started at: 2024-08-19 15:55:34
+ - Last seen: 2024-08-19 15:55:35
+ - Completed at: 2024-08-19 15:55:35
+
+Comments on task '8b62497c7d1b7e3945445f5b9c3951d97ae07e58f97cad60a0187221e7d1e2ba':
+...
+```
+
+If `--await` is also specified, the execution of `CodeChecker cmd serverside-tasks` blocks the calling prompt or script until the task terminates on the server.
+This is useful in situations where a task's side effects must be available before the script can proceed with further instructions.
+
+A task can have the following statuses:
+
+ * **Allocated**: The task's token was minted, but the complete input to the task has not yet been fully processed.
+ * **Enqueued**: The task is ready for execution, and the system is waiting for free resources to begin executing it.
+ * **Running**: The task is actively executing.
+ * **Completed**: The task successfully finished executing. (The side effects of the operations are available at this point.)
+ * **Failed**: The task's execution was started, but failed for some reason. This could be an error detected in the input, a database issue, or any other _Exception_. The "Comments" field of the task, when queried, will likely contain the details of the error.
+ * **Cancelled**: The task was cancelled by an administrator ([see later](#requesting-the-termination-of-a-task-only-for-superusers)) and the task shut down in response to this request.
+ * **Dropped**: The task's execution was interrupted due to an external reason (system crash, service shutdown).
+
+##### Querying multiple tasks via filters
+
+For product and server administrators (`PRODUCT_ADMIN` and `SUPERUSER` rights), the `serverside-tasks` subcommand exposes various filter options, which can even be combined into a set of criteria that tasks must match in order to be returned.
+Please refer to the `--help` of the subcommand for the exact list of filters available.
+In this mode, the statuses of the tasks are printed in a concise table.
+
+```sh
+$ CodeChecker cmd serverside-tasks --enqueued-after 2024:08:19 --status cancelled
+
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+Token | Machine | Type | Summary | Status | Product | User | Enqueued | Started | Last seen | Completed | Cancelled?
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +8b62497c7d1b7e3945445f5b9c3951d97ae07e58f97cad60a0187221e7d1e2ba | xxxxxxxxxxxxx:8001 | TaskService::DummyTask | Dummy task for testing purposes | CANCELLED | | | 2024-08-19 15:55:34 | 2024-08-19 15:55:34 | 2024-08-19 15:55:35 | 2024-08-19 15:55:35 | Yes +6fa0097a9bd1799572c7ccd2afc0272684ed036c11145da7eaf40cc8a07c7241 | xxxxxxxxxxxxx:8001 | TaskService::DummyTask | Dummy task for testing purposes | CANCELLED | | | 2024-08-19 15:55:53 | 2024-08-19 15:55:53 | 2024-08-19 15:55:53 | 2024-08-19 15:55:53 | Yes +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +``` + +##### Requesting the termination of a task (only for `SUPERUSER`s) + +Tasks matching the query filters can be requested for termination ("killed") by specifying `--kill` in addition to the filters. +This will send a request to the server to shut the tasks down. + +**Note**, that this shutdown is not deterministic and is not immediate. +Due to technical reasons, it is up for the task's implementation to find the appropriate position to honour the shutdown request. +Depending on the task's semantics, the input, or simply circumstance, a task may completely ignore the shutdown request and decide to nevertheless complete. + ### Exporting source code suppression to suppress file diff --git a/web/api/Makefile b/web/api/Makefile index e145755563..d322ab77b0 100644 --- a/web/api/Makefile +++ b/web/api/Makefile @@ -37,10 +37,11 @@ build: clean target_dirs thrift:$(THRIFT_VERSION) \ /bin/bash -c " \ thrift $(THRIFT_OPTS) $(TARGET_PY) $(TARGET_JS) /data/authentication.thrift && \ - thrift $(THRIFT_OPTS) $(TARGET_PY) $(TARGET_JS) /data/products.thrift && \ - thrift $(THRIFT_OPTS) $(TARGET_PY) $(TARGET_JS) /data/report_server.thrift && \ - thrift $(THRIFT_OPTS) $(TARGET_PY) $(TARGET_JS) /data/configuration.thrift && \ - thrift $(THRIFT_OPTS) $(TARGET_PY) $(TARGET_JS) /data/server_info.thrift" + thrift $(THRIFT_OPTS) $(TARGET_PY) $(TARGET_JS) /data/configuration.thrift && \ + thrift $(THRIFT_OPTS) $(TARGET_PY) $(TARGET_JS) /data/products.thrift && \ + thrift $(THRIFT_OPTS) $(TARGET_PY) $(TARGET_JS) /data/report_server.thrift && \ + thrift $(THRIFT_OPTS) $(TARGET_PY) $(TARGET_JS) /data/server_info.thrift && \ + thrift $(THRIFT_OPTS) $(TARGET_PY) $(TARGET_JS) /data/tasks.thrift" # Create tarball from the API JavaScript part which will be commited in the # repository and installed as a dependency. diff --git a/web/api/codechecker_api_shared.thrift b/web/api/codechecker_api_shared.thrift index 167f8ab40b..6b7bef2f87 100644 --- a/web/api/codechecker_api_shared.thrift +++ b/web/api/codechecker_api_shared.thrift @@ -4,15 +4,45 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // ------------------------------------------------------------------------- +/** + * Helper enum for expressing a three-way boolean in a filter. + */ +enum Ternary { + BOTH = 0, // Indicates a query where both set and unset booleans are matched. + OFF = 1, // Indicates a query where the filter matches an UNSET boolean. 
+ ON = 2, // Indicates a query where the filter matches a SET boolean. +} + enum ErrorCode { - DATABASE, - IOERROR, - GENERAL, - AUTH_DENIED, // Authentication denied. We do not allow access to the service. - UNAUTHORIZED, // Authorization denied. User does not have right to perform an action. - API_MISMATCH, // The client attempted to query an API version that is not supported by the server. - SOURCE_FILE, // The client sent a source code which contains errors (e.g.: source code comment errors). - REPORT_FORMAT, // The client sent a report with wrong format (e.g. report annotation has bad type in a .plist) + // Any other sort of error encountered during RPC execution. + GENERAL = 2, + + // Executing the request triggered a database-level fault, constraint violation. + DATABASE = 0, + + // The request is malformed or an internal I/O operation failed. + IOERROR = 1, + + // Authentication denied. We do not allow access to the service. + AUTH_DENIED = 3, + + // User does not have the necessary rights to perform an action. + UNAUTHORIZED = 4, + + // The client attempted to query an API version that is not supported by the + // server. + API_MISMATCH = 5, + + // REMOVED IN API v6.59 (CodeChecker v6.25.0)! + // Previously sent by report_server.thrift/codeCheckerDBAccess::massStoreRun() + // when the client uploaded a source file which contained errors, such as + // review status source-code-comment errors. + /* SOURCE_FILE = 6, */ // Never reuse the value of the enum constant! + + // REMOVED IN API v6.59 (CodeChecker v6.25.0)! + // Previously sent by report_server.thrift/codeCheckerDBAccess::massStoreRun() + // when the client uploaded a report with annotations that had invalid types. + /* REPORT_FORMAT = 7, */ // Never reuse the value of the enum constant! } exception RequestFailed { @@ -30,7 +60,7 @@ exception RequestFailed { * PRODUCT: These permissions are configured per-product. * The extra data field looks like the following object: * { i64 productID } -*/ + */ enum Permission { SUPERUSER = 1, // scope: SYSTEM PERMISSION_VIEW = 2, // scope: SYSTEM @@ -42,8 +72,8 @@ enum Permission { } /** -* Status information about the database backend. -*/ + * Status information about the database backend. + */ enum DBStatus { OK, // Everything is ok with the database. MISSING, // The database is missing. @@ -54,3 +84,9 @@ enum DBStatus { SCHEMA_INIT_ERROR, // Failed to create initial database schema. SCHEMA_UPGRADE_FAILED // Failed to upgrade schema. } + +/** + * Common token type identifying a background task. + * (Main implementation for task management API is in tasks.thrift.) 
+ */ +typedef string TaskToken; diff --git a/web/api/js/codechecker-api-node/dist/codechecker-api-6.58.0.tgz b/web/api/js/codechecker-api-node/dist/codechecker-api-6.58.0.tgz deleted file mode 100644 index a8cf8ab10b..0000000000 Binary files a/web/api/js/codechecker-api-node/dist/codechecker-api-6.58.0.tgz and /dev/null differ diff --git a/web/api/js/codechecker-api-node/dist/codechecker-api-6.59.0.tgz b/web/api/js/codechecker-api-node/dist/codechecker-api-6.59.0.tgz new file mode 100644 index 0000000000..3bfe9dd373 Binary files /dev/null and b/web/api/js/codechecker-api-node/dist/codechecker-api-6.59.0.tgz differ diff --git a/web/api/js/codechecker-api-node/package.json b/web/api/js/codechecker-api-node/package.json index 0bfd792add..86e4a596e9 100644 --- a/web/api/js/codechecker-api-node/package.json +++ b/web/api/js/codechecker-api-node/package.json @@ -1,6 +1,6 @@ { "name": "codechecker-api", - "version": "6.58.0", + "version": "6.59.0", "description": "Generated node.js compatible API stubs for CodeChecker server.", "main": "lib", "homepage": "https://github.com/Ericsson/codechecker", diff --git a/web/api/py/codechecker_api/dist/codechecker_api.tar.gz b/web/api/py/codechecker_api/dist/codechecker_api.tar.gz index 3875d3ef7f..0be636d9d6 100644 Binary files a/web/api/py/codechecker_api/dist/codechecker_api.tar.gz and b/web/api/py/codechecker_api/dist/codechecker_api.tar.gz differ diff --git a/web/api/py/codechecker_api/setup.py b/web/api/py/codechecker_api/setup.py index b369453448..fc9d400def 100644 --- a/web/api/py/codechecker_api/setup.py +++ b/web/api/py/codechecker_api/setup.py @@ -8,7 +8,7 @@ with open('README.md', encoding='utf-8', errors="ignore") as f: long_description = f.read() -api_version = '6.58.0' +api_version = '6.59.0' setup( name='codechecker_api', diff --git a/web/api/py/codechecker_api_shared/dist/codechecker_api_shared.tar.gz b/web/api/py/codechecker_api_shared/dist/codechecker_api_shared.tar.gz index 4d607e2b2f..6a8bc206aa 100644 Binary files a/web/api/py/codechecker_api_shared/dist/codechecker_api_shared.tar.gz and b/web/api/py/codechecker_api_shared/dist/codechecker_api_shared.tar.gz differ diff --git a/web/api/py/codechecker_api_shared/setup.py b/web/api/py/codechecker_api_shared/setup.py index a4c2e70d02..90f09bf34e 100644 --- a/web/api/py/codechecker_api_shared/setup.py +++ b/web/api/py/codechecker_api_shared/setup.py @@ -8,7 +8,7 @@ with open('README.md', encoding='utf-8', errors="ignore") as f: long_description = f.read() -api_version = '6.58.0' +api_version = '6.59.0' setup( name='codechecker_api_shared', diff --git a/web/api/report_server.thrift b/web/api/report_server.thrift index 359372e28a..962a1c49d8 100644 --- a/web/api/report_server.thrift +++ b/web/api/report_server.thrift @@ -201,6 +201,17 @@ struct RunData { } typedef list RunDataList +struct SubmittedRunOptions { + 1: string runName, + 2: string tag, + 3: string version, // The version of CodeChecker with + // which the analysis was done. + 4: bool force, // If set, existing results in + // the run are removed first. + 5: list trimPathPrefixes, + 6: optional string description, +} + struct RunHistoryData { 1: i64 runId, // Unique id of the run. 2: string runName, // Name of the run. @@ -208,8 +219,7 @@ struct RunHistoryData { 4: string user, // User name who analysed the run. 5: string time, // Date time when the run was analysed. 6: i64 id, // Id of the run history tag. - // !!!DEPRECATED!!! This field will be empty so use the getCheckCommand() API function to get the check command for a run. 
- 7: string checkCommand, + 7: string checkCommand, // Check command. !!!DEPRECATED!!! This field will be empty so use the getCheckCommand API function to get the check command for a run. 8: string codeCheckerVersion, // CodeChecker client version of the latest analysis. 9: AnalyzerStatisticsData analyzerStatistics, // Statistics for analyzers. Only number of failed and successfully analyzed // files field will be set. To get full analyzer statistics please use the @@ -943,32 +953,47 @@ service codeCheckerDBAccess { //============================================ // The client can ask the server whether a file is already stored in the - // database. If it is, then it is not necessary to send it in the ZIP file - // with massStoreRun() function. This function requires a list of file hashes - // (sha256) and returns the ones which are not stored yet. + // database. + // If it is present, then it is not necessary to send the file in the ZIP + // to the massStoreRunAsynchronous() function. + // This function requires a list of file hashes (sha256) and returns the + // ones which are not stored yet. + // // PERMISSION: PRODUCT_STORE list getMissingContentHashes(1: list fileHashes) throws (1: codechecker_api_shared.RequestFailed requestError), // The client can ask the server whether a blame info is already stored in the - // database. If it is, then it is not necessary to send it in the ZIP file - // with massStoreRun() function. This function requires a list of file hashes - // (sha256) and returns the ones to which no blame info is stored yet. + // database. + // If it is, then it is not necessary to send the info in the ZIP file + // to the massStoreRunAsynchronous() function. + // This function requires a list of file hashes (sha256) and returns the + // ones to which no blame info is stored yet. + // // PERMISSION: PRODUCT_STORE list getMissingContentHashesForBlameInfo(1: list fileHashes) throws (1: codechecker_api_shared.RequestFailed requestError), // This function stores an entire run encapsulated and sent in a ZIP file. - // The ZIP file has to be compressed and sent as a base64 encoded string. The - // ZIP file must contain a "reports" and an optional "root" sub-folder. - // The former one is the output of 'CodeChecker analyze' command and the - // latter one contains the source files on absolute paths starting as if - // "root" was the "/" directory. The source files are not necessary to be - // wrapped in the ZIP file (see getMissingContentHashes() function). + // The ZIP file has to be compressed by ZLib and the compressed buffer + // sent as a Base64-encoded string. The ZIP file must contain a "reports" and + // an optional "root" sub-directory. The former one is the output of the + // 'CodeChecker analyze' command and the latter one contains the source files + // on absolute paths starting as if "root" was the "/" directory. The source + // files are not necessary to be wrapped in the ZIP file + // (see getMissingContentHashes() function). // // The "version" parameter is the used CodeChecker version which checked this // run. // The "force" parameter removes existing analysis results for a run. + // + // !DEPRECATED!: Use of this function is deprecated as the storing client + // process is prone to infinite hangs while waiting for the return value of + // the Thrift call if the network communication terminates during the time + // the server is processing the sent data, which might take a very long time. 
+ // Appropriately modern clients are expected to use the + // massStoreRunAsynchronous() function and the Task API instead! + // // PERMISSION: PRODUCT_STORE i64 massStoreRun(1: string runName, 2: string tag, @@ -979,6 +1004,35 @@ service codeCheckerDBAccess { 7: optional string description) throws (1: codechecker_api_shared.RequestFailed requestError), + // This function stores an entire analysis run encapsulated and sent as a + // ZIP file. The ZIP file must be compressed by ZLib and sent as a + // Base64-encoded string. It must contain a "reports" and an optional "root" + // sub-directory. "reports" contains the output of the `CodeChecker analyze` + // command, while "root", if present, contains the source code of the project + // with their full paths, with the logical "root" replacing the original + // "/" directory. + // + // The source files are not necessary to be present in the ZIP, see + // getMissingContentHashes() for details. + // + // After performing an initial validation of the well-formedness of the + // submitted structure (ill-formedness is reported as an exception), the + // potentially lengthy processing of the data and the database operations are + // done asynchronously. + // + // This function returns a TaskToken, which SHOULD be used as the argument to + // the tasks::getTaskInfo() function such that clients retrieve the + // processing's state. Clients MAY decide to "detach", i.e., not to wait + // for the processing of the submitted data, and ignore the returned handle. + // Even if the client detached, the processing of the stored reports will + // likely eventually conclude. + // + // PERMISSION: PRODUCT_STORE + codechecker_api_shared.TaskToken massStoreRunAsynchronous( + 1: string zipfileBlob, // Base64-encoded string. + 2: SubmittedRunOptions storeOpts) + throws (1: codechecker_api_shared.RequestFailed requestError), + // Returns true if analysis statistics information can be sent to the server, // otherwise it returns false. // PERMISSION: PRODUCT_STORE diff --git a/web/api/tasks.thrift b/web/api/tasks.thrift new file mode 100644 index 0000000000..e380d59ac3 --- /dev/null +++ b/web/api/tasks.thrift @@ -0,0 +1,162 @@ +// ------------------------------------------------------------------------- +// Part of the CodeChecker project, under the Apache License v2.0 with +// LLVM Exceptions. See LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// ------------------------------------------------------------------------- + +include "codechecker_api_shared.thrift" + +namespace py codeCheckerServersideTasks_v6 +namespace js codeCheckerServersideTasks_v6 + +enum TaskStatus { + ALLOCATED, // Non-terminated state. Token registered but the job hasn't queued yet: the input is still processing. + ENQUEUED, // Non-terminated state. Job in the queue, and all inputs are meaningfully available. + RUNNING, // Non-terminated state. + COMPLETED, // Terminated state. Successfully ran to completion. + FAILED, // Terminated state. Job was running, but the execution failed. + CANCELLED, // Terminated state. Job was cancelled by an administrator, and the cancellation succeeded. + DROPPED, // Terminated state. Job was cancelled due to system reasons (server shutdown, crash, other interference). +} + +struct TaskInfo { + 1: codechecker_api_shared.TaskToken token, + 2: string taskKind, + 3: TaskStatus status, + // If the task is associated with a product, this ID can be used to query + // product information, see products.thirft service. 
+ // The 'productID' is set to 0 if there is no product associated, meaning + // that the task is "global to the server". + 4: i64 productId, + 5: string actorUsername, + 6: string summary, + // Additional, human-readable comments, history, and log output from the + // tasks's processing. + 7: string comments, + 8: i64 enqueuedAtEpoch, + 9: i64 startedAtEpoch, + 10: i64 completedAtEpoch, + 11: i64 lastHeartbeatEpoch, + // Whether the administrator set this job for a co-operative cancellation. + 12: bool cancelFlagSet, +} + +/** + * TaskInfo with additional fields that is sent to administrators only. + */ +struct AdministratorTaskInfo { + 1: TaskInfo normalInfo, + 2: string machineId, // The hopefully unique identifier of the server + // that is/was processing the task. + 3: bool statusConsumed, // Whether the main actor of the task + // (see normalInfo.actorUsername) consumed the + // termination status of the job. +} + +/** + * Metastructure that holds the filters for getTasks(). + * The individual fields of the struct are in "AND" relation with each other. + * For list<> fields, elements of the list filter the same "column" of the + * task information table, and are considered in an "OR" relation. + */ +struct TaskFilter { + 1: list tokens, + 2: list machineIDs, + 3: list kinds, + 4: list statuses, + // If empty, it means "all", including those of no username. + 5: list usernames, + // If True, it means filter for **only** "no username". + // Can not be set together with a non-empty "usernames". + 6: bool filterForNoUsername, + // If empty, it means "all", including those of no product ID. + 7: list productIDs, + // If True, it means filter for **only** "no product ID". + // Can not be set together with a non-empty "productIDs". + 8: bool filterForNoProductID, + 9: i64 enqueuedBeforeEpoch, + 10: i64 enqueuedAfterEpoch, + 11: i64 startedBeforeEpoch, + 12: i64 startedAfterEpoch, + 13: i64 completedBeforeEpoch, + 14: i64 completedAfterEpoch, + 15: i64 heartbeatBeforeEpoch, + 16: i64 heartbeatAfterEpoch, + 17: codechecker_api_shared.Ternary cancelFlag, + 18: codechecker_api_shared.Ternary consumedFlag, +} + +service codeCheckerServersideTaskService { + // Retrieves the status of a task registered on the server, based on its + // identifying "token". + // + // Following this query, if the task is in any terminating states and the + // query was requested by the main actor, the status will be considered + // "consumed", and might be garbage collected by the server at a later + // point in time. + // + // If the server has authentication enabled, this query is only allowed to + // the following users: + // * The user who originally submitted the request that resulted in the + // creation of this job. + // * If the job is associated with a specific product, anyone with + // PRODUCT_ADMIN privileges for that product. + // * Users with SUPERUSER rights. + // + // PERMISSION: . + TaskInfo getTaskInfo( + 1: codechecker_api_shared.TaskToken token) + throws (1: codechecker_api_shared.RequestFailed requestError), + + // Returns privileged information about the tasks stored in the servers' + // databases, based on the given filter. + // + // This query does not set the "consumed" flag on the results, even if the + // querying user was a task's main actor. + // + // If the querying user only has PRODUCT_ADMIN rights, they are only allowed + // to query the tasks corresponding to a product they are PRODUCT_ADMIN of. 
+ // + // PERMISSION: SUPERUSER, PRODUCT_ADMIN + list getTasks( + 1: TaskFilter filters) + throws (1: codechecker_api_shared.RequestFailed requestError), + + // Sets the specified task's "cancel" flag to TRUE, resulting in a request to + // the task's execution to co-operatively terminate itself. + // Returns whether the current RPC call was the one which set the flag. + // + // Tasks will generally terminate themselves at a safe point during their + // processing, but there are no guarantees that a specific task at any given + // point can reach such a safe point. + // There are no guarantees that a specific task is implemented in a way that + // it can ever be terminated via a "cancel" action. + // + // This method does not result in a communication via operating system + // primitives to the running server, and it is not capable of either + // completely shutting down a running server, or, conversely, to resurrect a + // hung server. + // + // Setting the "cancel" flag of an already cancelled task does nothing, and + // it is not possible to un-cancel a task. + // Setting the "cancel" flag of already terminated tasks does nothing. + // In both such cases, the RPC call will return "bool False". + // + // PERMISSION: SUPERUSER + bool cancelTask( + 1: codechecker_api_shared.TaskToken token) + throws (1: codechecker_api_shared.RequestFailed requestError), + + // Used for testing purposes only. + // This function will **ALWAYS** throw an exception when ran outside of a + // testing environment. + // + // The dummy task will increment a temporary counter in the background, with + // intermittent sleeping, up to approximately "timeout" number of seconds, + // after which point it will gracefully terminate. + // The result of the execution is unsuccessful if "shouldFail" is a true. + codechecker_api_shared.TaskToken createDummyTask( + 1: i32 timeout, + 2: bool shouldFail) + throws (1: codechecker_api_shared.RequestFailed requestError), +} diff --git a/web/client/codechecker_client/client.py b/web/client/codechecker_client/client.py index 730a83446b..570d7e28dc 100644 --- a/web/client/codechecker_client/client.py +++ b/web/client/codechecker_client/client.py @@ -23,10 +23,11 @@ from codechecker_web.shared import env from codechecker_web.shared.version import CLIENT_API -from codechecker_client.helpers.authentication import ThriftAuthHelper -from codechecker_client.helpers.product import ThriftProductHelper -from codechecker_client.helpers.results import ThriftResultsHelper from .credential_manager import UserCredentials +from .helpers.authentication import ThriftAuthHelper +from .helpers.product import ThriftProductHelper +from .helpers.results import ThriftResultsHelper +from .helpers.tasks import ThriftServersideTaskHelper from .product import split_product_url LOG = get_logger('system') @@ -65,7 +66,7 @@ def setup_auth_client(protocol, host, port, session_token=None): session token for the session. """ client = ThriftAuthHelper(protocol, host, port, - '/v' + CLIENT_API + '/Authentication', + f"/v{CLIENT_API}/Authentication", session_token) return client @@ -78,7 +79,7 @@ def login_user(protocol, host, port, username, login=False): """ session = UserCredentials() auth_client = ThriftAuthHelper(protocol, host, port, - '/v' + CLIENT_API + '/Authentication') + f"/v{CLIENT_API}/Authentication") if not login: logout_done = auth_client.destroySession() @@ -205,7 +206,7 @@ def setup_product_client(protocol, host, port, auth_client=None, # Attach to the server-wide product service. 
product_client = ThriftProductHelper( protocol, host, port, - '/v' + CLIENT_API + '/Products', + f"/v{CLIENT_API}/Products", session_token, lambda: get_new_token(protocol, host, port, cred_manager)) else: @@ -213,7 +214,7 @@ def setup_product_client(protocol, host, port, auth_client=None, # as "viewpoint" from which the product service is called. product_client = ThriftProductHelper( protocol, host, port, - '/' + product_name + '/v' + CLIENT_API + '/Products', + f"/{product_name}/v{CLIENT_API}/Products", session_token, lambda: get_new_token(protocol, host, port, cred_manager)) @@ -260,6 +261,29 @@ def setup_client(product_url) -> ThriftResultsHelper: return ThriftResultsHelper( protocol, host, port, - '/' + product_name + '/v' + CLIENT_API + '/CodeCheckerService', + f"/{product_name}/v{CLIENT_API}/CodeCheckerService", session_token, lambda: get_new_token(protocol, host, port, cred_manager)) + + +def setup_task_client(protocol, host, port, auth_client=None, + session_token=None): + """ + Setup the Thrift client for the server-side task management endpoint. + """ + cred_manager = UserCredentials() + session_token = cred_manager.get_token(host, port) + + if not session_token: + auth_client = setup_auth_client(protocol, host, port) + session_token = perform_auth_for_handler(auth_client, host, port, + cred_manager) + + # Attach to the server-wide task management service. + task_client = ThriftServersideTaskHelper( + protocol, host, port, + f"/v{CLIENT_API}/Tasks", + session_token, + lambda: get_new_token(protocol, host, port, cred_manager)) + + return task_client diff --git a/web/client/codechecker_client/cmd/cmd.py b/web/client/codechecker_client/cmd/cmd.py index 6be5de36b6..f68b8a148e 100644 --- a/web/client/codechecker_client/cmd/cmd.py +++ b/web/client/codechecker_client/cmd/cmd.py @@ -14,13 +14,17 @@ import argparse import getpass import datetime +import os import sys from codechecker_api.codeCheckerDBAccess_v6 import ttypes -from codechecker_client import cmd_line_client -from codechecker_client import product_client -from codechecker_client import permission_client, source_component_client, \ +from codechecker_client import \ + cmd_line_client, \ + permission_client, \ + product_client, \ + source_component_client, \ + task_client, \ token_client from codechecker_common import arg, logger, util @@ -1227,6 +1231,231 @@ def __register_permissions(parser): help="The output format to use in showing the data.") +def __register_tasks(parser): + """ + Add `argparse` subcommand `parser` options for the "handle server-side + tasks" action. + """ + if "TEST_WORKSPACE" in os.environ: + testing_args = parser.add_argument_group("testing arguments") + testing_args.add_argument("--create-dummy-task", + dest="dummy_task_args", + metavar="ARG", + default=argparse.SUPPRESS, + type=str, + nargs=2, + help=""" +Exercises the 'createDummyTask(int timeout, bool shouldFail)' API endpoint. +Used for testing purposes. +Note, that the server **MUST** be started in a testing environment as well, +otherwise, the request will be rejected by the server! +""") + + parser.add_argument("-t", "--token", + dest="token", + metavar="TOKEN", + type=str, + nargs='*', + help="The identifying token(s) of the task(s) to " + "query. 
Each task is associated with a unique " + "token.") + + parser.add_argument("--await", + dest="wait_and_block", + action="store_true", + help=""" +Instead of querying the status and reporting that, followed by an exit, block +execution of the 'CodeChecker cmd serverside-tasks' program until the queried +task(s) terminate(s). +Makes the CLI's return code '0' if the task(s) completed successfully, and +non-zero otherwise. +If '--kill' is also specified, the CLI will await the shutdown of the task(s), +but will return '0' if the task(s) were successfully killed as well. +""") + + parser.add_argument("--kill", + dest="cancel_task", + action="store_true", + help=""" +Request the co-operative and graceful termination of the tasks matching the +filter(s) specified. +'--kill' is only available to SUPERUSERs! +Note, that this action only submits a *REQUEST* of termination to the server, +and tasks are free to not support in-progress kills. +Even for tasks that support getting killed, due to its graceful nature, it +might take a considerable time for the killing to conclude. +Killing a task that has not started RUNNING yet results in it automatically +terminating before it would start. +""") + + output = parser.add_argument_group("output arguments") + output.add_argument("--output", + dest="output_format", + required=False, + default="plaintext", + choices=["plaintext", "table", "json"], + help="The format of the output to use when showing " + "the result of the request.") + + task_list = parser.add_argument_group( + "task list filter arguments", + """These options can be used to obtain and filter the list of tasks +associated with the 'CodeChecker server' specified by '--url', based on the +various information columns stored for tasks. + +'--token' is usable with the following filters as well. + +Filters with a variable number of options (e.g., '--machine-id A B') will be +in a Boolean OR relation with each other (meaning: machine ID is either "A" +or "B"). +Specifying multiple filters (e.g., '--machine-id A B --username John') will +be considered in a Boolean AND relation (meaning: [machine ID is either "A" or +"B"] and [the task was created by "John"]). + +Listing is only available for the following, privileged users: + - For tasks that are associated with a specific product, the PRODUCT_ADMINs + of that product. + - Server administrators (SUPERUSERs). + +Unprivileged users MUST use only the task's token to query information about +the task. + """) + + task_list.add_argument("--machine-id", + type=str, + nargs='*', + help="The IDs of the server instance executing " + "the tasks. 
This is an internal identifier " + "set by server administrators via the " + "'CodeChecker server' command.") + + task_list.add_argument("--type", + type=str, + nargs='*', + help="The descriptive, but still " + "machine-readable \"type\" of the tasks to " + "filter for.") + + task_list.add_argument("--status", + type=str, + nargs='*', + choices=["allocated", "enqueued", "running", + "completed", "failed", "cancelled", + "dropped"], + help="The task's execution status(es) in the " + "pipeline.") + + username = task_list.add_mutually_exclusive_group(required=False) + username.add_argument("--username", + type=str, + nargs='*', + help="The user(s) who executed the action that " + "caused the tasks' creation.") + username.add_argument("--no-username", + action="store_true", + help="Filter for tasks without a responsible user " + "that created them.") + + product = task_list.add_mutually_exclusive_group(required=False) + product.add_argument("--product", + type=str, + nargs='*', + help="Filter for tasks that execute in the context " + "of products specified by the given ENDPOINTs. " + "This query is only available if you are a " + "PRODUCT_ADMIN of the specified product(s).") + product.add_argument("--no-product", + action="store_true", + help="Filter for server-wide tasks (not associated " + "with any products). This query is only " + "available to SUPERUSERs.") + + timestamp_documentation: str = """ +TIMESTAMP, which is given in the format of 'year:month:day' or +'year:month:day:hour:minute:second'. +If the "time" part (':hour:minute:second') is not given, 00:00:00 (midnight) +is assumed instead. +Timestamps for tasks are always understood as Coordinated Universal Time (UTC). +""" + + task_list.add_argument("--enqueued-before", + type=valid_time, + metavar="TIMESTAMP", + help="Filter for tasks that were created BEFORE " + "(or on) the specified " + + timestamp_documentation) + task_list.add_argument("--enqueued-after", + type=valid_time, + metavar="TIMESTAMP", + help="Filter for tasks that were created AFTER " + "(or on) the specified " + + timestamp_documentation) + + task_list.add_argument("--started-before", + type=valid_time, + metavar="TIMESTAMP", + help="Filter for tasks that were started " + "execution BEFORE (or on) the specified " + + timestamp_documentation) + task_list.add_argument("--started-after", + type=valid_time, + metavar="TIMESTAMP", + help="Filter for tasks that were started " + "execution AFTER (or on) the specified " + + timestamp_documentation) + + task_list.add_argument("--finished-before", + type=valid_time, + metavar="TIMESTAMP", + help="Filter for tasks that concluded execution " + "BEFORE (or on) the specified " + + timestamp_documentation) + task_list.add_argument("--finished-after", + type=valid_time, + metavar="TIMESTAMP", + help="Filter for tasks that concluded execution " + "execution AFTER (or on) the specified " + + timestamp_documentation) + + task_list.add_argument("--last-seen-before", + type=valid_time, + metavar="TIMESTAMP", + help="Filter for tasks that reported actual " + "forward progress in its execution " + "(\"heartbeat\") BEFORE (or on) the " + "specified " + timestamp_documentation) + task_list.add_argument("--last-seen-after", + type=valid_time, + metavar="TIMESTAMP", + help="Filter for tasks that reported actual " + "forward progress in its execution " + "(\"heartbeat\") AFTER (or on) the " + "specified " + timestamp_documentation) + + cancel = task_list.add_mutually_exclusive_group(required=False) + cancel.add_argument("--only-cancelled", + 
action="store_true", + help="Show only tasks that received a cancel request " + "from a SUPERUSER (see '--kill').") + cancel.add_argument("--no-cancelled", + action="store_true", + help="Show only tasks that had not received a " + "cancel request from a SUPERUSER " + "(see '--kill').") + + consumed = task_list.add_mutually_exclusive_group(required=False) + consumed.add_argument("--only-consumed", + action="store_true", + help="Show only tasks that concluded their " + "execution and the responsible user (see " + "'--username') \"downloaded\" this fact.") + consumed.add_argument("--no-consumed", + action="store_true", + help="Show only tasks that concluded their " + "execution but the responsible user (see " + "'--username') did not \"check\" on the task.") + + def __register_token(parser): """ Add argparse subcommand parser for the "handle token" action. @@ -1538,5 +1767,41 @@ def add_arguments_to_parser(parser): permissions.set_defaults(func=permission_client.handle_permissions) __add_common_arguments(permissions, needs_product_url=False) + tasks = subcommands.add_parser( + "serverside-tasks", + formatter_class=arg.RawDescriptionDefaultHelpFormatter, + description=""" +Query the status of and otherwise filter information for server-side +background tasks executing on a CodeChecker server. In addition, for server +administartors, allows requesting tasks to cancel execution. + +Normally, the querying of a task's status is available only to the following +users: + - The user who caused the creation of the task. + - For tasks that are associated with a specific product, the PRODUCT_ADMIN + users of that product. + - Accounts with SUPERUSER rights (server administrators). +""", + help="Await, query, and cancel background tasks executing on the " + "server.", + epilog=""" +The return code of 'CodeChecker cmd serverside-tasks' is almost always '0', +unless there is an error. +If **EXACTLY** one '--token' is specified in the arguments without the use of +'--await' or '--kill', the return code is based on the current status of the +task, as identified by the token: + - 0: The task completed successfully. + - 1: (Reserved for operational errors.) + - 2: (Reserved for command-line errors.) + - 4: The task failed to complete due to an error during execution. + - 8: The task is still running... + - 16: The task was cancelled by the administrators, or the server was shut + down. +""" + ) + __register_tasks(tasks) + tasks.set_defaults(func=task_client.handle_tasks) + __add_common_arguments(tasks, needs_product_url=False) + # 'cmd' does not have a main() method in itself, as individual subcommands are # handled later on separately. 
diff --git a/web/client/codechecker_client/cmd/store.py b/web/client/codechecker_client/cmd/store.py index 58e7f307a9..c6f5b3ea86 100644 --- a/web/client/codechecker_client/cmd/store.py +++ b/web/client/codechecker_client/cmd/store.py @@ -31,8 +31,8 @@ from threading import Timer from typing import Dict, Iterable, List, Set, Tuple -from codechecker_api.codeCheckerDBAccess_v6.ttypes import StoreLimitKind -from codechecker_api_shared.ttypes import RequestFailed, ErrorCode +from codechecker_api.codeCheckerDBAccess_v6.ttypes import \ + StoreLimitKind, SubmittedRunOptions from codechecker_report_converter import twodim from codechecker_report_converter.report import Report, report_file, \ @@ -51,14 +51,14 @@ def assemble_blame_info(_, __) -> int: """ raise NotImplementedError() -from codechecker_client import client as libclient -from codechecker_client import product +from codechecker_client import client as libclient, product +from codechecker_client.task_client import await_task_termination from codechecker_common import arg, logger, cmd_config from codechecker_common.checker_labels import CheckerLabels from codechecker_common.compatibility.multiprocessing import Pool from codechecker_common.source_code_comment_handler import \ SourceCodeCommentHandler -from codechecker_common.util import load_json +from codechecker_common.util import format_size, load_json from codechecker_web.shared import webserver_context, host_check from codechecker_web.shared.env import get_default_workspace @@ -66,7 +66,7 @@ def assemble_blame_info(_, __) -> int: LOG = logger.get_logger('system') -MAX_UPLOAD_SIZE = 1 * 1024 * 1024 * 1024 # 1GiB +MAX_UPLOAD_SIZE = 1024 ** 3 # 1024^3 = 1 GiB. AnalyzerResultFileReports = Dict[str, List[Report]] @@ -87,7 +87,7 @@ def assemble_blame_info(_, __) -> int: """Contains information about the report file after parsing. -store_it: True if every information is availabe and the +store_it: True if every information is available and the report can be stored main_report_positions: list of ReportLineInfo containing the main report positions @@ -135,19 +135,6 @@ def _write_summary(self, out=sys.stdout): out.write("\n----=================----\n") -def sizeof_fmt(num, suffix='B'): - """ - Pretty print storage units. - Source: https://stackoverflow.com/questions/1094841/ - reusable-library-to-get-human-readable-version-of-file-size - """ - for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: - if abs(num) < 1024.0: - return f"{num:3.1f}{unit}{suffix}" - num /= 1024.0 - return f"{num:.1f}Yi{suffix}" - - def get_file_content_hash(file_path): """ Return the file content hash for a file. @@ -270,6 +257,24 @@ def add_arguments_to_parser(parser): "match will be removed. You may also use Unix " "shell-like wildcards (e.g. '/*/jsmith/').") + parser.add_argument("--detach", + dest="detach", + default=argparse.SUPPRESS, + action="store_true", + required=False, + help=""" +Runs `store` in fire-and-forget mode: exit immediately once the server accepted +the analysis reports for storing, without waiting for the server-side data +processing to conclude. +Doing this is generally not recommended, as the client will never be notified +of potential processing failures, and there is no easy way to wait for the +successfully stored results to become available server-side for potential +further processing (e.g., `CodeChecker cmd diff`). 
+However, using '--detach' can significantly speed up large-scale monitoring +analyses where access to the results by a tool is not a goal, such as in the +case of non-gating CI systems. +""") + cmd_config.add_option(parser) parser.add_argument('-f', '--force', @@ -649,7 +654,7 @@ def assemble_zip(inputs, compressed_zip_size = os.stat(zip_file).st_size LOG.info("Compressing report zip file done (%s / %s).", - sizeof_fmt(zip_size), sizeof_fmt(compressed_zip_size)) + format_size(zip_size), format_size(compressed_zip_size)) # We are responsible for deleting these. shutil.rmtree(temp_dir) @@ -698,7 +703,7 @@ def get_analysis_statistics(inputs, limits): if os.stat(compilation_db).st_size > compilation_db_size: LOG.debug("Compilation database is too big (max: %s).", - sizeof_fmt(compilation_db_size)) + format_size(compilation_db_size)) else: LOG.debug("Copying file '%s' to analyzer statistics " "ZIP...", compilation_db) @@ -721,7 +726,7 @@ def get_analysis_statistics(inputs, limits): if failed_files_size > failure_zip_limit: LOG.debug("We reached the limit of maximum uploadable " "failure zip size (max: %s).", - sizeof_fmt(failure_zip_limit)) + format_size(failure_zip_limit)) break else: LOG.debug("Copying failure zip file '%s' to analyzer " @@ -732,7 +737,7 @@ def get_analysis_statistics(inputs, limits): return statistics_files if has_failed_zip else [] -def storing_analysis_statistics(client, inputs, run_name): +def store_analysis_statistics(client, inputs, run_name): """ Collects and stores analysis statistics information on the server. """ @@ -932,7 +937,7 @@ def main(args): zip_size = os.stat(zip_file).st_size if zip_size > MAX_UPLOAD_SIZE: LOG.error("The result list to upload is too big (max: %s): %s.", - sizeof_fmt(MAX_UPLOAD_SIZE), sizeof_fmt(zip_size)) + format_size(MAX_UPLOAD_SIZE), format_size(zip_size)) sys.exit(1) b64zip = "" @@ -947,77 +952,56 @@ def main(args): description = args.description if 'description' in args else None - LOG.info("Storing results to the server...") + LOG.info("Storing results to the server ...") + task_token: str = client.massStoreRunAsynchronous( + b64zip, + SubmittedRunOptions( + runName=args.name, + tag=args.tag if "tag" in args else None, + version=str(context.version), + force="force" in args, + trimPathPrefixes=trim_path_prefixes, + description=description) + ) + LOG.info("Reports submitted to the server for processing.") - try: - with _timeout_watchdog(timedelta(hours=1), - signal.SIGUSR1): - client.massStoreRun(args.name, - args.tag if 'tag' in args else None, - str(context.version), - b64zip, - 'force' in args, - trim_path_prefixes, - description) - except WatchdogError as we: - LOG.warning("%s", str(we)) - - # Showing parts of the exception stack is important here. - # We **WANT** to see that the timeout happened during a wait on - # Thrift reading from the TCP connection (something deep in the - # Python library code at "sock.recv_into"). - import traceback - _, _, tb = sys.exc_info() - frames = traceback.extract_tb(tb) - first, last = frames[0], frames[-2] - formatted_frames = traceback.format_list([first, last]) - fmt_first, fmt_last = formatted_frames[0], formatted_frames[1] - LOG.info("Timeout was triggered during:\n%s", fmt_first) - LOG.info("Timeout interrupted this low-level operation:\n%s", - fmt_last) - - LOG.error("Timeout!" - "\n\tThe server's reply did not arrive after " - "%d seconds (%s) elapsed since the server-side " - "processing began." - "\n\n\tThis does *NOT* mean that there was an issue " - "with the run you were storing!" 
- "\n\tThe server might still be processing the results..." - "\n\tHowever, it is more likely that the " - "server had already finished, but the client did not " - "receive a response." - "\n\tUsually, this is caused by the underlying TCP " - "connection failing to signal a low-level disconnect." - "\n\tClients potentially hanging indefinitely in these " - "scenarios is an unfortunate and known issue." - "\n\t\tSee http://github.com/Ericsson/codechecker/" - "issues/3672 for details!" - "\n\n\tThis error here is a temporary measure to ensure " - "an infinite hang is replaced with a well-explained " - "timeout." - "\n\tA more proper solution will be implemented in a " - "subsequent version of CodeChecker.", - we.timeout.total_seconds(), str(we.timeout)) - sys.exit(1) - - # Storing analysis statistics if the server allows them. if client.allowsStoringAnalysisStatistics(): - storing_analysis_statistics(client, args.input, args.name) - - LOG.info("Storage finished successfully.") - except RequestFailed as reqfail: - if reqfail.errorCode == ErrorCode.SOURCE_FILE: - header = ['File', 'Line', 'Checker name'] - table = twodim.to_str( - 'table', header, [c.split('|') for c in reqfail.extraInfo]) - LOG.warning("Setting the review statuses for some reports failed " - "because of non valid source code comments: " - "%s\n %s", reqfail.message, table) - sys.exit(1) + store_analysis_statistics(client, args.input, args.name) + + if "detach" in args: + LOG.warning("Exiting the 'store' subcommand as '--detach' was " + "specified: not waiting for the result of the store " + "operation.\n" + "The server might not have finished processing " + "everything at this point, so do NOT rely on querying " + "the results just yet!\n" + "To await the completion of the processing later, " + "you can execute:\n\n" + "\tCodeChecker cmd serverside-tasks --token %s " + "--await", + task_token) + # Print the token to stdout as well, so scripts can use "--detach" + # meaningfully. + print(task_token) + return + + task_client = libclient.setup_task_client(protocol, host, port) + task_status: str = await_task_termination(LOG, task_token, + task_api_client=task_client) + + if task_status == "COMPLETED": + LOG.info("Storing the reports finished successfully.") + else: + LOG.error("Storing the reports failed! " + "The job terminated in status '%s'. " + "The comments associated with the failure are:\n\n%s", + task_status, + task_client.getTaskInfo(task_token).comments) + sys.exit(1) except Exception as ex: import traceback traceback.print_exc() - LOG.info("Storage failed: %s", str(ex)) + LOG.error("Storing the reports failed: %s", str(ex)) sys.exit(1) finally: os.close(zip_file_handle) diff --git a/web/client/codechecker_client/helpers/results.py b/web/client/codechecker_client/helpers/results.py index c558cfe040..dd6978ee9c 100644 --- a/web/client/codechecker_client/helpers/results.py +++ b/web/client/codechecker_client/helpers/results.py @@ -9,7 +9,7 @@ Helper functions for Thrift api calls. 
""" -from codechecker_api.codeCheckerDBAccess_v6 import codeCheckerDBAccess +from codechecker_api.codeCheckerDBAccess_v6 import codeCheckerDBAccess, ttypes from codechecker_client.thrift_call import thrift_client_call from .base import BaseClientHelper @@ -181,6 +181,14 @@ def massStoreRun(self, name, tag, version, zipdir, force, trim_path_prefixes, description): pass + @thrift_client_call + def massStoreRunAsynchronous( + self, + zipfile_blob: str, + store_opts: ttypes.SubmittedRunOptions + ) -> str: + raise NotImplementedError("Should have called Thrift code!") + @thrift_client_call def allowsStoringAnalysisStatistics(self): pass diff --git a/web/client/codechecker_client/helpers/tasks.py b/web/client/codechecker_client/helpers/tasks.py new file mode 100644 index 0000000000..026b2665fa --- /dev/null +++ b/web/client/codechecker_client/helpers/tasks.py @@ -0,0 +1,49 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Helper for the "serverside tasks" Thrift API. +""" +from typing import Callable, List, Optional + +from codechecker_api.codeCheckerServersideTasks_v6 import \ + codeCheckerServersideTaskService +from codechecker_api.codeCheckerServersideTasks_v6.ttypes import \ + AdministratorTaskInfo, TaskFilter, TaskInfo + +from ..thrift_call import thrift_client_call +from .base import BaseClientHelper + + +# These names are inherited from Thrift stubs. +# pylint: disable=invalid-name +class ThriftServersideTaskHelper(BaseClientHelper): + """Clientside Thrift stub for the `codeCheckerServersideTaskService`.""" + + def __init__(self, protocol: str, host: str, port: int, uri: str, + session_token: Optional[str] = None, + get_new_token: Optional[Callable] = None): + super().__init__(protocol, host, port, uri, + session_token, get_new_token) + + self.client = codeCheckerServersideTaskService.Client(self.protocol) + + @thrift_client_call + def getTaskInfo(self, _token: str) -> TaskInfo: + raise NotImplementedError("Should have called Thrift code!") + + @thrift_client_call + def getTasks(self, _filters: TaskFilter) -> List[AdministratorTaskInfo]: + raise NotImplementedError("Should have called Thrift code!") + + @thrift_client_call + def cancelTask(self, _token: str) -> bool: + raise NotImplementedError("Should have called Thrift code!") + + @thrift_client_call + def createDummyTask(self, _timeout: int, _should_fail: bool) -> str: + raise NotImplementedError("Should have called Thrift code!") diff --git a/web/client/codechecker_client/task_client.py b/web/client/codechecker_client/task_client.py new file mode 100644 index 0000000000..d83f562491 --- /dev/null +++ b/web/client/codechecker_client/task_client.py @@ -0,0 +1,597 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Implementation for the ``CodeChecker cmd serverside-tasks`` subcommand. 
+""" +from argparse import Namespace +from copy import deepcopy +from datetime import datetime, timedelta, timezone +import json +import os +import sys +import time +from typing import Callable, Dict, List, Optional, Tuple, cast + +from codechecker_api_shared.ttypes import Ternary +from codechecker_api.ProductManagement_v6.ttypes import Product +from codechecker_api.codeCheckerServersideTasks_v6.ttypes import \ + AdministratorTaskInfo, TaskFilter, TaskInfo, TaskStatus + +from codechecker_common import logger +from codechecker_common.util import clamp +from codechecker_report_converter import twodim + +from .client import setup_product_client, setup_task_client +from .helpers.product import ThriftProductHelper +from .helpers.tasks import ThriftServersideTaskHelper +from .product import split_server_url + + +# Needs to be set in the handler functions. +LOG: Optional[logger.logging.Logger] = None + + +def init_logger(level, stream=None, logger_name="system"): + logger.setup_logger(level, stream) + + global LOG + LOG = logger.get_logger(logger_name) + + +class TaskTimeoutError(Exception): + """Indicates that `await_task_termination` timed out.""" + + def __init__(self, token: str, task_status: int, delta: timedelta): + super().__init__(f"Task '{token}' is still " + f"'{TaskStatus._VALUES_TO_NAMES[task_status]}', " + f"but did not have any progress for '{delta}' " + f"({delta.total_seconds()} seconds)!") + + +def await_task_termination( + log: logger.logging.Logger, + token: str, + probe_delta_min: timedelta = timedelta(seconds=5), + probe_delta_max: timedelta = timedelta(minutes=2), + timeout_from_last_task_progress: Optional[timedelta] = timedelta(hours=1), + max_consecutive_request_failures: Optional[int] = 10, + task_api_client: Optional[ThriftServersideTaskHelper] = None, + server_address: Optional[Tuple[str, str, str]] = None, +) -> str: + """ + Blocks the execution of the current process until the task specified by + `token` terminates. + When terminated, returns the task's `TaskStatus` as a string. + + `await_task_termination` sleeps the current process (as with + `time.sleep`), and periodically wakes up, with a distance of wake-ups + calculated between `probe_delta_min` and `probe_delta_max`, to check the + status of the task by downloading ``getTaskInfo()`` result from the + server. + + The server to use is specified by either providing a valid + `task_api_client`, at which point the connection of the existing client + will be reused; or by providing a + ``(protocol: str, host: str, port: int)`` tuple under `server_address`, + which will cause `await_task_termination` to set the Task API client up + internally. + + This call blocks the caller stack frame indefinitely, unless + `timeout_from_last_task_progress` is specified. + If so, the function will unblock by raising `TaskTimeoutError` if the + specified time has elapsed since the queried task last exhibited forward + progress. + Forward progress is calculated from the task's ``startedAt`` timestamp, + the ``completedAt`` timestamp, or the ``lastHeartbeat``, whichever is + later in time. + For tasks that had not started executing yet (their ``startedAt`` is + `None`), this timeout does not apply. + + This function is resillient against network problems and request failures + through the connection to the server, if + `max_consecutive_request_failures` is specified. + If so, it will wait the given number of Thrift client failures before + giving up. 
+ """ + if not task_api_client and not server_address: + raise ValueError("Specify 'task_api_client' or 'server_address' " + "to point the function at a server to probe!") + if not task_api_client and server_address: + protocol, host, port = server_address + task_api_client = setup_task_client(protocol, host, port) + if not task_api_client: + raise ConnectionError("Failed to set up Task API client!") + + probe_distance: timedelta = deepcopy(probe_delta_min) + request_failures: int = 0 + last_forward_progress_by_task: Optional[datetime] = None + task_status: int = TaskStatus.ALLOCATED + + def _query_task_status(): + while True: + nonlocal request_failures + try: + ti = task_api_client.getTaskInfo(token) + request_failures = 0 + break + except SystemExit: + # getTaskInfo() is decorated by @thrift_client_call, which + # raises SystemExit by calling sys.exit() internally, if + # something fails. + request_failures += 1 + if max_consecutive_request_failures and request_failures > \ + max_consecutive_request_failures: + raise + log.info("Retrying task status query [%d / %d retries] ...", + request_failures, max_consecutive_request_failures) + + last_forward_progress_by_task: Optional[datetime] = None + epoch_to_consider: int = 0 + if ti.completedAtEpoch: + epoch_to_consider = ti.completedAtEpoch + elif ti.lastHeartbeatEpoch: + epoch_to_consider = ti.lastHeartbeatEpoch + elif ti.startedAtEpoch: + epoch_to_consider = ti.startedAtEpoch + if epoch_to_consider: + last_forward_progress_by_task = cast( + datetime, _utc_epoch_to_datetime(epoch_to_consider)) + + task_status = cast(int, ti.status) + + return last_forward_progress_by_task, task_status + + while True: + task_forward_progressed_at, task_status = _query_task_status() + if task_status in [TaskStatus.COMPLETED, TaskStatus.FAILED, + TaskStatus.CANCELLED, TaskStatus.DROPPED]: + break + + if task_forward_progressed_at: + time_since_last_progress = datetime.now(timezone.utc) \ + - task_forward_progressed_at + if timeout_from_last_task_progress and \ + time_since_last_progress >= \ + timeout_from_last_task_progress: + log.error("'%s' timeout elapsed since task last progressed " + "at '%s', considering " + "hung/locked out/lost/failed...", + timeout_from_last_task_progress, + task_forward_progressed_at) + raise TaskTimeoutError(token, task_status, + time_since_last_progress) + + if last_forward_progress_by_task: + # Tune the next probe's wait period in a fashion similar to + # TCP's low-level AIMD (addition increment, + # multiplicative decrement) algorithm. + time_between_last_two_progresses = \ + last_forward_progress_by_task - task_forward_progressed_at + if not time_between_last_two_progresses: + # No progress since the last probe, increase the timeout + # until the next probe, and hope that some progress will + # have been made by that time. + probe_distance += timedelta(seconds=1) + elif time_between_last_two_progresses <= 2 * probe_distance: + # time_between_last_two_progresses is always at least + # probe_distance, because it is the distance between two + # queried and observed forward progress measurements. + # However, if they are "close enough" to each other, it + # means that the server is progressing well with the task + # and it is likely that the task might be finished "soon". + # + # In this case, it is beneficial to INCREASE the probing + # frequency, in order not to make the user wait "too much" + # before observing a "likely" soon available success. 
+ probe_distance /= 2 + else: + # If the progresses detected from the server are + # "far apart", it can indicate that the server is busy + # with processing the task. + # + # In this case, DECREASING the frequency if beneficial, + # because it is "likely" that a final result will not + # arrive soon, and keeping the current frequency would + # just keep "flooding" the server with queries that do + # not return a meaningfully different result. + probe_distance += timedelta(seconds=1) + else: + # If the forward progress has not been observed yet at all, + # increase the timeout until the next probe, and hope that + # some progress will have been made by that time. + probe_distance += timedelta(seconds=1) + + # At any rate, always keep the probe_distance between the + # requested limits. + probe_distance = \ + clamp(probe_delta_min, probe_distance, probe_delta_max) + + last_forward_progress_by_task = task_forward_progressed_at + + log.debug("Waiting %f seconds (%s) before querying the server...", + probe_distance.total_seconds(), probe_distance) + time.sleep(probe_distance.total_seconds()) + + return TaskStatus._VALUES_TO_NAMES[task_status] + + +def _datetime_to_utc_epoch(d: Optional[datetime]) -> Optional[int]: + return int(d.replace(tzinfo=timezone.utc).timestamp()) if d else None + + +def _utc_epoch_to_datetime(s: Optional[int]) -> Optional[datetime]: + return datetime.fromtimestamp(s, timezone.utc) if s else None + + +def _datetime_to_str(d: Optional[datetime]) -> Optional[str]: + return d.strftime("%Y-%m-%d %H:%M:%S") if d else None + + +def _build_filter(args: Namespace, + product_id_to_endpoint: Dict[int, str], + get_product_api: Callable[[], ThriftProductHelper]) \ + -> Optional[TaskFilter]: + """Build a `TaskFilter` from the command-line `args`.""" + filter_: Optional[TaskFilter] = None + + def get_filter() -> TaskFilter: + nonlocal filter_ + if not filter_: + filter_ = TaskFilter() + return filter_ + + if args.machine_id: + get_filter().machineIDs = args.machine_id + if args.type: + get_filter().kinds = args.type + if args.status: + get_filter().statuses = [TaskStatus._NAMES_TO_VALUES[s.upper()] + for s in args.status] + if args.username: + get_filter().usernames = args.username + elif args.no_username: + get_filter().filterForNoUsername = True + if args.product: + # Users specify products via ENDPOINTs for U.X. friendliness, but the + # API works with product IDs. + def _get_product_id_or_log(endpoint: str) -> Optional[int]: + try: + products: List[Product] = cast( + List[Product], + get_product_api().getProducts(endpoint, None)) + # Endpoints substring-match. 
+ product = next(p for p in products if p.endpoint == endpoint) + p_id = cast(int, product.id) + product_id_to_endpoint[p_id] = endpoint + return p_id + except StopIteration: + LOG.warning("No product with endpoint '%s', omitting it from " + "the query.", + endpoint) + return None + + get_filter().productIDs = list(filter(lambda i: i is not None, + map(_get_product_id_or_log, + args.product))) + elif args.no_product: + get_filter().filterForNoProductID = True + if args.enqueued_before: + get_filter().enqueuedBeforeEpoch = _datetime_to_utc_epoch( + args.enqueued_before) + if args.enqueued_after: + get_filter().enqueuedAfterEpoch = _datetime_to_utc_epoch( + args.enqueued_after) + if args.started_before: + get_filter().startedBeforeEpoch = _datetime_to_utc_epoch( + args.started_before) + if args.started_after: + get_filter().startedAfterEpoch = _datetime_to_utc_epoch( + args.started_after) + if args.finished_before: + get_filter().completedBeforeEpoch = _datetime_to_utc_epoch( + args.finished_before) + if args.finished_after: + get_filter().completedAfterEpoch = _datetime_to_utc_epoch( + args.finished_after) + if args.last_seen_before: + get_filter().heartbeatBeforeEpoch = _datetime_to_utc_epoch( + args.started_before) + if args.last_seen_after: + get_filter().heartbeatAfterEpoch = _datetime_to_utc_epoch( + args.started_after) + if args.only_cancelled: + get_filter().cancelFlag = Ternary._NAMES_TO_VALUES["ON"] + elif args.no_cancelled: + get_filter().cancelFlag = Ternary._NAMES_TO_VALUES["OFF"] + if args.only_consumed: + get_filter().consumedFlag = Ternary._NAMES_TO_VALUES["ON"] + elif args.no_consumed: + get_filter().consumedFlag = Ternary._NAMES_TO_VALUES["OFF"] + + return filter_ + + +def _unapi_info(ti: TaskInfo) -> dict: + """ + Converts a `TaskInfo` API structure into a flat Pythonic `dict` of + non-API types. + """ + return {**{k: v + for k, v in ti.__dict__.items() + if k != "status" and not k.endswith("Epoch")}, + **{k.replace("Epoch", "", 1): + _datetime_to_str(_utc_epoch_to_datetime(v)) + for k, v in ti.__dict__.items() + if k.endswith("Epoch")}, + **{"status": TaskStatus._VALUES_TO_NAMES[cast(int, ti.status)]}, + } + + +def _unapi_admin_info(ati: AdministratorTaskInfo) -> dict: + """ + Converts a `AdministratorTaskInfo` API structure into a flat Pythonic + `dict` of non-API types. + """ + return {**{k: v + for k, v in ati.__dict__.items() + if k != "normalInfo"}, + **_unapi_info(cast(TaskInfo, ati.normalInfo)), + } + + +def _transform_product_ids_to_endpoints( + task_infos: List[dict], + product_id_to_endpoint: Dict[int, str], + get_product_api: Callable[[], ThriftProductHelper] +): + """Replace ``task_infos[N]["productId"]`` with + ``task_infos[N]["productEndpoint"]`` for all elements. + """ + for ti in task_infos: + try: + ti["productEndpoint"] = \ + product_id_to_endpoint[ti["productId"]] \ + if ti["productId"] != 0 else None + except KeyError: + # Take the slow path, and get the ID->Endpoint map from the server. + product_id_to_endpoint = { + product.id: product.endpoint + for product + in get_product_api().getProducts(None, None)} + ti["productEndpoint"] = product_id_to_endpoint[ti["productId"]] + del ti["productId"] + + +def handle_tasks(args: Namespace) -> int: + """Main method for the ``CodeChecker cmd serverside-tasks`` subcommand.""" + # If the given output format is not `table`, redirect the logger's output + # to standard error. 
+ init_logger(args.verbose if "verbose" in args else None, + "stderr" if "output_format" in args + and args.output_format != "table" + else None) + + rc: int = 0 + protocol, host, port = split_server_url(args.server_url) + api = setup_task_client(protocol, host, port) + + if "TEST_WORKSPACE" in os.environ and "dummy_task_args" in args: + timeout, should_fail = \ + int(args.dummy_task_args[0]), \ + args.dummy_task_args[1].lower() in ["y", "yes", "true", "1", "on"] + + dummy_task_token = api.createDummyTask(timeout, should_fail) + LOG.info("Dummy task created with token '%s'.", dummy_task_token) + if not args.token: + args.token = [dummy_task_token] + else: + args.token.append(dummy_task_token) + + # Lazily initialise a Product manager API client as well, it can be needed + # if products are being put into a request filter, or product-specific + # tasks appear on the output. + product_api: Optional[ThriftProductHelper] = None + product_id_to_endpoint: Dict[int, str] = {} + + def get_product_api() -> ThriftProductHelper: + nonlocal product_api + if not product_api: + product_api = setup_product_client(protocol, host, port) + return product_api + + tokens_of_tasks: List[str] = [] + task_filter = _build_filter(args, + product_id_to_endpoint, + get_product_api) + if task_filter: + # If the "filtering" API must be used, the args.token list should also + # be part of the filter. + task_filter.tokens = args.token + + admin_task_infos: List[AdministratorTaskInfo] = \ + api.getTasks(task_filter) + + # Save the tokens of matched tasks for later, in case we have to do + # some further processing. + if args.cancel_task or args.wait_and_block: + tokens_of_tasks = [cast(str, ti.normalInfo.token) + for ti in admin_task_infos] + + task_info_for_print = list(map(_unapi_admin_info, + admin_task_infos)) + _transform_product_ids_to_endpoints(task_info_for_print, + product_id_to_endpoint, + get_product_api) + + if args.output_format == "json": + print(json.dumps(task_info_for_print)) + else: + if args.output_format == "plaintext": + # For the listing of the tasks, the "table" format is more + # appropriate, so we intelligently switch over to that. + args.output_format = "table" + + headers = ["Token", "Machine", "Type", "Summary", "Status", + "Product", "User", "Enqueued", "Started", "Last seen", + "Completed", "Cancelled?"] + rows = [] + for ti in task_info_for_print: + rows.append((ti["token"], + ti["machineId"], + ti["taskKind"], + ti["summary"], + ti["status"], + ti.get("productEndpoint", ""), + ti["actorUsername"] or "", + ti["enqueuedAt"] or "", + ti["startedAt"] or "", + ti["lastHeartbeat"] or "", + ti["completedAt"] or "", + "Yes" if ti["cancelFlagSet"] else "", + )) + print(twodim.to_str(args.output_format, headers, rows)) + else: + # If the filtering API was not used, we need to query the tasks + # directly, based on their token. + if not args.token: + LOG.error("ERROR! To use 'CodeChecker cmd serverside-tasks', " + "a '--token' list or some other filter criteria " + "**MUST** be specified!") + sys.exit(2) # Simulate argparse error code. + + # Otherwise, query the tasks, and print their info. + task_infos: List[TaskInfo] = [api.getTaskInfo(token) + for token in args.token] + if not task_infos: + LOG.error("No tasks retrieved for the specified tokens!") + return 1 + + if args.wait_and_block or args.cancel_task: + # If we need to do something with the tasks later, save the tokens. 
+ tokens_of_tasks = args.token + + task_info_for_print = list(map(_unapi_info, task_infos)) + _transform_product_ids_to_endpoints(task_info_for_print, + product_id_to_endpoint, + get_product_api) + + if len(task_infos) == 1: + # If there was exactly one task in the query, the return code + # of the program should be based on the status of the task. + ti = task_info_for_print[0] + if ti["status"] == "COMPLETED": + rc = 0 + elif ti["status"] == "FAILED": + rc = 4 + elif ti["status"] in ["ALLOCATED", "ENQUEUED", "RUNNING"]: + rc = 8 + elif ti["status"] in ["CANCELLED", "DROPPED"]: + rc = 16 + else: + raise ValueError(f"Unknown task status '{ti['status']}'!") + + if args.output_format == "json": + print(json.dumps(task_info_for_print)) + else: + if len(task_infos) > 1 or args.output_format != "plaintext": + if args.output_format == "plaintext": + # For the listing of the tasks, if there are multiple, the + # "table" format is more appropriate, so we intelligently + # switch over to that. + args.output_format = "table" + + headers = ["Token", "Type", "Summary", "Status", "Product", + "User", "Enqueued", "Started", "Last seen", + "Completed", "Cancelled by administrators?"] + rows = [] + for ti in task_info_for_print: + rows.append((ti["token"], + ti["taskKind"], + ti["summary"], + ti["status"], + ti["productEndpoint"] or "", + ti["actorUsername"] or "", + ti["enqueuedAt"] or "", + ti["startedAt"] or "", + ti["lastHeartbeat"] or "", + ti["completedAt"] or "", + "Yes" if ti["cancelFlagSet"] else "", + )) + + print(twodim.to_str(args.output_format, headers, rows)) + else: + # Otherwise, for exactly ONE task, in "plaintext" mode, print + # the details for humans to read. + ti = task_info_for_print[0] + product_line = \ + f" - Product: {ti['productEndpoint']}\n" \ + if "productEndpoint" in ti else "" + user_line = f" - User: {ti['actorUsername']}\n" \ + if ti["actorUsername"] else "" + cancel_line = " - Cancelled by administrators!\n" \ + if ti["cancelFlagSet"] else "" + print(f"Task '{ti['token']}':\n" + f" - Type: {ti['taskKind']}\n" + f" - Summary: {ti['summary']}\n" + f" - Status: {ti['status']}\n" + f"{product_line}" + f"{user_line}" + f" - Enqueued at: {ti['enqueuedAt'] or ''}\n" + f" - Started at: {ti['startedAt'] or ''}\n" + f" - Last seen: {ti['lastHeartbeat'] or ''}\n" + f" - Completed at: {ti['completedAt'] or ''}\n" + f"{cancel_line}" + ) + if ti["comments"]: + print(f"Comments on task '{ti['token']}':\n") + for line in ti["comments"].split("\n"): + if not line or line == "----------": + # Empty or separator lines. + print(line) + elif " at " in line and line.endswith(":"): + # Lines with the "header" for who made the comment + # and when. + print(line) + else: + print(f"> {line}") + + if args.cancel_task: + for token in tokens_of_tasks: + this_call_cancelled = api.cancelTask(token) + if this_call_cancelled: + LOG.info("Submitted cancellation request for task '%s'.", + token) + else: + LOG.debug("Task '%s' had already been cancelled.", token) + + if args.wait_and_block: + rc = 0 + for token in tokens_of_tasks: + LOG.info("Awaiting the completion of task '%s' ...", token) + status: str = await_task_termination( + cast(logger.logging.Logger, LOG), + token, + task_api_client=api) + if status != "COMPLETED": + if args.cancel_task: + # If '--kill' was specified, keep the return code 0 + # if the task was successfully cancelled as well. 
+ if status != "CANCELLED": + LOG.error("Task '%s' error status: %s!", + token, status) + rc = 1 + else: + LOG.info("Task '%s' terminated in status: %s.", + token, status) + else: + LOG.error("Task '%s' error status: %s!", token, status) + rc = 1 + else: + LOG.info("Task '%s' terminated in status: %s.", token, status) + + return rc diff --git a/web/client/codechecker_client/thrift_call.py b/web/client/codechecker_client/thrift_call.py index 32e5b3dc18..d41f7d7187 100644 --- a/web/client/codechecker_client/thrift_call.py +++ b/web/client/codechecker_client/thrift_call.py @@ -81,7 +81,11 @@ def wrapper(self, *args, **kwargs): LOG.error( 'Client/server API mismatch\n %s', str(reqfailure.message)) else: - LOG.error('API call error: %s\n%s', func_name, str(reqfailure)) + LOG.error("Error during API call: %s", func_name) + LOG.debug("%s", str(reqfailure)) + LOG.error("%s", str(reqfailure.message)) + if reqfailure.extraInfo: + LOG.error("%s", '\n'.join(reqfailure.extraInfo)) sys.exit(1) except TApplicationException as ex: LOG.error("Internal server error: %s", str(ex.message)) diff --git a/web/codechecker_web/shared/version.py b/web/codechecker_web/shared/version.py index e5d544a750..2ac2d84ae7 100644 --- a/web/codechecker_web/shared/version.py +++ b/web/codechecker_web/shared/version.py @@ -18,7 +18,7 @@ # The newest supported minor version (value) for each supported major version # (key) in this particular build. SUPPORTED_VERSIONS = { - 6: 58 + 6: 59 } # Used by the client to automatically identify the latest major and minor diff --git a/web/server/codechecker_server/api/authentication.py b/web/server/codechecker_server/api/authentication.py index 1430ad9fd6..9e73923e45 100644 --- a/web/server/codechecker_server/api/authentication.py +++ b/web/server/codechecker_server/api/authentication.py @@ -19,6 +19,7 @@ AuthorisationList, HandshakeInformation, Permissions, SessionTokenData from codechecker_common.logger import get_logger +from codechecker_common.util import generate_random_token from codechecker_server.profiler import timeit @@ -28,7 +29,6 @@ from ..permissions import handler_from_scope_params as make_handler, \ require_manager, require_permission from ..server import permissions -from ..session_manager import generate_session_token LOG = get_logger('server') @@ -363,7 +363,7 @@ def newToken(self, description): """ self.__require_privilaged_access() with DBSession(self.__config_db) as session: - token = generate_session_token() + token = generate_random_token(32) user = self.getLoggedInUser() groups = ';'.join(self.__auth_session.groups) session_token = Session(token, user, groups, description, False) diff --git a/web/server/codechecker_server/api/common.py b/web/server/codechecker_server/api/common.py new file mode 100644 index 0000000000..2fc699a24f --- /dev/null +++ b/web/server/codechecker_server/api/common.py @@ -0,0 +1,49 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +import sqlalchemy + +from codechecker_api_shared.ttypes import RequestFailed, ErrorCode + +from codechecker_common.logger import get_logger + + +LOG = get_logger("server") + + +def exc_to_thrift_reqfail(function): + """ + Convert internal exceptions to a `RequestFailed` Thrift exception, which + can be sent back to the RPC client. + """ + func_name = function.__name__ + + def wrapper(*args, **kwargs): + try: + res = function(*args, **kwargs) + return res + except sqlalchemy.exc.SQLAlchemyError as alchemy_ex: + # Convert SQLAlchemy exceptions. + msg = str(alchemy_ex) + import traceback + traceback.print_exc() + + # pylint: disable=raise-missing-from + raise RequestFailed(ErrorCode.DATABASE, msg) + except RequestFailed as rf: + LOG.warning("%s:\n%s", func_name, rf.message) + raise + except Exception as ex: + import traceback + traceback.print_exc() + msg = str(ex) + LOG.warning("%s:\n%s", func_name, msg) + + # pylint: disable=raise-missing-from + raise RequestFailed(ErrorCode.GENERAL, msg) + + return wrapper diff --git a/web/server/codechecker_server/api/mass_store_run.py b/web/server/codechecker_server/api/mass_store_run.py index 87ab4e2a52..e1d2870285 100644 --- a/web/server/codechecker_server/api/mass_store_run.py +++ b/web/server/codechecker_server/api/mass_store_run.py @@ -5,31 +5,37 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # ------------------------------------------------------------------------- +""" +Implementation of the ``massStoreRunAsynchronous()`` API function that store +run data to a product's report database. +Called via `report_server`, but factored out here for readability. 
+""" import base64 +from collections import defaultdict +from datetime import datetime, timedelta +from hashlib import sha256 import json import os +from pathlib import Path import sqlalchemy import tempfile import time +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, \ + cast import zipfile import zlib -from collections import defaultdict -from datetime import datetime, timedelta -from hashlib import sha256 -from tempfile import TemporaryDirectory -from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast - -import codechecker_api_shared +from codechecker_api_shared.ttypes import DBStatus, ErrorCode, RequestFailed from codechecker_api.codeCheckerDBAccess_v6 import ttypes from codechecker_common import skiplist_handler from codechecker_common.logger import get_logger from codechecker_common.review_status_handler import ReviewStatusHandler, \ SourceReviewStatus -from codechecker_common.util import load_json, path_for_fake_root +from codechecker_common.util import format_size, load_json, path_for_fake_root +from codechecker_report_converter import twodim from codechecker_report_converter.util import trim_path_prefixes from codechecker_report_converter.report import \ FakeChecker, Report, UnknownChecker, report_file @@ -45,10 +51,12 @@ ExtendedReportData, \ File, FileContent, \ Report as DBReport, ReportAnnotations, ReviewStatus as ReviewStatusRule, \ - Run, RunLock, RunHistory + Run, RunLock as DBRunLock, RunHistory from ..metadata import checker_is_unavailable, MetadataInfoParser - -from .report_server import ThriftRequestHandler +from ..product import Product as ServerProduct +from ..session_manager import SessionManager +from ..task_executors.abstract_task import AbstractTask, TaskCancelHonoured +from ..task_executors.task_manager import TaskManager from .thrift_enum_helper import report_extended_data_type_str @@ -56,32 +64,37 @@ STORE_TIME_LOG = get_logger('store_time') -class LogTask: +class StepLog: + """ + Simple context manager that logs an arbitrary step's comment and time + taken annotated with a run name. + """ + def __init__(self, run_name: str, message: str): - self.__run_name = run_name - self.__msg = message - self.__start_time = time.time() + self._run_name = run_name + self._msg = message + self._start_time = time.time() - def __enter__(self, *args): - LOG.info("[%s] %s...", self.__run_name, self.__msg) + def __enter__(self, *_args): + LOG.info("[%s] %s...", self._run_name, self._msg) - def __exit__(self, *args): - LOG.info("[%s] %s. Done. (Duration: %s sec)", self.__run_name, - self.__msg, round(time.time() - self.__start_time, 2)) + def __exit__(self, *_args): + LOG.info("[%s] %s. Done. (Duration: %.2f sec)", + self._run_name, self._msg, time.time() - self._start_time) -class RunLocking: +class RunLock: def __init__(self, session: DBSession, run_name: str): self.__session = session self.__run_name = run_name self.__run_lock = None - def __enter__(self, *args): + def __enter__(self, *_args): # Load the lock record for "FOR UPDATE" so that the transaction that # handles the run's store operations has a lock on the database row # itself. 
- self.__run_lock = self.__session.query(RunLock) \ - .filter(RunLock.name == self.__run_name) \ + self.__run_lock = self.__session.query(DBRunLock) \ + .filter(DBRunLock.name == self.__run_name) \ .with_for_update(nowait=True) \ .one() @@ -98,39 +111,161 @@ def __enter__(self, *args): self.__run_name, self.__run_lock.locked_at) return self - def __exit__(self, *args): + def __exit__(self, *_args): self.__run_lock = None self.__session = None + def store_run_lock_in_db(self, associated_user: str): + """ + Stores a `DBRunLock` record for the given run name into the database. + """ + try: + # If the run can be stored, we need to lock it first. If there is + # already a lock in the database for the given run name which is + # expired and multiple processes are trying to get this entry from + # the database for update we may get the following exception: + # could not obtain lock on row in relation "run_locks" + # This is the reason why we have to wrap this query to a try/except + # block. + run_lock: Optional[DBRunLock] = self.__session.query(DBRunLock) \ + .filter(DBRunLock.name == self.__run_name) \ + .with_for_update(nowait=True) \ + .one_or_none() + except (sqlalchemy.exc.OperationalError, + sqlalchemy.exc.ProgrammingError) as ex: + LOG.error("Failed to get run lock for '%s': %s", + self.__run_name, ex) + raise RequestFailed( + ErrorCode.DATABASE, + "Someone is already storing to the same run. Please wait " + "while the other storage is finished and try it again.") \ + from ex + + if not run_lock: + # If there is no lock record for the given run name, the run + # is not locked -> create a new lock. + self.__session.add(DBRunLock(self.__run_name, associated_user)) + LOG.debug("Acquiring 'run_lock' for '%s' on run '%s' ...", + associated_user, self.__run_name) + elif run_lock.has_expired( + db_cleanup.RUN_LOCK_TIMEOUT_IN_DATABASE): + # There can be a lock in the database, which has already + # expired. In this case, we assume that the previous operation + # has failed, and thus, we can re-use the already present lock. + run_lock.touch() + run_lock.username = associated_user + LOG.debug("Reusing existing, stale 'run_lock' record on " + "run '%s' ...", + self.__run_name) + else: + # In case the lock exists and it has not expired, we must + # consider the run a locked one. + when = run_lock.when_expires( + db_cleanup.RUN_LOCK_TIMEOUT_IN_DATABASE) + username = run_lock.username or "another user" + LOG.info("Refusing to store into run '%s' as it is locked by " + "%s. Lock will expire at '%s'.", + self.__run_name, username, when) + raise RequestFailed( + ErrorCode.DATABASE, + f"The run named '{self.__run_name}' is being stored into by " + f"{username}. If the other store operation has failed, this " + f"lock will expire at '{when}'.") + + # At any rate, if the lock has been created or updated, commit it + # into the database. + try: + self.__session.commit() + except (sqlalchemy.exc.IntegrityError, + sqlalchemy.orm.exc.StaleDataError) as ex: + # The commit of this lock can fail. + # + # In case two store ops attempt to lock the same run name at the + # same time, committing the lock in the transaction that commits + # later will result in an IntegrityError due to the primary key + # constraint. + # + # In case two store ops attempt to lock the same run name with + # reuse and one of the operation hangs long enough before COMMIT + # so that the other operation commits and thus removes the lock + # record, StaleDataError is raised. 
In this case, also consider + # the run locked, as the data changed while the transaction was + # waiting, as another run wholly completed. + + LOG.info("Run '%s' got locked while current transaction " + "tried to acquire a lock. Considering run as locked.", + self.__run_name) + raise RequestFailed( + ErrorCode.DATABASE, + f"The run named '{self.__run_name}' is being stored into by " + "another user.") from ex + + LOG.debug("Successfully acquired 'run_lock' for '%s' on run '%s'.", + associated_user, self.__run_name) + + def drop_run_lock_from_db(self): + """Remove the run_lock row from the database for the current run.""" + # Using with_for_update() here so the database (in case it supports + # this operation) locks the lock record's row from any other access. + LOG.debug("Releasing 'run_lock' from run '%s' ...") + run_lock: Optional[DBRunLock] = self.__session.query(DBRunLock) \ + .filter(DBRunLock.name == self.__run_name) \ + .with_for_update(nowait=True).one() + if not run_lock: + raise KeyError( + f"No 'run_lock' in database for run '{self.__run_name}'") + locked_at = run_lock.locked_at + username = run_lock.username + + self.__session.delete(run_lock) + self.__session.commit() -def unzip(b64zip: str, output_dir: str) -> int: + LOG.debug("Released 'run_lock' (originally acquired by '%s' on '%s') " + "from run '%s'.", + username, str(locked_at), self.__run_name) + + +def unzip(run_name: str, b64zip: str, output_dir: Path) -> int: """ - This function unzips the base64 encoded zip file. This zip is extracted - to a temporary directory and the ZIP is then deleted. The function returns - the size of the extracted decompressed zip file. + This function unzips a Base64 encoded and ZLib-compressed ZIP file. + This ZIP is extracted to a temporary directory and the ZIP is then deleted. + The function returns the size of the extracted decompressed ZIP file. """ - if len(b64zip) == 0: + if not b64zip: return 0 - with tempfile.NamedTemporaryFile(suffix='.zip') as zip_file: - LOG.debug("Unzipping mass storage ZIP '%s' to '%s'...", - zip_file.name, output_dir) - + with tempfile.NamedTemporaryFile( + suffix=".zip", dir=output_dir) as zip_file: + LOG.debug("Decompressing input massStoreRun() ZIP to '%s' ...", + zip_file.name) + start_time = time.time() zip_file.write(zlib.decompress(base64.b64decode(b64zip))) - with zipfile.ZipFile(zip_file, 'r', allowZip64=True) as zipf: + zip_file.flush() + end_time = time.time() + + size = os.stat(zip_file.name).st_size + LOG.debug("Decompressed input massStoreRun() ZIP '%s' -> '%s' " + "(compression ratio: %.2f%%) in '%s'.", + format_size(len(b64zip)), format_size(size), + (size / len(b64zip)), + timedelta(seconds=end_time - start_time)) + + with StepLog(run_name, "Extract massStoreRun() ZIP contents"), \ + zipfile.ZipFile(zip_file, 'r', allowZip64=True) as zip_handle: + LOG.debug("Extracting massStoreRun() ZIP '%s' to '%s' ...", + zip_file.name, output_dir) try: - zipf.extractall(output_dir) - return os.stat(zip_file.name).st_size + zip_handle.extractall(output_dir) + return size except Exception: LOG.error("Failed to extract received ZIP.") import traceback traceback.print_exc() raise - return 0 def get_file_content(file_path: str) -> bytes: - """Return the file content for the given filepath. 
""" + """Return the file content for the given `file_path`.""" with open(file_path, 'rb') as f: return f.read() @@ -202,7 +337,7 @@ def add_file_record( def get_blame_file_data( - blame_file: str + blame_file: Path ) -> Tuple[Optional[str], Optional[str], Optional[str]]: """ Get blame information from the given file. @@ -214,7 +349,7 @@ def get_blame_file_data( remote_url = None tracking_branch = None - if os.path.isfile(blame_file): + if blame_file.is_file(): data = load_json(blame_file) if data: remote_url = data.get("remote_url") @@ -234,210 +369,320 @@ def checker_name_for_report(report: Report) -> Tuple[str, str]: report.checker_name or UnknownChecker[1]) -class MassStoreRun: - def __init__( - self, - report_server: ThriftRequestHandler, - name: str, - tag: Optional[str], - version: Optional[str], - b64zip: str, - force: bool, - trim_path_prefix_list: Optional[List[str]], - description: Optional[str] - ): - """ Initialize object. """ - self.__report_server = report_server +class MassStoreRunInputHandler: + """Prepares a `MassStoreRunTask` from an API input.""" + + # Note: The implementation of this class is executed in the "foreground", + # in the context of an API handler process! + # **DO NOT** put complex logic here that would take too much time to + # validate. + # Long-running actions of a storage process should be in + # MassStoreRunImplementation instead! + + def __init__(self, + session_manager: SessionManager, + config_db_sessionmaker, + product_db_sessionmaker, + task_manager: TaskManager, + package_context, + product_id: int, + run_name: str, + run_description: Optional[str], + store_tag: Optional[str], + client_version: str, + force_overwrite_of_run: bool, + path_prefixes_to_trim: Optional[List[str]], + zipfile_contents_base64: str, + user_name: str): + self._input_handling_start_time = time.time() + self._session_manager = session_manager + self._config_db = config_db_sessionmaker + self._product_db = product_db_sessionmaker + self._tm = task_manager + self._package_context = package_context + self._input_zip_blob = zipfile_contents_base64 + self.client_version = client_version + self.force_overwrite_of_run = force_overwrite_of_run + self.path_prefixes_to_trim = path_prefixes_to_trim + self.run_name = run_name + self.run_description = run_description + self.store_tag = store_tag + self.user_name = user_name + + with DBSession(self._config_db) as session: + product: Optional[Product] = session.query(Product) \ + .get(product_id) + if not product: + raise KeyError(f"No product with ID '{product_id}'") + + self._product = product + + def check_store_input_validity_at_face_value(self): + """ + Performs semantic checks of a ``massStoreRunAsynchronous()`` Thrift + call that can be done with trivial amounts of work (i.e., without + actually parsing the full input ZIP). + """ + self._check_run_limit() + self._store_run_lock() # Fails if the run can not be stored into. - self.__name = name - self.__tag = tag - self.__version = version - self.__b64zip = b64zip - self.__force = force - self.__trim_path_prefixes = trim_path_prefix_list - self.__description = description + def create_mass_store_task(self, + is_actually_asynchronous=False) \ + -> "MassStoreRunTask": + """ + Constructs the `MassStoreRunTask` for the handled and verified input. 
- self.__mips: Dict[str, MetadataInfoParser] = {} - self.__analysis_info: Dict[str, AnalysisInfo] = {} - self.__checker_row_cache: Dict[Tuple[str, str], Checker] = {} - self.__duration: int = 0 - self.__report_count: int = 0 - self.__report_limit: int = 0 - self.__wrong_src_code_comments: List[str] = [] - self.__already_added_report_hashes: Set[str] = set() - self.__new_report_hashes: Dict[str, Tuple] = {} - self.__all_report_checkers: Set[str] = set() - self.__added_reports: List[Tuple[DBReport, Report]] = [] - self.__reports_with_fake_checkers: Dict[ - # Either a DBReport *without* an ID, or the ID of a committed - # DBReport. - str, Tuple[Report, Union[DBReport, int]]] = {} + Calling this function results in observable changes outside the + process's memory, as it records the task into the database and + extracts things to the server's storage area. + """ + token = self._tm.allocate_task_record( + "report_server::massStoreRunAsynchronous()" + if is_actually_asynchronous + else "report_server::massStoreRun()", + ("Legacy s" if not is_actually_asynchronous else "S") + + f"tore of results to '{self._product.endpoint}' - " + f"'{self.run_name}'", + self.user_name, + self._product) + temp_dir = self._tm.create_task_data(token) + extract_dir = temp_dir / "store_zip" + os.makedirs(extract_dir, exist_ok=True) - self.__get_report_limit_for_product() + try: + with StepLog(self.run_name, + "Save massStoreRun() ZIP data to server storage"): + zip_size = unzip(self.run_name, + self._input_zip_blob, + extract_dir) + + if not zip_size: + raise RequestFailed(ErrorCode.GENERAL, + "The uploaded ZIP file is empty!") + except Exception: + LOG.error("Failed to extract massStoreRunAsynchronous() ZIP!") + import traceback + traceback.print_exc() + raise - @property - def __manager(self): - return self.__report_server._manager + self._input_handling_end_time = time.time() - @property - def __config_database(self): - return self.__report_server._config_database + try: + with open(temp_dir / "store_configuration.json", 'w', + encoding="utf-8") as cfg_f: + json.dump({ + "client_version": self.client_version, + "force_overwrite": self.force_overwrite_of_run, + "path_prefixes_to_trim": self.path_prefixes_to_trim, + "run_name": self.run_name, + "run_description": self.run_description, + "store_tag": self.store_tag, + "user_name": self.user_name, + }, cfg_f) + except Exception: + LOG.error("Failed to write massStoreRunAsynchronous() " + "configuration!") + import traceback + traceback.print_exc() + raise - @property - def __product(self): - return self.__report_server._product + task = MassStoreRunTask(token, temp_dir, + self._package_context, + self._product.id, + zip_size, + self._input_handling_end_time - + self._input_handling_start_time) - @property - def __context(self): - return self.__report_server._context + if not is_actually_asynchronous: + self._tm.add_comment( + task, + "WARNING!\nExecuting a legacy 'massStoreRun()' API call!", + "SYSTEM") - @property - def user_name(self): - return self.__report_server._get_username() + return task - def __check_run_limit(self): + def _check_run_limit(self): """ - Checks the maximum allowed of uploadable runs for the current product. + Checks the maximum allowed number of uploadable runs for the current + product. 
""" - max_run_count = self.__manager.get_max_run_count() - - with DBSession(self.__config_database) as session: - product = session.query(Product).get(self.__product.id) - if product.run_limit: - max_run_count = product.run_limit - - # Session that handles constraints on the run. - with DBSession(self.__report_server._Session) as session: - if not max_run_count: - return - - LOG.debug("Check the maximum number of allowed runs which is %d", - max_run_count) - - run = session.query(Run) \ - .filter(Run.name == self.__name) \ + run_limit: Optional[int] = self._session_manager.get_max_run_count() + if self._product.run_limit: + run_limit = self._product.run_limit + + if not run_limit: + # Allowing the user to upload an unlimited number of runs. + return + LOG.debug("Checking the maximum number of allowed runs in '%s', " + "which is %d.", + self._product.endpoint, run_limit) + + with DBSession(self._product_db) as session: + existing_run: Optional[Run] = session.query(Run) \ + .filter(Run.name == self.run_name) \ .one_or_none() - - # If max_run_count is not set in the config file, it will allow - # the user to upload unlimited runs. - run_count = session.query(Run.id).count() - # If we are not updating a run or the run count is reached the - # limit it will throw an exception. - if not run and run_count >= max_run_count: - remove_run_count = run_count - max_run_count + 1 - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.GENERAL, - f"You reached the maximum number of allowed runs " - f"({run_count}/{max_run_count})! Please remove at least " - f"{remove_run_count} run(s) before you try it again.") - - def __store_run_lock(self, session: DBSession): + if not existing_run and run_count >= run_limit: + raise RequestFailed( + ErrorCode.GENERAL, + "You reached the maximum number of allowed runs " + f"({run_count}/{run_limit})! " + f"Please remove at least {run_count - run_limit + 1} " + "run(s) before you try again!") + + def _store_run_lock(self): + """Commits a `DBRunLock` for the to-be-stored `Run`, if available.""" + with DBSession(self._product_db) as session: + RunLock(session, self.run_name) \ + .store_run_lock_in_db(self.user_name) + + +class MassStoreRunTask(AbstractTask): + """Executes `MassStoreRun` as a background job.""" + + def __init__(self, token: str, data_path: Path, + package_context, + product_id: int, + input_zip_size: int, + preparation_time_elapsed: float): """ - Store a RunLock record for the given run name into the database. + Creates the `AbstractTask` implementation for + ``massStoreRunAsynchronous()``. + + `preparation_time_elapsed` records how much time was spent by the + input handling that prepared the task. + This time will be added to the total time spent processing the results + in the background. + (The time spent in waiting between task enschedulement and task + execution is not part of the total time.) """ - try: - # If the run can be stored, we need to lock it first. If there is - # already a lock in the database for the given run name which is - # expired and multiple processes are trying to get this entry from - # the database for update we may get the following exception: - # could not obtain lock on row in relation "run_locks" - # This is the reason why we have to wrap this query to a try/except - # block. 
- run_lock = session.query(RunLock) \ - .filter(RunLock.name == self.__name) \ - .with_for_update(nowait=True).one_or_none() - except (sqlalchemy.exc.OperationalError, - sqlalchemy.exc.ProgrammingError) as ex: - LOG.error("Failed to get run lock for '%s': %s", self.__name, ex) - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - "Someone is already storing to the same run. Please wait " - "while the other storage is finished and try it again.") + super().__init__(token, data_path) + self._package_context = package_context + self._product_id = product_id + self.input_zip_size = input_zip_size + self.time_spent_on_task_preparation = preparation_time_elapsed - if not run_lock: - # If there is no lock record for the given run name, the run - # is not locked -- create a new lock. - run_lock = RunLock(self.__name, self.user_name) - session.add(run_lock) - elif run_lock.has_expired( - db_cleanup.RUN_LOCK_TIMEOUT_IN_DATABASE): - # There can be a lock in the database, which has already - # expired. In this case, we assume that the previous operation - # has failed, and thus, we can re-use the already present lock. - run_lock.touch() - run_lock.username = self.user_name - else: - # In case the lock exists and it has not expired, we must - # consider the run a locked one. - when = run_lock.when_expires( - db_cleanup.RUN_LOCK_TIMEOUT_IN_DATABASE) + def _implementation(self, tm: TaskManager): + try: + with open(self.data_path / "store_configuration.json", 'r', + encoding="utf-8") as cfg_f: + self.store_configuration = json.load(cfg_f) + except Exception: + LOG.error("Invalid or unusable massStoreRunAsynchronous() " + "configuration!") + raise - username = run_lock.username if run_lock.username is not None \ - else "another user" + with DBSession(tm.configuration_database_session_factory) as session: + db_product: Optional[Product] = session.query(Product) \ + .get(self._product_id) + if not db_product: + raise KeyError(f"No product with ID '{self._product_id}'") + + self._product = ServerProduct(db_product.id, + db_product.endpoint, + db_product.display_name, + db_product.connection, + self._package_context, + tm.environment) + + self._product.connect() + if self._product.db_status != DBStatus.OK: + raise EnvironmentError("Database for product " + f"'{self._product.endpoint}' is in " + "a bad shape!") + + def __cancel_if_needed(): + tm.heartbeat(self) + if tm.should_cancel(self): + raise TaskCancelHonoured(self) + + m = MassStoreRun(__cancel_if_needed, + self.data_path / "store_zip", + self._package_context, + tm.configuration_database_session_factory, + self._product, + self.store_configuration["run_name"], + self.store_configuration["store_tag"], + self.store_configuration["client_version"], + self.store_configuration["force_overwrite"], + self.store_configuration["path_prefixes_to_trim"], + self.store_configuration["run_description"], + self.store_configuration["user_name"], + ) + m.store(self.input_zip_size, self.time_spent_on_task_preparation) - LOG.info("Refusing to store into run '%s' as it is locked by " - "%s. Lock will expire at '%s'.", self.__name, username, - when) - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - f"The run named '{self.__name}' is being stored into by " - f"{username}. If the other store operation has failed, this " - f"lock will expire at '{when}'.") - # At any rate, if the lock has been created or updated, commit it - # into the database. 
- try: - session.commit() - except (sqlalchemy.exc.IntegrityError, - sqlalchemy.orm.exc.StaleDataError) as ex: - # The commit of this lock can fail. - # - # In case two store ops attempt to lock the same run name at the - # same time, committing the lock in the transaction that commits - # later will result in an IntegrityError due to the primary key - # constraint. - # - # In case two store ops attempt to lock the same run name with - # reuse and one of the operation hangs long enough before COMMIT - # so that the other operation commits and thus removes the lock - # record, StaleDataError is raised. In this case, also consider - # the run locked, as the data changed while the transaction was - # waiting, as another run wholly completed. +class MassStoreRun: + """Implementation for ``massStoreRunAsynchronous()``.""" + + # Note: The implementation of this class is called from MassStoreRunTask + # and it is executed in the background, in the context of a Task worker + # process. + # This is the place where complex implementation logic must go, but be + # careful, there is no way to communicate with the user's client anymore! + + def __init__(self, + graceful_cancel: Callable[[], None], + zip_dir: Path, + package_context, + config_db, + product: ServerProduct, + name: str, + tag: Optional[str], + version: Optional[str], + force: bool, + trim_path_prefix_list: Optional[List[str]], + description: Optional[str], + user_name: str, + ): + self._zip_dir = zip_dir + self._name = name + self._tag = tag + self._version = version + self._force = force + self._trim_path_prefixes = trim_path_prefix_list + self._description = description + self._user_name = user_name + self.__config_db = config_db + self.__package_context = package_context + self.__product = product + self.__graceful_cancel_if_requested = graceful_cancel - LOG.info("Run '%s' got locked while current transaction " - "tried to acquire a lock. Considering run as locked.", - self.__name) - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - f"The run named '{self.__name}' is being stored into by " - "another user.") from ex + self.__mips: Dict[str, MetadataInfoParser] = {} + self.__analysis_info: Dict[str, AnalysisInfo] = {} + self.__checker_row_cache: Dict[Tuple[str, str], Checker] = {} + self.__duration: int = 0 + self.__report_count: int = 0 + self.__report_limit: int = 0 + self.__wrong_src_code_comments: List[str] = [] + self.__already_added_report_hashes: Set[str] = set() + self.__new_report_hashes: Dict[str, Tuple] = {} + self.__all_report_checkers: Set[str] = set() + self.__added_reports: List[Tuple[DBReport, Report]] = [] + self.__reports_with_fake_checkers: Dict[ + # Either a DBReport *without* an ID, or the ID of a committed + # DBReport. + str, Tuple[Report, Union[DBReport, int]]] = {} - def __free_run_lock(self, session: DBSession): - """ Remove the lock from the database for the given run name. """ - # Using with_for_update() here so the database (in case it supports - # this operation) locks the lock record's row from any other access. 
- run_lock = session.query(RunLock) \ - .filter(RunLock.name == self.__name) \ - .with_for_update(nowait=True).one() - session.delete(run_lock) - session.commit() + with DBSession(config_db) as session: + product = session.query(Product).get(self.__product.id) + self.__report_limit = product.report_limit def __store_source_files( self, - source_root: str, + source_root: Path, filename_to_hash: Dict[str, str] ) -> Dict[str, int]: """ Storing file contents from plist. """ - file_path_to_id = {} for file_name, file_hash in filename_to_hash.items(): - source_file_path = path_for_fake_root(file_name, source_root) + self.__graceful_cancel_if_requested() + source_file_path = path_for_fake_root(file_name, str(source_root)) LOG.debug("Storing source file: %s", source_file_path) trimmed_file_path = trim_path_prefixes( - file_name, self.__trim_path_prefixes) + file_name, self._trim_path_prefixes) if not os.path.isfile(source_file_path): # The file was not in the ZIP file, because we already @@ -445,7 +690,7 @@ def __store_source_files( # record in the database or we need to add one. LOG.debug('%s not found or already stored.', trimmed_file_path) - with DBSession(self.__report_server._Session) as session: + with DBSession(self.__product.session_factory) as session: fid = add_file_record( session, trimmed_file_path, file_hash) @@ -458,7 +703,7 @@ def __store_source_files( source_file_path, file_hash) continue - with DBSession(self.__report_server._Session) as session: + with DBSession(self.__product.session_factory) as session: self.__add_file_content(session, source_file_path, file_hash) file_path_to_id[trimmed_file_path] = add_file_record( @@ -468,7 +713,7 @@ def __store_source_files( def __add_blame_info( self, - blame_root: str, + blame_root: Path, filename_to_hash: Dict[str, str] ): """ @@ -480,11 +725,12 @@ def __add_blame_info( .zip file. This function stores blame info even if the corresponding source file is not in the .zip file. 
""" - with DBSession(self.__report_server._Session) as session: + with DBSession(self.__product.session_factory) as session: for subdir, _, files in os.walk(blame_root): for f in files: - blame_file = os.path.join(subdir, f) - file_path = blame_file[len(blame_root.rstrip("/")):] + self.__graceful_cancel_if_requested() + blame_file = Path(subdir) / f + file_path = f"/{str(blame_file.relative_to(blame_root))}" blame_info, remote_url, tracking_branch = \ get_blame_file_data(blame_file) @@ -599,8 +845,8 @@ def __store_checker_identifiers(self, checkers: Set[Tuple[str, str]]): while tries < max_tries: tries += 1 try: - LOG.debug("[%s] Begin attempt %d...", self.__name, tries) - with DBSession(self.__report_server._Session) as session: + LOG.debug("[%s] Begin attempt %d...", self._name, tries) + with DBSession(self.__product.session_factory) as session: known_checkers = {(r.analyzer_name, r.checker_name) for r in session .query(Checker.analyzer_name, @@ -608,7 +854,8 @@ def __store_checker_identifiers(self, checkers: Set[Tuple[str, str]]): .all()} for analyzer, checker in \ sorted(all_checkers - known_checkers): - s = self.__context.checker_labels.severity(checker) + s = self.__package_context.checker_labels \ + .severity(checker) s = ttypes.Severity._NAMES_TO_VALUES[s] session.add(Checker(analyzer, checker, s)) LOG.debug("Acquiring ID for checker '%s/%s' " @@ -620,7 +867,7 @@ def __store_checker_identifiers(self, checkers: Set[Tuple[str, str]]): sqlalchemy.exc.ProgrammingError) as ex: LOG.error("Storing checkers of run '%s' failed: %s.\n" "Waiting %d before trying again...", - self.__name, ex, wait_time) + self._name, ex, wait_time) time.sleep(wait_time.total_seconds()) wait_time *= 2 except Exception as ex: @@ -630,10 +877,9 @@ def __store_checker_identifiers(self, checkers: Set[Tuple[str, str]]): traceback.print_exc() raise - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - "Storing the names of the checkers in the run failed due to " - "excessive contention!") + raise ConnectionRefusedError("Storing the names of the checkers in " + "the run failed due to excessive " + "contention!") def __store_analysis_statistics( self, @@ -661,7 +907,7 @@ def __store_analysis_statistics( stats[analyzer_type]["versions"].add(res["version"]) if "failed_sources" in res: - if self.__version == '6.9.0': + if self._version == '6.9.0': stats[analyzer_type]["failed_sources"].add( 'Unavailable in CodeChecker 6.9.0!') else: @@ -757,89 +1003,82 @@ def __add_or_update_run( By default updates the results if name already exists. Using the force flag removes existing analysis results for a run. """ - try: - LOG.debug("Adding run '%s'...", self.__name) + LOG.debug("Adding run '%s'...", self._name) + + run = session.query(Run) \ + .filter(Run.name == self._name) \ + .one_or_none() + + update_run = True + if run and self._force: + # Clean already collected results. + if not run.can_delete: + # Deletion is already in progress. + msg = f"Can't delete {run.id}" + LOG.debug(msg) + raise EnvironmentError(msg) + + LOG.info('Removing previous analysis results...') + session.delete(run) + # Not flushing after delete leads to a constraint violation + # error later, when adding run entity with the same name as + # the old one. 
+ session.flush() - run = session.query(Run) \ - .filter(Run.name == self.__name) \ - .one_or_none() + checker_run = Run(self._name, self._version) + session.add(checker_run) + session.flush() + run_id = checker_run.id - update_run = True - if run and self.__force: - # Clean already collected results. - if not run.can_delete: - # Deletion is already in progress. - msg = f"Can't delete {run.id}" - LOG.debug(msg) - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - msg) - - LOG.info('Removing previous analysis results...') - session.delete(run) - # Not flushing after delete leads to a constraint violation - # error later, when adding run entity with the same name as - # the old one. - session.flush() - - checker_run = Run(self.__name, self.__version) - session.add(checker_run) - session.flush() - run_id = checker_run.id - - elif run: - # There is already a run, update the results. - run.date = datetime.now() - run.duration = -1 - session.flush() - run_id = run.id - else: - # There is no run create new. - checker_run = Run(self.__name, self.__version) - session.add(checker_run) - session.flush() - run_id = checker_run.id - update_run = False - - # Add run to the history. - LOG.debug("Adding run history.") - - if self.__tag is not None: - run_history = session.query(RunHistory) \ - .filter(RunHistory.run_id == run_id, - RunHistory.version_tag == self.__tag) \ - .one_or_none() - - if run_history: - run_history.version_tag = None - session.add(run_history) - - cc_versions = set() - for mip in self.__mips.values(): - if mip.cc_version: - cc_versions.add(mip.cc_version) - - cc_version = '; '.join(cc_versions) if cc_versions else None - run_history = RunHistory( - run_id, self.__tag, self.user_name, run_history_time, - cc_version, self.__description) - - session.add(run_history) + elif run: + # There is already a run, update the results. + run.date = datetime.now() + run.duration = -1 session.flush() + run_id = run.id + else: + # There is no run create new. + checker_run = Run(self._name, self._version) + session.add(checker_run) + session.flush() + run_id = checker_run.id + update_run = False + + # Add run to the history. 
+ LOG.debug("Adding run history.") - LOG.debug("Adding run done.") + if self._tag is not None: + run_history = session.query(RunHistory) \ + .filter(RunHistory.run_id == run_id, + RunHistory.version_tag == self._tag) \ + .one_or_none() - self.__store_analysis_statistics(session, run_history.id) - self.__store_analysis_info(session, run_history) + if run_history: + run_history.version_tag = None + session.add(run_history) - session.flush() - LOG.debug("Storing analysis statistics done.") + cc_versions = set() + for mip in self.__mips.values(): + if mip.cc_version: + cc_versions.add(mip.cc_version) - return run_id, update_run - except Exception as ex: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.GENERAL, - str(ex)) + cc_version = '; '.join(cc_versions) if cc_versions else None + run_history = RunHistory( + run_id, self._tag, self._user_name, run_history_time, + cc_version, self._description) + + session.add(run_history) + session.flush() + + LOG.debug("Adding run done.") + + self.__store_analysis_statistics(session, run_history.id) + self.__store_analysis_info(session, run_history) + + session.flush() + LOG.debug("Storing analysis statistics done.") + + return run_id, update_run def __get_checker(self, session: DBSession, @@ -879,49 +1118,43 @@ def __add_report( fixed_at: Optional[datetime] = None ) -> int: """ Add report to the database. """ - try: - checker = self.__checker_for_report(session, report) + checker = self.__checker_for_report(session, report) + if not checker: + # It would be too easy to create a 'Checker' instance with the + # observed data right here, but __add_report() is called in + # the context of the *BIG* TRANSACTION which has all the + # reports of the entire store pending. Losing all that + # information on a potential UNIQUE CONSTRAINT violation due + # to multiple concurrent massStoreRun()s trying to store the + # same checker ID which was never seen in a 'metadata.json' is + # not worth it. + checker = self.__get_checker(session, + FakeChecker[0], FakeChecker[1]) if not checker: - # It would be too easy to create a 'Checker' instance with the - # observed data right here, but __add_report() is called in - # the context of the *BIG* TRANSACTION which has all the - # reports of the entire store pending. Losing all that - # information on a potential UNIQUE CONSTRAINT violation due - # to multiple concurrent massStoreRun()s trying to store the - # same checker ID which was never seen in a 'metadata.json' is - # not worth it. 
-                checker = self.__get_checker(session,
-                                             FakeChecker[0], FakeChecker[1])
-                if not checker:
-                    LOG.fatal("Psuedo-checker '%s/%s' has no "
-                              "identity in the database, even though "
-                              "__store_checker_identifiers() should have "
-                              "always preemptively created it!",
-                              FakeChecker[0], FakeChecker[1])
-                    raise KeyError(FakeChecker[1])
-
-            db_report = DBReport(
-                file_path_to_id[report.file.path], run_id, report.report_hash,
-                checker, report.line, report.column,
-                len(report.bug_path_events), report.message, detection_status,
-                review_status.status, review_status.author,
-                review_status.message, run_history_time,
-                review_status.in_source, detection_time, fixed_at)
-            if analysis_info:
-                db_report.analysis_info.append(analysis_info)
-
-            session.add(db_report)
-            self.__added_reports.append((db_report, report))
-            if db_report.checker.checker_name == FakeChecker[1]:
-                self.__reports_with_fake_checkers[report_path_hash] = \
-                    (report, db_report)
-
-            return db_report.id
-
-        except Exception as ex:
-            raise codechecker_api_shared.ttypes.RequestFailed(
-                codechecker_api_shared.ttypes.ErrorCode.GENERAL,
-                str(ex))
+                LOG.fatal("Pseudo-checker '%s/%s' has no "
+                          "identity in the database, even though "
+                          "__store_checker_identifiers() should have "
+                          "always preemptively created it!",
+                          FakeChecker[0], FakeChecker[1])
+                raise KeyError(FakeChecker[1])
+
+        db_report = DBReport(
+            file_path_to_id[report.file.path], run_id, report.report_hash,
+            checker, report.line, report.column,
+            len(report.bug_path_events), report.message, detection_status,
+            review_status.status, review_status.author,
+            review_status.message, run_history_time,
+            review_status.in_source, detection_time, fixed_at)
+        if analysis_info:
+            db_report.analysis_info.append(analysis_info)
+
+        session.add(db_report)
+        self.__added_reports.append((db_report, report))
+        if db_report.checker.checker_name == FakeChecker[1]:
+            self.__reports_with_fake_checkers[report_path_hash] = \
+                (report, db_report)
+
+        return db_report.id
 
     def __get_faked_checkers(self) \
             -> Set[Tuple[str, str]]:
@@ -966,78 +1199,67 @@ def __realise_fake_checkers(self, session):
         so all it does is upgrade the 'checker_id' FOREIGN KEY field
         to point at the real checker.
""" - try: - grouped_by_checker: Dict[Tuple[str, str], List[int]] = \ - defaultdict(list) - for _, (report, db_id) in \ - self.__reports_with_fake_checkers.items(): - checker: Tuple[str, str] = checker_name_for_report(report) - grouped_by_checker[checker].append(cast(int, db_id)) - - for chk, report_ids in grouped_by_checker.items(): - analyzer_name, checker_name = chk - chk_obj = cast(Checker, self.__get_checker(session, - analyzer_name, - checker_name)) - session.query(DBReport) \ - .filter(DBReport.id.in_(report_ids)) \ - .update({"checker_id": chk_obj.id}, - synchronize_session=False) - except Exception as ex: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - str(ex)) + grouped_by_checker: Dict[Tuple[str, str], List[int]] = \ + defaultdict(list) + for _, (report, db_id) in \ + self.__reports_with_fake_checkers.items(): + checker: Tuple[str, str] = checker_name_for_report(report) + grouped_by_checker[checker].append(cast(int, db_id)) + + for chk, report_ids in grouped_by_checker.items(): + analyzer_name, checker_name = chk + chk_obj = cast(Checker, self.__get_checker(session, + analyzer_name, + checker_name)) + session.query(DBReport) \ + .filter(DBReport.id.in_(report_ids)) \ + .update({"checker_id": chk_obj.id}, + synchronize_session=False) def __add_report_context(self, session, file_path_to_id): - try: - for db_report, report in self.__added_reports: - LOG.debug("Storing bug path positions.") - for i, p in enumerate(report.bug_path_positions): - session.add(BugReportPoint( - p.range.start_line, p.range.start_col, - p.range.end_line, p.range.end_col, - i, file_path_to_id[p.file.path], db_report.id)) - - LOG.debug("Storing bug path events.") - for i, event in enumerate(report.bug_path_events): - session.add(BugPathEvent( - event.range.start_line, event.range.start_col, - event.range.end_line, event.range.end_col, - i, event.message, file_path_to_id[event.file.path], - db_report.id)) - - LOG.debug("Storing notes.") - for note in report.notes: - data_type = report_extended_data_type_str( - ttypes.ExtendedReportDataType.NOTE) - - session.add(ExtendedReportData( - note.range.start_line, note.range.start_col, - note.range.end_line, note.range.end_col, - note.message, file_path_to_id[note.file.path], - db_report.id, data_type)) - - LOG.debug("Storing macro expansions.") - for macro in report.macro_expansions: - data_type = report_extended_data_type_str( - ttypes.ExtendedReportDataType.MACRO) - - session.add(ExtendedReportData( - macro.range.start_line, macro.range.start_col, - macro.range.end_line, macro.range.end_col, - macro.message, file_path_to_id[macro.file.path], - db_report.id, data_type)) - - if report.annotations: - self.__validate_and_add_report_annotations( - session, db_report.id, report.annotations) + for db_report, report in self.__added_reports: + LOG.debug("Storing bug path positions.") + for i, p in enumerate(report.bug_path_positions): + session.add(BugReportPoint( + p.range.start_line, p.range.start_col, + p.range.end_line, p.range.end_col, + i, file_path_to_id[p.file.path], db_report.id)) + + LOG.debug("Storing bug path events.") + for i, event in enumerate(report.bug_path_events): + session.add(BugPathEvent( + event.range.start_line, event.range.start_col, + event.range.end_line, event.range.end_col, + i, event.message, file_path_to_id[event.file.path], + db_report.id)) + + LOG.debug("Storing notes.") + for note in report.notes: + data_type = report_extended_data_type_str( + ttypes.ExtendedReportDataType.NOTE) + + 
session.add(ExtendedReportData( + note.range.start_line, note.range.start_col, + note.range.end_line, note.range.end_col, + note.message, file_path_to_id[note.file.path], + db_report.id, data_type)) + + LOG.debug("Storing macro expansions.") + for macro in report.macro_expansions: + data_type = report_extended_data_type_str( + ttypes.ExtendedReportDataType.MACRO) + + session.add(ExtendedReportData( + macro.range.start_line, macro.range.start_col, + macro.range.end_line, macro.range.end_col, + macro.message, file_path_to_id[macro.file.path], + db_report.id, data_type)) + + if report.annotations: + self.__validate_and_add_report_annotations( + session, db_report.id, report.annotations) - session.flush() - - except Exception as ex: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.GENERAL, - str(ex)) + session.flush() def __process_report_file( self, @@ -1074,7 +1296,7 @@ def get_missing_file_ids(report: Report) -> List[str]: for report in reports: self.__report_count += 1 - report.trim_path_prefixes(self.__trim_path_prefixes) + report.trim_path_prefixes(self._trim_path_prefixes) missing_ids_for_files = get_missing_file_ids(report) if missing_ids_for_files: @@ -1117,7 +1339,7 @@ def get_missing_file_ids(report: Report) -> List[str]: except ValueError as err: self.__wrong_src_code_comments.append(str(err)) - review_status.author = self.user_name + review_status.author = self._user_name review_status.date = run_history_time # False positive and intentional reports are considered as closed @@ -1181,24 +1403,17 @@ def __validate_and_add_report_annotations( try: allowed_annotations[key]["func"](value) session.add(ReportAnnotations(report_id, key, value)) - except KeyError: - # pylint: disable=raise-missing-from - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.REPORT_FORMAT, - f"'{key}' is not an allowed report annotation.", - allowed_annotations.keys()) - except ValueError: - # pylint: disable=raise-missing-from - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.REPORT_FORMAT, - f"'{value}' has wrong format. '{key}' annotations must be " - f"'{allowed_annotations[key]['display']}'.") - - def __get_report_limit_for_product(self): - with DBSession(self.__config_database) as session: - product = session.query(Product).get(self.__product.id) - if product.report_limit: - self.__report_limit = product.report_limit + except KeyError as ke: + raise TypeError(f"'{key}' is not an allowed report " + "annotation. " + "The allowed annotations are: " + f"{allowed_annotations.keys()}") \ + from ke + except ValueError as ve: + raise ValueError(f"'{value}' is in a wrong format! " + f"'{key}' annotations must be " + f"'{allowed_annotations[key]['display']}'.") \ + from ve def __check_report_count(self): """ @@ -1210,13 +1425,7 @@ def __check_report_count(self): LOG.error("The number of reports in the given report folder is " + "larger than the allowed." + f"The limit: {self.__report_limit}!") - extra_info = [ - "report_limit", - f"limit:{self.__report_limit}" - ] - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes. - ErrorCode.GENERAL, + raise OverflowError( "**Report Limit Exceeded** " + "This report folder cannot be stored because the number of " + "reports in the result folder is too high. Usually noisy " + @@ -1226,14 +1435,13 @@ def __check_report_count(self): "counts. 
Disable checkers that have generated an excessive " + "number of reports and then rerun the analysis to be able " + "to store the results on the server. " + - f"Limit: {self.__report_limit}", - extra_info) + f"Limit: {self.__report_limit}") def __store_reports( self, session: DBSession, - report_dir: str, - source_root: str, + report_dir: Path, + source_root: Path, run_id: int, file_path_to_id: Dict[str, int], run_history_time: datetime @@ -1241,11 +1449,11 @@ def __store_reports( """ Parse up and store the plist report files. """ def get_skip_handler( - report_dir: str + report_dir: Path ) -> skiplist_handler.SkipListHandler: """ Get a skip list handler based on the given report directory.""" - skip_file_path = os.path.join(report_dir, 'skip_file') - if not os.path.exists(skip_file_path): + skip_file_path = report_dir / "skip_file" + if not skip_file_path.exists(): return skiplist_handler.SkipListHandler() LOG.debug("Pocessing skip file %s", skip_file_path) @@ -1282,9 +1490,8 @@ def get_skip_handler( for root_dir_path, _, report_file_paths in os.walk(report_dir): LOG.debug("Get reports from '%s' directory", root_dir_path) - skip_handler = get_skip_handler(root_dir_path) - - review_status_handler = ReviewStatusHandler(source_root) + skip_handler = get_skip_handler(Path(root_dir_path)) + review_status_handler = ReviewStatusHandler(str(source_root)) review_status_cfg = \ os.path.join(root_dir_path, 'review_status.yaml') @@ -1303,6 +1510,7 @@ def get_skip_handler( LOG.debug("Parsing input file '%s'", f) report_file_path = os.path.join(root_dir_path, f) + self.__graceful_cancel_if_requested() self.__process_report_file( report_file_path, session, run_id, file_path_to_id, run_history_time, @@ -1346,7 +1554,7 @@ def get_skip_handler( session.flush() - LOG.info("[%s] Processed %d analyzer result file(s).", self.__name, + LOG.info("[%s] Processed %d analyzer result file(s).", self._name, processed_result_file_count) # If a checker was found in a plist file it can not be disabled so we @@ -1377,8 +1585,8 @@ def get_skip_handler( report.fixed_at = run_history_time if reports_to_delete: - self.__report_server._removeReports( - session, list(reports_to_delete)) + from .report_server import remove_reports + remove_reports(session, reports_to_delete) def finish_checker_run( self, @@ -1401,153 +1609,137 @@ def finish_checker_run( return False - def store(self) -> int: - """ Store run results to the server. """ + def store(self, + original_zip_size: int, + time_spent_on_task_preparation: float): + """Store run results to the server.""" + self.__graceful_cancel_if_requested() start_time = time.time() - # Check constraints of the run. - self.__check_run_limit() - - with DBSession(self.__report_server._Session) as session: - self.__store_run_lock(session) - try: - with TemporaryDirectory( - dir=self.__context.codechecker_workspace - ) as zip_dir: - with LogTask(run_name=self.__name, - message="Unzip storage file"): - zip_size = unzip(self.__b64zip, zip_dir) - - if zip_size == 0: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes. 
- ErrorCode.GENERAL, - "The received zip file content is empty!") - - LOG.debug("Using unzipped folder '%s'", zip_dir) - - source_root = os.path.join(zip_dir, 'root') - blame_root = os.path.join(zip_dir, 'blame') - report_dir = os.path.join(zip_dir, 'reports') - content_hash_file = os.path.join( - zip_dir, 'content_hashes.json') - - filename_to_hash = load_json(content_hash_file, {}) - - with LogTask(run_name=self.__name, - message="Store source files"): - LOG.info("[%s] Storing %d source file(s).", self.__name, - len(filename_to_hash.keys())) - file_path_to_id = self.__store_source_files( - source_root, filename_to_hash) - self.__add_blame_info(blame_root, filename_to_hash) - - run_history_time = datetime.now() - - # Parse all metadata information from the report directory. - with LogTask(run_name=self.__name, - message="Parse 'metadata.json's"): - for root_dir_path, _, _ in os.walk(report_dir): - metadata_file_path = os.path.join( - root_dir_path, 'metadata.json') - - self.__mips[root_dir_path] = \ - MetadataInfoParser(metadata_file_path) - - with LogTask(run_name=self.__name, - message="Store look-up ID for checkers in " - "'metadata.json'"): - checkers_in_metadata = { - (analyzer, checker) - for metadata in self.__mips.values() - for analyzer in metadata.analyzers - for checker - in metadata.checkers.get(analyzer, {}).keys()} - self.__store_checker_identifiers(checkers_in_metadata) - - try: - # This session's transaction buffer stores the actual - # run data into the database. - with DBSession(self.__report_server._Session) as session, \ - RunLocking(session, self.__name): - # Actual store operation begins here. - run_id, update_run = self.__add_or_update_run( - session, run_history_time) - - with LogTask(run_name=self.__name, - message="Store reports"): - self.__store_reports( - session, report_dir, source_root, run_id, - file_path_to_id, run_history_time) - - session.commit() - self.__load_report_ids_for_reports_with_fake_checkers( - session) + LOG.debug("Using unzipped folder '%s'", self._zip_dir) + + source_root = self._zip_dir / "root" + blame_root = self._zip_dir / "blame" + report_dir = self._zip_dir / "reports" + filename_to_hash = load_json( + self._zip_dir / "content_hashes.json", {}) + + # Store information that is "global" on the product database level. + with StepLog(self._name, "Store source files"): + LOG.info("[%s] Storing %d source file(s).", self._name, + len(filename_to_hash.keys())) + file_path_to_id = self.__store_source_files( + source_root, filename_to_hash) + self.__add_blame_info(blame_root, filename_to_hash) + + run_history_time = datetime.now() + + with StepLog(self._name, "Parse 'metadata.json's"): + for root_dir_path, _, _ in os.walk(report_dir): + self.__graceful_cancel_if_requested() + metadata_file_path = os.path.join( + root_dir_path, 'metadata.json') + + self.__mips[root_dir_path] = \ + MetadataInfoParser(metadata_file_path) + + self.__graceful_cancel_if_requested() + with StepLog(self._name, + "Store look-up ID for checkers in 'metadata.json'"): + checkers_in_metadata = { + (analyzer, checker) + for metadata in self.__mips.values() + for analyzer in metadata.analyzers + for checker + in metadata.checkers.get(analyzer, {}).keys()} + self.__store_checker_identifiers(checkers_in_metadata) + try: + # This session's transaction buffer stores the actual run data + # into the database. 
+ with DBSession(self.__product.session_factory) as session, \ + RunLock(session, self._name): + run_id, update_run = self.__add_or_update_run( + session, run_history_time) + + with StepLog(self._name, "Store 'reports'"): + self.__store_reports( + session, report_dir, source_root, run_id, + file_path_to_id, run_history_time) + + self.__graceful_cancel_if_requested() + session.commit() + self.__load_report_ids_for_reports_with_fake_checkers( + session) + + # The task should not be cancelled after this point, as the + # "main" bulk of the modifications to the database had already + # been committed, and the user would be left with potentially + # a bunch of "fake checkers" visible in the database. + + if self.__reports_with_fake_checkers: + with StepLog( + self._name, + "Get look-up IDs for checkers not present in " + "'metadata.json'"): + additional_checkers = self.__get_faked_checkers() + # __store_checker_identifiers() has its own + # TRANSACTION! + self.__store_checker_identifiers( + additional_checkers) + + with DBSession(self.__product.session_factory) as session, \ + RunLock(session, self._name): + # The data of the run has been successfully committed + # into the database. Deal with post-processing issues + # that could only be done after-the-fact. if self.__reports_with_fake_checkers: - with LogTask(run_name=self.__name, - message="Get look-up ID for checkers " - "not present in 'metadata.json'"): - additional_checkers = self.__get_faked_checkers() - # __store_checker_identifiers() has its own - # TRANSACTION! - self.__store_checker_identifiers( - additional_checkers) - - with DBSession(self.__report_server._Session) as session, \ - RunLocking(session, self.__name): - # The data of the run has been successfully committed - # into the database. Deal with post-processing issues - # that could only be done after-the-fact. - if self.__reports_with_fake_checkers: - with LogTask(run_name=self.__name, - message="Fix-up report-to-checker " - "associations"): - self.__realise_fake_checkers(session) - - self.finish_checker_run(session, run_id) - session.commit() - - # If it's a run update, do not increment the number - # of runs of the current product. - inc_num_of_runs = 1 if not update_run else None - - self.__report_server._set_run_data_for_curr_product( - inc_num_of_runs, run_history_time) - - runtime = round(time.time() - start_time, 2) - zip_size_kb = round(zip_size / 1024) - - tag_desc = "" - if self.__tag: - tag_desc = f", under tag '{self.__tag}'" - - LOG.info("'%s' stored results (%s KB " - "/decompressed/) to run '%s' (id: %d) %s in " - "%s seconds.", self.user_name, - zip_size_kb, self.__name, run_id, tag_desc, - runtime) - - iso_start_time = datetime.fromtimestamp( - start_time).isoformat() - - log_msg = f"{iso_start_time}, " +\ - f"{runtime}s, " +\ - f'"{self.__product.name}", ' +\ - f'"{self.__name}", ' +\ - f"{zip_size_kb}KB, " +\ - f"{self.__report_count}, " +\ - f"{run_id}" - - STORE_TIME_LOG.info(log_msg) - - return run_id - except (sqlalchemy.exc.OperationalError, - sqlalchemy.exc.ProgrammingError) as ex: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - f"Storing reports to the database failed: {ex}") + with StepLog(self._name, + "Fix-up report-to-checker associations"): + self.__realise_fake_checkers(session) + + self.finish_checker_run(session, run_id) + session.commit() + + end_time = time.time() + + # If the current store() updated an existing run, do not + # increment the number of runs saved for the product. 
+ self.__product.set_cached_run_data( + self.__config_db, + number_of_runs_change=(0 if update_run else 1), + last_store_date=run_history_time) + + run_time: float = (end_time - start_time) + \ + time_spent_on_task_preparation + zip_size_kib: float = original_zip_size / 1024 + + LOG.info("'%s' stored results (decompressed size: %.2f KiB) " + "to run '%s' (ID: %d%s) in %.2f seconds.", + self._user_name, zip_size_kib, self._name, run_id, + f", under tag '{self._tag}'" if self._tag else "", + run_time) + + iso_start_time = datetime.fromtimestamp(start_time) \ + .isoformat() + + log_msg = f"{iso_start_time}, " \ + f"{round(run_time, 2)}s, " \ + f'"{self.__product.name}", ' \ + f'"{self._name}", ' \ + f"{round(zip_size_kib)}KiB, " \ + f"{self.__report_count}, " \ + f"{run_id}" + + STORE_TIME_LOG.info(log_msg) + except (sqlalchemy.exc.OperationalError, + sqlalchemy.exc.ProgrammingError) as ex: + LOG.error("Database error! Storing reports to the " + "database failed: %s", ex) + raise + except TaskCancelHonoured: + raise except Exception as ex: LOG.error("Failed to store results: %s", ex) import traceback @@ -1560,10 +1752,17 @@ def store(self) -> int: # (If the failure is undetectable, the coded grace period expiry # of the lock will allow further store operations to the given # run name.) - with DBSession(self.__report_server._Session) as session: - self.__free_run_lock(session) + with DBSession(self.__product.session_factory) as session: + RunLock(session, self._name).drop_run_lock_from_db() if self.__wrong_src_code_comments: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.SOURCE_FILE, - self.__wrong_src_code_comments) + wrong_files_as_table = twodim.to_str( + "table", + ["File", "Line", "Checker name"], + [wrong_comment.split('|', 3) + for wrong_comment in self.__wrong_src_code_comments]) + + raise ValueError("One or more source files contained invalid " + "source code comments! " + "Failed to set review statuses.\n\n" + f"{wrong_files_as_table}") diff --git a/web/server/codechecker_server/api/report_server.py b/web/server/codechecker_server/api/report_server.py index c98cbc71c0..24f2b39280 100644 --- a/web/server/codechecker_server/api/report_server.py +++ b/web/server/codechecker_server/api/report_server.py @@ -22,7 +22,7 @@ from copy import deepcopy from collections import OrderedDict, defaultdict, namedtuple from datetime import datetime, timedelta -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any, Collection, Dict, List, Optional, Set, Tuple import sqlalchemy from sqlalchemy.sql.expression import or_, and_, not_, func, \ @@ -44,7 +44,8 @@ ReviewStatusRuleSortType, RunData, RunFilter, RunHistoryData, \ RunReportCount, RunSortType, RunTagCount, \ ReviewStatus as API_ReviewStatus, \ - SourceComponentData, SourceFileData, SortMode, SortType + SourceComponentData, SourceFileData, SortMode, SortType, \ + SubmittedRunOptions from codechecker_common import util from codechecker_common.logger import get_logger @@ -69,6 +70,7 @@ Run, RunHistory, RunHistoryAnalysisInfo, RunLock, \ SourceComponent +from .common import exc_to_thrift_reqfail from .thrift_enum_helper import detection_status_enum, \ detection_status_str, report_status_enum, \ review_status_enum, review_status_str, report_extended_data_type_enum @@ -141,39 +143,6 @@ def slugify(text): return norm_text -def exc_to_thrift_reqfail(function): - """ - Convert internal exceptions to RequestFailed exception - which can be sent back on the thrift connections. 
- """ - func_name = function.__name__ - - def wrapper(*args, **kwargs): - try: - res = function(*args, **kwargs) - return res - - except sqlalchemy.exc.SQLAlchemyError as alchemy_ex: - # Convert SQLAlchemy exceptions. - msg = str(alchemy_ex) - import traceback - traceback.print_exc() - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, msg) - except codechecker_api_shared.ttypes.RequestFailed as rf: - LOG.warning("%s:\n%s", func_name, rf.message) - raise - except Exception as ex: - import traceback - traceback.print_exc() - msg = str(ex) - LOG.warning("%s:\n%s", func_name, msg) - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.GENERAL, msg) - - return wrapper - - def get_component_values( session: DBSession, component_name: str @@ -1371,13 +1340,26 @@ def get_is_opened_case(subquery): ) +def remove_reports(session: DBSession, + report_ids: Collection, + chunk_size: int = SQLITE_MAX_VARIABLE_NUMBER): + """ + Removes `Report`s in chunks. + """ + for r_ids in util.chunks(iter(report_ids), chunk_size): + session.query(Report) \ + .filter(Report.id.in_(r_ids)) \ + .delete(synchronize_session=False) + + class ThriftRequestHandler: """ Connect to database and handle thrift client requests. """ def __init__(self, - manager, + session_manager, + task_manager, Session, product, auth_session, @@ -1390,7 +1372,8 @@ def __init__(self, raise ValueError("Cannot initialize request handler without " "a product to serve.") - self._manager = manager + self._manager = session_manager + self._task_manager = task_manager self._product = product self._auth_session = auth_session self._config_database = config_database @@ -1408,34 +1391,6 @@ def _get_username(self): """ return self._auth_session.user if self._auth_session else "Anonymous" - def _set_run_data_for_curr_product( - self, - inc_num_of_runs: Optional[int], - latest_storage_date: Optional[datetime] = None - ): - """ - Increment the number of runs related to the current product with the - given value and set the latest storage date. - """ - values = {} - - if inc_num_of_runs is not None: - values["num_of_runs"] = Product.num_of_runs + inc_num_of_runs - # FIXME: This log is likely overkill. - LOG.info("Run counter in the config database was %s by %i.", - 'increased' if inc_num_of_runs >= 0 else 'decreased', - abs(inc_num_of_runs)) - - if latest_storage_date is not None: - values["latest_storage_date"] = latest_storage_date - - with DBSession(self._config_database) as session: - session.query(Product) \ - .filter(Product.id == self._product.id) \ - .update(values) - - session.commit() - def __require_permission(self, required): """ Helper method to raise an UNAUTHORIZED exception if the user does not @@ -3626,16 +3581,6 @@ def removeRunResults(self, run_ids): failed = True return not failed - def _removeReports(self, session, report_ids, - chunk_size=SQLITE_MAX_VARIABLE_NUMBER): - """ - Removing reports in chunks. 
- """ - for r_ids in util.chunks(iter(report_ids), chunk_size): - session.query(Report) \ - .filter(Report.id.in_(r_ids)) \ - .delete(synchronize_session=False) - @exc_to_thrift_reqfail @timeit def removeRunReports(self, run_ids, report_filter, cmp_data): @@ -3665,7 +3610,7 @@ def removeRunReports(self, run_ids, report_filter, cmp_data): reports_to_delete = [r[0] for r in q] if reports_to_delete: - self._removeReports(session, reports_to_delete) + remove_reports(session, reports_to_delete) session.commit() session.close() @@ -3737,9 +3682,9 @@ def removeRun(self, run_id, run_filter): LOG.info("Runs '%s' were removed by '%s'.", "', '".join(runs), self._get_username()) - # Decrement the number of runs but do not update the latest storage - # date. - self._set_run_data_for_curr_product(-1 * deleted_run_cnt) + self._product.set_cached_run_data( + self._config_database, + number_of_runs_change=-1 * deleted_run_cnt) # Remove unused comments and unused analysis info from the database. # Originally db_cleanup.remove_unused_data() was used here which @@ -3922,16 +3867,92 @@ def getMissingContentHashesForBlameInfo(self, file_hashes): return list(set(file_hashes) - set(fc.content_hash for fc in q)) + def __massStoreRun_common(self, is_async: bool, zipfile_blob: str, + store_opts: SubmittedRunOptions) -> str: + self.__require_store() + if not store_opts.runName: + raise ValueError("A run name is needed to know where to store!") + + from .mass_store_run import MassStoreRunInputHandler, MassStoreRunTask + ih = MassStoreRunInputHandler(self._manager, + self._config_database, + self._Session, + self._task_manager, + self._context, + self._product.id, + store_opts.runName, + store_opts.description, + store_opts.tag, + store_opts.version, + store_opts.force, + store_opts.trimPathPrefixes, + zipfile_blob, + self._get_username()) + ih.check_store_input_validity_at_face_value() + m: MassStoreRunTask = ih.create_mass_store_task(is_async) + self._task_manager.push_task(m) + + return m.token + @exc_to_thrift_reqfail @timeit - def massStoreRun(self, name, tag, version, b64zip, force, - trim_path_prefixes, description): - self.__require_store() + def massStoreRun(self, + name: str, + tag: Optional[str], + version: str, + b64zip: str, + force: bool, + trim_path_prefixes: Optional[List[str]], + description: Optional[str]) -> int: + store_opts = SubmittedRunOptions(runName=name, + tag=tag, + version=version, + force=force, + trimPathPrefixes=trim_path_prefixes, + description=description, + ) + token = self.__massStoreRun_common(False, b64zip, store_opts) + + LOG.info("massStoreRun(): Blocking until task '%s' terminates ...", + token) + + # To be compatible with older (<= 6.24, API <= 6.58) clients which + # may keep using the old API endpoint, simulate awaiting the + # background task in the API handler. + while True: + time.sleep(5) + t = self._task_manager.get_task_record(token) + if t.is_in_terminated_state: + if t.status == "failed": + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.GENERAL, + "massStoreRun()'s processing failed. 
Here follow " + f"the details:\n\n{t.comments}") + if t.status == "cancelled": + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.GENERAL, + "Server administrators cancelled the processing of " + "the massStoreRun() request!") + break + + # Prior to CodeChecker 6.25.0 (API v6.59), massStoreRun() was + # completely synchronous and blocking, and the implementation of the + # storage logic returned the ID of the run that was stored by the + # call. + # massStoreRun() was implemented in + # commit 2b29d787599da0318cd23dbe816377b9bce7236c (September 2017), + # replacing the previously used (and then completely removed!) + # addCheckerRun() function, which also returned the run's ID. + # The official client implementation stopped using this returned value + # from the moment of massStoreRun()'s implementation. + return -1 - from codechecker_server.api.mass_store_run import MassStoreRun - m = MassStoreRun(self, name, tag, version, b64zip, force, - trim_path_prefixes, description) - return m.store() + @exc_to_thrift_reqfail + @timeit + def massStoreRunAsynchronous(self, zipfile_blob: str, + store_opts: SubmittedRunOptions) -> str: + token = self.__massStoreRun_common(True, zipfile_blob, store_opts) + return token @exc_to_thrift_reqfail @timeit diff --git a/web/server/codechecker_server/api/tasks.py b/web/server/codechecker_server/api/tasks.py new file mode 100644 index 0000000000..9abe4fb743 --- /dev/null +++ b/web/server/codechecker_server/api/tasks.py @@ -0,0 +1,391 @@ + +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Handle Thrift requests for background task management. +""" +import datetime +import os +import time +from typing import Dict, List, Optional + +from sqlalchemy.sql.expression import and_, or_ + +from codechecker_api_shared.ttypes import RequestFailed, ErrorCode, Ternary +from codechecker_api.codeCheckerServersideTasks_v6.ttypes import \ + AdministratorTaskInfo, TaskFilter, TaskInfo, TaskStatus + +from codechecker_common.logger import get_logger + +from codechecker_server.profiler import timeit + +from ..database.config_db_model import BackgroundTask as DBTask, Product +from ..database.database import DBSession, conv +from ..task_executors.abstract_task import AbstractTask, TaskCancelHonoured +from ..task_executors.task_manager import TaskManager +from .. import permissions +from .common import exc_to_thrift_reqfail + +LOG = get_logger("server") + + +class TestingDummyTask(AbstractTask): + """Implementation of task object created by ``createDummyTask()``.""" + def __init__(self, token: str, timeout: int, should_fail: bool): + super().__init__(token, None) + self.timeout = timeout + self.should_fail = should_fail + + def _implementation(self, tm: TaskManager) -> None: + counter: int = 0 + while counter < self.timeout: + tm.heartbeat(self) + + counter += 1 + LOG.debug("Dummy task ticking... 
[%d / %d]", + counter, self.timeout) + + if tm.should_cancel(self): + LOG.info("Dummy task '%s' was %s at tick [%d / %d]!", + self.token, + "KILLED BY SHUTDOWN" if tm.is_shutting_down + else "CANCELLED BY ADMIN", + counter, + self.timeout) + raise TaskCancelHonoured(self) + + time.sleep(1) + + if self.should_fail: + raise ValueError("Task self-failure as per the user's request.") + + +def _db_timestamp_to_posix_epoch(d: Optional[datetime.datetime]) \ + -> Optional[int]: + return int(d.replace(tzinfo=datetime.timezone.utc).timestamp()) if d \ + else None + + +def _posix_epoch_to_db_timestamp(s: Optional[int]) \ + -> Optional[datetime.datetime]: + return datetime.datetime.fromtimestamp(s, datetime.timezone.utc) if s \ + else None + + +def _make_task_info(t: DBTask) -> TaskInfo: + """Format API `TaskInfo` from `DBTask`.""" + return TaskInfo( + token=t.token, + taskKind=t.kind, + status=TaskStatus._NAMES_TO_VALUES[t.status.upper()], + productId=t.product_id or 0, + actorUsername=t.username, + summary=t.summary, + comments=t.comments, + enqueuedAtEpoch=_db_timestamp_to_posix_epoch(t.enqueued_at), + startedAtEpoch=_db_timestamp_to_posix_epoch(t.started_at), + completedAtEpoch=_db_timestamp_to_posix_epoch(t.finished_at), + lastHeartbeatEpoch=_db_timestamp_to_posix_epoch( + t.last_seen_at), + cancelFlagSet=t.cancel_flag, + ) + + +def _make_admin_task_info(t: DBTask) -> AdministratorTaskInfo: + """Format API `AdministratorTaskInfo` from `DBTask`.""" + return AdministratorTaskInfo( + normalInfo=_make_task_info(t), + machineId=t.machine_id, + statusConsumed=t.consumed, + ) + + +# These names are inherited from Thrift stubs. +# pylint: disable=invalid-name +class ThriftTaskHandler: + """ + Manages Thrift requests concerning the user-facing Background Tasks API. + """ + + def __init__(self, + configuration_database_sessionmaker, + task_manager: TaskManager, + auth_session): + self._config_db = configuration_database_sessionmaker + self._task_manager = task_manager + self._auth_session = auth_session + + def _get_username(self) -> Optional[str]: + """ + Returns the actually logged in user name. + """ + return self._auth_session.user if self._auth_session else None + + @exc_to_thrift_reqfail + @timeit + def getTaskInfo(self, token: str) -> TaskInfo: + """ + Returns the `TaskInfo` for the task identified by `token`. 
+ """ + with DBSession(self._config_db) as session: + db_task: Optional[DBTask] = session.query(DBTask).get(token) + if not db_task: + raise RequestFailed(ErrorCode.GENERAL, + f"Task '{token}' does not exist!") + + has_right_to_query_status: bool = False + should_set_consumed_flag: bool = False + + if db_task.username == self._get_username(): + has_right_to_query_status = True + should_set_consumed_flag = db_task.is_in_terminated_state + elif db_task.product_id is not None: + associated_product: Optional[Product] = \ + session.query(Product).get(db_task.product_id) + if not associated_product: + LOG.error("No product with ID '%d', but a task is " + "associated with it.", + db_task.product_id) + else: + has_right_to_query_status = \ + permissions.require_permission( + permissions.PRODUCT_ADMIN, + {"config_db_session": session, + "productID": associated_product.id}, + self._auth_session) + + if not has_right_to_query_status: + has_right_to_query_status = permissions.require_permission( + permissions.SUPERUSER, + {"config_db_session": session}, + self._auth_session) + + if not has_right_to_query_status: + raise RequestFailed( + ErrorCode.UNAUTHORIZED, + "Only the task's submitter, a PRODUCT_ADMIN (of the " + "product the task is associated with), or a SUPERUSER " + "can getTaskInfo()!") + + info = _make_task_info(db_task) + + if should_set_consumed_flag: + db_task.consumed = True + session.commit() + + return info + + @exc_to_thrift_reqfail + @timeit + def getTasks(self, filters: TaskFilter) -> List[AdministratorTaskInfo]: + """Obtain tasks matching the `filters` for administrators.""" + if filters.filterForNoProductID and filters.productIDs: + raise RequestFailed(ErrorCode.GENERAL, + "Invalid request, do not set " + "\"no product ID\" and some product IDs in " + "the same filter!") + if filters.filterForNoUsername and filters.usernames: + raise RequestFailed(ErrorCode.GENERAL, + "Invalid request, do not set " + "\"no username\" and some usernames in the " + "same filter!") + + with DBSession(self._config_db) as session: + if filters.filterForNoProductID: + if not permissions.require_permission( + permissions.SUPERUSER, + {"config_db_session": session}, + self._auth_session): + raise RequestFailed( + ErrorCode.UNAUTHORIZED, + "Querying service tasks (not associated with a " + "product) requires SUPERUSER privileges!") + if filters.productIDs: + no_admin_products = [ + prod_id for prod_id in filters.productIDs + if not permissions.require_permission( + permissions.PRODUCT_ADMIN, + {"config_db_session": session, "productID": prod_id}, + self._auth_session)] + if no_admin_products: + no_admin_products = [session.query(Product) + .get(product_id).endpoint + for product_id in no_admin_products] + # pylint: disable=consider-using-f-string + raise RequestFailed(ErrorCode.UNAUTHORIZED, + "Querying product tasks requires " + "PRODUCT_ADMIN rights, but it is " + "missing from product(s): '%s'!" 
+ % ("', '".join(no_admin_products))) + + AND = [] + if filters.tokens: + AND.append(or_(*(DBTask.token.ilike(conv(token)) + for token in filters.tokens))) + + if filters.machineIDs: + AND.append(or_(*(DBTask.machine_id.ilike(conv(machine_id)) + for machine_id in filters.machineIDs))) + + if filters.kinds: + AND.append(or_(*(DBTask.kind.ilike(conv(kind)) + for kind in filters.kinds))) + + if filters.statuses: + AND.append(or_(DBTask.status.in_([ + TaskStatus._VALUES_TO_NAMES[status].lower() + for status in filters.statuses]))) + + if filters.usernames: + AND.append(or_(*(DBTask.username.ilike(conv(username)) + for username in filters.usernames))) + elif filters.filterForNoUsername: + AND.append(DBTask.username.is_(None)) + + if filters.productIDs: + AND.append(or_(DBTask.product_id.in_(filters.productIDs))) + elif filters.filterForNoProductID: + AND.append(DBTask.product_id.is_(None)) + + if filters.enqueuedBeforeEpoch: + AND.append(DBTask.enqueued_at <= _posix_epoch_to_db_timestamp( + filters.enqueuedBeforeEpoch)) + + if filters.enqueuedAfterEpoch: + AND.append(DBTask.enqueued_at >= _posix_epoch_to_db_timestamp( + filters.enqueuedAfterEpoch)) + + if filters.startedBeforeEpoch: + AND.append(DBTask.started_at <= _posix_epoch_to_db_timestamp( + filters.startedBeforeEpoch)) + + if filters.startedAfterEpoch: + AND.append(DBTask.started_at >= _posix_epoch_to_db_timestamp( + filters.startedAfterEpoch)) + + if filters.completedBeforeEpoch: + AND.append(DBTask.finished_at <= _posix_epoch_to_db_timestamp( + filters.completedBeforeEpoch)) + + if filters.completedAfterEpoch: + AND.append(DBTask.finished_at >= _posix_epoch_to_db_timestamp( + filters.completedAfterEpoch)) + + if filters.heartbeatBeforeEpoch: + AND.append(DBTask.last_seen_at <= + _posix_epoch_to_db_timestamp( + filters.heartbeatBeforeEpoch)) + + if filters.heartbeatAfterEpoch: + AND.append(DBTask.last_seen_at >= + _posix_epoch_to_db_timestamp( + filters.heartbeatAfterEpoch)) + + if filters.cancelFlag: + if filters.cancelFlag == Ternary._NAMES_TO_VALUES["OFF"]: + AND.append(DBTask.cancel_flag.is_(False)) + elif filters.cancelFlag == Ternary._NAMES_TO_VALUES["ON"]: + AND.append(DBTask.cancel_flag.is_(True)) + + if filters.consumedFlag: + if filters.consumedFlag == Ternary._NAMES_TO_VALUES["OFF"]: + AND.append(DBTask.consumed.is_(False)) + elif filters.consumedFlag == Ternary._NAMES_TO_VALUES["ON"]: + AND.append(DBTask.consumed.is_(True)) + + ret: List[AdministratorTaskInfo] = [] + has_superuser: Optional[bool] = None + product_admin_rights: Dict[int, bool] = {} + for db_task in session.query(DBTask).filter(and_(*AND)).all(): + if not db_task.product_id: + # Tasks associated with the server, and not a specific + # product, should only be visible to SUPERUSERs. + if has_superuser is None: + has_superuser = permissions.require_permission( + permissions.SUPERUSER, + {"config_db_session": session}, + self._auth_session) + if not has_superuser: + continue + else: + # Tasks associated with a product should only be visible + # to PRODUCT_ADMINs of that product. 
+ try: + if not product_admin_rights[db_task.product_id]: + continue + except KeyError: + product_admin_rights[db_task.product_id] = \ + permissions.require_permission( + permissions.PRODUCT_ADMIN, + {"config_db_session": session, + "productID": db_task.product_id}, + self._auth_session) + if not product_admin_rights[db_task.product_id]: + continue + + ret.append(_make_admin_task_info(db_task)) + + return ret + + @exc_to_thrift_reqfail + @timeit + def cancelTask(self, token: str) -> bool: + """ + Sets the ``cancel_flag`` of the task specified by `token` to `True` + in the database, **REQUESTING** that the task gracefully terminate + itself. + + There are no guarantees that tasks will respect this! + """ + with DBSession(self._config_db) as session: + if not permissions.require_permission( + permissions.SUPERUSER, + {"config_db_session": session}, + self._auth_session): + raise RequestFailed( + ErrorCode.UNAUTHORIZED, + "cancelTask() requires server-level SUPERUSER rights.") + + db_task: Optional[DBTask] = session.query(DBTask).get(token) + if not db_task: + raise RequestFailed(ErrorCode.GENERAL, + f"Task '{token}' does not exist!") + + if not db_task.can_be_cancelled: + return False + + db_task.add_comment("SUPERUSER requested cancellation.", + self._get_username()) + db_task.cancel_flag = True + session.commit() + + return True + + @exc_to_thrift_reqfail + @timeit + def createDummyTask(self, timeout: int, should_fail: bool) -> str: + """ + Used for testing purposes only. + + This function will **ALWAYS** throw an exception when ran outside of a + testing environment. + """ + if "TEST_WORKSPACE" not in os.environ: + raise RequestFailed(ErrorCode.GENERAL, + "createDummyTask() is only available in " + "testing environments!") + + token = self._task_manager.allocate_task_record( + "TaskService::DummyTask", + "Dummy task for testing purposes", + self._get_username(), + None) + + t = TestingDummyTask(token, timeout, should_fail) + self._task_manager.push_task(t) + + return token diff --git a/web/server/codechecker_server/cmd/server.py b/web/server/codechecker_server/cmd/server.py index 33bbbd20f1..7b49982669 100644 --- a/web/server/codechecker_server/cmd/server.py +++ b/web/server/codechecker_server/cmd/server.py @@ -18,13 +18,11 @@ import signal import socket import sys -import time from typing import List, Optional, Tuple, cast from alembic import config from alembic import script from alembic.util import CommandError -import psutil from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import sessionmaker @@ -32,7 +30,7 @@ from codechecker_report_converter import twodim -from codechecker_common import arg, cmd_config, logger, util +from codechecker_common import arg, cmd_config, logger, process, util from codechecker_common.compatibility.multiprocessing import Pool, cpu_count from codechecker_server import instance_manager, server @@ -101,6 +99,25 @@ def add_arguments_to_parser(parser): "authentication settings, TLS certificate" " (cert.pem) and key (key.pem)) from.") + parser.add_argument("--machine-id", + type=str, + dest="machine_id", + default=argparse.SUPPRESS, + required=False, + help=""" +A unique identifier to be used to identify the machine running subsequent +instances of the "same" server process. +This value is only used internally to maintain normal function and bookkeeping +of executed tasks following an unclean server shutdown, e.g., after a crash or +system-level interference. 
+ +If unspecified, defaults to a reasonable default value that is generated from +the computer's hostname, as reported by the operating system. +In most scenarios, there is no need to fine-tune this, except if subsequent +executions of the "same" server is achieved in distinct environments, e.g., +if the server otherwise is running in a container. +""") + parser.add_argument('--host', type=str, dest="listen_address", @@ -424,7 +441,7 @@ def arg_match(options): setattr(args, "instance_manager", True) # If everything is fine, do call the handler for the subcommand. - main(args) + return main(args) parser.set_defaults( func=__handle, func_process_config_file=cmd_config.process_config_file) @@ -762,42 +779,6 @@ def _get_migration_decisions() -> List[Tuple[str, str, bool]]: return 0 -def kill_process_tree(parent_pid, recursive=False): - """Stop the process tree try it gracefully first. - - Try to stop the parent and child processes gracefuly - first if they do not stop in time send a kill signal - to every member of the process tree. - - There is a similar function in the analyzer part please - consider to update that in case of changing this. - """ - proc = psutil.Process(parent_pid) - children = proc.children(recursive) - - # Send a SIGTERM (Ctrl-C) to the main process - proc.terminate() - - # If children processes don't stop gracefully in time, - # slaughter them by force. - _, still_alive = psutil.wait_procs(children, timeout=5) - for p in still_alive: - p.kill() - - # Wait until this process is running. - n = 0 - timeout = 10 - while proc.is_running(): - if n > timeout: - LOG.warning("Waiting for process %s to stop has been timed out" - "(timeout = %s)! Process is still running!", - parent_pid, timeout) - break - - time.sleep(1) - n += 1 - - def __instance_management(args): """Handles the instance-manager commands --list/--stop/--stop-all.""" @@ -842,7 +823,7 @@ def __instance_management(args): continue try: - kill_process_tree(i['pid']) + process.kill_process_tree(i['pid']) LOG.info("Stopped CodeChecker server running on port %s " "in workspace %s (PID: %s)", i['port'], i['workspace'], i['pid']) @@ -1106,16 +1087,21 @@ def server_init_start(args): 'doc_root': context.doc_root, 'version': context.package_git_tag} + # Create a machine ID if the user did not specify one. + machine_id = getattr(args, "machine_id", + f"{socket.gethostname()}:{args.view_port}") + try: - server.start_server(args.config_directory, - package_data, - args.view_port, - cfg_sql_server, - args.listen_address, - 'force_auth' in args, - args.skip_db_cleanup, - context, - environ) + return server.start_server(args.config_directory, + package_data, + args.view_port, + cfg_sql_server, + args.listen_address, + 'force_auth' in args, + args.skip_db_cleanup, + context, + environ, + machine_id) except socket.error as err: if err.errno == errno.EADDRINUSE: LOG.error("Server can't be started, maybe port number (%s) is " @@ -1152,4 +1138,4 @@ def main(args): except FileNotFoundError as fnerr: LOG.error(fnerr) sys.exit(1) - server_init_start(args) + return server_init_start(args) diff --git a/web/server/codechecker_server/database/config_db_model.py b/web/server/codechecker_server/database/config_db_model.py index 00f0c4948e..e2ee5a550b 100644 --- a/web/server/codechecker_server/database/config_db_model.py +++ b/web/server/codechecker_server/database/config_db_model.py @@ -8,8 +8,9 @@ """ SQLAlchemy ORM model for the product configuration database. 
""" -from datetime import datetime +from datetime import datetime, timezone import sys +from typing import Optional from sqlalchemy import Boolean, CHAR, Column, DateTime, Enum, ForeignKey, \ Integer, MetaData, String, Text @@ -158,6 +159,200 @@ def __init__(self, config_key, config_value): self.config_value = config_value +class BackgroundTask(Base): + """ + Information about background tasks executed on a CodeChecker service, + potentially as part of a cluster, stored in the database. + These entities store the metadata for the task objects, but no information + about the actual "input" of the task exists in the database! + """ + __tablename__ = "background_tasks" + + _token_length = 64 + + machine_id = Column(String, index=True) + """ + A unique, implementation-specific identifier of the actual CodeChecker + server instance that knows how to execute the task. + """ + + token = Column(CHAR(length=_token_length), primary_key=True) + kind = Column(String, nullable=False, index=True) + status = Column(Enum( + # A job token (and thus a BackgroundTask record) was allocated, but + # the job is still under preparation. + "allocated", + + # The job is pending on the server, but the server has all the data + # available to eventually perform the job. + "enqueued", + + # The server is actually performing the job. + "running", + + # The server successfully finished completing the job. + "completed", + + # The execution of the job failed. + # In this stage, the "comments" field likely contains more information + # that is not machine-readable. + "failed", + + # The job never started, or its execution was terminated at the + # request of the administrators. + "cancelled", + + # The job never started, or its execution was terminated due to a + # system-level reason (such as the server's foced shutdown). + "dropped", + ), + nullable=False, + default="enqueued", + index=True) + + product_id = Column(Integer, + ForeignKey("products.id", + deferrable=False, + initially="IMMEDIATE", + ondelete="CASCADE"), + nullable=True, + index=True) + """ + If the job is tightly associated with a product, the ID of the `Product` + entity with which it is associated. + """ + + username = Column(String, nullable=True) + """ + The main actor who was responsible for the creation of the job task. + """ + + summary = Column(String, nullable=False) + comments = Column(Text, nullable=True) + + enqueued_at = Column(DateTime, nullable=True) + started_at = Column(DateTime, nullable=True) + finished_at = Column(DateTime, nullable=True) + + last_seen_at = Column(DateTime, nullable=True) + """ + Contains the timestamp, only when the job is not yet "finished", when the + job last synchronised against the database, e.g., when it last checked the + "cancel_flag" field. + + This is used for health checking whether the background worker is actually + doing something, as a second line of defence to uncover "dropped" jobs, + e.g., when the servers have failed and the new server can not identify + jobs from its "previous life". + """ + + consumed = Column(Boolean, nullable=False, + default=False, server_default=false()) + """ + Whether the status of the job was checked **BY THE MAIN ACTOR** (username). + """ + + cancel_flag = Column(Boolean, nullable=False, + default=False, server_default=false()) + """ + Whether a SUPERUSER has signalled that the job should be cancelled. + + Note, that cancelling is a co-operative action: jobs are never actually + "killed" on the O.S. 
level from the outside; rather, each job is expected + to be implemented in a way that they regularly query this bit, and if set, + act accordingly. + """ + + def __init__(self, + token: str, + kind: str, + summary: str, + machine_id: str, + user_name: Optional[str], + product: Optional[Product] = None, + ): + self.machine_id = machine_id + self.token = token + self.kind = kind + self.status = "allocated" + self.summary = summary + self.username = user_name + self.last_seen_at = datetime.now(timezone.utc) + + if product: + self.product_id = product.id + + def add_comment(self, comment: str, actor: Optional[str] = None): + if not self.comments: + self.comments = "" + elif self.comments: + self.comments += "\n----------\n" + + self.comments += f"{actor if actor else ''} " \ + f"at {str(datetime.now(timezone.utc))}:\n{comment}" + + def heartbeat(self): + """Update `last_seen_at`.""" + if self.status in ["enqueued", "running"]: + self.last_seen_at = datetime.now(timezone.utc) + + def set_enqueued(self): + """Marks the job as successfully enqueued.""" + if self.status != "allocated": + raise ValueError( + f"Invalid transition '{str(self.status)}' -> 'enqueued'") + + self.status = "enqueued" + self.enqueued_at = datetime.now(timezone.utc) + + def set_running(self): + """Marks the job as currently executing.""" + if self.status != "enqueued": + raise ValueError( + f"Invalid transition '{str(self.status)}' -> 'running'") + + self.status = "running" + self.started_at = datetime.now(timezone.utc) + + def set_finished(self, successfully: bool = True): + """Marks the job as successfully completed or failed.""" + new_status = "completed" if successfully else "failed" + if self.status != "running": + raise ValueError( + f"Invalid transition '{str(self.status)}' -> '{new_status}'") + + self.status = new_status + self.finished_at = datetime.now(timezone.utc) + + def set_abandoned(self, force_dropped_status: bool = False): + """ + Marks the job as cancelled or dropped based on whether the + cancel flag is set. + """ + new_status = "cancelled" \ + if not force_dropped_status and self.cancel_flag \ + else "dropped" + + self.status = new_status + self.finished_at = datetime.now(timezone.utc) + + @property + def is_in_terminated_state(self) -> bool: + """ + Returns whether the current task has finished execution in some way, + for some reason. + """ + return self.status not in ["allocated", "enqueued", "running"] + + @property + def can_be_cancelled(self) -> bool: + """ + Returns whether the task is in a state where setting `cancel_flag` + is meaningful. + """ + return not self.is_in_terminated_state and not self.cancel_flag + + IDENTIFIER = { 'identifier': "ConfigDatabase", 'orm_meta': CC_META diff --git a/web/server/codechecker_server/migrations/config/versions/73b04c41885b_implemented_keeping_track_of_background_tasks.py b/web/server/codechecker_server/migrations/config/versions/73b04c41885b_implemented_keeping_track_of_background_tasks.py new file mode 100644 index 0000000000..45b3ab7bc1 --- /dev/null +++ b/web/server/codechecker_server/migrations/config/versions/73b04c41885b_implemented_keeping_track_of_background_tasks.py @@ -0,0 +1,79 @@ +""" +Implemented keeping track of background tasks through corresponding records +in the server-wide configuration database. + +Revision ID: 73b04c41885b +Revises: 00099e8bc212 +Create Date: 2023-09-21 14:24:27.395597 +""" + +from alembic import op +import sqlalchemy as sa + + +# Revision identifiers, used by Alembic. 
+revision = '73b04c41885b'
+down_revision = '00099e8bc212'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.create_table(
+        "background_tasks",
+        sa.Column("machine_id", sa.String(), nullable=True),
+        sa.Column("token", sa.CHAR(length=64), nullable=False),
+        sa.Column("kind", sa.String(), nullable=False),
+        sa.Column("status", sa.Enum("allocated",
+                                    "enqueued",
+                                    "running",
+                                    "completed",
+                                    "failed",
+                                    "cancelled",
+                                    "dropped",
+                                    name="background_task_statuses"),
+                  nullable=False),
+        sa.Column("product_id", sa.Integer(), nullable=True),
+        sa.Column("summary", sa.String(), nullable=False),
+        sa.Column("comments", sa.Text(), nullable=True),
+        sa.Column("username", sa.String(), nullable=True),
+        sa.Column("enqueued_at", sa.DateTime(), nullable=True),
+        sa.Column("started_at", sa.DateTime(), nullable=True),
+        sa.Column("finished_at", sa.DateTime(), nullable=True),
+        sa.Column("last_seen_at", sa.DateTime(), nullable=True),
+        sa.Column("consumed", sa.Boolean(), nullable=False,
+                  server_default=sa.false()),
+        sa.Column("cancel_flag", sa.Boolean(), nullable=False,
+                  server_default=sa.false()),
+
+        sa.ForeignKeyConstraint(
+            ["product_id"], ["products.id"],
+            name=op.f("fk_background_tasks_product_id_products"),
+            deferrable=False,
+            ondelete="CASCADE",
+            initially="IMMEDIATE"),
+        sa.PrimaryKeyConstraint("token", name=op.f("pk_background_tasks"))
+    )
+    op.create_index(op.f("ix_background_tasks_kind"), "background_tasks",
+                    ["kind"], unique=False)
+    op.create_index(op.f("ix_background_tasks_machine_id"), "background_tasks",
+                    ["machine_id"], unique=False)
+    op.create_index(op.f("ix_background_tasks_product_id"), "background_tasks",
+                    ["product_id"], unique=False)
+    op.create_index(op.f("ix_background_tasks_status"), "background_tasks",
+                    ["status"], unique=False)
+
+
+def downgrade():
+    ctx = op.get_context()
+    dialect = ctx.dialect.name
+
+    op.drop_index(op.f("ix_background_tasks_status"), "background_tasks")
+    op.drop_index(op.f("ix_background_tasks_product_id"), "background_tasks")
+    op.drop_index(op.f("ix_background_tasks_machine_id"), "background_tasks")
+    op.drop_index(op.f("ix_background_tasks_kind"), "background_tasks")
+
+    op.drop_table("background_tasks")
+
+    if dialect == "postgresql":
+        op.execute("DROP TYPE background_task_statuses;")
diff --git a/web/server/codechecker_server/product.py b/web/server/codechecker_server/product.py
new file mode 100644
index 0000000000..3c18de4339
--- /dev/null
+++ b/web/server/codechecker_server/product.py
@@ -0,0 +1,236 @@
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+"""
+The in-memory representation and access methods for querying and mutating a
+"Product": a separate and self-contained database and entity containing
+analysis results and associated information, which a CodeChecker server can
+connect to.
+""" +from datetime import datetime +from typing import Optional + +from sqlalchemy.orm import sessionmaker + +from codechecker_api_shared.ttypes import DBStatus + +from codechecker_common.logger import get_logger + +from .database import database, db_cleanup +from .database.config_db_model import Product as DBProduct +from .database.database import DBSession +from .database.run_db_model import \ + IDENTIFIER as RUN_META, \ + Run, RunLock + + +LOG = get_logger("server") + + +class Product: + """ + Represents a product, which is a distinct storage of analysis reports in + a separate database (and database connection) with its own access control. + """ + + # The amount of SECONDS that need to pass after the last unsuccessful + # connect() call so the next could be made. + CONNECT_RETRY_TIMEOUT = 300 + + def __init__(self, id_: int, endpoint: str, display_name: str, + connection_string: str, context, check_env): + """ + Set up a new managed product object for the configuration given. + """ + self.__id = id_ + self.__endpoint = endpoint + self.__display_name = display_name + self.__connection_string = connection_string + self.__driver_name = None + self.__context = context + self.__check_env = check_env + self.__engine = None + self.__session = None + self.__db_status = DBStatus.MISSING + + self.__last_connect_attempt = None + + @property + def id(self): + return self.__id + + @property + def endpoint(self): + """ + Returns the accessible URL endpoint of the product. + """ + return self.__endpoint + + @property + def name(self): + """ + Returns the display name of the product. + """ + return self.__display_name + + @property + def session_factory(self): + """ + Returns the session maker on this product's database engine which + can be used to initiate transactional connections. + """ + return self.__session + + @property + def driver_name(self): + """ + Returns the name of the sql driver (sqlite, postgres). + """ + return self.__driver_name + + @property + def db_status(self): + """ + Returns the status of the database which belongs to this product. + Call connect to update it. + """ + return self.__db_status + + @property + def last_connection_failure(self): + """ + Returns the reason behind the last executed connection attempt's + failure. + """ + return self.__last_connect_attempt[1] if self.__last_connect_attempt \ + else None + + def connect(self, init_db=False): + """ + Initiates the actual connection to the database configured for the + product. + + Each time the connect is called the db_status is updated. + """ + LOG.debug("Checking '%s' database.", self.endpoint) + + sql_server = database.SQLServer.from_connection_string( + self.__connection_string, + self.__endpoint, + RUN_META, + self.__context.run_migration_root, + interactive=False, + env=self.__check_env) + + if isinstance(sql_server, database.PostgreSQLServer): + self.__driver_name = 'postgresql' + elif isinstance(sql_server, database.SQLiteDatabase): + self.__driver_name = 'sqlite' + + try: + LOG.debug("Trying to connect to the database") + + # Create the SQLAlchemy engine. 
+ self.__engine = sql_server.create_engine() + LOG.debug(self.__engine) + + self.__session = sessionmaker(bind=self.__engine) + + self.__engine.execute('SELECT 1') + self.__db_status = sql_server.check_schema() + self.__last_connect_attempt = None + + if self.__db_status == DBStatus.SCHEMA_MISSING and init_db: + LOG.debug("Initializing new database schema.") + self.__db_status = sql_server.connect(init_db) + + except Exception as ex: + LOG.exception("The database for product '%s' cannot be" + " connected to.", self.endpoint) + self.__db_status = DBStatus.FAILED_TO_CONNECT + self.__last_connect_attempt = (datetime.now(), str(ex)) + + def get_details(self): + """ + Get details for a product from the database. + + It may throw different error messages depending on the used SQL driver + adapter in case of connection error. + """ + with DBSession(self.session_factory) as run_db_session: + run_locks = run_db_session.query(RunLock.name) \ + .filter(RunLock.locked_at.isnot(None)) \ + .all() + + runs_in_progress = set(run_lock[0] for run_lock in run_locks) + + num_of_runs = run_db_session.query(Run).count() + + latest_store_to_product = "" + if num_of_runs: + last_updated_run = run_db_session.query(Run) \ + .order_by(Run.date.desc()) \ + .limit(1) \ + .one_or_none() + + latest_store_to_product = last_updated_run.date + + return num_of_runs, runs_in_progress, latest_store_to_product + + def teardown(self): + """ + Disposes the database connection to the product's backend. + """ + if self.__db_status == DBStatus.FAILED_TO_CONNECT: + return + + self.__engine.dispose() + + self.__session = None + self.__engine = None + + def cleanup_run_db(self): + """ + Cleanup the run database which belongs to this product. + """ + LOG.info("[%s] Garbage collection started...", self.endpoint) + + db_cleanup.remove_expired_data(self) + db_cleanup.remove_unused_data(self) + db_cleanup.update_contextual_data(self, self.__context) + + LOG.info("[%s] Garbage collection finished.", self.endpoint) + return True + + def set_cached_run_data(self, + config_db_session_factory, + number_of_runs_change: Optional[int] = None, + last_store_date: Optional[datetime] = None): + """ + Update the configuration database row for the current `Product` + for the keys that contain cached summaries of what would otherwise + be queriable from the product's database. + """ + updates = {} + + if number_of_runs_change: + updates["num_of_runs"] = DBProduct.num_of_runs \ + + number_of_runs_change + LOG.info("%s: Changing 'num_of_runs' in CONFIG database by %s%i.", + self.__endpoint, + '+' if number_of_runs_change > 0 else '-', + abs(number_of_runs_change)) + + if last_store_date: + updates["latest_storage_date"] = last_store_date + + if updates: + with DBSession(config_db_session_factory) as session: + session.query(DBProduct) \ + .filter(DBProduct.id == self.__id) \ + .update(updates) + session.commit() diff --git a/web/server/codechecker_server/routing.py b/web/server/codechecker_server/routing.py index 79ac8d0686..34fbb82f87 100644 --- a/web/server/codechecker_server/routing.py +++ b/web/server/codechecker_server/routing.py @@ -15,25 +15,28 @@ from codechecker_web.shared.version import SUPPORTED_VERSIONS -# A list of top-level path elements under the webserver root -# which should not be considered as a product route. -NON_PRODUCT_ENDPOINTS = ['index.html', - 'images', - 'docs', - 'live', - 'ready'] +# A list of top-level path elements under the webserver root which should not +# be considered as a product route. 
+NON_PRODUCT_ENDPOINTS = ["index.html", + "images", + "docs", + "live", + "ready", + ] # A list of top-level path elements in requests (such as Thrift endpoints) # which should not be considered as a product route. -NON_PRODUCT_ENDPOINTS += ['Authentication', - 'Products', - 'CodeCheckerService'] +NON_PRODUCT_ENDPOINTS += ["Authentication", + "Products", + "CodeCheckerService", + "Tasks", + ] # A list of top-level path elements under the webserver root which should -# be protected by authentication requirement when accessing the server. +# be protected by authentication requirements when accessing the server. PROTECTED_ENTRY_POINTS = ['', # Empty string in a request is 'index.html'. - 'index.html'] + "index.html"] def is_valid_product_endpoint(uripart): @@ -68,9 +71,8 @@ def is_supported_version(version): If supported, returns the major and minor version as a tuple. """ - version = version.lstrip('v') - version_parts = version.split('.') + version_parts = version.split('.', 2) # We don't care if accidentally the version tag contains a revision number. major, minor = int(version_parts[0]), int(version_parts[1]) @@ -113,9 +115,8 @@ def split_client_POST_request(path): Returns the product endpoint, the API version and the API service endpoint as a tuple of 3. """ - # A standard POST request from an API client looks like: - # http://localhost:8001/[product-name]// + # http://localhost:8001/[product-name]/v/ # where specifying the product name is optional. split_path = urlparse(path).path.split('/', 3) diff --git a/web/server/codechecker_server/server.py b/web/server/codechecker_server/server.py index 40bdf6db4d..183a390f8a 100644 --- a/web/server/codechecker_server/server.py +++ b/web/server/codechecker_server/server.py @@ -12,11 +12,12 @@ import atexit -import datetime +from collections import Counter from functools import partial from hashlib import sha256 from http.server import HTTPServer, SimpleHTTPRequestHandler import os +import pathlib import posixpath from random import sample import shutil @@ -25,10 +26,10 @@ import ssl import sys import stat -from typing import List, Optional, Tuple +import time +from typing import Dict, List, Optional, Tuple, cast import urllib -import multiprocess from sqlalchemy.orm import sessionmaker from sqlalchemy.sql.expression import func from thrift.protocol import TJSONProtocol @@ -47,11 +48,14 @@ codeCheckerProductService as ProductAPI_v6 from codechecker_api.ServerInfo_v6 import \ serverInfoService as ServerInfoAPI_v6 +from codechecker_api.codeCheckerServersideTasks_v6 import \ + codeCheckerServersideTaskService as TaskAPI_v6 from codechecker_common import util -from codechecker_common.logger import get_logger from codechecker_common.compatibility.multiprocessing import \ - Pool, cpu_count + Pool, Process, Queue, Value, cpu_count +from codechecker_common.logger import get_logger, signal_log +from codechecker_common.util import generate_random_token from codechecker_web.shared import database_status from codechecker_web.shared.version import get_version_str @@ -63,12 +67,15 @@ from .api.report_server import ThriftRequestHandler as ReportHandler_v6 from .api.server_info_handler import \ ThriftServerInfoHandler as ServerInfoHandler_v6 -from .database import database, db_cleanup +from .api.tasks import ThriftTaskHandler as TaskHandler_v6 from .database.config_db_model import Product as ORMProduct, \ Configuration as ORMConfiguration from .database.database import DBSession -from .database.run_db_model import IDENTIFIER as RUN_META, Run, RunLock -from .tmp 
import get_tmp_dir_hash +from .database.run_db_model import Run +from .product import Product +from .task_executors.main import executor as background_task_executor +from .task_executors.task_manager import \ + TaskManager as BackgroundTaskManager LOG = get_logger('server') @@ -85,8 +92,8 @@ def __init__(self, request, client_address, server): self.path = None super().__init__(request, client_address, server) - def log_message(self, *args): - """ Silencing http server. """ + def log_message(self, *_args): + """Silencing HTTP server.""" return def send_thrift_exception(self, error_msg, iprot, oprot, otrans): @@ -104,7 +111,7 @@ def send_thrift_exception(self, error_msg, iprot, oprot, otrans): result = otrans.getvalue() self.send_response(200) self.send_header("content-type", "application/x-thrift") - self.send_header("Content-Length", len(result)) + self.send_header("Content-Length", str(len(result))) self.end_headers() self.wfile.write(result) @@ -369,22 +376,22 @@ def do_POST(self): major_version, _ = version_supported if major_version == 6: - if request_endpoint == 'Authentication': + if request_endpoint == "Authentication": auth_handler = AuthHandler_v6( self.server.manager, self.auth_session, self.server.config_session) processor = AuthAPI_v6.Processor(auth_handler) - elif request_endpoint == 'Configuration': + elif request_endpoint == "Configuration": conf_handler = ConfigHandler_v6( self.auth_session, self.server.config_session) processor = ConfigAPI_v6.Processor(conf_handler) - elif request_endpoint == 'ServerInfo': + elif request_endpoint == "ServerInfo": server_info_handler = ServerInfoHandler_v6(version) processor = ServerInfoAPI_v6.Processor( server_info_handler) - elif request_endpoint == 'Products': + elif request_endpoint == "Products": prod_handler = ProductHandler_v6( self.server, self.auth_session, @@ -392,7 +399,13 @@ def do_POST(self): product, version) processor = ProductAPI_v6.Processor(prod_handler) - elif request_endpoint == 'CodeCheckerService': + elif request_endpoint == "Tasks": + task_handler = TaskHandler_v6( + self.server.config_session, + self.server.task_manager, + self.auth_session) + processor = TaskAPI_v6.Processor(task_handler) + elif request_endpoint == "CodeCheckerService": # This endpoint is a product's report_server. if not product: error_msg = \ @@ -408,6 +421,7 @@ def do_POST(self): acc_handler = ReportHandler_v6( self.server.manager, + self.server.task_manager, product.session_factory, product, self.auth_session, @@ -437,7 +451,7 @@ def do_POST(self): self.send_response(200) self.send_header("content-type", "application/x-thrift") - self.send_header("Content-Length", len(result)) + self.send_header("Content-Length", str(len(result))) self.end_headers() self.wfile.write(result) return @@ -484,182 +498,6 @@ def translate_path(self, path): return path -class Product: - """ - Represents a product, which is a distinct storage of analysis reports in - a separate database (and database connection) with its own access control. - """ - - # The amount of SECONDS that need to pass after the last unsuccessful - # connect() call so the next could be made. - CONNECT_RETRY_TIMEOUT = 300 - - def __init__(self, id_: int, endpoint: str, display_name: str, - connection_string: str, context, check_env): - """ - Set up a new managed product object for the configuration given. 
- """ - self.__id = id_ - self.__endpoint = endpoint - self.__display_name = display_name - self.__connection_string = connection_string - self.__driver_name = None - self.__context = context - self.__check_env = check_env - self.__engine = None - self.__session = None - self.__db_status = DBStatus.MISSING - - self.__last_connect_attempt = None - - @property - def id(self): - return self.__id - - @property - def endpoint(self): - """ - Returns the accessible URL endpoint of the product. - """ - return self.__endpoint - - @property - def name(self): - """ - Returns the display name of the product. - """ - return self.__display_name - - @property - def session_factory(self): - """ - Returns the session maker on this product's database engine which - can be used to initiate transactional connections. - """ - return self.__session - - @property - def driver_name(self): - """ - Returns the name of the sql driver (sqlite, postgres). - """ - return self.__driver_name - - @property - def db_status(self): - """ - Returns the status of the database which belongs to this product. - Call connect to update it. - """ - return self.__db_status - - @property - def last_connection_failure(self): - """ - Returns the reason behind the last executed connection attempt's - failure. - """ - return self.__last_connect_attempt[1] if self.__last_connect_attempt \ - else None - - def connect(self, init_db=False): - """ - Initiates the actual connection to the database configured for the - product. - - Each time the connect is called the db_status is updated. - """ - LOG.debug("Checking '%s' database.", self.endpoint) - - sql_server = database.SQLServer.from_connection_string( - self.__connection_string, - self.__endpoint, - RUN_META, - self.__context.run_migration_root, - interactive=False, - env=self.__check_env) - - if isinstance(sql_server, database.PostgreSQLServer): - self.__driver_name = 'postgresql' - elif isinstance(sql_server, database.SQLiteDatabase): - self.__driver_name = 'sqlite' - - try: - LOG.debug("Trying to connect to the database") - - # Create the SQLAlchemy engine. - self.__engine = sql_server.create_engine() - LOG.debug(self.__engine) - - self.__session = sessionmaker(bind=self.__engine) - - self.__engine.execute('SELECT 1') - self.__db_status = sql_server.check_schema() - self.__last_connect_attempt = None - - if self.__db_status == DBStatus.SCHEMA_MISSING and init_db: - LOG.debug("Initializing new database schema.") - self.__db_status = sql_server.connect(init_db) - - except Exception as ex: - LOG.exception("The database for product '%s' cannot be" - " connected to.", self.endpoint) - self.__db_status = DBStatus.FAILED_TO_CONNECT - self.__last_connect_attempt = (datetime.datetime.now(), str(ex)) - - def get_details(self): - """ - Get details for a product from the database. - - It may throw different error messages depending on the used SQL driver - adapter in case of connection error. 
- """ - with DBSession(self.session_factory) as run_db_session: - run_locks = run_db_session.query(RunLock.name) \ - .filter(RunLock.locked_at.isnot(None)) \ - .all() - - runs_in_progress = set(run_lock[0] for run_lock in run_locks) - - num_of_runs = run_db_session.query(Run).count() - - latest_store_to_product = "" - if num_of_runs: - last_updated_run = run_db_session.query(Run) \ - .order_by(Run.date.desc()) \ - .limit(1) \ - .one_or_none() - - latest_store_to_product = last_updated_run.date - - return num_of_runs, runs_in_progress, latest_store_to_product - - def teardown(self): - """ - Disposes the database connection to the product's backend. - """ - if self.__db_status == DBStatus.FAILED_TO_CONNECT: - return - - self.__engine.dispose() - - self.__session = None - self.__engine = None - - def cleanup_run_db(self): - """ - Cleanup the run database which belongs to this product. - """ - LOG.info("[%s] Garbage collection started...", self.endpoint) - - db_cleanup.remove_expired_data(self) - db_cleanup.remove_unused_data(self) - db_cleanup.update_contextual_data(self, self.__context) - - LOG.info("[%s] Garbage collection finished.", self.endpoint) - return True - - def _do_db_cleanup(context, check_env, id_: int, endpoint: str, display_name: str, connection_str: str) -> Tuple[Optional[bool], str]: @@ -745,7 +583,10 @@ def __init__(self, pckg_data, context, check_env, - manager): + manager: session_manager.SessionManager, + machine_id: str, + task_queue: Queue, + server_shutdown_flag: Value): LOG.debug("Initializing HTTP server...") @@ -756,6 +597,7 @@ def __init__(self, self.context = context self.check_env = check_env self.manager = manager + self.address, self.port = server_address self.__products = {} # Create a database engine for the configuration database. @@ -764,6 +606,12 @@ def __init__(self, self.config_session = sessionmaker(bind=self.__engine) self.manager.set_database_connection(self.config_session) + self.__task_queue = task_queue + self.task_manager = BackgroundTaskManager( + task_queue, self.config_session, self.check_env, + server_shutdown_flag, machine_id, + pathlib.Path(self.context.codechecker_workspace)) + # Load the initial list of products and set up the server. cfg_sess = self.config_session() permissions.initialise_defaults('SYSTEM', { @@ -780,7 +628,7 @@ def __init__(self, cfg_sess.close() try: - HTTPServer.__init__(self, server_address, + HTTPServer.__init__(self, (self.address, self.port), RequestHandlerClass, bind_and_activate=True) ssl_key_file = os.path.join(config_directory, "key.pem") @@ -806,13 +654,23 @@ def __init__(self, else: LOG.info("Searching for SSL key at %s, cert at %s, " - "not found...", ssl_key_file, ssl_cert_file) + "not found!", ssl_key_file, ssl_cert_file) LOG.info("Falling back to simple, insecure HTTP.") except Exception as e: LOG.error("Couldn't start the server: %s", e.__str__()) raise + # If the server was started with the port 0, the OS will pick an + # available port. + # For this reason, we will update the port variable after server + # ininitialisation. + self.port = self.socket.getsockname()[1] + + @property + def formatted_address(self) -> str: + return f"{str(self.address)}:{self.port}" + def configure_keepalive(self): """ Enable keepalive on the socket and some TCP keepalive configuration @@ -855,17 +713,40 @@ def configure_keepalive(self): LOG.error('Failed to set TCP max keepalive probe: %s', ret) def terminate(self): - """ - Terminating the server. 
- """ + """Terminates the server and releases associated resources.""" try: self.server_close() + self.__task_queue.close() + self.__task_queue.join_thread() self.__engine.dispose() + + sys.exit(128 + signal.SIGINT) except Exception as ex: LOG.error("Failed to shut down the WEB server!") LOG.error(str(ex)) sys.exit(1) + def serve_forever_with_shutdown_handler(self): + """ + Calls `HTTPServer.serve_forever` but handles SIGINT (2) signals + gracefully such that the open resources are properly cleaned up. + """ + def _handler(signum: int, _frame): + if signum not in [signal.SIGINT]: + signal_log(LOG, "ERROR", "Signal " + f"<{signal.Signals(signum).name} ({signum})> " + "handling attempted by " + "'serve_forever_with_shutdown_handler'!") + return + + signal_log(LOG, "DEBUG", f"{os.getpid()}: Received " + f"{signal.Signals(signum).name} ({signum}), " + "performing shutdown ...") + self.terminate() + + signal.signal(signal.SIGINT, _handler) + return self.serve_forever() + def add_product(self, orm_product, init_db=False): """ Adds a product to the list of product databases connected to @@ -990,6 +871,10 @@ class CCSimpleHttpServerIPv6(CCSimpleHttpServer): address_family = socket.AF_INET6 + @property + def formatted_address(self) -> str: + return f"[{str(self.address)}]:{self.port}" + def __make_root_file(root_file): """ @@ -1000,7 +885,7 @@ def __make_root_file(root_file): LOG.debug("Generating initial superuser (root) credentials...") username = ''.join(sample("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 6)) - password = get_tmp_dir_hash()[:8] + password = generate_random_token(8) LOG.info("A NEW superuser credential was generated for the server. " "This information IS SAVED, thus subsequent server starts " @@ -1028,16 +913,16 @@ def __make_root_file(root_file): return secret -def start_server(config_directory, package_data, port, config_sql_server, - listen_address, force_auth, skip_db_cleanup: bool, - context, check_env): +def start_server(config_directory: str, package_data, port: int, + config_sql_server, listen_address: str, + force_auth: bool, skip_db_cleanup: bool, + context, check_env, machine_id: str) -> int: """ - Start http server to handle web client and thrift requests. + Starts the HTTP server to handle Web client and Thrift requests, execute + background jobs. """ LOG.debug("Starting CodeChecker server...") - server_addr = (listen_address, port) - root_file = os.path.join(config_directory, 'root.user') if not os.path.exists(root_file): LOG.warning("Server started without 'root.user' present in " @@ -1103,92 +988,457 @@ def start_server(config_directory, package_data, port, config_sql_server, else: LOG.debug("Skipping db_cleanup, as requested.") + api_processes: Dict[int, Process] = {} + requested_api_threads = cast(int, manager.worker_processes) \ + or cpu_count() + + bg_processes: Dict[int, Process] = {} + requested_bg_threads = cast(int, + manager.background_worker_processes) \ + or requested_api_threads + # Note that Queue under the hood uses OS-level primitives such as a socket + # or a pipe, where the read-write buffers have a **LIMITED** capacity, and + # are usually **NOT** backed by the full amount of available system memory. 
+ bg_task_queue: Queue = Queue() + is_server_shutting_down = Value('B', False) + + def _cleanup_incomplete_tasks(action: str) -> int: + config_db = config_sql_server.create_engine() + config_session_factory = sessionmaker(bind=config_db) + tm = BackgroundTaskManager( + bg_task_queue, config_session_factory, check_env, + is_server_shutting_down, machine_id, + pathlib.Path(context.codechecker_workspace)) + + try: + tm.destroy_all_temporary_data() + except OSError: + LOG.warning("Clearing task-temporary storage space failed!") + import traceback + traceback.print_exc() + + try: + return tm.drop_all_incomplete_tasks(action) + finally: + config_db.dispose() + + dropped_tasks = _cleanup_incomplete_tasks( + "New server started with the same machine_id, assuming the old " + "server is dead and won't be able to finish the task.") + if dropped_tasks: + LOG.info("At server startup, dropped %d background tasks left behind " + "by a previous server instance matching machine ID '%s'.", + dropped_tasks, machine_id) + server_clazz = CCSimpleHttpServer - if ':' in server_addr[0]: + if ':' in listen_address: # IPv6 address specified for listening. # FIXME: Python>=3.8 automatically handles IPv6 if ':' is in the bind # address, see https://bugs.python.org/issue24209. server_clazz = CCSimpleHttpServerIPv6 - http_server = server_clazz(server_addr, + http_server = server_clazz((listen_address, port), RequestHandler, config_directory, config_sql_server, package_data, context, check_env, - manager) + manager, + machine_id, + bg_task_queue, + is_server_shutting_down) + + try: + instance_manager.register(os.getpid(), + os.path.abspath( + context.codechecker_workspace), + port) + except IOError as ex: + LOG.debug(ex.strerror) - # If the server was started with the port 0, the OS will pick an available - # port. For this reason we will update the port variable after server - # initialization. - port = http_server.socket.getsockname()[1] + def unregister_handler(pid): + # Handle errors during instance unregistration. + # The workspace might be removed so updating the config content might + # fail. + try: + instance_manager.unregister(pid) + except IOError as ex: + LOG.debug(ex.strerror) - processes = [] + atexit.register(unregister_handler, os.getpid()) - def signal_handler(signum, _): + def _start_process_with_no_signal_handling(**kwargs): """ - Handle SIGTERM to stop the server running. + Starts a `multiprocessing.Process` in a context where the signal + handling is temporarily disabled, such that the child process does not + inherit any signal handling from the parent. + + Child processes spawned after the main process set up its signals + MUST NOT inherit the signal handling because that would result in + multiple children firing on the SIGTERM handler, for example. + + For this reason, we temporarily disable the signal handling here by + returning to the initial defaults, and then restore the main process's + signal handling to be the usual one. """ - LOG.info("Shutting down the WEB server on [%s:%d]", - '[' + listen_address + ']' - if server_clazz is CCSimpleHttpServerIPv6 else listen_address, - port) - http_server.terminate() + signals_to_disable = [signal.SIGINT, signal.SIGTERM] + if sys.platform != "win32": + signals_to_disable += [signal.SIGCHLD, signal.SIGHUP] - # Terminate child processes. 
- for pp in processes: - pp.terminate() + existing_signal_handlers = {} + for signum in signals_to_disable: + existing_signal_handlers[signum] = signal.signal( + signum, signal.SIG_DFL) + + p = Process(**kwargs) + p.start() + + for signum in signals_to_disable: + signal.signal(signum, existing_signal_handlers[signum]) + + return p + + # Save a process-wide but not shared counter in the main process for how + # many subprocesses of each kind had been spawned, as this will be used in + # the internal naming of the workers. + spawned_api_proc_count: int = 0 + spawned_bg_proc_count: int = 0 + + def spawn_api_process(): + """Starts a single HTTP API worker process for CodeChecker server.""" + nonlocal spawned_api_proc_count + spawned_api_proc_count += 1 + + p = _start_process_with_no_signal_handling( + target=http_server.serve_forever_with_shutdown_handler, + name=f"CodeChecker-API-{spawned_api_proc_count}") + api_processes[cast(int, p.pid)] = p + signal_log(LOG, "DEBUG", f"API handler child process {p.pid} started!") + return p + + LOG.info("Using %d API request handler processes ...", + requested_api_threads) + for _ in range(requested_api_threads): + spawn_api_process() + + def spawn_bg_process(): + """Starts a single Task worker process for CodeChecker server.""" + nonlocal spawned_bg_proc_count + spawned_bg_proc_count += 1 + + p = _start_process_with_no_signal_handling( + target=background_task_executor, + args=(bg_task_queue, + config_sql_server, + check_env, + is_server_shutting_down, + machine_id, + ), + name=f"CodeChecker-Task-{spawned_bg_proc_count}") + bg_processes[cast(int, p.pid)] = p + signal_log(LOG, "DEBUG", f"Task child process {p.pid} started!") + return p + + LOG.info("Using %d Task handler processes ...", requested_bg_threads) + for _ in range(requested_bg_threads): + spawn_bg_process() + + termination_signal_timestamp = Value('d', 0) + + def forced_termination_signal_handler(signum: int, _frame): + """ + Handle SIGINT (2) and SIGTERM (15) received a second time to stop the + server ungracefully. + """ + if signum not in [signal.SIGINT, signal.SIGTERM]: + signal_log(LOG, "ERROR", "Signal " + f"<{signal.Signals(signum).name} ({signum})> " + "handling attempted by " + "'forced_termination_signal_handler'!") + return + if not is_server_shutting_down.value or \ + abs(termination_signal_timestamp.value) <= \ + sys.float_info.epsilon: + return + if time.time() - termination_signal_timestamp.value <= 2.0: + # Allow some time to pass between the handling of the normal + # termination vs. doing something in the "forced" handler, because + # a human's ^C keypress in a terminal can generate multiple SIGINTs + # in a quick succession. + return + signal.signal(signal.SIGINT, signal.SIG_IGN) + signal.signal(signal.SIGTERM, signal.SIG_IGN) + + signal_log(LOG, "WARNING", "Termination signal " + f"<{signal.Signals(signum).name} ({signum})> " + "received a second time, **FORCE** killing the WEB server " + f"on [{http_server.formatted_address}] ...") + + for p in list(api_processes.values()) + list(bg_processes.values()): + try: + p.kill() + except (OSError, ValueError): + pass + + # No mercy this time. sys.exit(128 + signum) - def reload_signal_handler(*_args, **_kwargs): + exit_code = Value('B', 0) + + def termination_signal_handler(signum: int, _frame): + """ + Handle SIGINT (2) and SIGTERM (15) to stop the server gracefully. + """ + # Debounce termination signals at this point. 
+ signal.signal(signal.SIGINT, forced_termination_signal_handler) + signal.signal(signal.SIGTERM, forced_termination_signal_handler) + + if is_server_shutting_down.value: + return + if signum not in [signal.SIGINT, signal.SIGTERM]: + signal_log(LOG, "ERROR", "Signal " + f"<{signal.Signals(signum).name} ({signum})> " + "handling attempted by 'termination_signal_handler'!") + return + + is_server_shutting_down.value = True + termination_signal_timestamp.value = time.time() + + exit_code.value = 128 + signum + signal_log(LOG, "INFO", "Shutting down the WEB server on " + f"[{http_server.formatted_address}] ... " + "Please allow some time for graceful clean-up!") + + # Terminate child processes. + # For these subprocesses, let the processes properly clean up after + # themselves in a graceful shutdown scenario. + # For this reason, we fire a bunch of SIGHUPs first, indicating + # that the main server process wants to exit, and then wait for + # the children to die once all of them got the signal. + for pid in api_processes: + try: + signal_log(LOG, "DEBUG", f"SIGINT! API child PID: {pid} ...") + os.kill(pid, signal.SIGINT) + except (OSError, ValueError): + pass + for pid in list(api_processes.keys()): + p = api_processes[pid] + try: + signal_log(LOG, "DEBUG", f"join() API child PID: {pid} ...") + p.join() + p.close() + except (OSError, ValueError): + pass + finally: + del api_processes[pid] + + bg_task_queue.close() + bg_task_queue.join_thread() + for pid in bg_processes: + try: + signal_log(LOG, "DEBUG", f"SIGHUP! Task child PID: {pid} ...") + os.kill(pid, signal.SIGHUP) + except (OSError, ValueError): + pass + for pid in list(bg_processes.keys()): + p = bg_processes[pid] + try: + signal_log(LOG, "DEBUG", f"join() Task child PID: {pid} ...") + p.join() + p.close() + except (OSError, ValueError): + pass + finally: + del bg_processes[pid] + + def reload_signal_handler(signum: int, _frame): """ - Reloads server configuration file. + Handle SIGHUP (1) to reload the server's configuration file to memory. """ + if signum not in [signal.SIGHUP]: + signal_log(LOG, "ERROR", "Signal " + f"<{signal.Signals(signum).name} ({signum})> " + "handling attempted by 'reload_signal_handler'!") + return + + signal_log(LOG, "INFO", + "Received signal to reload server configuration ...") + manager.reload_config() - try: - instance_manager.register(os.getpid(), - os.path.abspath( - context.codechecker_workspace), - port) - except IOError as ex: - LOG.debug(ex.strerror) + signal_log(LOG, "INFO", "Server configuration reload: Done.") - LOG.info("Server waiting for client requests on [%s:%d]", - '[' + listen_address + ']' - if server_clazz is CCSimpleHttpServerIPv6 else listen_address, - port) + sigchild_event_counter = Value('I', 0) + is_already_handling_sigchild = Value('B', False) - def unregister_handler(pid): + def child_signal_handler(signum: int, _frame): """ - Handle errors during instance unregistration. - The workspace might be removed so updating the - config content might fail. + Handle SIGCHLD (17) that signals a child process's interruption or + death by creating a new child to ensure that the requested number of + workers are always alive. """ - try: - instance_manager.unregister(pid) - except IOError as ex: - LOG.debug(ex.strerror) + if is_already_handling_sigchild.value: + # Do not perform this handler recursively to prevent spawning too + # many children. 
+ return + if is_server_shutting_down.value: + # Do not handle SIGCHLD events during normal shutdown, because + # our own subprocess termination calls would fire this handler. + return + if signum not in [signal.SIGCHLD]: + signal_log(LOG, "ERROR", "Signal " + f"<{signal.Signals(signum).name} ({signum})> " + "handling attempted by 'child_signal_handler'!") + return - atexit.register(unregister_handler, os.getpid()) + is_already_handling_sigchild.value = True - for _ in range(manager.worker_processes - 1): - p = multiprocess.Process(target=http_server.serve_forever) - processes.append(p) - p.start() + force_slow_path: bool = False + event_counter: int = sigchild_event_counter.value + if event_counter >= \ + min(requested_api_threads, requested_bg_threads) // 2: + force_slow_path = True + else: + sigchild_event_counter.value = event_counter + 1 - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) + # How many new processes need to be spawned for each type of worker + # process? + spawn_needs: Counter = Counter() + def _check_process_one(kind: str, proclist: Dict[int, Process], + pid: int): + try: + p = proclist[pid] + except KeyError: + return + + # Unfortunately, "Process.is_alive()" cannot be used here, because + # during the handling of SIGCHLD during a child's death, according + # to the state of Python's data structures, the child is still + # alive. + # We run a low-level non-blocking wait again, which will + # immediately return, but properly reap the child process if it has + # terminated. + try: + _, status_signal = os.waitpid(pid, os.WNOHANG) + if status_signal == 0: + # The process is still alive. + return + except ChildProcessError: + pass + + signal_log(LOG, "WARNING", + f"'{kind}' child process (PID {pid}, \"{p.name}\") " + "is not alive anymore!") + spawn_needs[kind] += 1 + + try: + del proclist[pid] + except KeyError: + # Due to the bunching up of signals and that Python runs the + # C-level signals with a custom logic inside the interpreter, + # coupled with the fact that PIDs can be reused, the same PID + # can be reported dead in a quick succession of signals, + # resulting in a KeyError here. + pass + + def _check_processes_many(kind: str, proclist: Dict[int, Process]): + for pid in sorted(proclist.keys()): + _check_process_one(kind, proclist, pid) + + # Try to find the type of the interrupted/dead process based on signal + # information first. + # This should be quicker and more deterministic. + try: + child_pid, child_signal = os.waitpid(-1, os.WNOHANG) + if child_signal == 0: + # Go to the slow path and check the children manually, we did + # not receive a reply from waitpid() with an actual dead child. + raise ChildProcessError() + + _check_process_one("api", api_processes, child_pid) + _check_process_one("background", bg_processes, child_pid) + except ChildProcessError: + # We have not gotten a PID, or it was not found, so we do not know + # who died; in this case, it is better to go on the slow path and + # query all our children individually. + spawn_needs.clear() # Forces the Counter to be empty. + + if force_slow_path: + # A clever sequence of child killings in variously sized batches + # can easily result in missing a few signals here and there, and + # missing a few dead children because 'os.waitpid()' allows us to + # fall into a false "fast path" situation. + # To remedy this, we every so often force a slow path to ensure + # the number of worker processes is as close to the requested + # amount of possible. 
+
+            # Forces the Counter to be empty, even if the fast path put an
+            # entry in there.
+            spawn_needs.clear()
+
+        if not spawn_needs:
+            _check_processes_many("api", api_processes)
+            _check_processes_many("background", bg_processes)
+
+            if force_slow_path:
+                sigchild_event_counter.value = 0
+                signal_log(LOG, "WARNING",
+                           "Too many children died since last full status "
+                           "check, performing one ...")
+
+            # If we came into the handler with a "forced slow path" situation,
+            # ensure that we spawn enough new processes to backfill the
+            # missing amount, even if due to the flakiness of signal handling,
+            # we might not have actually gotten "N" times SIGCHLD firings for
+            # the death of N children, if they happened in a bundle situation,
+            # e.g., kill N/4, then kill N/2, then kill 1 or 2, then kill the
+            # remaining.
+            spawn_needs["api"] = \
+                util.clamp(0, requested_api_threads - len(api_processes),
+                           requested_api_threads)
+            spawn_needs["background"] = \
+                util.clamp(0, requested_bg_threads - len(bg_processes),
+                           requested_bg_threads)
+
+        for kind, num in spawn_needs.items():
+            signal_log(LOG, "INFO",
+                       f"(Re-)starting {num} '{kind}' child process(es) ...")
+
+            if kind == "api":
+                for _ in range(num):
+                    spawn_api_process()
+            elif kind == "background":
+                for _ in range(num):
+                    spawn_bg_process()
+
+        is_already_handling_sigchild.value = False
+
+    signal.signal(signal.SIGINT, termination_signal_handler)
+    signal.signal(signal.SIGTERM, termination_signal_handler)
     if sys.platform != "win32":
+        signal.signal(signal.SIGCHLD, child_signal_handler)
         signal.signal(signal.SIGHUP, reload_signal_handler)

-    # Main process also acts as a worker.
-    http_server.serve_forever()
+    LOG.info("Server waiting for client requests on [%s]",
+             http_server.formatted_address)
+
+    # We cannot use a multiprocessing.Event here because that would result in
+    # a deadlock, as the process waiting on the event is the one receiving the
+    # shutdown signal.
+    while not is_server_shutting_down.value:
+        time.sleep(5)
+
+    dropped_tasks = _cleanup_incomplete_tasks("Server shut down, task will "
+                                              "never be completed.")
+    if dropped_tasks:
+        LOG.info("At server shutdown, dropped %d background tasks that will "
+                 "never be completed.", dropped_tasks)

-    LOG.info("Webserver quit.")
+    LOG.info("CodeChecker server quit (main process).")
+    return exit_code.value


 def add_initial_run_database(config_sql_server, product_connection):
diff --git a/web/server/codechecker_server/session_manager.py b/web/server/codechecker_server/session_manager.py
index 276af909cd..662eaa62b0 100644
--- a/web/server/codechecker_server/session_manager.py
+++ b/web/server/codechecker_server/session_manager.py
@@ -11,16 +11,14 @@

 import hashlib
 import json
-import os
 import re
-import uuid

 from datetime import datetime
 from typing import Optional

 from codechecker_common.compatibility.multiprocessing import cpu_count
 from codechecker_common.logger import get_logger
-from codechecker_common.util import load_json
+from codechecker_common.util import generate_random_token, load_json

 from codechecker_web.shared.env import check_file_owner_rw
 from codechecker_web.shared.version import SESSION_COOKIE_NAME as _SCN
@@ -47,29 +45,29 @@
 SESSION_COOKIE_NAME = _SCN


-def generate_session_token():
-    """
-    Returns a random session token.
-    """
-    return uuid.UUID(bytes=os.urandom(16)).hex
-
-
 def get_worker_processes(scfg_dict):
     """
     Return number of worker processes from the config dictionary.
- Return 'worker_processes' field from the config dictionary or returns the - default value if this field is not set or the value is negative. + Return 'worker_processes' and 'background_worker_processes' fields from + the config dictionary or returns the default value if this field is not + set or the value is negative. """ default = cpu_count() - worker_processes = scfg_dict.get('worker_processes', default) + worker_processes = scfg_dict.get("worker_processes", default) + background_worker_processes = scfg_dict.get("background_worker_processes", + default) - if worker_processes < 0: + if not worker_processes or worker_processes < 0: LOG.warning("Number of worker processes can not be negative! Default " "value will be used: %s", default) worker_processes = default + if not background_worker_processes or background_worker_processes < 0: + LOG.warning("Number of task worker processes can not be negative! " + "Default value will be used: %s", worker_processes) + background_worker_processes = worker_processes - return worker_processes + return worker_processes, background_worker_processes class _Session: @@ -182,7 +180,8 @@ def __init__(self, configuration_file, root_sha, force_auth=False): # so it should NOT be handled by session_manager. A separate config # handler for the server's stuff should be created, that can properly # instantiate SessionManager with the found configuration. - self.__worker_processes = get_worker_processes(scfg_dict) + self.__worker_processes, self.__background_worker_processes = \ + get_worker_processes(scfg_dict) self.__max_run_count = scfg_dict.get('max_run_count', None) self.__store_config = scfg_dict.get('store', {}) self.__keepalive_config = scfg_dict.get('keepalive', {}) @@ -328,6 +327,10 @@ def is_enabled(self): def worker_processes(self): return self.__worker_processes + @property + def background_worker_processes(self) -> int: + return self.__background_worker_processes + def get_realm(self): return { "realm": self.__auth_config.get('realm_name'), @@ -622,7 +625,7 @@ def create_session(self, auth_string): return False # Generate a new token and create a local session. - token = generate_session_token() + token = generate_random_token(32) user_name = validation.get('username') groups = validation.get('groups', []) is_root = validation.get('root', False) diff --git a/web/server/codechecker_server/task_executors/__init__.py b/web/server/codechecker_server/task_executors/__init__.py new file mode 100644 index 0000000000..4259749345 --- /dev/null +++ b/web/server/codechecker_server/task_executors/__init__.py @@ -0,0 +1,7 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- diff --git a/web/server/codechecker_server/task_executors/abstract_task.py b/web/server/codechecker_server/task_executors/abstract_task.py new file mode 100644 index 0000000000..1f1508ae17 --- /dev/null +++ b/web/server/codechecker_server/task_executors/abstract_task.py @@ -0,0 +1,202 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Contains the base class to be inherited and implemented by all background task +types. +""" +import logging +import os +import pathlib +import shutil +from typing import Optional + +from codechecker_common.logger import get_logger + +from ..database.config_db_model import BackgroundTask as DBTask + + +LOG = get_logger("server") + + +class TaskCancelHonoured(Exception): + """ + Specialised tag exception raised by `AbstractTask` implementations in a + checkpoint after having checked that their ``cancel_flag`` was set, in + order to terminate task-specific execution and to register the + cancellation's success by the `AbstractTask.execute` method. + + This exception should **NOT** be caught by user code. + """ + + def __init__(self, task_obj: "AbstractTask"): + super().__init__(f"Task '{task_obj.token}' honoured CANCEL request.") + self.task_obj = task_obj + + +class AbstractTask: + """ + Base class implementing common execution and bookkeeping methods to + facilitate the dispatch of tasks to background worker processes. + + Instances of this class **MUST** be marshallable by ``pickle``, as they + are transported over an IPC `Queue`. + It is important that instances do not grow too large, as the underlying + OS-level primitives of a `Queue` can get full, which can result in a + deadlock situation. + + The run-time contents of the instance should only contain the bare minimum + metadata required for the implementation to execute in the background. + + Implementors of subclasses **MAY REASONABLY ASSUME** that an + `AbstractTask` scheduled in the API handler process of a server will be + actually executed by a background worker in the same process group, on the + same machine instance. + """ + + def __init__(self, token: str, data_path: Optional[pathlib.Path]): + self._token = token + self._data_path = data_path + + @property + def token(self) -> str: + """Returns the task's identifying token, its primary ID.""" + return self._token + + @property + def data_path(self) -> Optional[pathlib.Path]: + """ + Returns the filesystem path where the task's input data is prepared. + """ + return self._data_path + + def destroy_data(self): + """ + Deletes the contents of `data_path`. + """ + if not self._data_path: + return + + try: + shutil.rmtree(self._data_path) + LOG.debug("Wiping temporary data of task '%s' at '%s' ...", + self._token, self._data_path) + except Exception as ex: + LOG.warning("Failed to remove background task's data_dir at " + "'%s':\n%s", self.data_path, str(ex)) + + def _implementation(self, _task_manager: "TaskManager") -> None: + """ + Implemented by subclasses to perform the logic specific to the task. + + Subclasses should use the `task_manager` object, injected from the + context of the executed subprocess, to query and mutate service-level + information about the current task. + """ + raise NotImplementedError(f"No implementation for task class {self}!") + + def execute(self, task_manager: "TaskManager") -> None: + """ + Executes the `_implementation` of the task, overridden by subclasses, + to perform a task-specific business logic. + + This high-level wrapper deals with capturing `Exception`s, setting + appropriate status information in the database (through the + injected `task_manager`) and logging failures accordingly. 
+ """ + if task_manager.should_cancel(self): + def _log_cancel_and_abandon(db_task: DBTask): + db_task.add_comment("CANCEL!\nTask cancelled before " + "execution began!", + "SYSTEM[AbstractTask::execute()]") + db_task.set_abandoned(force_dropped_status=False) + + task_manager._mutate_task_record(self, _log_cancel_and_abandon) + return + + try: + task_manager._mutate_task_record( + self, lambda dbt: dbt.set_running()) + except KeyError: + # KeyError is thrown if a task without a corresponding database + # record is attempted to be executed. + LOG.error("Failed to execute task '%s' due to database exception", + self.token) + except Exception as ex: + LOG.error("Failed to execute task '%s' due to database exception" + "\n%s", + self.token, str(ex)) + # For any other record, try to set the task abandoned due to an + # exception. + try: + task_manager._mutate_task_record( + self, lambda dbt: + dbt.set_abandoned(force_dropped_status=True)) + except Exception: + return + + LOG.debug("Task '%s' running on machine '%s' executor #%d", + self.token, task_manager.machine_id, os.getpid()) + + try: + self._implementation(task_manager) + LOG.debug("Task '%s' finished on machine '%s' executor #%d", + self.token, + task_manager.machine_id, + os.getpid()) + + try: + task_manager._mutate_task_record( + self, lambda dbt: dbt.set_finished(successfully=True)) + except Exception as ex: + LOG.error("Failed to set task '%s' finished due to " + "database exception:\n%s", + self.token, str(ex)) + except TaskCancelHonoured: + def _log_cancel_and_abandon(db_task: DBTask): + db_task.add_comment("CANCEL!\nCancel request of admin " + "honoured by task.", + "SYSTEM[AbstractTask::execute()]") + db_task.set_abandoned(force_dropped_status=False) + + def _log_drop_and_abandon(db_task: DBTask): + db_task.add_comment("SHUTDOWN!\nTask honoured graceful " + "cancel signal generated by " + "server shutdown.", + "SYSTEM[AbstractTask::execute()]") + db_task.set_abandoned(force_dropped_status=True) + + if not task_manager.is_shutting_down: + task_manager._mutate_task_record(self, _log_cancel_and_abandon) + else: + task_manager._mutate_task_record(self, _log_drop_and_abandon) + + import traceback + LOG.debug("Task '%s' honoured the administrator's cancel request " + "at:\n%s", + self.token, traceback.format_exc()) + except Exception as ex: + LOG.error("Failed to execute task '%s' on machine '%s' " + "executor #%d: %s", + self.token, task_manager.machine_id, os.getpid(), + str(ex)) + import traceback + traceback.print_exc() + + def _log_exception_and_fail(db_task: DBTask): + db_task.add_comment( + f"FAILED!\nException during execution:\n{str(ex)}", + "SYSTEM[AbstractTask::execute()]") + if LOG.isEnabledFor(logging.DEBUG): + db_task.add_comment("Debug exception information:\n" + f"{traceback.format_exc()}", + "SYSTEM[AbstractTask::execute()]") + db_task.set_finished(successfully=False) + + task_manager._mutate_task_record(self, _log_exception_and_fail) + finally: + self.destroy_data() diff --git a/web/server/codechecker_server/task_executors/main.py b/web/server/codechecker_server/task_executors/main.py new file mode 100644 index 0000000000..320de737c5 --- /dev/null +++ b/web/server/codechecker_server/task_executors/main.py @@ -0,0 +1,143 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+"""
+Implements a dedicated subprocess that deals with running `AbstractTask`
+subclasses in the background.
+"""
+from datetime import timedelta
+import os
+from queue import Empty
+import signal
+
+from sqlalchemy.orm import sessionmaker
+
+from codechecker_common.compatibility.multiprocessing import Queue, Value
+from codechecker_common.logger import get_logger, signal_log
+
+from ..database.config_db_model import BackgroundTask as DBTask
+from .abstract_task import AbstractTask
+from .task_manager import TaskManager
+
+
+WAIT_TIME_FOR_TASK_QUEUE_CLEARING_AT_SERVER_SHUTDOWN = timedelta(seconds=5)
+
+LOG = get_logger("server")
+
+
+def executor(queue: Queue,
+             config_db_sql_server,
+             server_environment,
+             server_shutdown_flag: "Value",
+             machine_id: str):
+    """
+    The "main()" function implementation for a background task executor
+    process.
+
+    This process sets up the state of the local process, and then deals with
+    popping jobs from the queue and executing them in the local context.
+    """
+    # First things first, a background worker process should NOT respect the
+    # termination signals received from the parent process, because it has to
+    # run its own cleanup logic before shutting down.
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+    signal.signal(signal.SIGTERM, signal.SIG_IGN)
+
+    kill_flag = Value('B', False)
+
+    def executor_hangup_handler(signum: int, _frame):
+        """
+        Handle SIGHUP (1) to do a graceful shutdown of the background worker.
+        """
+        if signum not in [signal.SIGHUP]:
+            signal_log(LOG, "ERROR", "Signal "
+                       f"<{signal.Signals(signum).name} ({signum})> "
+                       "handling attempted by 'executor_hangup_handler'!")
+            return
+
+        signal_log(LOG, "DEBUG", f"{os.getpid()}: Received "
+                   f"{signal.Signals(signum).name} ({signum}), preparing for "
+                   "shutdown ...")
+        kill_flag.value = True
+
+    signal.signal(signal.SIGHUP, executor_hangup_handler)
+
+    config_db_engine = config_db_sql_server.create_engine()
+    tm = TaskManager(queue, sessionmaker(bind=config_db_engine),
+                     server_environment, kill_flag, machine_id)
+
+    while not kill_flag.value:
+        try:
+            # Do not block indefinitely when waiting for a job, to allow
+            # checking whether the kill flags were set.
+            t: AbstractTask = queue.get(block=True, timeout=1)
+        except Empty:
+            continue
+
+        LOG.debug("Executor PID %d popped task '%s' (%s) ...",
+                  os.getpid(), t.token, str(t))
+
+        t.execute(tm)
+
+    # Once the main loop of the task execution process has finished, there
+    # might still be tasks left in the queue.
+    # If the server is shutting down (this is distinguished from the local kill
+    # flag, because a 'SIGHUP' might arrive from any source, not just a valid
+    # graceful shutdown!), then these jobs would be lost if the process just
+    # exited, with no information reported to the database.
+    # We need to set these tasks to dropped, as far as possible.
+    def _log_shutdown_and_abandon(db_task: DBTask):
+        db_task.add_comment("SHUTDOWN!\nTask never started due to the "
+                            "server shutdown!", "SYSTEM")
+        db_task.set_abandoned(force_dropped_status=True)
+
+    def _drop_task_at_shutdown(t: AbstractTask):
+        try:
+            LOG.debug("Dropping task '%s' due to server shutdown...", t.token)
+            tm._mutate_task_record(t, _log_shutdown_and_abandon)
+        except Exception:
+            pass
+        finally:
+            t.destroy_data()
+
+    if server_shutdown_flag.value:
+        # Unfortunately, it is not guaranteed which process will wake up first
+        # when popping objects from the queue.
+        # Blocking indefinitely would not be a solution here, because all
+        # producers (API threads) had likely already exited at this point.
+        # However, simply observing no elements for a short period of time is
+        # also not enough, as at the very last moments of a server's lifetime,
+        # one process might observe the queue to be empty, simply because
+        # another process stole the object that was put into it.
+
+        # To be on the safe side of things, we only proceed once the queue has
+        # been observed to be *constantly* empty over a longer period of
+        # repetitive sampling.
+        empty_sample_count: int = 0
+        while empty_sample_count < int(
+                WAIT_TIME_FOR_TASK_QUEUE_CLEARING_AT_SERVER_SHUTDOWN
+                .total_seconds()):
+            try:
+                t: AbstractTask = queue.get(block=True, timeout=1)
+            except Empty:
+                empty_sample_count += 1
+                continue
+
+            empty_sample_count = 0
+            _drop_task_at_shutdown(t)
+
+    queue.close()
+    queue.join_thread()
+
+    try:
+        config_db_engine.dispose()
+    except Exception as ex:
+        LOG.error("Failed to shut down task executor %d!\n%s",
+                  os.getpid(), str(ex))
+        return
+
+    LOG.debug("Task executor subprocess PID %d exited main loop.",
+              os.getpid())
diff --git a/web/server/codechecker_server/task_executors/task_manager.py b/web/server/codechecker_server/task_executors/task_manager.py
new file mode 100644
index 0000000000..6b6929109a
--- /dev/null
+++ b/web/server/codechecker_server/task_executors/task_manager.py
@@ -0,0 +1,284 @@
+# -------------------------------------------------------------------------
+#
+# Part of the CodeChecker project, under the Apache License v2.0 with
+# LLVM Exceptions. See LICENSE for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+"""
+Contains status management and query methods to handle bookkeeping for
+dispatched background tasks.
+"""
+import os
+from pathlib import Path
+import re
+import shutil
+import tempfile
+from typing import Callable, Optional
+
+import sqlalchemy
+
+from codechecker_common.compatibility.multiprocessing import Queue, Value
+from codechecker_common.logger import get_logger, signal_log
+from codechecker_common.util import generate_random_token
+
+from ..database.config_db_model import BackgroundTask as DBTask, Product
+from ..database.database import DBSession
+
+MAX_TOKEN_RANDOM_RETRIES = 10
+CHARS_INVALID_IN_PATH = re.compile(r"[\'\"<>:\\/\|\*\?\. ]")
+
+LOG = get_logger("server")
+
+
+class ExecutorInProgressShutdownError(Exception):
+    """
+    Exception raised to indicate that the background executors are under
+    shutdown.
+    """
+    def __init__(self):
+        super().__init__("Task executor is shutting down!")
+
+
+class TaskManager:
+    """
+    Handles the creation of "Task" status objects in the database and pushing
+    in-memory `AbstractTask` subclass instances to a `Queue`.
+
+    This class is instantiated for EVERY WORKER separately, and is not a
+    shared resource!
+ """ + + def __init__(self, q: Queue, config_db_session_factory, + server_environment, + executor_kill_flag: Value, + machine_id: str, + temp_dir: Optional[Path] = None): + self._queue = q + self._database_factory = config_db_session_factory + self._server_environment = server_environment + self._is_shutting_down = executor_kill_flag + self._machine_id = machine_id + self._temp_dir_root = (temp_dir or Path(tempfile.gettempdir())) \ + / "codechecker_tasks" \ + / CHARS_INVALID_IN_PATH.sub('_', machine_id) + + os.makedirs(self._temp_dir_root, exist_ok=True) + + @property + def configuration_database_session_factory(self): + """ + Returns a `sqlalchemy.orm.sessionmaker` instance for the server + configuration database. + """ + return self._database_factory + + @property + def environment(self): + """Returns the ``check_env`` injected into the task manager.""" + return self._server_environment + + @property + def machine_id(self) -> str: + """Returns the ``machine_id`` the instance was constructed with.""" + return self._machine_id + + def allocate_task_record(self, kind: str, summary: str, + user_name: Optional[str], + product: Optional[Product] = None) -> str: + """ + Creates the token and the status record for a new task with the given + initial metadata. + + Returns the token of the task, which is a unique identifier of the + allocated record. + """ + try_count: int = 0 + while True: + with DBSession(self._database_factory) as session: + try: + token = generate_random_token(DBTask._token_length) + + task = DBTask(token, kind, summary, self.machine_id, + user_name, product) + session.add(task) + session.commit() + + return token + except sqlalchemy.exc.IntegrityError as ie: + # The only failure that can happen is the PRIMARY KEY's + # UNIQUE violation, which means we hit jackpot by + # generating an already used token! + try_count += 1 + + if try_count >= MAX_TOKEN_RANDOM_RETRIES: + raise KeyError( + "Failed to generate a unique ID for task " + f"{kind} ({summary}) after " + f"{MAX_TOKEN_RANDOM_RETRIES} retries!") from ie + + def create_task_data(self, token: str) -> Path: + """ + Creates a temporary directory which is **NOT** cleaned up + automatically by the current context, and which is suitable for + putting arbitrary files underneath to communicate large inputs + (that should not be put in the `Queue`) to the `execute` method of + an `AbstractTask`. + + The larger business logic of the Server implementation may still clean + up the temporary directories, e.g., if the pending tasks are being + dropped during a shutdown, making retention of this "temporary data" + useless. + See `destroy_temporary_data`. + """ + task_temp_dir = tempfile.mkdtemp(prefix=f"{token}-", + dir=self._temp_dir_root) + return Path(task_temp_dir) + + def destroy_all_temporary_data(self): + """ + Removes the contents of task-temporary directories under the + `TaskManager`'s initial `temp_dir` and current "machine ID". + """ + try: + shutil.rmtree(self._temp_dir_root) + except Exception as ex: + LOG.warning("Failed to remove background tasks' data_dirs at " + "'%s':\n%s", self._temp_dir_root, str(ex)) + + def drop_all_incomplete_tasks(self, action: str) -> int: + """ + Sets all tasks in the database that were associated with the given + `machine_id` to ``"dropped"`` status, indicating that the status was + changed during the `action`. + + Returns the number of `DBTask`s actually changed. 
+        """
+        count: int = 0
+        with DBSession(self._database_factory) as session:
+            for t in session.query(DBTask) \
+                    .filter(DBTask.machine_id == self.machine_id,
+                            DBTask.status.in_(["allocated",
+                                               "enqueued",
+                                               "running"])) \
+                    .all():
+                count += 1
+                t.add_comment(f"DROPPED!\n{action}",
+                              "SYSTEM")
+                t.set_abandoned(force_dropped_status=True)
+
+            session.commit()
+        return count
+
+    def get_task_record(self, token: str) -> DBTask:
+        """
+        Retrieves the `DBTask` record for the task identified by `token`.
+
+        The returned object should not be mutated, only its fields queried.
+        """
+        with DBSession(self._database_factory) as session:
+            db_task: Optional[DBTask] = \
+                session.query(DBTask).get(token)
+            if not db_task:
+                raise KeyError(f"No task record for token '{token}' "
+                               "in the database")
+            session.expunge(db_task)
+        return db_task
+
+    def _get_task_record(self, task_obj: "AbstractTask") -> DBTask:
+        """
+        Retrieves the `DBTask` record for the task identified by `task_obj`.
+
+        The returned object should not be mutated, only its fields queried.
+        """
+        return self.get_task_record(task_obj.token)
+
+    def _mutate_task_record(self, task_obj: "AbstractTask",
+                            mutator: Callable[[DBTask], None]):
+        """
+        Executes the given `mutator` function for the `DBTask` record
+        corresponding to the `task_obj` description available in memory.
+        """
+        with DBSession(self._database_factory) as session:
+            db_task: Optional[DBTask] = \
+                session.query(DBTask).get(task_obj.token)
+            if not db_task:
+                raise KeyError(f"No task record for token '{task_obj.token}' "
+                               "in the database")
+
+            try:
+                mutator(db_task)
+            except Exception:
+                session.rollback()
+
+                import traceback
+                traceback.print_exc()
+                raise
+
+            session.commit()
+
+    def push_task(self, task_obj: "AbstractTask"):
+        """Enqueues the given `task_obj` onto the `Queue`."""
+        if self.is_shutting_down:
+            raise ExecutorInProgressShutdownError()
+
+        # Note that the API handler process calling push_task() might be
+        # killed before writing to the queue, so an actually enqueued task
+        # (according to the DB) might never be consumed by a background
+        # process.
+        # As we have to COMMIT the status change before the actual processing
+        # in order to show the time stamp to the user(s), there is no better
+        # way to make this more atomic.
+        try:
+            self._mutate_task_record(task_obj, lambda dbt: dbt.set_enqueued())
+            self._queue.put(task_obj)
+        except SystemExit as sex:
+            try:
+                signal_log(LOG, "WARNING", f"Process #{os.getpid()}: "
+                           "push_task() killed via SystemExit during "
+                           f"enqueue of task '{task_obj.token}'!")
+
+                def _log_and_abandon(db_task: DBTask):
+                    db_task.add_comment(
+                        "SHUTDOWN!\nEnqueueing process terminated during the "
+                        "ongoing enqueue! The task will never be executed!",
+                        "SYSTEM[TaskManager::push_task()]")
+                    db_task.set_abandoned(force_dropped_status=True)
+
+                self._mutate_task_record(task_obj, _log_and_abandon)
+            finally:
+                raise sex
+
+    @property
+    def is_shutting_down(self) -> bool:
+        """
+        Returns whether the shutdown flag for the executor associated with the
+        `TaskManager` has been set.
+        """
+        return self._is_shutting_down.value
+
+    def should_cancel(self, task_obj: "AbstractTask") -> bool:
+        """
+        Returns whether the task identified by `task_obj` should be
+        co-operatively cancelled.
+ """ + db_task = self._get_task_record(task_obj) + return self.is_shutting_down or \ + (db_task.status in ["enqueued", "running"] + and db_task.cancel_flag) + + def add_comment(self, task_obj: "AbstractTask", comment: str, + actor: Optional[str] = None): + """ + Adds `comment` in the name of `actor` to the task record corresponding + to `task_obj`. + """ + self._mutate_task_record(task_obj, + lambda dbt: dbt.add_comment(comment, actor)) + + def heartbeat(self, task_obj: "AbstractTask"): + """ + Triggers ``heartbeat()`` timestamp update in the database for + `task_obj`. + """ + self._mutate_task_record(task_obj, lambda dbt: dbt.heartbeat()) diff --git a/web/server/codechecker_server/tmp.py b/web/server/codechecker_server/tmp.py deleted file mode 100644 index bbc5e77bea..0000000000 --- a/web/server/codechecker_server/tmp.py +++ /dev/null @@ -1,37 +0,0 @@ -# ------------------------------------------------------------------------- -# -# Part of the CodeChecker project, under the Apache License v2.0 with -# LLVM Exceptions. See LICENSE for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ------------------------------------------------------------------------- -""" -Temporary directory module. -""" - - -import datetime -import hashlib -import os - - -from codechecker_common.logger import get_logger - -LOG = get_logger('system') - - -def get_tmp_dir_hash(): - """Generate a hash based on the current time and process id.""" - - pid = os.getpid() - time = datetime.datetime.now() - - data = str(pid) + str(time) - - dir_hash = hashlib.md5() - dir_hash.update(data.encode("utf-8")) - - LOG.debug('The generated temporary directory hash is %s.', - dir_hash.hexdigest()) - - return dir_hash.hexdigest() diff --git a/web/server/config/server_config.json b/web/server/config/server_config.json index e42745f08d..a5ad4999c8 100644 --- a/web/server/config/server_config.json +++ b/web/server/config/server_config.json @@ -1,4 +1,6 @@ { + "background_worker_processes": null, + "worker_processes": null, "max_run_count": null, "store": { "analysis_statistics_dir": null, diff --git a/web/server/vue-cli/package-lock.json b/web/server/vue-cli/package-lock.json index d908b8c278..07b56dce13 100644 --- a/web/server/vue-cli/package-lock.json +++ b/web/server/vue-cli/package-lock.json @@ -11,7 +11,7 @@ "@mdi/font": "^6.5.95", "chart.js": "^2.9.4", "chartjs-plugin-datalabels": "^0.7.0", - "codechecker-api": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.58.0.tgz", + "codechecker-api": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.59.0.tgz", "codemirror": "^5.65.0", "date-fns": "^2.28.0", "js-cookie": "^3.0.1", @@ -5113,9 +5113,9 @@ } }, "node_modules/codechecker-api": { - "version": "6.58.0", - "resolved": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.58.0.tgz", - "integrity": "sha512-N6qK5cnLt32jnJlSyyGMmW6FCzybDljyH1RrGOZ1Gk9n1vV7WluJbC9InYWsZ5lbK7xVyIrphTKXhqC4ARKF6g==", + "version": "6.59.0", + "resolved": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.59.0.tgz", + "integrity": "sha512-DN1vQkV3P/5jwI62Sd+JzvNALe/i7km2iDd8GKfK6vQYdYPnHg3ZpwK1vyRcF0dsegZhjfgoMzOAclH+nwk+Yg==", "license": "SEE LICENSE IN LICENSE", "dependencies": { "thrift": "0.13.0-hotfix.1" @@ -21145,8 +21145,8 @@ "dev": true }, "codechecker-api": { - "version": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.58.0.tgz", - "integrity": "sha512-N6qK5cnLt32jnJlSyyGMmW6FCzybDljyH1RrGOZ1Gk9n1vV7WluJbC9InYWsZ5lbK7xVyIrphTKXhqC4ARKF6g==", + 
"version": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.59.0.tgz", + "integrity": "sha512-DN1vQkV3P/5jwI62Sd+JzvNALe/i7km2iDd8GKfK6vQYdYPnHg3ZpwK1vyRcF0dsegZhjfgoMzOAclH+nwk+Yg==", "requires": { "thrift": "0.13.0-hotfix.1" } diff --git a/web/server/vue-cli/package.json b/web/server/vue-cli/package.json index 2239777668..f31789b897 100644 --- a/web/server/vue-cli/package.json +++ b/web/server/vue-cli/package.json @@ -27,7 +27,7 @@ }, "dependencies": { "@mdi/font": "^6.5.95", - "codechecker-api": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.58.0.tgz", + "codechecker-api": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.59.0.tgz", "chart.js": "^2.9.4", "chartjs-plugin-datalabels": "^0.7.0", "codemirror": "^5.65.0", diff --git a/web/tests/functional/instance_manager/test_instances.py b/web/tests/functional/instance_manager/test_instances.py index 0e7fc3a1d6..0851548100 100644 --- a/web/tests/functional/instance_manager/test_instances.py +++ b/web/tests/functional/instance_manager/test_instances.py @@ -10,7 +10,6 @@ Instance manager tests. """ - import os import shutil import subprocess @@ -178,7 +177,7 @@ def test_shutdown_record_keeping(self): EVENT_2.set() # Give the server some grace period to react to the kill command. - time.sleep(5) + time.sleep(30) test_cfg = env.import_test_cfg(self._test_workspace) codechecker_1 = test_cfg['codechecker_1'] diff --git a/web/tests/functional/tasks/__init__.py b/web/tests/functional/tasks/__init__.py new file mode 100644 index 0000000000..4259749345 --- /dev/null +++ b/web/tests/functional/tasks/__init__.py @@ -0,0 +1,7 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- diff --git a/web/tests/functional/tasks/test_task_management.py b/web/tests/functional/tasks/test_task_management.py new file mode 100644 index 0000000000..a53a1e1b4f --- /dev/null +++ b/web/tests/functional/tasks/test_task_management.py @@ -0,0 +1,494 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" +Contains tests of the ``"/Tasks"`` API endpoint to query, using the +``DummyTask``, normal task management related API functions. +""" +from copy import deepcopy +from datetime import datetime, timezone +import os +import pathlib +import shutil +import unittest +import time +from typing import List, Optional, cast + +import multiprocess + +from codechecker_api_shared.ttypes import RequestFailed, Ternary +from codechecker_api.codeCheckerServersideTasks_v6.ttypes import \ + AdministratorTaskInfo, TaskFilter, TaskInfo, TaskStatus + +from libtest import codechecker, env + + +# Stop events for the CodeChecker servers. +STOP_SERVER = multiprocess.Event() +STOP_SERVER_AUTH = multiprocess.Event() +STOP_SERVER_NO_AUTH = multiprocess.Event() + +TEST_WORKSPACE: Optional[str] = None + + +# Note: Test names in this file follow a strict ordinal convention, because +# the assertions are created with a specific execution history! 
+ +class TaskManagementAPITests(unittest.TestCase): + def setup_class(self): + global TEST_WORKSPACE + TEST_WORKSPACE = env.get_workspace("tasks") + os.environ["TEST_WORKSPACE"] = TEST_WORKSPACE + + codechecker_cfg = { + "check_env": env.test_env(TEST_WORKSPACE), + "workspace": TEST_WORKSPACE, + "checkers": [], + "viewer_host": "localhost", + "viewer_port": env.get_free_port(), + "viewer_product": "tasks", + } + + # Run a normal server that is only used to manage the + # "test_package_product". + codechecker.start_server(codechecker_cfg, STOP_SERVER, + ["--machine-id", "workspace-manager"]) + + codechecker_cfg_no_auth = deepcopy(codechecker_cfg) + codechecker_cfg_no_auth.update({ + "viewer_port": env.get_free_port(), + }) + + # Run a normal server which does not require authentication. + codechecker.start_server(codechecker_cfg_no_auth, STOP_SERVER_NO_AUTH, + ["--machine-id", "unprivileged"]) + + codechecker_cfg_auth = deepcopy(codechecker_cfg) + codechecker_cfg_auth.update({ + "viewer_port": env.get_free_port(), + }) + + # Run a privileged server which does require authentication. + (pathlib.Path(TEST_WORKSPACE) / "root.user").unlink() + env.enable_auth(TEST_WORKSPACE) + codechecker.start_server(codechecker_cfg_auth, STOP_SERVER_AUTH, + ["--machine-id", "privileged"]) + + env.export_test_cfg(TEST_WORKSPACE, + {"codechecker_cfg": codechecker_cfg, + "codechecker_cfg_no_auth": + codechecker_cfg_no_auth, + "codechecker_cfg_auth": codechecker_cfg_auth}) + + codechecker.add_test_package_product(codechecker_cfg, TEST_WORKSPACE) + + def teardown_class(self): + # TODO: If environment variable is set keep the workspace and print + # out the path. + global TEST_WORKSPACE + + STOP_SERVER_NO_AUTH.set() + STOP_SERVER_NO_AUTH.clear() + STOP_SERVER_AUTH.set() + STOP_SERVER_AUTH.clear() + + codechecker.remove_test_package_product(TEST_WORKSPACE) + STOP_SERVER.set() + STOP_SERVER.clear() + + print(f"Removing: {TEST_WORKSPACE}") + shutil.rmtree(cast(str, TEST_WORKSPACE), ignore_errors=True) + + def setup_method(self, _): + test_workspace = os.environ["TEST_WORKSPACE"] + self._test_env = env.import_test_cfg(test_workspace) + + print(f"Running {self.__class__.__name__} tests in {test_workspace}") + + auth_server = self._test_env["codechecker_cfg_auth"] + no_auth_server = self._test_env["codechecker_cfg_no_auth"] + + self._auth_client = env.setup_auth_client(test_workspace, + auth_server["viewer_host"], + auth_server["viewer_port"]) + + root_token = self._auth_client.performLogin("Username:Password", + "root:root") + admin_token = self._auth_client.performLogin("Username:Password", + "admin:admin123") + + self._anonymous_task_client = env.setup_task_client( + test_workspace, + no_auth_server["viewer_host"], no_auth_server["viewer_port"]) + self._admin_task_client = env.setup_task_client( + test_workspace, + auth_server["viewer_host"], auth_server["viewer_port"], + session_token=admin_token) + self._privileged_task_client = env.setup_task_client( + test_workspace, + auth_server["viewer_host"], auth_server["viewer_port"], + session_token=root_token) + + def test_task_1_query_status(self): + task_token = self._anonymous_task_client.createDummyTask(10, False) + + time.sleep(5) + task_info: TaskInfo = self._anonymous_task_client.getTaskInfo( + task_token) + self.assertEqual(task_info.token, task_token) + self.assertEqual(task_info.status, + TaskStatus._NAMES_TO_VALUES["RUNNING"]) + self.assertEqual(task_info.productId, 0) + self.assertIsNone(task_info.actorUsername) + self.assertIn("Dummy task", task_info.summary) 
+ self.assertEqual(task_info.cancelFlagSet, False) + + time.sleep(10) # A bit more than exactly what remains of 10 seconds! + task_info = self._anonymous_task_client.getTaskInfo(task_token) + self.assertEqual(task_info.status, + TaskStatus._NAMES_TO_VALUES["COMPLETED"]) + self.assertEqual(task_info.cancelFlagSet, False) + self.assertIsNotNone(task_info.enqueuedAtEpoch) + self.assertIsNotNone(task_info.startedAtEpoch) + self.assertLessEqual(task_info.enqueuedAtEpoch, + task_info.startedAtEpoch) + self.assertIsNotNone(task_info.completedAtEpoch) + self.assertLess(task_info.startedAtEpoch, task_info.completedAtEpoch) + self.assertEqual(task_info.cancelFlagSet, False) + + def test_task_2_query_status_of_failed(self): + task_token = self._anonymous_task_client.createDummyTask(10, True) + + time.sleep(5) + task_info: TaskInfo = self._anonymous_task_client.getTaskInfo( + task_token) + self.assertEqual(task_info.token, task_token) + self.assertEqual(task_info.status, + TaskStatus._NAMES_TO_VALUES["RUNNING"]) + self.assertEqual(task_info.cancelFlagSet, False) + + time.sleep(10) # A bit more than exactly what remains of 10 seconds! + task_info = self._anonymous_task_client.getTaskInfo(task_token) + self.assertEqual(task_info.status, + TaskStatus._NAMES_TO_VALUES["FAILED"]) + self.assertEqual(task_info.cancelFlagSet, False) + + def test_task_3_cancel(self): + task_token = self._anonymous_task_client.createDummyTask(10, False) + + time.sleep(3) + cancel_req: bool = self._privileged_task_client.cancelTask(task_token) + self.assertTrue(cancel_req) + + time.sleep(3) + cancel_req_2: bool = self._privileged_task_client.cancelTask( + task_token) + # The task was already cancelled, so cancel_req_2 is not the API call + # that cancelled the task. + self.assertFalse(cancel_req_2) + + time.sleep(5) # A bit more than exactly what remains of 10 seconds! + task_info: TaskInfo = self._anonymous_task_client.getTaskInfo( + task_token) + self.assertEqual(task_info.status, + TaskStatus._NAMES_TO_VALUES["CANCELLED"]) + self.assertEqual(task_info.cancelFlagSet, True) + self.assertIn("root", task_info.comments) + self.assertIn("SUPERUSER requested cancellation.", task_info.comments) + self.assertIn("CANCEL!\nCancel request of admin honoured by task.", + task_info.comments) + self.assertIsNotNone(task_info.enqueuedAtEpoch) + self.assertIsNotNone(task_info.startedAtEpoch) + self.assertLessEqual(task_info.enqueuedAtEpoch, + task_info.startedAtEpoch) + self.assertIsNotNone(task_info.completedAtEpoch) + self.assertLess(task_info.startedAtEpoch, task_info.completedAtEpoch) + + def test_task_4_get_tasks_as_admin(self): + with self.assertRaises(RequestFailed): + self._admin_task_client.getTasks(TaskFilter( + # No SUPERUSER rights of test admin. + filterForNoProductID=True + )) + with self.assertRaises(RequestFailed): + self._admin_task_client.getTasks(TaskFilter( + # Default product, no PRODUCT_ADMIN rights of test admin. + productIDs=[1] + )) + with self.assertRaises(RequestFailed): + self._privileged_task_client.getTasks(TaskFilter( + productIDs=[1], + filterForNoProductID=True + )) + with self.assertRaises(RequestFailed): + self._privileged_task_client.getTasks(TaskFilter( + usernames=["foo", "bar"], + filterForNoUsername=True + )) + + # PRODUCT_ADMIN rights on test-specific product... + task_infos: List[AdministratorTaskInfo] = \ + self._admin_task_client.getTasks(TaskFilter(productIDs=[2])) + # ... but no product-specific tasks exist in this test suite. 
+ self.assertEqual(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter()) + self.assertEqual(len(task_infos), 3) + + self.assertEqual(sum(1 for t in task_infos + if t.normalInfo.status == + TaskStatus._NAMES_TO_VALUES["COMPLETED"]), 1) + self.assertEqual(sum(1 for t in task_infos + if t.normalInfo.status == + TaskStatus._NAMES_TO_VALUES["FAILED"]), 1) + self.assertEqual(sum(1 for t in task_infos + if t.normalInfo.status == + TaskStatus._NAMES_TO_VALUES["CANCELLED"]), 1) + + def test_task_5_info_query_filters(self): + current_time_epoch = int(datetime.now(timezone.utc).timestamp()) + + task_infos: List[AdministratorTaskInfo] = \ + self._privileged_task_client.getTasks(TaskFilter( + machineIDs=["nonexistent"] + )) + self.assertEqual(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + machineIDs=["unprivileged"] + )) + self.assertEqual(len(task_infos), 3) + + tokens_from_previous_test = [t.normalInfo.token for t in task_infos] + + task_infos = self._admin_task_client.getTasks(TaskFilter( + tokens=tokens_from_previous_test + )) + # Admin client is not a SUPERUSER, it should not get the list of + # tasks visible only to superusers because they are "server-level". + self.assertEqual(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + machineIDs=["privileged"] + )) + self.assertEqual(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + startedBeforeEpoch=current_time_epoch + )) + self.assertEqual(len(task_infos), 3) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + startedAfterEpoch=current_time_epoch + )) + self.assertEqual(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + cancelFlag=Ternary._NAMES_TO_VALUES["ON"] + )) + self.assertEqual(len(task_infos), 1) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + cancelFlag=Ternary._NAMES_TO_VALUES["OFF"] + )) + self.assertEqual(len(task_infos), 2) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + consumedFlag=Ternary._NAMES_TO_VALUES["ON"] + )) + self.assertEqual(len(task_infos), 3) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + consumedFlag=Ternary._NAMES_TO_VALUES["OFF"] + )) + self.assertEqual(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter()) + + current_time_epoch = int(datetime.now(timezone.utc).timestamp()) + for i in range(10): + target_api = self._anonymous_task_client if i % 2 == 0 \ + else self._admin_task_client + for j in range(10): + target_api.createDummyTask(1, bool(j % 2 == 0)) + + task_infos = self._privileged_task_client.getTasks(TaskFilter()) + self.assertEqual(len(task_infos), 103) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + )) + self.assertEqual(len(task_infos), 100) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + machineIDs=["unprivileged"] + )) + self.assertEqual(len(task_infos), 50) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + machineIDs=["privileged"] + )) + self.assertEqual(len(task_infos), 50) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + filterForNoUsername=True, + )) + self.assertEqual(len(task_infos), 50) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + 
enqueuedAfterEpoch=current_time_epoch, + usernames=["admin"], + )) + self.assertEqual(len(task_infos), 50) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + usernames=["root"], + )) + self.assertEqual(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + startedAfterEpoch=current_time_epoch + )) + # Some tasks ought to have started at least. + self.assertGreater(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + startedAfterEpoch=current_time_epoch, + completedAfterEpoch=current_time_epoch + )) + # Some tasks ought to have also finished at least. + self.assertGreater(len(task_infos), 0) + + # Let every task terminate. We should only need 1 second per task, + # running likely in a multithreaded environment. + # Let's have some leeway, though... + time.sleep(2 * (100 * 1 // cast(int, os.cpu_count()))) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + startedAfterEpoch=current_time_epoch, + completedAfterEpoch=current_time_epoch + )) + # All tasks should have finished. + self.assertEqual(len(task_infos), 100) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + startedAfterEpoch=current_time_epoch, + completedAfterEpoch=current_time_epoch, + statuses=[TaskStatus._NAMES_TO_VALUES["COMPLETED"]] + )) + self.assertEqual(len(task_infos), 50) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + startedAfterEpoch=current_time_epoch, + completedAfterEpoch=current_time_epoch, + statuses=[TaskStatus._NAMES_TO_VALUES["FAILED"]] + )) + self.assertEqual(len(task_infos), 50) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + startedAfterEpoch=current_time_epoch, + completedAfterEpoch=current_time_epoch, + cancelFlag=Ternary._NAMES_TO_VALUES["ON"] + )) + self.assertEqual(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + startedAfterEpoch=current_time_epoch, + completedAfterEpoch=current_time_epoch, + consumedFlag=Ternary._NAMES_TO_VALUES["ON"] + )) + self.assertEqual(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + machineIDs=["*privileged"] + )) + self.assertEqual(len(task_infos), 103) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + kinds=["*Dummy*"] + )) + self.assertEqual(len(task_infos), 103) + + # Try to consume the task status from the wrong user! 
+ task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + startedAfterEpoch=current_time_epoch, + completedAfterEpoch=current_time_epoch, + filterForNoUsername=True, + statuses=[TaskStatus._NAMES_TO_VALUES["COMPLETED"]] + )) + self.assertEqual(len(task_infos), 25) + a_token: str = task_infos[0].normalInfo.token + with self.assertRaises(RequestFailed): + self._admin_task_client.getTaskInfo(a_token) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + machineIDs=["workspace-manager"] + )) + self.assertEqual(len(task_infos), 0) + + def test_task_6_dropping(self): + current_time_epoch = int(datetime.now(timezone.utc).timestamp()) + many_task_count = 4 * cast(int, os.cpu_count()) + for _ in range(many_task_count): + self._anonymous_task_client.createDummyTask(600, False) + + STOP_SERVER_NO_AUTH.set() + time.sleep(30) + STOP_SERVER_NO_AUTH.clear() + after_shutdown_time_epoch = int(datetime.now(timezone.utc) + .timestamp()) + + task_infos: List[AdministratorTaskInfo] = \ + self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + statuses=[ + TaskStatus._NAMES_TO_VALUES["ENQUEUED"], + TaskStatus._NAMES_TO_VALUES["RUNNING"], + TaskStatus._NAMES_TO_VALUES["COMPLETED"], + TaskStatus._NAMES_TO_VALUES["FAILED"], + TaskStatus._NAMES_TO_VALUES["CANCELLED"] + ] + )) + self.assertEqual(len(task_infos), 0) + + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + statuses=[TaskStatus._NAMES_TO_VALUES["DROPPED"]], + # System-level dropping is not a "cancellation" action! + cancelFlag=Ternary._NAMES_TO_VALUES["OFF"] + )) + self.assertEqual(len(task_infos), many_task_count) + dropped_task_infos = {ti.normalInfo.token: ti for ti in task_infos} + + # Some tasks will have started, and the server pulled out from under. + task_infos = self._privileged_task_client.getTasks(TaskFilter( + enqueuedAfterEpoch=current_time_epoch, + startedBeforeEpoch=after_shutdown_time_epoch, + statuses=[TaskStatus._NAMES_TO_VALUES["DROPPED"]] + )) + for ti in task_infos: + self.assertIn("SHUTDOWN!\nTask honoured graceful cancel signal " + "generated by server shutdown.", + ti.normalInfo.comments) + del dropped_task_infos[ti.normalInfo.token] + + # The rest could have never started. 
+ for ti in dropped_task_infos.values(): + self.assertTrue("DROPPED!\n" in ti.normalInfo.comments or + "SHUTDOWN!\n" in ti.normalInfo.comments) diff --git a/web/tests/libtest/env.py b/web/tests/libtest/env.py index 1610db8bef..f89e495992 100644 --- a/web/tests/libtest/env.py +++ b/web/tests/libtest/env.py @@ -18,16 +18,18 @@ import socket import stat import subprocess +from typing import cast from codechecker_common.util import load_json -from .thrift_client_to_db import get_auth_client -from .thrift_client_to_db import get_config_client -from .thrift_client_to_db import get_product_client -from .thrift_client_to_db import get_viewer_client +from .thrift_client_to_db import \ + get_auth_client, \ + get_config_client, \ + get_product_client, \ + get_task_client, \ + get_viewer_client -from functional import PKG_ROOT -from functional import REPO_ROOT +from functional import PKG_ROOT, REPO_ROOT def get_free_port(): @@ -236,6 +238,30 @@ def setup_config_client(workspace, session_token=session_token, protocol=proto) +def setup_task_client(workspace, + host=None, port=None, + uri="/Tasks", + auto_handle_connection=True, + session_token=None, + protocol="http"): + if not host and not port: + codechecker_cfg = import_test_cfg(workspace)["codechecker_cfg"] + port = codechecker_cfg["viewer_port"] + host = codechecker_cfg["viewer_host"] + + if session_token is None: + session_token = get_session_token(workspace, host, port) + if session_token == "_PROHIBIT": + session_token = None + + return get_task_client(port=port, + host=cast(str, host), + uri=uri, + auto_handle_connection=auto_handle_connection, + session_token=session_token, + protocol=protocol) + + def repository_root(): return os.path.abspath(os.environ['REPO_ROOT']) diff --git a/web/tests/libtest/thrift_client_to_db.py b/web/tests/libtest/thrift_client_to_db.py index de7788c929..2b5c5a11e8 100644 --- a/web/tests/libtest/thrift_client_to_db.py +++ b/web/tests/libtest/thrift_client_to_db.py @@ -238,6 +238,26 @@ def __getattr__(self, attr): return partial(self._thrift_client_call, attr) +class CCTaskHelper(ThriftAPIHelper): + def __init__(self, proto, host, port, uri, auto_handle_connection=True, + session_token=None): + from codechecker_api.codeCheckerServersideTasks_v6 \ + import codeCheckerServersideTaskService + from codechecker_client.credential_manager import SESSION_COOKIE_NAME + + url = create_product_url(proto, host, port, f"/v{VERSION}{uri}") + transport = THttpClient.THttpClient(url) + protocol = TJSONProtocol.TJSONProtocol(transport) + client = codeCheckerServersideTaskService.Client(protocol) + if session_token: + headers = {'Cookie': f"{SESSION_COOKIE_NAME}={session_token}"} + transport.setCustomHeaders(headers) + super().__init__(transport, client, auto_handle_connection) + + def __getattr__(self, attr): + return partial(self._thrift_client_call, attr) + + def get_all_run_results( client, run_id=None, @@ -303,3 +323,10 @@ def get_config_client(port, host='localhost', uri='/Configuration', return CCConfigHelper(protocol, host, port, uri, auto_handle_connection, session_token) + + +def get_task_client(port, host="localhost", uri="/Tasks", + auto_handle_connection=True, session_token=None, + protocol="http"): + return CCTaskHelper(protocol, host, port, uri, auto_handle_connection, + session_token)
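
For reference, the following minimal sketch (illustrative only, not part of the patch) shows how the new "/Tasks" test helpers introduced above fit together. The workspace, host, and port values are placeholders, and the meaning of the createDummyTask() arguments (duration in seconds, "should the task fail" flag) is inferred from the tests in test_task_management.py:

    import os
    import time

    from codechecker_api.codeCheckerServersideTasks_v6.ttypes import TaskStatus

    from libtest import env

    # Assumes a CodeChecker test server is already running and TEST_WORKSPACE
    # has been exported, as done by setup_class() in the task tests above.
    workspace = os.environ["TEST_WORKSPACE"]
    task_client = env.setup_task_client(workspace,
                                        host="localhost", port=8001)

    # Schedule a 10-second dummy task that is expected to succeed ...
    token = task_client.createDummyTask(10, False)

    # ... then poll it until it leaves the ENQUEUED/RUNNING states.
    info = task_client.getTaskInfo(token)
    while info.status in (TaskStatus._NAMES_TO_VALUES["ENQUEUED"],
                          TaskStatus._NAMES_TO_VALUES["RUNNING"]):
        time.sleep(1)
        info = task_client.getTaskInfo(token)

    print(info.status == TaskStatus._NAMES_TO_VALUES["COMPLETED"])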