diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 91287f2..e2dc161 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,7 +17,7 @@ jobs: - name: Install dependencies run: | python3 -m pip install --upgrade pip - pip3 install requests ujson graphviz ply pytest atomicwrites more-itertools pluggy py attrs setuptools six django==2.1 psycopg2 clade pyyaml pycparser sympy + pip3 install requests ujson graphviz ply pytest atomicwrites more-itertools pluggy py attrs setuptools six django psycopg2 clade pyyaml pycparser sympy - name: Deployment of CV run: | DEPLOY_DIR=build make install -j$(nproc) @@ -41,7 +41,7 @@ jobs: - name: Install dependencies run: | python3 -m pip install --upgrade pip - pip3 install requests ujson graphviz ply pytest atomicwrites more-itertools pluggy py attrs setuptools six django==2.1 psycopg2 pycparser sympy + pip3 install requests ujson graphviz ply pytest atomicwrites more-itertools pluggy py attrs setuptools six django psycopg2 pycparser sympy - name: Deployment of Benchmark Visualizer run: | DEPLOY_DIR=build make install-benchmark-visualizer -j$(nproc) @@ -62,7 +62,7 @@ jobs: - name: Install dependencies run: | python3 -m pip install --upgrade pip - pip3 install requests ujson graphviz ply pytest atomicwrites more-itertools pluggy py attrs setuptools six django==2.1 psycopg2 pycparser sympy + pip3 install requests ujson graphviz ply pytest atomicwrites more-itertools pluggy py attrs setuptools six django psycopg2 pycparser sympy - name: Deployment of MEA run: | DEPLOY_DIR=build make install-mea -j$(nproc) diff --git a/.pylintrc b/.pylintrc index 7632ac5..a8fa62d 100644 --- a/.pylintrc +++ b/.pylintrc @@ -16,5 +16,8 @@ disable= too-many-nested-blocks, import-outside-toplevel, no-member, - duplicate-code + duplicate-code, + too-few-public-methods, + unnecessary-lambda, + no-name-in-module diff --git a/Makefile b/Makefile index 3fc5e47..cea6110 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ cpu_cores=$(shell nproc) # Additional tools. -klever="klever" +cvv="cvv" cil="cil" frama_c_cil="frama_c_cil" benchexec="benchexec" @@ -36,8 +36,8 @@ auto_script="auto_check.py" # Directories root_dir=$(shell pwd) install_dir=tools -klever_dir=${install_dir}/${klever} -mea_lib=${install_dir}/${klever}/bridge/reports/mea/core.py +cvv_dir=${install_dir}/${cvv} +mea_lib=web/reports/mea/core.py cil_dir=${install_dir}/${cil} frama_c_cil_dir=${install_dir}/${frama_c_cil} benchexec_dir=${install_dir}/${benchexec} @@ -50,23 +50,23 @@ cil_arch="cil.xz" compiled_cil_arch="frama_c_cil.xz" # Repositories -klever_repo="https://github.com/mutilin/klever.git" +cvv_repo="https://gitlab.ispras.ru/verification/cvv.git" benchexec_repo="https://github.com/sosy-lab/benchexec.git" cif_repo="https://github.com/ldv-klever/cif.git" cif_compiled_link="https://github.com/ldv-klever/cif/releases/download/v1.2/linux-x86_64-cif-1.2.tar.xz" cil_compiled_link="https://forge.ispras.ru/attachments/download/9905/frama-c-cil-c012809.tar.xz" # Aux constants. 
-cvwi_branch=cv-v2.0 +cvv_branch=master benchexec_branch=3.16 cif_revision=master tools_config_file=${install_dir}/config.json -download-klever: - @$(call download_tool,${klever},${klever_dir},${klever_repo}) - @cd ${klever_dir}; git checkout ${cvwi_branch}; git pull +download-cvv: + @$(call download_tool,${cvv},${cvv_dir},${cvv_repo}) + @cd ${cvv_dir}; git checkout ${cvv_branch}; git pull download-benchexec: @$(call download_tool,${benchexec},${benchexec_dir},${benchexec_repo}) @@ -84,14 +84,13 @@ download-frama-c-cil: @rm -f ${compiled_cil_arch} @cd ${install_dir}; wget ${cil_compiled_link} -O ${compiled_cil_arch} -download: download-klever download-benchexec download-cpa +download: download-cvv download-benchexec download-cpa @echo "*** Downloading has been completed ***" -build-klever: download-klever - @echo "*** Building ${klever} ***" - @echo "from bridge.development import *" > ${klever_dir}/bridge/bridge/settings.py - @echo "{}" > ${klever_dir}/bridge/bridge/db.json - @cp -r ${mea_lib} ${root_dir}/scripts/aux/mea.py +build-cvv: download-cvv + @echo "*** Building ${cvv} ***" + @echo "from web.development import *" > ${cvv_dir}/web/web/settings.py + @echo "{}" > ${cvv_dir}/web/web/db.json build-benchexec: download-benchexec @echo "*** Building ${benchexec} ***" @@ -117,7 +116,7 @@ build-frama-c-cil: download-frama-c-cil @mkdir -p ${frama_c_cil_dir} @cd ${frama_c_cil_dir}; tar -xf ../${compiled_cil_arch} -build: build-klever build-benchexec build-cil build-cpa +build: build-cvv build-benchexec build-cil build-cpa @echo "*** Building has been completed ***" clean-cpa: @@ -139,20 +138,19 @@ check-deploy-dir: @$(call check_dir,${DEPLOY_DIR},DEPLOY_DIR) -install-klever: build-klever check-deploy-dir - @echo "*** Installing ${klever} ***" +install-cvv: build-cvv check-deploy-dir + @echo "*** Installing ${cvv} ***" @mkdir -p ${DEPLOY_DIR}/${install_dir} - @rm -rf ${DEPLOY_DIR}/${klever_dir} - @cp -r ${klever_dir} ${DEPLOY_DIR}/${klever_dir} + @rm -rf ${DEPLOY_DIR}/${cvv_dir} + @cp -r ${cvv_dir} ${DEPLOY_DIR}/${cvv_dir} @mkdir -p ${DEPLOY_DIR}/scripts/aux - @cp -r ${mea_lib} ${DEPLOY_DIR}/scripts/aux/mea.py - @$(call shrink_installation,${DEPLOY_DIR}/${klever_dir}) + @$(call shrink_installation,${DEPLOY_DIR}/${cvv_dir}) -deploy-klever-cv: build-klever check-deploy-dir - @echo "*** Deploying ${klever}-CV web-interface ***" +deploy-cvv: build-cvv check-deploy-dir + @echo "*** Deploying ${cvv} web-interface ***" @mkdir -p ${DEPLOY_DIR} @rm -rf ${DEPLOY_DIR} - @cp -r ${klever_dir} ${DEPLOY_DIR} + @cp -r ${cvv_dir} ${DEPLOY_DIR} @$(call shrink_installation,${DEPLOY_DIR}) install-benchexec: build-benchexec check-deploy-dir @@ -200,53 +198,37 @@ install-scripts: check-deploy-dir cp -r ${root_dir}/${plugin_dir} . 
; \ mkdir -p buildbot -install-witness-visualizer: check-deploy-dir build-klever +install-witness-visualizer: check-deploy-dir build-cvv @mkdir -p ${DEPLOY_DIR}/${install_dir} @cp ${tools_config_file} ${DEPLOY_DIR}/${install_dir} - @rm -rf ${DEPLOY_DIR}/${klever_dir} - @mkdir -p ${DEPLOY_DIR}/${klever_dir} - @mkdir -p ${DEPLOY_DIR}/${klever_dir}/core/core/vrp/et/ - @cp ${klever_dir}/core/core/vrp/et/*.py ${DEPLOY_DIR}/${klever_dir}/core/core/vrp/et/ - @mkdir -p ${DEPLOY_DIR}/${klever_dir}/bridge/ - @mkdir -p ${DEPLOY_DIR}/${klever_dir}/bridge/templates/reports/ - @mkdir -p ${DEPLOY_DIR}/${klever_dir}/bridge/reports/ - @mkdir -p ${DEPLOY_DIR}/${klever_dir}/bridge/bridge/ - @mkdir -p ${DEPLOY_DIR}/${klever_dir}/bridge/media/ - @cp -r ${klever_dir}/bridge/static ${DEPLOY_DIR}/${klever_dir}/bridge/ - @cp ${klever_dir}/bridge/templates/base.html ${DEPLOY_DIR}/${klever_dir}/bridge/templates/ - @cp ${klever_dir}/bridge/reports/templates/reports/*.html ${DEPLOY_DIR}/${klever_dir}/bridge/templates/reports/ - @cp -r ${klever_dir}/bridge/reports/mea ${DEPLOY_DIR}/${klever_dir}/bridge/reports/ - @cp -r ${klever_dir}/bridge/reports/static ${DEPLOY_DIR}/${klever_dir}/bridge/reports/ - @cp ${klever_dir}/bridge/reports/etv.py ${DEPLOY_DIR}/${klever_dir}/bridge/reports/ - @cp ${klever_dir}/bridge/bridge/* ${DEPLOY_DIR}/${klever_dir}/bridge/bridge/ - @rm -rf ${DEPLOY_DIR}/${klever_dir}/bridge/static/codemirror - @rm -rf ${DEPLOY_DIR}/${klever_dir}/bridge/static/calendar - @rm -rf ${DEPLOY_DIR}/${klever_dir}/bridge/static/jstree - @rm -rf ${DEPLOY_DIR}/${klever_dir}/bridge/static/js/population.js + @rm -rf ${DEPLOY_DIR}/${cvv_dir} + @cp -r ${cvv_dir}/web ${DEPLOY_DIR}/${cvv_dir} + @rm -rf ${DEPLOY_DIR}/${cvv_dir}/web/static/codemirror + @rm -rf ${DEPLOY_DIR}/${cvv_dir}/web/static/calendar + @rm -rf ${DEPLOY_DIR}/${cvv_dir}/web/static/jstree + @rm -rf ${DEPLOY_DIR}/${cvv_dir}/web/static/js/population.js @cd ${DEPLOY_DIR} ; \ cp -r ${root_dir}/scripts/ . 
; \
	rm -f scripts/${launch_script} ; \
	rm -f scripts/${auto_script} ; \
-	rm -f scripts/${bv_script} ; \
-	cp ${klever_dir}/bridge/reports/mea/core.py scripts/aux/mea.py
+	rm -f scripts/${bv_script}
	@echo "*** Witness Visualizer has been successfully installed into the directory ${DEPLOY_DIR} ***"

install-mea: install-witness-visualizer
	@cd ${DEPLOY_DIR} ; \
	rm -f scripts/${wv_script} ; \
-	rm -rf ${klever_dir}/bridge/
+	rm -rf ${cvv_dir}/web/
	@echo "*** MEA has been successfully installed into the directory ${DEPLOY_DIR} ***"

install-benchmark-visualizer: install-witness-visualizer
-	@cp -r ${klever_dir}/utils/ ${DEPLOY_DIR}/${klever_dir}/
+	@cp -r ${cvv_dir}/utils/ ${DEPLOY_DIR}/${cvv_dir}/
	@cp -f ${root_dir}/scripts/${bv_script} ${DEPLOY_DIR}/scripts/
-	@cp ${klever_dir}/core/core/*.py ${DEPLOY_DIR}/${klever_dir}/core/core/

-install: check-deploy-dir install-klever install-benchexec install-cil install-cpa install-scripts install-cpa
+install: check-deploy-dir install-cvv install-benchexec install-cil install-cpa install-scripts
	@$(call verify_installation,${DEPLOY_DIR})
	@echo "*** Successfully installed into the directory ${DEPLOY_DIR} ***"

-install-with-cloud: check-deploy-dir install-klever install-benchexec install-cil install-cpa-with-cloud-links install-scripts
+install-with-cloud: check-deploy-dir install-cvv install-benchexec install-cil install-cpa-with-cloud-links install-scripts
	@$(call verify_installation,${DEPLOY_DIR})
	@echo "*** Successfully installed into the directory ${DEPLOY_DIR} with access to verification cloud ***"

@@ -324,7 +306,7 @@ endef
# $1 - deploy directory
define verify_installation
	echo "Verifying installation in directory '$1'"
-	for tool in ${benchexec} ${klever}; do \
+	for tool in ${benchexec} ${cvv}; do \
		if [ -d "${1}/${install_dir}/$${tool}" ]; then \
			echo "Tool '$${tool}' is installed" ; \
		else \
@@ -337,5 +319,5 @@ endef
# $1 - deploy directory
define shrink_installation
	echo "Removing aux files in directory '$1'"
-	@cd ${1} && rm -rf presets/ .git bridge/reports/test_files/
+	@cd ${1} && rm -rf .git .idea
endef
diff --git a/README.md b/README.md
index 81f9088..9630334 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ Witness Visualizer converts generic witnesses from [SV-COMP](https://sv-comp.sos
Python (version>=3.4), python modules:

```shell
-sudo pip3 install requests ujson graphviz ply pytest atomicwrites more-itertools pluggy py attrs setuptools six django==2.1 psycopg2 pycparser sympy
+sudo pip3 install requests ujson graphviz ply pytest atomicwrites more-itertools pluggy py attrs setuptools six django psycopg2 pycparser sympy
```

#### Deployment
diff --git a/configs/example.json b/configs/example.json
index 2e3484a..380ce47 100644
--- a/configs/example.json
+++ b/configs/example.json
@@ -5,7 +5,7 @@
  },
  "tools": {
    "cil": "abs path to cilly.asm.exe",
-    "klever": "abs path to Klever core",
+    "cvv": "abs path to CVV",
    "benchexec": "abs path to benchexec",
    "cpachecker": {
      "unreachability": "abs path to CPAchecker branch ldv-bam",
@@ -105,10 +105,10 @@
    "master": "host, on which cloud master is running"
  },
  "uploader": {
-    "upload results": "if true, then results will be automatically uploaded into the server (Klever web-interface)",
+    "upload results": "if true, then results will be automatically uploaded into the server (CVV web-interface)",
    "identifier": "job identifier on the server",
    "parent id": "if true, then the given identifier will be treated as parent job identifier",
-    "server": "<host>:<port> of Klever web-interface",
+    "server": "<host>:<port> of CVV web-interface",
    "user": "valid user name on the server (this user should have access for this job identifier)",
    "password": "password for the user",
    "name": "name of the report to be uploaded"
diff --git a/docs/tools_ru.md b/docs/tools_ru.md
index cfdcb79..419987c 100755
--- a/docs/tools_ru.md
+++ b/docs/tools_ru.md
@@ -14,8 +14,8 @@ sudo apt install git openjdk-11-jdk python3 python3-dev ant lcov cmake libmpc-de
sudo pip3 install requests ujson graphviz ply pytest atomicwrites pathlib2 more-itertools pluggy py attrs setuptools six django==2.1 clade psycopg2 pyyaml pycparser sympy
```

-1. The klever tool is a web interface responsible for visualizing the results.
-Repository: https://github.com/mutilin/klever.git
+1. The CVV tool is a web interface responsible for visualizing the results.
+Repository: https://github.com/vmordan/cvv
Deployment of the web interface is described in the instructions in docs/web_interface.txt.

2. The CIL tool is responsible for merging source files and simplifying them.
diff --git a/docs/web_interface.txt b/docs/web_interface.txt
index 57ebb31..91914f3 100644
--- a/docs/web_interface.txt
+++ b/docs/web_interface.txt
@@ -1,9 +1,9 @@
Deployment of the Continuous Verification web-interface for results visualization
=================================================================================

-1. Deploy Klever-CV web-interface with command:
+1. Deploy CVV web-interface with the command:

-make deploy-klever-cv DEPLOY_DIR=<deploy dir>
+make deploy-cvv DEPLOY_DIR=<deploy dir>

WARNING: everything in the <deploy dir> will be removed. This path will be used for keeping
database files, therefore it should not be modified for correct visualization of results.
diff --git a/scripts/components/__init__.py b/scripts/components/__init__.py
index a76cdc4..b09c9d6 100644
--- a/scripts/components/__init__.py
+++ b/scripts/components/__init__.py
@@ -23,12 +23,10 @@
# Default location of additional tools.
CIL = "cil"
-ET_LIB = "klever"
ET_HTML_LIB = "et html"
BENCHEXEC = "benchexec"
CPACHECKER = "cpachecker"
CIF = "cif"
-CLADE = "clade"
UPLOADER = "uploader"

DEFAULT_CPACHECKER_SCRIPTS_PATH = "scripts"
diff --git a/scripts/components/coverage_processor.py b/scripts/components/coverage_processor.py
index 94668e2..7cb3ecf 100644
--- a/scripts/components/coverage_processor.py
+++ b/scripts/components/coverage_processor.py
@@ -27,11 +27,11 @@
import os
import re
import subprocess
-import sys
import zipfile

from components import *
from components.component import Component
+from coverage.lcov import LCOV

TAG_COVERAGE_MODE = "mode"
TAG_COVERAGE_PERCENT_MODE = "percent mode"
@@ -210,6 +210,7 @@ def __init__(self, launcher_component: Component = None, basic_config=None, inst
        self.mode = self.component_config.get(TAG_COVERAGE_MODE, DEFAULT_COVERAGE_MODE)
        self.percent_mode = self.component_config.get(TAG_COVERAGE_PERCENT_MODE, COVERAGE_PERCENT_LOG)
+        # TODO: check other modes - do we need this argument?
        self.full_mode = self.component_config.get(TAG_FULL_COVERAGE_MODE, "full")
        self.internal_logger = logging.getLogger(name=COMPONENT_COVERAGE)
        self.internal_logger.setLevel(self.logger.level)
@@ -271,13 +272,6 @@ def __full_coverage(self, source_dirs: set, coverage_file: str):
                dummy_dir = os.path.join(src_dir, CLADE_WORK_DIR)
                break

-        # Export libs.
- et_parser_lib = self.get_tool_path(self._get_tool_default_path(ET_LIB), - self.config.get(TAG_TOOLS, {}).get(ET_LIB)) - sys.path.append(et_parser_lib) - # noinspection PyUnresolvedReferences - from core.coverage import LCOV - lcov = LCOV(self.internal_logger, coverage_file, dummy_dir, source_dirs, [], self.launcher_dir, self.full_mode, ignore_files={os.path.join(DIRECTORY_WITH_GENERATED_FILES, diff --git a/scripts/components/mea.py b/scripts/components/mea.py index 3c32bb5..b909604 100644 --- a/scripts/components/mea.py +++ b/scripts/components/mea.py @@ -26,22 +26,15 @@ import json import logging import multiprocessing -import operator -import re import resource import time import zipfile -# noinspection PyUnresolvedReferences -from aux.mea import DEFAULT_CONVERSION_FUNCTION, CONVERSION_FUNCTION_MODEL_FUNCTIONS, \ - CONVERSION_FUNCTION_CALL_TREE, CONVERSION_FUNCTION_NOTES, convert_error_trace, \ - compare_error_traces, is_equivalent, DEFAULT_COMPARISON_FUNCTION, \ - DEFAULT_SIMILARITY_THRESHOLD, TAG_COMPARISON_FUNCTION, TAG_CONVERSION_FUNCTION, \ - TAG_ADDITIONAL_MODEL_FUNCTIONS, CONVERSION_FUNCTION_FULL - from aux.common import * from components import * from components.component import Component +from mea.core import * +from mea.et import import_error_trace ERROR_TRACE_FILE = "error trace.json" CONVERTED_ERROR_TRACES = "converted error traces.json" @@ -86,7 +79,6 @@ def __init__(self, general_config: dict, error_traces: list, install_dir: str, r os.makedirs(self.result_dir, exist_ok=True) else: self.result_dir = None - self.__export_et_parser_lib() self.rule = rule # List of files with error traces. @@ -309,7 +301,8 @@ def __print_trace_archive(self, error_trace_file_name: str, witness_type=WITNESS zfp.write(source_files, arcname=ERROR_TRACE_SOURCES) zfp.write(converted_traces_files, arcname=CONVERTED_ERROR_TRACES) if self.result_dir: - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "bridge.settings") + self.__export_et_parser_lib() + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "web.settings") import django from django.conf import settings settings.INSTALLED_APPS = ( @@ -320,7 +313,7 @@ def __print_trace_archive(self, error_trace_file_name: str, witness_type=WITNESS 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', - 'reports', + 'reports', 'jobs' ) django.setup() # noinspection PyUnresolvedReferences @@ -348,11 +341,8 @@ def __process_parsed_trace(parsed_error_trace: dict): parsed_error_trace['files'] = src_files def __parse_trace(self, error_trace_file: str, supported_types: set) -> dict: - # noinspection PyUnresolvedReferences - from core.vrp.et import import_error_trace - # Those messages are waste of space. 
- logger = self._create_logger( + logger = Component._create_logger( "Witness processor", logging.WARNING if self.debug else logging.ERROR ) try: @@ -429,9 +419,6 @@ def __get_option_for_rule(self, tag: str, default_value): return self.component_config.get(self.rule, {}).get(tag, default) def __export_et_parser_lib(self): - et_parser_lib = self.get_tool_path(self._get_tool_default_path(ET_LIB), - self.config.get(TAG_TOOLS, {}).get(ET_LIB)) - sys.path.append(et_parser_lib) et_html_lib = self.get_tool_path(self._get_tool_default_path(ET_HTML_LIB), self.config.get(TAG_TOOLS, {}).get(ET_HTML_LIB)) sys.path.append(et_html_lib) diff --git a/scripts/coverage/__init__.py b/scripts/coverage/__init__.py new file mode 100644 index 0000000..5aad4a7 --- /dev/null +++ b/scripts/coverage/__init__.py @@ -0,0 +1,18 @@ +# +# CV is a framework for continuous verification. +# +# Copyright (c) 2018-2023 ISP RAS (http://www.ispras.ru) +# Ivannikov Institute for System Programming of the Russian Academy of Sciences +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/scripts/coverage/lcov.py b/scripts/coverage/lcov.py new file mode 100644 index 0000000..9b6caf9 --- /dev/null +++ b/scripts/coverage/lcov.py @@ -0,0 +1,378 @@ +# +# CV is a framework for continuous verification. +# This module was based on Klever-CV repository (https://github.com/mutilin/klever/tree/cv-v2.0). +# +# Copyright (c) 2018-2023 ISP RAS (http://www.ispras.ru) +# Ivannikov Institute for System Programming of the Russian Academy of Sciences +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Module for lcov coverage processing. 
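+
+A minimal lcov record, as emitted by lcov/gcov, looks as follows (the contents
+are illustrative):
+
+    TN:
+    SF:/abs/path/to/source.c
+    FN:12,foo
+    FNDA:3,foo
+    DA:12,3
+    DA:13,0
+    end_of_record
+
+These prefixes (TN:, SF:, FN:, FNDA:, DA: and end_of_record) are exactly the
+ones handled by the LCOV class below.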
+""" + +import json +import os +import re + + +def _add_to_coverage(merged_coverage_info, coverage_info): + for file_name in coverage_info: + merged_coverage_info.setdefault(file_name, { + 'total functions': coverage_info[file_name][0]['total functions'], + 'covered lines': {}, + 'covered functions': {}, + 'covered function names': [] + }) + + for coverage in coverage_info[file_name]: + for path in ('covered lines', 'covered functions'): + for line, value in coverage[path].items(): + merged_coverage_info[file_name][path].setdefault(line, 0) + merged_coverage_info[file_name][path][line] += value + if coverage.get('covered function names'): + for name in coverage['covered function names']: + if name not in merged_coverage_info[file_name]['covered function names']: + merged_coverage_info[file_name]['covered function names'].append(name) + + +def _get_coverage(merged_coverage_info): + # Map combined coverage to the required format + line_coverage = {} + function_coverage = {} + function_statistics = {} + function_name_staticitcs = {} + + for file_name in list(merged_coverage_info.keys()): + for line, value in merged_coverage_info[file_name]['covered lines'].items(): + line_coverage.setdefault(value, {}) + line_coverage[value].setdefault(file_name, []) + line_coverage[value][file_name].append(int(line)) + + for line, value in merged_coverage_info[file_name]['covered functions'].items(): + function_coverage.setdefault(value, {}) + function_coverage[value].setdefault(file_name, []) + function_coverage[value][file_name].append(int(line)) + + function_statistics[file_name] = [len(merged_coverage_info[file_name]['covered functions']), + merged_coverage_info[file_name]['total functions']] + + if merged_coverage_info[file_name].get('covered function names'): + function_name_staticitcs[file_name] = \ + list(merged_coverage_info[file_name]['covered function names']) + function_name_staticitcs['overall'] = None + + # Merge covered lines into the range + for key, value in line_coverage.items(): + for file_name, lines in value.items(): + value[file_name] = __build_ranges(lines) + + return { + 'line coverage': [[key, value] for key, value in line_coverage.items()], + 'function coverage': { + 'statistics': function_statistics, + 'coverage': [[key, value] for key, value in function_coverage.items()] + }, + 'functions statistics': {'statistics': function_name_staticitcs, 'values': []} + } + + +def __build_ranges(lines): + if not lines: + return [] + res = [] + prev = 0 + lines = sorted(lines) + for i in range(1, len(lines)): + if lines[i] != lines[i - 1] + 1: + # The sequence is broken. + if i - 1 != prev: + # There is more than one line in the sequence. . + if i - 2 == prev: + # There is more than two lines in the sequence. Add the range. + res.append(lines[prev]) + res.append(lines[i - 1]) + else: + # Otherwise, add these lines separately. + res.append([lines[prev], lines[i - 1]]) + else: + # Just add a single non-sequence line. + res.append(lines[prev]) + prev = i + + # This step is the same as in the loop body. + if prev != len(lines) - 1: + if prev == len(lines) - 2: + res.append(lines[prev]) + res.append(lines[-1]) + else: + res.append([lines[prev], lines[-1]]) + else: + res.append(lines[prev]) + + return res + + +def _make_relative_path(dirs, file_or_dir, absolutize=False): + # Normalize paths first of all. + dirs = [os.path.normpath(directory) for directory in dirs] + file_or_dir = os.path.normpath(file_or_dir) + + # Check all dirs are absolute or relative. 
+    is_dirs_abs = False
+    if all(os.path.isabs(directory) for directory in dirs):
+        is_dirs_abs = True
+    elif all(not os.path.isabs(directory) for directory in dirs):
+        pass
+    else:
+        raise ValueError('Cannot mix absolute and relative dirs')
+
+    if os.path.isabs(file_or_dir):
+        # Making an absolute file_or_dir relative to relative dirs makes no sense.
+        if not is_dirs_abs:
+            return file_or_dir
+    else:
+        # One needs to absolutize file_or_dir since it can be relative to Clade storage.
+        if absolutize:
+            if not is_dirs_abs:
+                raise ValueError('Do not absolutize file_or_dir for relative dirs')
+
+            file_or_dir = os.path.join(os.path.sep, file_or_dir)
+        # file_or_dir is already relative.
+        elif is_dirs_abs:
+            return file_or_dir
+
+    # If possible, return the path relative to the longest matching directory.
+    for directory in sorted(dirs, key=lambda t: len(t), reverse=True):
+        if os.path.commonpath([file_or_dir, directory]) == directory:
+            return os.path.relpath(file_or_dir, directory)
+
+    return file_or_dir
+
+
+class LCOV:
+    """
+    Coverage processor for lcov results
+    """
+    NEW_FILE_PREFIX = "TN:"
+    EOR_PREFIX = "end_of_record"
+    FILENAME_PREFIX = "SF:"
+    LINE_PREFIX = "DA:"
+    FUNCTION_PREFIX = "FNDA:"
+    FUNCTION_NAME_PREFIX = "FN:"
+    PARTIALLY_ALLOWED_EXT = ('.c', '.i', '.c.aux')
+
+    def __init__(self, logger, coverage_file, clade_dir, source_dirs, search_dirs, main_work_dir,
+                 completeness, coverage_id=None, coverage_info_dir=None, collect_functions=True,
+                 ignore_files=None, default_file=None):
+        # Public
+        self.logger = logger
+        self.coverage_file = coverage_file
+        self.clade_dir = clade_dir
+        self.source_dirs = [os.path.normpath(p) for p in source_dirs]
+        self.search_dirs = [os.path.normpath(p) for p in search_dirs]
+        self.main_work_dir = main_work_dir
+        self.completeness = completeness
+        self.coverage_info_dir = coverage_info_dir
+        self.arcnames = {}
+        self.collect_functions = collect_functions
+        if ignore_files is None:
+            ignore_files = set()
+        self.ignore_files = ignore_files
+        self.default_file = default_file
+
+        # Sanity checks
+        if self.completeness not in ('full', 'partial', 'lightweight', 'none', None):
+            raise NotImplementedError(f"Coverage type {self.completeness} is not supported")
+
+        # Import coverage
+        try:
+            if self.completeness in ('full', 'partial', 'lightweight'):
+                self.coverage_info = self.parse()
+
+                if coverage_id:
+                    with open(coverage_id, 'w', encoding='utf-8') as file_obj:
+                        json.dump(self.coverage_info, file_obj, ensure_ascii=True, sort_keys=True,
+                                  indent="\t")
+
+                coverage = {}
+                _add_to_coverage(coverage, self.coverage_info)
+                with open('coverage.json', 'w', encoding='utf-8') as file_obj:
+                    json.dump(_get_coverage(coverage), file_obj, ensure_ascii=True, sort_keys=True,
+                              indent=None)
+        except Exception:
+            if os.path.isfile('coverage.json'):
+                os.remove('coverage.json')
+            raise
+
+    def parse(self) -> dict:
+        """
+        Parses lcov results
+        """
+        dir_map = (
+            ('sources', self.source_dirs),
+            ('specifications', (
+                os.path.normpath(os.path.join(self.main_work_dir, 'job', 'root', 'specifications')),
+            )),
+            ('generated', (
+                os.path.normpath(self.main_work_dir),
+            ))
+        )
+
+        ignore_file = False
+
+        if not os.path.isfile(self.coverage_file):
+            raise FileNotFoundError(f'There is no coverage file {self.coverage_file}')
+
+        # Get dirs that should be excluded.
+        excluded_dirs = set()
+        if self.completeness in ('partial', 'lightweight'):
+            with open(self.coverage_file, encoding='utf-8') as file_obj:
+                # Build a map from each dir to the list of files it contains.
+                all_files = {}
+                for line in file_obj:
+                    line = line.rstrip('\n')
+                    if line.startswith(self.FILENAME_PREFIX):
+                        file_name = line[len(self.FILENAME_PREFIX):]
+                        file_name = os.path.normpath(file_name)
+                        if os.path.isfile(file_name):
+                            path, file = os.path.split(file_name)
+                            # All paths should be absolute, otherwise we cannot match source dirs.
+                            path = os.path.join(os.path.sep,
+                                                _make_relative_path([self.clade_dir], path))
+                            all_files.setdefault(path, [])
+                            all_files[path].append(file)
+
+                for path, files in all_files.items():
+                    # Lightweight coverage keeps only source code dirs.
+                    if self.completeness == 'lightweight' and \
+                            all(os.path.commonpath([s, path]) != s for s in self.source_dirs):
+                        self.logger.debug(f'Excluded {path}')
+                        excluded_dirs.add(path)
+                        continue
+                    # Partial coverage keeps only dirs that contain source files.
+                    for file in files:
+                        if file.endswith('.c') or file.endswith('.c.aux'):
+                            break
+                    else:
+                        excluded_dirs.add(path)
+
+        # Parse the coverage file.
+        coverage_info = {}
+        with open(self.coverage_file, encoding='utf-8') as file_obj:
+            count_covered_functions = None
+            for line in file_obj:
+                line = line.rstrip('\n')
+
+                if ignore_file and not line.startswith(self.FILENAME_PREFIX):
+                    continue
+
+                if line.startswith(self.NEW_FILE_PREFIX):
+                    # A new record starts: reset the per-file state.
+                    file_name = None
+                    covered_lines = {}
+                    function_to_line = {}
+                    covered_functions = {}
+                    count_covered_functions = 0
+                elif line.startswith(self.FILENAME_PREFIX):
+                    # Get the file name, determine its directory and decide whether to ignore it.
+                    real_file_name = line[len(self.FILENAME_PREFIX):]
+                    real_file_name = os.path.normpath(real_file_name)
+                    if self.default_file:
+                        # TODO: dirty workaround (required for specific cases).
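+                        # The substitution below strips VerifierCloud worker prefixes
+                        # (".../vcloud-*/worker/working_dir_*/") so that the remainder
+                        # of the path can be searched for in the source dirs.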
+                        dw_name = re.sub(r'.+/vcloud-\S+/worker/working_dir_[^/]+/', '',
+                                         real_file_name)
+                        real_file_name = self.default_file
+                        for source_dir in self.source_dirs:
+                            for tmp_file_name in [self.default_file, dw_name]:
+                                tmp_file_name = os.path.join(source_dir, tmp_file_name)
+                                if os.path.exists(tmp_file_name):
+                                    real_file_name = tmp_file_name
+                                    break
+                    file_name = os.path.join(os.path.sep,
+                                             _make_relative_path([self.clade_dir], real_file_name))
+                    if os.path.isfile(real_file_name) and \
+                            all(os.path.commonpath((p, file_name)) != p for p in excluded_dirs):
+                        for dest, srcs in dir_map:
+                            for src in srcs:
+                                if os.path.commonpath([real_file_name, src]) != src:
+                                    continue
+                                if dest in ('generated', 'specifications'):
+                                    new_file_name = os.path.join(dest, os.path.basename(file_name))
+                                else:
+                                    new_file_name = os.path.join(dest, os.path.relpath(file_name,
+                                                                                       src))
+
+                                if new_file_name in self.ignore_files:
+                                    continue
+                                ignore_file = False
+                                break
+                            else:
+                                continue
+                            break
+                        # This "else" corresponds to the outer "for" loop over dir_map.
+                        else:
+                            # Check other prefixes.
+                            new_file_name = _make_relative_path(self.search_dirs, file_name)
+                            if new_file_name == file_name:
+                                ignore_file = True
+                                continue
+                            ignore_file = False
+                            new_file_name = os.path.join('specifications', new_file_name)
+
+                        self.arcnames[real_file_name] = new_file_name
+                        old_file_name, file_name = real_file_name, new_file_name
+                    else:
+                        ignore_file = True
+                elif line.startswith(self.LINE_PREFIX):
+                    # Coverage of the specified line
+                    splts = line[len(self.LINE_PREFIX):].split(',')
+                    covered_lines[int(splts[0])] = int(splts[1])
+                elif line.startswith(self.FUNCTION_NAME_PREFIX):
+                    # Mapping of the function name to the line number
+                    splts = line[len(self.FUNCTION_NAME_PREFIX):].split(',')
+                    function_to_line.setdefault(splts[1], 0)
+                    function_to_line[splts[1]] = int(splts[0])
+                elif line.startswith(self.FUNCTION_PREFIX):
+                    # Coverage of the specified function
+                    splts = line[len(self.FUNCTION_PREFIX):].split(',')
+                    if splts[0] == "0":
+                        continue
+                    covered_functions[function_to_line[splts[1]]] = int(splts[0])
+                    count_covered_functions += 1
+                elif line.startswith(self.EOR_PREFIX):
+                    # End of coverage for the current file.
+
+                    # Add functions that were not covered.
+                    covered_functions.update({
+                        line: 0 for line in set(function_to_line.values()).difference(
+                            set(covered_functions.keys()))})
+
+                    coverage_info.setdefault(file_name, [])
+
+                    new_cov = {
+                        'file name': old_file_name,
+                        'arcname': file_name,
+                        'total functions': len(function_to_line),
+                        'covered lines': covered_lines,
+                        'covered functions': covered_functions
+                    }
+                    if self.collect_functions:
+                        new_cov['covered function names'] = \
+                            list(name for name, line in function_to_line.items()
+                                 if covered_functions[line] != 0)
+                    coverage_info[file_name].append(new_cov)
+
+        return coverage_info
diff --git a/scripts/mea/__init__.py b/scripts/mea/__init__.py
new file mode 100644
index 0000000..5aad4a7
--- /dev/null
+++ b/scripts/mea/__init__.py
@@ -0,0 +1,18 @@
+#
+# CV is a framework for continuous verification.
+#
+# Copyright (c) 2018-2023 ISP RAS (http://www.ispras.ru)
+# Ivannikov Institute for System Programming of the Russian Academy of Sciences
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/scripts/mea/core.py b/scripts/mea/core.py
new file mode 100644
index 0000000..2cc7fc8
--- /dev/null
+++ b/scripts/mea/core.py
@@ -0,0 +1,527 @@
+#
+# CV is a framework for continuous verification.
+# This module was based on Klever-CV repository (https://github.com/mutilin/klever/tree/cv-v2.0).
+#
+# Copyright (c) 2018-2023 ISP RAS (http://www.ispras.ru)
+# Ivannikov Institute for System Programming of the Russian Academy of Sciences
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+This library provides the core functions for MEA, such as conversion and comparison of
+error traces.
+"""
+
+# pylint: disable=invalid-name, consider-iterating-dictionary
+
+import operator
+import re
+
+
+# Conversion functions.
+CONVERSION_FUNCTION_CALL_TREE = "call tree"
+CONVERSION_FUNCTION_MODEL_FUNCTIONS = "model functions"
+CONVERSION_FUNCTION_CONDITIONS = "conditions"
+CONVERSION_FUNCTION_ASSIGNMENTS = "assignments"
+CONVERSION_FUNCTION_NOTES = "error descriptions"
+CONVERSION_FUNCTION_FULL = "full"
+DEFAULT_CONVERSION_FUNCTION = CONVERSION_FUNCTION_MODEL_FUNCTIONS
+CACHED_CONVERSION_FUNCTIONS = [
+    CONVERSION_FUNCTION_CALL_TREE,
+    CONVERSION_FUNCTION_MODEL_FUNCTIONS,
+    CONVERSION_FUNCTION_NOTES]
+
+# Comparison functions.
+COMPARISON_FUNCTION_EQUAL = "equal"
+COMPARISON_FUNCTION_INCLUDE = "include"
+COMPARISON_FUNCTION_INCLUDE_WITH_ERROR = "include with error"
+COMPARISON_FUNCTION_INCLUDE_PARTIAL = "partial include"
+COMPARISON_FUNCTION_INCLUDE_PARTIAL_ORDERED = "partial include ordered"
+COMPARISON_FUNCTION_SKIP = "skip"
+DEFAULT_COMPARISON_FUNCTION = COMPARISON_FUNCTION_EQUAL
+
+# Tags for configurations.
+TAG_CONVERSION_FUNCTION = "conversion_function"
+TAG_COMPARISON_FUNCTION = "comparison_function"
+TAG_EDITED_ERROR_TRACE = "edited_error_trace"
+
+# Conversion function arguments.
+TAG_ADDITIONAL_MODEL_FUNCTIONS = "additional_model_functions"
+TAG_FILTERED_MODEL_FUNCTIONS = "filtered_model_functions"
+TAG_USE_NOTES = "use_notes"
+TAG_USE_WARNS = "use_warns"
+TAG_IGNORE_NOTES_TEXT = "ignore_notes_text"
+
+# Converted error trace tags.
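+# Each element of a converted error trace is a dict over these keys, e.g.
+# {"op": "CALL", "thread": 1, "source": "f(x)", "name": "f", "line": 10, "id": 0}
+# (the values here are purely illustrative).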
+CET_OP = "op"
+CET_OP_CALL = "CALL"
+CET_OP_RETURN = "RET"
+CET_OP_ASSUME = "ASSUME"
+CET_OP_ASSIGN = "ASSIGN"
+CET_OP_NOTE = "NOTE"
+CET_OP_WARN = "WARN"
+CET_THREAD = "thread"
+CET_SOURCE = "source"
+CET_DISPLAY_NAME = "name"
+CET_ID = "id"
+CET_LINE = "line"
+ASSIGN_MARK = " = "
+
+DEFAULT_SIMILARITY_THRESHOLD = 100  # in % (all threads are equal)
+DEFAULT_PROPERTY_CHECKS_TEXT = "property check description"
+
+
+def convert_error_trace(error_trace: dict, conversion_function: str, args: dict = None) -> list:
+    """
+    Convert a json error trace into the internal representation (a list of selected elements).
+    """
+    if args is None:
+        args = {}
+    functions = {
+        CONVERSION_FUNCTION_MODEL_FUNCTIONS: __convert_model_functions,
+        CONVERSION_FUNCTION_CALL_TREE: __convert_call_tree_filter,
+        CONVERSION_FUNCTION_CONDITIONS: __convert_conditions,
+        CONVERSION_FUNCTION_FULL: __convert_full,
+        CONVERSION_FUNCTION_ASSIGNMENTS: __convert_assignments,
+        CONVERSION_FUNCTION_NOTES: __convert_notes
+    }
+    if conversion_function not in functions.keys():
+        conversion_function = DEFAULT_CONVERSION_FUNCTION
+    result = functions[conversion_function](error_trace, args)
+
+    if (args.get(TAG_USE_NOTES, args.get(TAG_USE_WARNS, False)) or
+            args.get(TAG_IGNORE_NOTES_TEXT, False)) and \
+            conversion_function not in [CONVERSION_FUNCTION_FULL, CONVERSION_FUNCTION_NOTES]:
+        result += __convert_notes(error_trace, args)
+        result = sorted(result, key=operator.itemgetter(CET_ID))
+
+    filtered_functions = set(args.get(TAG_FILTERED_MODEL_FUNCTIONS, []))
+    if filtered_functions:
+        result = __filter_functions(result, filtered_functions)
+
+    return result
+
+
+def is_equivalent(comparison_results: float, similarity_threshold: int) -> bool:
+    """
+    Return True if the compared error traces are considered equivalent with respect to the
+    specified threshold.
+    """
+    return comparison_results and (comparison_results * 100 >= similarity_threshold)
+
+
+def compare_error_traces(edited_error_trace: list, compared_error_trace: list,
+                         comparison_function: str) -> float:
+    """
+    Compare two error traces by means of the specified function and return a similarity
+    coefficient for their thread equivalence (for single-threaded traces the coefficient
+    degenerates to 1.0/0.0).
+    """
+    et1_threaded, et2_threaded = __transform_to_threads(edited_error_trace, compared_error_trace)
+    if not et1_threaded and not et2_threaded:
+        # Return true for empty converted error traces (so they will be applied to all
+        # reports with the same attributes)
+        return 1.0
+    functions = {
+        COMPARISON_FUNCTION_EQUAL: __compare_equal,
+        COMPARISON_FUNCTION_INCLUDE: __compare_include,
+        COMPARISON_FUNCTION_INCLUDE_WITH_ERROR: __compare_include_with_error,
+        COMPARISON_FUNCTION_INCLUDE_PARTIAL: __compare_include_partial,
+        COMPARISON_FUNCTION_INCLUDE_PARTIAL_ORDERED: __compare_include_partial_ordered,
+        COMPARISON_FUNCTION_SKIP: __compare_skip
+    }
+    if comparison_function not in functions.keys():
+        comparison_function = DEFAULT_COMPARISON_FUNCTION
+    equal_threads = functions[comparison_function](et1_threaded, et2_threaded)
+    equal_threads = min(equal_threads, len(et1_threaded), len(et2_threaded))
+    return __get_similarity_coefficient(et1_threaded, et2_threaded, equal_threads)
+
+
+# noinspection PyUnusedLocal
+def __convert_call_tree_filter(error_trace: dict, args: dict = None) -> list:
+    # pylint: disable=unused-argument
+    converted_error_trace = []
+    counter = 0
+    # TODO: check this in core (one node for call and return edges).
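+    # Edges that encode both a function call and its return in a single step are
+    # remembered in double_funcs, so that the missing RET elements can be
+    # synthesized when the corresponding function returns (see below).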
+ double_funcs = {} + for edge in error_trace['edges']: + if 'env' in edge: + continue + if 'enter' in edge and 'return' in edge: + double_funcs[edge['enter']] = edge['return'] + if 'enter' in edge: + function_call = error_trace['funcs'][edge['enter']] + converted_error_trace.append({ + CET_OP: CET_OP_CALL, + CET_THREAD: edge['thread'], + CET_SOURCE: edge['source'], + CET_LINE: edge['start line'], + CET_DISPLAY_NAME: function_call, + CET_ID: counter + }) + elif 'return' in edge: + function_return = error_trace['funcs'][edge['return']] + converted_error_trace.append({ + CET_OP: CET_OP_RETURN, + CET_THREAD: edge['thread'], + CET_LINE: edge['start line'], + CET_SOURCE: edge['source'], + CET_DISPLAY_NAME: function_return, + CET_ID: counter + }) + double_return = edge['return'] + while True: + if double_return in double_funcs.keys(): + converted_error_trace.append({ + CET_OP: CET_OP_RETURN, + CET_THREAD: edge['thread'], + CET_LINE: edge['start line'], + CET_SOURCE: edge['source'], + CET_DISPLAY_NAME: error_trace['funcs'][double_funcs[double_return]], + CET_ID: counter + }) + tmp = double_return + double_return = double_funcs[double_return] + del double_funcs[tmp] + else: + break + counter += 1 + return converted_error_trace + + +def __convert_model_functions(error_trace: dict, args: dict = None) -> list: + if args is None: + args = {} + additional_model_functions = set(args.get(TAG_ADDITIONAL_MODEL_FUNCTIONS, [])) + model_functions = __get_model_functions(error_trace, additional_model_functions) + converted_error_trace = __convert_call_tree_filter(error_trace, args) + removed_indexes = set() + thread_start_indexes = set() + cur_thread = -1 + for counter, item in enumerate(converted_error_trace): + op = item[CET_OP] + thread = item[CET_THREAD] + name = item[CET_DISPLAY_NAME] + if cur_thread != thread: + thread_start_indexes.add(counter) + cur_thread = thread + if counter in removed_indexes: + continue + if op == CET_OP_CALL: + is_save = False + remove_items = 0 + for checking_elem in converted_error_trace[counter:]: + remove_items += 1 + checking_op = checking_elem[CET_OP] + checking_name = checking_elem[CET_DISPLAY_NAME] + checking_thread = checking_elem[CET_THREAD] + if checking_op == CET_OP_RETURN and checking_name == name: + break + if checking_thread != thread: + remove_items -= 1 + break + if checking_op == CET_OP_CALL: + if checking_name in model_functions: + is_save = True + break + if not is_save: + for index in range(counter, counter + remove_items): + removed_indexes.add(index) + resulting_error_trace = [] + for counter, item in enumerate(converted_error_trace): + if counter not in removed_indexes or counter in thread_start_indexes: + resulting_error_trace.append(item) + return resulting_error_trace + + +def __filter_functions(converted_error_trace: list, filtered_functions: set) -> list: + result = [] + filtered_stack = [] + cur_thread = None + for item in converted_error_trace: + op = item[CET_OP] + thread = item[CET_THREAD] + name = item[CET_DISPLAY_NAME] + if cur_thread and not cur_thread == thread: + filtered_stack.clear() + if name in filtered_functions: + if op == CET_OP_CALL: + filtered_stack.append(name) + cur_thread = thread + elif op == CET_OP_RETURN: + if filtered_stack: + filtered_stack.pop() + elif not filtered_stack: + result.append(item) + return result + + +# noinspection PyUnusedLocal +def __convert_conditions(error_trace: dict, args: dict = None) -> list: + # pylint: disable=unused-argument + converted_error_trace = [] + counter = 0 + for edge in 
error_trace['edges']: + if 'condition' in edge: + assume = edge['condition'] + converted_error_trace.append({ + CET_OP: CET_OP_ASSUME, + CET_THREAD: edge['thread'], + CET_SOURCE: edge['source'], + CET_LINE: edge['start line'], + CET_DISPLAY_NAME: assume, + CET_ID: counter + }) + counter += 1 + return converted_error_trace + + +# noinspection PyUnusedLocal +def __convert_assignments(error_trace: dict, args: dict = None) -> list: + # pylint: disable=unused-argument + converted_error_trace = [] + counter = 0 + for edge in error_trace['edges']: + if 'source' in edge: + source = edge['source'] + if ASSIGN_MARK in source: + converted_error_trace.append({ + CET_OP: CET_OP_ASSIGN, + CET_THREAD: edge['thread'], + CET_SOURCE: edge['source'], + CET_LINE: edge['start line'], + CET_DISPLAY_NAME: source, + CET_ID: counter + }) + counter += 1 + return converted_error_trace + + +def __convert_notes(error_trace: dict, args=None) -> list: + if args is None: + args = {} + converted_error_trace = [] + counter = 0 + use_notes = args.get(TAG_USE_NOTES, False) + use_warns = args.get(TAG_USE_WARNS, False) + ignore_text = args.get(TAG_IGNORE_NOTES_TEXT, False) + if not use_notes and not use_warns: + # Ignore, since we need at least one flag as True. + use_notes = True + use_warns = True + + for edge in error_trace['edges']: + text = DEFAULT_PROPERTY_CHECKS_TEXT + if 'note' in edge: + if not ignore_text: + text = edge['note'] + if use_notes: + converted_error_trace.append({ + CET_OP: CET_OP_NOTE, + CET_THREAD: edge['thread'], + CET_SOURCE: edge['source'], + CET_LINE: edge['start line'], + CET_DISPLAY_NAME: text, + CET_ID: counter + }) + elif 'warn' in edge: + if not ignore_text: + text = edge['warn'] + if use_warns: + converted_error_trace.append({ + CET_OP: CET_OP_WARN, + CET_THREAD: edge['thread'], + CET_SOURCE: edge['source'], + CET_LINE: edge['start line'], + CET_DISPLAY_NAME: text, + CET_ID: counter + }) + counter += 1 + return converted_error_trace + + +# noinspection PyUnusedLocal +def __convert_full(error_trace: dict, args: dict = None) -> list: + # pylint: disable=unused-argument + converted_error_trace = __convert_call_tree_filter(error_trace, args) + \ + __convert_conditions(error_trace, args) + \ + __convert_assignments(error_trace, args) + \ + __convert_notes(error_trace, args) + converted_error_trace = sorted(converted_error_trace, key=operator.itemgetter(CET_ID)) + return converted_error_trace + + +def __get_model_functions(error_trace: dict, additional_model_functions: set) -> set: + """ + Extract model functions from error trace. 
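+
+    A model function is the innermost function on the call stack at the points
+    where the witness contains a 'note' or 'warn' edge; the caller may supply
+    further names or regular expressions via additional_model_functions.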
+ """ + stack = [] + model_functions = additional_model_functions + patterns = set() + for func in model_functions: + if not str(func).isidentifier(): + patterns.add(func) + for edge in error_trace['edges']: + if 'enter' in edge: + func = error_trace['funcs'][edge['enter']] + if patterns: + for pattern_func in patterns: + if re.match(pattern_func, func): + model_functions.add(func) + stack.append(func) + if 'return' in edge: + # func = error_trace['funcs'][edge['return']] + if stack: + stack.pop() + if 'warn' in edge or 'note' in edge: + if stack: + model_functions.add(stack[len(stack) - 1]) + model_functions = model_functions - patterns + return model_functions + + +def __prep_elem_for_cmp(elem: dict, error_trace: dict) -> None: + op = elem[CET_OP] + thread = elem[CET_THREAD] + if thread not in error_trace: + error_trace[thread] = [] + if op in [CET_OP_RETURN, CET_OP_CALL]: + error_trace[thread].append((op, elem[CET_DISPLAY_NAME])) + elif op == CET_OP_ASSUME: + thread_aux = f"{thread}_aux" + if thread_aux not in error_trace: + error_trace[thread_aux] = [] + error_trace[thread_aux].append((op, elem[CET_DISPLAY_NAME], elem[CET_SOURCE])) + elif op in [CET_OP_WARN, CET_OP_NOTE, CET_OP_ASSIGN]: + thread_aux = f"{thread}_aux" + if thread_aux not in error_trace: + error_trace[thread_aux] = [] + error_trace[thread_aux].append((op, elem[CET_DISPLAY_NAME])) + + +def __transform_to_threads(edited_error_trace: list, compared_error_trace: list) -> (dict, dict): + et1 = {} + et2 = {} + for et_elem in edited_error_trace: + __prep_elem_for_cmp(et_elem, et1) + for et_elem in compared_error_trace: + __prep_elem_for_cmp(et_elem, et2) + et1_threaded = {} + et2_threaded = {} + for thread, trace in et1.items(): + if trace: + et1_threaded[thread] = tuple(trace) + for thread, trace in et2.items(): + if trace: + et2_threaded[thread] = tuple(trace) + return et1_threaded, et2_threaded + + +def __sublist(sublist: tuple, big_list: tuple) -> bool: + """ + Check that list sublist is included into the list big_list. 
+ """ + sublist = ",".join(str(v) for v in sublist) + big_list = ",".join(str(v) for v in big_list) + return sublist in big_list + + +def __compare_skip(edited_error_trace: dict, compared_error_trace: dict) -> int: + return min(len(edited_error_trace), len(compared_error_trace)) + + +def __compare_equal(edited_error_trace: dict, compared_error_trace: dict) -> int: + result = {} + for id_1, thread_1 in edited_error_trace.items(): + for id_2, thread_2 in compared_error_trace.items(): + if thread_1 == thread_2: + if id_1 not in result: + result[id_1] = [] + result[id_1].append(id_2) + return __convert_to_number_of_compared_threads(result) + + +def __compare_include(edited_error_trace: dict, compared_error_trace: dict) -> int: + result = {} + for id_1, thread_1 in edited_error_trace.items(): + for id_2, thread_2 in compared_error_trace.items(): + if __sublist(thread_1, thread_2): + if id_1 not in result: + result[id_1] = [] + result[id_1].append(id_2) + return __convert_to_number_of_compared_threads(result) + + +def __compare_include_with_error(edited_error_trace: dict, compared_error_trace: dict) -> int: + for cet in [edited_error_trace, compared_error_trace]: + for thread, trace in cet.items(): + cet[thread] = trace + (('WARN', ''), ) + return __compare_include(edited_error_trace, compared_error_trace) + + +def __convert_to_number_of_compared_threads(result: dict) -> int: + used_transitions = set() + max_number_of_threads = 0 + while True: + used_ids_2 = set() + number_of_threads = 0 + for id_1, ids_2 in result.items(): + for id_2 in ids_2: + id_str = f"{id_1}_{id_2}" + if id_2 not in used_ids_2 and id_str not in used_transitions: + used_ids_2.add(id_2) + used_transitions.add(id_str) + number_of_threads += 1 + break + if number_of_threads > max_number_of_threads: + max_number_of_threads = number_of_threads + + if number_of_threads == 0: + break + return max_number_of_threads + + +def __compare_include_partial(edited_error_trace: dict, compared_error_trace: dict) -> int: + result = {} + for id_1, thread_1 in edited_error_trace.items(): + for id_2, thread_2 in compared_error_trace.items(): + if all(elem in thread_2 for elem in thread_1): + if id_1 not in result: + result[id_1] = [] + result[id_1].append(id_2) + return __convert_to_number_of_compared_threads(result) + + +def __compare_include_partial_ordered(edited_error_trace: dict, compared_error_trace: dict) -> int: + result = {} + for id_1, thread_1 in edited_error_trace.items(): + for id_2, thread_2 in compared_error_trace.items(): + last_index = 0 + is_eq = True + for elem in thread_1: + if elem in thread_2[last_index:]: + last_index = thread_2.index(elem) + else: + is_eq = False + break + if is_eq: + if id_1 not in result: + result[id_1] = [] + result[id_1].append(id_2) + return __convert_to_number_of_compared_threads(result) + + +def __get_similarity_coefficient(et_threaded_1: dict, et_threaded_2: dict, common_elements: int) \ + -> float: + # Currently represented only as Jaccard index. + diff_elements = len(et_threaded_1) + len(et_threaded_2) - common_elements + if diff_elements: + return round(common_elements / diff_elements, 2) + return 0.0 diff --git a/scripts/mea/et/__init__.py b/scripts/mea/et/__init__.py new file mode 100644 index 0000000..6568cfe --- /dev/null +++ b/scripts/mea/et/__init__.py @@ -0,0 +1,69 @@ +# +# CV is a framework for continuous verification. +# This module was based on Klever-CV repository (https://github.com/mutilin/klever/tree/cv-v2.0). 
+# +# Copyright (c) 2018-2023 ISP RAS (http://www.ispras.ru) +# Ivannikov Institute for System Programming of the Russian Academy of Sciences +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# pylint: disable=invalid-name +""" +This library is intended for witnesses parsing. +""" + +from mea.et.parser import WitnessParser +from mea.et.tmpvars import generic_simplifications + + +def import_error_trace(logger, witness, source_dir=None): + """ + Main function for importing a witness into the CV internal format + """ + # Parse a witness. + witness_parser = WitnessParser(logger, witness, source_dir) + internal_witness = witness_parser.internal_witness + + # Remove ugly code + if internal_witness.witness_type != "correctness": + generic_simplifications(logger, internal_witness) + + # Process notes (such as property checks, property violations and environment comments) + internal_witness.process_verifier_notes() + + # Do final checks + internal_witness.final_checks(witness_parser.entry_point) + + return internal_witness.serialize() + + +# This is intended for testing purposes, when one has a witness and would like to debug its +# transformations. +if __name__ == '__main__': + import json + import logging + import sys + + gl_logger = logging.getLogger() + gl_logger.setLevel(logging.DEBUG) + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(logging.DEBUG) + formatter = logging.Formatter( + '%(asctime)s (%(filename)s:%(lineno)03d) %(levelname)5s> %(message)s', '%Y-%m-%d %H:%M:%S') + handler.setFormatter(formatter) + gl_logger.addHandler(handler) + + et = import_error_trace(gl_logger, 'witness.0.graphml') + + with open('error internal_witness.json', 'w', encoding='utf8') as fp: + json.dump(et, fp, ensure_ascii=False, sort_keys=True, indent="\t") diff --git a/scripts/mea/et/internal_witness.py b/scripts/mea/et/internal_witness.py new file mode 100644 index 0000000..692b6dc --- /dev/null +++ b/scripts/mea/et/internal_witness.py @@ -0,0 +1,404 @@ +# +# CV is a framework for continuous verification. +# This module was based on Klever-CV repository (https://github.com/mutilin/klever/tree/cv-v2.0). +# +# Copyright (c) 2018-2023 ISP RAS (http://www.ispras.ru) +# Ivannikov Institute for System Programming of the Russian Academy of Sciences +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# pylint: disable=missing-docstring +""" +Internal representation of witness in CV format. +""" + +import json +import os +import re + + +# Capitalize first letters of attribute names. 
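+# For example, an attribute {'name': 'verdict', 'value': 'unsafe'} becomes
+# {'name': 'Verdict', 'value': 'unsafe'} (hypothetical values).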
+def capitalize_attr_names(attrs):
+    # Each attribute is a dictionary with a single element whose value is either a string or an
+    # array of sub-attributes.
+    for attr in attrs:
+        # Capitalize the attribute name.
+        attr['name'] = attr['name'][0].upper() + attr['name'][1:]
+
+        if isinstance(attr['value'], list):
+            capitalize_attr_names(attr['value'])
+
+
+class InternalWitness:
+    """
+    This class keeps a witness in the CV internal format.
+    """
+    MODEL_COMMENT_TYPES = 'AUX_FUNC|AUX_FUNC_CALLBACK|MODEL_FUNC|NOTE|ASSERT|ENVIRONMENT_MODEL'
+    MAX_COMMENT_LENGTH = 128
+
+    def __init__(self, logger):
+        self._attrs = []
+        self._edges = []
+        self._files = []
+        self._funcs = []
+        self._logger = logger
+        self._entry_node_id = None
+        self._model_funcs = {}
+        self._spec_funcs = {}
+        self._env_models = {}
+        self._notes = {}
+        self._asserts = {}
+        self._actions = []
+        self._callback_actions = []
+        self.aux_funcs = {}
+        self.emg_comments = {}
+        self._threads = []
+        self.witness_type = None
+        self.invariants = {}
+        self._warnings = []
+        self.is_call_stack = False
+        self.is_main_function = False
+        self.is_conditions = False
+        self.is_notes = False
+
+    @property
+    def functions(self):
+        return enumerate(self._funcs)
+
+    @property
+    def files(self):
+        return enumerate(self._files)
+
+    def __get_edge_index(self, edge, default):
+        if edge:
+            try:
+                return self._edges.index(edge)
+            except Exception as exception:
+                self._logger.warning(f"Cannot get index for edge {edge} due to: {exception}")
+                return default
+        else:
+            return default
+
+    def get_edges(self, start=None, end=None):
+        start_index = self.__get_edge_index(start, 0)
+        end_index = self.__get_edge_index(end, len(self._edges))
+        return self._edges[start_index:end_index]
+
+    def prune(self):
+        # pylint: disable=consider-using-set-comprehension
+        sink_edges = set([self._edges.index(e) for e in self._edges if e.get('sink')])
+        self._edges = [e for index, e in enumerate(self._edges) if index not in sink_edges]
+
+    def serialize(self):
+        capitalize_attr_names(self._attrs)
+
+        data = {
+            'attrs': self._attrs,
+            'edges': self._edges,
+            'entry node': 0,
+            'files': self._files,
+            'funcs': self._funcs,
+            'actions': self._actions,
+            'callback actions': self._callback_actions,
+            'type': self.witness_type,
+            'warnings': self._warnings
+        }
+        return data
+
+    def add_attr(self, name, value, associate, compare):
+        self._attrs.append({
+            'name': name,
+            'value': value,
+            'associate': associate,
+            'compare': compare
+        })
+
+    def add_entry_node_id(self, node_id):
+        self._entry_node_id = node_id
+
+    # noinspection PyUnusedLocal
+    def add_edge(self, source, target):
+        # pylint: disable=unused-argument
+        # TODO: check coherence of source and target.
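+        # The new edge starts out empty and is filled in by the caller; invariants
+        # recorded for the target node are attached to it immediately.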
+ edge = {} + self._edges.append(edge) + if target in self.invariants: + edge['invariants'] = self.invariants[target] + return edge + + def add_file(self, file_name): + file_name = os.path.normpath(os.path.abspath(file_name)) + if file_name not in self._files: + if not os.path.isfile(file_name): + no_file_str = f"There is no file {file_name}" + self._logger.warning(no_file_str) + if no_file_str not in self._warnings: + self._warnings.append(f"There is no file {file_name}") + raise FileNotFoundError + self._files.append(file_name) + return self._resolve_file_id(file_name) + return self._resolve_file_id(file_name) + + def add_function(self, name): + if name not in self._funcs: + self._funcs.append(name) + return len(self._funcs) - 1 + return self.resolve_function_id(name) + + def _add_aux_func(self, identifier, is_callback, formal_arg_names): + self.aux_funcs[identifier] = {'is callback': is_callback, + 'formal arg names': formal_arg_names} + + def _add_emg_comment(self, file, line, data): + if file not in self.emg_comments: + self.emg_comments[file] = {} + self.emg_comments[file][line] = data + + def _resolve_file_id(self, file): + return self._files.index(file) + + def _resolve_file(self, identifier): + return self._files[identifier] + + def resolve_function_id(self, name): + return self._funcs.index(name) + + def add_invariant(self, invariant, node_id): + self.invariants[node_id] = invariant + + def _resolve_function(self, identifier): + return self._funcs[identifier] + + def process_comment(self, comment: str) -> str: + if len(comment) > self.MAX_COMMENT_LENGTH: + comment = comment[:self.MAX_COMMENT_LENGTH] + "..." + return comment + + def add_model_function(self, func_name: str, comment: str = None): + if not comment: + comment = func_name + func_id = self.add_function(func_name) + self._model_funcs[func_id] = comment + self._spec_funcs[func_name] = comment + + def process_verifier_notes(self): + # Get information from sources. 
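+        # (model-function comments, environment models, notes and assertions are
+        # collected by _parse_model_comments below)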
+        self._parse_model_comments()
+        self._logger.info('Mark witness with model comments')
+        if self._model_funcs or self._notes:
+            self.is_notes = True
+
+        warn_edges = []
+        for edge in self._edges:
+            if 'warn' in edge:
+                warn_edges.append(edge['warn'])
+            file_id = edge.get('file', None)
+            if isinstance(file_id, int):
+                file = self._resolve_file(file_id)
+            else:
+                continue
+
+            start_line = edge.get('start line')
+
+            if 'enter' in edge:
+                func_id = edge['enter']
+                if func_id in self._model_funcs:
+                    note = self._model_funcs[func_id]
+                    self._logger.debug(f"Add note {note} for model function "
+                                       f"'{self._resolve_function(func_id)}'")
+                    edge['note'] = self.process_comment(note)
+                if func_id in self._env_models:
+                    env_note = self._env_models[func_id]
+                    self._logger.debug(f"Add note {env_note} for environment function "
+                                       f"'{self._resolve_function(func_id)}'")
+                    edge['env'] = self.process_comment(env_note)
+
+            if file_id in self._notes and start_line in self._notes[file_id]:
+                note = self._notes[file_id][start_line]
+                self._logger.debug(f"Add note {note} for statement from '{file}:{start_line}'")
+                edge['note'] = self.process_comment(note)
+            elif file_id in self._asserts and start_line in self._asserts[file_id]:
+                warn = self._asserts[file_id][start_line]
+                self._logger.debug(f"Add warning {warn} for statement from '{file}:{start_line}'")
+                edge['warn'] = self.process_comment(warn)
+                warn_edges.append(warn)
+            else:
+                if 'source' in edge:
+                    for spec_func, note in self._spec_funcs.items():
+                        if spec_func in edge['source']:
+                            edge['note'] = self.process_comment(note)
+                            break
+
+        if not warn_edges and self.witness_type == 'violation':
+            if self._edges:
+                last_edge = self._edges[-1]
+                if 'note' in last_edge:
+                    last_edge['warn'] = f"Violation of '{self.process_comment(last_edge['note'])}'"
+                    del last_edge['note']
+                else:
+                    last_edge['warn'] = 'Property violation'
+        del self._model_funcs, self._notes, self._asserts, self._env_models
+
+    def _parse_model_comments(self):
+        self._logger.info('Parse model comments from source files referred by witness')
+        emg_comment = re.compile(r'/\*\sLDV\s(.*)\s\*/')
+
+        for file_id, file in self.files:
+            if not os.path.isfile(file):
+                continue
+
+            self._logger.debug(f'Parse model comments from {file}')
+
+            with open(file, encoding='utf8', errors='ignore') as file_obj:
+                line = 0
+                for text in file_obj:
+                    line += 1
+
+                    # Try to match an EMG comment.
+                    # Expect a comment like /* TYPE Instance Text */.
+                    match = emg_comment.search(text)
+                    if match:
+                        data = json.loads(match.group(1))
+                        self._add_emg_comment(file_id, line, data)
+
+                    # Match the remaining model comments.
+                    match = re.search(
+                        rf'/\*\s+({self.MODEL_COMMENT_TYPES})\s+(\S+)\s+(.*)\*/', text)
+                    if match:
+                        kind, func_name, comment = match.groups()
+
+                        comment = comment.rstrip()
+                        if kind in ("NOTE", "ASSERT"):
+                            comment = f"{func_name} {comment}"
+
+                            if file_id not in self._notes:
+                                self._notes[file_id] = {}
+                            self._notes[file_id][line + 1] = comment
+                            self._logger.debug(
+                                f"Get note '{comment}' for statement from '{file}:{line + 1}'")
+                            # Some assertions will become warnings.
+                            if kind == 'ASSERT':
+                                if file_id not in self._asserts:
+                                    self._asserts[file_id] = {}
+                                self._asserts[file_id][line + 1] = comment
+                                self._logger.debug(f"Get assertion '{comment}' for statement "
+                                                   f"from '{file}:{line + 1}'")
+                        else:
+                            func_name = func_name.rstrip()
+                            if not comment:
+                                comment = func_name
+
+                            formal_arg_names = []
+                            if kind in ('AUX_FUNC', 'AUX_FUNC_CALLBACK'):
+                                # Get the necessary function declaration located on the
+                                # following line.
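+                                # E.g. a (hypothetical) annotated source file:
+                                #   /* AUX_FUNC ldv_aux_func */
+                                #   void ldv_aux_func(int arg, void (*cb)(int));
+                                # yields formal argument names ["arg", "cb"].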
+                                try:
+                                    func_decl = next(file_obj)
+                                    # Don't forget to increase the line counter.
+                                    line += 1
+
+                                    # Try to get the names of formal arguments (in the form
+                                    # "type name"), which are required for removing auxiliary
+                                    # function calls.
+                                    match = re.search(rf'{func_name}\s*\((.+)\)', func_decl)
+                                    if match:
+                                        formal_args_str = match.group(1)
+
+                                        # Remove arguments of function pointers and parentheses
+                                        # around the corresponding argument names.
+                                        formal_args_str = re.sub(r'\((.+)\)\(.+\)', r'\g<1>',
+                                                                 formal_args_str)
+
+                                        for formal_arg in formal_args_str.split(','):
+                                            match = re.search(r'^.*\W+(\w+)\s*$', formal_arg)
+
+                                            # Give up if we meet a complicated formal argument.
+                                            if not match:
+                                                formal_arg_names = []
+                                                break
+
+                                            formal_arg_names.append(match.group(1))
+                                except StopIteration:
+                                    self._logger.warning(
+                                        'Auxiliary function definition does not exist')
+                                    continue
+
+                            # Deal with functions referenced by the witness.
+                            try:
+                                func_id = self.resolve_function_id(func_name)
+                            except ValueError:
+                                self.add_function(func_name)
+                                func_id = self.resolve_function_id(func_name)
+
+                            if kind == 'AUX_FUNC':
+                                self._add_aux_func(func_id, False, formal_arg_names)
+                                self._logger.debug(
+                                    f"Get auxiliary function '{func_name}' from '{file}:{line}'")
+                            elif kind == 'AUX_FUNC_CALLBACK':
+                                self._add_aux_func(func_id, True, formal_arg_names)
+                                self._logger.debug(f"Get auxiliary function '{func_name}' for "
+                                                   f"callback from '{file}:{line}'")
+                            elif kind == 'ENVIRONMENT_MODEL':
+                                self._env_models[func_id] = comment
+                                self._logger.debug(f"Get environment model '{comment}' for "
+                                                   f"function '{func_name}' from '{file}:{line}'")
+                            else:
+                                self._model_funcs[func_id] = comment
+                                self._logger.debug(f"Get note '{comment}' for model function "
+                                                   f"'{func_name}' from '{file}:{line}'")
+
+    def add_thread(self, thread_id: str):
+        self._threads.append(thread_id)
+
+    def final_checks(self, entry_point="main"):
+        # Check for warnings.
+        if self.witness_type == 'violation':
+            if not self.is_call_stack:
+                self._warnings.append('No call stack (please add tags "enterFunction" and '
+                                      '"returnFrom" to improve visualization)')
+            if not self.is_conditions:
+                self._warnings.append(
+                    'No conditions (please add tags "control" to improve visualization)')
+            if not self.is_main_function and self._edges:
+                self._warnings.append('No entry point (entry point call was generated)')
+                entry_elem = {
+                    'enter': self.add_function(entry_point),
+                    'start line': 0,
+                    'file': 0,
+                    'env': 'entry point',
+                    'source': f"{entry_point}()"
+                }
+                if not self._threads:
+                    entry_elem['thread'] = '1'
+                else:
+                    entry_elem['thread'] = str(self._threads[0])
+                self._edges.insert(0, entry_elem)
+            # if not self.is_notes:
+            #     self._warnings.append(
+            #         'Optional: no violation hints (please add tags "note" and "warn" to '
+            #         'improve visualization)')
+        if not self._threads:
+            is_main_process = False
+            for edge in self._edges:
+                if not is_main_process and 'enter' in edge:
+                    is_main_process = True
+                if is_main_process:
+                    edge['thread'] = '1'
+                else:
+                    edge['thread'] = '0'
+
+    def get_func_name(self, identifier: int):
+        return self._funcs[identifier]
+
+    def get_file_name(self, identifier: int):
+        if self._files:
+            return self._files[identifier]
+        return None
diff --git a/scripts/mea/et/parser.py b/scripts/mea/et/parser.py
new file mode 100644
index 0000000..b25cdbe
--- /dev/null
+++ b/scripts/mea/et/parser.py
@@ -0,0 +1,344 @@
+#
+# CV is a framework for continuous verification.
+# This module was based on Klever-CV repository (https://github.com/mutilin/klever/tree/cv-v2.0).
+#
+# Copyright (c) 2018-2023 ISP RAS (http://www.ispras.ru)
+# Ivannikov Institute for System Programming of the Russian Academy of Sciences
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Parser for witnesses.
+"""
+
+import os
+import re
+from xml.etree import ElementTree
+
+from mea.et.internal_witness import InternalWitness
+
+
+class WitnessParser:
+    """
+    This class parses a given witness (both correctness and violation witnesses are supported).
+    """
+    WITNESS_NS = {'graphml': 'http://graphml.graphdrawing.org/xmlns'}
+
+    def __init__(self, logger, witness, source_dir=None):
+        self._logger = logger
+        self.entry_point = None
+        self.source_dir = source_dir
+        self._violation_hints = set()
+        self.default_program_file = None  # default source file
+        self.global_program_file = None  # ~CIL file
+        self.internal_witness = InternalWitness(logger)
+        self._parse_witness(witness)
+        self._check_given_files()
+
+    def __check_for_default_file(self, name: str, edge: dict):
+        try:
+            identifier = self.internal_witness.add_file(self.__resolve_src_path(name))
+            edge['file'] = identifier
+            return identifier
+        except FileNotFoundError:
+            # The file is missing: a warning has already been emitted, so the parser just
+            # continues its work.
+            return None
+
+    def _check_given_files(self):
+        last_used_file = None
+        for edge in self.internal_witness.get_edges():
+            if 'file' in edge and edge['file'] is not None:
+                last_used_file = edge['file']
+            elif self.default_program_file:
+                tmp_file = self.__check_for_default_file(self.default_program_file, edge)
+                if tmp_file is not None:
+                    last_used_file = tmp_file
+            elif last_used_file is not None:
+                edge['file'] = last_used_file
+            elif self.global_program_file:
+                tmp_file = self.__check_for_default_file(self.global_program_file, edge)
+                if tmp_file is not None:
+                    last_used_file = tmp_file
+            else:
+                self._logger.warning(f"There is no source file for edge {edge}")
+
+    def __check_file_name(self, name: str):
+        name = self.__resolve_src_path(name)
+        if os.path.exists(name):
+            return name
+        return None
+
+    def __resolve_src_path(self, name: str):
+        """
+        Resolve a path mentioned in a witness to an existing source file: try it as is,
+        then relative to the source directory, then with tool-specific prefixes stripped.
+        """
+        if os.path.exists(name):
+            return name
+        if self.source_dir:
+            abs_path = os.path.join(self.source_dir, name)
+            if os.path.exists(abs_path):
+                return abs_path
+        # TODO: workaround for some tools.
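+        # E.g. a (hypothetical) path "/data/vcloud-xyz/worker/working_dir_1/src/main.c"
+        # is reduced to "src/main.c".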
+        resolved_name = re.sub(r'.+/vcloud-\S+/worker/working_dir_[^/]+/', '', name)
+        if os.path.exists(resolved_name):
+            return resolved_name
+        return name
+
+    def _parse_witness(self, witness):
+        self._logger.info(f'Parse witness {witness}')
+        if os.stat(witness).st_size == 0:
+            raise ElementTree.ParseError("Witness is empty")
+        with open(witness, encoding='utf8') as witness_obj:
+            tree = ElementTree.parse(witness_obj)
+            root = tree.getroot()
+            graph = root.find('graphml:graph', self.WITNESS_NS)
+            for data in root.findall('graphml:key', self.WITNESS_NS):
+                name = data.attrib.get('attr.name')
+                if name in ("originFileName", "originfile"):
+                    for def_data in data.findall('graphml:default', self.WITNESS_NS):
+                        new_name = self.__check_file_name(def_data.text)
+                        if new_name:
+                            self.default_program_file = new_name
+                            break
+            # Note that an Element without children is falsy, so compare with None explicitly.
+            if graph is None:
+                return
+            for data in graph.findall('graphml:data', self.WITNESS_NS):
+                key = data.attrib.get('key')
+                if key == 'programfile':
+                    new_name = self.__check_file_name(data.text)
+                    if new_name:
+                        self.global_program_file = new_name
+                elif key == 'witness-type':
+                    witness_type = data.text
+                    if witness_type == 'correctness_witness':
+                        self.internal_witness.witness_type = 'correctness'
+                    elif witness_type == 'violation_witness':
+                        self.internal_witness.witness_type = 'violation'
+                    else:
+                        self._logger.warning(f"Unsupported witness type: {witness_type}")
+                elif key == 'specification':
+                    automaton = data.text
+                    for line in automaton.split('\n'):
+                        note = None
+                        match = re.search(r'init\((\w+)\(\)\)', line)
+                        if match:
+                            self.entry_point = match.group(1)
+                        match = re.search(r'ERROR\("(.+)"\)', line)
+                        if match:
+                            note = match.group(1)
+                        match = re.search(r'MATCH\s*{\S+\s*=(\S+)\(.*\)}', line)
+                        if match:
+                            func_name = match.group(1)
+                            self.internal_witness.add_model_function(func_name, note)
+                            self._violation_hints.add(func_name)
+                            continue
+                        match = re.search(r'MATCH\s*{(\S+)\(.*\)}', line)
+                        if match:
+                            func_name = match.group(1)
+                            self._violation_hints.add(func_name)
+                            self.internal_witness.add_model_function(func_name, note)
+                            continue
+            self.__parse_witness_data(graph)
+            sink_nodes_map = self.__parse_witness_nodes(graph)
+            self.__parse_witness_edges(graph, sink_nodes_map)
+
+    def __parse_witness_data(self, graph):
+        for data in graph.findall('graphml:data', self.WITNESS_NS):
+            if data.attrib.get('klever-attrs') == 'true':
+                self.internal_witness.add_attr(
+                    data.attrib.get('key'), data.text,
+                    data.attrib.get('associate', "false") == 'true',
+                    data.attrib.get('compare', "false") == 'true')
+
+    def __parse_witness_nodes(self, graph):
+        sink_nodes_map = {}
+        unsupported_node_data_keys = {}
+        nodes_number = 0
+
+        for node in graph.findall('graphml:node', self.WITNESS_NS):
+            is_sink = False
+
+            node_id = node.attrib['id']
+            for data in node.findall('graphml:data', self.WITNESS_NS):
+                data_key = data.attrib.get('key')
+                if data_key == 'entry':
+                    self.internal_witness.add_entry_node_id(node_id)
+                    self._logger.debug(f'Parse entry node {node_id}')
+                elif data_key == 'sink':
+                    is_sink = True
+                    self._logger.debug(f'Parse sink node {node_id}')
+                elif data_key == 'violation':
+                    pass
+                elif data_key == 'invariant':
+                    self.internal_witness.add_invariant(data.text, node_id)
+                elif data_key not in unsupported_node_data_keys:
+                    self._logger.warning(f'Node data key {data_key} is not supported')
+                    unsupported_node_data_keys[data_key] = None
+
+            # Do not track sink nodes like all the other nodes.
+            # All edges leading to sink nodes will be excluded as well.
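+            # Sink nodes mark paths which the verifier stopped exploring, so they carry no
+            # information that is useful for visualization.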
+            if is_sink:
+                sink_nodes_map[node_id] = None
+            else:
+                nodes_number += 1
+
+        self._logger.debug(
+            f'Parse {nodes_number} nodes and {len(sink_nodes_map)} sink nodes')
+        return sink_nodes_map
+
+    def __parse_witness_edges(self, graph, sink_nodes_map):
+        unsupported_edge_data_keys = {}
+        # The number of edges leading to sink nodes. Such edges are removed completely.
+        sink_edges_num = 0
+        edges_num = 0
+        is_source_file = False
+
+        for edge in graph.findall('graphml:edge', self.WITNESS_NS):
+
+            source_node_id = edge.attrib.get('source')
+            target_node_id = edge.attrib.get('target')
+
+            if target_node_id in sink_nodes_map:
+                sink_edges_num += 1
+                continue
+
+            # Create a new edge in the internal witness representation.
+            _edge = self.internal_witness.add_edge(source_node_id, target_node_id)
+
+            start_offset = 0
+            end_offset = 0
+            condition = None
+            invariant = None
+            invariant_scope = None
+            for data in edge.findall('graphml:data', self.WITNESS_NS):
+                data_key = data.attrib.get('key')
+                if data_key == 'originfile':
+                    try:
+                        identifier = self.internal_witness.add_file(
+                            self.__resolve_src_path(data.text))
+                        _edge['file'] = identifier
+                    except FileNotFoundError:
+                        _edge['file'] = None
+                elif data_key == 'startline':
+                    _edge['start line'] = int(data.text)
+                elif data_key == 'endline':
+                    _edge['end line'] = int(data.text)
+                elif data_key == 'sourcecode':
+                    is_source_file = True
+                    _edge['source'] = data.text
+                elif data_key in ('enterFunction', 'returnFrom', 'assumption.scope'):
+                    function_name = data.text
+                    func_index = self.internal_witness.add_function(function_name)
+                    if data_key == 'enterFunction':
+                        if func_index - len(self._violation_hints) == 0:
+                            if self.entry_point:
+                                if self.entry_point == function_name:
+                                    self.internal_witness.is_main_function = True
+                                    if self.internal_witness.witness_type == 'violation':
+                                        _edge['env'] = "entry point"
+                            else:
+                                self.internal_witness.is_main_function = True
+                        else:
+                            self.internal_witness.is_call_stack = True
+                        func_id = self.internal_witness.resolve_function_id(function_name)
+                        _edge['enter'] = func_id
+                    elif data_key == 'returnFrom':
+                        _edge['return'] = self.internal_witness.resolve_function_id(function_name)
+                    else:
+                        _edge['assumption scope'] = self.internal_witness.resolve_function_id(
+                            function_name)
+                elif data_key == 'control':
+                    val = data.text
+                    condition = val
+                    if val == 'condition-true':
+                        _edge['condition'] = True
+                    elif val == 'condition-false':
+                        _edge['condition'] = False
+                    self.internal_witness.is_conditions = True
+                elif data_key == 'assumption':
+                    _edge['assumption'] = data.text
+                elif data_key == 'threadId':
+                    _edge['thread'] = data.text
+                    self.internal_witness.add_thread(data.text)
+                elif data_key == 'startoffset':
+                    start_offset = int(data.text)
+                elif data_key == 'endoffset':
+                    end_offset = int(data.text)
+                elif data_key in ('note', 'warning'):
+                    _edge['note' if data_key == 'note' else 'warn'] = \
+                        self.internal_witness.process_comment(data.text)
+                    self.internal_witness.is_notes = True
+                elif data_key == 'env':
+                    _edge['env'] = self.internal_witness.process_comment(data.text)
+                elif data_key not in unsupported_edge_data_keys:
+                    self._logger.warning(f'Edge data key {data_key} is not supported')
+                    unsupported_edge_data_keys[data_key] = None
+
+            if invariant and invariant_scope:
+                self.internal_witness.add_invariant(invariant, invariant_scope)
+
+            if "source" not in _edge:
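+                # Reconstruct the missing source text: from the name of the entered function,
+                # from the assumption text, or by reading the referenced source file itself.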
+                _edge['source'] = ""
+                if 'enter' in _edge:
+                    _edge['source'] = self.internal_witness.get_func_name(_edge['enter'])
+                elif 'return' in _edge:
+                    _edge['source'] = 'return'
+                elif not is_source_file:
+                    if 'assumption' in _edge:
+                        _edge['source'] = _edge['assumption']
+                    elif 'start line' in _edge:
+                        if start_offset and self.global_program_file:
+                            src_file = self.global_program_file
+                        elif 'file' in _edge:
+                            src_file = self.internal_witness.get_file_name(_edge['file'])
+                            if not src_file and self.global_program_file:
+                                src_file = self.global_program_file
+                        elif self.global_program_file:
+                            src_file = self.global_program_file
+                        else:
+                            src_file = None
+                        if src_file:
+                            with open(src_file, encoding='utf8') as src_obj:
+                                if start_offset:
+                                    offset = 1
+                                    if end_offset:
+                                        offset += end_offset - start_offset
+                                    src_obj.seek(start_offset)
+                                    _edge['source'] = src_obj.read(offset)
+                                else:
+                                    counter = 1
+                                    for line in src_obj:
+                                        if counter == _edge['start line']:
+                                            line = line.strip()
+                                            if 'condition' in _edge:
+                                                res = re.match(r'[^(]*\((.+)\)[^)]*', line)
+                                                if res:
+                                                    line = res.group(1)
+                                            _edge['source'] = line
+                                            break
+                                        counter += 1
+                        if condition == 'condition-false':
+                            _edge['source'] = f"!({_edge['source']})"
+
+            if 'thread' not in _edge:
+                _edge['thread'] = "0"
+            if 'start line' not in _edge:
+                _edge['start line'] = 0
+
+            edges_num += 1
+
+        self._logger.debug(f'Parse {edges_num} edges and {sink_edges_num} sink edges')
diff --git a/scripts/mea/et/tmpvars.py b/scripts/mea/et/tmpvars.py
new file mode 100644
index 0000000..742e419
--- /dev/null
+++ b/scripts/mea/et/tmpvars.py
@@ -0,0 +1,151 @@
+#
+# CV is a framework for continuous verification.
+# This module was based on Klever-CV repository (https://github.com/mutilin/klever/tree/cv-v2.0).
+#
+# Copyright (c) 2018-2023 ISP RAS (http://www.ispras.ru)
+# Ivannikov Institute for System Programming of the Russian Academy of Sciences
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Contains specific simplifications of violation witnesses for visualization.
+"""
+
+import re
+
+
+def generic_simplifications(logger, trace):
+    """
+    Perform all simplifications of an error trace.
+    """
+    logger.info('Simplify error trace')
+    _basic_simplification(trace)
+    _remove_switch_cases(logger, trace)
+    trace.prune()
+
+
+def _basic_simplification(error_trace):
+    # Remove all edges without the source attribute; otherwise visualization will be very poor.
+    for edge in error_trace.get_edges():
+        source_line = edge.get('source', "")
+        if not source_line:
+            # Source code must be present for every visualized edge.
+            edge['sink'] = True
+
+        # Make the source code more human-readable.
+        # Remove broken indentation: the error trace visualizer will add its own, and in a
+        # much more attractive way.
+        source_line = re.sub(r'[ \t]*\n[ \t]*', ' ', source_line)
+
+        # Remove "[...]" around conditions.
+        if 'condition' in edge:
+            source_line = source_line.strip('[]')
+
+        # Collapse consecutive whitespace characters.
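+        # E.g. "x   =   1;" becomes "x = 1;".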
+        source_line = re.sub(r'[ \t]+', ' ', source_line)
+
+        # Remove a space before the trailing ";".
+        source_line = re.sub(r' ;$', ';', source_line)
+
+        # Remove a space before "," and ")".
+        source_line = re.sub(r' ([,)])', r'\g<1>', source_line)
+
+        # Replace a negated comparison with the inverse one, e.g. "!(a == b)" with "a != b".
+        cond_replacements = {'==': '!=', '!=': '==', '<=': '>', '>=': '<', '<': '>=', '>': '<='}
+        for orig_cond, replacement_cond in cond_replacements.items():
+            res = re.match(rf'^!\((.+) {orig_cond} (.+)\)$', source_line)
+            if res:
+                source_line = f'{res.group(1)} {replacement_cond} {res.group(2)}'
+                # Stop after the first applicable replacement: applying another one would
+                # break the result.
+                break
+
+        # Remove unnecessary "(...)" around returned values/expressions.
+        source_line = re.sub(r'^return \((.*)\);$', r'return \g<1>;', source_line)
+
+        # Make source code and assumptions more human-readable (common improvements).
+        for source_kind in ('source', 'assumption'):
+            if source_kind in edge:
+                # Remove unnecessary "(...)" around integers.
+                edge[source_kind] = re.sub(r' \((-?\d+\w*)\)', r' \g<1>', edge[source_kind])
+
+                # Replace "& " with "&".
+                edge[source_kind] = re.sub(r'& ', '&', edge[source_kind])
+        if source_line in ("1", "\"\""):
+            edge['sink'] = True
+        edge['source'] = source_line
+
+
+def _remove_switch_cases(logger, error_trace):
+    # Get rid of redundant switch cases. Replace:
+    #   assume(var != A)
+    #   assume(var != B)
+    #   ...
+    #   assume(var == Z)
+    # with:
+    #   assume(var == Z)
+    removed_switch_cases_num = 0
+    for edge in error_trace.get_edges():
+        # Begin to match the pattern only at edges that represent conditions.
+        if 'condition' not in edge:
+            continue
+
+        # Collect all consecutive condition edges.
+        cond_edges = []
+        for cond_edge in error_trace.get_edges(start=edge):
+            if 'condition' not in cond_edge:
+                break
+            cond_edges.append(cond_edge)
+
+        # Do not proceed if there are no consecutive conditions.
+        if len(cond_edges) == 1:
+            continue
+
+        var = None
+        start_idx = 0
+        cond_edges_to_remove = []
+        for idx, cond_edge in enumerate(cond_edges):
+            res = re.search(r'^(.+) ([=!]=)', cond_edge['source'])
+
+            # Start from scratch if we meet an unexpected condition format.
+            if not res:
+                var = None
+                continue
+
+            # Do not proceed until the first condition matches the pattern.
+            if var is None and res.group(2) != '!=':
+                continue
+
+            # Begin to collect conditions.
+            if var is None:
+                start_idx = idx
+                var = res.group(1)
+                continue
+            # Start from scratch if the compared expression differs.
+            if var != res.group(1):
+                var = None
+                continue
+
+            # Finish collecting conditions: the pattern has matched.
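+            # E.g. for the chain [x != 1, x != 2, x == 3] the first two assumptions are
+            # marked as sinks, and only "x == 3" is kept.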
+ if var is not None and res.group(2) == '==': + cond_edges_to_remove.extend(cond_edges[start_idx:idx]) + var = None + continue + + for cond_edge in reversed(cond_edges_to_remove): + if not cond_edge.get('sink'): + cond_edge['sink'] = True + removed_switch_cases_num += 1 + + if removed_switch_cases_num: + logger.debug(f'{removed_switch_cases_num} switch cases were removed') diff --git a/tools/config.json b/tools/config.json index 8e36963..b2fa5c9 100644 --- a/tools/config.json +++ b/tools/config.json @@ -1,27 +1,26 @@ -{ - "default tool path": { - "cil": [ - "frama_c_cil/toplevel.opt", - "cil/obj/x86_LINUX/cilly.asm.exe" - ], - "klever": "klever/core", - "et html": "klever/bridge", - "uploader": "klever/utils/bin/upload-reports.py", - "benchexec": "benchexec/bin", - "cif": "cif/bin" - }, - "cil args": { - "frama_c_cil/toplevel.opt": [ - "-no-autoload-plugins", "-no-findlib", "-machdep", "gcc_x86_64", "-c11", "-kernel-warn-key", - "CERT:MSC:38=active", "-remove-unused-inline-functions", "-remove-unused-static-functions", - "-no-annot", "-no-single-return", "-fold-temp-vars", "-keep-logical-operators", - "-aggressive-merging", "-print", "-print-lines", "-no-print-annot", "-print-cil-as-is", - "-shrink-initializers", "-allow-duplication", "-ocode" - ], - "cil/obj/x86_LINUX/cilly.asm.exe": [ - "--printCilAsIs", "--domakeCFG", "--decil", "--noInsertImplicitCasts", - "--useLogicalOperators", "--ignore-merge-conflicts", "--no-convert-direct-calls", - "--no-convert-field-offsets", "--no-split-structs", "--rmUnusedInlines", "--out" - ] - } +{ + "default tool path": { + "cil": [ + "frama_c_cil/toplevel.opt", + "cil/obj/x86_LINUX/cilly.asm.exe" + ], + "et html": "cvv/web", + "uploader": "cvv/utils/bin/upload-reports.py", + "benchexec": "benchexec/bin", + "cif": "cif/bin" + }, + "cil args": { + "frama_c_cil/toplevel.opt": [ + "-no-autoload-plugins", "-no-findlib", "-machdep", "gcc_x86_64", "-c11", "-kernel-warn-key", + "CERT:MSC:38=active", "-remove-unused-inline-functions", "-remove-unused-static-functions", + "-no-annot", "-no-single-return", "-fold-temp-vars", "-keep-logical-operators", + "-aggressive-merging", "-print", "-print-lines", "-no-print-annot", "-print-cil-as-is", + "-shrink-initializers", "-allow-duplication", "-ocode" + ], + "cil/obj/x86_LINUX/cilly.asm.exe": [ + "--printCilAsIs", "--domakeCFG", "--decil", "--noInsertImplicitCasts", + "--useLogicalOperators", "--ignore-merge-conflicts", "--no-convert-direct-calls", + "--no-convert-field-offsets", "--no-split-structs", "--rmUnusedInlines", "--out" + ] + } } \ No newline at end of file