diff --git a/Dockerfile.cps-spa-detection-tool b/Dockerfile.cps-spa-detection-tool
new file mode 100644
index 0000000..7cfc09f
--- /dev/null
+++ b/Dockerfile.cps-spa-detection-tool
@@ -0,0 +1,33 @@
+FROM python:3.9.1
+
+RUN adduser --disabled-password --gecos '' user
+USER user
+RUN mkdir ~/cps-spa-detection-tool
+
+# set work directory
+WORKDIR /home/user/cps-spa-detection-tool
+
+# Set virtual environment
+ENV VIRTUAL_ENV=/home/user/cps-spa-detection-tool/venv
+RUN python3 -m venv $VIRTUAL_ENV
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# Install dependencies
+RUN python3 -m pip install --upgrade pip virtualenv
+RUN pip install --upgrade pip setuptools wheel
+
+# Copy files required for setting up virtual env
+COPY setup.py /home/user/cps-spa-detection-tool
+COPY setup.cfg /home/user/cps-spa-detection-tool
+COPY requirements.txt /home/user/cps-spa-detection-tool
+
+# Copy files related to testing and reporting
+COPY pyproject.toml /home/user/cps-spa-detection-tool
+COPY build_virtual_env.sh /home/user/cps-spa-detection-tool
+
+# Copy the module
+COPY /dt /home/user/cps-spa-detection-tool/dt
+COPY /tests /home/user/cps-spa-detection-tool/tests
+
+RUN pip install -r requirements.txt
+RUN pip install --editable /home/user/cps-spa-detection-tool
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0efea33
--- /dev/null
+++ b/README.md
@@ -0,0 +1,21 @@
+# CPS SPA Detection Tool
+
+## Setup
+For portability and replicability of this tool, we use docker.
+For easier docker setup, we provide two scripts for building docker image and running the docker container.
+
+__! Note:__ For Windows, first install (and have it running) **_Docker for Windows_**. Then use **_Git Bash_** to run the following scripts.
+
+### Docker image setup
+Execute the following script for building the docker image:
+
+`. docker_scripts/build-cps-spa-detection-tool.sh`
+
+### Docker container
+The script `docker_scripts/run-cps-spa-detection-tool-container.sh` is created for this task.
+For running the mining for remote repositories, this script can be executed without any input parameter.
+However, to perform the mining process for local repositories, we should pass the directory of local repositories as the input argument:
+
+`. docker_scripts/run-cps-spa-detection-tool-container.sh [local_repositories]`
+
+__! Note:__ This input argument should be an absolute path.
\ No newline at end of file
diff --git a/build_virtual_env.sh b/build_virtual_env.sh
new file mode 100644
index 0000000..cc89c81
--- /dev/null
+++ b/build_virtual_env.sh
@@ -0,0 +1,9 @@
+if [ -d "env" ]; then
+    rm -rf env
+fi
+
+python3 -m venv env
+. env/bin/activate
+python3 -m pip install --upgrade pip virtualenv
+python3 -m pip install -r requirements.txt
+pip install --editable .
diff --git a/docker_scripts/build-cps-spa-detection-tool.sh b/docker_scripts/build-cps-spa-detection-tool.sh
new file mode 100644
index 0000000..d0b5856
--- /dev/null
+++ b/docker_scripts/build-cps-spa-detection-tool.sh
@@ -0,0 +1,17 @@
+# Stop and remove the previous docker container
+CHECK_CONTAINERS=$(docker container ls -a | grep 'cps-spa-detection-tool-container')
+if [ -n "$CHECK_CONTAINERS" ]; then
+    echo "Stopping and removing existing container..."
+    docker stop cps-spa-detection-tool-container > /dev/null
+    docker rm cps-spa-detection-tool-container > /dev/null
+fi
+
+# Remove previous docker image
+CHECK_IMAGES=$(docker images | grep 'cps-spa-detection-tool')
+if [ -n "$CHECK_IMAGES" ]; then
+    docker rmi 'cps-spa-detection-tool'
+fi
+
+# Build the new image from Dockerfile.cps-spa-detection-tool
+docker image build -t cps-spa-detection-tool \
+$(pwd) -f Dockerfile.cps-spa-detection-tool
diff --git a/docker_scripts/run-cps-spa-detection-tool-container.sh b/docker_scripts/run-cps-spa-detection-tool-container.sh
new file mode 100644
index 0000000..3f2c003
--- /dev/null
+++ b/docker_scripts/run-cps-spa-detection-tool-container.sh
@@ -0,0 +1,27 @@
+# Stop and remove the previous docker container
+CHECK_CONTAINERS=$(docker container ls -a | grep 'cps-spa-detection-tool-container')
+if [ -n "$CHECK_CONTAINERS" ]; then
+    echo "Stopping and removing existing container..."
+    docker stop cps-spa-detection-tool-container > /dev/null
+    docker rm cps-spa-detection-tool-container > /dev/null
+fi
+
+# Make results dir if it is needed
+if [ ! -d "results" ]; then
+    mkdir results
+fi
+
+# Mount projects directory for local repo analysis
+EXTRA_MOUNT=""
+if [ -n "$1" ]; then
+    if [ -d "$1" ]; then
+        # The input argument should be an absolute path
+        EXTRA_MOUNT="--mount type=bind,source=$1,target=/home/user/repo-mining/projects"
+    fi
+fi
+
+# Run a new docker container.
+docker run -dit --name cps-spa-detection-tool-container \
+--mount type=bind,source="$(pwd)/results",target=/home/user/repo-mining/results \
+$EXTRA_MOUNT \
+cps-spa-detection-tool
diff --git a/dt/__init__.py b/dt/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dt/main.py b/dt/main.py
new file mode 100644
index 0000000..5596b44
--- /dev/null
+++ b/dt/main.py
@@ -0,0 +1,16 @@
+# This is a sample Python script.
+
+# Press Shift+F10 to execute it or replace it with your code.
+# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
+
+
+def print_hi(name):
+    # Use a breakpoint in the code line below to debug your script.
+    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.
+
+
+# Press the green button in the gutter to run the script.
+if __name__ == '__main__':
+    print_hi('PyCharm')
+
+# See PyCharm help at https://www.jetbrains.com/help/pycharm/
diff --git a/dt/search_current.py b/dt/search_current.py
new file mode 100644
index 0000000..deb0738
--- /dev/null
+++ b/dt/search_current.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+"""
+Searching through the diffs of each commit
+"""
+import os
+import re
+from chardet.universaldetector import UniversalDetector
+import pd.dict_repo_list
+from datetime import datetime
+from graph_creation import create_graph
+
+
+def dig_for_code(key_project: str, search_for_pattern: str, repo_dictionary: dict) -> int:
+    """
+    Starts the mining process on the repository indicated by the given URL
+    Through the current state of the repository. Only looking at files with specified extensions.
+
+    Args:
+        key_project: Project name from the dictionary.
+        search_for_pattern: Pattern to find in the code to occur.
+        repo_dictionary: Dictionary with the project name and local location.
+
+    Returns:
+        count: How often the keyword occurs in the code of specified project.
+ """ + url = repo_dictionary[key_project]["local"] + count = 0 + + p = re.compile(search_for_pattern, re.M) + for root, directories, files in os.walk(url): + for name in files: + file = os.path.join(root, name) + + file_name, file_extension = os.path.splitext(file) + # noinspection SpellCheckingInspection + search_in_ext = ['.c', '.cpp', '.h', '.hpp', '.cxx', '.hxx', '.cc', '.hh', '.h++', + '.ipp', '.inl', '.txx', '.tpp', '.tpl', + '.c++m', '.cppm', '.cxxm', '.kt', + '.java', '.go', '.py', '.rb', '.rs', + '.scala', '.sc', '.swift', '.js', '.ts', '.tsx', '.sh'] + + if file_extension.lower() in search_in_ext: + try: + content_file = open(file, 'r') + for line in content_file: + check = re.findall(p, line) + count += len(check) + content_file.close() + except UnicodeDecodeError: + """ + Some files are using an encoding that cannot be immediately read. + Most of these files, seem to be using Windows-1252 encoding. + To keep the duration of this script as short as possible, this encoding will be tried first. + """ + try: + enc = 'Windows-1252' + content_file = open(file, 'r', encoding=enc) + for line in content_file: + check = re.findall(p, line) + count += len(check) + content_file.close() + except UnicodeDecodeError: + """ + When the Windows-1252 encoding is not correct, chardet is being used. + This tool tries to detect which encoding is used. 
+ """ + try: + rd_file = open(file, "rb") + raw_data = rd_file.readlines() + detector = UniversalDetector() + for rd_line in raw_data: + detector.feed(rd_line) + if detector.done: + break + detector.close() + rd_file.close() + if detector.result: + enc = detector.result["encoding"] + if enc: + print(f"encoding: {enc}") + content_file = open(file, 'r', encoding=enc) + for line in content_file: + check = re.findall(p, line) + count += len(check) + content_file.close() + else: + print("No encoding result.") + else: + print("No Result from detector.") + except UnicodeDecodeError: + """ + In case chardet is not able to detect which encoding was used. + """ + print(f"UnicodeDecodeError: {file}") + except Exception as e: + print(f"Different error encountered: {file}, error: {e}") + except Exception as e: + print(f"Different error encountered: {file}, error: {e}") + return count + + +def start_searching(search_for_pattern: str, title_graph: str, search_type: str): + """ + Start the search with received pattern. + + Args: + search_for_pattern: Pattern to search with in this current round. + title_graph: Title connected to the search pattern. + search_type: Searching through the current state of the repository. 
+ """ + data_graph = {} + pd.dict_repo_list.build_repo_dict() + repo_dictionary = pd.dict_repo_list.projects + for key_repo_name in repo_dictionary.keys(): + counted = dig_for_code(key_repo_name, search_for_pattern, repo_dictionary) + print(f"{key_repo_name}: {counted}") + if counted > 0: + data_graph[key_repo_name] = counted + if data_graph: + create_graph(data_graph, title_graph, search_type) + + +def main(): + now = datetime.now() + current_time = now.strftime("%H:%M:%S") + print(f"Start time: {current_time}") + + dict_search_patterns = { + "sleep function": r'^(.*)(sleep\()', + "Sleep function": r'^(.*)(Sleep\()', + "sleep_for": r'^(.*)(sleep_for)', + "setTimeout": r'^(.*)(setTimeout)', + "sleep space": r'^(.*)(sleep" ")', + } + for name in dict_search_patterns: + print(f"Searching: {name}") + start_searching(dict_search_patterns[name], name, "current") + + now = datetime.now() + current_time = now.strftime("%H:%M:%S") + print(f"End time: {current_time}") + + +if __name__ == "__main__": + main() diff --git a/dt/search_selection.py b/dt/search_selection.py new file mode 100644 index 0000000..3b62909 --- /dev/null +++ b/dt/search_selection.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +""" +Searching trough the diffs of each commit +""" +import re +from pydriller import RepositoryMining +import pd.dict_repo_list +from graph_creation import create_graph +from utils import build_results_path +from utils import list_file_content +import os +import pathlib +from typing import Optional + +"""Change this location if the results should be printed or found elsewhere.""" +location_results_dir = os.path.join(pathlib.Path.home(), "CPS_SPA_Detection_Tool", "results") +location_results_file = os.path.join(location_results_dir, "resultsOutputSelection.txt") +location_repo_results_dir = os.path.join(location_results_dir, "repo") + + +def dig_for_code(key_project: str, search_for_pattern: str, repo_dictionary: dict) -> Optional[int]: + """ + Starts the mining process on the 
+    Uses list of hashes from previous selection script.
+
+    Args:
+        key_project: Project name from the dictionary.
+        search_for_pattern: Pattern to find in the code to occur.
+        repo_dictionary: Dictionary with the project name and local location.
+
+    Returns:
+        count: How often the keyword occurs in the code of specified project.
+    """
+    url = repo_dictionary[key_project]["local"]
+    hash_file_location = build_results_path(key_project)
+
+    count = 0
+
+    if hash_file_location:
+        try:
+            results_file = open(location_results_file, 'a')
+            print(f"\nProject: {key_project}", file=results_file)
+            results_file.close()
+        except FileNotFoundError:
+            print("File to print results does not exist.")
+
+        list_of_hashes_project = list_file_content(hash_file_location)
+
+        for each_hash in list_of_hashes_project:
+            stripped_each_hash = str.rstrip(each_hash)
+            for commit in RepositoryMining(url, single=stripped_each_hash).traverse_commits():
+                reset = True
+                for m in commit.modifications:
+                    """ Results are for each file in the commit """
+                    p = re.compile(search_for_pattern, re.M)
+                    check = re.findall(p, m.diff)
+                    if check:
+                        if reset:
+                            try:
+                                results_file = open(location_results_file, 'a')
+                                print(f"\nCommit hash: {commit.hash}", file=results_file)
+                                results_file.close()
+                            except FileNotFoundError:
+                                print("File to print results does not exist.")
+                            reset = False
+                        try:
+                            results_file = open(location_results_file, 'a')
+                            print(f"\nFile name: {m.filename}", file=results_file)
+                            print(check, file=results_file)
+                            print(f"File path new: {m.new_path}, File path old: {m.old_path}", file=results_file)
+                            results_file.close()
+                        except FileNotFoundError:
+                            print("File to print results does not exist.")
+
+                        count += len(check)
+    else:
+        print(f"No file found for project: {key_project}")
+
+        try:
+            results_file = open(location_results_file, 'a')
+            print(f"\nNo file found for project: {key_project}", file=results_file)
+            results_file.close()
+        except FileNotFoundError:
+            print("File to print results does not exist.")
+
+        return None
+    return count
+
+
+def start_searching(search_for_pattern: str, title_graph: str, search_type: str):
+    """
+    Gather data to generate a graph with the results.
+
+    Args:
+        search_for_pattern: Pattern to search through the code for.
+        title_graph: Name for the resulting graph.
+        search_type: This script is using a selection of commits to search through.
+
+    """
+    data_graph = {}
+    pd.dict_repo_list.build_repo_dict()
+    repo_dictionary = pd.dict_repo_list.projects
+    for key_repo_name in repo_dictionary.keys():
+        counted = dig_for_code(key_repo_name, search_for_pattern, repo_dictionary)
+        if counted is not None:
+            try:
+                results_file = open(location_results_file, 'a')
+                print(f"\nNumber of results: {key_repo_name}: {counted}", file=results_file)
+                results_file.close()
+            except FileNotFoundError:
+                print("File to print results does not exist.")
+            print(f"{key_repo_name}: {counted}")
+            if counted > 0:
+                data_graph[key_repo_name] = counted
+    if data_graph:
+        create_graph(data_graph, title_graph, search_type)
+
+
+def main():
+    """
+    Main function; containing a dictionary with the regex search pattern.
+    Which will be used to search through the code later.
+    dictionary = {Name graph : regex pattern}
+    """
+    dict_search_patterns = {
+        "selection sleep add": r'^(\+)(.*)(sleep\()',
+        "selection sleep remove": r'^(\-)(.*)(sleep\()',
+    }
+    if os.path.exists(location_repo_results_dir):
+        if os.listdir(location_repo_results_dir):
+            print("Start analysis.")
+            try:
+                results_file = open(location_results_file, 'w')
+                print("Running search_selection.py", file=results_file)
+                print(f"Patterns: {dict_search_patterns}", file=results_file)
+                results_file.close()
+            except OSError as e:
+                print(f"Error: {location_results_file}, {e.strerror}")
+            for name in dict_search_patterns:
+                try:
+                    results_file = open(location_results_file, 'a')
+                    print(f"Searching: {name}", file=results_file)
+                    results_file.close()
+                except FileNotFoundError:
+                    print("File to print results does not exist.")
+                print(f"Searching: {name}")
+                start_searching(dict_search_patterns[name], name, "selection")
+        else:
+            print("No files found in the repo directory to analyse.")
+    else:
+        print("Directory with repository results does not exist.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..b1b0b62
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,10 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-vs"
+testpaths = [
+    "tests",
+]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..bfc32f9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,27 @@
+pydriller==1.15.5
+gitpython==3.1.18
+attrs==20.2.0
+coverage==5.3
+flake8==3.8.3
+importlib-metadata==2.0.0
+iniconfig==1.0.1
+jedi==0.17.2
+mccabe==0.6.1
+packaging==20.4
+parso==0.7.1
+pluggy==0.13.1
+py==1.10.0
+pycodestyle==2.6.0
+pyflakes==2.2.0
+pyparsing==2.4.7
+pytest==6.1.1
+pytest-cov==2.10.1
+pytest-mock==3.3.1
+six==1.15.0
+toml==0.10.1
+zipp==3.4.0
+mutmut==2.1.0
+pdoc3==0.9.2
+numpy==1.20.0
+matplotlib==3.4.2
+chardet==4.0.0
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..f561131
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,18 @@
+[metadata]
+name = dt
+version = 0.0.1
+
+[options]
+packages = dt
+install_requires =
+    requests
+    importlib; python_version == "3.9.1"
+
+[mutmut]
+paths_to_mutate = dt/
+backup = False
+tests_dir = dt/
+dict_synonyms = Struct, NamedStruct
+
+[flake8]
+max-line-length = 100
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..a4f49f9
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,2 @@
+import setuptools
+setuptools.setup()