-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 1f11138
Showing
13 changed files
with
480 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Image for the CPS SPA detection tool: installs the project and its
# dependencies into a virtual environment owned by an unprivileged user.
FROM python:3.9.1

# Create and switch to an unprivileged user; everything below runs as `user`.
RUN adduser --disabled-password --gecos '' user
USER user
RUN mkdir ~/cps-spa-detection-tool

# set work directory
WORKDIR /home/user/cps-spa-detection-tool

# Set virtual environment.
# An absolute path is used so that the PATH entry below stays valid even when
# a command is executed from a directory other than WORKDIR.
ENV VIRTUAL_ENV=/home/user/cps-spa-detection-tool/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Upgrade the packaging tools inside the virtual environment (single layer).
RUN python3 -m pip install --upgrade pip virtualenv setuptools wheel

# Copy the packaging/test configuration first and install the dependencies
# before the sources are copied, so that a source-only change does not
# invalidate the (slow) dependency layer.
# --chown is needed because COPY creates root-owned files regardless of USER.
COPY --chown=user:user setup.py setup.cfg requirements.txt pyproject.toml build_virtual_env.sh ./
RUN pip install -r requirements.txt

# Copy the module and its tests
COPY --chown=user:user dt ./dt
COPY --chown=user:user tests ./tests

# Install the project itself in editable mode
RUN pip install --editable /home/user/cps-spa-detection-tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# CPS SPA Detection Tool | ||
|
||
## Setup | ||
For portability and reproducibility, this tool runs inside Docker.
To simplify the Docker setup, we provide two scripts: one for building the Docker image and one for running the Docker container.
|
||
__! Note:__ On Windows, first install **_Docker for Windows_** and make sure it is running. Then use **_Git Bash_** to run the following scripts.
|
||
### Docker image setup | ||
Execute the following script for building the docker image: | ||
|
||
`. docker_scripts/build-cps-spa-detection-tool.sh` | ||
|
||
### Docker image container | ||
The script `docker_scripts/run-cps-spa-detection-tool-container.sh` starts the Docker container.
To mine remote repositories, run this script without any input argument.
To mine local repositories, pass the directory that contains the local repositories as the input argument:
|
||
`. docker_scripts/run-cps-spa-detection-tool-container.sh [local_repositories]` | ||
|
||
__! Note:__ This input argument should be an absolute path. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Recreate the local virtual environment in ./env from scratch and install
# the project (editable) together with its requirements into it.

# Drop a virtual environment directory left over from an earlier run.
[ -d "env" ] && rm -rf env

# Create and activate the fresh environment.
python3 -m venv env
. env/bin/activate

# Install the tooling, the pinned requirements and finally the project itself.
python3 -m pip install --upgrade pip virtualenv
python3 -m pip install -r requirements.txt
pip install --editable .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Rebuild the cps-spa-detection-tool docker image from scratch.
# The README runs this script with `.` (sourced) from the repository root,
# so it deliberately never calls `exit`.

# Stop and remove the previous docker container.
# `-a` also lists stopped containers: a stopped container would otherwise go
# unnoticed and make the `docker rmi` below fail. `grep -x` matches the exact
# container name only instead of any line containing it.
CHECK_CONTAINERS=$(docker container ls -a --format '{{.Names}}' | grep -x 'cps-spa-detection-tool-container')
if [ -n "$CHECK_CONTAINERS" ]; then
  echo "Stopping and removing existing container..."
  docker stop cps-spa-detection-tool-container > /dev/null
  docker rm cps-spa-detection-tool-container > /dev/null
fi

# Remove previous docker image.
# Querying the repository name directly avoids matching unrelated images
# whose name merely contains 'cps-spa-detection-tool'.
CHECK_IMAGES=$(docker images -q 'cps-spa-detection-tool')
if [ -n "$CHECK_IMAGES" ]; then
  docker rmi 'cps-spa-detection-tool'
fi

# Build the new image from Dockerfile.cps-spa-detection-tool.
# The build context is quoted so that a working directory containing spaces works.
docker image build -t cps-spa-detection-tool \
  -f Dockerfile.cps-spa-detection-tool "$(pwd)"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Start a fresh cps-spa-detection-tool container.
# Usage (sourced from the repository root, see README):
#   . docker_scripts/run-cps-spa-detection-tool-container.sh [local_repositories]
# The optional argument is an absolute path to a directory with local
# repositories; it is bind-mounted into the container.
# The script is sourced, so it deliberately never calls `exit`.

# Stop and remove the previous docker container.
# `-a` also lists stopped containers: a stopped container with the same name
# would otherwise make `docker run --name` below fail with a name conflict.
CHECK_CONTAINERS=$(docker container ls -a --format '{{.Names}}' | grep -x 'cps-spa-detection-tool-container')
if [ -n "$CHECK_CONTAINERS" ]; then
  echo "Stopping and removing existing container..."
  docker stop cps-spa-detection-tool-container > /dev/null
  docker rm cps-spa-detection-tool-container > /dev/null
fi

# Make results dir if it is needed
mkdir -p results

# NOTE(review): the mount targets below live under /home/user/repo-mining,
# while the Dockerfile in this commit uses /home/user/cps-spa-detection-tool
# as its work directory - confirm which path the tool actually reads/writes.

# Run a new docker container.
# Two explicit invocations are used instead of an unquoted $EXTRA_MOUNT
# variable, so that a repositories path containing spaces is passed intact.
if [ -n "$1" ] && [ -d "$1" ]; then
  # Mount projects directory for local repo analysis.
  # The input argument should be an absolute path.
  docker run -dit --name cps-spa-detection-tool-container \
    --mount type=bind,source="$(pwd)/results",target=/home/user/repo-mining/results \
    --mount "type=bind,source=$1,target=/home/user/repo-mining/projects" \
    cps-spa-detection-tool
else
  if [ -n "$1" ]; then
    # Previously a wrong path was ignored silently; tell the user about it.
    echo "Warning: '$1' is not a directory; starting without local repositories." >&2
  fi
  docker run -dit --name cps-spa-detection-tool-container \
    --mount type=bind,source="$(pwd)/results",target=/home/user/repo-mining/results \
    cps-spa-detection-tool
fi
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# This is a sample Python script. | ||
|
||
# Press Shift+F10 to execute it or replace it with your code. | ||
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings. | ||
|
||
|
||
def print_hi(name):
    """Write the greeting ``Hi, <name>`` to standard output."""
    greeting = f'Hi, {name}'
    print(greeting)
|
||
|
||
# Script entry point: greet 'PyCharm' only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    print_hi('PyCharm')
|
||
# See PyCharm help at https://www.jetbrains.com/help/pycharm/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
#!/usr/bin/env python | ||
""" | ||
Searching through the diffs of each commit | ||
""" | ||
import os | ||
import re | ||
from chardet.universaldetector import UniversalDetector | ||
import pd.dict_repo_list | ||
from datetime import datetime | ||
from graph_creation import create_graph | ||
|
||
|
||
# Lower-case extensions of the source files that are searched. Built once at
# import time instead of once per visited file.
# noinspection SpellCheckingInspection
_SEARCH_IN_EXT = frozenset({
    '.c', '.cpp', '.h', '.hpp', '.cxx', '.hxx', '.cc', '.hh', '.h++',
    '.ipp', '.inl', '.txx', '.tpp', '.tpl',
    '.c++m', '.cppm', '.cxxm', '.kt',
    '.java', '.go', '.py', '.rb', '.rs',
    '.scala', '.sc', '.swift', '.js', '.ts', '.tsx', '.sh',
})


def _count_matches(path: str, pattern: re.Pattern, encoding: str) -> int:
    """Count the pattern matches, line by line, in ``path`` decoded with ``encoding``.

    The total is only returned when the whole file could be decoded, so a
    decode error half-way through never leaves a partial count behind.
    """
    with open(path, 'r', encoding=encoding) as content_file:
        return sum(len(pattern.findall(line)) for line in content_file)


def _detect_result(path: str) -> dict:
    """Feed the raw bytes of ``path`` to chardet and return its detection result."""
    detector = UniversalDetector()
    with open(path, 'rb') as rd_file:
        for rd_line in rd_file:
            detector.feed(rd_line)
            if detector.done:
                break
    detector.close()
    return detector.result


def _count_in_file(path: str, pattern: re.Pattern) -> int:
    """Count the pattern matches in one file, trying several encodings in turn."""
    # Most files are UTF-8; most of the remaining ones seem to use
    # Windows-1252. Trying these two first keeps the (slow) chardet detection
    # for the rare leftovers.
    for enc in ('utf-8', 'Windows-1252'):
        try:
            return _count_matches(path, pattern, enc)
        except UnicodeDecodeError:
            continue

    # Neither common encoding worked: let chardet guess the encoding.
    result = _detect_result(path)
    if not result:
        print("No Result from detector.")
        return 0
    enc = result["encoding"]
    if not enc:
        print("No encoding result.")
        return 0
    print(f"encoding: {enc}")
    try:
        return _count_matches(path, pattern, enc)
    except UnicodeDecodeError:
        # chardet guessed an encoding that still cannot decode the file.
        print(f"UnicodeDecodeError: {path}")
        return 0


def dig_for_code(key_project: str, search_for_pattern: str, repo_dictionary: dict) -> int:
    """
    Count how often a pattern occurs in the current state of a local repository.

    Walks the directory tree of the project and searches, line by line, every
    file whose (case-insensitive) extension is listed in ``_SEARCH_IN_EXT``.
    A file that cannot be read is reported on standard output and skipped;
    it never contributes a partial count.
    Args:
        key_project: Project name from the dictionary.
        search_for_pattern: Regular expression to look for in the code.
        repo_dictionary: Dictionary mapping the project name to a dictionary
            whose "local" entry is the local location of the repository.
    Returns:
        count: How often the pattern occurs in the code of the specified project.
    """
    url = repo_dictionary[key_project]["local"]
    pattern = re.compile(search_for_pattern, re.M)
    count = 0

    for root, _directories, files in os.walk(url):
        for name in files:
            file = os.path.join(root, name)
            file_extension = os.path.splitext(file)[1]
            if file_extension.lower() not in _SEARCH_IN_EXT:
                continue
            try:
                count += _count_in_file(file, pattern)
            except Exception as e:  # best effort: one unreadable file must not abort the scan
                print(f"Different error encountered: {file}, error: {e}")
    return count
|
||
|
||
def start_searching(search_for_pattern: str, title_graph: str, search_type: str):
    """
    Run one search round over every known repository and plot the hits.

    The repository dictionary is (re)built, each project is searched with
    ``dig_for_code`` and its count is printed. Projects with at least one hit
    are collected; a graph is only created when that collection is non-empty.
    Args:
        search_for_pattern: Pattern to search with in this current round.
        title_graph: Title connected to the search pattern.
        search_type: Label of the search kind, handed on to the graph creation.
    """
    pd.dict_repo_list.build_repo_dict()
    repo_dictionary = pd.dict_repo_list.projects

    hits_per_project = {}
    for project_name in repo_dictionary:
        hits = dig_for_code(project_name, search_for_pattern, repo_dictionary)
        print(f"{project_name}: {hits}")
        if hits > 0:
            hits_per_project[project_name] = hits

    # Nothing found anywhere: there is nothing to plot.
    if not hits_per_project:
        return
    create_graph(hits_per_project, title_graph, search_type)
|
||
|
||
def main():
    """
    Search all repositories for every known sleep-like pattern.

    Prints the start time, runs one search round (including graph creation)
    per entry of the pattern table, and prints the end time.
    """
    start_time = datetime.now().strftime("%H:%M:%S")
    print(f"Start time: {start_time}")

    # Title of the graph -> regular expression searched for on each line.
    dict_search_patterns = {
        "sleep function": r'^(.*)(sleep\()',
        "Sleep function": r'^(.*)(Sleep\()',
        "sleep_for": r'^(.*)(sleep_for)',
        "setTimeout": r'^(.*)(setTimeout)',
        # "sleep" followed by a space (e.g. the shell command `sleep 5`).
        # The previous pattern `sleep" "` matched literal quote characters.
        "sleep space": r'^(.*)(sleep )',
    }
    for name, search_pattern in dict_search_patterns.items():
        print(f"Searching: {name}")
        start_searching(search_pattern, name, "current")

    end_time = datetime.now().strftime("%H:%M:%S")
    print(f"End time: {end_time}")
|
||
|
||
# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Oops, something went wrong.