Skip to content

Commit

Permalink
Add intermediate reports in cve binary tool to merge outputs from dif…
Browse files Browse the repository at this point in the history
…ferent sources (intel#1169)

* add --append flag in cli

* refactor -a -append code

* Take optional intermediate report path argument

* add --tag parameter for intermediate report

* add tag arg in test_output

* add merge argument for intermediate reports

* refactor code and autoremove temporary generated merged

* reformat cli.py

* update prefix for intermediate reports

* Add tests for MergeReports
  • Loading branch information
imsahil007 authored Jun 23, 2021
1 parent 1099187 commit 63f1c51
Show file tree
Hide file tree
Showing 12 changed files with 697 additions and 14 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ jobs:
test/test_extractor.py
test/test_condensed_downloads.py
test/test_package_list_parser.py
test/test_merge.py
long_tests:
name: Long tests on python3.8
Expand Down Expand Up @@ -214,6 +215,7 @@ jobs:
test/test_util.py
test/test_condensed_downloads.py
test/test_package_list_parser.py
test/test_merge.py
- name: Run Synchronous test
run: >
pytest -v --cov --cov-append --cov-report=xml
Expand Down Expand Up @@ -281,6 +283,7 @@ jobs:
test/test_util.py
test/test_condensed_downloads.py
test/test_package_list_parser.py
test/test_merge.py
- name: Run Synchronous test
run: >
pytest -v
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@ doc/_build
test/downloads/
cve_bin_tool_requirements.csv
!test/condensed-downloads/*.tar.gz
intermediate*.json


45 changes: 45 additions & 0 deletions cve_bin_tool/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from cve_bin_tool.input_engine import InputEngine, TriageData
from cve_bin_tool.log import LOGGER
from cve_bin_tool.merge import MergeReports
from cve_bin_tool.output_engine import OutputEngine
from cve_bin_tool.package_list_parser import PackageListParser
from cve_bin_tool.util import ProductInfo
Expand Down Expand Up @@ -143,6 +144,19 @@ def main(argv=None):
choices=["low", "medium", "high", "critical"],
help="minimum CVE severity to report (default: low)",
)
output_group.add_argument(
"-a",
"--append",
nargs="?",
const=True,
help="save output as intermediate report in json format",
)
output_group.add_argument(
"-t",
"--tag",
action="store",
help="add a unique tag to differentiate between multiple intermediate reports",
)
parser.add_argument("-V", "--version", action="version", version=VERSION)
parser.add_argument(
"-u",
Expand All @@ -162,6 +176,12 @@ def main(argv=None):
action="store_true",
help="skips checking for a new version",
)
parser.add_argument(
"-m",
"--merge",
action=StringToListAction,
help="comma separated intermediate reports path for merging",
)

checker_group = parser.add_argument_group("Checkers")
checker_group.add_argument(
Expand Down Expand Up @@ -197,6 +217,9 @@ def main(argv=None):
"output_file": "",
"html_theme": "",
"package_list": "",
"append": False,
"tag": "",
"merge": None,
}

with ErrorHandler(mode=ErrorMode.NoTrace):
Expand Down Expand Up @@ -233,6 +256,17 @@ def main(argv=None):
**********************************************
"""
LOGGER.warning(warning_nolinux)
if args["merge"]:
LOGGER.info(
"You can use -f --format and -o --output-file for saving merged intermediate reports in a file"
)
merged_cves = MergeReports(merge_files=args["merge"])
if args["input_file"]:
LOGGER.warning(
"Ignoring -i --input-file while merging intermediate reports"
)
args["input_file"] = merged_cves.merge_reports()
# Creates a Object for OutputEngine

# Database update related settings
# Connect to the database
Expand Down Expand Up @@ -276,6 +310,12 @@ def main(argv=None):
"Please specify a directory to scan or an input file required"
)

# Output validation
if not args["append"] and args["tag"]:
LOGGER.warning(
f"Please specify -a --append to generate intermediate reports while using -t --tag"
)

if args["directory"] and not os.path.exists(args["directory"]):
parser.print_usage()
with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
Expand Down Expand Up @@ -374,10 +414,15 @@ def main(argv=None):
filename=args["output_file"],
themes_dir=args["html_theme"],
time_of_last_update=cvedb_orig.time_of_last_update,
tag=args["tag"],
products_with_cve=cve_scanner.products_with_cve,
products_without_cve=cve_scanner.products_without_cve,
total_files=total_files,
append=args["append"],
)
if args["merge"] and args["input_file"]:
# remove the merged json from .cache
os.remove(args["input_file"])

if not args["quiet"]:
output.output_file(args["format"])
Expand Down
5 changes: 5 additions & 0 deletions cve_bin_tool/error_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ class InvalidJsonError(Exception):
"""Given File is an Invalid JSON"""


class InvalidIntermediateJsonError(Exception):
    """Raised when a given intermediate report file is not in the expected format"""


class EmptyCache(Exception):
"""
Raised when NVD is opened when verify=False and there are no files in the
Expand Down Expand Up @@ -177,4 +181,5 @@ def __exit__(self, exc_type, exc_val, exc_tb):
CVEDataMissing: -15,
InvalidCheckerError: -16,
NVDRateLimit: -17,
InvalidIntermediateJsonError: -18,
}
159 changes: 159 additions & 0 deletions cve_bin_tool/merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# Copyright (C) 2021 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later

import json
import os
from datetime import datetime
from logging import Logger
from typing import Dict, List

from .cvedb import DISK_LOCATION_DEFAULT
from .error_handler import (
ErrorHandler,
ErrorMode,
InvalidIntermediateJsonError,
InvalidJsonError,
MissingFieldsError,
)
from .log import LOGGER
from .util import DirWalk

REQUIRED_INTERMEDIATE_METADATA = {
"scanned_dir",
"total_files",
"products_without_cve",
"products_with_cve",
"tag",
"timestamp",
}


class MergeReports:
    """Merge multiple intermediate JSON reports produced by cve-bin-tool.

    Collects intermediate report files (or directories of them), validates
    that each one carries the required metadata/report structure, de-duplicates
    CVE entries across reports, and writes a single merged JSON file into the
    tool's cache directory.  The path of the merged file is returned so the
    caller can feed it back through the normal input pipeline.
    """

    def __init__(
        self,
        merge_files: List[str],
        logger: Logger = None,
        error_mode=ErrorMode.TruncTrace,
        cache_dir=DISK_LOCATION_DEFAULT,
    ):
        """
        Args:
            merge_files: paths (files, directories, or globs) of intermediate reports.
            logger: optional logger; defaults to a child of the module LOGGER.
            error_mode: how validation errors are reported (see ErrorHandler).
            cache_dir: directory where the merged report is written.
        """
        self.logger = logger or LOGGER.getChild(self.__class__.__name__)
        self.merge_files = merge_files
        # List of parsed intermediate reports; replaced by the merged CVE list
        # once merge_reports() has run.
        self.all_cve_data = []
        # Stack of files currently being yielded by recursive_scan (mirrors
        # DirWalk-style bookkeeping; popped after each yield).
        self.file_stack = []
        self.error_mode = error_mode
        self.total_inter_files = 0  # number of valid intermediate files loaded
        self.total_files = 0  # total distinct scanned paths across all reports
        self.products_with_cve = 0
        self.products_without_cve = 0
        self.cache_dir = cache_dir

        # Match either the exact .json files given, or any *.json under a
        # non-.json path (directory / prefix).
        self.walker = DirWalk(
            pattern=";".join(
                file_path if file_path.endswith(".json") else file_path + "*.json"
                for file_path in self.merge_files
            ),
            yield_files=True,
        ).walk

    def recursive_scan(self, merge_files):
        """Yield every JSON report path under the given files/directories.

        Directories are walked recursively with the pattern configured in
        __init__; symlinked files are skipped to avoid cycles/duplicates.
        """
        for intermediate_path in merge_files:
            if os.path.isdir(intermediate_path):
                for filepath in self.walker([intermediate_path]):
                    self.file_stack.append(filepath)
                    yield filepath
                    self.file_stack.pop()
            elif os.path.isfile(intermediate_path) and not os.path.islink(
                intermediate_path
            ):
                self.file_stack.append(intermediate_path)
                yield intermediate_path
                self.file_stack.pop()

    def scan_intermediate_file(self, filename):
        """Read and validate one intermediate JSON file.

        Returns the parsed dict on success.  Raises (via ErrorHandler):
            InvalidJsonError: file is empty or not a JSON object.
            MissingFieldsError: top-level or metadata keys are missing.
            InvalidIntermediateJsonError: structure is otherwise malformed.
        """
        # BUG FIX: original logged a literal placeholder instead of the filename.
        self.logger.info(f"Loading file: {filename}")

        missing_fields = set()
        with open(filename) as json_file:
            # Avoid shadowing the file handle with its contents.
            inter_data = json.loads(json_file.read())
            if not inter_data or not isinstance(inter_data, dict):
                with ErrorHandler(mode=self.error_mode):
                    raise InvalidJsonError(filename)

            required_fields = {"metadata", "report"}
            missing_fields = required_fields - set(inter_data.keys())

            if not missing_fields:
                if isinstance(inter_data["metadata"], dict):
                    missing_fields = set(REQUIRED_INTERMEDIATE_METADATA) - set(
                        inter_data["metadata"].keys()
                    )
                    if not missing_fields:
                        if isinstance(inter_data["report"], list):
                            self.logger.info(
                                f"Adding data from {os.path.basename(filename)} with timestamp {inter_data['metadata']['timestamp']}"
                            )
                            self.total_inter_files += 1
                            return inter_data

        if missing_fields != set():
            with ErrorHandler(mode=self.error_mode):
                raise MissingFieldsError(f"{missing_fields} are required fields")

        with ErrorHandler(mode=self.error_mode):
            raise InvalidIntermediateJsonError(filename)

    def merge_reports(self):
        """Merge all valid intermediate reports into one file.

        Returns the path of the merged JSON file in the cache directory,
        or "" when no valid intermediate report was found.
        """
        for inter_file in self.recursive_scan(self.merge_files):
            self.all_cve_data.append(self.scan_intermediate_file(inter_file))

        if self.all_cve_data:
            # Collapse duplicate CVE entries (and duplicate paths) across reports.
            self.all_cve_data = self.remove_intermediate_duplicates()
            merged_file_path = self.save_merged_intermediate()
            return merged_file_path

        self.logger.error("No valid Intermediate reports found!")
        return ""

    def save_merged_intermediate(self):
        """Write the merged report as merged-<timestamp>.json in the cache dir.

        Returns the path of the written file.
        """
        if not os.path.isdir(self.cache_dir):
            os.makedirs(self.cache_dir)

        now = datetime.now().strftime("%Y-%m-%d.%H-%M-%S")
        filename = os.path.join(self.cache_dir, f"merged-{now}.json")
        with open(filename, "w") as f:
            json.dump(self.all_cve_data, f, indent="    ")

        return filename

    def remove_intermediate_duplicates(self) -> List[Dict[str, str]]:
        """Return a de-duplicated CVE list in cve-bin-tool JSON output format.

        Entries with cve_number "UNKNOWN" are dropped.  When the same CVE
        appears in multiple reports, their comma-separated "paths" fields are
        unioned; self.total_files tracks the count of distinct paths.
        """
        output = {}
        for inter_data in self.all_cve_data:
            self.products_with_cve += inter_data["metadata"]["products_with_cve"]
            self.products_without_cve += inter_data["metadata"]["products_without_cve"]
            for cve in inter_data["report"]:
                if cve["cve_number"] == "UNKNOWN":
                    continue
                existing = output.get(cve["cve_number"])
                if existing is None:
                    output[cve["cve_number"]] = cve
                    self.total_files += len(cve["paths"].split(","))
                else:
                    path_list = existing["paths"].split(",")
                    self.total_files -= len(path_list)
                    path_list.extend(cve["paths"].split(","))
                    # Remove duplicate paths; sort for a deterministic output.
                    path_list = sorted(set(path_list))
                    self.total_files += len(path_list)
                    # BUG FIX: original assigned the list to the wrong key
                    # ("path"), leaving "paths" stale.  Keep "paths" as the
                    # comma-separated string format used everywhere else.
                    existing["paths"] = ",".join(path_list)

        return list(output.values())
Loading

0 comments on commit 63f1c51

Please sign in to comment.