Merge pull request #978 from bruntib/basename_regex

Local compare mode to multiple runs
Ericsson · Sep 29, 2017 · 3bcd9ca · 3bcd9ca
2 parents af7376e + 7ecc74f
commit 3bcd9ca
Show file tree

Hide file tree

Showing 4 changed files with 104 additions and 57 deletions.
diff --git a/docs/user_guide.md b/docs/user_guide.md
@@ -929,7 +929,7 @@ The `--sqlite` (or `--postgresql` and the various `--db-` arguments) can be
 used to specify where the database, containing the analysis reports is.
 
 `--config-directory` specifies where the server configuration files, such as
-[authentication config](docs/authentication.md) is. For example, one can start
+[authentication config](authentication.md), are. For example, one can start
 two servers with two different product layout, but with the same authorisation
 configuration:
 
@@ -945,7 +945,7 @@ resides the `config.sqlite` file, containing the product configuration.
 If the server is started in `--sqlite` mode and fresh, that is, no product
 configuration file is found, a product named `Default`, using `Default.sqlite`
 in the configuration directory is automatically created. Please see
-[Product management](docs/products.md) for details on how to configure products.
+[Product management](products.md) for details on how to configure products.
 
 ### Master superuser and authentication forcing
 
@@ -1137,7 +1137,10 @@ optional arguments:
   -b BASE_RUN, --basename BASE_RUN
                         The 'base' (left) side of the difference: this
                         analysis run is used as the initial state in the
-                        comparison.
+                        comparison. The basename can be a Python regex which
+                        is meant to cover the whole run name. So if you have
+                        run_1_a_name, run_2_b_name and run_2_c_name then
+                        run_.*_[ab]_name selects the first two.
   -n NEW_RUN, --newname NEW_RUN
                         The 'new' (right) side of the difference: this
                         analysis run is compared to the -b/--basename run.
@@ -1179,6 +1182,11 @@ be compared to each other:
 CodeChecker cmd diff -p 8001 --basename my_project --newname my_new_checkin --new
 ~~~~~~~~~~~~~~~~~~~~~
 
+It is possible to compare a run to multiple baselines: pass a regular
+expression to the `-b` flag that matches the whole name of every required run.
+The pattern must use the Python regex syntax, see
+<https://docs.python.org/2/library/re.html#regular-expression-syntax>.
+
 ### Show summarised count of results (`sum`)
 
 ~~~~~~~~~~~~~~~~~~~~~
@@ -1279,7 +1287,7 @@ the database on the server.
 
 ### Manage product configuration of a server (`products`)
 
-Please see [Product management](docs/products.md) for details.
+Please see [Product management](products.md) for details.
 
 ### Authenticate to the server (`login`)
 
@@ -1305,7 +1313,7 @@ common arguments:
                         Set verbosity level. (default: info)
 ~~~~~~~~~~~~~~~~~~~~~
 
-If a server [requires privileged access](docs/authentication.md), you must
+If a server [requires privileged access](authentication.md), you must
 log in before you can access the data on the particular server. Once
 authenticated, your session is available for some time and `CodeChecker cmd`
 can be used normally.

diff --git a/libcodechecker/cmd/cmd_line_client.py b/libcodechecker/cmd/cmd_line_client.py
@@ -8,6 +8,7 @@
 from datetime import datetime
 import json
 import os
+import re
 import sys
 
 from codeCheckerDBAccess_v6 import constants, ttypes
@@ -33,25 +34,14 @@ def default(self, obj):
         return d
 
 
-def get_run_ids(client):
-    """
-    Returns a map for run names and run_ids.
-    """
-
-    runs = client.getRunData(None)
-
-    run_data = {}
-    for run in runs:
-        run_data[run.name] = (run.runId, run.runDate)
-
-    return run_data
-
-
 def check_run_names(client, check_names):
     """
-    Check if the given names are valid runs on the server.
+    Check if the given names are valid runs on the server. If any of the names
+    is not found then the script finishes. Otherwise a dictionary is returned
+    which maps run names to runs. The dictionary contains only the runs in
+    check_names or all runs if check_names is empty or None.
     """
-    run_info = get_run_ids(client)
+    run_info = {run.name: run for run in client.getRunData(None)}
 
     if not check_names:
         return run_info
@@ -128,7 +118,7 @@ def handle_list_results(args):
 
     run_info = check_run_names(client, [args.name])
 
-    run_id, _ = run_info.get(args.name)
+    run = run_info.get(args.name)
 
     limit = constants.MAX_QUERY_SIZE
     offset = 0
@@ -138,13 +128,13 @@ def handle_list_results(args):
     add_filter_conditions(report_filter, args.filter)
 
     all_results = []
-    results = client.getRunResults([run_id], limit, offset, None,
+    results = client.getRunResults([run.runId], limit, offset, None,
                                    report_filter, None)
 
     while results:
         all_results.extend(results)
         offset += limit
-        results = client.getRunResults([run_id], limit, offset, None,
+        results = client.getRunResults([run.runId], limit, offset, None,
                                        report_filter, None)
 
     if args.output_format == 'json':
@@ -174,7 +164,7 @@ def handle_list_results(args):
 
 def handle_diff_results(args):
 
-    def getDiffResults(client, baseid, cmp_data):
+    def getDiffResults(client, baseids, cmp_data):
 
         report_filter = ttypes.ReportFilter()
         add_filter_conditions(report_filter, args.filter)
@@ -185,7 +175,7 @@ def getDiffResults(client, baseid, cmp_data):
         offset = 0
 
         all_results = []
-        results = client.getRunResults([baseid],
+        results = client.getRunResults(baseids,
                                        limit,
                                        offset,
                                        sort_mode,
@@ -195,7 +185,7 @@ def getDiffResults(client, baseid, cmp_data):
         while results:
             all_results.extend(results)
             offset += limit
-            results = client.getRunResults([baseid],
+            results = client.getRunResults(baseids,
                                            limit,
                                            offset,
                                            sort_mode,
@@ -237,7 +227,7 @@ def getLineFromRemoteFile(client, fid, lineno):
         lines = base64.b64decode(source.fileContent).split('\n')
         return "" if len(lines) < lineno else lines[lineno - 1]
 
-    def getDiffReportDir(client, baseid, report_dir, diff_type):
+    def getDiffReportDir(client, baseids, report_dir, diff_type):
 
         report_filter = ttypes.ReportFilter()
         add_filter_conditions(report_filter, args.filter)
@@ -249,7 +239,7 @@ def getDiffReportDir(client, baseid, report_dir, diff_type):
         offset = 0
 
         base_results = []
-        results = client.getRunResults([baseid],
+        results = client.getRunResults(baseids,
                                        limit,
                                        offset,
                                        sort_mode,
@@ -258,7 +248,7 @@ def getDiffReportDir(client, baseid, report_dir, diff_type):
         while results:
             base_results.extend(results)
             offset += limit
-            results = client.getRunResults([baseid],
+            results = client.getRunResults(baseids,
                                            limit,
                                            offset,
                                            sort_mode,
@@ -367,24 +357,37 @@ def printReports(client, reports, output_format):
 
     report_dir_mode = False
     if os.path.isdir(args.newname):
-        # If newname is a valid directory
-        # we assume that it is a report dir
-        # and we are in local compare mode.
-        run_info = check_run_names(client, [args.basename])
+        # If newname is a valid directory we assume that it is a report dir and
+        # we are in local compare mode.
         report_dir_mode = True
     else:
-        run_info = check_run_names(client, [args.basename, args.newname])
-        newid = run_info[args.newname][0]
+        run_info = check_run_names(client, [args.newname])
+        newid = run_info[args.newname].runId
+
+    try:
+        basename_regex = '^' + args.basename + '$'
+        base_runs = filter(lambda run: re.match(basename_regex, run.name),
+                           client.getRunData(None))
+        base_ids = map(lambda run: run.runId, base_runs)
+    except re.error:
+        LOG.error('Invalid regex format in ' + args.basename)
+        sys.exit(1)
+
+    if len(base_ids) == 0:
+        LOG.warning("No run names match the given pattern: " + args.basename)
+        sys.exit(1)
+
+    LOG.info("Matching against runs: " +
+             ', '.join(map(lambda run: run.name, base_runs)))
 
-    baseid = run_info[args.basename][0]
     results = []
     if report_dir_mode:
         diff_type = 'new'
         if 'unresolved' in args:
             diff_type = 'unresolved'
         elif 'resolved' in args:
             diff_type = 'resolved'
-        results = getDiffReportDir(client, baseid,
+        results = getDiffReportDir(client, base_ids,
                                    os.path.abspath(args.newname),
                                    diff_type)
     else:
@@ -396,7 +399,7 @@ def printReports(client, reports, output_format):
         elif 'resolved' in args:
             cmp_data.diffType = ttypes.DiffType.RESOLVED
 
-        results = getDiffResults(client, baseid, cmp_data)
+        results = getDiffResults(client, base_ids, cmp_data)
 
     printReports(client, results, args.output_format)
 
@@ -417,14 +420,11 @@ def checkerCount(dict, key):
     client = setup_client(args.product_url)
 
     if 'all_results' in args:
-        items = check_run_names(client, None).items()
+        items = check_run_names(client, None)
     else:
-        items = []
-        run_info = check_run_names(client, args.names)
-        for name in args.names:
-            items.append((name, run_info.get(name)))
+        items = check_run_names(client, args.names)
 
-    run_ids = [item[1][0] for item in items]
+    run_ids = map(lambda _, run: run.runId, items)
 
     all_checkers_report_filter = ttypes.ReportFilter()
     all_checkers = client.getCheckerCounts(run_ids, all_checkers_report_filter,
@@ -494,20 +494,20 @@ def is_later(d1, d2):
 
         return d1 > d2
 
-    run_info = get_run_ids(client)
-
     if 'name' in args:
         check_run_names(client, args.name)
 
         def condition(name, runid, date):
             return name in args.name
-    elif 'all_after_run' in args and args.all_after_run in run_info:
-        run_date = run_info[args.all_after_run][1]
+    elif 'all_after_run' in args:
+        run_info = check_run_names(client, [args.all_after_run])
+        run_date = run_info[args.all_after_run].runDate
 
         def condition(name, runid, date):
             return is_later(date, run_date)
-    elif 'all_before_run' in args and args.all_before_run in run_info:
-        run_date = run_info[args.all_before_run][1]
+    elif 'all_before_run' in args:
+        run_info = check_run_names(client, [args.all_before_run])
+        run_date = run_info[args.all_before_run].runDate
 
         def condition(name, runid, date):
             return is_later(run_date, date)
@@ -521,9 +521,9 @@ def condition(name, runid, date):
         def condition(name, runid, date):
             return False
 
-    client.removeRunResults([runid for (name, (runid, date))
-                            in run_info.items()
-                            if condition(name, runid, date)])
+    client.removeRunResults([run.runId for run
+                            in client.getRunData(None)
+                            if condition(run.name, run.runId, run.runDate)])
 
     LOG.info("Done.")
 
@@ -540,14 +540,14 @@ def bug_hash_filter(bug_id, filepath):
     client = setup_client(args.product_url)
 
     run_info = check_run_names(client, [args.name])
-    run_id, _ = run_info.get(args.name)
+    run = run_info.get(args.name)
 
     if 'input' in args:
         with open(args.input) as supp_file:
             suppress_data = suppress_file_handler.get_suppress_data(supp_file)
 
         for bug_id, file_name, comment in suppress_data:
-            reports = client.getRunResults([run_id], limit, 0, None,
+            reports = client.getRunResults([run.runId], limit, 0, None,
                                            bug_hash_filter(bug_id, file_name),
                                            None)
 

diff --git a/libcodechecker/libhandlers/cmd.py b/libcodechecker/libhandlers/cmd.py
@@ -165,7 +165,11 @@ def __register_diff(parser):
                         default=argparse.SUPPRESS,
                         help="The 'base' (left) side of the difference: this "
                              "analysis run is used as the initial state in "
-                             "the comparison.")
+                             "the comparison. The basename can be a Python "
+                             "regex which is meant to cover the whole run "
+                             "name. So if you have run_1_a_name, run_2_b_name "
+                             "and run_2_c_name then run_.*_[ab]_name selects "
+                             "the first two.")
 
     parser.add_argument('-n', '--newname',
                         type=str,

diff --git a/tests/functional/diff/test_diff.py b/tests/functional/diff/test_diff.py
@@ -561,3 +561,38 @@ def test_local_compare_res_count_unresovled(self):
         self.assertEqual(count, 5)
         count = len(re.findall(r'\[unix\.Malloc\]', out))
         self.assertEqual(count, 1)
+
+    def test_local_compare_res_count_unresovled_regex(self):
+        """
+        Count unresolved results in local compare mode with a regex basename.
+        """
+        base_run_name = self._run_names[0]
+
+        # Change test_files_blablabla to test_.*_blablabla
+        base_run_name = base_run_name.replace('files', '.*')
+
+        diff_cmd = [self._codechecker_cmd, "cmd", "diff",
+                    "--unresolved",
+                    "--url", self._url,
+                    "-b", base_run_name,
+                    "-n", self._report_dir
+                    ]
+        print(diff_cmd)
+        process = subprocess.Popen(
+            diff_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+            env=self._test_config['codechecker_cfg']['check_env'],
+            cwd=os.environ['TEST_WORKSPACE'])
+        out, err = process.communicate()
+        print(out+err)
+
+        # Expected number of unresolved reports per checker.
+        count = len(re.findall(r'\[core\.DivideZero\]', out))
+        self.assertEqual(count, 10)
+        count = len(re.findall(r'\[deadcode\.DeadStores\]', out))
+        self.assertEqual(count, 6)
+        count = len(re.findall(r'\[core\.NullDereference\]', out))
+        self.assertEqual(count, 4)
+        count = len(re.findall(r'\[cplusplus\.NewDelete\]', out))
+        self.assertEqual(count, 5)
+        count = len(re.findall(r'\[unix\.Malloc\]', out))
+        self.assertEqual(count, 1)