From b5265607b40657b24e2f1d057c2715e8fd54ebac Mon Sep 17 00:00:00 2001 From: Daniel Krupp Date: Wed, 4 Oct 2017 15:43:28 +0200 Subject: [PATCH] Source language set to c++ when g++ is called. ld-logger logs the full path of the compiler (instead of only the basename) so that compiler settings auto-detection can be done by the analyze sub-command even if the compiler is not in the PATH when analyze is called. Include path auto-detection is switched off in ld-logger and is always done by the analyze sub-command. The --add-compiler-defaults flag is deprecated as auto-detection is always performed (even if the flag is not set). Fix logger test. Since `--add-compiler-defaults` is forced, the `--target=...` flag also becomes part of the build command when analyzing. If clang is used as the compiler for the project, the compilation target is also auto-detected, so the handling becomes identical to that of gcc. build-logger got documented. HOWTO is extended with an incremental analysis description. We removed the cross-compilation description from the HOWTO as it is done automatically. It is no longer recommended to set these options in the saargs and tidy-args files. 
--- docs/cross-compilation.md | 67 -------------------- docs/usage.md | 48 ++++++++++++-- docs/user_guide.md | 1 + libcodechecker/analyze/log_parser.py | 24 +++---- libcodechecker/libhandlers/analyze.py | 3 +- libcodechecker/libhandlers/check.py | 3 +- libcodechecker/log/option_parser.py | 19 ++++++ tests/unit/test_log_parser.py | 15 +++-- vendor/build-logger/README.md | 69 +++++++++++++++++++++ vendor/build-logger/src/ldlogger-tool-gcc.c | 43 ++++++++++++- 10 files changed, 199 insertions(+), 93 deletions(-) delete mode 100644 docs/cross-compilation.md create mode 100644 vendor/build-logger/README.md diff --git a/docs/cross-compilation.md b/docs/cross-compilation.md deleted file mode 100644 index c6a4728b1a..0000000000 --- a/docs/cross-compilation.md +++ /dev/null @@ -1,67 +0,0 @@ -# How to analyze a project that contains cross-compilation - -It may happen that due to special gcc build-time configuration (see https://gcc.gnu.org/onlinedocs/libstdc++/manual/configure.html) -clang used by CodeChecker cannot do the analysis due different configuration. -This is especially the case when gcc is used for **cross-compilation**. - -**In case of cross-compilation always use the same target for clang analysis as you use in the gcc build.** - -The list of targets supported by clang is described -[here](http://llvm.org/doxygen/Triple_8h_source.html). - -A file (`commonhandler.cpp`) for example compiles like this to powerpc target: -``` -powerpc64-linux-g++ -c -o /home//mylib_ppc64_linux/commonhandler.cpp -``` -when we try to analyze this file with Clang Static Analyzer with the same command line options like -``` - CodeChecker check -b "powerpc64-linux-g++ -c -o /home//mylib_ppc64_linux/commonhandler.cpp" --analyzers clangsa --verbose debug -``` -we get the following error: -``` - .../platform_ifc/RTLIB.h:42: - /proj/.../inc/myprod_te_lib.h:60:10: fatal error: 'lib.h' file not found - #include /* Original lib.h */ - 1 error generated. -``` -Why? 
Because `powerpc64-linux-g++` is an product specific cross compiler and has some compiled in include paths, and defines, while clang, -that was used by CodeChecker is configured to compile to x86_64 Linux target. - -**Solution** -You can pass any parameters to clang using `--saargs` parameter. - -For example the error above can be solved for Clang Static Analyzer analysis like - -``` - CodeChecker check -b "powerpc64-linux-g++ -c -o - /home//mylib_ppc64_linux/commonhandler.cpp" --analyzers clangsa --saargs ./saargs.txt -``` - -where -``` - saargs.txt: - - -target ppc64 - -I/proj/platform/linux/sdk_install/sysroots/ppc64-linux/usr/include/c++/5.2.0 - -I/proj/platform/linux/sdk_install/sysroots/ppc64-linux/usr/include/c++/5.2.0/powerpc64-wrs-linux - -I/proj/platform/linux/sdk_install/sysroots/ppc64-linux/usr/include/c++/5.2.0/backward - -I/proj/platform/linux/sdk_install/sysroots/ppc64-linux/usr/include -``` -similarly create the same configuration for clang-tidy (with a littlebit different syntax): -``` -tidyargs.txt: - - -extra-arg="-target" -extra-arg="ppc64" - -extra-arg="-I/proj/platform/linux/sdk_install/sysroots/ppc64-linux/usr/include/c++/5.2.0" - -extra-arg="-I/proj/platform/linux/sdk_install/sysroots/ppc64-linux/usr/include/c++/5.2.0/powerpc64-wrs-linux" - -extra-arg="-I/proj/platform/linux/sdk_install/sysroots/ppc64-linux/usr/include/c++/5.2.0/backward" - -extra-arg="-I/proj/platform/linux/sdk_install/sysroots/ppc64-linux/usr/include" -``` - -So the full analysis command will be -``` - CodeChecker check -b "powerpc64-linux-g++ -c -o - /home//mylib_ppc64_linux/commonhandler.cpp" --saargs ./saargs.txt --tidyargs ./tidyargs.txt -``` - -We needed to add the powerpc64 compilation target (`-target ppc64`) and the standard c++ library paths for 5.2.0 (which is configured into the the powerpc64-linux-g++ binary. 
diff --git a/docs/usage.md b/docs/usage.md index de3471d680..bf92e2c8b1 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -51,15 +51,55 @@ Once the build is logged successfully (and the `compilation.json`) was created, Hint: You can do the 1st and the 2nd step in one round by executing `check` - ``` +``` cd tmux make clean - CodeChecker check -b "make" + CodeChecker check -b "make" -o ./reports +``` or to run on 22 threads - CodeChecker check -j22 -b "make clean;make -j22" + +``` + CodeChecker check -j22 -b "make clean;make -j22" -o ./reports ``` -[What to do if the analysis fails (analysis settings for cross-compilation)](/docs/cross-compilation.md) + +### Cross-Compilation +Cross-compilers are auto-detected by CodeChecker, so +the `--target` and the compiler's pre-configured +include paths of `gcc/g++` are automatically passed to `clang` when analyzing. + +**Make sure that the compilers used for building the project (e.g. `/usr/bin/gcc`) are +accessible when `CodeChecker analyze` or `check` is invoked.** + +### Incremental Analysis + The analysis can be run for only the changed files and the `report-directory` will be + correctly updated with the new results. + + ``` + cd tmux + make clean + CodeChecker check -b "make" -o reports + + #Change only 1 file in tmux + vi ./cmd-find.c + + #Only cmd-find.c will be re-analyzed + CodeChecker check -b "make" -o reports +``` +Now the `reports` directory also contains the results of the updated `cmd-find.c`. + +### Analysis Failures + +The `reports/failed` folder contains all build-actions that +failed to be analyzed. For these there will be no results. + +Possible reasons for failed analysis: +* The original `gcc` compiler options were not recognized by `clang`, or not all include paths were +correctly detected, so Clang analysis was unsuccessful. +* Clang was more strict when parsing the C/C++ code than the original compiler (e.g. `gcc`). 
+ Any non-standard compliant or `gcc` specific code needs to be removed to successfully analyze the file. +* Clang crashed during the analysis. + ## Step 3: Store analysis results in a CodeChecker DB and visualize results You can store the analysis results in a central database and view the results in a web viewer diff --git a/docs/user_guide.md b/docs/user_guide.md index 532f20440f..c25caf186e 100644 --- a/docs/user_guide.md +++ b/docs/user_guide.md @@ -365,6 +365,7 @@ analyzer arguments: Currently supported analyzers are: clangsa, clang- tidy. --add-compiler-defaults + DEPRECATED. Always True. Retrieve compiler-specific configuration from the compilers themselves, and use them with Clang. This is used when the compiler on the system is special, e.g. diff --git a/libcodechecker/analyze/log_parser.py b/libcodechecker/analyze/log_parser.py index a5405faa33..d075244cfb 100644 --- a/libcodechecker/analyze/log_parser.py +++ b/libcodechecker/analyze/log_parser.py @@ -62,7 +62,16 @@ def get_compiler_includes(compiler, lang, compile_opts, extra_opts=None): if line.startswith(end_mark): do_append = False if do_append: - include_paths.append("-isystem " + line) + # On OSX there are framework includes, + # where we need to strip the "(framework directory)" string. + # For instance: + # /System/Library/Frameworks (framework directory) + fpos = line.find("(framework directory)") + if fpos == -1: + include_paths.append("-isystem " + line) + else: + include_paths.append("-isystem " + line[0:fpos-1]) + if line.startswith(start_mark): do_append = True @@ -75,16 +84,10 @@ def get_compiler_target(compiler): """ Returns the target triple of the given compiler as a string. - If the compiler is not a version of GCC, an empty string is returned. - Compilers other than GCC might have default targets differing from - the build target. 
""" target_label = "Target:" target = "" - gcc_label = "gcc" - gcc = False - cmd = compiler + ' -v' LOG.debug("Retrieving target platform information via '" + cmd + "'") @@ -99,10 +102,6 @@ def get_compiler_target(compiler): line = line.strip().split() if line[0] == target_label: target = line[1] - if line[0] == gcc_label: - gcc = True - if not gcc: - target = "" except OSError as oerr: LOG.error("Cannot find compiler target: " + oerr.strerror + "\n") @@ -112,6 +111,9 @@ def get_compiler_target(compiler): def parse_compile_commands_json(logfile, add_compiler_defaults=False): import json + # The add-compiler-defaults is a deprecated argument + # and we always perform target and include auto-detection. + add_compiler_defaults = True LOG.debug('parse_compile_commands_json: ' + str(add_compiler_defaults)) actions = [] diff --git a/libcodechecker/libhandlers/analyze.py b/libcodechecker/libhandlers/analyze.py index 3abb44ada8..99f83c8e1c 100644 --- a/libcodechecker/libhandlers/analyze.py +++ b/libcodechecker/libhandlers/analyze.py @@ -184,7 +184,8 @@ def is_ctu_capable(): action='store_true', required=False, default=argparse.SUPPRESS, - help="Retrieve compiler-specific configuration " + help="DEPRECATED. Always True. Retrieve " + "compiler-specific configuration " "from the compilers themselves, and use " "them with Clang. This is used when the " "compiler on the system is special, e.g. " diff --git a/libcodechecker/libhandlers/check.py b/libcodechecker/libhandlers/check.py index 374884f4fd..4b8ee47219 100644 --- a/libcodechecker/libhandlers/check.py +++ b/libcodechecker/libhandlers/check.py @@ -184,7 +184,8 @@ def add_arguments_to_parser(parser): action='store_true', required=False, default=argparse.SUPPRESS, - help="Retrieve compiler-specific configuration " + help="DEPRECATED. Always True. Retrieve " + " compiler-specific configuration " "from the analyzers themselves, and use " "them with Clang. This is used when the " "compiler on the system is special, e.g. 
" diff --git a/libcodechecker/log/option_parser.py b/libcodechecker/log/option_parser.py index 1fb979b893..b83e620cd2 100644 --- a/libcodechecker/log/option_parser.py +++ b/libcodechecker/log/option_parser.py @@ -177,9 +177,18 @@ '^-mmultiple$': 0, '^-mthumb-interwork$': 0, '^-mupdate$': 0, + + # Deprecated ARM specific option + # to Generate a stack frame that is compliant + # with the ARM Procedure Call Standard. + '^-mapcs': 0, '^-fno-merge-const-bfstores$': 0, '^-fno-ipa-sra$': 0, '^-mno-thumb-interwork$': 0, + # ARM specific option. + # Prevent the reordering of + # instructions in the function prologue. + '^-mno-sched-prolog': 0, # This is not unknown but we want to preserve asserts to improve the # quality of analysis. '^-DNDEBUG$': 0 @@ -489,6 +498,13 @@ def parse_options(args): result_map.compile_opts[idx] = opt.replace('"', r'"\"') result_map.compiler = shlex.split(args)[0] + + # If the compiler is C++ (contains ++ in its name) + # we set the language explicitly to c++. + cpp_regex = re.compile('.*\+\+.*') + if cpp_regex.match(result_map.compiler) is not None: + result_map.lang = 'c++' + is_source = False for source_file in result_map.files: lang = get_language(os.path.splitext(source_file)[1].rstrip('"')) @@ -500,6 +516,9 @@ def parse_options(args): result_map.lang = lang break + if result_map.lang: + LOG.debug("Detected language" + result_map.lang) + # If there are no source files in the compilation argument # handle it as a link command. 
if not is_source: diff --git a/tests/unit/test_log_parser.py b/tests/unit/test_log_parser.py index 97ee8cf0d7..c2077583cf 100644 --- a/tests/unit/test_log_parser.py +++ b/tests/unit/test_log_parser.py @@ -52,7 +52,7 @@ def test_old_ldlogger(self): self.assertEqual(' '.join(results.files), r'"-DVARIABLE="some value"" /tmp/a.cpp') - self.assertEqual(len(build_action.analyzer_options), 0) + self.assertEqual(len(build_action.analyzer_options), 1) def test_new_ldlogger(self): """ @@ -64,12 +64,13 @@ def test_new_ldlogger(self): # now properly log the multiword arguments. When these are parsed by # the log_parser, the define's value will be passed to the analyzer. # - # Logfile contains -DVARIABLE="some value". + # Logfile contains -DVARIABLE="some value" + # and --target=x86_64-linux-gnu. build_action = log_parser.parse_log(logfile)[0] self.assertEqual(list(build_action.sources)[0], r'/tmp/a.cpp') - self.assertEqual(len(build_action.analyzer_options), 1) + self.assertEqual(len(build_action.analyzer_options), 2) self.assertEqual(build_action.analyzer_options[0], r'-DVARIABLE="\"some value"\"') @@ -88,14 +89,14 @@ def test_old_intercept_build(self): logfile = os.path.join(self.__test_files, "intercept-old.json") # Scan-build-py shipping with clang-5.0 makes a logfile that contains: - # -DVARIABLE=\"some value\" + # -DVARIABLE=\"some value\" and --target=x86_64-linux-gnu # # The define is passed to the analyzer properly. build_action = log_parser.parse_log(logfile)[0] self.assertEqual(list(build_action.sources)[0], r'/tmp/a.cpp') - self.assertEqual(len(build_action.analyzer_options), 1) + self.assertEqual(len(build_action.analyzer_options), 2) self.assertEqual(build_action.analyzer_options[0], r'-DVARIABLE="\"some value"\"') @@ -117,13 +118,15 @@ def test_new_intercept_build(self): # command string. 
This argument vector contains the define as it's # element in the following format: # -DVARIABLE=\"some value\" + # and the target triplet, e.g.: + # --target=x86_64-linux-gnu # # The define is passed to the analyzer properly. build_action = log_parser.parse_log(logfile)[0] self.assertEqual(list(build_action.sources)[0], r'/tmp/a.cpp') - self.assertEqual(len(build_action.analyzer_options), 1) + self.assertEqual(len(build_action.analyzer_options), 2) self.assertEqual(build_action.analyzer_options[0], r'-DVARIABLE="\"some value"\"') diff --git a/vendor/build-logger/README.md b/vendor/build-logger/README.md new file mode 100644 index 0000000000..76b5bffd57 --- /dev/null +++ b/vendor/build-logger/README.md @@ -0,0 +1,69 @@ +# Build Logger + +This tool can capture the build process and generate a +[JSON Compilation Database](https://clang.llvm.org/docs/JSONCompilationDatabase.html) + +## Compilation + +To build the project execute +~~~~~~~ +cd vendor/build-logger +make -f Makefile.manual +~~~~~~~ + +## Usage + +Set the following environment variables: +~~~~~~~ +export LD_PRELOAD=ldlogger.so +export LD_LIBRARY_PATH=`pwd`/build/lib:$LD_LIBRARY_PATH +export CC_LOGGER_GCC_LIKE="gcc:g++:clang" +#The output compilation JSON file +export CC_LOGGER_FILE=`pwd`/compilation.json +~~~~~~~ + +then when you call `gcc` from a sub-shell (e.g. as a part of a Make build process), + `compilation.json` will be created. +For example: +`bash -c "gcc -c something.c"` +will create +~~~~~~~ +compilation.json: +[ + { + "directory": "/home/john_doe/", + "command": "/usr/bin/gcc-4.8 -c /home/john_doe/something.c", + "file": "/home/john_doe/something.c" + } +] +~~~~~~~ + + + +## Environment Variables + +### `CC_LOGGER_GCC_LIKE` +You can change the compilers that should be logged. +Set `CC_LOGGER_GCC_LIKE` environment variable to a colon separated list. 
+ + For example (default): + + ```export CC_LOGGER_GCC_LIKE="gcc:g++:clang"``` + + The logger will match any compilers with `gcc`,`g++` or `clang` in their filenames. + + +### `CC_LOGGER_FILE` +Output file to generate compilation database into. +This can be a relative or absolute path. + +### `CC_LOGGER_JAVAC_LIKE` +You can specify the `javac` like +compilers that should be logged as a colon separated string list. + +### `CC_LOGGER_DEF_DIRS` +If the environment variable is defined, +the logger will extend the compiler argument list in the compilation +database with the pre-configured include paths of the logged compiler. + + diff --git a/vendor/build-logger/src/ldlogger-tool-gcc.c b/vendor/build-logger/src/ldlogger-tool-gcc.c index f23bcb65c0..a80406b3d3 100644 --- a/vendor/build-logger/src/ldlogger-tool-gcc.c +++ b/vendor/build-logger/src/ldlogger-tool-gcc.c @@ -14,7 +14,8 @@ #include #include #include - +#include +#include /** * States for GCCargument parser. */ @@ -168,6 +169,9 @@ static void getDefaultArguments(const char* prog_, LoggerVector* args_) int incStarted = 0; strcpy(command, prog_); + /* WARNING: this always gets the C++ compiler include + * dirs even if we are compiling C file. 
+ * */ strcat(command, " -xc++ -E -v - < /dev/null 2>&1"); cmdOut = popen(command, "r"); @@ -227,6 +231,23 @@ static void getDefaultArguments(const char* prog_, LoggerVector* args_) pclose(cmdOut); } +char* findFullPath(const char* executable, char* fullpath) { + char* path; + char* dir; + path = strdup(getenv("PATH")); + for (dir = strtok(path, ":"); dir; dir = strtok(NULL, ":")) { + strcpy(fullpath, dir); + strcpy(fullpath + strlen(dir), "/"); + strcpy(fullpath + strlen(dir) + 1, executable); + if (access(fullpath, F_OK ) != -1 ) { + free(path); + return fullpath; + } + } + free(path); + return 0; +} + int loggerGccParserCollectActions( const char* prog_, const char* toolName_, @@ -235,11 +256,27 @@ int loggerGccParserCollectActions( { size_t i; /* Position of the last include path + 1 */ + char full_prog_path[PATH_MAX+1]; + char *path_ptr; + size_t lastIncPos = 1; GccArgsState state = Normal; LoggerAction* action = loggerActionNew(toolName_); - loggerVectorAdd(&action->arguments, loggerStrDup(toolName_)); + /* If prog_ is a relative path we try to + * convert it to absolute path. + */ + path_ptr = realpath(prog_, full_prog_path); + + /* If we cannot convert it, we try to find the + * executable in the PATH. + */ + if (!path_ptr) + path_ptr = findFullPath(toolName_, full_prog_path); + if (path_ptr) /* Log compiler with full path. */ + loggerVectorAdd(&action->arguments, loggerStrDup(full_prog_path)); + else /* Compiler was not found in path, log the binary name only. */ + loggerVectorAdd(&action->arguments, loggerStrDup(toolName_)); for (i = 1; argv_[i]; ++i) { @@ -259,7 +296,7 @@ int loggerGccParserCollectActions( } } - if (!getenv("CC_LOGGER_NO_DEF_DIRS")) + if (getenv("CC_LOGGER_DEF_DIRS")) { LoggerVector defIncludes; loggerVectorInit(&defIncludes);