Skip to content

Commit

Permalink
Merge branch 'master' into loverall/httpd
Browse files Browse the repository at this point in the history
  • Loading branch information
ESultanik committed Aug 3, 2023
2 parents deffb5b + fcb7354 commit b63fc3d
Show file tree
Hide file tree
Showing 53 changed files with 9,135 additions and 5,835 deletions.
8 changes: 8 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[flake8]
exclude =
.git,
__pycache__,
polytracker/src/compiler-rt,
examples,
the_klondike,
third_party
2 changes: 2 additions & 0 deletions .github/workflows/dockerimage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ jobs:
submodules: recursive
- name: Build the base image
run: docker build . --file Dockerfile --tag trailofbits/polytracker --no-cache
- name: Run the PolyTracker tests
run: docker run --rm --workdir /polytracker trailofbits/polytracker pytest
2 changes: 0 additions & 2 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,3 @@ jobs:
- name: MyPy
run: |
mypy --python-version ${{ matrix.python-version }} --ignore-missing-imports polytracker tests build_in_docker eval --exclude compiler-rt
- name: PolyTracker tests
run: docker run --rm trailofbits/polytracker pytest
28 changes: 28 additions & 0 deletions examples/Dockerfile-acropalypse.demo
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

# Demo image: builds zlib and libpng's `pngtest` with PolyTracker, links their
# bitcode together, and produces an instrumented `pngtest.instrumented` binary.
FROM trailofbits/polytracker
LABEL org.opencontainers.image.authors="[email protected]"

WORKDIR /polytracker/acropalypse

# --fail (-f) makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as the "tarball", which would only surface later as a
# confusing tar failure.
RUN curl -fL https://downloads.sourceforge.net/libpng/libpng-1.6.39.tar.xz --output libpng.tar.xz
RUN tar xvf libpng.tar.xz
WORKDIR /polytracker/acropalypse/libpng-1.6.39

# zlib is unpacked inside the libpng source tree.
RUN curl -fL https://zlib.net/zlib-1.2.13.tar.xz --output zlib.tar.xz
RUN tar xf zlib.tar.xz

# Build zlib under PolyTracker and write its bitcode one directory up, next to
# the libpng sources.
WORKDIR /polytracker/acropalypse/libpng-1.6.39/zlib-1.2.13/
RUN polytracker build ./configure
RUN polytracker build make
RUN polytracker extract-bc -o ../libz.bc libz.a

WORKDIR /polytracker/acropalypse/libpng-1.6.39

# NOTE(review): zlib is built in-tree above and never installed, so the
# zlib-1.2.13/include and zlib-1.2.13/lib directories referenced here may not
# exist -- confirm which zlib headers/library libpng's configure picks up.
RUN CPPFLAGS="-I$(pwd)/zlib-1.2.13/include" LDFLAGS="-L$(pwd)/zlib-1.2.13/lib" polytracker build ./configure --disable-shared
RUN CPPFLAGS="-I$(pwd)/zlib-1.2.13/include" LDFLAGS="-L$(pwd)/zlib-1.2.13/lib" polytracker build make pngtest
RUN polytracker extract-bc -o pngtest.bc pngtest

# Link pngtest and zlib bitcode into one module, instrument it, then lower it
# to the final binary.
RUN llvm-link -o pngtest-linked.bc pngtest.bc libz.bc
RUN polytracker instrument-bc --taint --ftrace pngtest-linked.bc -o instrumented.bc
RUN polytracker lower-bc instrumented.bc -t pngtest -o pngtest.instrumented
9 changes: 6 additions & 3 deletions examples/analysis/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# Analysis Scripts

## last updated Nov 22, [email protected]
## last updated May 2023, [email protected]

Small scripts and other assorted tooling which might be copied into containers or run in the native working environment to automate learning about how Polytracker works.
### analysis/nitf/
Some early experiments with NITF parser exploration that eventually resulted in the paper referenced below.

### analysis/ubet/
Tooling, examples, and other artifacts of the LangSec '23 paper [Automatically Detecting Variability Bugs Through Hybrid Control and Data Flow Analysis](https://langsec.org/spw23/papers.html#variability) that are not yet integrated into main PolyTracker. This tooling is generally referenced from Dockerfiles in the parent directory.

The point of these is reproducibility and knowledge sharing.
10 changes: 10 additions & 0 deletions examples/analysis/ubet/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Plain Ubuntu environment with a host toolchain and the PolyTracker Python
# package installed from pip.
FROM ubuntu:jammy
ENV DEBIAN_FRONTEND=noninteractive

# We just need the PolyTracker python analysis code to run eval_nitro.py, and not the PolyTracker llvm environment.
# The apt package lists and the pip download cache are removed in the same
# layer that created them so they do not end up in the image.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y clang g++ python3 python3-pip && \
    pip3 install --no-cache-dir polytracker && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /polytracker/the_klondike/nitro/build/ubet
65 changes: 65 additions & 0 deletions examples/analysis/ubet/Dockerfile.nitro
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Builds Nitro's show_nitf++ twice (Release and Debug) with PolyTracker and
# instruments each build, leaving nitro_Release/nitro_trackRelease and
# nitro_Debug/nitro_trackDebug in the respective build directories.
FROM trailofbits/polytracker:latest
ENV DEBIAN_FRONTEND=noninteractive
LABEL org.opencontainers.image.authors="[email protected], [email protected]"
WORKDIR /polytracker/the_klondike

# kaoudis, May 2023: Nitro (or more likely, coda-oss) has done something weird
# and Nitro no longer can find PYTHON_HOME. Using ENABLE_PYTHON=OFF for now
# so that this Dockerfile at least builds. Enable the Nitro SWIG bindings
# at your own risk!

# Package lists are removed in the same layer so they are not baked into the image.
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libssl-dev python3 && \
    rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir cxxfilt

# Pin Nitro to a fixed commit so the build is reproducible.
RUN git clone https://github.com/mdaus/nitro.git
WORKDIR /polytracker/the_klondike/nitro
RUN git checkout b39ccc4c07e84e6c05cecb9ae24143373a3ed8e2
WORKDIR /polytracker/the_klondike/nitro/build/release

# Build Nitro: Release
RUN polytracker build cmake ../.. \
    -DCMAKE_C_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable" \
    -DCMAKE_CXX_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable" \
    -DCMAKE_BUILD_TYPE=Release \
    -DCODA_BUILD_TESTS=OFF \
    -DENABLE_PYTHON=OFF

# One more job than there are cores; $(nproc) matches the form used in build_nitro.sh.
RUN polytracker build cmake --build . -j$(($(nproc)+1)) --target show_nitf++ --config Release

# Keep a copy of the uninstrumented binary next to the instrumented one.
RUN cp modules/c++/nitf/show_nitf++ nitro_Release

RUN polytracker instrument-targets \
    --taint \
    --ftrace \
    --cflog \
    show_nitf++

RUN mv show_nitf++.instrumented nitro_trackRelease

# Build Nitro: Debug
WORKDIR /polytracker/the_klondike/nitro/build/debug
RUN polytracker build cmake ../.. \
    -DCMAKE_C_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable" \
    -DCMAKE_CXX_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable" \
    -DCMAKE_BUILD_TYPE=Debug \
    -DCODA_BUILD_TESTS=OFF \
    -DENABLE_PYTHON=OFF

RUN polytracker build cmake --build . -j$(($(nproc)+1)) --clean-first --target show_nitf++ --config Debug
RUN cp modules/c++/nitf/show_nitf++ nitro_Debug
RUN polytracker instrument-targets \
    --taint \
    --ftrace \
    --cflog \
    show_nitf++

RUN mv show_nitf++.instrumented nitro_trackDebug

# If this Dockerfile is run with run.sh, this will link to the external
# location where the evaluation scripts live, and you'll be dropped into a
# shell so you can work in a configured environment.
# Note to the unwary: compiler-rt sanitizers and Polytracker are NOT COMPATIBLE.
# If you need compiler-rt, please use Dockerfile.nitro.sanitizers.
WORKDIR /polytracker/the_klondike/nitro/build/ubet
34 changes: 34 additions & 0 deletions examples/analysis/ubet/Dockerfile.nitro.sanitizers
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Builds Nitro's show_nitf++ (Release and Debug) with ASan and UBSan enabled,
# without PolyTracker.
FROM ubuntu:focal
ENV DEBIAN_FRONTEND=noninteractive
LABEL org.opencontainers.image.authors="[email protected], [email protected]"
WORKDIR /nitro

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libssl-dev git cmake clang-12 build-essential python python-numpy && \
    rm -rf /var/lib/apt/lists/*

# The clone lands in /nitro/nitro, so the pinned checkout must run there.
# (Previously WORKDIR pointed at /polytracker/the_klondike/nitro, an empty
# directory that is not a git repository, so `git checkout` could not succeed.)
RUN git clone https://github.com/mdaus/nitro.git
WORKDIR /nitro/nitro
RUN git checkout b39ccc4c07e84e6c05cecb9ae24143373a3ed8e2
WORKDIR /nitro/nitro/build/release

# CMAKE_EXE_LINKER_FLAGS is the CMake variable that feeds executable link
# lines; CMAKE_LINK_FLAGS is not a CMake variable and was ignored.
RUN cmake ../.. \
    -DCMAKE_C_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable -fsanitize=address,undefined" \
    -DCMAKE_CXX_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable -fsanitize=address,undefined" \
    -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address,undefined" \
    -DCMAKE_BUILD_TYPE=Release -DCODA_BUILD_TESTS=OFF -DENABLE_PYTHON=OFF

RUN cmake --build . -j$(($(nproc)+1)) --target show_nitf++ --config Release

RUN cp modules/c++/nitf/show_nitf++ nitro_Release

WORKDIR /nitro/nitro/build/debug

RUN cmake ../.. \
    -DCMAKE_C_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable -fsanitize=address,undefined" \
    -DCMAKE_CXX_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable -fsanitize=address,undefined" \
    -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address,undefined" \
    -DCMAKE_BUILD_TYPE=Debug -DCODA_BUILD_TESTS=OFF -DENABLE_PYTHON=OFF

RUN cmake --build . -j$(($(nproc)+1)) --target show_nitf++ --config Debug

RUN cp modules/c++/nitf/show_nitf++ nitro_Debug
5 changes: 5 additions & 0 deletions examples/analysis/ubet/Dockerfile.polytracker
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# PolyTracker base image plus a host g++.
FROM trailofbits/polytracker:latest

# update, upgrade and install share one layer: with `apt-get update` in its own
# layer, a cached (stale) package index could be reused when the install line
# changes. Package lists are removed in the same layer to keep them out of the image.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y g++ && \
    rm -rf /var/lib/apt/lists/*
WORKDIR /workdir
43 changes: 43 additions & 0 deletions examples/analysis/ubet/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# UBet

In general this directory contains the analysis scripts, configuration, and *most* other things necessary to reproduce our results from the LangSec '23 paper [Automatically Detecting Variability Bugs Through Hybrid Control and Data Flow Analysis](https://langsec.org/spw23/papers.html#variability).

## Reproducing our results
| :wrench: Getting Started |
| ------------------------ |
If you have not yet done so, clone [Nitro](https://github.com/mdaus/nitro) and ensure it builds with PolyTracker in Docker for you, as demonstrated in the base NITF Dockerfile `polytracker/examples/Dockerfile-nitro-nitf.demo`. You will notice our build process is somewhat different compared to [how the Nitro maintainers recommend building the software](https://github.com/mdaus/nitro#building-nitro) since that requires GCC, or MSVC.

### :whale: Dockerfiles here
- `Dockerfile` creates a clean, reproducible testing environment we used to build the toy motivation examples and to build some earlier experiments that didn't make it into the LangSec version of our paper
- `Dockerfile.nitro` builds an instrumented version of Nitro with the needed dependencies available to reproduce the experiments described in the paper
- `Dockerfile.polytracker` creates a clean, reproducible PolyTracker based testing environment. The compiler-rt sanitizers aren't available here, since PolyTracker a) requires the WLLVM/gclang compiler front-end (it *does* work with Clang, but is really intended to work with gclang) and b) alters the ABI list and other critical items in a way that is not compatible with base dfsan and the rest of LLVM compiler-rt anymore. You will get weird errors if you try to run compiler-rt sanitizers in a PolyTracker based environment.
- `Dockerfile.nitro.sanitizers` builds Nitro with UBSan and ASan and attempts to use them to show some of the issues inherent in Nitro. We build Nitro with these compiler-rt sanitizers in a way as close to the way we build Nitro for PolyTracker as possible.

### NITF
The examples we reference in the paper primarily relate to the [NITF](https://jitc.fhu.disa.mil/projects/nitf/testdata.aspx) (National Imagery Transmission Format) reference parser Nitro, though in our motivation section we also use some specifically targeted toy examples, available under `polytracker/examples/analysis/ubet/examples/motivation` and named by listing.

NITF is a binary image file format. Each NITF file packages one or more visual data representations (video, fingerprints, CAT scan, JPEG, etc.) with extra metadata and other conditionally included information, e.g., captions, information for rendering visual redactions, or geo-reference data. Nitro parses multiple mutually incompatible versions of the NITF specification. To simulate encountering a particular bad input whose effects we would like to reproduce in a testing, local, or staging environment, we applied Nitro instrumented with UBet to a corpus of 148 valid and known-invalid NITF files.

#### :blue_book: NITF standard
There are three publicly available versions of MIL-STD-2500 (A, B, and C) that collectively describe [NITF](https://www.wikidata.org/wiki/Q26218335) as Nitro understands it. As *we* understand it, MIL-STD-2500a and MIL-STD-2500b together describe NITF 2.0 (note most NITF 2.0 files will map better to the fields described in MIL-STD-2500a, but some NITF 2.0 files will map better to the fields described in MIL-STD-2500b!). MIL-STD-2500c is closest to NITF 2.1. [MIL-STD-1300a](https://web.archive.org/web/20130217094453/http://www.gwg.nga.mil/ntb/baseline/docs/1300a/1300a.pdf) may also be relevant to understanding the format. `NSIF` is another closely related format that is good to understand to figure out the overlaps between the A, B, and C NITF standards.

#### Reproducing our results, or making results like them
From the current working directory (`examples/analysis/ubet`):

```
docker build -t trailofbits/polytracker-nitro -f <DOCKERFILE HERE> .
docker run -ti --rm -v $(pwd):/workdir trailofbits/polytracker-nitro
cd /workdir
find nitfdir/ -type f | python3 eval_nitro.py --locate
mkdir output
python3 eval_nitro.py --cflog --compare output/U_2001E.NTF/
```

There is also a script `run.sh` in the cwd that you can use to just drop into an appropriately configured environment using one of the above Dockerfiles for any experiments you'd like to run.

| :exclamation: Note for the unwary |
| --------------------------------- |
Nitro replaces an old semi-custom build system known as [WAF](https://github.com/mdaus/nitro#building-with-waf) with a new build layer on top of CMake, [coda-oss](https://github.com/mdaus/coda-oss) that bakes in a bespoke stdlib implementation. We've had to [macro some of this out](https://github.com/trailofbits/polytracker/blob/master/examples/Dockerfile-nitro-nitf.demo#L16), since it relies on implementation-specific behaviour of GCC and is not entirely compatible with Clang. We are aware of other implementation-specific and undefined behaviour related issues within the coda-oss code that we are in the process of gathering more data on using this analysis and instrumentation code, in order to report to the Nitro maintainers, beyond the bugs discussed in the LangSec paper.

### Dead code in Nitro
The Nitro repository also contains some [possibly-dead](https://github.com/mdaus/nitro#platforms) code that we did not evaluate or interact with - namely the Matlab, Java, and related bindings located there. We focused on building and instrumenting its C++ implementation initially. This also applies to Nitro's Python, since Nitro uses SWIG to generate Python bindings.
47 changes: 47 additions & 0 deletions examples/analysis/ubet/build_nitro.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env bash
#
# Builds show_nitf++ twice with PolyTracker and produces two instrumented
# binaries in the current directory:
#   nitro_trackRelease - bitcode optimized with `polytracker opt-bc` before instrumentation
#   nitro_trackDebug   - bitcode instrumented without the optimization step
#
# The cmake invocations use `../..` as the source directory, so this presumably
# has to be run from Nitro's build/ directory -- TODO confirm.
#
# Requires COMPILER_DIR to be set; libPolytrackerPass.so is loaded from
# ${COMPILER_DIR}/pass/.

# Stop at the first failing step: every stage consumes the previous stage's
# output, so continuing after an error only produces misleading follow-on
# failures. -u turns a missing variable into an error instead of an empty string.
set -euo pipefail

: "${COMPILER_DIR:?COMPILER_DIR must be set (libPolytrackerPass.so is loaded from \$COMPILER_DIR/pass)}"

# NASSERT/NDEBUG builds "O3"
# -p so that re-running the script with an existing directory is not an error.
mkdir -p release
cd release || exit
polytracker build cmake ../.. \
    -DCMAKE_C_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable" \
    -DCMAKE_CXX_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable" \
    -DCMAKE_BUILD_TYPE=Debug -DNASSERT=1 -DNDEBUG=1 -DCODA_BUILD_TESTS=OFF

polytracker build cmake --build . -j$(($(nproc) + 1)) --target show_nitf++ --config Debug
polytracker extract-bc -o baseO3.bc modules/c++/nitf/show_nitf++
# Run the pt-tcf pass from the PolyTracker pass plugin before optimizing.
opt -load "${COMPILER_DIR}/pass/libPolytrackerPass.so" -load-pass-plugin "${COMPILER_DIR}/pass/libPolytrackerPass.so" -passes=pt-tcf -o "after_preoptO3.bc" "baseO3.bc"
echo "Optmize bitcode"
polytracker opt-bc --output O3.bc after_preoptO3.bc
echo "Instrument optimized bitcode"
polytracker instrument-bc --ftrace --taint --output instrumentedO3.bc O3.bc
echo "Lower optimized bitcode"
polytracker lower-bc -t show_nitf++ -o nitro_trackRelease instrumentedO3.bc

cd .. || exit

# O0 build
mkdir -p debug
cd debug || exit
polytracker build cmake ../.. \
    -DCMAKE_C_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable" \
    -DCMAKE_CXX_FLAGS="-w -D_POSIX_C_SOURCE=200809L -DCODA_OSS_NO_is_trivially_copyable" \
    -DCMAKE_BUILD_TYPE=Debug -DCODA_BUILD_TESTS=OFF

polytracker build cmake --build . -j$(($(nproc) + 1)) --target show_nitf++ --config Debug
polytracker extract-bc -o baseO0.bc modules/c++/nitf/show_nitf++

opt -load "${COMPILER_DIR}/pass/libPolytrackerPass.so" -load-pass-plugin "${COMPILER_DIR}/pass/libPolytrackerPass.so" -passes=pt-tcf -o "after_preoptO0.bc" "baseO0.bc"

# No opt-bc step here: the pre-processed bitcode is used as-is.
cp after_preoptO0.bc O0.bc

echo "Instrument non-optimized bitcode"
polytracker instrument-bc --ftrace --taint --output instrumentedO0.bc O0.bc

echo "Lower non-optimized bitcode"
polytracker lower-bc -t show_nitf++ -o nitro_trackDebug instrumentedO0.bc

cd .. || exit

# Collect both instrumented binaries next to the build directories.
cp release/nitro_trackRelease .
cp debug/nitro_trackDebug .
85 changes: 85 additions & 0 deletions examples/analysis/ubet/compress_tdag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from typing import List
from polytracker import taint_dag
from argparse import ArgumentParser
from pathlib import Path
import os

# Note: This is being integrated into PolyTracker directly as `polytracker compress <tdag_file.tdag>`


def copy_section(
    fin, fout, section_in: taint_dag.TDSectionMeta, section_out: taint_dag.TDSectionMeta
):
    """Copy one section's bytes from ``fin`` to ``fout``.

    The bytes are read from ``section_in.offset`` in ``fin`` and written to
    ``section_out.offset`` in ``fout`` using ``os.copy_file_range`` on the
    underlying file descriptors, with explicit offsets on both sides.

    Args:
        fin: Open source file object (must support ``fileno()``).
        fout: Open destination file object (must support ``fileno()``).
        section_in: Metadata describing the section in the source file.
        section_out: Metadata describing the section in the destination file;
            must have the same ``size``, ``tag`` and ``align`` as ``section_in``.

    Raises:
        AssertionError: If the two section descriptors disagree on size, tag
            or alignment.
        EOFError: If the source file ends before ``section_in.size`` bytes
            could be copied.
    """
    assert section_in.size == section_out.size
    assert section_in.tag == section_out.tag
    assert section_in.align == section_out.align

    src_fd = fin.fileno()
    dst_fd = fout.fileno()
    total = section_in.size
    copied = 0
    # os.copy_file_range may copy fewer bytes than requested, so keep going
    # until the whole section has been transferred.
    while copied < total:
        chunk = os.copy_file_range(
            src_fd,
            dst_fd,
            total - copied,
            section_in.offset + copied,
            section_out.offset + copied,
        )
        if chunk == 0:
            # A zero-byte result means the source has no more data; fail
            # loudly rather than emitting a silently truncated section.
            raise EOFError(
                f"source ended after {copied} of {total} bytes while copying section"
            )
        copied += chunk


def compact_section(
    starting_offset: int, section_in: taint_dag.TDSectionMeta
) -> taint_dag.TDSectionMeta:
    """Describe where ``section_in`` should live in the compacted output file.

    Args:
        starting_offset: First byte offset in the output file that is free,
            i.e. the end of whatever was placed before this section.
        section_in: Metadata of the section as found in the input file.

    Returns:
        A new ``TDSectionMeta`` with the same ``align``, ``size`` and ``tag``
        as ``section_in`` and an ``offset`` that is the smallest multiple of
        ``align`` that is greater than or equal to ``starting_offset``.
    """
    align = section_in.align
    section_out = taint_dag.TDSectionMeta()
    # Round UP to the next alignment boundary using integer arithmetic.
    # Rounding to nearest could yield an offset below starting_offset, which
    # would make this section overlap the data placed before it, and float
    # division loses precision for very large offsets.
    section_out.offset = align * ((starting_offset + align - 1) // align)
    section_out.align = align
    section_out.size = section_in.size
    section_out.tag = section_in.tag
    return section_out


def main():
    """Command-line entry point: rewrite a sparse tdag file as a dense one.

    Reads the file header and every section header from the input, computes a
    packed layout for the sections, writes the file header and the new section
    headers to the output, and finally copies each section's bytes to its new
    location.
    """
    cli = ArgumentParser(
        prog="compress_tdag", description="Compress a sparse tdag file"
    )
    cli.add_argument(
        "-i",
        "--input",
        type=Path,
        required=True,
        help="Sparse input (source) tdag file",
    )
    cli.add_argument(
        "-o",
        "--output",
        type=Path,
        required=True,
        help="Dense output (destination) tdag file",
    )
    opts = cli.parse_args()

    with open(opts.input, "rb") as fin, open(opts.output, "wb") as fout:
        # File header first, then one section header per declared section.
        fmeta_in = taint_dag.TDFileMeta()
        fin.readinto(fmeta_in)

        sections_in = []
        for _ in range(fmeta_in.section_count):
            meta = taint_dag.TDSectionMeta()
            fin.readinto(meta)
            sections_in.append(meta)

        # Everything read so far is header; section data may start right after it.
        header_len = fin.tell()
        print(fmeta_in, sections_in, header_len)

        # Lay the sections out back to back, each starting where the previous
        # one ended (subject to compact_section's alignment handling).
        fout.write(fmeta_in)
        sections_out = []
        next_free = header_len
        for meta in sections_in:
            packed = compact_section(next_free, meta)
            sections_out.append(packed)
            fout.write(packed)
            next_free = packed.offset + packed.size

        print("COPY!")
        for src_meta, dst_meta in zip(sections_in, sections_out):
            print(src_meta, dst_meta)
            copy_section(fin, fout, src_meta, dst_meta)


if __name__ == "__main__":
main()
Loading

0 comments on commit b63fc3d

Please sign in to comment.