Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Code Analysis Framework #228

Merged
merged 1 commit into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 199 additions & 0 deletions code.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
package main

import (
"fmt"
"strings"

"github.com/safedep/dry/utils"
"github.com/safedep/vet/internal/ui"
"github.com/safedep/vet/pkg/code"
"github.com/safedep/vet/pkg/code/languages"
"github.com/safedep/vet/pkg/common/logger"
"github.com/safedep/vet/pkg/storage/graph"
"github.com/spf13/cobra"
)

// Settings bound to the command-line flags of the "code" command.
var (
	// Directory lists; both start out as empty, non-nil slices.
	codeAppDirectories    = make([]string, 0)
	codeImportDirectories = make([]string, 0)
)

var (
	// Path of the graph database and the source language name.
	codeGraphDatabase string
	codeLanguage      string
)

// newCodeCommand assembles the experimental "code" command. It registers the
// --src, --imports, --db and --lang flags, marks --db as required, and attaches
// the create-db and import-reachability subcommands.
func newCodeCommand() *cobra.Command {
	codeCmd := &cobra.Command{
		Use:   "code",
		Short: "[EXPERIMENTAL] Perform code analysis with insights data",
		// The parent command performs no work itself.
		RunE: func(*cobra.Command, []string) error { return nil },
	}

	flags := codeCmd.Flags()
	flags.StringArrayVar(&codeAppDirectories, "src", []string{}, "Source code root directory to analyze")
	flags.StringArrayVar(&codeImportDirectories, "imports", []string{}, "Language specific directory to find imported source")
	flags.StringVar(&codeGraphDatabase, "db", "", "Path to the database")
	flags.StringVar(&codeLanguage, "lang", "python", "Language of the source code")

	// A failure here is only logged; command construction continues.
	if err := codeCmd.MarkFlagRequired("db"); err != nil {
		logger.Errorf("Failed to mark flag as required: %v", err)
	}

	codeCmd.AddCommand(
		newCodeCreateDatabaseCommand(),
		newCodeImportReachabilityCommand(),
	)

	return codeCmd
}

// newCodeCreateDatabaseCommand returns the "create-db" subcommand, whose run
// handler delegates to startCreateDatabase and always reports success to cobra.
func newCodeCreateDatabaseCommand() *cobra.Command {
	run := func(*cobra.Command, []string) error {
		startCreateDatabase()
		return nil
	}

	return &cobra.Command{
		Use:   "create-db",
		Short: "Analyse code and create a database for further analysis",
		RunE:  run,
	}
}

// newCodeImportReachabilityCommand returns the "import-reachability"
// subcommand, whose run handler delegates to startImportReachability and
// always reports success to cobra.
func newCodeImportReachabilityCommand() *cobra.Command {
	run := func(*cobra.Command, []string) error {
		startImportReachability()
		return nil
	}

	return &cobra.Command{
		Use:   "import-reachability",
		Short: "Analyse import reachability",
		RunE:  run,
	}
}

func startCreateDatabase() {
failOnError("code-create-db", internalStartCreateDatabase())
}

func startImportReachability() {
failOnError("code-import-reachability-analysis", internalStartImportReachability())
}

// internalStartImportReachability prints the experimental warning and checks
// that a database path was supplied. The analysis itself is not implemented
// yet, so the function returns nil once validation passes.
func internalStartImportReachability() error {
	codePrintExperimentalWarning()

	dbPathMissing := utils.IsEmptyString(codeGraphDatabase)
	if dbPathMissing {
		return fmt.Errorf("no database path provided")
	}

	// TODO: Load the code graph from the database (a code graph loader is
	// still needed) before invoking the analysis modules.

	return nil
}

// internalStartCreateDatabase validates the command inputs, wires up a source
// repository, language, property graph and code graph builder, and then runs
// the builder while reporting progress through the ui package.
//
// It returns an error when no source directory, import directory or database
// path was provided, when the configured language is unsupported, or when any
// of the construction or build steps fails. The progress writer is stopped on
// both the success and the build-failure paths.
func internalStartCreateDatabase() error {
	codePrintExperimentalWarning()
	logger.Debugf("Starting code analysis")

	// Validate every required input before constructing analysis components.
	if len(codeAppDirectories) == 0 {
		return fmt.Errorf("no source code directory provided")
	}

	if len(codeImportDirectories) == 0 {
		return fmt.Errorf("no import directory provided")
	}

	if utils.IsEmptyString(codeGraphDatabase) {
		return fmt.Errorf("no database path provided")
	}

	codeRepoCfg := code.FileSystemSourceRepositoryConfig{
		SourcePaths: codeAppDirectories,
		ImportPaths: codeImportDirectories,
	}

	codeRepo, err := code.NewFileSystemSourceRepository(codeRepoCfg)
	if err != nil {
		return fmt.Errorf("failed to create source repository: %w", err)
	}

	codeLang, err := codeGetLanguage()
	if err != nil {
		return fmt.Errorf("failed to create source language: %w", err)
	}

	codeRepo.ConfigureForLanguage(codeLang)

	// Named codeGraph so the local does not shadow the imported graph package.
	codeGraph, err := graph.NewPropertyGraph(&graph.LocalPropertyGraphConfig{
		Name:         "code-analysis",
		DatabasePath: codeGraphDatabase,
	})
	if err != nil {
		return fmt.Errorf("failed to create graph database: %w", err)
	}

	builderConfig := code.CodeGraphBuilderConfig{
		RecursiveImport: true,
	}

	builder, err := code.NewCodeGraphBuilder(builderConfig, codeRepo, codeLang, codeGraph)
	if err != nil {
		return fmt.Errorf("failed to create code graph builder: %w", err)
	}

	redirectLogToFile(logFile)

	// The trackers are declared before the handler so the closure can capture
	// them; they are assigned below, before Build is invoked.
	var fileProcessedTracker any
	var importsProcessedTracker any
	var functionsProcessedTracker any

	builder.RegisterEventHandler("ui-callback",
		func(event code.CodeGraphBuilderEvent, metrics code.CodeGraphBuilderMetrics) error {
			switch event.Kind {
			case code.CodeGraphBuilderEventFileQueued:
				ui.IncrementTrackerTotal(fileProcessedTracker, 1)
			case code.CodeGraphBuilderEventFileProcessed:
				ui.IncrementProgress(fileProcessedTracker, 1)
			}

			// Import and function counters are refreshed on every event.
			ui.UpdateValue(importsProcessedTracker, int64(metrics.ImportsCount))
			ui.UpdateValue(functionsProcessedTracker, int64(metrics.FunctionsCount))

			return nil
		})

	ui.StartProgressWriter()

	fileProcessedTracker = ui.TrackProgress("Processing source files", 0)
	importsProcessedTracker = ui.TrackProgress("Processing imports", 0)
	functionsProcessedTracker = ui.TrackProgress("Processing functions", 0)

	err = builder.Build()
	if err != nil {
		// Stop the progress writer on failure as well, so it is not left
		// running once the error is reported to the caller.
		ui.StopProgressWriter()
		return fmt.Errorf("failed to build code graph: %w", err)
	}

	ui.MarkTrackerAsDone(fileProcessedTracker)
	ui.MarkTrackerAsDone(importsProcessedTracker)
	ui.MarkTrackerAsDone(functionsProcessedTracker)
	ui.StopProgressWriter()

	logger.Debugf("Code analysis completed")
	return nil
}

// codePrintExperimentalWarning prints a warning, via the ui package, that the
// code analysis feature is experimental.
func codePrintExperimentalWarning() {
	const warning = "Code analysis is experimental and may have breaking change"
	ui.PrintWarning(warning)
}

// codeGetLanguage maps the value of codeLanguage to a source language
// implementation. Matching is done on the lower-cased name; only "python" is
// recognised, and any other value yields an "unsupported language" error.
func codeGetLanguage() (code.SourceLanguage, error) {
	if strings.ToLower(codeLanguage) == "python" {
		return languages.NewPythonSourceLanguage()
	}

	return nil, fmt.Errorf("unsupported language: %s", codeLanguage)
}
87 changes: 87 additions & 0 deletions docs/docs/guides/code-analysis.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
---
sidebar_position: 2
title: 🏄 Code Analysis
---

# Code Analysis

:::note

EXPERIMENTAL: This feature is experimental and may introduce breaking changes.

:::

`vet` has a code analysis framework built on top of [tree-sitter](https://tree-sitter.github.io/tree-sitter/) parsers. The goal
of this framework is to support multiple languages and source repositories (local and remote),
and to create a representation of code that can be analysed for common software
supply chain security use cases such as:

- Identify shadowed imports
- Identify evidence of a dependency actually being used
- Import reachability analysis
- Function reachability analysis

:::warning

The code analysis framework is deliberately simple and fast; it is not intended
to be a full-fledged static analysis tool. It is currently in the early stages
of development and may not support all languages or maintain API compatibility.

:::

## Build a Code Analysis Database

- Analyse code and build a database for further analysis.

```bash
vet code --db /tmp/code.db \
--src /path/to/app \
--imports /virtualenvs/app/lib/python3.11/site-packages \
--lang python \
create-db
```

The above command does the following:

- Uses Python as the language for parsing source code
- Analyses application code recursively in `/path/to/app`
- Analyses dependencies in `/virtualenvs/app/lib/python3.11/site-packages`
- Creates a database at `/tmp/code.db` for further analysis

## Manual Query Execution

Use [cayleygraph](https://cayley.gitbook.io/cayley/) to query the database.

```bash
docker run -it -p 64210:64210 -v /tmp/code.db:/db cayleygraph/cayley -a /db -d bolt
```

- Navigate to `http://127.0.0.1:64210` in your browser

### Query Examples

#### Dependency Graph

Build the dependency graph for your application

```js
g.V().Tag("source").out("imports").Tag("target").all()
```

![Dependency Graph](/img/vet-code-demo-import-graph.png)

#### Import Reachability

Check if a specific import is reachable in your application

```js
g.V("app").followRecursive(g.M().out("imports")).is("six").all()
```

- `app` is the application originating from `app.py`
- `six` is a Python module imported transitively

### Query API

Refer to [Gizmo Query Language](https://cayley.gitbook.io/cayley/query-languages/gizmoapi)
for documentation on constructing custom queries.
Binary file added docs/static/img/vet-code-demo-import-graph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
28 changes: 27 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ toolchain go1.22.1
require (
github.com/AlecAivazis/survey/v2 v2.3.7
github.com/CycloneDX/cyclonedx-go v0.9.0
github.com/cayleygraph/cayley v0.7.7
github.com/cayleygraph/quad v1.2.5
github.com/cli/oauth v1.0.1
github.com/deepmap/oapi-codegen v1.16.3
github.com/gofri/go-github-ratelimit v1.1.0
Expand All @@ -25,7 +27,7 @@ require (
github.com/spf13/cobra v1.8.0
github.com/stretchr/testify v1.9.0
golang.org/x/oauth2 v0.21.0
google.golang.org/protobuf v1.34.1
google.golang.org/protobuf v1.34.2
gopkg.in/yaml.v2 v2.4.0
)

Expand All @@ -44,13 +46,19 @@ require (
github.com/antlr4-go/antlr/v4 v4.13.1 // indirect
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/boltdb/bolt v1.3.1 // indirect
github.com/bytedance/sonic v1.11.8 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/chainguard-dev/git-urls v1.0.2 // indirect
github.com/cloudflare/circl v1.3.8 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dennwc/base v1.0.0 // indirect
github.com/dlclark/regexp2 v1.11.0 // indirect
github.com/dop251/goja v0.0.0-20190105122144-6d5bf35058fa // indirect
github.com/fatih/structs v1.1.0 // indirect
github.com/flosch/pongo2/v4 v4.0.2 // indirect
github.com/gabriel-vasile/mimetype v1.4.4 // indirect
Expand All @@ -59,14 +67,22 @@ require (
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.21.0 // indirect
github.com/go-sourcemap/sourcemap v2.1.2+incompatible // indirect
github.com/gobuffalo/envy v1.7.1 // indirect
github.com/gobuffalo/logger v1.0.1 // indirect
github.com/gobuffalo/packd v0.3.0 // indirect
github.com/gobuffalo/packr/v2 v2.7.1 // indirect
github.com/goccy/go-json v0.10.3 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2 // indirect
github.com/google/go-querystring v1.1.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/css v1.0.1 // indirect
github.com/hidal-go/hidalgo v0.0.0-20190814174001-42e03f3b5eaa // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/iris-contrib/schema v0.0.6 // indirect
github.com/joho/godotenv v1.3.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kataras/blocks v0.0.8 // indirect
Expand All @@ -93,15 +109,23 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/oklog/ulid/v2 v2.1.0 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/piprate/json-gold v0.3.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/pquerna/cachecontrol v0.0.0-20180517163645-1555304b9b35 // indirect
github.com/prometheus/client_golang v1.19.0 // indirect
github.com/prometheus/client_model v0.6.0 // indirect
github.com/prometheus/common v0.50.0 // indirect
github.com/prometheus/procfs v0.13.0 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/rogpeppe/go-internal v1.12.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/schollz/closestmatch v2.1.0+incompatible // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stoewer/go-strcase v1.3.0 // indirect
github.com/tdewolff/minify/v2 v2.20.33 // indirect
github.com/tdewolff/parse/v2 v2.7.14 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/tylertreat/BoomFilters v0.0.0-20181028192813-611b3dbe80e8 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasttemplate v1.2.2 // indirect
Expand All @@ -115,10 +139,12 @@ require (
golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 // indirect
golang.org/x/mod v0.18.0 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/term v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.22.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240610135401-a8a62080eff3 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240610135401-a8a62080eff3 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
Expand Down
Loading
Loading