Skip to content

Commit 5e860b0

Browse files
committed Nov 22, 2024 ·
PR Clone fix
1 parent 1d53212 commit 5e860b0

File tree

6 files changed

+222
-150
lines changed

6 files changed

+222
-150
lines changed
 

‎.github/workflows/build.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -28,4 +28,4 @@ jobs:
2828
context: .
2929
file: ./Dockerfile
3030
push: true
31-
tags: ${{ secrets.DOCKERHUB_USERNAME }}/hela:v7
31+
tags: ${{ secrets.DOCKERHUB_USERNAME }}/hela:v8

‎src/scans/tools/license_tool.rs

+43-16
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,27 @@
1-
21
use std::{collections::HashMap, time::Instant};
32

43
use mongodb::bson::uuid;
54
use serde_json::json;
65

7-
use crate::{utils::{common::{execute_command, post_json_data}, file_utils::find_files_recursively}, scans::tools::sca_tool::SUPPORTED_MANIFESTS};
8-
6+
use crate::{
7+
scans::tools::sca_tool::SUPPORTED_MANIFESTS,
8+
utils::{common::execute_command, file_utils::find_files_recursively},
9+
};
910

1011
pub struct LicenseTool;
1112

1213
impl LicenseTool {
1314
pub fn new() -> Self {
1415
LicenseTool
1516
}
16-
17-
pub async fn run_scan(&self, _path: &str, _commit_id: Option<&str>, _branch: Option<&str>, verbose: bool) {
17+
18+
pub async fn run_scan(
19+
&self,
20+
_path: &str,
21+
_commit_id: Option<&str>,
22+
_branch: Option<&str>,
23+
verbose: bool,
24+
) {
1825
let start_time = Instant::now();
1926
if verbose {
2027
println!("[+] Running License compliance scan on path: {}", _path);
@@ -33,11 +40,11 @@ impl LicenseTool {
3340
if let Some(_branch) = _branch {
3441
let clone_command = format!("git clone -b {} {} /tmp/app", _branch, _path);
3542
execute_command(&clone_command, false).await;
36-
}else{
43+
} else {
3744
let clone_command = format!("git clone {} /tmp/app", _path);
3845
execute_command(&clone_command, false).await;
3946
}
40-
}else{
47+
} else {
4148
if verbose {
4249
println!("[+] Copying project to /tmp/app...");
4350
}
@@ -58,20 +65,27 @@ impl LicenseTool {
5865
// now run secret scan on /tmp/code folder
5966
_path = format!("/tmp/code");
6067
}
61-
let manifests = find_files_recursively(&_path, unsafe { SUPPORTED_MANIFESTS.to_vec() }, ignore_dirs).await;
68+
let manifests =
69+
find_files_recursively(&_path, unsafe { SUPPORTED_MANIFESTS.to_vec() }, ignore_dirs)
70+
.await;
6271
let mut manifest_license = HashMap::new();
6372
for manifest in manifests.iter() {
6473
let file_name = manifest.split("/").last().unwrap();
6574
let folder_path = manifest.replace(file_name, "");
6675
let random_file_name = format!("{}.json", uuid::Uuid::new().to_string());
6776
// if manifest ends with pom.xml then pass -t java otherwise nothing
68-
let mut license_command = format!("cd {} && cdxgen -o {}", folder_path, random_file_name);
77+
let mut license_command =
78+
format!("cd {} && cdxgen -o {}", folder_path, random_file_name);
6979
if file_name.ends_with("pom.xml") {
70-
license_command = format!("cd {} && cdxgen -o {} -t java", folder_path, random_file_name);
80+
license_command = format!(
81+
"cd {} && cdxgen -o {} -t java",
82+
folder_path, random_file_name
83+
);
7184
}
7285
execute_command(&license_command, false).await;
7386
// Read JSON file and parse data
74-
let license_json = std::fs::read_to_string(format!("{}/{}", folder_path, random_file_name)).unwrap();
87+
let license_json =
88+
std::fs::read_to_string(format!("{}/{}", folder_path, random_file_name)).unwrap();
7589
let json_data = serde_json::from_str::<serde_json::Value>(&license_json).unwrap();
7690
// extract license data from "components" key there will be list of components so grab licenses from there
7791
let components = json_data["components"].as_array().unwrap();
@@ -87,8 +101,14 @@ impl LicenseTool {
87101
license_names.push(license["id"].as_str().unwrap().to_string());
88102
}
89103
}
90-
component_licenses.insert(format!("{}@{}", component_name, component_version), license_names);
91-
manifest_license.insert(format!("{}/{}", folder_path, file_name), component_licenses.clone());
104+
component_licenses.insert(
105+
format!("{}@{}", component_name, component_version),
106+
license_names,
107+
);
108+
manifest_license.insert(
109+
format!("{}/{}", folder_path, file_name),
110+
component_licenses.clone(),
111+
);
92112
}
93113
}
94114
// save data in output.json and before that get json data from output.json file if it exists and then append new data to it
@@ -99,10 +119,17 @@ impl LicenseTool {
99119
output_json = serde_json::from_str::<serde_json::Value>(&output_json_data).unwrap();
100120
}
101121
output_json["license"] = json!(manifest_license);
102-
std::fs::write("/tmp/output.json", serde_json::to_string_pretty(&output_json).unwrap()).unwrap();
122+
std::fs::write(
123+
"/tmp/output.json",
124+
serde_json::to_string_pretty(&output_json).unwrap(),
125+
)
126+
.unwrap();
103127
let end_time = Instant::now();
104128
let elapsed_time = end_time - start_time;
105129
let elapsed_seconds = elapsed_time.as_secs_f64().round();
106-
println!("Execution time for License Compliance scan: {:?} seconds", elapsed_seconds);
130+
println!(
131+
"Execution time for License Compliance scan: {:?} seconds",
132+
elapsed_seconds
133+
);
107134
}
108-
}
135+
}

‎src/scans/tools/sast_tool.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ impl SastTool {
2121
) {
2222
let start_time = Instant::now();
2323
if verbose {
24-
println!("[+] Running SAST scan on path: {}", _path.clone());
24+
println!("[+] Running SAST scan on path: {}", _path);
2525
}
2626
println!("Commit ID: {:?}", _commit_id);
2727
println!("Branch: {:?}", _branch);
@@ -54,7 +54,7 @@ impl SastTool {
5454
if verbose {
5555
println!("[+] Copying project to /tmp/app...");
5656
}
57-
let copy_command = format!("cp -r {} /tmp/app", _path.clone());
57+
let copy_command = format!("cp -r {} /tmp/app", _path);
5858
execute_command(&copy_command, true).await;
5959
}
6060
}

‎src/scans/tools/sca_tool.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ use std::{collections::HashMap, fs, time::Instant};
33
use serde_json::{json, Value};
44

55
use crate::utils::{
6-
common::{checkout, execute_command, post_json_data},
6+
common::{checkout, execute_command},
77
file_utils::find_files_recursively,
88
};
99

@@ -251,7 +251,7 @@ impl ScaTool {
251251
if verbose {
252252
println!("[+] Copying project to /tmp/app...");
253253
}
254-
let copy_command = format!("cp -r {} /tmp/app", _path.clone());
254+
let copy_command = format!("cp -r {} /tmp/app", _path);
255255
execute_command(&copy_command, true).await;
256256
}
257257
}

‎src/scans/tools/secret_tool.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ impl SecretTool {
3232
if verbose {
3333
println!("[+] Copying project to /tmp/app...");
3434
}
35-
let copy_command = format!("cp -r {} /tmp/app", _path.clone());
35+
let copy_command = format!("cp -r {} /tmp/app", _path);
3636
execute_command(&copy_command, true).await;
3737
}
3838
}

‎src/utils/common.rs

+173-128
Original file line number | Diff line number | Diff line change
@@ -4,22 +4,23 @@ use futures::StreamExt;
44
use mongodb::{
55
bson::{doc, Bson, Document},
66
error::Error,
7-
options::{ClientOptions, FindOptions},
7+
options::ClientOptions,
88
Client, Collection,
99
};
1010

1111
use chrono::Utc;
1212
use regex::Regex;
13-
use reqwest::header::{HeaderMap, HeaderValue};
1413
use serde_json::Value;
1514
use sha2::{Digest, Sha256};
16-
use std::fs::{self, File};
17-
use std::io::Read;
18-
use std::path::{Path, PathBuf};
19-
use std::process::Stdio;
15+
use std::io::{Read, Write};
16+
use std::path::Path;
2017
use std::time::Duration;
2118
use std::{collections::HashMap, process::Command};
2219
use std::{collections::HashSet, env};
20+
use std::{
21+
fs::{self, File},
22+
path::PathBuf,
23+
};
2324

2425
// define static exit codes and message
2526
pub const EXIT_CODE_LICENSE_FAILED: i32 = 101;
@@ -47,7 +48,7 @@ pub async fn upload_to_defect_dojo(
4748
engagement_name: &str,
4849
filename: &str,
4950
) -> Result<(), reqwest::Error> {
50-
let mut file = File::open(filename.clone()).unwrap();
51+
let mut file = File::open(filename).unwrap();
5152

5253
let mut buffer = Vec::new();
5354
file.read_to_end(&mut buffer).unwrap();
@@ -142,7 +143,7 @@ pub fn redact_github_token(input: &str) -> String {
142143

143144
async fn connect_to_mongodb(
144145
mongo_uri: &str,
145-
db_name: &str,
146+
_db_name: &str,
146147
) -> Result<Client, mongodb::error::Error> {
147148
let client_options = ClientOptions::parse(mongo_uri).await?;
148149
let client = Client::with_options(client_options)?;
@@ -266,159 +267,203 @@ pub async fn execute_command(command: &str, suppress_error: bool) -> String {
266267
stdout.to_string()
267268
}
268269

269-
pub fn checkout(
270-
clone_url: &str,
271-
clone_path: &str,
272-
branch: Option<&str>,
273-
pr_branch: Option<&str>,
274-
) -> Result<(), Box<dyn std::error::Error>> {
275-
// Clone the repository; use the specified branch or default branch if `branch` is None
276-
let mut clone_cmd = Command::new("git");
277-
clone_cmd.arg("clone").arg(clone_url).arg(clone_path);
278-
if let Some(branch_name) = branch {
279-
clone_cmd.arg("--branch").arg(branch_name);
280-
}
281-
let output = clone_cmd.output()?;
282-
if !output.status.success() {
283-
let error_msg = String::from_utf8_lossy(&output.stderr);
284-
return Err(format!("Failed to clone repository: {}", error_msg).into());
285-
}
270+
fn delete_except(files: &[String], base_dir: &Path) -> Result<(), Box<dyn std::error::Error>> {
271+
println!("Deleting all files except the following:");
272+
println!("__________________________________________ {:?}", files);
273+
let files_to_keep: Vec<PathBuf> = files
274+
.iter()
275+
.map(|file| base_dir.join(file.trim()))
276+
.collect();
286277

287-
// Set the working directory to the cloned path
288-
let cloned_path = Path::new(clone_path).canonicalize()?;
289-
let repo_path = cloned_path.to_str().unwrap();
290-
env::set_current_dir(&cloned_path)?;
278+
traverse_and_delete(base_dir, &files_to_keep)?;
291279

292-
// Configure Git user for commits in this repository
293-
Command::new("git")
294-
.args(&["config", "user.email", "ci@example.com"])
295-
.output()?;
296-
Command::new("git")
297-
.args(&["config", "user.name", "CI Bot"])
298-
.output()?;
280+
Ok(())
281+
}
299282

300-
// Store the set of changed files
301-
let mut changed_files = HashSet::new();
283+
fn traverse_and_delete(base_dir: &Path, files_to_keep: &[PathBuf]) -> Result<(), std::io::Error> {
284+
for entry in fs::read_dir(base_dir)? {
285+
let entry = entry?;
286+
let path = entry.path();
302287

303-
// If a pr_branch is provided, fetch it as a local branch and compare with the base branch
304-
if let Some(pr_branch_name) = pr_branch {
305-
// Fetch the PR branch and create a local branch
306-
let fetch_output = Command::new("git")
307-
.args(&[
308-
"fetch",
309-
"origin",
310-
&format!("{}:{}", pr_branch_name, pr_branch_name),
311-
])
312-
.output()?;
313-
if !fetch_output.status.success() {
314-
let error_msg = String::from_utf8_lossy(&fetch_output.stderr);
315-
return Err(format!(
316-
"Failed to fetch PR branch '{}': {}",
317-
pr_branch_name, error_msg
318-
)
319-
.into());
288+
// Skip the .git directory
289+
if path.is_dir() && path.file_name().map_or(false, |name| name == ".git") {
290+
continue;
320291
}
321292

322-
// Perform a diff between `branch` (or the default branch) and `pr_branch`
323-
let base_branch = branch.unwrap_or("HEAD");
324-
let diff_output = Command::new("git")
325-
.args(&["diff", "--name-only", base_branch, pr_branch_name])
326-
.output()?;
327-
328-
if !diff_output.status.success() {
329-
let error_msg = String::from_utf8_lossy(&diff_output.stderr);
330-
return Err(format!("Failed to diff branches: {}", error_msg).into());
293+
if path.is_dir() {
294+
traverse_and_delete(&path, files_to_keep)?;
331295
}
332296

333-
// Parse the diff output into a set of changed files
334-
let diff_output_str = String::from_utf8_lossy(&diff_output.stdout);
335-
for line in diff_output_str.lines() {
336-
changed_files.insert(line.trim().to_string());
297+
// Check if the path should be deleted (only delete files)
298+
if path.is_file() && !files_to_keep.contains(&path.canonicalize()?) {
299+
fs::remove_file(&path)?;
337300
}
338-
} else {
339-
// If no PR branch, list all files in the base branch
340-
let list_output = Command::new("git")
341-
.args(&["ls-tree", "-r", "--name-only", "HEAD"])
342-
.output()?;
301+
}
302+
303+
Ok(())
304+
}
305+
306+
fn delete_empty_directories(start_dir: &Path) -> Result<(), std::io::Error> {
307+
for entry in fs::read_dir(start_dir)? {
308+
let entry = entry?;
309+
let path = entry.path();
343310

344-
if !list_output.status.success() {
345-
let error_msg = String::from_utf8_lossy(&list_output.stderr);
346-
return Err(format!("Failed to list files in base branch: {}", error_msg).into());
311+
// Skip the .git directory
312+
if path.is_dir() && path.file_name().map_or(false, |name| name == ".git") {
313+
continue;
347314
}
348315

349-
// Parse the list output into a set of files
350-
let list_output_str = String::from_utf8_lossy(&list_output.stdout);
351-
for line in list_output_str.lines() {
352-
changed_files.insert(line.trim().to_string());
316+
if path.is_dir() {
317+
delete_empty_directories(&path)?;
318+
if fs::read_dir(&path)?.next().is_none() {
319+
fs::remove_dir(&path)?;
320+
}
353321
}
354322
}
355323

356-
// Print the changed files for debugging purposes
357-
println!("Changed files:\n{:#?}", changed_files);
324+
Ok(())
325+
}
358326

359-
// Ensure the working directory is up-to-date before checking out files
360-
Command::new("git")
361-
.args(&["checkout", pr_branch.unwrap_or("HEAD")])
362-
.output()?;
327+
fn get_cumulative_pr_files(
328+
base_branch: Option<&str>,
329+
pr_branch: Option<&str>,
330+
) -> Result<Vec<String>, Box<dyn std::error::Error>> {
331+
if let Some(pr) = pr_branch {
332+
// If base branch is provided, merge it into a temp branch
333+
if let Some(base) = base_branch {
334+
// Step 1: Checkout the base branch
335+
Command::new("git").args(&["checkout", base]).output()?;
336+
337+
// Step 2: Create a temporary merge branch
338+
Command::new("git")
339+
.args(&["checkout", "-b", "temp_pr_merge_branch", base])
340+
.output()?;
341+
342+
// Step 3: Merge the PR branch into the temporary branch
343+
let merge_output = Command::new("git")
344+
.args(&["merge", "--no-ff", &format!("origin/{}", pr)])
345+
.output()?;
346+
if !merge_output.status.success() {
347+
let error_msg = String::from_utf8_lossy(&merge_output.stderr);
348+
return Err(format!("Failed to merge PR branch: {}", error_msg).into());
349+
}
363350

364-
// Ensure each changed file is checked out from the PR branch
365-
for file in &changed_files {
366-
let checkout_output = Command::new("git")
367-
.args(&["checkout", pr_branch.unwrap_or("HEAD"), "--", file])
368-
.output()?;
351+
// Step 4: Get the list of changed files between base and temp PR branch
352+
let diff_output = Command::new("git")
353+
.args(&["diff", "--name-only", base, "temp_pr_merge_branch"])
354+
.output()?;
355+
if !diff_output.status.success() {
356+
let error_msg = String::from_utf8_lossy(&diff_output.stderr);
357+
return Err(format!("Failed to get changed files: {}", error_msg).into());
358+
}
359+
360+
let changed_files: Vec<String> = String::from_utf8_lossy(&diff_output.stdout)
361+
.lines()
362+
.map(String::from)
363+
.collect();
364+
365+
// No cleanup: Stay on the temporary branch to get the PR branch content
366+
367+
Ok(changed_files)
368+
} else {
369+
// If only PR branch is provided, just get the list of files in that branch
370+
let diff_output = Command::new("git")
371+
.args(&["ls-tree", "-r", "--name-only", &format!("origin/{}", pr)])
372+
.output()?;
373+
if !diff_output.status.success() {
374+
let error_msg = String::from_utf8_lossy(&diff_output.stderr);
375+
return Err(format!("Failed to list files in PR branch: {}", error_msg).into());
376+
}
369377

370-
if !checkout_output.status.success() {
371-
let error_msg = String::from_utf8_lossy(&checkout_output.stderr);
372-
println!("Failed to checkout file '{}': {}", file, error_msg);
378+
let changed_files: Vec<String> = String::from_utf8_lossy(&diff_output.stdout)
379+
.lines()
380+
.map(String::from)
381+
.collect();
382+
Ok(changed_files)
373383
}
384+
} else {
385+
Err("PR branch is required to fetch changes.".into())
374386
}
387+
}
388+
389+
fn save_pr_branch_files(
390+
changed_files: &[String],
391+
pr_branch: &str,
392+
) -> Result<(), Box<dyn std::error::Error>> {
393+
for file in changed_files {
394+
let file_content = Command::new("git")
395+
.args(&["show", &format!("origin/{}:{}", pr_branch, file)])
396+
.output()?;
397+
if !file_content.status.success() {
398+
let error_msg = String::from_utf8_lossy(&file_content.stderr);
399+
return Err(format!("Failed to get content of file {}: {}", file, error_msg).into());
400+
}
375401

376-
// Remove all files not in the `changed_files` set
377-
remove_unwanted_files(repo_path, &changed_files)?;
402+
let file_path = Path::new(file);
403+
if let Some(parent) = file_path.parent() {
404+
fs::create_dir_all(parent)?;
405+
}
378406

379-
println!("Only the changed files have been kept locally.");
407+
let mut file_handle = File::create(file_path)?;
408+
file_handle.write_all(&file_content.stdout)?;
409+
}
380410

381411
Ok(())
382412
}
383413

384-
/// Removes all files that are not in the `files_to_keep` set, but preserves directories.
385-
///
386-
/// # Arguments
387-
///
388-
/// * `repo_path` - The path of the repository.
389-
/// * `files_to_keep` - A set of file paths to keep relative to the `repo_path`.
390-
fn remove_unwanted_files(
391-
repo_path: &str,
392-
files_to_keep: &HashSet<String>,
414+
pub fn checkout(
415+
clone_url: &str,
416+
clone_path: &str,
417+
base_branch: Option<&str>,
418+
pr_branch: Option<&str>,
393419
) -> Result<(), Box<dyn std::error::Error>> {
394-
// Recursively remove unwanted files
395-
for entry in fs::read_dir(repo_path)? {
396-
let entry = entry?;
397-
let path = entry.path();
420+
// Step 1: Clone the repository
421+
let mut clone_cmd = Command::new("git");
422+
clone_cmd.arg("clone").arg(clone_url).arg(clone_path);
423+
if let Some(branch) = base_branch {
424+
clone_cmd.arg("--branch").arg(branch);
425+
}
398426

399-
// Skip the .git directory to preserve repository integrity
400-
if path.is_dir() && path.file_name().map_or(false, |name| name == ".git") {
401-
continue;
402-
}
427+
let output = clone_cmd.output()?;
428+
if !output.status.success() {
429+
let error_msg = String::from_utf8_lossy(&output.stderr);
430+
return Err(format!("Failed to clone repository: {}", error_msg).into());
431+
}
403432

404-
// Determine the relative path
405-
let relative_path = path.strip_prefix(repo_path)?.to_str().unwrap().to_string();
433+
let cloned_path = Path::new(clone_path).canonicalize()?;
434+
env::set_current_dir(&cloned_path)?;
406435

407-
// Check if the file should be kept or removed
408-
if path.is_file() && !files_to_keep.contains(&relative_path) {
409-
println!("Removing file: {}", relative_path);
410-
fs::remove_file(&path)?;
411-
} else if path.is_dir() {
412-
// Recursively clean up subdirectories
413-
remove_unwanted_files(path.to_str().unwrap(), files_to_keep)?;
436+
// Fetch the PR branch
437+
if let Some(pr) = pr_branch {
438+
let fetch_output = Command::new("git")
439+
.args(&["fetch", "origin", pr])
440+
.output()?;
441+
if !fetch_output.status.success() {
442+
let error_msg = String::from_utf8_lossy(&fetch_output.stderr);
443+
return Err(format!("Failed to fetch PR branch: {}", error_msg).into());
444+
}
445+
}
414446

415-
// Check if the directory is empty and remove it
416-
if fs::read_dir(&path)?.next().is_none() {
417-
println!("Removing empty directory: {}", relative_path);
418-
fs::remove_dir(&path)?;
419-
}
447+
// Get the list of changed files
448+
let changed_files = match (base_branch, pr_branch) {
449+
(Some(base), Some(pr)) => get_cumulative_pr_files(Some(base), Some(pr))?,
450+
(None, Some(pr)) => get_cumulative_pr_files(None, Some(pr))?,
451+
_ => {
452+
return Err("At least PR branch must be specified.".into());
420453
}
454+
};
455+
456+
println!("Changed files:\n{:?}", changed_files);
457+
458+
// Save the content of the changed files from the PR branch
459+
if let Some(pr) = pr_branch {
460+
save_pr_branch_files(&changed_files, pr)?;
421461
}
462+
463+
// Now proceed with deletion based on the changed files
464+
delete_except(&changed_files, &cloned_path)?;
465+
delete_empty_directories(&cloned_path)?;
466+
422467
Ok(())
423468
}
424469

0 commit comments

Comments
 (0)
Please sign in to comment.