diff --git a/README.md b/README.md index f37cc46..4d7b585 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ You can download the pre-built binaries from the [releases](https://github.com/u urlhunter requires 3 parameters to run: `-keywords`, `-date` and `-o`. -For example: `urlhunter -keywords keywords.txt -date 2020-11-20 -o out.txt` +For example: `urlhunter --keywords keywords.txt --date 2020-11-20 --o out.txt` ### --keywords diff --git a/go.mod b/go.mod index 3e5f6a4..10c7a2a 100644 --- a/go.mod +++ b/go.mod @@ -4,5 +4,6 @@ go 1.15 require ( github.com/fatih/color v1.10.0 + github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3 github.com/schollz/progressbar/v3 v3.7.1 ) diff --git a/go.sum b/go.sum index 62e32a8..3c370b6 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,5 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg= github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= @@ -11,11 +12,14 @@ github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/Qd github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/schollz/progressbar v1.0.0 h1:gbyFReLHDkZo8mxy/dLWMr+Mpb1MokGJ1FqCiqacjZM= +github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3 h1:2YkbhM98YoshI0K0BD95IoCFx+KNN1L/G0P5WzY2kac= +github.com/rzhade3/beaconspec v0.0.0-20220908173914-b45182d7ddf3/go.mod h1:iTcJ+0KrnJXKBZvYH/Q6GKLhFuiXzD3z2PRae7xWqpY= github.com/schollz/progressbar/v3 v3.7.1 h1:aQR/t6d+1nURSdoMn6c7n0vJi5xQ3KndpF0n7R5wrik= github.com/schollz/progressbar/v3 v3.7.1/go.mod h1:CG/f0JmacksUc6TkZToO7tVq4t03zIQSQUtTd7F9GR4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9 h1:umElSU9WZirRdgu2yFHY0ayQkEnKiOC1TtM3fWXFnoU= diff --git a/main.go b/main.go index 76c6fe9..ac11892 100644 --- a/main.go +++ b/main.go @@ -19,6 +19,7 @@ import ( "time" "github.com/fatih/color" + "github.com/rzhade3/beaconspec" "github.com/schollz/progressbar/v3" ) @@ -62,7 +63,6 @@ var err error var archivesPath string func main() { - var keywordFile string var dateParam string var outFile string @@ -80,7 +80,6 @@ func main() { flag.Usage = func() { fmt.Print(usage) } flag.Parse() - if keywordFile == "" || dateParam == "" || outFile == "" { crash("You must specify all arguments.", err) return @@ -191,7 +190,7 @@ func getArchive(body []byte, date string, keywordFile string, outfile string) { _, err := Unzip(filepath.Join(archivesPath, fullname, item.Name), filepath.Join(archivesPath, fullname)) if err != nil { os.Remove(filepath.Join(archivesPath, fullname, item.Name)) - crash(item.Name + " looks damaged. It's removed now. Run the program again to re-download.", err) + crash(item.Name+" looks damaged. It's removed now. Run the program again to re-download.", err) } } @@ -227,10 +226,9 @@ func getArchive(body []byte, date string, keywordFile string, outfile string) { } func searchFile(fileLocation string, keyword string, outfile string) { - var path string - if strings.HasPrefix(fileLocation, "archives"){ + if strings.HasPrefix(fileLocation, "archives") { path_parts := strings.Split(fileLocation, string(os.PathSeparator)) path = filepath.Join(path_parts[1], path_parts[2]) } else { @@ -250,54 +248,71 @@ func searchFile(fileLocation string, keyword string, outfile string) { panic(err) } defer f.Close() + + metadata, err := beaconspec.ReadMetadata(fileLocation) + if err != nil { + warning(err.Error()) + return + } + + var matcher func([]byte) bool if strings.HasPrefix(keyword, "regex") { - regexValue := strings.Split(keyword, " ")[1] - r, err := regexp.Compile(regexValue) - if err != nil { - warning("Invalid Regex!") - return - } - for scanner.Scan() { - if r.MatchString(scanner.Text()) { - textToWrite := strings.Split(scanner.Text(), "|")[1] - if _, err := f.WriteString(textToWrite + "\n"); err != nil { - panic(err) - } - } - } + matcher, err = regexMatch(keyword) + } else if strings.Contains(keyword, ",") { + matcher, err = multiKeywordMatcher(keyword) } else { - if strings.Contains(keyword, ",") { - keywords := strings.Split(keyword, ",") - for scanner.Scan() { - foundFlag := true - for i := 0; i < len(keywords); i++ { - if bytes.Contains(scanner.Bytes(), []byte(keywords[i])) { - continue - } else { - foundFlag = false - } - } - if foundFlag { - textToWrite := strings.Split(scanner.Text(), "|")[1] - if _, err := f.WriteString(textToWrite + "\n"); err != nil { - panic(err) - } - } - } + matcher, err = stringMatch(keyword) + } + if err != nil { + warning(err.Error()) + return + } + + for scanner.Scan() { + if matcher(scanner.Bytes()) { - } else { - toFind := []byte(keyword) - for scanner.Scan() { - if bytes.Contains(scanner.Bytes(), toFind) { - textToWrite := strings.Split(scanner.Text(), "|")[1] - if _, err := f.WriteString(textToWrite + "\n"); err != nil { - panic(err) - } - } + line, err := beaconspec.ParseLine(scanner.Text(), metadata) + if err != nil { + panic(err) + } + textToWrite := fmt.Sprintf("%s,%s\n", line.Source, line.Target) + if _, err := f.WriteString(textToWrite); err != nil { + panic(err) } } } +} + +func regexMatch(keyword string) (func([]byte) bool, error) { + regexValue := strings.Split(keyword, " ")[1] + r, err := regexp.Compile(regexValue) + return func(b []byte) bool { + s := string(b) + return r.MatchString(s) + }, err +} +func multiKeywordMatcher(keyword string) (func([]byte) bool, error) { + keywords := strings.Split(keyword, ",") + bytes_keywords := make([][]byte, len(keywords)) + for i, k := range keywords { + bytes_keywords[i] = []byte(k) + } + return func(text []byte) bool { + for _, k := range bytes_keywords { + if !bytes.Contains(text, k) { + return false + } + } + return true + }, nil +} + +func stringMatch(keyword string) (func([]byte) bool, error) { + bytes_keyword := []byte(keyword) + return func(b []byte) bool { + return bytes.Contains(b, bytes_keyword) + }, nil } func ifArchiveExists(fullname string) bool { @@ -367,20 +382,6 @@ func downloadFile(url string) { color.Green("Download Finished!") } -func ByteCountSI(b int64) string { - const unit = 1000 - if b < unit { - return fmt.Sprintf("%d B", b) - } - div, exp := int64(unit), 0 - for n := b / unit; n >= unit; n /= unit { - div *= unit - exp++ - } - return fmt.Sprintf("%.1f %cB", - float64(b)/float64(div), "kMGTPE"[exp]) -} - func Unzip(src string, dest string) ([]string, error) { var filenames []string r, err := zip.OpenReader(src) @@ -447,4 +448,4 @@ func crash(message string, err error) { func warning(message string) { color.Yellow("[WARNING]: " + message + "\n") -} \ No newline at end of file +}