diff --git a/.goreleaser.yml b/.goreleaser.yml
new file mode 100644
index 0000000..77a9033
--- /dev/null
+++ b/.goreleaser.yml
@@ -0,0 +1,35 @@
+# This is an example goreleaser.yaml file with some sane defaults.
+# Make sure to check the documentation at http://goreleaser.com
+before:
+  hooks:
+    # You may remove this if you don't use go modules.
+    - go mod download
+    # you may remove this if you don't need go generate
+    - go generate ./...
+builds:
+  - env:
+      - CGO_ENABLED=0
+    goos:
+      - linux
+      - windows
+      - darwin
+archives:
+  - replacements:
+      darwin: macOS
+      linux: Linux
+      windows: Windows
+      386: i386
+    format: tar.gz
+    format_overrides:
+      - goos: windows
+        format: zip
+checksum:
+  name_template: 'checksums.txt'
+snapshot:
+  name_template: "{{ .Tag }}-next"
+changelog:
+  sort: asc
+  filters:
+    exclude:
+      - '^docs:'
+      - '^test:'
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..3e5f6a4
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,8 @@
+module github.com/utkusen/urlhunter
+
+go 1.15
+
+require (
+	github.com/fatih/color v1.10.0
+	github.com/schollz/progressbar/v3 v3.7.1
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..62e32a8
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,30 @@
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg=
+github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=
+github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
+github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8=
+github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
+github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
+github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
+github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
+github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
+github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
+github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/schollz/progressbar v1.0.0 h1:gbyFReLHDkZo8mxy/dLWMr+Mpb1MokGJ1FqCiqacjZM=
+github.com/schollz/progressbar/v3 v3.7.1 h1:aQR/t6d+1nURSdoMn6c7n0vJi5xQ3KndpF0n7R5wrik=
+github.com/schollz/progressbar/v3 v3.7.1/go.mod h1:CG/f0JmacksUc6TkZToO7tVq4t03zIQSQUtTd7F9GR4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9 h1:umElSU9WZirRdgu2yFHY0ayQkEnKiOC1TtM3fWXFnoU=
+golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201113135734-0a15ea8d9b02 h1:5Ftd3YbC/kANXWCBjvppvUmv1BMakgFcBKA7MpYYp4M=
+golang.org/x/sys v0.0.0-20201113135734-0a15ea8d9b02/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..7b327a8
--- /dev/null
+++ b/main.go
@@ -0,0 +1,434 @@
+// Command urlhunter searches archive.org URL-shortener dumps for keywords and
+// appends the matching entries to an output file.
+package main
+
+import (
+	"archive/zip"
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/fatih/color"
+	"github.com/schollz/progressbar/v3"
+)
+
+// baseurl is the archive.org scrape query for "Urlteam Release" items,
+// requesting the identifier and item_size fields with count=10000.
+var baseurl string = "https://archive.org/services/search/v1/scrape?debug=false&xvar=production&total_only=false&count=10000&fields=identifier%2Citem_size&q=Urlteam%20Release"
+
+const (
+	// dateLayout is the YYYY-MM-DD layout accepted by the -date flag.
+	dateLayout = "2006-01-02"
+	// maxLineSize caps one dump line; bufio.Scanner's 64 KiB default would
+	// stop the scan at the first longer line.
+	maxLineSize = 1024 * 1024
+)
+
+// main validates the flags and runs the search for a single date, "latest",
+// or every day of a START:END range.
+func main() {
+	keywordFile := flag.String("keywords", "", "A txt file that contains strings to search.")
+	dateParam := flag.String("date", "", "A single date or a range to search. Single: YYYY-MM-DD Range:YYYY-MM-DD:YYYY-MM-DD")
+	outFile := flag.String("o", "", "Output file")
+	flag.Parse()
+	if *keywordFile == "" || *dateParam == "" || *outFile == "" {
+		color.Red("Please specify all arguments!")
+		flag.PrintDefaults()
+		return
+	}
+	// NOTE(review): the banner's internal spacing was flattened in the patch
+	// this file was recovered from; restore the ASCII art from upstream.
+	fmt.Println(`
+ o Utku Sen's
+ \_/\o
+ ( Oo) \|/
+ (_=-) .===O- ~~U~R~L~~ -O-
+ / \_/U' hunter /|\
+ || |_/
+ \\ | utkusen.com
+ {K || twitter.com/utkusen
+
+	`)
+	_ = os.Mkdir("archives", os.ModePerm)
+	if !strings.Contains(*dateParam, ":") {
+		if *dateParam != "latest" {
+			if _, err := time.Parse(dateLayout, *dateParam); err != nil {
+				color.Red("Wrong date format!")
+				return
+			}
+		}
+		getArchive(getArchiveList(), *dateParam, *keywordFile, *outFile)
+		color.Green("Search complete!")
+		return
+	}
+	bounds := strings.SplitN(*dateParam, ":", 2)
+	startDate, err := time.Parse(dateLayout, bounds[0])
+	if err != nil {
+		color.Red("Wrong date format!")
+		return
+	}
+	endDate, err := time.Parse(dateLayout, bounds[1])
+	if err != nil {
+		color.Red("Wrong date format!")
+		return
+	}
+	// The archive list is the same for every day, so fetch it once rather
+	// than once per day of the range.
+	archiveList := getArchiveList()
+	next := rangeDate(startDate, endDate)
+	for date := next(); !date.IsZero(); date = next() {
+		getArchive(archiveList, date.Format(dateLayout), *keywordFile, *outFile)
+	}
+	color.Green("Search complete!")
+}
+
+// getArchiveList downloads the archive index from baseurl and returns the raw
+// response body. It panics on a transport error or a non-200 status.
+func getArchiveList() []byte {
+	resp, err := http.Get(baseurl)
+	if err != nil {
+		panic(err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		panic(fmt.Errorf("fetching archive list: unexpected status %s", resp.Status))
+	}
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		panic(err)
+	}
+	return body
+}
+
+// getArchive picks the archive whose identifier contains date (or the last
+// listed item for "latest"), downloads and unpacks it unless its text files are
+// already on disk, then searches both dumps for every keyword in keywordFile.
+// body is the JSON returned by getArchiveList; matches are appended to outfile.
+func getArchive(body []byte, date string, keywordFile string, outfile string) {
+	fmt.Println("Search starting for: " + date)
+	type Response struct {
+		Items []struct {
+			Identifier string `json:"identifier"`
+			ItemSize   int64  `json:"item_size"`
+		} `json:"items"`
+		Count int `json:"count"`
+		Total int `json:"total"`
+	}
+	var response Response
+	if err := json.Unmarshal(body, &response); err != nil {
+		color.Red("Couldn't parse the archive list: %v", err)
+		return
+	}
+	var fullname string
+	if date == "latest" {
+		// An empty item list must not be indexed.
+		if n := len(response.Items); n > 0 {
+			fullname = response.Items[n-1].Identifier
+		}
+	} else {
+		for _, item := range response.Items {
+			if strings.Contains(item.Identifier, date) {
+				fullname = item.Identifier
+				break
+			}
+		}
+	}
+	if fullname == "" {
+		color.Red("Couldn't find an archive with that date!")
+		return
+	}
+	archiveDir := "archives/" + fullname
+	googtxt := archiveDir + "/goo-gl/______.txt"
+	bittxt := archiveDir + "/bitly_6/______.txt"
+	if ifArchiveExists(fullname) {
+		color.Cyan(fullname + " Archive already exists!")
+	} else {
+		// The zip names reuse the identifier's second "_"-separated field.
+		nameParts := strings.Split(fullname, "_")
+		if len(nameParts) < 2 {
+			color.Red("Unexpected archive name: %s", fullname)
+			return
+		}
+		googfile := "goo-gl." + nameParts[1] + ".zip"
+		bitfile := "bitly_6." + nameParts[1] + ".zip"
+		// At most one text file can exist here; remove it before re-extracting.
+		_ = os.Remove(googtxt)
+		_ = os.Remove(bittxt)
+		if !fileExists(archiveDir + "/" + googfile) {
+			color.Red(googfile + " doesn't exists locally.")
+			downloadFile("https://archive.org/download/" + fullname + "/" + googfile)
+		}
+		if !fileExists(archiveDir + "/" + bitfile) {
+			color.Red(bitfile + " doesn't exists locally.")
+			downloadFile("https://archive.org/download/" + fullname + "/" + bitfile)
+		}
+		color.Magenta("Unzipping: " + googfile)
+		if _, err := Unzip(archiveDir+"/"+googfile, archiveDir); err != nil {
+			color.Red(googfile + " looks damaged. It's removed now. Run the program again to re-download.")
+			os.Remove(archiveDir + "/" + googfile)
+			os.Exit(1)
+		}
+		color.Magenta("Unzipping: " + bitfile)
+		if _, err := Unzip(archiveDir+"/"+bitfile, archiveDir); err != nil {
+			color.Red(bitfile + " looks damaged. It's removed. Run the program again.")
+			os.Remove(archiveDir + "/" + bitfile)
+			os.Exit(1)
+		}
+		color.Cyan("Decompressing XZ Archives..")
+		if _, err := exec.Command("xz", "--decompress", googtxt+".xz").Output(); err != nil {
+			panic(err)
+		}
+		if _, err := exec.Command("xz", "--decompress", bittxt+".xz").Output(); err != nil {
+			panic(err)
+		}
+		color.Cyan("Removing Zip Files..")
+		_ = os.Remove(archiveDir + "/" + googfile)
+		_ = os.Remove(archiveDir + "/" + bitfile)
+	}
+	fileBytes, err := ioutil.ReadFile(keywordFile)
+	if err != nil {
+		panic(err)
+	}
+	for _, keyword := range strings.Split(string(fileBytes), "\n") {
+		// A CRLF keyword file would otherwise leave a "\r" on every keyword.
+		keyword = strings.TrimSuffix(keyword, "\r")
+		if keyword == "" {
+			continue
+		}
+		searchFile(googtxt, keyword, outfile)
+		searchFile(bittxt, keyword, outfile)
+	}
+}
+
+// buildMatcher turns one keyword-file line into a predicate over dump lines.
+// "regex PATTERN" compiles PATTERN; anything else is split on "," and every
+// part must occur in the line.
+func buildMatcher(keyword string) (func(line []byte) bool, error) {
+	// Require the separating space so a plain keyword that merely starts with
+	// "regex" is not indexed past the end of a one-element split.
+	if strings.HasPrefix(keyword, "regex ") {
+		r, err := regexp.Compile(strings.TrimPrefix(keyword, "regex "))
+		if err != nil {
+			return nil, err
+		}
+		return r.Match, nil
+	}
+	terms := bytes.Split([]byte(keyword), []byte(","))
+	return func(line []byte) bool {
+		for _, term := range terms {
+			if !bytes.Contains(line, term) {
+				return false
+			}
+		}
+		return true
+	}, nil
+}
+
+// searchFile scans fileLocation line by line and, for every line matching
+// keyword, appends the text after the first "|" to outfile. Lines without a
+// "|" are skipped. It panics when either file cannot be opened or written.
+func searchFile(fileLocation string, keyword string, outfile string) {
+	// Show "<archive>/<service>" rather than the full path when available.
+	path := fileLocation
+	if segs := strings.Split(fileLocation, "/"); len(segs) > 2 {
+		path = segs[1] + "/" + segs[2]
+	}
+	fmt.Println("Searching: " + keyword + " in: " + path)
+	match, err := buildMatcher(keyword)
+	if err != nil {
+		color.Red("Invalid Regex!")
+		return
+	}
+	in, err := os.Open(fileLocation)
+	if err != nil {
+		panic(err)
+	}
+	defer in.Close()
+	out, err := os.OpenFile(outfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		panic(err)
+	}
+	defer out.Close()
+
+	scanner := bufio.NewScanner(in)
+	scanner.Buffer(make([]byte, 0, 64*1024), maxLineSize)
+	w := bufio.NewWriter(out)
+	for scanner.Scan() {
+		line := scanner.Bytes()
+		if !match(line) {
+			continue
+		}
+		sep := bytes.IndexByte(line, '|')
+		if sep < 0 {
+			continue
+		}
+		w.Write(line[sep+1:])
+		w.WriteByte('\n')
+	}
+	// bufio.Writer keeps the first write error, so one check covers them all.
+	if err := w.Flush(); err != nil {
+		panic(err)
+	}
+	if err := scanner.Err(); err != nil {
+		color.Red("Stopped reading %s: %v", path, err)
+	}
+}
+
+// ifArchiveExists reports whether both extracted text dumps of the archive
+// named fullname are present under archives/.
+func ifArchiveExists(fullname string) bool {
+	dir := "archives/" + fullname
+	return fileExists(dir+"/goo-gl/______.txt") && fileExists(dir+"/bitly_6/______.txt")
+}
+
+// fileExists reports whether filename can be stat'ed and is not a directory.
+func fileExists(filename string) bool {
+	info, err := os.Stat(filename)
+	// Any stat failure (not only "not exist") leaves info nil.
+	if err != nil {
+		return false
+	}
+	return !info.IsDir()
+}
+
+// downloadFile fetches url into archives/<dir>/<file>, where <dir> and <file>
+// are the fifth and sixth "/"-separated parts of url, showing a progress bar.
+// It panics on any failure and removes a partially written file.
+func downloadFile(url string) {
+	segs := strings.Split(url, "/")
+	if len(segs) < 6 {
+		panic(fmt.Errorf("unexpected download url %q", url))
+	}
+	dirname, filename := segs[4], segs[5]
+	target := "archives/" + dirname + "/" + filename
+	fmt.Println("Downloading: " + url)
+	_ = os.MkdirAll("archives/"+dirname, os.ModePerm)
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		panic(err)
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		panic(err)
+	}
+	defer resp.Body.Close()
+	// Without this check an error page would be saved as the zip file.
+	if resp.StatusCode != http.StatusOK {
+		panic(fmt.Errorf("downloading %s: unexpected status %s", url, resp.Status))
+	}
+	// O_TRUNC: a shorter download must not keep the tail of an older file.
+	f, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
+	if err != nil {
+		panic(err)
+	}
+	bar := progressbar.DefaultBytes(
+		resp.ContentLength,
+		"",
+	)
+	_, err = io.Copy(io.MultiWriter(f, bar), resp.Body)
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		_ = os.Remove(target)
+		panic(err)
+	}
+	color.Green("Download Finished!")
+}
+
+// ByteCountSI formats b using decimal (power-of-1000) units, for example
+// "999 B" or "1.5 kB".
+func ByteCountSI(b int64) string {
+	const unit = 1000
+	if b < unit {
+		return fmt.Sprintf("%d B", b)
+	}
+	div, exp := int64(unit), 0
+	for n := b / unit; n >= unit; n /= unit {
+		div *= unit
+		exp++
+	}
+	return fmt.Sprintf("%.1f %cB",
+		float64(b)/float64(div), "kMGTPE"[exp])
+}
+
+// Unzip extracts the zip archive src into dest and returns the paths it
+// created. Entries that would land outside dest are rejected.
+func Unzip(src string, dest string) ([]string, error) {
+	var filenames []string
+	r, err := zip.OpenReader(src)
+	if err != nil {
+		return filenames, err
+	}
+	defer r.Close()
+	for _, f := range r.File {
+		fpath := filepath.Join(dest, f.Name)
+		if !strings.HasPrefix(fpath, filepath.Clean(dest)+string(os.PathSeparator)) {
+			return filenames, fmt.Errorf("%s: illegal file path", fpath)
+		}
+
+		filenames = append(filenames, fpath)
+		if f.FileInfo().IsDir() {
+			if err := os.MkdirAll(fpath, os.ModePerm); err != nil {
+				return filenames, err
+			}
+			continue
+		}
+		if err := extractZipFile(f, fpath); err != nil {
+			return filenames, err
+		}
+	}
+	return filenames, nil
+}
+
+// extractZipFile writes the contents of the zip entry f to fpath, creating
+// parent directories as needed and closing both handles on every path.
+func extractZipFile(f *zip.File, fpath string) error {
+	if err := os.MkdirAll(filepath.Dir(fpath), os.ModePerm); err != nil {
+		return err
+	}
+	rc, err := f.Open()
+	if err != nil {
+		return err
+	}
+	defer rc.Close()
+	outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
+	if err != nil {
+		return err
+	}
+	_, err = io.Copy(outFile, rc)
+	if closeErr := outFile.Close(); err == nil {
+		err = closeErr
+	}
+	return err
+}
+
+// rangeDate returns an iterator over the days from start to end inclusive,
+// truncated to midnight UTC. It yields the zero time once the range is
+// exhausted.
+func rangeDate(start, end time.Time) func() time.Time {
+	y, m, d := start.Date()
+	start = time.Date(y, m, d, 0, 0, 0, 0, time.UTC)
+	y, m, d = end.Date()
+	end = time.Date(y, m, d, 0, 0, 0, 0, time.UTC)
+
+	return func() time.Time {
+		if start.After(end) {
+			return time.Time{}
+		}
+		date := start
+		start = start.AddDate(0, 0, 1)
+		return date
+	}
+}