Skip to content

Commit

Permalink
Added -a, --archives to specify a location to store the archives
Browse files Browse the repository at this point in the history
  • Loading branch information
ItsIgnacioPortal committed May 19, 2022
1 parent 47d4faf commit 161bbb2
Showing 1 changed file with 35 additions and 19 deletions.
54 changes: 35 additions & 19 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ type Files struct {
} `xml:"file"`
}

const usage = `Usage: ./urlhunter --keywords /path/to/keywordsFile --date DATE-RANGE-HERE --output /path/to/outputFile
const usage = `Usage: ./urlhunter --keywords /path/to/keywordsFile --date DATE-RANGE-HERE --output /path/to/outputFile [--archives /path/to/archives]
Example: ./urlhunter --keywords keywords.txt --date 2020-11-20 --output out.txt
-k, --keywords /path/to/keywordsFile
Path to a file that contains strings to search.
Expand All @@ -53,9 +53,13 @@ Example: ./urlhunter --keywords keywords.txt --date 2020-11-20 --output out.txt
-o, --output /path/to/outputFile
Path to a file where the output will be written.
-a, --archives /path/to/archives
Path to the directory where you're storing your archive files. If this is your first time running this tool, the archives will be downloaded on a new ./archives folder
`

var err error
var archivesPath string

func main() {

Expand All @@ -69,7 +73,9 @@ func main() {
flag.StringVar(&dateParam, "date", "", "A single date or a range to search. Single: YYYY-MM-DD Range:YYYY-MM-DD:YYYY-MM-DD")
flag.StringVar(&outFile, "o", "", "Output file")
flag.StringVar(&outFile, "output", "", "Output file")

flag.StringVar(&archivesPath, "a", "archives", "Archives file path")
flag.StringVar(&archivesPath, "archives", "archives", "Archives file path")

//https://www.antoniojgutierrez.com/posts/2021-05-14-short-and-long-options-in-go-flags-pkg/
flag.Usage = func() { fmt.Print(usage) }
flag.Parse()
Expand All @@ -79,6 +85,7 @@ func main() {
crash("You must specify all arguments.", err)
return
}

fmt.Println(`
o Utku Sen's
\_/\o
Expand All @@ -90,7 +97,7 @@ func main() {
{K || twitter.com/utkusen
`)
_ = os.Mkdir("archives", os.ModePerm)
_ = os.Mkdir(archivesPath, os.ModePerm)
if strings.Contains(dateParam, ":") {
startDate, err := time.Parse("2006-01-02", strings.Split(dateParam, ":")[0])
if err != nil {
Expand Down Expand Up @@ -169,28 +176,28 @@ func getArchive(body []byte, date string, keywordFile string, outfile string) {
info(fullname + " already exists locally. Skipping download..")
} else {
for _, item := range dumpFiles.File {
dumpFilepath, _ := filepath.Glob(filepath.Join("archives", fullname, item.DumpType, "*.txt"))
dumpFilepath, _ := filepath.Glob(filepath.Join(archivesPath, fullname, item.DumpType, "*.txt"))
if len(dumpFilepath) > 0 {
_ = os.Remove(dumpFilepath[0])
}

if !fileExists(filepath.Join("archives", fullname, item.Name)) {
if !fileExists(filepath.Join(archivesPath, fullname, item.Name)) {
info(item.Name + " doesn't exist locally. The file will be downloaded.")
url1 := "https://archive.org/download/" + fullname + "/" + item.Name
downloadFile(url1)
}

info("Unzipping: " + item.Name)
_, err := Unzip(filepath.Join("archives", fullname, item.Name), filepath.Join("archives", fullname))
_, err := Unzip(filepath.Join(archivesPath, fullname, item.Name), filepath.Join(archivesPath, fullname))
if err != nil {
os.Remove(filepath.Join("archives", fullname, item.Name))
os.Remove(filepath.Join(archivesPath, fullname, item.Name))
crash(item.Name + " looks damaged. It's removed now. Run the program again to re-download.", err)
}
}

info("Decompressing XZ Archives..")
for _, item := range dumpFiles.File {
tarfile, _ := filepath.Glob(filepath.Join("archives", fullname, item.DumpType, "*.txt.xz"))
tarfile, _ := filepath.Glob(filepath.Join(archivesPath, fullname, item.DumpType, "*.txt.xz"))
_, err := exec.Command("xz", "--decompress", tarfile[0]).Output()
if err != nil {
crash("Error decompressing the downloaded archives", err)
Expand All @@ -199,7 +206,7 @@ func getArchive(body []byte, date string, keywordFile string, outfile string) {

info("Removing Zip Files..")
for _, item := range dumpFiles.File {
_ = os.Remove(filepath.Join("archives", fullname, item.Name))
_ = os.Remove(filepath.Join(archivesPath, fullname, item.Name))
}
}
fileBytes, err := ioutil.ReadFile(keywordFile)
Expand All @@ -212,17 +219,26 @@ func getArchive(body []byte, date string, keywordFile string, outfile string) {
continue
}
for _, item := range dumpFiles.File {
dump_path, _ := filepath.Glob(filepath.Join("archives", fullname, item.DumpType, "*.txt"))
dump_path, _ := filepath.Glob(filepath.Join(archivesPath, fullname, item.DumpType, "*.txt"))
searchFile(dump_path[0], keywordSlice[i], outfile)
}
}

}

func searchFile(fileLocation string, keyword string, outfile string) {
path_parts := strings.Split(fileLocation, string(os.PathSeparator))
path := filepath.Join(path_parts[1], path_parts[2])
info("Searching: " + keyword + " in: " + path)

var path string

if strings.HasPrefix(fileLocation, "archives"){
path_parts := strings.Split(fileLocation, string(os.PathSeparator))
path = filepath.Join(path_parts[1], path_parts[2])
} else {
path = fileLocation
}

info("Searching: \"" + keyword + "\" in " + path)

f, err := os.Open(fileLocation)
scanner := bufio.NewScanner(f)
if err != nil {
Expand Down Expand Up @@ -287,7 +303,7 @@ func searchFile(fileLocation string, keyword string, outfile string) {
func ifArchiveExists(fullname string) bool {
dumpFiles := archiveMetadata(fullname)
for _, item := range dumpFiles.File {
archiveFilepaths, err := filepath.Glob(filepath.Join("archives", fullname, item.DumpType, "*.txt"))
archiveFilepaths, err := filepath.Glob(filepath.Join(archivesPath, fullname, item.DumpType, "*.txt"))
if len(archiveFilepaths) == 0 || err != nil {
return false
}
Expand All @@ -297,12 +313,12 @@ func ifArchiveExists(fullname string) bool {

func archiveMetadata(fullname string) Files {
metadataFilename := "urlteam_" + strings.Split(fullname, "_")[1] + "_files.xml"
if !fileExists(filepath.Join("archives", fullname, metadataFilename)) {
if !fileExists(filepath.Join(archivesPath, fullname, metadataFilename)) {
info(metadataFilename + " doesn't exist locally. The file will be downloaded.")
metadataUrl := "https://archive.org/download/" + fullname + "/" + metadataFilename
downloadFile(metadataUrl)
}
byteValue, _ := ioutil.ReadFile(filepath.Join("archives", fullname, metadataFilename))
byteValue, _ := ioutil.ReadFile(filepath.Join(archivesPath, fullname, metadataFilename))
files := Files{}
xml.Unmarshal(byteValue, &files)
// Not all files are dumps, this struct will only contain zip dumps
Expand All @@ -328,7 +344,7 @@ func downloadFile(url string) {
dirname := strings.Split(url, "/")[4]
filename := strings.Split(url, "/")[5]
info("Downloading: " + url)
_ = os.MkdirAll(filepath.Join("archives", dirname), os.ModePerm)
_ = os.MkdirAll(filepath.Join(archivesPath, dirname), os.ModePerm)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
panic(err)
Expand All @@ -338,7 +354,7 @@ func downloadFile(url string) {
panic(err)
}
defer resp.Body.Close()
f, err := os.OpenFile(filepath.Join("archives", dirname, filename), os.O_CREATE|os.O_WRONLY, 0644)
f, err := os.OpenFile(filepath.Join(archivesPath, dirname, filename), os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -421,7 +437,7 @@ func rangeDate(start, end time.Time) func() time.Time {
}

func info(message string) {
fmt.Print("[+]: " + message + "\n")
fmt.Println("[+]: " + message)
}

func crash(message string, err error) {
Expand Down

0 comments on commit 161bbb2

Please sign in to comment.