Skip to content

Commit

Permalink
Moved from hacks repo
Browse files Browse the repository at this point in the history
  • Loading branch information
tomnomnom committed Jan 24, 2018
0 parents commit c8d79b3
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 0 deletions.
15 changes: 15 additions & 0 deletions README.mkd
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# waybackurls

Accept line-delimited domains on stdin, fetch known URLs from the Wayback Machine for `*.domain` and output them on stdout.

Usage example:

```
▶ cat domains.txt | waybackurls > urls
```

Install:

```
▶ go get github.com/tomnomnom/waybackurls
```
84 changes: 84 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package main

import (
"bufio"
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"net/http"
"os"
)

// fetchURL is the Wayback Machine CDX API endpoint. %s is replaced with a
// domain; the query asks for all archived captures under *.domain, returns
// JSON rows containing only the original URL, and collapses duplicates by
// url key so each URL appears once.
const fetchURL = "http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey"

// main collects target domains — the first CLI argument if one is given,
// otherwise one domain per line on stdin — then fetches and prints every
// Wayback Machine URL known for each domain, one per line, on stdout.
// Per-domain fetch failures are reported on stderr and do not stop the run.
func main() {

	var domains []string

	flag.Parse()

	if flag.NArg() > 0 {
		// fetch for a single domain given on the command line
		domains = []string{flag.Arg(0)}
	} else {
		// fetch for all domains from stdin, one per line
		sc := bufio.NewScanner(os.Stdin)
		for sc.Scan() {
			domains = append(domains, sc.Text())
		}

		// report a read error but still process whatever was read
		if err := sc.Err(); err != nil {
			fmt.Fprintf(os.Stderr, "failed to read input: %s\n", err)
		}
	}

	for _, domain := range domains {
		urls, err := getWaybackURLs(domain)
		if err != nil {
			// include the underlying error so failures are diagnosable
			// (previously err was checked but its message discarded)
			fmt.Fprintf(os.Stderr, "failed to fetch URLs for [%s]: %s\n", domain, err)
			continue
		}

		for _, url := range urls {
			fmt.Println(url)
		}
	}
}

// getWaybackURLs queries the Wayback Machine CDX API for all URLs archived
// under *.domain and returns them, duplicates collapsed by the API itself.
// On any error the returned slice is empty (never nil) alongside the error.
func getWaybackURLs(domain string) ([]string, error) {

	out := make([]string, 0)

	res, err := http.Get(fmt.Sprintf(fetchURL, domain))
	if err != nil {
		return out, err
	}
	defer res.Body.Close()

	// A non-200 body is an error page, not CDX JSON; fail before parsing.
	if res.StatusCode != http.StatusOK {
		return out, fmt.Errorf("wayback API returned %s", res.Status)
	}

	raw, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return out, err
	}

	// The response is a JSON array of rows, each row an array of fields
	// (here a single field, "original", per the fl= query parameter).
	var wrapper [][]string
	if err := json.Unmarshal(raw, &wrapper); err != nil {
		// BUG FIX: this error was previously assigned but never checked,
		// silently returning an empty result on malformed responses.
		return out, err
	}

	// The first row is the header (the literal string "original"); skip it.
	for i, urls := range wrapper {
		if i == 0 {
			continue
		}
		out = append(out, urls...)
	}

	return out, nil
}

0 comments on commit c8d79b3

Please sign in to comment.