Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for datastore publish #569

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,17 @@ Flags:
-c, --config string Configuration file path, defaults: ./wayback.conf, ~/wayback.conf, /etc/wayback.conf
-d, --daemon strings Run as daemon service, supported services are telegram, web, mastodon, twitter, discord, slack, irc, xmpp
--debug Enable debug mode (default mode is false)
--ga Wayback webpages to Ghostarchive (default true)
-h, --help help for wayback
--ia Wayback webpages to Internet Archive
--ia Wayback webpages to Internet Archive (default true)
--info Show application information
--ip Wayback webpages to IPFS
--ip Wayback webpages to IPFS (default true)
--ipfs-host string IPFS daemon host, do not require, unless enable ipfs (default "127.0.0.1")
-m, --ipfs-mode string IPFS mode (default "pinner")
-p, --ipfs-port uint IPFS daemon port (default 5001)
--is Wayback webpages to Archive Today
--ph Wayback webpages to Telegraph
--is Wayback webpages to Archive Today (default true)
--migrate Run SQL migrations
--ph Wayback webpages to Telegraph (default true)
--print Show application configurations
-t, --token string Telegram Bot API Token
--tor Snapshot webpage via Tor anonymity network
Expand Down
5 changes: 4 additions & 1 deletion cmd/wayback/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ var (

configFile string

migrate bool

rootCmd = &cobra.Command{
Use: "wayback",
Short: "A command-line tool and daemon service for archiving webpages.",
Expand Down Expand Up @@ -75,7 +77,7 @@ func init() {
rootCmd.Flags().BoolVarP(&ip, "ip", "", true, "Wayback webpages to IPFS")
rootCmd.Flags().BoolVarP(&ph, "ph", "", true, "Wayback webpages to Telegraph")
rootCmd.Flags().BoolVarP(&ga, "ga", "", true, "Wayback webpages to Ghostarchive")
rootCmd.Flags().StringSliceVarP(&daemon, "daemon", "d", []string{}, "Run as daemon service, supported services are telegram, web, mastodon, twitter, discord, slack, irc")
rootCmd.Flags().StringSliceVarP(&daemon, "daemon", "d", []string{}, "Run as daemon service, supported services are telegram, web, mastodon, twitter, discord, slack, irc, xmpp")
rootCmd.Flags().StringVarP(&host, "ipfs-host", "", "127.0.0.1", "IPFS daemon host, do not require, unless enable ipfs")
rootCmd.Flags().UintVarP(&port, "ipfs-port", "p", 5001, "IPFS daemon port")
rootCmd.Flags().StringVarP(&mode, "ipfs-mode", "m", "pinner", "IPFS mode")
Expand All @@ -88,6 +90,7 @@ func init() {
rootCmd.Flags().BoolVarP(&debug, "debug", "", false, "Enable debug mode (default mode is false)")
rootCmd.Flags().BoolVarP(&info, "info", "", false, "Show application information")
rootCmd.Flags().BoolVarP(&print, "print", "", false, "Show application configurations")
rootCmd.Flags().BoolVarP(&migrate, "migrate", "", false, "Run SQL migrations")
}

func checkRequiredFlags(cmd *cobra.Command) error {
Expand Down
26 changes: 25 additions & 1 deletion cmd/wayback/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,36 @@ import (
var signalChan chan (os.Signal) = make(chan os.Signal, 1)

func serve(_ *cobra.Command, opts *config.Options, _ []string) {
store, err := storage.Open(opts, "")
db, err := storage.NewConnectionPool(
opts.DatabaseURL(),
opts.DatabaseMinConns(),
opts.DatabaseMaxConns(),
opts.DatabaseConnectionLifetime(),
)
if err != nil {
logger.Fatal("unable to connect to database: %v", err)
}
defer db.Close()

bolt, err := storage.Open(opts, "")
if err != nil {
logger.Fatal("open storage failed: %v", err)
}
store := storage.NewStorage(db, bolt)
defer store.Close()

if !opts.IsDefaultDatabaseURL() {
if err = store.Ping(); err != nil {
logger.Fatal("ping database failed: %v", err)
}

if migrate {
if err = storage.Migrate(db); err != nil {
logger.Fatal("migrate database failed: %v", err)
}
}
}

cfg := []pooling.Option{
pooling.Capacity(opts.PoolingSize()),
pooling.Timeout(opts.WaybackTimeout()),
Expand Down
180 changes: 180 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,186 @@ func TestIPFSMode(t *testing.T) {
}
}

// TestDatabaseURL expects Options.DatabaseURL to return the value supplied
// through the WAYBACK_DATABASE_URL environment variable.
func TestDatabaseURL(t *testing.T) {
	type testCase struct {
		url      string
		expected string
	}
	cases := []testCase{
		{url: defDatabaseURL, expected: defDatabaseURL},
		{url: "foo bar", expected: "foo bar"},
	}

	for idx, tc := range cases {
		t.Run(strconv.Itoa(idx), func(t *testing.T) {
			// Wipe the environment so only the variable under test is set.
			os.Clearenv()
			os.Setenv("WAYBACK_DATABASE_URL", tc.url)

			p := NewParser()
			opts, err := p.ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			if got := opts.DatabaseURL(); got != tc.expected {
				t.Errorf(`Unexpected database URL, got %v instead of %s`, got, tc.expected)
			}
		})
	}
}

// TestIsDefaultDatabaseURL expects Options.IsDefaultDatabaseURL to be true
// only when WAYBACK_DATABASE_URL carries the default connection string.
func TestIsDefaultDatabaseURL(t *testing.T) {
	type testCase struct {
		url      string
		expected bool
	}
	cases := []testCase{
		{url: defDatabaseURL, expected: true},
		{url: "foo bar", expected: false},
	}

	for idx, tc := range cases {
		t.Run(strconv.Itoa(idx), func(t *testing.T) {
			// Wipe the environment so only the variable under test is set.
			os.Clearenv()
			os.Setenv("WAYBACK_DATABASE_URL", tc.url)

			p := NewParser()
			opts, err := p.ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			if got := opts.IsDefaultDatabaseURL(); got != tc.expected {
				t.Errorf(`Unexpected default database URL, got %t instead of %t`, got, tc.expected)
			}
		})
	}
}

// TestDatabaseMaxConns expects Options.DatabaseMaxConns to return the number
// supplied through the WAYBACK_DATABASE_MAX_CONNS environment variable.
func TestDatabaseMaxConns(t *testing.T) {
	type testCase struct {
		maxConns int
		expected int
	}
	cases := []testCase{
		{maxConns: defDatabaseMaxConns, expected: defDatabaseMaxConns},
		{maxConns: 100, expected: 100},
	}

	for idx, tc := range cases {
		t.Run(strconv.Itoa(idx), func(t *testing.T) {
			// Wipe the environment so only the variable under test is set.
			os.Clearenv()
			os.Setenv("WAYBACK_DATABASE_MAX_CONNS", strconv.Itoa(tc.maxConns))

			p := NewParser()
			opts, err := p.ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			if got := opts.DatabaseMaxConns(); got != tc.expected {
				t.Errorf(`Unexpected maxConns, got %v instead of %d`, got, tc.expected)
			}
		})
	}
}

// TestDatabaseMinConns expects Options.DatabaseMinConns to return the number
// supplied through the WAYBACK_DATABASE_MIN_CONNS environment variable.
func TestDatabaseMinConns(t *testing.T) {
	type testCase struct {
		minConns int
		expected int
	}
	cases := []testCase{
		{minConns: defDatabaseMinConns, expected: defDatabaseMinConns},
		{minConns: 100, expected: 100},
	}

	for idx, tc := range cases {
		t.Run(strconv.Itoa(idx), func(t *testing.T) {
			// Wipe the environment so only the variable under test is set.
			os.Clearenv()
			os.Setenv("WAYBACK_DATABASE_MIN_CONNS", strconv.Itoa(tc.minConns))

			p := NewParser()
			opts, err := p.ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			if got := opts.DatabaseMinConns(); got != tc.expected {
				t.Errorf(`Unexpected minConns, got %v instead of %d`, got, tc.expected)
			}
		})
	}
}

// TestDatabaseConnectionLifetime expects Options.DatabaseConnectionLifetime to
// return the number supplied through WAYBACK_DATABASE_CONNECTION_LIFETIME,
// interpreted as minutes.
func TestDatabaseConnectionLifetime(t *testing.T) {
	var tests = []struct {
		connectionLifetime int
		expected           time.Duration
	}{
		{
			connectionLifetime: defDatabaseConnectionLifetime,
			expected:           defDatabaseConnectionLifetime * time.Minute,
		},
		{
			connectionLifetime: 100,
			expected:           100 * time.Minute,
		},
	}

	for i, test := range tests {
		t.Run(strconv.Itoa(i), func(t *testing.T) {
			// Wipe the environment so only the variable under test is set.
			os.Clearenv()
			os.Setenv("WAYBACK_DATABASE_CONNECTION_LIFETIME", strconv.Itoa(test.connectionLifetime))

			parser := NewParser()
			opts, err := parser.ParseEnvironmentVariables()
			if err != nil {
				t.Fatalf(`Parsing environment variables failed: %v`, err)
			}

			expected := test.expected
			got := opts.DatabaseConnectionLifetime()

			if got != expected {
				// Both values are time.Duration; format them with %v so the
				// message compares like with like (e.g. "1h40m0s") instead of
				// printing the expected value as raw nanoseconds via %d.
				t.Errorf(`Unexpected connection lifetime, got %v instead of %v`, got, expected)
			}
		})
	}
}

func TestIPFSTarget(t *testing.T) {
var tests = []struct {
token string // managed ipfs token
Expand Down
45 changes: 45 additions & 0 deletions config/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ const (

defOmnivoreApikey = ""

defRunMigrations = false
defDatabaseURL = "user=postgres password=postgres dbname=wayback sslmode=disable"
defDatabaseMaxConns = 20
defDatabaseMinConns = 1
defDatabaseConnectionLifetime = 5

maxAttachSizeTelegram = 50000000 // 50MB
maxAttachSizeDiscord = 8000000 // 8MB
maxAttachSizeSlack = 5000000000 // 5GB
Expand Down Expand Up @@ -131,6 +137,7 @@ type Options struct {

ipfs *ipfs
slots map[string]bool
database *database
telegram *telegram
mastodon *mastodon
discord *discord
Expand Down Expand Up @@ -160,6 +167,13 @@ type Options struct {
waybackFallback bool
}

// database groups the database connection settings held by Options.
type database struct {
	// url is the connection string (WAYBACK_DATABASE_URL).
	url string

	// maxConns and minConns are the maximum and minimum number of
	// database connections.
	maxConns, minConns int

	// connectionLifetime is the maximum connection lifetime, in minutes.
	connectionLifetime int
}

type ipfs struct {
host string
port int
Expand Down Expand Up @@ -282,6 +296,12 @@ func NewOptions() *Options {
waybackMaxRetries: defWaybackMaxRetries,
waybackUserAgent: defWaybackUserAgent,
waybackFallback: defWaybackFallback,
database: &database{
url: defDatabaseURL,
maxConns: defDatabaseMaxConns,
minConns: defDatabaseMinConns,
connectionLifetime: defDatabaseConnectionLifetime,
},
ipfs: &ipfs{
host: defIPFSHost,
port: defIPFSPort,
Expand Down Expand Up @@ -443,6 +463,31 @@ func (o *Options) EnabledMetrics() bool {
return o.metrics
}

// IsDefaultDatabaseURL reports whether the configured database URL equals
// the built-in default, defDatabaseURL.
func (o *Options) IsDefaultDatabaseURL() bool {
	configured := o.database.url
	return configured == defDatabaseURL
}

// DatabaseURL returns the configured database connection string.
func (o *Options) DatabaseURL() string {
	db := o.database
	return db.url
}

// DatabaseMaxConns returns the configured upper bound on the number of
// database connections.
func (o *Options) DatabaseMaxConns() int {
	db := o.database
	return db.maxConns
}

// DatabaseMinConns returns the configured lower bound on the number of
// database connections.
func (o *Options) DatabaseMinConns() int {
	db := o.database
	return db.minConns
}

// DatabaseConnectionLifetime returns how long a connection may be reused.
// The stored setting is a count of minutes and is converted to a
// time.Duration here.
func (o *Options) DatabaseConnectionLifetime() time.Duration {
	minutes := time.Duration(o.database.connectionLifetime)
	return minutes * time.Minute
}

// IPFSHost returns the host of IPFS daemon service.
func (o *Options) IPFSHost() string {
return o.ipfs.host
Expand Down
8 changes: 8 additions & 0 deletions config/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ func (p *Parser) parseLines(lines []string) (err error) {
p.opts.chromeRemoteAddr = parseString(val, defChromeRemoteAddr)
case "WAYBACK_PROXY":
p.opts.proxy = parseString(val, defProxy)
case "WAYBACK_DATABASE_URL":
p.opts.database.url = parseString(val, defDatabaseURL)
case "WAYBACK_DATABASE_MAX_CONNS":
p.opts.database.maxConns = parseInt(val, defDatabaseMaxConns)
case "WAYBACK_DATABASE_MIN_CONNS":
p.opts.database.minConns = parseInt(val, defDatabaseMinConns)
case "WAYBACK_DATABASE_CONNECTION_LIFETIME":
p.opts.database.connectionLifetime = parseInt(val, defDatabaseConnectionLifetime)
case "WAYBACK_IPFS_HOST":
p.opts.ipfs.host = parseString(val, defIPFSHost)
case "WAYBACK_IPFS_PORT":
Expand Down
4 changes: 4 additions & 0 deletions docs/environment.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ Use the `-c` / `--config` option to specify the build definition file to use.
| - | `WAYBACK_MEILI_INDEXING` | `capsules` | Meilisearch indexing name |
| - | `WAYBACK_MEILI_APIKEY` | - | Meilisearch admin API key |
| - | `WAYBACK_OMNIVORE_APIKEY` | - | Omnivore API key |
| - | `WAYBACK_DATABASE_URL` | `user=postgres password=postgres dbname=wayback sslmode=disable` | The connection string of the Postgres database |
| - | `WAYBACK_DATABASE_MAX_CONNS` | `20` | Maximum connections of the Postgres database |
| - | `WAYBACK_DATABASE_MIN_CONNS` | `1` | Minimum connections of the Postgres database |
| - | `WAYBACK_DATABASE_CONNECTION_LIFETIME` | `5` | Maximum connection lifetime of the Postgres database, in minutes |
| `-d`, `--daemon` | - | - | Run as daemon service, e.g. `telegram`, `web`, `mastodon`, `twitter`, `discord` |
| `--ia` | `WAYBACK_ENABLE_IA` | `true` | Wayback webpages to **Internet Archive** |
| `--is` | `WAYBACK_ENABLE_IS` | `true` | Wayback webpages to **Archive Today** |
Expand Down
13 changes: 13 additions & 0 deletions docs/integrations/datastore.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
title: Publish to Database
---

Note: Only Postgres is supported.

## Configuration

- `WAYBACK_DATABASE_URL`: The connection string of the Postgres database, e.g. `user=postgres password=postgres dbname=wayback sslmode=disable`.
- `WAYBACK_DATABASE_MAX_CONNS`: Maximum connections of the Postgres database (optional).
- `WAYBACK_DATABASE_MIN_CONNS`: Minimum connections of the Postgres database (optional).
- `WAYBACK_DATABASE_CONNECTION_LIFETIME`: Maximum connection lifetime of the Postgres database, in minutes (optional, default `5`).

Loading
Loading