Skip to content

Commit

Permalink
Keep track of most recent item in the feed
Browse files Browse the repository at this point in the history
And use it as termination sentinel.

Also add go module files.

Fixes #12
Fixes #15
  • Loading branch information
mpl committed May 10, 2020
1 parent 6a2f034 commit ecbc0ac
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 13 deletions.
8 changes: 8 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module github.com/mpl/gphotos-cdp

go 1.12

require (
github.com/chromedp/cdproto v0.0.0-20190812224334-39ef923dcb8d
github.com/chromedp/chromedp v0.4.0
)
16 changes: 16 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
github.com/chromedp/cdproto v0.0.0-20190812224334-39ef923dcb8d h1:00kLGv5nKzpFchNhGDXDRbKtYx/WoT983Ka2t8/pzRE=
github.com/chromedp/cdproto v0.0.0-20190812224334-39ef923dcb8d/go.mod h1:0YChpVzuLJC5CPr+x3xkHN6Z8KOSXjNbL7qV8Wc4GW0=
github.com/chromedp/chromedp v0.4.0 h1:0AJC5ejETuh/6n7Tcsw4u4G0eKZkI9aVRwckWaImLUE=
github.com/chromedp/chromedp v0.4.0/go.mod h1:DC3QUn4mJ24dwjcaGQLoZrhm4X/uPHZ6spDbS2uFhm4=
github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee h1:s+21KNqlpePfkah2I+gwHF8xmJWRjooY+5248k6m4A0=
github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo=
github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8=
github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo=
github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM=
github.com/knq/sysutil v0.0.0-20181215143952-f05b59f0f307 h1:vl4eIlySbjertFaNwiMjXsGrFVK25aOWLq7n+3gh2ls=
github.com/knq/sysutil v0.0.0-20181215143952-f05b59f0f307/go.mod h1:BjPj+aVjl9FW/cCGiF3nGh5v+9Gd3VCgBQbod/GlMaQ=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e h1:hB2xlXdHp/pmPZq0y3QnmWAArdw9PqbmotexnWx/FU8=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
49 changes: 36 additions & 13 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ import (
"sync"
"time"

"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/input"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/chromedp"
Expand Down Expand Up @@ -101,6 +100,9 @@ type Session struct {
// really) that was downloaded. If set, it is used as a sentinel, to indicate that
// we should skip downloading all items older than this one.
lastDone string
// firstItem is the most recent item in the feed. It is determined at the
// beginning of the run, and is used as the final sentinel.
firstItem string
}

// getLastDone returns the URL of the most recent item that was downloaded in
Expand Down Expand Up @@ -248,6 +250,10 @@ func (s *Session) login(ctx context.Context) error {
// 2) if the last session marked what was the most recent downloaded photo, it navigates to it
// 3) otherwise it jumps to the end of the timeline (i.e. the oldest photo)
func (s *Session) firstNav(ctx context.Context) error {
if err := s.setFirstItem(ctx); err != nil {
return err
}

if *startFlag != "" {
chromedp.Navigate(*startFlag).Do(ctx)
chromedp.WaitReady("body", chromedp.ByQuery).Do(ctx)
Expand All @@ -270,29 +276,43 @@ func (s *Session) firstNav(ctx context.Context) error {
return nil
}

// navToEnd waits for the page to be ready to receive scroll key events, by
// trying to select an item with the right arrow key, and then scrolls down to the
// end of the page, i.e. to the oldest items.
func navToEnd(ctx context.Context) error {
// setFirstItem looks for the first item, and sets it as s.firstItem.
// We always run it first even for code paths that might not need s.firstItem,
// because we also run it for the side-effect of waiting for the first page load to
// be done, and to be ready to receive scroll key events.
func (s *Session) setFirstItem(ctx context.Context) error {
// wait for page to be loaded, i.e. that we can make an element active by using
// the right arrow key.
for {
chromedp.KeyEvent(kb.ArrowRight).Do(ctx)
time.Sleep(tick)
var ids []cdp.NodeID
attributes := make(map[string]string)
if err := chromedp.Run(ctx,
chromedp.NodeIDs(`document.activeElement`, &ids, chromedp.ByJSPath)); err != nil {
chromedp.Attributes(`document.activeElement`, &attributes, chromedp.ByJSPath)); err != nil {
return err
}
if len(ids) > 0 {
if *verboseFlag {
log.Printf("We are ready, because element %v is selected", ids[0])
}
break
if len(attributes) == 0 {
time.Sleep(tick)
continue
}
time.Sleep(tick)

photoHref, ok := attributes["href"]
if !ok || !strings.HasPrefix(photoHref, "./photo/") {
time.Sleep(tick)
continue
}

s.firstItem = strings.TrimPrefix(photoHref, "./photo/")
break
}
if *verboseFlag {
log.Printf("Page loaded, most recent item in the feed is: %s", s.firstItem)
}
return nil
}

// navToEnd scrolls down to the end of the page, i.e. to the oldest items.
func navToEnd(ctx context.Context) error {
// try jumping to the end of the page. detect we are there and have stopped
// moving when two consecutive screenshots are identical.
var previousScr, scr []byte
Expand Down Expand Up @@ -593,6 +613,9 @@ func (s *Session) navN(N int) func(context.Context) error {
if N > 0 && n >= N {
break
}
if strings.HasSuffix(location, s.firstItem) {
break
}

if err := navLeft(ctx); err != nil {
return fmt.Errorf("error at %v: %v", location, err)
Expand Down

0 comments on commit ecbc0ac

Please sign in to comment.