diff --git a/internal/cmd/pocket.go b/internal/cmd/pocket.go index 6e84fbddf..a0e4c2af6 100644 --- a/internal/cmd/pocket.go +++ b/internal/cmd/pocket.go @@ -1,14 +1,21 @@ package cmd import ( + "context" + "encoding/csv" + "errors" "fmt" "os" + "path/filepath" + "regexp" + "slices" "strconv" "strings" "time" "github.com/PuerkitoBio/goquery" "github.com/go-shiori/shiori/internal/core" + "github.com/go-shiori/shiori/internal/database" "github.com/go-shiori/shiori/internal/model" "github.com/spf13/cobra" ) @@ -16,7 +23,7 @@ import ( func pocketCmd() *cobra.Command { cmd := &cobra.Command{ Use: "pocket source-file", - Short: "Import bookmarks from Pocket's exported HTML file", + Short: "Import bookmarks from Pocket's data export file", Args: cobra.ExactArgs(1), Run: pocketHandler, } @@ -25,17 +32,43 @@ func pocketCmd() *cobra.Command { } func pocketHandler(cmd *cobra.Command, args []string) { - _, deps := initShiori(cmd.Context(), cmd) + ctx := cmd.Context() + _, deps := initShiori(ctx, cmd) // Open pocket's file - srcFile, err := os.Open(args[0]) + filePath := args[0] + srcFile, err := os.Open(filePath) if err != nil { cError.Println(err) os.Exit(1) } defer srcFile.Close() - // Parse pocket's file + var bookmarks []model.BookmarkDTO + switch filepath.Ext(filePath) { + case ".html": + bookmarks = parseHtmlExport(ctx, deps.Database, srcFile) + case ".csv": + bookmarks = parseCsvExport(ctx, deps.Database, srcFile) + default: + cError.Println("Invalid file format. Only HTML and CSV are supported.") + os.Exit(1) + } + + // Save bookmark to database + bookmarks, err = deps.Database.SaveBookmarks(ctx, true, bookmarks...) + if err != nil { + cError.Printf("Failed to save bookmarks: %v\n", err) + os.Exit(1) + } + + // Print imported bookmarks + fmt.Println() + printBookmarks(bookmarks...) +} + +// Parse bookmarks from HTML file +func parseHtmlExport(ctx context.Context, db database.DB, srcFile *os.File) []model.BookmarkDTO { bookmarks := []model.BookmarkDTO{} mapURL := make(map[string]struct{}) @@ -49,69 +82,137 @@ func pocketHandler(cmd *cobra.Command, args []string) { // Get metadata title := a.Text() url, _ := a.Attr("href") - strTags, _ := a.Attr("tags") - strModified, _ := a.Attr("time_added") - intModified, _ := strconv.ParseInt(strModified, 10, 64) - modified := time.Unix(intModified, 0) - - // Clean up URL - var err error - url, err = core.RemoveUTMParams(url) + tagsStr, _ := a.Attr("tags") + timeAddedStr, _ := a.Attr("time_added") + + title, url, timeAdded, tags, err := verifyMetadata(title, url, timeAddedStr, tagsStr) if err != nil { - cError.Printf("Skip %s: URL is not valid\n", url) + cError.Printf("Skip %s: %v\n", url, err) return } - // Make sure title is valid Utf-8 - title = validateTitle(title, url) - - // Check if the URL already exist before, both in bookmark - // file or in database - if _, exist := mapURL[url]; exist { - cError.Printf("Skip %s: URL already exists\n", url) + if err = handleDuplicates(ctx, db, mapURL, url); err != nil { + cError.Printf("Skip %s: %v\n", url, err) return } - _, exist, err := deps.Database.GetBookmark(cmd.Context(), 0, url) - if err != nil { - cError.Printf("Skip %s: Get Bookmark fail, %v", url, err) - return + // Add item to list + bookmark := model.BookmarkDTO{ + URL: url, + Title: title, + ModifiedAt: timeAdded.Format(model.DatabaseDateFormat), + CreatedAt: timeAdded.Format(model.DatabaseDateFormat), + Tags: tags, } - if exist { - cError.Printf("Skip %s: URL already exists\n", url) - mapURL[url] = struct{}{} - return - } + mapURL[url] = struct{}{} + bookmarks = append(bookmarks, bookmark) + }) + + return bookmarks +} + +// Parse bookmarks from CSV file +func parseCsvExport(ctx context.Context, db database.DB, srcFile *os.File) []model.BookmarkDTO { + bookmarks := []model.BookmarkDTO{} + mapURL := make(map[string]struct{}) - // Get bookmark tags - tags := []model.Tag{} - for _, strTag := range strings.Split(strTags, ",") { - if strTag != "" { - tags = append(tags, model.Tag{Name: strTag}) + reader := csv.NewReader(srcFile) + records, err := reader.ReadAll() + if err != nil { + cError.Println(err) + os.Exit(1) + } + + for i, cols := range records { + // Check and skip header + if i == 0 { + expected := []string{"title", "url", "time_added", "cursor", "tags", "status"} + if slices.Compare(cols, expected) != 0 { + cError.Printf("Invalid CSV format. Header must be: %s\n", strings.Join(expected, ",")) + os.Exit(1) } + continue + } + + // Get metadata + title, url, timeAdded, tags, err := verifyMetadata(cols[0], cols[1], cols[2], cols[4]) + if err != nil { + cError.Printf("Skip %s: %v\n", url, err) + continue + } + + if err = handleDuplicates(ctx, db, mapURL, url); err != nil { + cError.Printf("Skip %s: %v\n", url, err) + continue } // Add item to list bookmark := model.BookmarkDTO{ URL: url, Title: title, - ModifiedAt: modified.Format(model.DatabaseDateFormat), + ModifiedAt: timeAdded.Format(model.DatabaseDateFormat), + CreatedAt: timeAdded.Format(model.DatabaseDateFormat), Tags: tags, } mapURL[url] = struct{}{} bookmarks = append(bookmarks, bookmark) - }) + } - // Save bookmark to database - bookmarks, err = deps.Database.SaveBookmarks(cmd.Context(), true, bookmarks...) + return bookmarks +} + +// Parse metadata and verify it's validity +func verifyMetadata(title, url, timeAddedStr, tags string) (string, string, time.Time, []model.Tag, error) { + // Clean up URL + var err error + url, err = core.RemoveUTMParams(url) if err != nil { - cError.Printf("Failed to save bookmarks: %v\n", err) - os.Exit(1) + err = fmt.Errorf("URL is not valid, %w", err) + return "", "", time.Time{}, nil, err } - // Print imported bookmark - fmt.Println() - printBookmarks(bookmarks...) + // Make sure title is valid Utf-8 + title = validateTitle(title, url) + + // Parse time added + timeAddedInt, err := strconv.ParseInt(timeAddedStr, 10, 64) + if err != nil { + err = fmt.Errorf("Invalid time added, %w", err) + return "", "", time.Time{}, nil, err + } + timeAdded := time.Unix(timeAddedInt, 0) + + // Get bookmark tags + tagsList := []model.Tag{} + // We need to split tags by both comma or pipe, + // because Pocket's CSV export use pipe as separator, + // while HTML export use comma. + for _, tag := range regexp.MustCompile(`[,|]`).Split(tags, -1) { + if tag != "" { + tagsList = append(tagsList, model.Tag{Name: tag}) + } + } + + return title, url, timeAdded, tagsList, nil +} + +// Checks if the URL already exist, both in bookmark +// file or in database +func handleDuplicates(ctx context.Context, db database.DB, mapURL map[string]struct{}, url string) error { + if _, exists := mapURL[url]; exists { + return errors.New("URL already exists") + } + + _, exists, err := db.GetBookmark(ctx, 0, url) + if err != nil { + return fmt.Errorf("Failed getting bookmark, %w", err) + } + + if exists { + return errors.New("URL already exists") + } + + return nil }