diff --git a/vivaweb.go b/vivaweb.go index a604b38..c9b79d4 100644 --- a/vivaweb.go +++ b/vivaweb.go @@ -7,6 +7,7 @@ import ( "github.com/playwright-community/playwright-go" "log" "regexp" + "time" ) const BASE_URL = "https://vivaplus.tv" @@ -86,6 +87,19 @@ func isValidVideoUrl(url string) bool { return re.MatchString(url) } +func isBlacklistedUrl(url string) bool { + return url == "/supporters/payments/checkout/posts/63266/available_tiers" +} + +func parseDateString(dateStr string) (time.Time, error) { + const layout = "Jan 2, 2006" + t, err := time.Parse(layout, dateStr) + if err != nil { + return time.Time{}, err + } + return t, nil +} + func (w *WebClient) DiscoverAllVideos(db *sql.DB) error { log.Printf("Loading list of all videos...") @@ -117,20 +131,27 @@ func (w *WebClient) DiscoverAllVideos(db *sql.DB) error { } for _, l := range locators { + // Get the URL to the video page href, err := l.GetAttribute("href") if err != nil { return err } + // Ensure that it's valid and we haven't already scanned it if _, exists := previousUrls[href]; exists { continue // The item was already scanned. } + previousUrls[href] = struct{}{} + + if isBlacklistedUrl(href) { + continue // We want to skip this one + } if !isValidVideoUrl(href) { return fmt.Errorf("url has bad format: %s", href) } - previousUrls[href] = struct{}{} + // Insert it into the database result := tx.QueryRow("select count(1) from videos where url = :url", href) var count int err = result.Scan(&count) @@ -148,12 +169,13 @@ func (w *WebClient) DiscoverAllVideos(db *sql.DB) error { return fmt.Errorf("error inserting into db: %w", err) } } + + // Scroll to the bottom log.Printf("Scrolling...") err = w.page.Keyboard().Press("End") if err != nil { return fmt.Errorf("error scrolling to end of page: %w", err) } - //w.page.tim } finish: err = tx.Commit() @@ -163,6 +185,11 @@ finish: return nil } +func isRelativeTimeFormat(input string) bool { + re := regexp.MustCompile(`^\d+[mhs]\s+ago$`) + return re.MatchString(input) +} + func (w *WebClient) FetchVideoMetadata(db *sql.DB) error { log.Printf("Fetching video metadata...") for { @@ -198,6 +225,19 @@ func (w *WebClient) FetchVideoMetadata(db *sql.DB) error { return fmt.Errorf("error retrieving description: %w", err) } + // Get upload date + uploadDateStr, err := w.getInnerText(".video-page__meta") + if err != nil { + return fmt.Errorf("error retrieving upload date: %w", err) + } + uploadDate := time.Now() + if !isRelativeTimeFormat(uploadDateStr) { + uploadDate, err = parseDateString(uploadDateStr) + if err != nil { + return fmt.Errorf("error parsing date string '%s': %w", uploadDateStr, err) + } + } + // Get cast url videoElement, err := w.page.QuerySelector("video") if err != nil { @@ -214,7 +254,7 @@ func (w *WebClient) FetchVideoMetadata(db *sql.DB) error { return fmt.Errorf("error starting transaction: %w", err) } defer tx.Rollback() - result, err := tx.Exec("update videos set title = ?, description = ?, cast = ? where id = ?", title, description, castSource, id) + result, err := tx.Exec("update videos set title = ?, description = ?, cast = ?, upload_date = ? where id = ?", title, description, castSource, uploadDate, id) if err != nil { return fmt.Errorf("error updating database: %w", err) }