From 2c3b06a98b32afd3328c2e7be210147745488536 Mon Sep 17 00:00:00 2001
From: Sebastiaan de Schaetzen <sebastiaan.de.schaetzen@gmail.com>
Date: Thu, 6 Feb 2025 07:52:47 +0100
Subject: [PATCH] Parse upload date

---
 vivaweb.go | 46 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/vivaweb.go b/vivaweb.go
index a604b38..c9b79d4 100644
--- a/vivaweb.go
+++ b/vivaweb.go
@@ -7,6 +7,7 @@ import (
 	"github.com/playwright-community/playwright-go"
 	"log"
 	"regexp"
+	"time"
 )
 
 const BASE_URL = "https://vivaplus.tv"
@@ -86,6 +87,19 @@ func isValidVideoUrl(url string) bool {
 	return re.MatchString(url)
 }
 
+// isBlacklistedUrl reports whether url is exactly the one hard-coded
+// supporter-checkout path (the available_tiers page of post 63266).
+func isBlacklistedUrl(url string) bool { return url == "/supporters/payments/checkout/posts/63266/available_tiers" }
+
+// parseDateString parses an absolute date such as "Feb 6, 2025".
+//
+// The layout is Go's reference date "Jan 2, 2006": abbreviated month name,
+// day of month, a comma, then the four-digit year. When dateStr does not
+// match, the zero time.Time and the time.Parse error are returned.
+func parseDateString(dateStr string) (time.Time, error) {
+	return time.Parse("Jan 2, 2006", dateStr)
+}
+
 func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
 	log.Printf("Loading list of all videos...")
 
@@ -117,20 +131,27 @@ func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
 		}
 
 		for _, l := range locators {
+			// Get the URL to the video page
 			href, err := l.GetAttribute("href")
 			if err != nil {
 				return err
 			}
 
+			// Ensure that it's valid and we haven't already scanned it
 			if _, exists := previousUrls[href]; exists {
 				continue // The item was already scanned.
 			}
+			previousUrls[href] = struct{}{}
+
+			if isBlacklistedUrl(href) {
+				continue // We want to skip this one
+			}
 
 			if !isValidVideoUrl(href) {
 				return fmt.Errorf("url has bad format: %s", href)
 			}
-			previousUrls[href] = struct{}{}
 
+			// Insert it into the database
 			result := tx.QueryRow("select count(1) from videos where url = :url", href)
 			var count int
 			err = result.Scan(&count)
@@ -148,12 +169,13 @@ func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
 				return fmt.Errorf("error inserting into db: %w", err)
 			}
 		}
+
+		// Scroll to the bottom
 		log.Printf("Scrolling...")
 		err = w.page.Keyboard().Press("End")
 		if err != nil {
 			return fmt.Errorf("error scrolling to end of page: %w", err)
 		}
-		//w.page.tim
 	}
 finish:
 	err = tx.Commit()
@@ -163,6 +185,11 @@ finish:
 	return nil
 }
 
+var relativeTimePattern = regexp.MustCompile(`^\d+[mhs]\s+ago$`)
+
+// isRelativeTimeFormat reports whether input looks like "<digits><m|h|s> ago", e.g. "5m ago"; the pattern is compiled once.
+func isRelativeTimeFormat(input string) bool { return relativeTimePattern.MatchString(input) }
+
 func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
 	log.Printf("Fetching video metadata...")
 	for {
@@ -198,6 +225,19 @@ func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
 			return fmt.Errorf("error retrieving description: %w", err)
 		}
 
+		// Get upload date
+		uploadDateStr, err := w.getInnerText(".video-page__meta")
+		if err != nil {
+			return fmt.Errorf("error retrieving upload date: %w", err)
+		}
+		uploadDate := time.Now()
+		if !isRelativeTimeFormat(uploadDateStr) {
+			uploadDate, err = parseDateString(uploadDateStr)
+			if err != nil {
+				return fmt.Errorf("error parsing date string '%s': %w", uploadDateStr, err)
+			}
+		}
+
 		// Get cast url
 		videoElement, err := w.page.QuerySelector("video")
 		if err != nil {
@@ -214,7 +254,7 @@ func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
 			return fmt.Errorf("error starting transaction: %w", err)
 		}
 		defer tx.Rollback()
-		result, err := tx.Exec("update videos set title = ?, description = ?, cast = ? where id = ?", title, description, castSource, id)
+		result, err := tx.Exec("update videos set title = ?, description = ?, cast = ?, upload_date = ? where id = ?", title, description, castSource, uploadDate, id)
 		if err != nil {
 			return fmt.Errorf("error updating database: %w", err)
 		}