Parse upload date
This commit is contained in:
parent
34023f609b
commit
2c3b06a98b
46
vivaweb.go
46
vivaweb.go
@ -7,6 +7,7 @@ import (
|
|||||||
"github.com/playwright-community/playwright-go"
|
"github.com/playwright-community/playwright-go"
|
||||||
"log"
|
"log"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
const BASE_URL = "https://vivaplus.tv"
|
const BASE_URL = "https://vivaplus.tv"
|
||||||
@ -86,6 +87,19 @@ func isValidVideoUrl(url string) bool {
|
|||||||
return re.MatchString(url)
|
return re.MatchString(url)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isBlacklistedUrl reports whether url is a known non-video link that the
// discovery loop should silently skip instead of rejecting as malformed.
//
// The comparison is an exact string match against the href as scraped; add
// further cases here if more such links turn up.
func isBlacklistedUrl(url string) bool {
	switch url {
	case "/supporters/payments/checkout/posts/63266/available_tiers":
		return true
	default:
		return false
	}
}
|
||||||
|
|
||||||
|
// parseDateString parses an absolute date written like "Jan 2, 2006"
// (abbreviated month, day of month, four-digit year).
//
// On success it returns the parsed time. On failure it returns the zero
// time.Time together with the error from time.Parse, unwrapped, so the caller
// can add its own context.
func parseDateString(dateStr string) (time.Time, error) {
	const layout = "Jan 2, 2006"

	parsed, err := time.Parse(layout, dateStr)
	if err != nil {
		// Return the zero value explicitly so a failed parse never leaks a
		// partially populated time to the caller.
		return time.Time{}, err
	}
	return parsed, nil
}
|
||||||
|
|
||||||
func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
|
func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
|
||||||
log.Printf("Loading list of all videos...")
|
log.Printf("Loading list of all videos...")
|
||||||
|
|
||||||
@ -117,20 +131,27 @@ func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, l := range locators {
|
for _, l := range locators {
|
||||||
|
// Get the URL to the video page
|
||||||
href, err := l.GetAttribute("href")
|
href, err := l.GetAttribute("href")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensure that it's valid and we haven't already scanned it
|
||||||
if _, exists := previousUrls[href]; exists {
|
if _, exists := previousUrls[href]; exists {
|
||||||
continue // The item was already scanned.
|
continue // The item was already scanned.
|
||||||
}
|
}
|
||||||
|
previousUrls[href] = struct{}{}
|
||||||
|
|
||||||
|
if isBlacklistedUrl(href) {
|
||||||
|
continue // We want to skip this one
|
||||||
|
}
|
||||||
|
|
||||||
if !isValidVideoUrl(href) {
|
if !isValidVideoUrl(href) {
|
||||||
return fmt.Errorf("url has bad format: %s", href)
|
return fmt.Errorf("url has bad format: %s", href)
|
||||||
}
|
}
|
||||||
previousUrls[href] = struct{}{}
|
|
||||||
|
|
||||||
|
// Insert it into the database
|
||||||
result := tx.QueryRow("select count(1) from videos where url = :url", href)
|
result := tx.QueryRow("select count(1) from videos where url = :url", href)
|
||||||
var count int
|
var count int
|
||||||
err = result.Scan(&count)
|
err = result.Scan(&count)
|
||||||
@ -148,12 +169,13 @@ func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
|
|||||||
return fmt.Errorf("error inserting into db: %w", err)
|
return fmt.Errorf("error inserting into db: %w", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scroll to the bottom
|
||||||
log.Printf("Scrolling...")
|
log.Printf("Scrolling...")
|
||||||
err = w.page.Keyboard().Press("End")
|
err = w.page.Keyboard().Press("End")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error scrolling to end of page: %w", err)
|
return fmt.Errorf("error scrolling to end of page: %w", err)
|
||||||
}
|
}
|
||||||
//w.page.tim
|
|
||||||
}
|
}
|
||||||
finish:
|
finish:
|
||||||
err = tx.Commit()
|
err = tx.Commit()
|
||||||
@ -163,6 +185,11 @@ finish:
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// relativeTimePattern matches strings such as "5m ago", "12h ago" or
// "30s ago": one or more digits, a single unit letter (m, h or s), at least
// one whitespace character, then the literal "ago". It is compiled once at
// package scope rather than on every call.
var relativeTimePattern = regexp.MustCompile(`^\d+[mhs]\s+ago$`)

// isRelativeTimeFormat reports whether input is a relative timestamp of the
// form "<number><m|h|s> ago". The whole string must match; leading or
// trailing text (including whitespace) makes it return false.
func isRelativeTimeFormat(input string) bool {
	return relativeTimePattern.MatchString(input)
}
|
||||||
|
|
||||||
func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
|
func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
|
||||||
log.Printf("Fetching video metadata...")
|
log.Printf("Fetching video metadata...")
|
||||||
for {
|
for {
|
||||||
@ -198,6 +225,19 @@ func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
|
|||||||
return fmt.Errorf("error retrieving description: %w", err)
|
return fmt.Errorf("error retrieving description: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get upload date
|
||||||
|
uploadDateStr, err := w.getInnerText(".video-page__meta")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error retrieving upload date: %w", err)
|
||||||
|
}
|
||||||
|
uploadDate := time.Now()
|
||||||
|
if !isRelativeTimeFormat(uploadDateStr) {
|
||||||
|
uploadDate, err = parseDateString(uploadDateStr)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error parsing date string '%s': %w", uploadDateStr, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Get cast url
|
// Get cast url
|
||||||
videoElement, err := w.page.QuerySelector("video")
|
videoElement, err := w.page.QuerySelector("video")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -214,7 +254,7 @@ func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
|
|||||||
return fmt.Errorf("error starting transaction: %w", err)
|
return fmt.Errorf("error starting transaction: %w", err)
|
||||||
}
|
}
|
||||||
defer tx.Rollback()
|
defer tx.Rollback()
|
||||||
result, err := tx.Exec("update videos set title = ?, description = ?, cast = ? where id = ?", title, description, castSource, id)
|
result, err := tx.Exec("update videos set title = ?, description = ?, cast = ?, upload_date = ? where id = ?", title, description, castSource, uploadDate, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error updating database: %w", err)
|
return fmt.Errorf("error updating database: %w", err)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user