Parse upload date
This commit is contained in:
parent
34023f609b
commit
2c3b06a98b
46
vivaweb.go
46
vivaweb.go
@ -7,6 +7,7 @@ import (
|
||||
"github.com/playwright-community/playwright-go"
|
||||
"log"
|
||||
"regexp"
|
||||
"time"
|
||||
)
|
||||
|
||||
const BASE_URL = "https://vivaplus.tv"
|
||||
@ -86,6 +87,19 @@ func isValidVideoUrl(url string) bool {
|
||||
return re.MatchString(url)
|
||||
}
|
||||
|
||||
// isBlacklistedUrl reports whether url is a link that must be ignored during
// discovery. At present the list holds a single hard-coded checkout path;
// the comparison is exact, so any difference in the string yields false.
func isBlacklistedUrl(url string) bool {
	switch url {
	case "/supporters/payments/checkout/posts/63266/available_tiers":
		return true
	default:
		return false
	}
}
|
||||
|
||||
// parseDateString converts an absolute date written as abbreviated month,
// day and four-digit year (for example "Mar 15, 2023") into a time.Time.
//
// On success the parsed value is returned with a nil error. If dateStr does
// not match the layout, the zero time.Time and the error from time.Parse are
// returned.
func parseDateString(dateStr string) (time.Time, error) {
	parsed, parseErr := time.Parse("Jan 2, 2006", dateStr)
	if parseErr == nil {
		return parsed, nil
	}
	return time.Time{}, parseErr
}
|
||||
|
||||
func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
|
||||
log.Printf("Loading list of all videos...")
|
||||
|
||||
@ -117,20 +131,27 @@ func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
|
||||
}
|
||||
|
||||
for _, l := range locators {
|
||||
// Get the URL to the video page
|
||||
href, err := l.GetAttribute("href")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Ensure that it's valid and we haven't already scanned it
|
||||
if _, exists := previousUrls[href]; exists {
|
||||
continue // The item was already scanned.
|
||||
}
|
||||
previousUrls[href] = struct{}{}
|
||||
|
||||
if isBlacklistedUrl(href) {
|
||||
continue // We want to skip this one
|
||||
}
|
||||
|
||||
if !isValidVideoUrl(href) {
|
||||
return fmt.Errorf("url has bad format: %s", href)
|
||||
}
|
||||
previousUrls[href] = struct{}{}
|
||||
|
||||
// Insert it into the database
|
||||
result := tx.QueryRow("select count(1) from videos where url = :url", href)
|
||||
var count int
|
||||
err = result.Scan(&count)
|
||||
@ -148,12 +169,13 @@ func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
|
||||
return fmt.Errorf("error inserting into db: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Scroll to the bottom
|
||||
log.Printf("Scrolling...")
|
||||
err = w.page.Keyboard().Press("End")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error scrolling to end of page: %w", err)
|
||||
}
|
||||
//w.page.tim
|
||||
}
|
||||
finish:
|
||||
err = tx.Commit()
|
||||
@ -163,6 +185,11 @@ finish:
|
||||
return nil
|
||||
}
|
||||
|
||||
// relativeTimePattern matches relative timestamps of the form
// "<digits><unit> ago", where unit is one of m, h or s and at least one
// whitespace character separates it from "ago". It is compiled once at
// package initialization instead of on every isRelativeTimeFormat call.
var relativeTimePattern = regexp.MustCompile(`^\d+[mhs]\s+ago$`)

// isRelativeTimeFormat reports whether input is a relative timestamp such as
// "5m ago", "2h ago" or "30s ago". The whole string must match; leading or
// trailing text (including whitespace) makes it return false.
//
// NOTE(review): only the units m, h and s are recognized. A string like
// "2d ago" is reported as not relative — confirm the site never renders
// other units, since FetchVideoMetadata passes non-relative strings to
// parseDateString.
func isRelativeTimeFormat(input string) bool {
	return relativeTimePattern.MatchString(input)
}
|
||||
|
||||
func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
|
||||
log.Printf("Fetching video metadata...")
|
||||
for {
|
||||
@ -198,6 +225,19 @@ func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
|
||||
return fmt.Errorf("error retrieving description: %w", err)
|
||||
}
|
||||
|
||||
// Get upload date
|
||||
uploadDateStr, err := w.getInnerText(".video-page__meta")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error retrieving upload date: %w", err)
|
||||
}
|
||||
uploadDate := time.Now()
|
||||
if !isRelativeTimeFormat(uploadDateStr) {
|
||||
uploadDate, err = parseDateString(uploadDateStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing date string '%s': %w", uploadDateStr, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Get cast url
|
||||
videoElement, err := w.page.QuerySelector("video")
|
||||
if err != nil {
|
||||
@ -214,7 +254,7 @@ func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
|
||||
return fmt.Errorf("error starting transaction: %w", err)
|
||||
}
|
||||
defer tx.Rollback()
|
||||
result, err := tx.Exec("update videos set title = ?, description = ?, cast = ? where id = ?", title, description, castSource, id)
|
||||
result, err := tx.Exec("update videos set title = ?, description = ?, cast = ?, upload_date = ? where id = ?", title, description, castSource, uploadDate, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error updating database: %w", err)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user