All checks were successful
Build / build (push) Successful in 1m55s
304 lines
7.7 KiB
Go
304 lines
7.7 KiB
Go
package main
|
|
|
|
import (
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"github.com/playwright-community/playwright-go"
|
|
"log"
|
|
"regexp"
|
|
"time"
|
|
)
|
|
|
|
// Site endpoints used by the web client.
const (
	// BASE_URL is the scheme and host that relative video paths are appended to.
	BASE_URL = "https://vivaplus.tv"

	// SIGN_IN_URL is the supporter sign-in page opened by Login.
	SIGN_IN_URL = BASE_URL + "/supporters/sign_in"
)
|
|
|
|
type WebClient struct {
|
|
pw *playwright.Playwright
|
|
browser playwright.Browser
|
|
page playwright.Page
|
|
//browser *browser.Browser
|
|
//csrfToken string
|
|
//cookies *cookiejar.Jar
|
|
}
|
|
|
|
func NewWebClient(options *playwright.RunOptions) *WebClient {
|
|
pw, err := playwright.Run(options)
|
|
if err != nil {
|
|
log.Fatalf("error running playwright: %v", err)
|
|
}
|
|
browser, err := pw.Firefox.Launch()
|
|
if err != nil {
|
|
log.Fatalf("error running firefox: %v", err)
|
|
}
|
|
page, err := browser.NewPage()
|
|
if err != nil {
|
|
log.Fatalf("error creating page: %v", err)
|
|
}
|
|
return &WebClient{
|
|
pw: pw,
|
|
browser: browser,
|
|
page: page,
|
|
}
|
|
}
|
|
|
|
func (w *WebClient) Login(username, password string) error {
|
|
_, err := w.page.Goto(SIGN_IN_URL)
|
|
if err != nil {
|
|
return fmt.Errorf("error navigating to sign-in page: %w", err)
|
|
}
|
|
|
|
err = w.page.GetByTestId("SupporterLogin.EmailInput").Fill(username)
|
|
if err != nil {
|
|
return fmt.Errorf("error filling in email: %w", err)
|
|
}
|
|
err = w.page.GetByTestId("SupporterLogin.PasswordInput").Fill(password)
|
|
if err != nil {
|
|
return fmt.Errorf("error filling in email: %w", err)
|
|
}
|
|
|
|
log.Printf("Clicking login...")
|
|
err = w.page.GetByTestId("SupporterLogin.SubmitButton").Click()
|
|
if err != nil {
|
|
return fmt.Errorf("error clicking login: %w", err)
|
|
}
|
|
|
|
err = w.page.WaitForURL("https://vivaplus.tv/supporters")
|
|
if err != nil {
|
|
return fmt.Errorf("error waiting for url: %w", err)
|
|
}
|
|
log.Printf("Logged in!")
|
|
|
|
return nil
|
|
}
|
|
|
|
// validVideoURLPattern matches the relative path of a single video page:
// "/supporters/videos/" followed by one or more digits and nothing else.
// It is compiled once at package initialisation rather than on every call;
// the pattern is a constant, so MustCompile cannot fail at run time.
var validVideoURLPattern = regexp.MustCompile(`^/supporters/videos/\d+$`)

// isValidVideoUrl reports whether url is a relative video page path of the
// form /supporters/videos/<digits>. Absolute URLs, trailing slashes, query
// strings and non-numeric ids are all rejected.
func isValidVideoUrl(url string) bool {
	return validVideoURLPattern.MatchString(url)
}
|
|
|
|
// isBlacklistedUrl reports whether url is the one known catalog link that
// must be skipped during discovery. The comparison is an exact string match.
func isBlacklistedUrl(url string) bool {
	const skipped = "/supporters/payments/checkout/posts/63266/available_tiers"
	if url == skipped {
		return true
	}
	return false
}
|
|
|
|
// parseDateString parses a date written like "Jan 2, 2006" (abbreviated month
// name, day without leading zero, four-digit year). The result is in UTC, as
// produced by time.Parse for layouts without a zone. On failure it returns the
// zero time.Time together with the parse error.
func parseDateString(dateStr string) (time.Time, error) {
	parsed, err := time.Parse("Jan 2, 2006", dateStr)
	if err == nil {
		return parsed, nil
	}
	return time.Time{}, err
}
|
|
|
|
func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
|
|
log.Printf("Loading list of all videos...")
|
|
|
|
_, err := w.page.Goto("https://vivaplus.tv/supporters/videos/all?order=desc")
|
|
if err != nil {
|
|
return fmt.Errorf("error opening page: %w", err)
|
|
}
|
|
|
|
count, err := w.page.GetByTestId("VideoCatalog.Video").Count()
|
|
println(count, err)
|
|
|
|
println("Looping over videos...")
|
|
tx, err := db.Begin()
|
|
if err != nil {
|
|
return fmt.Errorf("error starting transaction: %w", err)
|
|
}
|
|
defer tx.Rollback()
|
|
|
|
// Find the next run number
|
|
var currentRun int
|
|
row := tx.QueryRow("select max(run) from videos")
|
|
err = row.Scan(¤tRun)
|
|
if err != nil {
|
|
return fmt.Errorf("error retrieving current run: %w", err)
|
|
}
|
|
currentRun++
|
|
|
|
previousUrls := make(map[string]struct{})
|
|
|
|
for {
|
|
locators, err := w.page.GetByTestId("VideoCatalog.Video").All()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, l := range locators {
|
|
// Get the URL to the video page
|
|
href, err := l.GetAttribute("href")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Ensure that it's valid and we haven't already scanned it
|
|
if _, exists := previousUrls[href]; exists {
|
|
continue // The item was already scanned.
|
|
}
|
|
previousUrls[href] = struct{}{}
|
|
|
|
if isBlacklistedUrl(href) {
|
|
continue // We want to skip this one
|
|
}
|
|
|
|
if !isValidVideoUrl(href) {
|
|
return fmt.Errorf("url has bad format: %s", href)
|
|
}
|
|
|
|
// Get thumbnail
|
|
thumbnailEl := l.Locator(".video__image:first-child")
|
|
thumbnail, err := thumbnailEl.GetAttribute("src")
|
|
if err != nil {
|
|
return fmt.Errorf("error retrieving thumbnail: %w", err)
|
|
}
|
|
|
|
// Ensure the record does not already exist. If it does, we've fetched
|
|
// all new videos
|
|
result := tx.QueryRow("select count(1) from videos where url = :url", href)
|
|
var count int
|
|
err = result.Scan(&count)
|
|
if err != nil {
|
|
return fmt.Errorf("error fetching data from db: %w", err)
|
|
}
|
|
if count == 1 {
|
|
log.Printf("Video already exists: %s", href)
|
|
goto finish
|
|
}
|
|
|
|
// Insert it into the database
|
|
log.Printf("Adding video %s", href)
|
|
_, err = tx.Exec("insert into videos(url, thumbnail, run) values (?, ?, ?)", href, thumbnail, currentRun)
|
|
if err != nil {
|
|
return fmt.Errorf("error inserting into db: %w", err)
|
|
}
|
|
}
|
|
|
|
// Scroll to the bottom
|
|
log.Printf("Scrolling...")
|
|
err = w.page.Keyboard().Press("End")
|
|
if err != nil {
|
|
return fmt.Errorf("error scrolling to end of page: %w", err)
|
|
}
|
|
}
|
|
finish:
|
|
err = tx.Commit()
|
|
if err != nil {
|
|
return fmt.Errorf("error committing: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// relativeTimePattern matches strings such as "5m ago", "12h ago" or
// "30s ago": one or more digits, a single unit letter (m, h or s), at least
// one whitespace character and the word "ago". It is compiled once at package
// initialisation rather than on every call.
var relativeTimePattern = regexp.MustCompile(`^\d+[mhs]\s+ago$`)

// isRelativeTimeFormat reports whether input is a relative timestamp in
// minutes, hours or seconds (for example "5m ago"). Other units and any
// surrounding text are not accepted.
func isRelativeTimeFormat(input string) bool {
	return relativeTimePattern.MatchString(input)
}
|
|
|
|
func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
|
|
log.Printf("Fetching video metadata...")
|
|
for {
|
|
// Fetch the next record from the database
|
|
row := db.QueryRow("select id, url from videos where title is null limit 1")
|
|
var id int
|
|
var href string
|
|
err := row.Scan(&id, &href)
|
|
if errors.Is(err, sql.ErrNoRows) {
|
|
log.Printf("Fetched all metadata")
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("error fetching record: %w", err)
|
|
}
|
|
log.Printf("Fetching data for %s", href)
|
|
|
|
// Fetch the video metadata from the web page
|
|
_, err = w.page.Goto(BASE_URL + href)
|
|
if err != nil {
|
|
return fmt.Errorf("error loading page: %w", err)
|
|
}
|
|
|
|
// Get video title
|
|
title, err := w.getInnerText(".video-page__title")
|
|
if err != nil {
|
|
return fmt.Errorf("error retrieving title: %w", err)
|
|
}
|
|
|
|
// Get video description
|
|
description, err := w.getInnerText("[data-trim-target='content']")
|
|
if err != nil {
|
|
return fmt.Errorf("error retrieving description: %w", err)
|
|
}
|
|
|
|
// Get upload date
|
|
uploadDateStr, err := w.getInnerText(".video-page__meta")
|
|
if err != nil {
|
|
return fmt.Errorf("error retrieving upload date: %w", err)
|
|
}
|
|
uploadDate := time.Now()
|
|
if !isRelativeTimeFormat(uploadDateStr) {
|
|
uploadDate, err = parseDateString(uploadDateStr)
|
|
if err != nil {
|
|
return fmt.Errorf("error parsing date string '%s': %w", uploadDateStr, err)
|
|
}
|
|
}
|
|
|
|
// Get cast url
|
|
videoElement, err := w.page.QuerySelector("video")
|
|
if err != nil {
|
|
return fmt.Errorf("error retrieving video element: %w", err)
|
|
}
|
|
if videoElement == nil {
|
|
return fmt.Errorf("could not find video element")
|
|
}
|
|
castSource, err := videoElement.GetAttribute("cast-src")
|
|
|
|
// Store info in database
|
|
tx, err := db.Begin()
|
|
if err != nil {
|
|
return fmt.Errorf("error starting transaction: %w", err)
|
|
}
|
|
defer tx.Rollback()
|
|
result, err := tx.Exec("update videos set title = ?, description = ?, cast = ?, upload_date = ?, year = ? where id = ?", title, description, castSource, uploadDate.Format(time.DateOnly), uploadDate.Year(), id)
|
|
if err != nil {
|
|
return fmt.Errorf("error updating database: %w", err)
|
|
}
|
|
rowsAffected, err := result.RowsAffected()
|
|
if err != nil {
|
|
return fmt.Errorf("error retrieving affected rows: %w", err)
|
|
}
|
|
if rowsAffected != 1 {
|
|
return fmt.Errorf("unexpected number of rows affected: %d", rowsAffected)
|
|
}
|
|
err = tx.Commit()
|
|
if err != nil {
|
|
return fmt.Errorf("error commiting changeds: %w", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (w *WebClient) getInnerText(selector string) (string, error) {
|
|
// Get video title
|
|
titleElement, err := w.page.QuerySelector(selector)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if titleElement == nil {
|
|
return "", err
|
|
}
|
|
title, err := titleElement.InnerText()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return title, nil
|
|
}
|