vivaplusdl/vivaweb.go

250 lines
6.1 KiB
Go

package main
import (
"database/sql"
"errors"
"fmt"
"github.com/playwright-community/playwright-go"
"log"
"regexp"
)
// Addresses of the site driven by WebClient.
const (
	// BASE_URL is the scheme and host that every site-relative path is
	// appended to.
	BASE_URL = "https://vivaplus.tv"

	// SIGN_IN_URL is the absolute address of the supporter sign-in page.
	SIGN_IN_URL = BASE_URL + "/supporters/sign_in"
)
// WebClient bundles the Playwright objects used to drive the site: the
// Playwright instance, the browser launched from it, and the single page on
// which every method of the client operates.
type WebClient struct {
	// pw is the Playwright instance returned by playwright.Run.
	pw *playwright.Playwright

	// browser is the browser launched from pw.
	browser playwright.Browser

	// page is the page opened in browser; all navigation and element
	// queries go through it.
	page playwright.Page
}
// NewWebClient starts Playwright with the given run options, launches a
// Firefox browser and opens one page in it.
//
// Any failure during start-up is fatal: the error is logged and the process
// exits through log.Fatalf, so callers always receive a usable client.
func NewWebClient(options *playwright.RunOptions) *WebClient {
	pw, err := playwright.Run(options)
	if err != nil {
		// %v rather than %w: the wrapping verb is only understood by
		// fmt.Errorf and would otherwise be printed as "%!w(...)".
		log.Fatalf("error running playwright: %v", err)
	}

	browser, err := pw.Firefox.Launch()
	if err != nil {
		log.Fatalf("error running firefox: %v", err)
	}

	page, err := browser.NewPage()
	if err != nil {
		log.Fatalf("error creating page: %v", err)
	}

	return &WebClient{
		pw:      pw,
		browser: browser,
		page:    page,
	}
}
// Login signs in to the supporters area with the given credentials.
//
// It opens the sign-in page, fills the e-mail and password inputs, clicks the
// submit button and then waits until the page URL is the supporters landing
// page. The first step that fails is returned as a wrapped error.
func (w *WebClient) Login(username, password string) error {
	if _, err := w.page.Goto(SIGN_IN_URL); err != nil {
		return fmt.Errorf("error navigating to sign-in page: %w", err)
	}

	if err := w.page.GetByTestId("SupporterLogin.EmailInput").Fill(username); err != nil {
		return fmt.Errorf("error filling in email: %w", err)
	}

	if err := w.page.GetByTestId("SupporterLogin.PasswordInput").Fill(password); err != nil {
		// Distinct message so a password-field failure is not reported as
		// an e-mail failure.
		return fmt.Errorf("error filling in password: %w", err)
	}

	log.Printf("Clicking login...")
	if err := w.page.GetByTestId("SupporterLogin.SubmitButton").Click(); err != nil {
		return fmt.Errorf("error clicking login: %w", err)
	}

	// Reaching the supporters landing page is taken as the sign that the
	// login went through.
	if err := w.page.WaitForURL(BASE_URL + "/supporters"); err != nil {
		return fmt.Errorf("error waiting for url: %w", err)
	}

	log.Printf("Logged in!")
	return nil
}
// videoURLPattern matches the site-relative path of a single video page:
// "/supporters/videos/" followed by one or more digits and nothing else.
// It is compiled once at package initialisation instead of on every call.
var videoURLPattern = regexp.MustCompile(`^/supporters/videos/\d+$`)

// isValidVideoUrl reports whether url is a site-relative video path such as
// "/supporters/videos/123". Absolute URLs, extra path segments, query
// strings and non-numeric identifiers are all rejected.
func isValidVideoUrl(url string) bool {
	return videoURLPattern.MatchString(url)
}
// DiscoverAllVideos walks the video catalogue page and inserts the URL of
// every video that is not yet in the videos table.
//
// The catalogue is requested with order=desc and more entries are pulled in
// by pressing End to scroll. Scanning stops when either
//
//   - a video is met whose URL is already stored (everything after it is
//     presumably known from an earlier run), or
//   - several consecutive scroll passes yield no unseen entry, which is
//     taken to mean the end of the catalogue has been reached.
//
// All inserts happen in one transaction that is committed only when scanning
// finishes normally; on any error the transaction is rolled back and nothing
// is stored. An entry whose href is not a valid video path aborts the scan.
func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
	const (
		// maxIdlePasses bounds how many scroll passes in a row may find no
		// new entry before the catalogue is considered exhausted. Without
		// such a bound the loop never ends when no stored video is met,
		// e.g. on an empty database.
		maxIdlePasses = 5
		// idleWaitMs is the pause, in milliseconds, granted to the page to
		// load further entries after a pass that found nothing new.
		idleWaitMs = 1000
	)

	log.Printf("Loading list of all videos...")
	if _, err := w.page.Goto(BASE_URL + "/supporters/videos/all?order=desc"); err != nil {
		return fmt.Errorf("error opening page: %w", err)
	}

	visible, err := w.page.GetByTestId("VideoCatalog.Video").Count()
	if err != nil {
		return fmt.Errorf("error counting videos: %w", err)
	}
	log.Printf("Looping over videos (%d initially visible)...", visible)

	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("error starting transaction: %w", err)
	}
	// After a successful Commit this Rollback is a no-op.
	defer tx.Rollback()

	seen := make(map[string]struct{}) // hrefs already handled in earlier passes
	idlePasses := 0

scan:
	for {
		locators, err := w.page.GetByTestId("VideoCatalog.Video").All()
		if err != nil {
			return fmt.Errorf("error listing videos: %w", err)
		}

		added := 0
		for _, l := range locators {
			href, err := l.GetAttribute("href")
			if err != nil {
				return fmt.Errorf("error reading video link: %w", err)
			}
			if _, ok := seen[href]; ok {
				continue // The item was already scanned.
			}
			if !isValidVideoUrl(href) {
				return fmt.Errorf("url has bad format: %s", href)
			}
			seen[href] = struct{}{}

			var existing int
			if err := tx.QueryRow("select count(1) from videos where url = :url", href).Scan(&existing); err != nil {
				return fmt.Errorf("error fetching data from db: %w", err)
			}
			if existing > 0 {
				log.Printf("Video already exists: %s", href)
				break scan
			}

			log.Printf("Adding video %s", href)
			if _, err := tx.Exec("insert into videos(url) values (:url)", href); err != nil {
				return fmt.Errorf("error inserting into db: %w", err)
			}
			added++
		}

		if added > 0 {
			idlePasses = 0
		} else {
			idlePasses++
			if idlePasses >= maxIdlePasses {
				log.Printf("No new videos after %d scroll passes, stopping", idlePasses)
				break scan
			}
			// Nothing new yet: give the page a moment to load more
			// entries before scrolling again.
			w.page.WaitForTimeout(idleWaitMs)
		}

		log.Printf("Scrolling...")
		if err := w.page.Keyboard().Press("End"); err != nil {
			return fmt.Errorf("error scrolling to end of page: %w", err)
		}
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("error committing: %w", err)
	}
	return nil
}
// FetchVideoMetadata fills in title, description and cast source for every
// row of the videos table whose title is still NULL.
//
// It repeatedly picks one such row, opens its page, reads the title, the
// description and the "cast-src" attribute of the page's video element, and
// stores them on the row. It returns nil once no row with a NULL title is
// left, and the first error otherwise; rows completed before the error stay
// updated because each row is written in its own transaction.
func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
	log.Printf("Fetching video metadata...")
	for {
		// Fetch the next record that still lacks metadata.
		var (
			id   int
			href string
		)
		err := db.QueryRow("select id, url from videos where title is null limit 1").Scan(&id, &href)
		if errors.Is(err, sql.ErrNoRows) {
			log.Printf("Fetched all metadata")
			return nil
		}
		if err != nil {
			return fmt.Errorf("error fetching record: %w", err)
		}

		log.Printf("Fetching data from %s", href)
		if _, err := w.page.Goto(BASE_URL + href); err != nil {
			return fmt.Errorf("error loading page: %w", err)
		}

		title, err := w.getInnerText(".video-page__title")
		if err != nil {
			return fmt.Errorf("error retrieving title: %w", err)
		}

		description, err := w.getInnerText("[data-trim-target='content']")
		if err != nil {
			return fmt.Errorf("error retrieving description: %w", err)
		}

		videoElement, err := w.page.QuerySelector("video")
		if err != nil {
			return fmt.Errorf("error retrieving video element: %w", err)
		}
		if videoElement == nil {
			return fmt.Errorf("could not find video element")
		}
		castSource, err := videoElement.GetAttribute("cast-src")
		if err != nil {
			return fmt.Errorf("error retrieving cast source: %w", err)
		}

		if err := storeVideoMetadata(db, id, title, description, castSource); err != nil {
			return err
		}
	}
}

// storeVideoMetadata writes title, description and cast source to the videos
// row with the given id inside its own transaction. It fails unless exactly
// one row was updated. Keeping the transaction in this helper makes the
// deferred rollback run once per video instead of accumulating until the
// caller's loop ends.
func storeVideoMetadata(db *sql.DB, id int, title, description, castSource string) error {
	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("error starting transaction: %w", err)
	}
	// After a successful Commit this Rollback is a no-op.
	defer tx.Rollback()

	result, err := tx.Exec("update videos set title = ?, description = ?, cast = ? where id = ?", title, description, castSource, id)
	if err != nil {
		return fmt.Errorf("error updating database: %w", err)
	}

	rowsAffected, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("error retrieving affected rows: %w", err)
	}
	if rowsAffected != 1 {
		return fmt.Errorf("unexpected number of rows affected: %d", rowsAffected)
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("error committing changes: %w", err)
	}
	return nil
}
// getInnerText looks up selector on the current page and returns the inner
// text of the element found.
//
// An error from the lookup or from reading the text is returned unchanged.
// When the lookup yields a nil element the result is an empty string and a
// nil error.
func (w *WebClient) getInnerText(selector string) (string, error) {
	element, lookupErr := w.page.QuerySelector(selector)
	switch {
	case lookupErr != nil:
		return "", lookupErr
	case element == nil:
		// NOTE(review): a nil element (presumably "nothing matched") is
		// reported as success with empty text. Confirm callers want this
		// rather than an error.
		return "", nil
	}

	text, readErr := element.InnerText()
	if readErr != nil {
		return "", readErr
	}
	return text, nil
}