Add metadata fetching
This commit is contained in:
parent
19c7517daf
commit
d03ba415d4
13
main.go
13
main.go
@ -28,11 +28,18 @@ func main() {
|
||||
log.Fatalf("error decoding password: %v", err)
|
||||
}
|
||||
|
||||
err = w.VivaLogin(username, string(password))
|
||||
err = w.Login(username, string(password))
|
||||
if err != nil {
|
||||
log.Fatalf("error login in: %v", err)
|
||||
}
|
||||
|
||||
w.VivaDiscoverAllVideos(db)
|
||||
//w.VivaAddMetadata(db)
|
||||
err = w.DiscoverAllVideos(db)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
err = w.FetchVideoMetadata(db)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
3
migrations/2_add_columns.sql
Normal file
3
migrations/2_add_columns.sql
Normal file
@ -0,0 +1,3 @@
|
||||
-- Migration 2: adds three columns to the existing videos table.
-- None of the columns declares a type, default or constraint.
ALTER TABLE videos ADD COLUMN upload_date;
|
||||
-- NOTE(review): "cast" is also an SQL keyword (CAST expressions); confirm the
-- target database accepts it as an unquoted column name.
ALTER TABLE videos ADD COLUMN cast;
|
||||
ALTER TABLE videos ADD COLUMN description;
|
114
vivaweb.go
114
vivaweb.go
@ -2,13 +2,15 @@ package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/playwright-community/playwright-go"
|
||||
"log"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
const BASE_URL = "https://vivaplus.tv/"
|
||||
const SIGN_IN_URL = BASE_URL + "supporters/sign_in"
|
||||
const BASE_URL = "https://vivaplus.tv"
|
||||
const SIGN_IN_URL = BASE_URL + "/supporters/sign_in"
|
||||
|
||||
type WebClient struct {
|
||||
pw *playwright.Playwright
|
||||
@ -39,7 +41,7 @@ func NewWebClient(options *playwright.RunOptions) *WebClient {
|
||||
}
|
||||
}
|
||||
|
||||
func (w *WebClient) VivaLogin(username, password string) error {
|
||||
func (w *WebClient) Login(username, password string) error {
|
||||
_, err := w.page.Goto(SIGN_IN_URL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error navigating to sign-in page: %w", err)
|
||||
@ -69,7 +71,22 @@ func (w *WebClient) VivaLogin(username, password string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *WebClient) VivaDiscoverAllVideos(db *sql.DB) error {
|
||||
// videoURLPattern matches the site-relative path of a single video page:
// "/supporters/videos/" followed by one or more digits and nothing else.
// It is compiled once at package initialisation rather than on every call.
var videoURLPattern = regexp.MustCompile(`^/supporters/videos/\d+$`)

// isValidVideoUrl reports whether url is a site-relative video path of the
// form "/supporters/videos/<digits>". Absolute URLs, trailing path segments,
// query strings and the empty string are all rejected.
func isValidVideoUrl(url string) bool {
	return videoURLPattern.MatchString(url)
}
|
||||
|
||||
func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
|
||||
log.Printf("Loading list of all videos...")
|
||||
|
||||
_, err := w.page.Goto("https://vivaplus.tv/supporters/videos/all?order=desc")
|
||||
@ -108,6 +125,10 @@ func (w *WebClient) VivaDiscoverAllVideos(db *sql.DB) error {
|
||||
if _, exists := previousUrls[href]; exists {
|
||||
continue // The item was already scanned.
|
||||
}
|
||||
|
||||
if !isValidVideoUrl(href) {
|
||||
return fmt.Errorf("url has bad format: %s", href)
|
||||
}
|
||||
previousUrls[href] = struct{}{}
|
||||
|
||||
result := tx.QueryRow("select count(1) from videos where url = :url", href)
|
||||
@ -141,3 +162,88 @@ finish:
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
|
||||
log.Printf("Fetching video metadata...")
|
||||
for {
|
||||
// Fetch the next record from the database
|
||||
row := db.QueryRow("select id, url from videos where title is null limit 1")
|
||||
var id int
|
||||
var href string
|
||||
err := row.Scan(&id, &href)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
log.Printf("Fetched all metadata")
|
||||
return nil
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("error fetching record: %w", err)
|
||||
}
|
||||
log.Printf("Fetching data from %s", href)
|
||||
|
||||
// Fetch the video metadata from the web page
|
||||
_, err = w.page.Goto(BASE_URL + href)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading page: %w", err)
|
||||
}
|
||||
|
||||
// Get video title
|
||||
title, err := w.getInnerText(".video-page__title")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error retrieving title: %w", err)
|
||||
}
|
||||
|
||||
// Get video description
|
||||
description, err := w.getInnerText("[data-trim-target='content']")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error retrieving description: %w", err)
|
||||
}
|
||||
|
||||
// Get cast url
|
||||
videoElement, err := w.page.QuerySelector("video")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error retrieving video element: %w", err)
|
||||
}
|
||||
if videoElement == nil {
|
||||
return fmt.Errorf("could not find video element")
|
||||
}
|
||||
castSource, err := videoElement.GetAttribute("cast-src")
|
||||
|
||||
// Store info in database
|
||||
tx, err := db.Begin()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error starting transaction: %w", err)
|
||||
}
|
||||
defer tx.Rollback()
|
||||
result, err := tx.Exec("update videos set title = ?, description = ?, cast = ? where id = ?", title, description, castSource, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error updating database: %w", err)
|
||||
}
|
||||
rowsAffected, err := result.RowsAffected()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error retrieving affected rows: %w", err)
|
||||
}
|
||||
if rowsAffected != 1 {
|
||||
return fmt.Errorf("unexpected number of rows affected: %d", rowsAffected)
|
||||
}
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error commiting changeds: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *WebClient) getInnerText(selector string) (string, error) {
|
||||
// Get video title
|
||||
titleElement, err := w.page.QuerySelector(selector)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if titleElement == nil {
|
||||
return "", err
|
||||
}
|
||||
title, err := titleElement.InnerText()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return title, nil
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user