Add metadata fetching
This commit is contained in:
		
							
								
								
									
										13
									
								
								main.go
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								main.go
									
									
									
									
									
								
							@@ -28,11 +28,18 @@ func main() {
 | 
			
		||||
		log.Fatalf("error decoding password: %v", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	err = w.VivaLogin(username, string(password))
 | 
			
		||||
	err = w.Login(username, string(password))
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		log.Fatalf("error login in: %v", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	w.VivaDiscoverAllVideos(db)
 | 
			
		||||
	//w.VivaAddMetadata(db)
 | 
			
		||||
	err = w.DiscoverAllVideos(db)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		panic(err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	err = w.FetchVideoMetadata(db)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		panic(err)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										3
									
								
								migrations/2_add_columns.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								migrations/2_add_columns.sql
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,3 @@
 | 
			
		||||
ALTER TABLE videos ADD COLUMN upload_date;
 | 
			
		||||
ALTER TABLE videos ADD COLUMN cast;
 | 
			
		||||
ALTER TABLE videos ADD COLUMN description;
 | 
			
		||||
							
								
								
									
										114
									
								
								vivaweb.go
									
									
									
									
									
								
							
							
						
						
									
										114
									
								
								vivaweb.go
									
									
									
									
									
								
							@@ -2,13 +2,15 @@ package main
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"database/sql"
 | 
			
		||||
	"errors"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"github.com/playwright-community/playwright-go"
 | 
			
		||||
	"log"
 | 
			
		||||
	"regexp"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const BASE_URL = "https://vivaplus.tv/"
 | 
			
		||||
const SIGN_IN_URL = BASE_URL + "supporters/sign_in"
 | 
			
		||||
const BASE_URL = "https://vivaplus.tv"
 | 
			
		||||
const SIGN_IN_URL = BASE_URL + "/supporters/sign_in"
 | 
			
		||||
 | 
			
		||||
type WebClient struct {
 | 
			
		||||
	pw      *playwright.Playwright
 | 
			
		||||
@@ -39,7 +41,7 @@ func NewWebClient(options *playwright.RunOptions) *WebClient {
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (w *WebClient) VivaLogin(username, password string) error {
 | 
			
		||||
func (w *WebClient) Login(username, password string) error {
 | 
			
		||||
	_, err := w.page.Goto(SIGN_IN_URL)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return fmt.Errorf("error navigating to sign-in page: %w", err)
 | 
			
		||||
@@ -69,7 +71,22 @@ func (w *WebClient) VivaLogin(username, password string) error {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (w *WebClient) VivaDiscoverAllVideos(db *sql.DB) error {
 | 
			
		||||
// videoURLPattern matches the site-relative path of a single video page,
// i.e. "/supporters/videos/" followed by one or more ASCII digits and
// nothing else. It is compiled once at package initialisation rather than
// on every call, because isValidVideoUrl is invoked for each discovered link.
var videoURLPattern = regexp.MustCompile(`^/supporters/videos/\d+$`)

// isValidVideoUrl reports whether url is a site-relative video path of the
// form "/supporters/videos/<digits>". Absolute URLs, paths with a trailing
// slash or query string, and non-numeric ids are all rejected.
func isValidVideoUrl(url string) bool {
	return videoURLPattern.MatchString(url)
}
 | 
			
		||||
 | 
			
		||||
func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
 | 
			
		||||
	log.Printf("Loading list of all videos...")
 | 
			
		||||
 | 
			
		||||
	_, err := w.page.Goto("https://vivaplus.tv/supporters/videos/all?order=desc")
 | 
			
		||||
@@ -108,6 +125,10 @@ func (w *WebClient) VivaDiscoverAllVideos(db *sql.DB) error {
 | 
			
		||||
			if _, exists := previousUrls[href]; exists {
 | 
			
		||||
				continue // The item was already scanned.
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			if !isValidVideoUrl(href) {
 | 
			
		||||
				return fmt.Errorf("url has bad format: %s", href)
 | 
			
		||||
			}
 | 
			
		||||
			previousUrls[href] = struct{}{}
 | 
			
		||||
 | 
			
		||||
			result := tx.QueryRow("select count(1) from videos where url = :url", href)
 | 
			
		||||
@@ -141,3 +162,88 @@ finish:
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
 | 
			
		||||
	log.Printf("Fetching video metadata...")
 | 
			
		||||
	for {
 | 
			
		||||
		// Fetch the next record from the database
 | 
			
		||||
		row := db.QueryRow("select id, url from videos where title is null limit 1")
 | 
			
		||||
		var id int
 | 
			
		||||
		var href string
 | 
			
		||||
		err := row.Scan(&id, &href)
 | 
			
		||||
		if errors.Is(err, sql.ErrNoRows) {
 | 
			
		||||
			log.Printf("Fetched all metadata")
 | 
			
		||||
			return nil
 | 
			
		||||
		}
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return fmt.Errorf("error fetching record: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
		log.Printf("Fetching data from %s", href)
 | 
			
		||||
 | 
			
		||||
		// Fetch the video metadata from the web page
 | 
			
		||||
		_, err = w.page.Goto(BASE_URL + href)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return fmt.Errorf("error loading page: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Get video title
 | 
			
		||||
		title, err := w.getInnerText(".video-page__title")
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return fmt.Errorf("error retrieving title: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Get video description
 | 
			
		||||
		description, err := w.getInnerText("[data-trim-target='content']")
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return fmt.Errorf("error retrieving description: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Get cast url
 | 
			
		||||
		videoElement, err := w.page.QuerySelector("video")
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return fmt.Errorf("error retrieving video element: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
		if videoElement == nil {
 | 
			
		||||
			return fmt.Errorf("could not find video element")
 | 
			
		||||
		}
 | 
			
		||||
		castSource, err := videoElement.GetAttribute("cast-src")
 | 
			
		||||
 | 
			
		||||
		// Store info in database
 | 
			
		||||
		tx, err := db.Begin()
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return fmt.Errorf("error starting transaction: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
		defer tx.Rollback()
 | 
			
		||||
		result, err := tx.Exec("update videos set title = ?, description = ?, cast = ? where id = ?", title, description, castSource, id)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return fmt.Errorf("error updating database: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
		rowsAffected, err := result.RowsAffected()
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return fmt.Errorf("error retrieving affected rows: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
		if rowsAffected != 1 {
 | 
			
		||||
			return fmt.Errorf("unexpected number of rows affected: %d", rowsAffected)
 | 
			
		||||
		}
 | 
			
		||||
		err = tx.Commit()
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return fmt.Errorf("error commiting changeds: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (w *WebClient) getInnerText(selector string) (string, error) {
 | 
			
		||||
	// Get video title
 | 
			
		||||
	titleElement, err := w.page.QuerySelector(selector)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
	if titleElement == nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
	title, err := titleElement.InnerText()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
	return title, nil
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user