diff --git a/main.go b/main.go
index 752b3cf..5806ae4 100644
--- a/main.go
+++ b/main.go
@@ -28,11 +28,18 @@ func main() {
 		log.Fatalf("error decoding password: %v", err)
 	}
 
-	err = w.VivaLogin(username, string(password))
+	err = w.Login(username, string(password))
 	if err != nil {
 		log.Fatalf("error login in: %v", err)
 	}
 
-	w.VivaDiscoverAllVideos(db)
-	//w.VivaAddMetadata(db)
+	err = w.DiscoverAllVideos(db)
+	if err != nil {
+		log.Fatalf("error discovering videos: %v", err)
+	}
+
+	err = w.FetchVideoMetadata(db)
+	if err != nil {
+		log.Fatalf("error fetching video metadata: %v", err)
+	}
 }
diff --git a/migrations/2_add_columns.sql b/migrations/2_add_columns.sql
new file mode 100644
index 0000000..74c625d
--- /dev/null
+++ b/migrations/2_add_columns.sql
@@ -0,0 +1,3 @@
+ALTER TABLE videos ADD COLUMN upload_date;
+ALTER TABLE videos ADD COLUMN cast;
+ALTER TABLE videos ADD COLUMN description;
diff --git a/vivaweb.go b/vivaweb.go
index 16fb5c5..a604b38 100644
--- a/vivaweb.go
+++ b/vivaweb.go
@@ -2,13 +2,15 @@ package main
 
 import (
 	"database/sql"
+	"errors"
 	"fmt"
 	"github.com/playwright-community/playwright-go"
 	"log"
+	"regexp"
 )
 
-const BASE_URL = "https://vivaplus.tv/"
-const SIGN_IN_URL = BASE_URL + "supporters/sign_in"
+const BASE_URL = "https://vivaplus.tv"
+const SIGN_IN_URL = BASE_URL + "/supporters/sign_in"
 
 type WebClient struct {
 	pw *playwright.Playwright
@@ -39,7 +41,7 @@ func NewWebClient(options *playwright.RunOptions) *WebClient {
 	}
 }
 
-func (w *WebClient) VivaLogin(username, password string) error {
+func (w *WebClient) Login(username, password string) error {
 	_, err := w.page.Goto(SIGN_IN_URL)
 	if err != nil {
 		return fmt.Errorf("error navigating to sign-in page: %w", err)
@@ -69,7 +71,17 @@ func (w *WebClient) VivaLogin(username, password string) error {
 	return nil
 }
 
-func (w *WebClient) VivaDiscoverAllVideos(db *sql.DB) error {
+// videoURLPattern matches site-relative video paths such as
+// /supporters/videos/123. It is compiled once, at package initialization.
+var videoURLPattern = regexp.MustCompile(`^/supporters/videos/\d+$`)
+
+// isValidVideoUrl reports whether url is a site-relative video path of the
+// form /supporters/videos/<digits>.
+func isValidVideoUrl(url string) bool {
+	return videoURLPattern.MatchString(url)
+}
+
+func (w *WebClient) DiscoverAllVideos(db *sql.DB) error {
 	log.Printf("Loading list of all videos...")
 
 	_, err := w.page.Goto("https://vivaplus.tv/supporters/videos/all?order=desc")
@@ -108,6 +120,10 @@ func (w *WebClient) VivaDiscoverAllVideos(db *sql.DB) error {
 			if _, exists := previousUrls[href]; exists {
 				continue // The item was already scanned.
 			}
+
+			if !isValidVideoUrl(href) {
+				return fmt.Errorf("url has bad format: %s", href)
+			}
 			previousUrls[href] = struct{}{}
 
 			result := tx.QueryRow("select count(1) from videos where url = :url", href)
@@ -141,3 +157,113 @@ finish:
 	}
 	return nil
 }
+
+// FetchVideoMetadata loads the page of every row in the videos table whose
+// title is still null and stores the scraped title, description and cast
+// source on that row. It returns nil once no such row is left.
+func (w *WebClient) FetchVideoMetadata(db *sql.DB) error {
+	log.Printf("Fetching video metadata...")
+	for {
+		// Fetch the next record that still lacks a title.
+		row := db.QueryRow("select id, url from videos where title is null limit 1")
+		var id int
+		var href string
+		err := row.Scan(&id, &href)
+		if errors.Is(err, sql.ErrNoRows) {
+			log.Printf("Fetched all metadata")
+			return nil
+		}
+		if err != nil {
+			return fmt.Errorf("error fetching record: %w", err)
+		}
+		log.Printf("Fetching data from %s", href)
+
+		// Fetch the video metadata from the web page
+		_, err = w.page.Goto(BASE_URL + href)
+		if err != nil {
+			return fmt.Errorf("error loading page: %w", err)
+		}
+
+		// Get video title
+		title, err := w.getInnerText(".video-page__title")
+		if err != nil {
+			return fmt.Errorf("error retrieving title: %w", err)
+		}
+
+		// Get video description
+		description, err := w.getInnerText("[data-trim-target='content']")
+		if err != nil {
+			return fmt.Errorf("error retrieving description: %w", err)
+		}
+
+		// Get cast url
+		videoElement, err := w.page.QuerySelector("video")
+		if err != nil {
+			return fmt.Errorf("error retrieving video element: %w", err)
+		}
+		if videoElement == nil {
+			return fmt.Errorf("could not find video element")
+		}
+		castSource, err := videoElement.GetAttribute("cast-src")
+		if err != nil {
+			return fmt.Errorf("error retrieving cast source: %w", err)
+		}
+
+		// Store info in database
+		err = storeVideoMetadata(db, id, title, description, castSource)
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// storeVideoMetadata writes title, description and castSource to the videos
+// row with the given id inside its own transaction. Keeping the transaction
+// in a separate function makes the deferred rollback run once per record
+// instead of piling up until FetchVideoMetadata returns.
+func storeVideoMetadata(db *sql.DB, id int, title, description, castSource string) error {
+	tx, err := db.Begin()
+	if err != nil {
+		return fmt.Errorf("error starting transaction: %w", err)
+	}
+	// After a successful Commit, Rollback only returns sql.ErrTxDone, so the
+	// ignored result is harmless.
+	defer tx.Rollback()
+
+	result, err := tx.Exec("update videos set title = ?, description = ?, cast = ? where id = ?", title, description, castSource, id)
+	if err != nil {
+		return fmt.Errorf("error updating database: %w", err)
+	}
+	rowsAffected, err := result.RowsAffected()
+	if err != nil {
+		return fmt.Errorf("error retrieving affected rows: %w", err)
+	}
+	if rowsAffected != 1 {
+		return fmt.Errorf("unexpected number of rows affected: %d", rowsAffected)
+	}
+	err = tx.Commit()
+	if err != nil {
+		return fmt.Errorf("error committing changes: %w", err)
+	}
+	return nil
+}
+
+// getInnerText returns the inner text of the element on the current page
+// that matches selector. It fails if the lookup errors, if no element
+// matches, or if the text cannot be read.
+func (w *WebClient) getInnerText(selector string) (string, error) {
+	element, err := w.page.QuerySelector(selector)
+	if err != nil {
+		return "", err
+	}
+	if element == nil {
+		// A nil handle with a nil error means nothing matched; report it
+		// instead of silently returning an empty string.
+		return "", fmt.Errorf("no element matches selector %q", selector)
+	}
+	text, err := element.InnerText()
+	if err != nil {
+		return "", err
+	}
+	return text, nil
+}