package main import ( "database/sql" "errors" "fmt" "gitea.seeseepuff.be/seeseemelk/mysqlite" "github.com/playwright-community/playwright-go" "log" "regexp" "time" ) const BASE_URL = "https://vivaplus.tv" const SIGN_IN_URL = BASE_URL + "/supporters/sign_in" type WebClient struct { pw *playwright.Playwright browser playwright.Browser page playwright.Page //browser *browser.Browser //csrfToken string //cookies *cookiejar.Jar } func NewWebClient(options *playwright.RunOptions) *WebClient { pw, err := playwright.Run(options) if err != nil { log.Fatalf("error running playwright: %v", err) } browser, err := pw.Firefox.Launch() if err != nil { log.Fatalf("error running firefox: %v", err) } page, err := browser.NewPage() if err != nil { log.Fatalf("error creating page: %v", err) } return &WebClient{ pw: pw, browser: browser, page: page, } } func (w *WebClient) Login(username, password string) error { _, err := w.page.Goto(SIGN_IN_URL) if err != nil { return fmt.Errorf("error navigating to sign-in page: %w", err) } err = w.page.GetByTestId("SupporterLogin.EmailInput").Fill(username) if err != nil { return fmt.Errorf("error filling in email: %w", err) } err = w.page.GetByTestId("SupporterLogin.PasswordInput").Fill(password) if err != nil { return fmt.Errorf("error filling in email: %w", err) } log.Printf("Clicking login...") err = w.page.GetByTestId("SupporterLogin.SubmitButton").Click() if err != nil { return fmt.Errorf("error clicking login: %w", err) } err = w.page.WaitForURL("https://vivaplus.tv/supporters") if err != nil { return fmt.Errorf("error waiting for url: %w", err) } log.Printf("Logged in!") return nil } func isValidVideoUrl(url string) bool { // Define the regular expression pattern pattern := `^/supporters/videos/\d+$` // Compile the regular expression re, err := regexp.Compile(pattern) if err != nil { fmt.Println("Error compiling regex:", err) return false } // Check if the path matches the pattern return re.MatchString(url) } func isBlacklistedUrl(url string) bool { return url == "/supporters/payments/checkout/posts/63266/available_tiers" } func parseDateString(dateStr string) (time.Time, error) { const layout = "Jan 2, 2006" t, err := time.Parse(layout, dateStr) if err != nil { return time.Time{}, err } return t, nil } func (w *WebClient) DiscoverAllVideos(db *mysqlite.Db) error { log.Printf("Loading list of all videos...") _, err := w.page.Goto("https://vivaplus.tv/supporters/videos/all?order=desc") if err != nil { return fmt.Errorf("error opening page: %w", err) } count, err := w.page.GetByTestId("VideoCatalog.Video").Count() println(count, err) println("Looping over videos...") tx, err := db.Begin() if err != nil { return fmt.Errorf("error starting transaction: %w", err) } defer tx.MustRollback() // Find the next run number var currentRun int err = tx.Query("select max(run) from videos").ScanSingle(¤tRun) if err != nil { return fmt.Errorf("error retrieving current run: %w", err) } currentRun++ previousUrls := make(map[string]struct{}) for { locators, err := w.page.GetByTestId("VideoCatalog.Video").All() if err != nil { return err } for _, l := range locators { // Get the URL to the video page href, err := l.GetAttribute("href") if err != nil { return err } // Ensure that it's valid and we haven't already scanned it if _, exists := previousUrls[href]; exists { continue // The item was already scanned. } previousUrls[href] = struct{}{} if isBlacklistedUrl(href) { continue // We want to skip this one } if !isValidVideoUrl(href) { return fmt.Errorf("url has bad format: %s", href) } // Get thumbnail thumbnailEl := l.Locator(".video__image:first-child") thumbnail, err := thumbnailEl.GetAttribute("src") if err != nil { return fmt.Errorf("error retrieving thumbnail: %w", err) } // Ensure the record does not already exist. If it does, we've fetched // all new videos err = tx.Query("select count(1) from videos where url = :url").Bind(href).ScanSingle(&count) if err != nil { return fmt.Errorf("error fetching data from db: %w", err) } if count == 1 { log.Printf("Video already exists: %s", href) goto finish } // Insert it into the database log.Printf("Adding video %s", href) err = tx.Query("insert into videos(url, thumbnail, run) values (?, ?, ?)").Bind(href, thumbnail, currentRun).Exec() if err != nil { return fmt.Errorf("error inserting into db: %w", err) } } // Scroll to the bottom log.Printf("Scrolling...") err = w.page.Keyboard().Press("End") if err != nil { return fmt.Errorf("error scrolling to end of page: %w", err) } } finish: err = tx.Commit() if err != nil { return fmt.Errorf("error committing: %w", err) } return nil } func isRelativeTimeFormat(input string) bool { re := regexp.MustCompile(`^\d+[mhs]\s+ago$`) return re.MatchString(input) } func (w *WebClient) FetchVideoMetadata(db *mysqlite.Db) error { log.Printf("Fetching video metadata...") for { // Fetch the next record from the database var id int var href string err := db.Query("select id, url from videos where `cast` is null limit 1").ScanSingle(&id, &href) if errors.Is(err, sql.ErrNoRows) { log.Printf("Fetched all metadata") return nil } if err != nil { return fmt.Errorf("error fetching record: %w", err) } log.Printf("Fetching data for %s", href) // Fetch the video metadata from the web page _, err = w.page.Goto(BASE_URL + href) if err != nil { return fmt.Errorf("error loading page: %w", err) } // Get video title title, err := w.getInnerText(".video-page__title") if err != nil { return fmt.Errorf("error retrieving title: %w", err) } // Get video description description, err := w.getInnerText("[data-trim-target='content']") if err != nil { return fmt.Errorf("error retrieving description: %w", err) } // Get upload date uploadDateStr, err := w.getInnerText(".video-page__meta") if err != nil { return fmt.Errorf("error retrieving upload date: %w", err) } uploadDate := time.Now() if !isRelativeTimeFormat(uploadDateStr) { uploadDate, err = parseDateString(uploadDateStr) if err != nil { return fmt.Errorf("error parsing date string '%s': %w", uploadDateStr, err) } } // Get cast url videoElement, err := w.page.QuerySelector("video") if err != nil { return fmt.Errorf("error retrieving video element: %w", err) } if videoElement == nil { return fmt.Errorf("could not find video element") } castSource, err := videoElement.GetAttribute("cast-src") // Store info in database err = updateVideoMetadata(db, id, title, description, castSource, uploadDate.Format(time.DateOnly), uploadDate.Year()) if err != nil { return err } } } func updateVideoMetadata(db *mysqlite.Db, id int, title, description, castSource, uploadDate string, year int) error { tx, err := db.Begin() if err != nil { return fmt.Errorf("error starting transaction: %w", err) } defer tx.MustRollback() err = tx.Query("update videos set title = ?, description = ?, cast = ?, upload_date = ?, year = ? where id = ?"). Bind(title, description, castSource, uploadDate, year, id).Exec() if err != nil { return fmt.Errorf("error updating database: %w", err) } err = tx.Commit() if err != nil { return fmt.Errorf("error commiting changeds: %w", err) } return nil } func (w *WebClient) getInnerText(selector string) (string, error) { // Get video title titleElement, err := w.page.QuerySelector(selector) if err != nil { return "", err } if titleElement == nil { return "", err } title, err := titleElement.InnerText() if err != nil { return "", err } return title, nil }