Working on scraping videos
This commit is contained in:
parent
1d0fd243fe
commit
27c58d06e0
6
go.mod
6
go.mod
@ -7,5 +7,11 @@ require github.com/mattn/go-sqlite3 v1.14.24
|
||||
require (
|
||||
github.com/PuerkitoBio/goquery v1.10.1 // indirect
|
||||
github.com/andybalholm/cascadia v1.3.3 // indirect
|
||||
github.com/deckarep/golang-set/v2 v2.7.0 // indirect
|
||||
github.com/go-jose/go-jose/v3 v3.0.3 // indirect
|
||||
github.com/go-stack/stack v1.8.1 // indirect
|
||||
github.com/headzoo/surf v1.0.1 // indirect
|
||||
github.com/playwright-community/playwright-go v0.4902.0 // indirect
|
||||
golang.org/x/net v0.33.0 // indirect
|
||||
gopkg.in/headzoo/surf.v1 v1.0.1 // indirect
|
||||
)
|
||||
|
19
go.sum
19
go.sum
@ -2,9 +2,24 @@ github.com/PuerkitoBio/goquery v1.10.1 h1:Y8JGYUkXWTGRB6Ars3+j3kN0xg1YqqlwvdTV8W
|
||||
github.com/PuerkitoBio/goquery v1.10.1/go.mod h1:IYiHrOMps66ag56LEH7QYDDupKXyo5A8qrjIx3ZtujY=
|
||||
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
||||
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/deckarep/golang-set/v2 v2.7.0 h1:gIloKvD7yH2oip4VLhsv3JyLLFnC0Y2mlusgcvJYW5k=
|
||||
github.com/deckarep/golang-set/v2 v2.7.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
|
||||
github.com/go-jose/go-jose/v3 v3.0.3 h1:fFKWeig/irsp7XD2zBxvnmA/XaRWp5V3CBsZXJF7G7k=
|
||||
github.com/go-jose/go-jose/v3 v3.0.3/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
|
||||
github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw=
|
||||
github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/headzoo/surf v1.0.1 h1:wk3+LT8gjnCxEwfBJl6MhaNg154En5KjgmgzAG9uMS0=
|
||||
github.com/headzoo/surf v1.0.1/go.mod h1:/bct0m/iMNEqpn520y01yoaWxsAEigGFPnvyR1ewR5M=
|
||||
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
|
||||
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/playwright-community/playwright-go v0.4902.0 h1:SslPUKmc35YgTBZKTLhokxrqTsVk3/mirj+TkqR6dC0=
|
||||
github.com/playwright-community/playwright-go v0.4902.0/go.mod h1:kBNWs/w2aJ2ZUp1wEOOFLXgOqvppFngM5OS+qyhl+ZM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
@ -70,3 +85,7 @@ golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/headzoo/surf.v1 v1.0.1 h1:oDBy9b5NlTb2Hvl3hF8NN+Qy7ypC9/g5YDP85pPh13k=
|
||||
gopkg.in/headzoo/surf.v1 v1.0.1/go.mod h1:T0BH8276y+OPL0E4tisxCFjBVIAKGbwdYU7AS7/EpQQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
37
main.go
37
main.go
@ -1,38 +1,35 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/playwright-community/playwright-go"
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
func main() {
|
||||
options := &playwright.RunOptions{
|
||||
Browsers: []string{"chromium"},
|
||||
}
|
||||
err := playwright.Install(options)
|
||||
if err != nil {
|
||||
log.Panicf("error installing playwright: %w", err)
|
||||
}
|
||||
|
||||
db := openDatabase()
|
||||
defer db.Close()
|
||||
|
||||
username := os.Getenv("VIVAPLUS_USER")
|
||||
password := os.Getenv("VIVAPLUS_PASS")
|
||||
w := NewWebClient(options)
|
||||
|
||||
//username := os.Getenv("VIVAPLUS_USER")
|
||||
////password := os.Getenv("VIVAPLUS_PASS")
|
||||
//password, err := base64.StdEncoding.DecodeString(os.Getenv("VIVAPLUS_PASS"))
|
||||
//if err != nil {
|
||||
// log.Fatalf("error decoding password: %v", err)
|
||||
//}
|
||||
|
||||
w := NewWebClient()
|
||||
err := w.VivaLogin(username, string(password))
|
||||
if err != nil {
|
||||
log.Fatalf("error login in: %v", err)
|
||||
}
|
||||
|
||||
//form := url.Values{}
|
||||
//form.Set("email", username)
|
||||
//form.Set("password", string(password))
|
||||
//
|
||||
//// First fetch csrf token by doing a get. It is found in a meta tag with name="csrf-token"
|
||||
//
|
||||
//resp, err := http.Post("https://vivaplus.tv/supporters/sign_in", "application/x-www-form-urlencoded;charset=UTF-8", strings.NewReader(form.Encode()))
|
||||
//err = w.VivaLogin(username, string(password))
|
||||
//if err != nil {
|
||||
// log.Fatalf("error logging in: %v", err)
|
||||
// log.Fatalf("error login in: %v", err)
|
||||
//}
|
||||
//
|
||||
//log.Printf("Status code: %d", resp.StatusCode)
|
||||
////println(resp)
|
||||
|
||||
w.VivaFindAllVideos()
|
||||
}
|
||||
|
129
vivaweb.go
129
vivaweb.go
@ -1,105 +1,82 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/playwright-community/playwright-go"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// BASE_URL is the root URL of the vivaplus.tv site, including the trailing
// slash; other URLs in this file are built by appending a path to it.
const BASE_URL = "https://vivaplus.tv/"
|
||||
// SIGN_IN_URL is the supporter sign-in page, derived from BASE_URL.
const SIGN_IN_URL = BASE_URL + "supporters/sign_in"
|
||||
|
||||
type WebClient struct {
|
||||
csrfToken string
|
||||
cookies *cookiejar.Jar
|
||||
pw *playwright.Playwright
|
||||
browser playwright.Browser
|
||||
//browser *browser.Browser
|
||||
//csrfToken string
|
||||
//cookies *cookiejar.Jar
|
||||
}
|
||||
|
||||
func NewWebClient() *WebClient {
|
||||
jar, err := cookiejar.New(nil)
|
||||
func NewWebClient(options *playwright.RunOptions) *WebClient {
|
||||
pw, err := playwright.Run(options)
|
||||
if err != nil {
|
||||
log.Fatalf("error creating cookiejar: %w", err)
|
||||
log.Fatalf("error running playwright: %w", pw)
|
||||
}
|
||||
browser, err := pw.Firefox.Launch()
|
||||
if err != nil {
|
||||
log.Fatalf("error running firefox: %w", pw)
|
||||
}
|
||||
return &WebClient{
|
||||
cookies: jar,
|
||||
pw: pw,
|
||||
browser: browser,
|
||||
}
|
||||
}
|
||||
|
||||
func (w *WebClient) RegisterCookies(rawUrl string, resp *http.Response) {
|
||||
u, err := url.Parse(rawUrl)
|
||||
if err != nil {
|
||||
log.Fatalf("error parsing url: %v", err)
|
||||
}
|
||||
w.cookies.SetCookies(u, resp.Cookies())
|
||||
}
|
||||
|
||||
func (w *WebClient) UseCookies(rawUrl string, req *http.Request) {
|
||||
u, err := url.Parse(rawUrl)
|
||||
if err != nil {
|
||||
log.Fatalf("error parsing url: %v", err)
|
||||
}
|
||||
for _, c := range w.cookies.Cookies(u) {
|
||||
req.AddCookie(c)
|
||||
}
|
||||
}
|
||||
|
||||
func (w *WebClient) FetchCsrfToken() error {
|
||||
resp, err := http.Get(SIGN_IN_URL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting sign in page: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing sign in page: %w", err)
|
||||
}
|
||||
w.RegisterCookies(SIGN_IN_URL, resp)
|
||||
|
||||
csrfTokenElement := doc.Find("meta[name='csrf-token']").First()
|
||||
if csrfTokenElement == nil {
|
||||
return errors.New("error getting csrf token element")
|
||||
}
|
||||
var exists bool
|
||||
w.csrfToken, exists = csrfTokenElement.Attr("content")
|
||||
if !exists {
|
||||
return errors.New("error content attribute does not exist")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *WebClient) VivaLogin(username, password string) error {
|
||||
err := w.FetchCsrfToken()
|
||||
page, err := w.browser.NewPage()
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("error creating page: %w", err)
|
||||
}
|
||||
|
||||
form := url.Values{}
|
||||
form.Set("email", username)
|
||||
form.Set("password", password)
|
||||
|
||||
println("Encoded form:", form.Encode())
|
||||
println("CSRF token:", w.csrfToken)
|
||||
req, err := http.NewRequest("POST", SIGN_IN_URL, strings.NewReader(form.Encode()))
|
||||
w.UseCookies(SIGN_IN_URL, req)
|
||||
for _, c := range req.Cookies() {
|
||||
println("Cookie:", c.Name, "=", c.Value)
|
||||
}
|
||||
_, err = page.Goto(SIGN_IN_URL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating login request: %w", err)
|
||||
}
|
||||
req.Header.Add("x-csrf-token", w.csrfToken)
|
||||
req.Header.Add("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error logging in: %w", err)
|
||||
return fmt.Errorf("error navigating to sign-in page: %w", err)
|
||||
}
|
||||
|
||||
log.Printf("Status code: %d", resp.StatusCode)
|
||||
err = page.GetByTestId("SupporterLogin.EmailInput").Fill(username)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error filling in email: %w", err)
|
||||
}
|
||||
err = page.GetByTestId("SupporterLogin.PasswordInput").Fill(password)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error filling in email: %w", err)
|
||||
}
|
||||
|
||||
err = page.GetByTestId("SupporterLogin.SubmitButton").Click()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *WebClient) VivaFindAllVideos() error {
|
||||
log.Printf("Loading list of all videos...")
|
||||
page, err := w.browser.NewPage()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating page: %w", err)
|
||||
}
|
||||
|
||||
_, err = page.Goto("https://vivaplus.tv/supporters/videos/all?order=asc")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening page: %w", err)
|
||||
}
|
||||
|
||||
count, err := page.GetByTestId("VideoCatalog.Video").Count()
|
||||
//count, err := el.Count()
|
||||
println(count, err)
|
||||
//attr, err := el.GetAttribute("busy")
|
||||
//println(attr, err)
|
||||
//err = el.ScrollIntoViewIfNeeded()
|
||||
//println(err)
|
||||
//attr, err = el.GetAttribute("busy")
|
||||
//println(attr, err)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user