diff --git a/.gitignore b/.gitignore index c0c10d9..1438942 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ imdb/.imdbcache/ # Sample output files output.html +output.json # Api key files apikey.txt diff --git a/imdb/advancedsearch.go b/imdb/advancedsearch.go index 040bb35..707a2d8 100644 --- a/imdb/advancedsearch.go +++ b/imdb/advancedsearch.go @@ -4,10 +4,9 @@ package imdb import ( + "encoding/json" "fmt" "net/http" - "strconv" - "strings" "github.com/Jisin0/filmigo/encode" "github.com/Jisin0/filmigo/types" @@ -73,20 +72,62 @@ type AdvancedSearchTitleOpts struct { // Single result from the AdvancedSearchTitle result list. type AdvancedSearchTitleResult struct { - // Index number of the item. - Index int - // Title: Name of the movie/show or person. - Title string + // Indicates whether the title can be rated on imdb. + CanRate bool `json:"canRate"` + // Parental certificate of the title: 15 indicates TV-MA, 12 indicates PG-13, 18 indicates TV-MA. + Certificate string `json:"certificate"` + // The year in which a TVShow ended. Only for Series and Mini-Series. + EndYear int `json:"endYear"` + // Genres of the title. + Genres []string `json:"genres"` + // Indicates whether the movie has an online watching option (highly inaccurate). + HasWatchOption bool `json:"hasWatchOption"` + // Full original Title of the movie/show. + OriginalTitle string `json:"originalTitleText"` + // Plot of the movie/show. + Plot string `json:"plot"` // Image: Poster image of a title or profile image of a person. - Image string - // Link: Link to the title or person's imdb page. - Link string - // Metadata: Metadata for titles containing the year of release, duration and us certificate. - Metadata []string - // Rating: A string containing rating info for ex: 7.5 (35K). - Rating string - // Description: A description of the title or person. - Description string + Image AdvSearchImage `json:"primaryImage"` + // Rating data about the title. + Rating struct { + // Value of rating out of 10. 
+ Value int `json:"aggregateRating"` + // Number of votes received for the title. + Votes int64 `json:"voteCount"` + } `json:"ratingSummary"` + // Year in which the title was first released. + ReleaseYear int `json:"releaseYear"` + // Runtime of the title in minutes. + Runtime int `json:"runtime"` + // Imdb id of the title. + ID string `json:"titleId"` + // Title of the movie or show. + Title string `json:"titleText"` + // Data about the type of title. + Type struct { + // Indicates whether the title can have episodes. + CanHaveEpisodes bool `json:"canHaveEpisodes"` + // Id of the type. Possible values include movie, tvSeries, tvMiniSeries etc. + ID string `json:"id"` + // User-Friendly text about the type for ex: TV Series for tvSeries. + Text string `json:"text"` + } `json:"titleType"` + // Video id of the trailer of the title. + TrailerID string `json:"trailerId"` +} + +// Poster image of an AdvancedSearch result. +type AdvSearchImage struct { + // Caption of the image. + Caption string `json:"caption"` + // Height of the image in pixels. + Height int `json:"height"` + // ID of the image (not sure where to use this) + ID string `json:"id"` + // URL of the image. + URL string `json:"url"` + // Width of the image in pixels. + Width int `json:"width"` } // AdvancedSearchTitle uses the search page to search for titles using many configuration options. Use SearchX methods for simple fast searches using the api. 
@@ -124,67 +165,33 @@ func (*ImdbClient) AdvancedSearchTitle(opts *AdvancedSearchTitleOpts) ([]*Advanc defer resp.Body.Close() - list, err := htmlquery.Query(doc, "//main/div[@role='presentation']/div[last()]//div[@role='tabpanel']//section/div[2]/div[2]/ul") - if err != nil || list == nil { - return nil, err + dataNode := htmlquery.FindOne(doc, "//script[@id='__NEXT_DATA__']") + if dataNode == nil { + return nil, errors.New("results not found") } - elements, err := htmlquery.QueryAll(list, "//div[ends-with(@class, 'dli-parent')]") - if err != nil { - return nil, errors.Wrap(err, "failed elements query") + // temporary type to get to deeply nested results. + // using a third party lib like gjson should be considered. + type a struct { + Props struct { + PropsPage struct { + SearchResults struct { + TitleResults struct { + Items []*AdvancedSearchTitleResult `json:"titleListItems"` + } `json:"titleResults"` + } `json:"searchResults"` + } `json:"pageProps"` + } `json:"props"` } - var results []*AdvancedSearchTitleResult - - for _, e := range elements { - var item AdvancedSearchTitleResult - - if posterNode, _ := htmlquery.Query(e, "//img"); posterNode != nil { - for _, a := range posterNode.Attr { - if a.Key == "src" { - item.Image = a.Val - } - } - } - - if titleNode, _ := htmlquery.Query(e, "//h3"); titleNode != nil { - s := htmlquery.InnerText(titleNode) - if split := strings.SplitN(s, ".", 2); len(split) > 1 { - n, _ := strconv.Atoi(split[0]) - - item.Index = n - s = strings.TrimSpace(split[1]) - } - - item.Title = s + var data a - aNode, _ := htmlquery.Query(titleNode, "/..") - for _, a := range aNode.Attr { - if a.Key == href { - item.Link = baseImdbURL + a.Val - } - } - } + json.Unmarshal([]byte(htmlquery.InnerText(dataNode)), &data) - if metadataNode, _ := htmlquery.Query(e, "//div[ends-with(@class, 'dli-title-metadata')]"); metadataNode != nil { - var items []string + results := data.Props.PropsPage.SearchResults.TitleResults.Items - for _, span := range 
htmlquery.Find(metadataNode, "/span") { - items = append(items, htmlquery.InnerText(span)) - } - - item.Metadata = items - } - - if ratingsNode, _ := htmlquery.Query(e, "//span[starts-with(@data-testid, 'ratingGroup')]"); ratingsNode != nil { - item.Rating = htmlquery.InnerText(ratingsNode) - } - - if descriptionNode, _ := htmlquery.Query(e, "/div[last()]//div[contains(@class, 'inner')]"); descriptionNode != nil { - item.Description = htmlquery.InnerText(descriptionNode) - } - - results = append(results, &item) + if len(results) < 1 { + return results, errors.New("results not found") } return results, nil @@ -194,7 +201,7 @@ func (*ImdbClient) AdvancedSearchTitle(opts *AdvancedSearchTitleOpts) ([]*Advanc // // - client : Client to make imdb requests through. func (s *AdvancedSearchTitleResult) FullTitle(client *ImdbClient) (*Movie, error) { - return client.GetMovie(s.Link) + return client.GetMovie(s.ID) } // Options for the AdvancedSearchName query see https://imdb.com/search/title to see the list and syntax for each option. @@ -224,27 +231,41 @@ type AdvancedSearchNameOpts struct { } // Single results item from an AdvancedSearchName results list. -type AdvacedSearchNameResult struct { - // Index number of the item. - Index int - // Title: Name of the movie/show or person. - Title string - // Image: Poster image of a title or profile image of a person. - Image string +type AdvancedSearchNameResult struct { + // Title: Name of the person. + Title string `json:"nameText"` + // Bio or short description of the person. + Bio string `json:"bio"` + // Data about a title the person is known for. + KnownFor struct { + // Indicates whether the title can have episodes. + CanHaveEpisodes bool `json:"canHaveEpisodes"` + // Original or full title of the movie or show. + OriginalTitle string `json:"originalTitle"` + // Imdb ID of the title. + ID string `json:"titleId"` + // Name of the title. + Title string `json:"titleText"` + // Range of years in which the title was released. 
+ YearRange struct { + // Year in which the title was first released. + ReleaseYear int `json:"year"` + // Year in which a series ended or last broadcasted. + EndYear int `json:"endYear"` + } `json:"yearRange"` + } `json:"knownFor"` + // Imdb ID of the person. + ID string `json:"nameId"` + // Image: Profile image of a person. + Image AdvSearchImage `json:"primaryImage"` // Professions: Roles taken by a person for ex: Director, Actress, Producer. - Professions []string - // Top title of a actor/actress. Only for people/names. - TopTitle types.Link - // Link: Link to the title or person's imdb page. - Link string - // Description: A description of the title or person. - Description string + Professions []string `json:"primaryProfessions"` } // AdvancedSearchName uses the search page to search for names using many configuration options. Use SearchX methods for simple fast searches using the api. // // opts - configure search options. -func (*ImdbClient) AdvancedSearchName(opts *AdvancedSearchNameOpts) ([]*AdvacedSearchNameResult, error) { +func (*ImdbClient) AdvancedSearchName(opts *AdvancedSearchNameOpts) ([]*AdvancedSearchNameResult, error) { urlParams, _ := encode.URLParams(*opts) urlParams = encode.URLMapParams(opts.ExtraParams, urlParams) @@ -255,77 +276,33 @@ func (*ImdbClient) AdvancedSearchName(opts *AdvancedSearchNameOpts) ([]*AdvacedS return nil, err } - list, err := htmlquery.Query(doc, "//main/div[@role='presentation']/div[last()]//div[@role='tabpanel']//section/div[2]/div[2]/ul") - if err != nil || list == nil { - return nil, errors.Wrap(err, "failed to find people list") + dataNode := htmlquery.FindOne(doc, "//script[@id='__NEXT_DATA__']") + if dataNode == nil { + return nil, errors.New("results not found") } - elements, err := htmlquery.QueryAll(list, "//div[ends-with(@class, 'dli-parent')]") - if err != nil { - return nil, errors.Wrap(err, "failed elements query") + // temporary type to get to deeply nested results. 
+ // using a third party lib like gjson should be considered. + type a struct { + Props struct { + PropsPage struct { + SearchResults struct { + NameResults struct { + Items []*AdvancedSearchNameResult `json:"nameListItems"` + } `json:"nameResults"` + } `json:"searchResults"` + } `json:"pageProps"` + } `json:"props"` } - var results []*AdvacedSearchNameResult - - for _, e := range elements { - var item AdvacedSearchNameResult - - if posterNode, _ := htmlquery.Query(e, "//img"); posterNode != nil { - for _, a := range posterNode.Attr { - if a.Key == "src" { - item.Image = a.Val - } else if a.Key == href { - item.Link = a.Val - } - } - } - - if titleNode, _ := htmlquery.Query(e, "//h3"); titleNode != nil { - s := htmlquery.InnerText(titleNode) - if split := strings.SplitN(s, ".", 2); len(split) > 1 { - n, _ := strconv.Atoi(split[0]) - - item.Index = n - s = strings.TrimSpace(split[1]) - } - - item.Title = s - - aNode, _ := htmlquery.Query(titleNode, "/..") - for _, a := range aNode.Attr { - if a.Key == href { - item.Link = baseImdbURL + a.Val - } - } - } - - if professionsNode, _ := htmlquery.Query(e, "//ul[@data-testid='nlib-professions']"); professionsNode != nil { - var items []string - for _, li := range htmlquery.Find(professionsNode, "/li") { - items = append(items, htmlquery.InnerText(li)) - } - - item.Professions = items - } - - if topTitleNode, _ := htmlquery.Query(e, "//a[@data-testid='nlib-known-for-title']"); topTitleNode != nil { - var topTitle types.Link - topTitle.Text = htmlquery.InnerText(topTitleNode) - - for _, a := range topTitleNode.Attr { - if a.Key == href { - topTitle.Href = baseImdbURL + a.Val - } - } - - item.TopTitle = topTitle - } - - if descriptionNode, _ := htmlquery.Query(e, "/div[last()]//div[contains(@class, 'inner')]"); descriptionNode != nil { - item.Description = htmlquery.InnerText(descriptionNode) - } - - results = append(results, &item) + var data a + + json.Unmarshal([]byte(htmlquery.InnerText(dataNode)), &data) + + results := 
data.Props.PropsPage.SearchResults.NameResults.Items + + if len(results) < 1 { + return results, errors.New("results not found") } return results, nil @@ -334,6 +311,6 @@ func (*ImdbClient) AdvancedSearchName(opts *AdvancedSearchNameOpts) ([]*AdvacedS // FullPerson returns the full data about a title scraped from it's imdb page. // // - client : Client to make imdb requests through. -func (s *AdvancedSearchTitleResult) FullPerson(client *ImdbClient) (*Person, error) { - return client.GetPerson(s.Link) +func (s *AdvancedSearchNameResult) FullPerson(client *ImdbClient) (*Person, error) { + return client.GetPerson(s.ID) } diff --git a/imdb/advancedsearch_test.go b/imdb/advancedsearch_test.go index a713940..6fac16f 100644 --- a/imdb/advancedsearch_test.go +++ b/imdb/advancedsearch_test.go @@ -1,6 +1,7 @@ package imdb_test import ( + "fmt" "testing" "github.com/Jisin0/filmigo/imdb" @@ -8,25 +9,63 @@ import ( ) func TestAdvancedSearchTitle(t *testing.T) { - r, err := c.AdvancedSearchTitle(&imdb.AdvancedSearchTitleOpts{Genres: []string{constants.TitleGenreAction}, ExtraParams: map[string]any{"plot": "guns"}}) - if err != nil { - t.Error(err) + testData := []imdb.AdvancedSearchTitleOpts{ + {Genres: []string{constants.TitleGenreAction}, ExtraParams: map[string]any{"plot": "guns"}}, + {CastOrCrew: []string{cillianMurphyID}}, + {TitleName: "aksjgka"}, // bad data } - if len(r) > 0 { - t.Logf("%+v", r[0]) - t.Logf("%v more results", len(r)-1) + lastIndex := len(testData) - 1 // item at this index should fail. + + for i, o := range testData { + t.Run(fmt.Sprint(i), func(t *testing.T) { + _, err := c.AdvancedSearchTitle(&o) + if err != nil { + // if item is last error is expected. 
+ if i == lastIndex { + t.Log("error as expected") + } else { + t.Errorf("item %v returned unexpected error %v", i, err) + } + } else { + if i == lastIndex { + t.Errorf("error expected for item %v but results found", i) + } else { + t.Logf("item %v succesfully returned", i) + } + } + }) + } } func TestAdvancedSearchName(t *testing.T) { - r, err := c.AdvancedSearchName(&imdb.AdvancedSearchNameOpts{Titles: []string{oppenheimerID}}) - if err != nil { - t.Error(err) + testData := []imdb.AdvancedSearchNameOpts{ + {Titles: []string{oppenheimerID}}, + {Awards: []string{constants.NameAwardBestActressNominated}}, + {Name: "shkjag"}, // should fail } - if len(r) > 0 { - t.Logf("%+v", r[0]) - t.Logf("%v more results", len(r)-1) + lastIndex := len(testData) - 1 // item at this index should fail. + + for i, o := range testData { + t.Run(fmt.Sprint(i), func(t *testing.T) { + _, err := c.AdvancedSearchName(&o) + if err != nil { + // if item is last error is expected. + if i == lastIndex { + t.Log("error as expected") + } else { + t.Errorf("item %v returned unexpected error %v", i, err) + } + } else { + if i == lastIndex { + t.Errorf("error expected for item %v but results found", i) + } else { + t.Logf("item %v succesfully returned", i) + } + } + }) + } } diff --git a/imdb/getperson_test.go b/imdb/getperson_test.go index a10fba5..0734f88 100644 --- a/imdb/getperson_test.go +++ b/imdb/getperson_test.go @@ -6,14 +6,32 @@ import ( const ( cillianMurphyID = "nm0614165" + invalidPersonID = "invalidID123" + keanuReevesID = "nm0000206" // Example valid ID, you can replace it with other valid IDs ) -func TestGetPerson(t *testing.T) { - res, err := c.GetPerson(cillianMurphyID) - if err != nil { - t.Error(err) - t.Failed() +func TestGetPersons(t *testing.T) { + personIDs := []string{ + cillianMurphyID, + invalidPersonID, + keanuReevesID, } - t.Logf("%+v", res) + for _, id := range personIDs { + t.Run(id, func(t *testing.T) { + res, err := c.GetPerson(id) + if err != nil { + 
t.Logf("Expected error for ID %s: %v", id, err) + if id != invalidPersonID { + t.Errorf("Unexpected error for valid ID %s: %v", id, err) + } + } else { + if id == invalidPersonID { + t.Errorf("Expected error for invalid ID %s, but got result: %+v", id, res) + } else { + t.Logf("Got person data for ID %s", id) + } + } + }) + } } diff --git a/imdb/search.go b/imdb/search.go index e9603a8..abffa58 100644 --- a/imdb/search.go +++ b/imdb/search.go @@ -55,9 +55,12 @@ type SearchResult struct { } type Image struct { - Height int `json:"height"` - ImageURL string `json:"imageURL"` - Width int `json:"width"` + // Height of the image. + Height int `json:"height"` + // URL of the image. + URL string `json:"imageURL"` + // Width of the image. + Width int `json:"width"` } type Video struct {