Skip to content

Commit

Permalink
Fix intermediate URL tracking when working around interstitials
Browse files: browse the repository at this point in the history
  • Loading branch information
mccutchen committed Sep 4, 2024
1 parent 1659369 commit c5ea31d
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 11 deletions.
10 changes: 5 additions & 5 deletions urlresolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,11 +244,6 @@ type redirectRecorder struct {
}

func (r *redirectRecorder) checkRedirect(req *http.Request, via []*http.Request) error {
r.result.IntermediateURLs = append(r.result.IntermediateURLs, via[len(via)-1].URL.String())

if len(via) >= maxRedirects {
return http.ErrUseLastResponse
}
// Work around instagram auth redirect
if strings.Contains(req.URL.String(), "instagram.com/accounts/login/") {
return http.ErrUseLastResponse
Expand All @@ -260,5 +255,10 @@ func (r *redirectRecorder) checkRedirect(req *http.Request, via []*http.Request)
if strings.Contains(req.URL.String(), "bloomberg.com/tosv2.html") {
return http.ErrUseLastResponse
}

r.result.IntermediateURLs = append(r.result.IntermediateURLs, via[len(via)-1].URL.String())
if len(via) >= maxRedirects {
return http.ErrUseLastResponse
}
return nil
}
24 changes: 18 additions & 6 deletions urlresolver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,46 +137,58 @@ func TestResolver(t *testing.T) {
// https://github.com/mccutchen/thresholderbot/pull/63
name: "forbes interstitial detection",
handlerFunc: func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/start" {
http.Redirect(w, r, "/forbes", http.StatusFound)
return
}
if r.URL.Path == "/forbes" {
http.Redirect(w, r, "https://www.forbes.com/forbes/welcome/", http.StatusFound)
return
}
},
givenURL: "/forbes",
givenURL: "/start",
wantResult: Result{
ResolvedURL: "/forbes",
Title: "",
IntermediateURLs: []string{"/forbes"},
IntermediateURLs: []string{"/start"},
},
},
{
name: "instagram auth detection",
handlerFunc: func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/start" {
http.Redirect(w, r, "/instagram", http.StatusFound)
return
}
if r.URL.Path == "/instagram" {
http.Redirect(w, r, "https://www.instagram.com/accounts/login/", http.StatusFound)
return
}
},
givenURL: "/instagram",
givenURL: "/start",
wantResult: Result{
ResolvedURL: "/instagram",
Title: "",
IntermediateURLs: []string{"/instagram"},
IntermediateURLs: []string{"/start"},
},
},
{
name: "bloomberg bot detection",
handlerFunc: func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/start" {
http.Redirect(w, r, "/bloomberg", http.StatusFound)
return
}
if r.URL.Path == "/bloomberg" {
http.Redirect(w, r, "https://www.bloomberg.com/tosv2.html?url=foo", http.StatusFound)
return
}
},
givenURL: "/bloomberg",
givenURL: "/start",
wantResult: Result{
ResolvedURL: "/bloomberg",
Title: "",
IntermediateURLs: []string{"/bloomberg"},
IntermediateURLs: []string{"/start"},
},
},
{
Expand Down

0 comments on commit c5ea31d

Please sign in to comment.