Skip to content

Commit

Permalink
Don't .lstrip() the entire HTML document.
Browse files: browse the repository at this point in the history
  • Loading branch information
domdfcoding committed Jan 31, 2022
1 parent 1b5e494 commit c0b9be3
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion src/pip/_internal/index/collector.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -350,7 +350,16 @@ def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Lin
# requested to use html5lib.
if not use_deprecated_html5lib:
expected_doctype = "<!doctype html>".encode(encoding)
actual_start = page.content.lstrip()[: len(expected_doctype)]

char: str
offset: int = 0
for char in page.content:
if chr(char).isspace():
offset += 1
else:
break

actual_start = page.content[offset : offset + len(expected_doctype)]
if actual_start.decode(encoding).lower() != "<!doctype html>":
deprecated(
reason=(
Expand Down

0 comments on commit c0b9be3

Please sign in to comment.