Skip to content

Commit

Permalink
improve util.load/save_cookiestxt() and add tests
Browse files Browse the repository at this point in the history
- take a file object as argument instead of a filename
- accept whitespace before comments ("   # comment")
- map expiration "0" to None and not the number 0
  • Loading branch information
mikf committed Jan 25, 2020
1 parent e35c2ea commit 2a9be48
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 55 deletions.
6 changes: 4 additions & 2 deletions gallery_dl/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ def _init_cookies(self):
elif isinstance(cookies, str):
cookiefile = util.expand_path(cookies)
try:
cookies = util.load_cookiestxt(cookiefile)
with open(cookiefile) as fp:
cookies = util.load_cookiestxt(fp)
except Exception as exc:
self.log.warning("cookies: %s", exc)
else:
Expand All @@ -217,7 +218,8 @@ def _store_cookies(self):
"""Store the session's cookiejar in a cookies.txt file"""
if self._cookiefile and self.config("cookies-update", True):
try:
util.save_cookiestxt(self._cookiefile, self._cookiejar)
with open(self._cookiefile, "w") as fp:
util.save_cookiestxt(fp, self._cookiejar)
except OSError as exc:
self.log.warning("cookies: %s", exc)

Expand Down
103 changes: 51 additions & 52 deletions gallery_dl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,66 +136,65 @@ def remove_directory(path):
pass


def load_cookiestxt(fp):
    """Parse a Netscape cookies.txt file and return a list of its Cookies

    'fp' is an iterable of text lines, e.g. a file object opened in
    text mode. The caller is responsible for opening and closing it.

    Blank lines and lines starting with '#' or '$' (optionally preceded
    by whitespace) are ignored. A '#HttpOnly_' prefix is removed and the
    rest of the line is parsed like a regular entry.

    Raises ValueError if an entry does not consist of exactly seven
    tab-separated fields.
    """
    cookies = []

    for line in fp:

        # allow indented comments; this also reduces
        # whitespace-only lines to an empty string
        line = line.lstrip()
        # strip '#HttpOnly_'
        if line.startswith("#HttpOnly_"):
            line = line[10:]
        # ignore empty lines and comments
        if not line or line[0] in ("#", "$"):
            continue
        # strip trailing '\n'
        if line[-1] == "\n":
            line = line[:-1]

        domain, domain_specified, path, secure, expires, name, value = \
            line.split("\t")
        # an empty name field marks a cookie without a value:
        # its name is stored in the value column
        if not name:
            name = value
            value = None

        cookies.append(Cookie(
            0, name, value,
            None, False,
            domain,
            domain_specified == "TRUE",
            domain.startswith("."),
            path, False,
            secure == "TRUE",
            # "0" and an empty field both map to None (no expiration)
            None if expires == "0" or not expires else expires,
            False, None, None, {},
        ))

    return cookies


def save_cookiestxt(fp, cookies):
    """Write 'cookies' in Netscape cookies.txt format to 'fp'

    'fp' is any object with a write() method accepting str, e.g. a file
    opened in text mode. The caller is responsible for opening and
    closing it. 'cookies' is an iterable of Cookie objects.
    """
    fp.write("# Netscape HTTP Cookie File\n\n")

    for cookie in cookies:
        # a cookie without a value is stored with an empty name field
        # and its name in the value column
        if cookie.value is None:
            name = ""
            value = cookie.name
        else:
            name = cookie.name
            value = cookie.value

        fp.write("\t".join((
            cookie.domain,
            "TRUE" if cookie.domain.startswith(".") else "FALSE",
            cookie.path,
            "TRUE" if cookie.secure else "FALSE",
            # cookies without expiration are written as "0"
            "0" if cookie.expires is None else str(cookie.expires),
            name,
            value,
        )) + "\n")


def code_to_language(code, default=None):
Expand Down
98 changes: 97 additions & 1 deletion test/test_util.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2015-2019 Mike Fährmann
# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

import unittest
import sys
import io
import random
import string
import http.cookiejar

from gallery_dl import util, text, exception

Expand Down Expand Up @@ -158,6 +160,100 @@ def _run_test(self, func, tests):
self.assertEqual(func(*args), result)


class TestCookiesTxt(unittest.TestCase):
    """Tests for util.load_cookiestxt() and util.save_cookiestxt()"""

    def test_load_cookiestxt(self):
        """Parse cookies.txt content from in-memory file objects"""

        def _assert(content, expected):
            cookies = util.load_cookiestxt(io.StringIO(content, None))
            # zip() stops at the shorter sequence, so a wrong number of
            # parsed cookies has to be checked for explicitly
            self.assertEqual(len(cookies), len(expected))
            for c, e in zip(cookies, expected):
                self.assertEqual(c.__dict__, e.__dict__)

        _assert("", [])
        _assert("\n\n\n", [])
        _assert("$ Comment", [])
        _assert("# Comment", [])
        _assert(" # Comment \n\n $ Comment ", [])
        _assert(
            ".example.org\tTRUE\t/\tTRUE\t0\tname\tvalue",
            [self._cookie("name", "value", ".example.org")],
        )
        _assert(
            ".example.org\tTRUE\t/\tTRUE\t\tname\t",
            [self._cookie("name", "", ".example.org")],
        )
        _assert(
            "# Netscape HTTP Cookie File\n"
            "\n"
            "# default\n"
            ".example.org\tTRUE\t/\tFALSE\t0\tn1\tv1\n"
            ".example.org\tTRUE\t/\tTRUE\t2145945600\tn2\tv2\n"
            # empty name field: 'n3' is a cookie without value
            ".example.org\tTRUE\t/path\tFALSE\t0\t\tn3\n"
            "\n"
            "  # # extra # # \n"
            # empty expires field and empty value
            "www.example.org\tFALSE\t/\tFALSE\t\tn4\t\n"
            "www.example.org\tFALSE\t/path\tFALSE\t100\tn5\tv5\n",
            [
                self._cookie(
                    "n1", "v1", ".example.org", True, "/", False),
                self._cookie(
                    "n2", "v2", ".example.org", True, "/", True, 2145945600),
                self._cookie(
                    "n3", None, ".example.org", True, "/path", False),
                self._cookie(
                    "n4", "", "www.example.org", False, "/", False),
                self._cookie(
                    "n5", "v5", "www.example.org", False, "/path", False, 100),
            ],
        )

        # an entry with only six fields must be rejected; wrap the content
        # in a file object so the line itself is what gets parsed
        with self.assertRaises(ValueError):
            util.load_cookiestxt(
                io.StringIO("example.org\tTRUE\t/\tTRUE\t0\tname"))

    def test_save_cookiestxt(self):
        """Serialize cookies into an in-memory file object"""

        def _assert(cookies, expected):
            fp = io.StringIO(newline=None)
            util.save_cookiestxt(fp, cookies)
            self.assertMultiLineEqual(fp.getvalue(), expected)

        _assert([], "# Netscape HTTP Cookie File\n\n")
        _assert(
            [self._cookie("name", "value", ".example.org")],
            "# Netscape HTTP Cookie File\n\n"
            ".example.org\tTRUE\t/\tTRUE\t0\tname\tvalue\n",
        )
        _assert(
            [
                self._cookie(
                    "n1", "v1", ".example.org", True, "/", False),
                self._cookie(
                    "n2", "v2", ".example.org", True, "/", True, 2145945600),
                self._cookie(
                    "n3", None, ".example.org", True, "/path", False),
                self._cookie(
                    "n4", "", "www.example.org", False, "/", False),
                self._cookie(
                    "n5", "v5", "www.example.org", False, "/path", False, 100),
            ],
            "# Netscape HTTP Cookie File\n"
            "\n"
            ".example.org\tTRUE\t/\tFALSE\t0\tn1\tv1\n"
            ".example.org\tTRUE\t/\tTRUE\t2145945600\tn2\tv2\n"
            ".example.org\tTRUE\t/path\tFALSE\t0\t\tn3\n"
            "www.example.org\tFALSE\t/\tFALSE\t0\tn4\t\n"
            "www.example.org\tFALSE\t/path\tFALSE\t100\tn5\tv5\n",
        )

    def _cookie(self, name, value, domain, domain_specified=True,
                path="/", secure=True, expires=None):
        """Build a Cookie with the same fixed fields load_cookiestxt() uses"""
        return http.cookiejar.Cookie(
            0, name, value, None, False,
            domain, domain_specified, domain.startswith("."),
            path, False, secure, expires, False, None, None, {},
        )


class TestFormatter(unittest.TestCase):

kwdict = {
Expand Down

0 comments on commit 2a9be48

Please sign in to comment.