From 05c961b808bfd8d2e87e569e5694694cfd35702b Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Wed, 23 Feb 2022 11:14:26 +0000 Subject: [PATCH 01/12] Don't warn on false positive for author/maintainer's email While I was working to support pyproject.toml metadata in setuptools, I received as a feedback from the community[^1] that setuptools warns the following message when `author_email` and `maintainer_email` are given in the form of `Person Name <email@address>`: > warning: check: missing meta-data: either (author and author_email) > or (maintainer and maintainer_email) should be supplied This can be seen as a false positive, because indeed both author's name and email are provided. This warning seems to happen because distutils define the `check` command as a subcommand for `sdist`. This change aims to remove this false positive result from the checks. [^1]: https://discuss.python.org/t/help-testing-experimental-features-in-setuptools/13821/18 --- distutils/command/check.py | 40 +++++++++++++++++++++++++++-------- distutils/tests/test_check.py | 22 +++++++++++++++++++ 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/distutils/command/check.py b/distutils/command/check.py index 525540b6cc..af311ca90e 100644 --- a/distutils/command/check.py +++ b/distutils/command/check.py @@ -2,6 +2,8 @@ Implements the Distutils 'check' command. 
""" +from email.utils import getaddresses + from distutils.core import Command from distutils.errors import DistutilsSetupError @@ -96,19 +98,39 @@ def check_metadata(self): if missing: self.warn("missing required meta-data: %s" % ', '.join(missing)) - if metadata.author: - if not metadata.author_email: - self.warn("missing meta-data: if 'author' supplied, " + - "'author_email' should be supplied too") - elif metadata.maintainer: - if not metadata.maintainer_email: - self.warn("missing meta-data: if 'maintainer' supplied, " + - "'maintainer_email' should be supplied too") - else: + if not ( + self._check_contact("author", metadata) or + self._check_contact("maintainer", metadata) + ): self.warn("missing meta-data: either (author and author_email) " + "or (maintainer and maintainer_email) " + "should be supplied") + def _check_contact(self, kind, metadata): + """ + Returns True if the contact's name is specified and False otherwise. + This function will warn if the contact's email is not specified. 
+ """ + name = getattr(metadata, kind) or '' + email = getattr(metadata, kind + '_email') or '' + + msg = ("missing meta-data: if '{}' supplied, " + + "'{}' should be supplied too") + + if name and email: + return True + + if name: + self.warn(msg.format(kind, kind + '_email')) + return True + + addresses = [(alias, addr) for alias, addr in getaddresses([email])] + if any(alias and addr for alias, addr in addresses): + # The contact's name can be encoded in the email: `Name <email>` + return True + + return False + def check_restructuredtext(self): """Checks if the long string fields are reST-compliant.""" data = self.distribution.get_long_description() diff --git a/distutils/tests/test_check.py b/distutils/tests/test_check.py index 91bcdceb43..b41dba3d0a 100644 --- a/distutils/tests/test_check.py +++ b/distutils/tests/test_check.py @@ -71,6 +71,28 @@ def test_check_metadata(self): cmd = self._run(metadata) self.assertEqual(cmd._warnings, 0) + def test_check_author_maintainer(self): + for kind in ("author", "maintainer"): + # ensure no warning when author_email or maintainer_email is given + # (the spec allows these fields to take the form "Name <email>") + metadata = {'url': 'xxx', + kind + '_email': 'Name <name@email.com>', + 'name': 'xxx', 'version': 'xxx'} + cmd = self._run(metadata) + self.assertEqual(cmd._warnings, 0) + + # the check should warn if only email is given and it does not + # contain the name + metadata[kind + '_email'] = 'name@email.com' + cmd = self._run(metadata) + self.assertEqual(cmd._warnings, 1) + + # the check should warn if only the name is given + metadata[kind] = "Name" + del metadata[kind + '_email'] + cmd = self._run(metadata) + self.assertEqual(cmd._warnings, 1) + @unittest.skipUnless(HAS_DOCUTILS, "won't test without docutils") def test_check_document(self): pkg_info, dist = self.create_dist() From b16cf407dcbbfab0df079e894819ea5dce166b48 Mon Sep 17 00:00:00 2001 From: "Jason R. 
Coombs" Date: Sun, 27 Mar 2022 10:20:24 -0400 Subject: [PATCH 02/12] =?UTF-8?q?=F0=9F=91=B9=20Feed=20the=20hobgoblins=20?= =?UTF-8?q?(delint).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- distutils/command/build_scripts.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index e3312cf0ca..dbeef2ddb0 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -2,7 +2,8 @@ Implements the Distutils 'build_scripts' command.""" -import os, re +import os +import re from stat import ST_MODE from distutils import sysconfig from distutils.core import Command @@ -14,6 +15,7 @@ # check if Python is called on the first line with this expression first_line_re = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$') + class build_scripts(Command): description = "\"build\" scripts (copy and fixup #! line)" @@ -26,7 +28,6 @@ class build_scripts(Command): boolean_options = ['force'] - def initialize_options(self): self.build_dir = None self.scripts = None @@ -49,7 +50,6 @@ def run(self): return self.copy_scripts() - def copy_scripts(self): r"""Copy each script listed in 'self.scripts'; if it's marked as a Python script in the Unix way (first line matches 'first_line_re', @@ -101,8 +101,9 @@ def copy_scripts(self): else: executable = os.path.join( sysconfig.get_config_var("BINDIR"), - "python%s%s" % (sysconfig.get_config_var("VERSION"), - sysconfig.get_config_var("EXE"))) + "python%s%s" % ( + sysconfig.get_config_var("VERSION"), + sysconfig.get_config_var("EXE"))) executable = os.fsencode(executable) shebang = b"#!" + executable + post_interp + b"\n" # Python parser starts to read a script using UTF-8 until From 6736459f5bc024bd640ab564c7a5ee0b2d1c0416 Mon Sep 17 00:00:00 2001 From: "Jason R. 
Coombs" Date: Sun, 27 Mar 2022 10:28:33 -0400 Subject: [PATCH 03/12] Rewrite docstring for imperative voice and proper structure. --- distutils/command/build_scripts.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index dbeef2ddb0..64a472aeba 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -51,10 +51,13 @@ def run(self): self.copy_scripts() def copy_scripts(self): - r"""Copy each script listed in 'self.scripts'; if it's marked as a - Python script in the Unix way (first line matches 'first_line_re', - ie. starts with "\#!" and contains "python"), then adjust the first - line to refer to the current Python interpreter as we copy. + """ + Copy each script listed in ``self.scripts``. + + If a script is marked as a Python script (first line matches + 'first_line_re', i.e. starts with ``#!`` and contains + "python"), then adjust in the copy the first line to refer to + the current Python interpreter. """ self.mkpath(self.build_dir) outfiles = [] From beefbe746fd875ea75f6943f55ccca3b77b44674 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Mar 2022 10:31:08 -0400 Subject: [PATCH 04/12] Move 'updated_files' operation outside of if statement as it appears in both branches unconditionally. 
--- distutils/command/build_scripts.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index 64a472aeba..cee65c6430 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -94,10 +94,10 @@ def copy_scripts(self): adjust = True post_interp = match.group(1) or b'' + updated_files.append(outfile) if adjust: log.info("copying and adjusting %s -> %s", script, self.build_dir) - updated_files.append(outfile) if not self.dry_run: if not sysconfig.python_build: executable = self.executable @@ -138,7 +138,6 @@ def copy_scripts(self): else: if f: f.close() - updated_files.append(outfile) self.copy_file(script, outfile) if os.name == 'posix': From fe5e02dd434e98137189406a710c8fa2bfa42e5c Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Mar 2022 10:44:25 -0400 Subject: [PATCH 05/12] Extract method for copying a file. --- distutils/command/build_scripts.py | 155 +++++++++++++++-------------- 1 file changed, 79 insertions(+), 76 deletions(-) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index cee65c6430..359b476501 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -63,82 +63,7 @@ def copy_scripts(self): outfiles = [] updated_files = [] for script in self.scripts: - adjust = False - script = convert_path(script) - outfile = os.path.join(self.build_dir, os.path.basename(script)) - outfiles.append(outfile) - - if not self.force and not newer(script, outfile): - log.debug("not copying %s (up-to-date)", script) - continue - - # Always open the file, but ignore failures in dry-run mode -- - # that way, we'll get accurate feedback if we can read the - # script. 
- try: - f = open(script, "rb") - except OSError: - if not self.dry_run: - raise - f = None - else: - encoding, lines = tokenize.detect_encoding(f.readline) - f.seek(0) - first_line = f.readline() - if not first_line: - self.warn("%s is an empty file (skipping)" % script) - continue - - match = first_line_re.match(first_line) - if match: - adjust = True - post_interp = match.group(1) or b'' - - updated_files.append(outfile) - if adjust: - log.info("copying and adjusting %s -> %s", script, - self.build_dir) - if not self.dry_run: - if not sysconfig.python_build: - executable = self.executable - else: - executable = os.path.join( - sysconfig.get_config_var("BINDIR"), - "python%s%s" % ( - sysconfig.get_config_var("VERSION"), - sysconfig.get_config_var("EXE"))) - executable = os.fsencode(executable) - shebang = b"#!" + executable + post_interp + b"\n" - # Python parser starts to read a script using UTF-8 until - # it gets a #coding:xxx cookie. The shebang has to be the - # first line of a file, the #coding:xxx cookie cannot be - # written before. So the shebang has to be decodable from - # UTF-8. - try: - shebang.decode('utf-8') - except UnicodeDecodeError: - raise ValueError( - "The shebang ({!r}) is not decodable " - "from utf-8".format(shebang)) - # If the script is encoded to a custom encoding (use a - # #coding:xxx cookie), the shebang has to be decodable from - # the script encoding too. - try: - shebang.decode(encoding) - except UnicodeDecodeError: - raise ValueError( - "The shebang ({!r}) is not decodable " - "from the script encoding ({})" - .format(shebang, encoding)) - with open(outfile, "wb") as outf: - outf.write(shebang) - outf.writelines(f.readlines()) - if f: - f.close() - else: - if f: - f.close() - self.copy_file(script, outfile) + self._copy_script(script, outfiles, updated_files) if os.name == 'posix': for file in outfiles: @@ -153,3 +78,81 @@ def copy_scripts(self): os.chmod(file, newmode) # XXX should we modify self.outfiles? 
return outfiles, updated_files + + def _copy_script(self, script, outfiles, updated_files): + adjust = False + script = convert_path(script) + outfile = os.path.join(self.build_dir, os.path.basename(script)) + outfiles.append(outfile) + + if not self.force and not newer(script, outfile): + log.debug("not copying %s (up-to-date)", script) + return + + # Always open the file, but ignore failures in dry-run mode -- + # that way, we'll get accurate feedback if we can read the + # script. + try: + f = open(script, "rb") + except OSError: + if not self.dry_run: + raise + f = None + else: + encoding, lines = tokenize.detect_encoding(f.readline) + f.seek(0) + first_line = f.readline() + if not first_line: + self.warn("%s is an empty file (skipping)" % script) + return + + match = first_line_re.match(first_line) + if match: + adjust = True + post_interp = match.group(1) or b'' + + updated_files.append(outfile) + if adjust: + log.info("copying and adjusting %s -> %s", script, + self.build_dir) + if not self.dry_run: + if not sysconfig.python_build: + executable = self.executable + else: + executable = os.path.join( + sysconfig.get_config_var("BINDIR"), + "python%s%s" % ( + sysconfig.get_config_var("VERSION"), + sysconfig.get_config_var("EXE"))) + executable = os.fsencode(executable) + shebang = b"#!" + executable + post_interp + b"\n" + # Python parser starts to read a script using UTF-8 until + # it gets a #coding:xxx cookie. The shebang has to be the + # first line of a file, the #coding:xxx cookie cannot be + # written before. So the shebang has to be decodable from + # UTF-8. + try: + shebang.decode('utf-8') + except UnicodeDecodeError: + raise ValueError( + "The shebang ({!r}) is not decodable " + "from utf-8".format(shebang)) + # If the script is encoded to a custom encoding (use a + # #coding:xxx cookie), the shebang has to be decodable from + # the script encoding too. 
+ try: + shebang.decode(encoding) + except UnicodeDecodeError: + raise ValueError( + "The shebang ({!r}) is not decodable " + "from the script encoding ({})" + .format(shebang, encoding)) + with open(outfile, "wb") as outf: + outf.write(shebang) + outf.writelines(f.readlines()) + if f: + f.close() + else: + if f: + f.close() + self.copy_file(script, outfile) From d80e72007a0397efe2026173a93a50106145304d Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Mar 2022 10:50:49 -0400 Subject: [PATCH 06/12] Remove outfiles, unused. --- distutils/command/build_scripts.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index 359b476501..d717f3004d 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -33,7 +33,6 @@ def initialize_options(self): self.scripts = None self.force = None self.executable = None - self.outfiles = None def finalize_options(self): self.set_undefined_options('build', @@ -76,7 +75,7 @@ def copy_scripts(self): log.info("changing mode of %s from %o to %o", file, oldmode, newmode) os.chmod(file, newmode) - # XXX should we modify self.outfiles? + return outfiles, updated_files def _copy_script(self, script, outfiles, updated_files): From f91759e6b7e11af9ee23a28a324e8a67ffe897b2 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Mar 2022 11:01:09 -0400 Subject: [PATCH 07/12] Extract _change_modes and _change_mode functions. 
--- distutils/command/build_scripts.py | 31 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index d717f3004d..07408efa60 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -64,17 +64,7 @@ def copy_scripts(self): for script in self.scripts: self._copy_script(script, outfiles, updated_files) - if os.name == 'posix': - for file in outfiles: - if self.dry_run: - log.info("changing mode of %s", file) - else: - oldmode = os.stat(file)[ST_MODE] & 0o7777 - newmode = (oldmode | 0o555) & 0o7777 - if newmode != oldmode: - log.info("changing mode of %s from %o to %o", - file, oldmode, newmode) - os.chmod(file, newmode) + self._change_modes(outfiles) return outfiles, updated_files @@ -155,3 +145,22 @@ def _copy_script(self, script, outfiles, updated_files): if f: f.close() self.copy_file(script, outfile) + + def _change_modes(self, outfiles): + if os.name != 'posix': + return + + for file in outfiles: + self._change_mode(file) + + def _change_mode(self, file): + if self.dry_run: + log.info("changing mode of %s", file) + return + + oldmode = os.stat(file)[ST_MODE] & 0o7777 + newmode = (oldmode | 0o555) & 0o7777 + if newmode != oldmode: + log.info("changing mode of %s from %o to %o", + file, oldmode, newmode) + os.chmod(file, newmode) From afaf3c099d745799ef6bc014f30ea417401e3baa Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Mar 2022 11:11:20 -0400 Subject: [PATCH 08/12] Rewrite the comment to match the implementation. 
--- distutils/command/build_scripts.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index 07408efa60..36047dccc6 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -78,9 +78,8 @@ def _copy_script(self, script, outfiles, updated_files): log.debug("not copying %s (up-to-date)", script) return - # Always open the file, but ignore failures in dry-run mode -- - # that way, we'll get accurate feedback if we can read the - # script. + # Always open the file, but ignore failures in dry-run mode + # in order to attempt to copy directly. try: f = open(script, "rb") except OSError: From 6a7d01c0d0b1960b343db5bc120d668a9f58ce84 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Mar 2022 11:17:14 -0400 Subject: [PATCH 09/12] Use 'shebang_' for pattern and match. --- distutils/command/build_scripts.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index 36047dccc6..d141a8804f 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -13,7 +13,7 @@ import tokenize # check if Python is called on the first line with this expression -first_line_re = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$') +shebang_pattern = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$') class build_scripts(Command): @@ -54,7 +54,7 @@ def copy_scripts(self): Copy each script listed in ``self.scripts``. If a script is marked as a Python script (first line matches - 'first_line_re', i.e. starts with ``#!`` and contains + 'shebang_pattern', i.e. starts with ``#!`` and contains "python"), then adjust in the copy the first line to refer to the current Python interpreter. 
""" @@ -69,7 +69,7 @@ def copy_scripts(self): return outfiles, updated_files def _copy_script(self, script, outfiles, updated_files): - adjust = False + shebang_match = None script = convert_path(script) outfile = os.path.join(self.build_dir, os.path.basename(script)) outfiles.append(outfile) @@ -94,13 +94,10 @@ def _copy_script(self, script, outfiles, updated_files): self.warn("%s is an empty file (skipping)" % script) return - match = first_line_re.match(first_line) - if match: - adjust = True - post_interp = match.group(1) or b'' + shebang_match = shebang_pattern.match(first_line) updated_files.append(outfile) - if adjust: + if shebang_match: log.info("copying and adjusting %s -> %s", script, self.build_dir) if not self.dry_run: @@ -113,6 +110,7 @@ def _copy_script(self, script, outfiles, updated_files): sysconfig.get_config_var("VERSION"), sysconfig.get_config_var("EXE"))) executable = os.fsencode(executable) + post_interp = shebang_match.group(1) or b'' shebang = b"#!" + executable + post_interp + b"\n" # Python parser starts to read a script using UTF-8 until # it gets a #coding:xxx cookie. The shebang has to be the From 12edb8d575966b50afe6b2f89383bf804e99b310 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Mar 2022 11:33:14 -0400 Subject: [PATCH 10/12] Extract method to validate the shebang. --- distutils/command/build_scripts.py | 47 +++++++++++++++++------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index d141a8804f..94167d6cea 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -112,27 +112,7 @@ def _copy_script(self, script, outfiles, updated_files): executable = os.fsencode(executable) post_interp = shebang_match.group(1) or b'' shebang = b"#!" + executable + post_interp + b"\n" - # Python parser starts to read a script using UTF-8 until - # it gets a #coding:xxx cookie. 
The shebang has to be the - # first line of a file, the #coding:xxx cookie cannot be - # written before. So the shebang has to be decodable from - # UTF-8. - try: - shebang.decode('utf-8') - except UnicodeDecodeError: - raise ValueError( - "The shebang ({!r}) is not decodable " - "from utf-8".format(shebang)) - # If the script is encoded to a custom encoding (use a - # #coding:xxx cookie), the shebang has to be decodable from - # the script encoding too. - try: - shebang.decode(encoding) - except UnicodeDecodeError: - raise ValueError( - "The shebang ({!r}) is not decodable " - "from the script encoding ({})" - .format(shebang, encoding)) + self._validate_shebang(shebang, encoding) with open(outfile, "wb") as outf: outf.write(shebang) outf.writelines(f.readlines()) @@ -161,3 +141,28 @@ def _change_mode(self, file): log.info("changing mode of %s from %o to %o", file, oldmode, newmode) os.chmod(file, newmode) + + @staticmethod + def _validate_shebang(shebang, encoding): + # Python parser starts to read a script using UTF-8 until + # it gets a #coding:xxx cookie. The shebang has to be the + # first line of a file, the #coding:xxx cookie cannot be + # written before. So the shebang has to be decodable from + # UTF-8. + try: + shebang.decode('utf-8') + except UnicodeDecodeError: + raise ValueError( + "The shebang ({!r}) is not decodable " + "from utf-8".format(shebang)) + + # If the script is encoded to a custom encoding (use a + # #coding:xxx cookie), the shebang has to be decodable from + # the script encoding too. + try: + shebang.decode(encoding) + except UnicodeDecodeError: + raise ValueError( + "The shebang ({!r}) is not decodable " + "from the script encoding ({})" + .format(shebang, encoding)) From 7038cf2a659509b76847e463a3d3f47927986e0d Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Mar 2022 11:44:07 -0400 Subject: [PATCH 11/12] Restore Setuptools compatibility. 
--- distutils/command/build_scripts.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index 94167d6cea..cc4ca1db83 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -15,6 +15,9 @@ # check if Python is called on the first line with this expression shebang_pattern = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$') +# for Setuptools compatibility +first_line_re = shebang_pattern + class build_scripts(Command): From e2f47dcfc2a8019254a7600c400062d5c392d944 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Mar 2022 12:02:25 -0400 Subject: [PATCH 12/12] In build_scripts, open scripts as text. Fixes pypa/distutils#124. --- distutils/command/build_scripts.py | 39 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py index cc4ca1db83..e56511da09 100644 --- a/distutils/command/build_scripts.py +++ b/distutils/command/build_scripts.py @@ -12,8 +12,10 @@ from distutils import log import tokenize -# check if Python is called on the first line with this expression -shebang_pattern = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$') +shebang_pattern = re.compile('^#!.*python[0-9.]*([ \t].*)?$') +""" +Pattern matching a Python interpreter indicated in first line of a script. +""" # for Setuptools compatibility first_line_re = shebang_pattern @@ -84,14 +86,12 @@ def _copy_script(self, script, outfiles, updated_files): # Always open the file, but ignore failures in dry-run mode # in order to attempt to copy directly. 
try: - f = open(script, "rb") + f = tokenize.open(script) except OSError: if not self.dry_run: raise f = None else: - encoding, lines = tokenize.detect_encoding(f.readline) - f.seek(0) first_line = f.readline() if not first_line: self.warn("%s is an empty file (skipping)" % script) @@ -112,11 +112,10 @@ def _copy_script(self, script, outfiles, updated_files): "python%s%s" % ( sysconfig.get_config_var("VERSION"), sysconfig.get_config_var("EXE"))) - executable = os.fsencode(executable) - post_interp = shebang_match.group(1) or b'' - shebang = b"#!" + executable + post_interp + b"\n" - self._validate_shebang(shebang, encoding) - with open(outfile, "wb") as outf: + post_interp = shebang_match.group(1) or '' + shebang = "#!" + executable + post_interp + "\n" + self._validate_shebang(shebang, f.encoding) + with open(outfile, "w", encoding=f.encoding) as outf: outf.write(shebang) outf.writelines(f.readlines()) if f: @@ -150,22 +149,22 @@ def _validate_shebang(shebang, encoding): # Python parser starts to read a script using UTF-8 until # it gets a #coding:xxx cookie. The shebang has to be the # first line of a file, the #coding:xxx cookie cannot be - # written before. So the shebang has to be decodable from + # written before. So the shebang has to be encodable to # UTF-8. try: - shebang.decode('utf-8') - except UnicodeDecodeError: + shebang.encode('utf-8') + except UnicodeEncodeError: raise ValueError( - "The shebang ({!r}) is not decodable " - "from utf-8".format(shebang)) + "The shebang ({!r}) is not encodable " + "to utf-8".format(shebang)) # If the script is encoded to a custom encoding (use a - # #coding:xxx cookie), the shebang has to be decodable from + # #coding:xxx cookie), the shebang has to be encodable to # the script encoding too. 
try: - shebang.decode(encoding) - except UnicodeDecodeError: + shebang.encode(encoding) + except UnicodeEncodeError: raise ValueError( - "The shebang ({!r}) is not decodable " - "from the script encoding ({})" + "The shebang ({!r}) is not encodable " + "to the script encoding ({})" .format(shebang, encoding))