diff --git a/traits/util/clean_strings.py b/traits/util/clean_strings.py index 786dc7082..e7d10e8d7 100644 --- a/traits/util/clean_strings.py +++ b/traits/util/clean_strings.py @@ -13,110 +13,7 @@ """ # Standard library imports. -import copy -import datetime import keyword -import re -import unicodedata -import warnings - - -def clean_filename(name, replace_empty=""): - """ - Make a user-supplied string safe for filename use. - - Returns an ASCII-encodable string based on the input string that's safe for - use as a component of a filename or URL. The returned value is a string - containing only lowercase ASCII letters, digits, and the characters '-' and - '_'. - - This does not give a faithful representation of the original string: - different input strings can result in the same output string. - - .. deprecated:: 6.3.0 - This function will be removed in a future version of Traits. - - Parameters - ---------- - name : str - The string to be made safe. - replace_empty : str, optional - The return value to be used in the event that the sanitised - string ends up being empty. No validation is done on this - input - it's up to the user to ensure that the default is - itself safe. The default is to return the empty string. - - Returns - ------- - safe_string : str - A filename-safe version of string. - - """ - warnings.warn( - "clean_filename is deprecated and will eventually be removed", - DeprecationWarning, - stacklevel=2, - ) - - # Code is based on Django's slugify utility. - # https://docs.djangoproject.com/en/1.9/_modules/django/utils/text/#slugify - name = ( - unicodedata.normalize('NFKD', name) - .encode('ascii', 'ignore') - .decode('ascii') - ) - name = re.sub(r'[^\w\s-]', '', name).strip().lower() - safe_name = re.sub(r'[-\s]+', '-', name) - if safe_name == "": - return replace_empty - return safe_name - - -def clean_timestamp(dt=None, microseconds=False): - """ - Return a timestamp that has been cleansed of characters that might - cause problems in filenames, namely colons. If no datetime object - is provided, then uses the current time. - - The timestamp is in ISO-8601 format with the following exceptions: - - * Colons ':' are replaced by underscores '_'. - * Microseconds are not displayed if the 'microseconds' parameter is - False. - - .. deprecated:: 6.3.0 - This function will be removed in a future version of Traits. - - Parameters - ---------- - dt : None or datetime.datetime - If None, then the current time is used. - microseconds : bool - Display microseconds or not. - - Returns - ------- - A string timestamp. - """ - warnings.warn( - "clean_timestamp is deprecated and will eventually be removed", - DeprecationWarning, - stacklevel=2, - ) - - if dt is None: - dt = datetime.datetime.now() - else: - # Operate on a copy. - dt = copy.copy(dt) - - if not microseconds: - # The microseconds are largely uninformative but annoying. - dt = dt.replace(microsecond=0) - - stamp = dt.isoformat().replace(":", "_") - - return stamp def python_name(name): diff --git a/traits/util/tests/test_clean_strings.py b/traits/util/tests/test_clean_strings.py deleted file mode 100644 index e87ff525d..000000000 --- a/traits/util/tests/test_clean_strings.py +++ /dev/null @@ -1,89 +0,0 @@ -# (C) Copyright 2005-2025 Enthought, Inc., Austin, TX -# All rights reserved. -# -# This software is provided without warranty under the terms of the BSD -# license included in LICENSE.txt and may be redistributed only under -# the conditions described in the aforementioned license. The license -# is also available online at http://www.enthought.com/licenses/BSD.txt -# -# Thanks for using Enthought open source! - -import datetime -import unittest - -from traits.util.clean_strings import clean_filename, clean_timestamp - -# Safe strings should only contain the following characters. -LEGAL_CHARS = set("-0123456789_abcdefghijklmnopqrstuvwxyz") - - -class TestCleanStrings(unittest.TestCase): - def test_clean_filename_default(self): - test_strings = [ - "!!!", - "", - " ", - "\t/\n", - "^!+", - ] - for test_string in test_strings: - with self.assertWarns(DeprecationWarning): - safe_string = clean_filename(test_string, "default-output") - self.check_output(safe_string) - self.assertEqual(safe_string, "default-output") - - def test_clean_filename_whitespace_handling(self): - # Leading and trailing whitespace stripped. - with self.assertWarns(DeprecationWarning): - self.assertEqual(clean_filename(" abc "), "abc") - with self.assertWarns(DeprecationWarning): - self.assertEqual(clean_filename(" \t\tabc \n"), "abc") - - # Internal whitespace turned into hyphens. - with self.assertWarns(DeprecationWarning): - self.assertEqual(clean_filename("well name"), "well-name") - with self.assertWarns(DeprecationWarning): - self.assertEqual(clean_filename("well \n name"), "well-name") - with self.assertWarns(DeprecationWarning): - self.assertEqual(clean_filename("well - name"), "well-name") - - def test_clean_filename_conversion_to_lowercase(self): - test_string = "ABCdefGHI123" - with self.assertWarns(DeprecationWarning): - safe_string = clean_filename(test_string) - self.assertEqual(safe_string, test_string.lower()) - self.check_output(safe_string) - - def test_clean_filename_accented_chars(self): - test_strings = [ - "\xe4b\xe7d\xe8f", - "a\u0308bc\u0327de\u0300f", - ] - for test_string in test_strings: - with self.assertWarns(DeprecationWarning): - safe_string = clean_filename(test_string) - self.check_output(safe_string) - self.assertEqual(safe_string, "abcdef") - - def test_clean_filename_all_chars(self): - test_strings = [ - "".join(chr(n) for n in range(10000)), - "".join(chr(n) for n in range(10000)) * 2, - "".join(chr(n) for n in reversed(range(10000))), - ] - for test_string in test_strings: - with self.assertWarns(DeprecationWarning): - safe_string = clean_filename(test_string) - self.check_output(safe_string) - - def test_clean_timestamp_deprecation(self): - with self.assertWarns(DeprecationWarning): - clean_timestamp(datetime.datetime.now()) - - def check_output(self, safe_string): - """ - Check that a supposedly safe string is actually safe. - """ - self.assertIsInstance(safe_string, str) - chars_in_string = set(safe_string) - self.assertLessEqual(chars_in_string, LEGAL_CHARS)