Skip to content

Commit

Permalink
make filename non-english-friendly
Browse files Browse the repository at this point in the history
  • Loading branch information
demeritcowboy committed Aug 26, 2021
1 parent 375952f commit 31c6c66
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 2 deletions.
4 changes: 2 additions & 2 deletions CRM/Contact/Form/Task/PDFLetterCommon.php
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,10 @@ public static function postProcess(&$form) {
*/
private static function getFileName(CRM_Core_Form $form) {
if (!empty($form->getSubmittedValue('pdf_file_name'))) {
$fileName = CRM_Utils_String::munge($form->getSubmittedValue('pdf_file_name'), '_', 200);
$fileName = CRM_Utils_File::makeFilenameWithUnicode($form->getSubmittedValue('pdf_file_name'), '_', 200);
}
elseif (!empty($form->getSubmittedValue('subject'))) {
$fileName = CRM_Utils_String::munge($form->getSubmittedValue('subject'), '_', 200);
$fileName = CRM_Utils_File::makeFilenameWithUnicode($form->getSubmittedValue('subject'), '_', 200);
}
else {
$fileName = 'CiviLetter';
Expand Down
21 changes: 21 additions & 0 deletions CRM/Utils/File.php
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,27 @@ public static function makeFileName($name) {
}
}

/**
 * Build a human-sensible filename that may contain unicode.
 *
 * CRM_Utils_String::munge() doesn't handle unicode and needs to be able
 * to generate valid database tablenames so will sometimes generate a
 * random string. Here what we want is a human-sensible filename that might
 * contain unicode.
 * Note that this does filter out emojis and such, but keeps characters that
 * are considered alphanumeric in non-english languages.
 *
 * Every character that does not match the unicode-aware \w class is replaced
 * by $replacementString, which is always inserted literally. Input that is
 * not valid UTF-8 has its invalid byte sequences substituted first, so they
 * end up replaced like any other non-word character.
 *
 * @param string $input
 * @param string $replacementString Character or string to replace invalid characters with. Can be the empty string.
 * @param int $cutoffLength Length (in characters, not bytes) to truncate the result after replacements. 0 disables truncation.
 * @return string
 */
public static function makeFilenameWithUnicode(string $input, string $replacementString = '_', int $cutoffLength = 63): string {
  // A callback keeps the replacement literal; a plain preg_replace() would
  // expand sequences such as "$0" or "\1" found in $replacementString.
  $replaceNonWord = static function (string $subject) use ($replacementString) {
    return preg_replace_callback(
      '/\W/u',
      static function (array $match) use ($replacementString): string {
        return $replacementString;
      },
      $subject
    );
  };

  $filename = $replaceNonWord($input);
  if ($filename === NULL) {
    // With the /u modifier PCRE rejects malformed UTF-8 and returns NULL.
    // Converting UTF-8 to UTF-8 substitutes the invalid byte sequences so
    // the second pass can succeed.
    $filename = $replaceNonWord((string) mb_convert_encoding($input, 'UTF-8', 'UTF-8'));
  }
  // Never let NULL leak out: the declared return type is string.
  $filename = $filename ?? '';

  if ($cutoffLength) {
    // The pattern above treats the text as UTF-8, so truncate as UTF-8 too
    // rather than relying on the configured internal encoding.
    return mb_substr($filename, 0, $cutoffLength, 'UTF-8');
  }
  return $filename;
}

/**
* Copies a file
*
Expand Down
99 changes: 99 additions & 0 deletions tests/phpunit/CRM/Utils/FileTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,103 @@ public function testIsIncludable() {
unlink($file);
}

/**
 * Data provider for testMakeFilenameWithUnicode.
 *
 * Each dataset holds the arguments of the test method in order, keyed by
 * the matching parameter name: input, replacementCharacter, cutoffLength
 * and expected. A NULL replacementCharacter or cutoffLength means the test
 * should omit that argument so the function's default is exercised.
 *
 * The provider is static and its keys mirror the test method's parameter
 * names so that it keeps working on PHPUnit versions that require static
 * providers or map string keys to named arguments.
 *
 * @return array
 */
public static function makeFilenameWithUnicodeProvider(): array {
  return [
    // explicit indices to make it easier to see which one failed
    0 => [
      'input' => '',
      'replacementCharacter' => NULL,
      'cutoffLength' => NULL,
      'expected' => '',
    ],
    1 => [
      'input' => 'a',
      'replacementCharacter' => NULL,
      'cutoffLength' => NULL,
      'expected' => 'a',
    ],
    2 => [
      'input' => 'a b',
      'replacementCharacter' => NULL,
      'cutoffLength' => NULL,
      'expected' => 'a_b',
    ],
    3 => [
      'input' => 'a4b',
      'replacementCharacter' => NULL,
      'cutoffLength' => NULL,
      'expected' => 'a4b',
    ],
    // punctuation is replaced one-for-one; the underscore itself is kept
    4 => [
      'input' => '_a!@#$%^&*()[]+-=."\'{}<>?/\\|;:b',
      'replacementCharacter' => NULL,
      'cutoffLength' => NULL,
      'expected' => '_a____________________________b',
    ],
    // an empty replacement simply strips the invalid characters
    5 => [
      'input' => '_a!@#$%^&*()[]+-=."\'{}<>?/\\|;:b',
      'replacementCharacter' => '',
      'cutoffLength' => NULL,
      'expected' => '_ab',
    ],
    // emojis get replaced, but alphabetic letters in non-english are kept
    6 => [
      'input' => 'açbяc😀d',
      'replacementCharacter' => NULL,
      'cutoffLength' => NULL,
      'expected' => 'açbяc_d',
    ],
    7 => [
      'input' => 'çя😀',
      'replacementCharacter' => NULL,
      'cutoffLength' => NULL,
      'expected' => 'çя_',
    ],
    // test default cutoff
    8 => [
      'input' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456789',
      'replacementCharacter' => NULL,
      'cutoffLength' => NULL,
      'expected' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456',
    ],
    // test explicit cutoff
    9 => [
      'input' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456789',
      'replacementCharacter' => '_',
      'cutoffLength' => 30,
      'expected' => 'abcdefghijklmnopqrstuvwxyz0123',
    ],
    // test cutoff truncates multibyte properly
    10 => [
      'input' => 'ДДДДДДДДДДДДДДД',
      'replacementCharacter' => '',
      'cutoffLength' => 10,
      'expected' => 'ДДДДДДДДДД',
    ],
  ];
}

/**
 * Checks makeFilenameWithUnicode() against one provider dataset.
 *
 * NULL values coming from the provider stand for "do not pass this
 * argument", so that the function's own defaults get exercised: a NULL
 * cutoff drops the third argument, and when the replacement is NULL as
 * well the second argument is dropped too.
 *
 * @dataProvider makeFilenameWithUnicodeProvider
 * @param string $input
 * @param ?string $replacementCharacter
 * @param ?int $cutoffLength
 * @param string $expected
 */
public function testMakeFilenameWithUnicode(string $input, ?string $replacementCharacter, ?int $cutoffLength, string $expected) {
  $arguments = [$input, $replacementCharacter, $cutoffLength];

  // Trim trailing NULLs from the right; the replacement is only dropped
  // when the cutoff has been dropped as well.
  if (is_null($cutoffLength)) {
    array_pop($arguments);
    if (is_null($replacementCharacter)) {
      array_pop($arguments);
    }
  }

  $actual = CRM_Utils_File::makeFilenameWithUnicode(...$arguments);
  $this->assertSame($expected, $actual);
}

}

0 comments on commit 31c6c66

Please sign in to comment.