Skip to content

Commit

Permalink
Fix bug that sanitizes chars that are in the GSM alphabet
Browse files Browse the repository at this point in the history
  • Loading branch information
luishdez committed Dec 22, 2015
1 parent 867aab1 commit 591e84c
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 37 deletions.
103 changes: 68 additions & 35 deletions SMSCounter.php
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,6 @@ public function removeNonGsmChars($str)
public function replaceNonGsmChars($str, $replacement = null)
{
$validChars = $this->getGsm7bitExMap();

$allChars = self::utf8ToUnicode($str);

if (strlen($replacement) > 1) {
Expand Down Expand Up @@ -349,33 +348,60 @@ public function removeAccents($str)

$chars = array(
// Decompositions for Latin-1 Supplement
chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
chr(195).chr(135) => 'C', chr(195).chr(136) => 'E',
chr(195).chr(137) => 'E', chr(195).chr(138) => 'E',
chr(195).chr(139) => 'E', chr(195).chr(140) => 'I',
chr(195).chr(141) => 'I', chr(195).chr(142) => 'I',
chr(195).chr(143) => 'I', chr(195).chr(145) => 'N',
chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
chr(195).chr(159) => 's', // chr(195).chr(160) => 'a',
chr(195).chr(161) => 'a', chr(195).chr(162) => 'a',
chr(195).chr(163) => 'a', chr(195).chr(164) => 'a',
chr(195).chr(165) => 'a', chr(195).chr(167) => 'c',
chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
chr(195).chr(177) => 'n', chr(195).chr(178) => 'o',
chr(195).chr(179) => 'o', chr(195).chr(180) => 'o',
chr(195).chr(181) => 'o', chr(195).chr(182) => 'o',
chr(195).chr(182) => 'o', chr(195).chr(185) => 'u',
chr(195).chr(186) => 'u', chr(195).chr(187) => 'u',
chr(195).chr(188) => 'u', chr(195).chr(189) => 'y',
chr(195).chr(128) => 'A',
chr(195).chr(129) => 'A',
chr(195).chr(130) => 'A',
chr(195).chr(131) => 'A',
chr(195).chr(132) => 'A',
chr(195).chr(133) => 'A',
// chr(195).chr(135) => 'C', // Ç
chr(195).chr(136) => 'E',
chr(195).chr(137) => 'E',
chr(195).chr(138) => 'E',
chr(195).chr(139) => 'E',
chr(195).chr(140) => 'I',
chr(195).chr(141) => 'I',
chr(195).chr(142) => 'I',
chr(195).chr(143) => 'I',
// chr(195).chr(145) => 'N', // Ñ
chr(195).chr(146) => 'O',
chr(195).chr(147) => 'O',
chr(195).chr(148) => 'O',
chr(195).chr(149) => 'O',
chr(195).chr(150) => 'O',
chr(195).chr(153) => 'U',
chr(195).chr(154) => 'U',
chr(195).chr(155) => 'U',
chr(195).chr(156) => 'U',
chr(195).chr(157) => 'Y',
chr(195).chr(159) => 's',
// chr(195).chr(160) => 'a',
chr(195).chr(161) => 'a',
chr(195).chr(162) => 'a',
chr(195).chr(163) => 'a',
chr(195).chr(164) => 'a',
chr(195).chr(165) => 'a',
// chr(195).chr(167) => 'c', // ç
chr(195).chr(168) => 'e',
chr(195).chr(169) => 'e',
chr(195).chr(170) => 'e',
chr(195).chr(171) => 'e',
chr(195).chr(172) => 'i',
chr(195).chr(173) => 'i',
chr(195).chr(174) => 'i',
chr(195).chr(175) => 'i',
// chr(195).chr(177) => 'n', // ñ
chr(195).chr(178) => 'o',
chr(195).chr(179) => 'o',
chr(195).chr(180) => 'o',
chr(195).chr(181) => 'o',
chr(195).chr(182) => 'o',
chr(195).chr(182) => 'o',
chr(195).chr(185) => 'u',
chr(195).chr(186) => 'u',
chr(195).chr(187) => 'u',
chr(195).chr(188) => 'u',
chr(195).chr(189) => 'y',
chr(195).chr(191) => 'y',
// Decompositions for Latin Extended-A
chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
Expand Down Expand Up @@ -411,13 +437,20 @@ public function removeAccents($str)
chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
chr(197).chr(130) => 'l',
chr(197).chr(131) => 'N', // Ń
chr(197).chr(132) => 'n', // ń
chr(197).chr(133) => 'N', // Ņ
chr(197).chr(134) => 'n', // ņ
chr(197).chr(135) => 'N',
chr(197).chr(136) => 'n',
chr(197).chr(137) => 'N',
chr(197).chr(138) => 'n',
chr(197).chr(139) => 'N',
chr(197).chr(140) => 'O',
chr(197).chr(141) => 'o',
chr(197).chr(142) => 'O',
chr(197).chr(143) => 'o',
chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
Expand Down
15 changes: 13 additions & 2 deletions Tests/SMSCounterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -130,15 +130,26 @@ public function testUnicode()

/**
 * Checks that removeNonGsmChars() strips "á" and "`" from the input while
 * leaving the remaining text, including the trailing " ñ", untouched.
 */
public function testRemoveNonGSMChars()
{
// NOTE(review): this first pair is overwritten by the two assignments right
// below it and never reaches the assertion; it looks like the pre-commit
// fixture carried over by the diff view — confirm before relying on it.
$text = "áno-unicode-remaining`";
$expectedTExt = "no-unicode-remaining";
// Effective fixture: same input with " ñ" appended, which is expected to survive.
$text = "áno-unicode-remaining` ñ";
$expectedTExt = "no-unicode-remaining ñ";

$smsCounter = new SMSCounter;
$output = $smsCounter->removeNonGsmChars($text);

// The result must equal the input minus the two stripped characters.
$this->assertEquals($expectedTExt, $output);
}

/**
 * sanitizeToGSM() is expected to hand back a string made of plain ASCII
 * plus "à", "ñ" and "Ç" exactly as it received it.
 */
public function testSanitizeToGSM()
{
    $counter = new SMSCounter();

    // Input and expectation are deliberately the same text: nothing may be altered.
    $message = "Test sanitization à ñ Ç";
    $unchanged = "Test sanitization à ñ Ç";

    $sanitized = $counter->sanitizeToGSM($message);

    $this->assertEquals($unchanged, $sanitized);
}

public function testTruncate1SmsGSM7()
{
$text = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem.";
Expand Down

0 comments on commit 591e84c

Please sign in to comment.