diff --git a/SMSCounter.php b/SMSCounter.php index 8cc691a..60e5e32 100644 --- a/SMSCounter.php +++ b/SMSCounter.php @@ -296,7 +296,6 @@ public function removeNonGsmChars($str) public function replaceNonGsmChars($str, $replacement = null) { $validChars = $this->getGsm7bitExMap(); - $allChars = self::utf8ToUnicode($str); if (strlen($replacement) > 1) { @@ -349,33 +348,60 @@ public function removeAccents($str) $chars = array( // Decompositions for Latin-1 Supplement - chr(195).chr(128) => 'A', chr(195).chr(129) => 'A', - chr(195).chr(130) => 'A', chr(195).chr(131) => 'A', - chr(195).chr(132) => 'A', chr(195).chr(133) => 'A', - chr(195).chr(135) => 'C', chr(195).chr(136) => 'E', - chr(195).chr(137) => 'E', chr(195).chr(138) => 'E', - chr(195).chr(139) => 'E', chr(195).chr(140) => 'I', - chr(195).chr(141) => 'I', chr(195).chr(142) => 'I', - chr(195).chr(143) => 'I', chr(195).chr(145) => 'N', - chr(195).chr(146) => 'O', chr(195).chr(147) => 'O', - chr(195).chr(148) => 'O', chr(195).chr(149) => 'O', - chr(195).chr(150) => 'O', chr(195).chr(153) => 'U', - chr(195).chr(154) => 'U', chr(195).chr(155) => 'U', - chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y', - chr(195).chr(159) => 's', // chr(195).chr(160) => 'a', - chr(195).chr(161) => 'a', chr(195).chr(162) => 'a', - chr(195).chr(163) => 'a', chr(195).chr(164) => 'a', - chr(195).chr(165) => 'a', chr(195).chr(167) => 'c', - chr(195).chr(168) => 'e', chr(195).chr(169) => 'e', - chr(195).chr(170) => 'e', chr(195).chr(171) => 'e', - chr(195).chr(172) => 'i', chr(195).chr(173) => 'i', - chr(195).chr(174) => 'i', chr(195).chr(175) => 'i', - chr(195).chr(177) => 'n', chr(195).chr(178) => 'o', - chr(195).chr(179) => 'o', chr(195).chr(180) => 'o', - chr(195).chr(181) => 'o', chr(195).chr(182) => 'o', - chr(195).chr(182) => 'o', chr(195).chr(185) => 'u', - chr(195).chr(186) => 'u', chr(195).chr(187) => 'u', - chr(195).chr(188) => 'u', chr(195).chr(189) => 'y', + chr(195).chr(128) => 'A', + chr(195).chr(129) => 'A', + chr(195).chr(130) => 'A', + chr(195).chr(131) => 'A', + chr(195).chr(132) => 'A', + chr(195).chr(133) => 'A', + // chr(195).chr(135) => 'C', // Ç + chr(195).chr(136) => 'E', + chr(195).chr(137) => 'E', + chr(195).chr(138) => 'E', + chr(195).chr(139) => 'E', + chr(195).chr(140) => 'I', + chr(195).chr(141) => 'I', + chr(195).chr(142) => 'I', + chr(195).chr(143) => 'I', + // chr(195).chr(145) => 'N', // Ñ + chr(195).chr(146) => 'O', + chr(195).chr(147) => 'O', + chr(195).chr(148) => 'O', + chr(195).chr(149) => 'O', + chr(195).chr(150) => 'O', + chr(195).chr(153) => 'U', + chr(195).chr(154) => 'U', + chr(195).chr(155) => 'U', + chr(195).chr(156) => 'U', + chr(195).chr(157) => 'Y', + chr(195).chr(159) => 's', + // chr(195).chr(160) => 'a', + chr(195).chr(161) => 'a', + chr(195).chr(162) => 'a', + chr(195).chr(163) => 'a', + chr(195).chr(164) => 'a', + chr(195).chr(165) => 'a', + // chr(195).chr(167) => 'c', // ç + chr(195).chr(168) => 'e', + chr(195).chr(169) => 'e', + chr(195).chr(170) => 'e', + chr(195).chr(171) => 'e', + chr(195).chr(172) => 'i', + chr(195).chr(173) => 'i', + chr(195).chr(174) => 'i', + chr(195).chr(175) => 'i', + // chr(195).chr(177) => 'n', // ñ + chr(195).chr(178) => 'o', + chr(195).chr(179) => 'o', + chr(195).chr(180) => 'o', + chr(195).chr(181) => 'o', + chr(195).chr(182) => 'o', + chr(195).chr(182) => 'o', + chr(195).chr(185) => 'u', + chr(195).chr(186) => 'u', + chr(195).chr(187) => 'u', + chr(195).chr(188) => 'u', + chr(195).chr(189) => 'y', chr(195).chr(191) => 'y', // Decompositions for Latin Extended-A chr(196).chr(128) => 'A', chr(196).chr(129) => 'a', @@ -411,13 +437,20 @@ public function removeAccents($str) chr(196).chr(188) => 'l', chr(196).chr(189) => 'L', chr(196).chr(190) => 'l', chr(196).chr(191) => 'L', chr(197).chr(128) => 'l', chr(197).chr(129) => 'L', - chr(197).chr(130) => 'l', chr(197).chr(131) => 'N', - chr(197).chr(132) => 'n', chr(197).chr(133) => 'N', - chr(197).chr(134) => 'n', chr(197).chr(135) => 'N', - chr(197).chr(136) => 'n', chr(197).chr(137) => 'N', - chr(197).chr(138) => 'n', chr(197).chr(139) => 'N', - chr(197).chr(140) => 'O', chr(197).chr(141) => 'o', - chr(197).chr(142) => 'O', chr(197).chr(143) => 'o', + chr(197).chr(130) => 'l', + chr(197).chr(131) => 'N', // Ń + chr(197).chr(132) => 'n', // ń + chr(197).chr(133) => 'N', // Ņ + chr(197).chr(134) => 'n', // ņ + chr(197).chr(135) => 'N', + chr(197).chr(136) => 'n', + chr(197).chr(137) => 'N', + chr(197).chr(138) => 'n', + chr(197).chr(139) => 'N', + chr(197).chr(140) => 'O', + chr(197).chr(141) => 'o', + chr(197).chr(142) => 'O', + chr(197).chr(143) => 'o', chr(197).chr(144) => 'O', chr(197).chr(145) => 'o', chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe', chr(197).chr(148) => 'R',chr(197).chr(149) => 'r', diff --git a/Tests/SMSCounterTest.php b/Tests/SMSCounterTest.php index 4adf8fc..9552ea0 100755 --- a/Tests/SMSCounterTest.php +++ b/Tests/SMSCounterTest.php @@ -130,8 +130,8 @@ public function testUnicode() public function testRemoveNonGSMChars() { - $text = "áno-unicode-remaining`"; - $expectedTExt = "no-unicode-remaining"; + $text = "áno-unicode-remaining` ñ"; + $expectedTExt = "no-unicode-remaining ñ"; $smsCounter = new SMSCounter; $output = $smsCounter->removeNonGsmChars($text); @@ -139,6 +139,17 @@ public function testRemoveNonGSMChars() $this->assertEquals($expectedTExt, $output); } + public function testSanitizeToGSM() + { + $text = "Test sanitization à ñ Ç"; + $expectedTExt = "Test sanitization à ñ Ç"; + + $smsCounter = new SMSCounter; + $output = $smsCounter->sanitizeToGSM($text); + + $this->assertEquals($expectedTExt, $output); + } + public function testTruncate1SmsGSM7() { $text = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem.";