Skip to content

Commit

Permalink
Merge pull request #79 from thephpleague/fix-multibyte-bugs
Browse files Browse the repository at this point in the history
Fix multibyte bugs
  • Loading branch information
colinodell committed Mar 1, 2015
2 parents da85529 + aa6b8a0 commit 1db80ba
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 7 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ Updates should follow the [Keep a CHANGELOG](http://keepachangelog.com/) princip
- Replace references to HtmlRenderer with new HtmlRendererInterface

### Fixed
- Fix 0-based ordered lists starting at 1 instead of 0 (#74)
- Fixed 0-based ordered lists starting at 1 instead of 0 (#74)
- Fixed errors parsing multi-byte characters (#78 and #79)

## [0.7.0] - 2015-02-16
### Added
Expand Down
9 changes: 7 additions & 2 deletions src/Cursor.php
Original file line number Diff line number Diff line change
Expand Up @@ -267,14 +267,19 @@ public function isAtEnd()
*/
public function match($regex)
{
$subject = $this->getRemainder();

$matches = array();
if (!preg_match($regex, $this->getRemainder(), $matches, PREG_OFFSET_CAPTURE)) {
if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
return null;
}

// PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
$offset = mb_strlen(mb_strcut($subject, 0, $matches[0][1], 'utf-8'), 'utf-8');

// [0][0] contains the matched text
// [0][1] contains the index of that match
$this->advanceBy($matches[0][1] + mb_strlen($matches[0][0], 'utf-8'));
$this->advanceBy($offset + mb_strlen($matches[0][0], 'utf-8'));

return $matches[0][0];
}
Expand Down
2 changes: 1 addition & 1 deletion src/Inline/Parser/BacktickParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public function parse(ContextInterface $context, InlineParserContext $inlineCont

while ($matchingTicks = $cursor->match('/`+/m')) {
if ($matchingTicks === $ticks) {
$code = substr($cursor->getLine(), $previousState->getCurrentPosition(), $cursor->getPosition() - $previousState->getCurrentPosition() - strlen($ticks));
$code = mb_substr($cursor->getLine(), $previousState->getCurrentPosition(), $cursor->getPosition() - $previousState->getCurrentPosition() - strlen($ticks), 'utf-8');
$c = preg_replace('/[ \n]+/', ' ', $code);
$inlineContext->getInlines()->add(new Code(trim($c)));

Expand Down
7 changes: 5 additions & 2 deletions src/Util/RegexHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -192,12 +192,15 @@ public function getHRuleRegex()
public static function matchAt($regex, $string, $offset)
{
$matches = array();
$string = substr($string, $offset);
$string = mb_substr($string, $offset, mb_strlen($string), 'utf-8');
if (!preg_match($regex, $string, $matches, PREG_OFFSET_CAPTURE)) {
return null;
}

return $offset + $matches[0][1];
// PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
$charPos = mb_strlen(mb_strcut($string, 0, $matches[0][1], 'utf-8'), 'utf-8');

return $offset + $charPos;
}

/**
Expand Down
81 changes: 80 additions & 1 deletion tests/CursorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ public function dataForTestingFirstNonSpaceMethods()
array('foo', 0, 'f'),
array(' foo', 1, 'f'),
array(' foo', 2, 'f'),
array('тест', 0, 'т'),
array(' т', 1, 'т'),
);
}

Expand Down Expand Up @@ -95,6 +97,10 @@ public function dataForGetIndentTest()
array(' foo', 1, 1),
array(' foo', 2, 0),
array(' foo', 3, 0),
array('тест', 0, 0),
array('тест', 1, 0),
array(' тест', 0, 1),
array(' тест', 1, 0),
);
}

Expand All @@ -121,6 +127,12 @@ public function dataForGetCharacterTest()
array('foo', null, 'f'),
array('foo', 0, 'f'),
array('foo', 1, 'o'),
array(' тест ', 0, ' '),
array(' тест ', 1, 'т'),
array(' тест ', 2, 'е'),
array(' тест ', 3, 'с'),
array(' тест ', 4, 'т'),
array(' тест ', 5, ' '),
);
}

Expand All @@ -147,6 +159,7 @@ public function dataForPeekTest()
array('', 99, ''),
array('foo', 0, 'o'),
array('bar', 1, 'r'),
array('тест ', 1, 'с'),
);
}

Expand All @@ -171,6 +184,7 @@ public function dataForIsLineBlankTest()
array(' ', true),
array('foo', false),
array(' foo', false),
array('тест', false),
);
}

Expand Down Expand Up @@ -202,6 +216,12 @@ public function dataForAdvanceTest()
array('foo', 2, 2),
array('foo', 3, 3),
array('foo', 9, 3),
array('тест', 0, 0),
array('тест', 1, 1),
array('тест', 2, 2),
array('тест', 3, 3),
array('тест', 4, 4),
array('тест', 9, 4),
);
}

Expand Down Expand Up @@ -231,6 +251,12 @@ public function dataForAdvanceTestBy()
array('foo', 2, 2),
array('foo', 3, 3),
array('foo', 9, 3),
array('тест', 0, 0),
array('тест', 1, 1),
array('тест', 2, 2),
array('тест', 3, 3),
array('тест', 4, 4),
array('тест', 9, 4),
);
}

Expand Down Expand Up @@ -271,6 +297,13 @@ public function dataForAdvanceWhileMatchesTest()
array('foo', 1, 'o', 2, 2),
array('foo', 1, 'o', 3, 2),
array('foo', 1, 'o', 99, 2),
array('Россия', 0, 'Р', null, 1),
array('Россия', 1, 'Р', null, 0),
array('Россия', 2, 'с', null, 2),
array('Россия', 2, 'с', 0, 0),
array('Россия', 2, 'с', 1, 1),
array('Россия', 2, 'с', 2, 2),
array('Россия', 2, 'с', 3, 2),
);
}

Expand Down Expand Up @@ -300,6 +333,10 @@ public function dataForAdvanceToFirstNonSpaceTest()
array(' ', 2, 0),
array('foo bar', 0, 0),
array('foo bar', 3, 1),
array('foo bar', 4, 0),
array('это тест', 0, 0),
array('это тест', 3, 1),
array('это тест', 4, 0),
array(" \n \n ", 0, 5),
array(" \n \n ", 1, 4),
array(" \n \n ", 2, 3),
Expand Down Expand Up @@ -330,6 +367,9 @@ public function dataForGetRemainderTest()
array(' ', 0, ' '),
array(' ', 0, ' '),
array(' ', 1, ' '),
array('foo bar', 0, 'foo bar'),
array('foo bar', 2, 'o bar'),
array('это тест', 1, 'то тест'),
);
}

Expand Down Expand Up @@ -358,7 +398,46 @@ public function dataForIsAtEndTest()
array('', false, true),
array(' ', 0, false),
array(' ', null, true),
array(' ', 1, true)
array(' ', 1, true),
array('foo', 2, false),
array('foo', 3, true),
array('тест', 4, true),
);
}

/**
 * Checks Cursor::match() against a regex from a given starting position.
 *
 * The cursor is first advanced by $initialPosition; the test then compares
 * both the value returned by match() and the cursor position afterwards.
 *
 * @param string $string           Line the cursor is created from
 * @param string $regex            Pattern handed to Cursor::match()
 * @param int    $initialPosition  Amount the cursor is advanced before matching
 * @param int    $expectedPosition Cursor position expected after the match attempt
 * @param string $expectedResult   Value expected back from Cursor::match()
 *
 * @dataProvider dataForTestMatch
 */
public function testMatch($string, $regex, $initialPosition, $expectedPosition, $expectedResult)
{
    $cursor = new Cursor($string);
    $cursor->advanceBy($initialPosition);

    $matched = $cursor->match($regex);

    $this->assertEquals($expectedResult, $matched);
    $this->assertEquals($expectedPosition, $cursor->getPosition());
}

/**
 * Data sets for testMatch().
 *
 * Each row holds, in order: the subject string, the regex, the initial
 * cursor position, the cursor position expected after the match attempt,
 * and the value expected back from Cursor::match() (null when nothing
 * matches).
 *
 * @return array
 */
public function dataForTestMatch()
{
    return array(
        // Single-byte subject
        array('this is a test', '/[aeiou]s/', 0, 4, 'is'),
        array('this is a test', '/[aeiou]s/', 2, 4, 'is'),
        array('this is a test', '/[aeiou]s/', 3, 7, 'is'),
        array('this is a test', '/[aeiou]s/', 9, 13, 'es'),

        // Multi-byte subject: positions are counted in characters, not bytes
        array('Это тест', '/т/u', 0, 2, 'т'),
        array('Это тест', '/т/u', 1, 2, 'т'),
        array('Это тест', '/т/u', 2, 5, 'т'),

        // No match: null is returned and the position stays where it started
        array('this is a test', '/z/', 0, 0, null),
        array('Это тест', '/z/u', 2, 2, null),
    );
}
}
52 changes: 52 additions & 0 deletions tests/Inline/Parser/BacktickParserTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

namespace League\CommonMark\Tests\Inline\Parser;

use League\CommonMark\Cursor;
use League\CommonMark\Inline\Element\Code;
use League\CommonMark\InlineParserContext;
use League\CommonMark\Inline\Parser\BacktickParser;

class BacktickParserTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Runs BacktickParser on a line and checks the resulting Code inline.
     *
     * The cursor is advanced to the first backtick (a character offset, so
     * multi-byte text before it is counted correctly) before parsing. The
     * test expects exactly one inline, of type Code, with the given content.
     *
     * @param string $string           Line containing a backtick-delimited code span
     * @param string $expectedContents Expected content of the parsed Code inline
     *
     * @dataProvider dataForTestParse
     */
    public function testParse($string, $expectedContents)
    {
        $cursor = new Cursor($string);

        // Move to just before the first backtick.
        // The offset is passed as 0 rather than null: null for an int
        // parameter is deprecated as of PHP 8.1.
        $firstBacktickPos = mb_strpos($string, '`', 0, 'utf-8');
        $cursor->advanceBy($firstBacktickPos);

        $inlineContext = new InlineParserContext($cursor);
        $contextStub = $this->getMock('League\CommonMark\ContextInterface');

        $parser = new BacktickParser();

        $parser->parse($contextStub, $inlineContext);

        $inlines = $inlineContext->getInlines();
        $this->assertCount(1, $inlines);

        /** @var Code $code */
        $code = $inlines->first();
        // assertInstanceOf reports the actual type on failure, unlike assertTrue
        $this->assertInstanceOf('League\CommonMark\Inline\Element\Code', $code);
        $this->assertEquals($expectedContents, $code->getContent());
    }

    /**
     * Data sets for testParse(): the input line, then the expected code content.
     *
     * @return array
     */
    public function dataForTestParse()
    {
        return array(
            // ASCII only
            array('This is `just` a test', 'just'),
            // Multi-byte text around an ASCII code span
            array('Из: твоя `feature` ветка', 'feature'),
            // Multi-byte text both around and inside the code span
            array('Из: твоя `тест` ветка', 'тест'),
        );
    }
}
29 changes: 29 additions & 0 deletions tests/Util/RegexHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -277,4 +277,33 @@ public function testUnescape()
{
$this->assertEquals('foo(and(bar))', RegexHelper::unescape('foo(and\\(bar\\))'));
}

/**
 * Checks the value RegexHelper::matchAt() returns for a regex, subject
 * string and starting offset.
 *
 * @param string   $regex          Pattern to search for
 * @param string   $string         Subject string
 * @param int|null $offset         Offset at which the search starts
 * @param int      $expectedResult Value expected back from matchAt()
 *
 * @dataProvider dataForTestMatchAt
 */
public function testMatchAt($regex, $string, $offset, $expectedResult)
{
    $actualResult = RegexHelper::matchAt($regex, $string, $offset);

    $this->assertEquals($expectedResult, $actualResult);
}

/**
 * Data sets for testMatchAt().
 *
 * Each row holds, in order: the regex, the subject string, the starting
 * offset, and the value expected back from RegexHelper::matchAt() (null
 * when nothing matches).
 *
 * @return array
 */
public function dataForTestMatchAt()
{
    return array(
        // Single-byte subject
        array('/ /', 'foo bar', null, 3),
        array('/ /', 'foo bar', 0, 3),
        array('/ /', 'foo bar', 1, 3),

        // Multi-byte subject: the result is a character position, not a byte position
        array('/ /', 'это тест', null, 3),
        array('/ /', 'это тест', 0, 3),
        array('/ /', 'это тест', 1, 3),

        // No match: pattern absent, or the offset is already past the only match
        array('/x/', 'foo bar', 0, null),
        array('/ /', 'это тест', 4, null),
    );
}

}

0 comments on commit 1db80ba

Please sign in to comment.