-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from brevia-ai/feat/import-sitemap-command
New import sitemap command
- Loading branch information
Showing
3 changed files
with
277 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
<?php | ||
declare(strict_types=1); | ||
|
||
/** | ||
* BEdita Brevia plugin | ||
* | ||
* Copyright 2024 Atlas Srl | ||
*/ | ||
namespace Brevia\BEdita\Command; | ||
|
||
use BEdita\Core\Utility\LoggedUser; | ||
use Brevia\BEdita\Client\BreviaClient; | ||
use Brevia\BEdita\Utility\ReadCSVTrait; | ||
use Cake\Command\Command; | ||
use Cake\Console\Arguments; | ||
use Cake\Console\ConsoleIo; | ||
use Cake\Console\ConsoleOptionParser; | ||
use Cake\Log\LogTrait; | ||
use Cake\ORM\Table; | ||
use Cake\Utility\Hash; | ||
|
||
/** | ||
* Import links from sitemap and create links | ||
* | ||
* @property \BEdita\Core\Model\Table\ObjectsTable $Collections | ||
*/ | ||
class ImportSitemapCommand extends Command
{
    use LogTrait;
    use ReadCSVTrait;

    /**
     * Brevia API client
     *
     * @var \Brevia\BEdita\Client\BreviaClient
     */
    protected BreviaClient $client;

    /**
     * Links Table
     *
     * @var \Cake\ORM\Table
     */
    protected Table $Links;

    /**
     * @inheritDoc
     */
    public $defaultTable = 'Collections';

    /**
     * Declare the `sitemap`, `prefix` and `collection` command options.
     *
     * @param \Cake\Console\ConsoleOptionParser $parser Option parser
     * @return \Cake\Console\ConsoleOptionParser
     */
    protected function buildOptionParser(ConsoleOptionParser $parser): ConsoleOptionParser
    {
        return $parser->addOption('sitemap', [
                'help' => 'File path or URL of sitemap to import',
                'short' => 's',
                'required' => true,
            ])
            ->addOption('prefix', [
                'help' => 'Optional path prefix of URLs to import',
                'short' => 'p',
                'required' => false,
            ])
            ->addOption('collection', [
                'help' => 'Collection used to index (use the unique collection name)',
                'short' => 'c',
                'required' => true,
            ]);
    }

    /**
     * Create the Brevia API client and load the `Links` table.
     *
     * @return void
     */
    public function initialize(): void
    {
        $this->client = new BreviaClient();
        $this->Links = $this->fetchTable('Links');
    }

    /**
     * Read a sitemap and create a link for every URL that is not already
     * attached to the collection through the `has_documents` relation.
     *
     * URLs not starting with the optional `prefix` are skipped.
     * The command aborts when the sitemap cannot be read, the collection is
     * not found or the sitemap contains no URLs.
     *
     * @param \Cake\Console\Arguments $args The command arguments
     * @param \Cake\Console\ConsoleIo $io The console io
     * @return int|null
     */
    public function execute(Arguments $args, ConsoleIo $io)
    {
        $sitemap = (string)$args->getOption('sitemap');
        $content = $this->readSitemap($sitemap, $io);

        $name = $args->getOption('collection');
        $response = $this->client->get('/collections', compact('name'));
        // Cast to array: the decoded JSON body may be null
        $collectionId = Hash::get((array)$response->getJson(), '0.cmetadata.id');
        if (empty($collectionId)) {
            $io->abort(sprintf('Collection not found: %s', $name));
        }
        $collection = $this->Collections->get($collectionId, ['contain' => ['HasDocuments']]);

        // URLs already known, used as array keys to get a fast lookup
        $known = [];
        foreach ((array)$collection->get('has_documents') as $document) {
            // Reload the related object from its own table to read the `url` property
            $link = $document->getTable()->get($document->id);
            $currentUrl = $link->get('url');
            if (!empty($currentUrl)) {
                $known[(string)$currentUrl] = true;
            }
        }

        $urls = $this->extractUrls($content);
        if (empty($urls)) {
            $io->abort('No URLs found in sitemap');
        }

        $prefix = (string)$args->getOption('prefix');
        $linkLoadOptions = (array)$collection->get('link_load_options');
        $entities = [];
        LoggedUser::setUserAdmin();
        foreach ($urls as $url) {
            if (
                isset($known[$url]) ||
                isset($known[urldecode($url)]) ||
                ($prefix !== '' && strpos($url, $prefix) !== 0)
            ) {
                continue;
            }
            // Remember the URL so that a repeated sitemap entry is not imported twice
            $known[$url] = true;
            $io->info('Adding link: ' . $url);
            $linkData = [
                'status' => 'on',
                'title' => $url,
                'url' => $url,
                'extra' => [
                    'brevia' => [
                        'metadata' => [
                            'type' => 'links',
                            'url' => $url,
                        ],
                        'options' => $this->linkOptions($url, $linkLoadOptions),
                    ],
                ],
            ];
            $entity = $this->Links->newEntity($linkData);
            $entities[] = $this->Links->saveOrFail($entity);
        }
        // @phpstan-ignore-next-line
        $this->Collections->addRelated($collection, 'has_documents', $entities);

        $io->out('Done. Link added successfully: ' . count($entities));

        return null;
    }

    /**
     * Read sitemap content from a local file path or from an HTTP(S) URL.
     *
     * The command is aborted if a local file does not exist
     * or if the content cannot be read.
     *
     * @param string $sitemap File path or URL
     * @param \Cake\Console\ConsoleIo $io The console io
     * @return string
     */
    protected function readSitemap(string $sitemap, ConsoleIo $io): string
    {
        // `file_exists()` is meaningful for local paths only
        $isRemote = preg_match('/^https?:\/\//i', $sitemap) === 1;
        if (!$isRemote && !file_exists($sitemap)) {
            $io->abort(sprintf('File not found: %s', $sitemap));
        }
        $content = file_get_contents($sitemap);
        if ($content === false) {
            $io->abort(sprintf('Unable to read sitemap: %s', $sitemap));
        }

        return (string)$content;
    }

    /**
     * Extract the `<loc>` value of every `<url>` element found in sitemap XML.
     *
     * An empty array is returned if the content is not valid XML.
     * Sitemaps with a single `<url>` element are handled like any other.
     *
     * @param string $content Sitemap XML content
     * @return string[]
     */
    protected function extractUrls(string $content): array
    {
        // Collect parse errors internally instead of raising PHP warnings
        $previous = libxml_use_internal_errors(true);
        $xml = simplexml_load_string($content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if ($xml === false) {
            return [];
        }

        $urls = [];
        foreach ($xml->url as $node) {
            $loc = trim((string)$node->loc);
            if ($loc !== '') {
                $urls[] = $loc;
            }
        }

        return $urls;
    }

    /**
     * Get link options
     *
     * The selector of the first item whose `url` is exactly `$url` is used, if not empty.
     * Otherwise the selector of the first item whose `url` is a prefix of `$url` is used.
     * The position of the matching item in the list is not relevant.
     * Items without a non-empty string `url` are ignored.
     *
     * @param string $url URL
     * @param array $linkLoadOptions Link load options
     * @return array Array with a single `selector` key, `null` if no selector was found
     */
    protected function linkOptions(string $url, array $linkLoadOptions): array
    {
        $exact = null;
        $byPrefix = null;
        foreach ($linkLoadOptions as $option) {
            $optionUrl = is_array($option) ? ($option['url'] ?? null) : null;
            if (!is_string($optionUrl) || $optionUrl === '') {
                continue;
            }
            if ($exact === null && $optionUrl === $url) {
                $exact = $option;
            }
            if ($byPrefix === null && strpos($url, $optionUrl) === 0) {
                $byPrefix = $option;
            }
        }

        $selector = $exact['selector'] ?? null;
        if (!empty($selector)) {
            return compact('selector');
        }

        return ['selector' => $byPrefix['selector'] ?? null];
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
<?php | ||
declare(strict_types=1); | ||
|
||
/** | ||
* BEdita Brevia plugin | ||
* | ||
* Copyright 2023 Atlas Srl | ||
*/ | ||
namespace Brevia\BEdita\Test\TestCase\Command; | ||
|
||
use BEdita\Core\Model\Entity\ObjectEntity; | ||
use Brevia\BEdita\Test\TestMockTrait; | ||
use Cake\Console\TestSuite\ConsoleIntegrationTestTrait; | ||
use Cake\Routing\Router; | ||
use Cake\TestSuite\TestCase; | ||
|
||
/** | ||
* {@see \Brevia\BEdita\Command\ImportSitemapCommand} Test Case | ||
* | ||
* @coversDefaultClass \Brevia\BEdita\Command\ImportSitemapCommand | ||
*/ | ||
class ImportSitemapCommandTest extends TestCase
{
    use ConsoleIntegrationTestTrait;
    use TestMockTrait;

    /**
     * @inheritDoc
     */
    public function setUp(): void
    {
        parent::setUp();
        $this->useCommandRunner();
        Router::reload();
    }

    /**
     * Test buildOptionParser method
     *
     * @return void
     * @covers ::buildOptionParser()
     */
    public function testBuildOptionParser(): void
    {
        $this->exec('import_sitemap --help');
        $this->assertOutputContains('File path or URL of sitemap to import');
        $this->assertOutputContains('Optional path prefix of URLs to import');
        $this->assertOutputContains('Collection used to index');
    }

    /**
     * Test failure when the sitemap file does not exist
     *
     * @return void
     * @covers ::initialize()
     * @covers ::execute()
     */
    public function testOptionFailure(): void
    {
        $this->exec('import_sitemap --sitemap /not/existing/path --collection gustavo');
        // The argument of `assertExitError()` is only the assertion failure message:
        // the error text must be verified separately
        $this->assertExitError();
        $this->assertErrorContains('File not found: /not/existing/path');
    }

    /**
     * Test failure when the collection is not found
     *
     * @return void
     * @covers ::initialize()
     * @covers ::execute()
     */
    public function testCollectionNotFound(): void
    {
        $this->mockClientResponse(json_encode([]));
        $xmlPath = sprintf('%s/tests/files/sitemap.xml', getcwd());
        $this->exec(sprintf('import_sitemap --sitemap %s --collection gustavo', $xmlPath));
        $this->assertExitError();
        $this->assertErrorContains('Collection not found: gustavo');
    }

    /**
     * Test failure when no URLs are found in the sitemap
     *
     * @return void
     * @covers ::initialize()
     * @covers ::execute()
     */
    public function testNoUrls(): void
    {
        $this->mockClientResponse('[{"cmetadata": {"id":"1"}}]');
        $this->mockTable('Collections', new ObjectEntity());
        $emptyPath = sprintf('%s/tests/files/empty.csv', getcwd());
        $this->exec(sprintf('import_sitemap --sitemap %s --collection gustavo', $emptyPath));
        $this->assertExitError();
        $this->assertErrorContains('No URLs found in sitemap');
    }

    /**
     * Test command success
     *
     * @return void
     * @covers ::execute()
     */
    public function testCommand(): void
    {
        $this->mockTable('Collections', new ObjectEntity());
        $this->mockTable('Links', new ObjectEntity());
        $this->mockClientResponse('[{"cmetadata": {"id":"1"}}]', 200, 3);
        $xmlPath = sprintf('%s/tests/files/sitemap.xml', getcwd());
        $this->exec(sprintf('import_sitemap --sitemap %s --collection gustavo', $xmlPath));
        $this->assertExitSuccess();
        $this->assertOutputContains('Done. Link added successfully: 2');
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml"> | ||
<url> | ||
<loc>https://example.com/home</loc> | ||
<lastmod>2024-04-28T08:02:38+00:00</lastmod> | ||
<changefreq>always</changefreq> | ||
<priority>1.0</priority> | ||
</url> | ||
<url> | ||
<loc>https://example.com/page2</loc> | ||
<lastmod>2024-04-28T08:02:38+00:00</lastmod> | ||
<changefreq>always</changefreq> | ||
<priority>1.0</priority> | ||
</url> | ||
</urlset> |