Skip to content

Commit

Permalink
Add Arabic search support
Browse files Browse the repository at this point in the history
  • Loading branch information
abdnh committed Nov 8, 2021
1 parent e440094 commit 1d537b5
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 5 deletions.
3 changes: 1 addition & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ tokio = { version = "0.2.18", features = ["macros"], optional = true }
warp = { version = "0.2.2", default-features = false, features = ["websocket"], optional = true }

# Search feature
elasticlunr-rs = { version = "2.3", optional = true, default-features = false }
elasticlunr-rs = { version = "2.3", optional = true, default-features = false, features = ["ar"], git = "https://github.com/abdnh/elasticlunr-rs", branch = "arabic"}
ammonia = { version = "3", optional = true }

[dev-dependencies]
Expand Down
6 changes: 4 additions & 2 deletions src/renderer/html_handlebars/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::path::Path;

use elasticlunr::Index;
use elasticlunr::{Index, Language};
use pulldown_cmark::*;

use crate::book::{Book, BookItem};
Expand All @@ -13,7 +13,8 @@ use crate::utils;

/// Creates all files required for search.
pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> {
let mut index = Index::new(&["title", "body", "breadcrumbs"]);
// let mut index = Index::new(&["title", "body", "breadcrumbs"]);
let mut index = Index::with_language(Language::Arabic, &["title", "body", "breadcrumbs"]);
let mut doc_urls = Vec::with_capacity(book.sections.len());

for item in book.iter() {
Expand All @@ -36,6 +37,7 @@ pub fn create_files(search_config: &Search, destination: &Path, book: &Book) ->
utils::fs::write_file(destination, "searcher.js", searcher::JS)?;
utils::fs::write_file(destination, "mark.min.js", searcher::MARK_JS)?;
utils::fs::write_file(destination, "elasticlunr.min.js", searcher::ELASTICLUNR_JS)?;
utils::fs::write_file(destination, "lunr.ar.js", searcher::LUNR_AR_JS)?;
debug!("Copying search files ✓");
}

Expand Down
1 change: 1 addition & 0 deletions src/theme/index.hbs
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@

{{#if search_js}}
<script src="{{ path_to_root }}elasticlunr.min.js" type="text/javascript" charset="utf-8"></script>
<script src="{{ path_to_root }}lunr.ar.js" type="text/javascript" charset="utf-8"></script>
<script src="{{ path_to_root }}mark.min.js" type="text/javascript" charset="utf-8"></script>
<script src="{{ path_to_root }}searcher.js" type="text/javascript" charset="utf-8"></script>
{{/if}}
Expand Down
97 changes: 97 additions & 0 deletions src/theme/searcher/lunr.ar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*!
* Simple Arabic stemmer based on lunr.ar.js from https://github.com/MihaiValentin/lunr-languages
*
* Copyright 2018, Dalia Al-Shahrabi
* http://www.mozilla.org/MPL/
*/
/*!
*/

/**
* export the module via AMD, CommonJS or as a browser global
* Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
*/
;
(function(root, factory) {
if (typeof define === 'function' && define.amd) {
// AMD. Register as an anonymous module.
define(factory)
} else if (typeof exports === 'object') {
/**
* Node. Does not work with strict CommonJS, but
* only CommonJS-like environments that support module.exports,
* like Node.
*/
module.exports = factory()
} else {
// Browser globals (root is window)
factory()(root.lunr);
}
}(this, function() {
/**
* Just return a value to define the module export.
* This example returns an object, but the module
* can return a function as the exported value.
*/
return function(lunr) {
/* throw error if lunr is not yet included */
if ('undefined' === typeof lunr) {
throw new Error('Lunr is not present. Please include / require Lunr before this script.');
}

/* register specific locale function */
lunr.ar = function() {
this.pipeline.reset();
this.pipeline.add(
lunr.ar.stemmer
);

// for lunr version 2
// this is necessary so that every searched word is also stemmed before
// in lunr <= 1 this is not needed, as it is done using the normal pipeline
if (this.searchPipeline) {
this.searchPipeline.reset();
this.searchPipeline.add(lunr.ar.stemmer)
}
};

/* lunr stemmer function */
lunr.ar.stemmer = (function() {

/* remove elongating character */
self.removeElongating = function(word) {
return word.replace(/[\u0640]/gi, '');
}

self.removeDiacritics = function(word) {
return word.replace(/[\u064b-\u065b]/gi, '');
}

/*Replace all variations of alef (آأإٱى) to a plain alef (ا)*/
self.cleanAlef = function(word) {
return word.replace(/[\u0622\u0623\u0625\u0671\u0649]/gi, "\u0627");
}

self.execArray = [
'removeElongating',
'removeDiacritics',
'cleanAlef'
];

self.stem = function(word) {
var counter = 0;
while (counter < self.execArray.length) {
word = self[self.execArray[counter]](word);
counter++;
}
return word;
}

return function(word) {
return self.stem(word);
}
})();

lunr.Pipeline.registerFunction(lunr.ar.stemmer, 'stemmer-ar');
};
}))
1 change: 1 addition & 0 deletions src/theme/searcher/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
/// Raw bytes of `searcher.js`, embedded at compile time via `include_bytes!`.
pub static JS: &[u8] = include_bytes!("searcher.js");
/// Raw bytes of `mark.min.js`, embedded at compile time via `include_bytes!`.
pub static MARK_JS: &[u8] = include_bytes!("mark.min.js");
/// Raw bytes of `elasticlunr.min.js`, embedded at compile time via `include_bytes!`.
pub static ELASTICLUNR_JS: &[u8] = include_bytes!("elasticlunr.min.js");
/// Raw bytes of `lunr.ar.js`, embedded at compile time via `include_bytes!`.
pub static LUNR_AR_JS: &[u8] = include_bytes!("lunr.ar.js");

0 comments on commit 1d537b5

Please sign in to comment.