Skip to content

Commit

Permalink
Support multi-byte search for query names and descriptions (getredash…
Browse files Browse the repository at this point in the history
…#3908)

* Support multi-byte search for query names and descriptions

* add multi_byte_search_enabled option to organization settings

* add `ilike %...%` to query search conditions when the option is enabled

* Improve description for multi_byte_search_enabled option

Co-Authored-By: Arik Fraimovich <arik@arikfr.com>

* Remove tsvector from search when multi_byte_search_enabled

* Add a multi-byte search test case
  • Loading branch information
sekiyama58 authored and harveyrendell committed Nov 14, 2019
1 parent ab1f746 commit 167cd1c
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 1 deletion.
9 changes: 9 additions & 0 deletions client/app/pages/settings/OrganizationSettings.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,15 @@ class OrganizationSettings extends React.Component {
))}
</Select>
</Form.Item>
<Form.Item label="Multi-byte Search">
<Checkbox
name="multi_byte_search_enabled"
checked={formValues.multi_byte_search_enabled}
onChange={e => this.handleChange('multi_byte_search_enabled', e.target.checked)}
>
Enable multi-byte (Chinese, Japanese, and Korean) search for query names and descriptions (slower)
</Checkbox>
</Form.Item>
<Form.Item label="Feature Flags">
<Checkbox
name="feature_show_permissions_control"
Expand Down
2 changes: 2 additions & 0 deletions redash/handlers/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def get_queries(self, search_term):
self.current_user.group_ids,
self.current_user.id,
include_drafts=True,
multi_byte_search=current_org.get_setting('multi_byte_search_enabled'),
)
else:
results = models.Query.all_queries(
Expand Down Expand Up @@ -256,6 +257,7 @@ def get_queries(self, search_term):
self.current_user.id,
include_drafts=False,
include_archived=True,
multi_byte_search=current_org.get_setting('multi_byte_search_enabled'),
)
else:
return models.Query.all_queries(
Expand Down
13 changes: 12 additions & 1 deletion redash/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,13 +578,24 @@ def outdated_queries(cls):

@classmethod
def search(cls, term, group_ids, user_id=None, include_drafts=False,
           limit=None, include_archived=False, multi_byte_search=False):
    """Search the queries returned by ``cls.all_queries`` for ``term``.

    :param term: text to look for.
    :param group_ids: forwarded to ``cls.all_queries``.
    :param user_id: forwarded to ``cls.all_queries``.
    :param include_drafts: forwarded to ``cls.all_queries``.
    :param limit: maximum number of rows to return; ``None`` applies no limit.
    :param include_archived: forwarded to ``cls.all_queries``.
    :param multi_byte_search: when true, match ``term`` as a case-insensitive
        substring of the name or description instead of using the search
        vector, and order the results by id.
    :return: the filtered query object.
    """
    all_queries = cls.all_queries(
        group_ids,
        user_id=user_id,
        include_drafts=include_drafts,
        include_archived=include_archived,
    )

    if multi_byte_search:
        # Since tsvector doesn't work well with CJK languages, use `ilike` instead.
        # Escape the LIKE metacharacters so that a `%` or `_` typed by the user
        # is matched literally rather than acting as a wildcard.
        escaped_term = (
            term.replace(u'\\', u'\\\\')
            .replace(u'%', u'\\%')
            .replace(u'_', u'\\_')
        )
        pattern = u'%{}%'.format(escaped_term)
        return all_queries.filter(
            or_(
                cls.name.ilike(pattern, escape=u'\\'),
                cls.description.ilike(pattern, escape=u'\\'),
            )
        ).order_by(cls.id).limit(limit)

    # sort the result using the weight as defined in the search vector column
    return all_queries.search(term, sort=True).limit(limit)

Expand Down
2 changes: 2 additions & 0 deletions redash/settings/organization.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# Display formats, each overridable through its REDASH_* environment variable.
TIME_FORMAT = os.environ.get("REDASH_TIME_FORMAT", "HH:mm")
INTEGER_FORMAT = os.environ.get("REDASH_INTEGER_FORMAT", "0,0")
FLOAT_FORMAT = os.environ.get("REDASH_FLOAT_FORMAT", "0,0.00")
# Prefer the REDASH_-prefixed name used by the neighbouring settings, but keep
# honouring the original unprefixed variable so existing deployments still work.
MULTI_BYTE_SEARCH_ENABLED = parse_boolean(
    os.environ.get(
        "REDASH_MULTI_BYTE_SEARCH_ENABLED",
        os.environ.get("MULTI_BYTE_SEARCH_ENABLED", "false"),
    )
)

# JWT login settings, read from the environment with the defaults given here.
JWT_LOGIN_ENABLED = parse_boolean(os.environ.get("REDASH_JWT_LOGIN_ENABLED", "false"))
JWT_AUTH_ISSUER = os.environ.get("REDASH_JWT_AUTH_ISSUER", "")
Expand All @@ -41,6 +42,7 @@
"time_format": TIME_FORMAT,
"integer_format": INTEGER_FORMAT,
"float_format": FLOAT_FORMAT,
"multi_byte_search_enabled": MULTI_BYTE_SEARCH_ENABLED,
"auth_jwt_login_enabled": JWT_LOGIN_ENABLED,
"auth_jwt_auth_issuer": JWT_AUTH_ISSUER,
"auth_jwt_auth_public_certs_url": JWT_AUTH_PUBLIC_CERTS_URL,
Expand Down
11 changes: 11 additions & 0 deletions tests/models/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ def test_search_finds_in_description(self):
self.assertIn(q2, queries)
self.assertNotIn(q3, queries)

def test_search_finds_in_multi_byte_name_and_description(self):
    """Multi-byte search finds a CJK term in either the name or the description."""
    # Every CJK literal carries the `u` prefix so it is a unicode string
    # (not a byte string) on Python 2 as well.
    q1 = self.factory.create_query(name=u"日本語の名前テスト")
    q2 = self.factory.create_query(description=u"日本語の説明文テスト")
    q3 = self.factory.create_query(description=u"Testing search")

    queries = Query.search(u"テスト", [self.factory.default_group.id], multi_byte_search=True)

    # q1 matches on its name, q2 on its description; q3 has no CJK text at all.
    self.assertIn(q1, queries)
    self.assertIn(q2, queries)
    self.assertNotIn(q3, queries)

def test_search_by_id_returns_query(self):
q1 = self.factory.create_query(description=u"Testing search")
q2 = self.factory.create_query(description=u"Testing searching")
Expand Down

0 comments on commit 167cd1c

Please sign in to comment.