Skip to content

Commit

Permalink
feat: index tweaks and exclude blank description from search (#277)
Browse files Browse the repository at this point in the history
  • Loading branch information
JasonChong96 authored Jul 20, 2020
1 parent 9bbafb5 commit 5c24975
Show file tree
Hide file tree
Showing 11 changed files with 114 additions and 73 deletions.
68 changes: 38 additions & 30 deletions src/client/components/BaseLayout/BaseLayoutHeader.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,12 @@ const mapDispatchToProps = (dispatch) => ({
logout: () => dispatch(loginActions.logout()),
})

const BaseLayoutHeader = ({ backgroundType, isLoggedIn, logout, hideAuth }) => {
const BaseLayoutHeader = ({
backgroundType,
isLoggedIn,
logout,
hideNavButtons,
}) => {
const isLightItems = backgroundType === 'darkest'
const theme = useTheme()
const isMobileVariant = useMediaQuery(theme.breakpoints.down('sm'))
Expand Down Expand Up @@ -179,33 +184,36 @@ const BaseLayoutHeader = ({ backgroundType, isLoggedIn, logout, hideAuth }) => {
/>
</a>
<span className={classes.rowSpace} />
{headers.map(
(header) =>
(header.public ? !isLoggedIn : isLoggedIn) &&
!header.hidden && (
<Button
href={header.internalLink ? `/#${header.link}` : header.link}
target={header.internalLink ? '' : '_blank'}
color="primary"
size="large"
variant="text"
key={header.text}
className={classes.headerButton}
style={
isMobileVariant && header.mobileOrder
? { order: header.mobileOrder }
: {}
}
>
{isMobileVariant && header.icon && (
<img src={header.icon} alt={header.text} />
)}
{isMobileVariant && header.component}
{!isMobileVariant && header.text}
</Button>
),
)}
{!hideAuth && appBarBtn}
{!hideNavButtons &&
headers.map(
(header) =>
(header.public ? !isLoggedIn : isLoggedIn) &&
!header.hidden && (
<Button
href={
header.internalLink ? `/#${header.link}` : header.link
}
target={header.internalLink ? '' : '_blank'}
color="primary"
size="large"
variant="text"
key={header.text}
className={classes.headerButton}
style={
isMobileVariant && header.mobileOrder
? { order: header.mobileOrder }
: {}
}
>
{isMobileVariant && header.icon && (
<img src={header.icon} alt={header.text} />
)}
{isMobileVariant && header.component}
{!isMobileVariant && header.text}
</Button>
),
)}
{!hideNavButtons && appBarBtn}
</Toolbar>
</AppBar>
</Section>
Expand All @@ -216,11 +224,11 @@ BaseLayoutHeader.propTypes = {
backgroundType: PropTypes.string.isRequired,
isLoggedIn: PropTypes.bool.isRequired,
logout: PropTypes.func.isRequired,
hideAuth: PropTypes.bool,
hideNavButtons: PropTypes.bool,
}

BaseLayoutHeader.defaultProps = {
hideAuth: false,
hideNavButtons: false,
}

export default connect(mapStateToProps, mapDispatchToProps)(BaseLayoutHeader)
8 changes: 4 additions & 4 deletions src/client/components/BaseLayout/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ const BaseLayout = ({
headerBackgroundType,
withFooter,
children,
hideAuth,
hideNavButtons,
}) => {
const classes = useStyles()
const path = useLocation().pathname
Expand All @@ -60,7 +60,7 @@ const BaseLayout = ({
{withHeader && (
<BaseLayoutHeader
backgroundType={headerBackgroundType}
hideAuth={hideAuth}
hideNavButtons={hideNavButtons}
/>
)}
<div className={classes.layout}>{children}</div>
Expand All @@ -73,14 +73,14 @@ BaseLayout.propTypes = {
withHeader: PropTypes.bool,
headerBackgroundType: PropTypes.string,
withFooter: PropTypes.bool,
hideAuth: PropTypes.bool,
hideNavButtons: PropTypes.bool,
}

BaseLayout.defaultProps = {
withHeader: true,
headerBackgroundType: 'dark',
withFooter: true,
hideAuth: false,
hideNavButtons: false,
}

export default BaseLayout
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const useStyles = makeStyles((theme) =>
marginBottom: '76px',
position: 'relative',
right: theme.spacing(5),
zIndex: -1,
},
emptyStateBodyText: {
marginTop: '8px',
Expand Down
2 changes: 1 addition & 1 deletion src/client/components/SearchPage/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ const SearchPage: FunctionComponent<SearchPageProps> = () => {

return (
<div className={classes.root}>
<BaseLayout headerBackgroundType="darkest" hideAuth>
<BaseLayout headerBackgroundType="darkest" hideNavButtons>
<SearchHeader
onQueryChange={onQueryChange}
query={pendingQuery}
Expand Down
6 changes: 6 additions & 0 deletions src/server/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,9 @@ export const accessEndpoint =
export const dbPoolSize = Number(process.env.DB_POOL_SIZE) || 40

export const sentryDns: string | undefined = process.env.SENTRY_DNS

// Search variables
export const searchShortUrlWeight =
Number(process.env.SEARCH_SHORT_URL_WEIGHT) || 1.0
export const searchDescriptionWeight =
Number(process.env.SEARCH_DESCRIPTION_WEIGHT) || 0.4
4 changes: 4 additions & 0 deletions src/server/db_migrations/20200720_delete_search_index.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- This migration script adds the old index for GoSearch
-- The new index will be managed by sequelize
-- This must be run before the deployment of the sequelize managed index.
DROP INDEX IF EXISTS urls_weighted_search_idx;
10 changes: 10 additions & 0 deletions src/server/models/search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import { StorableUrlState } from '../repositories/enums'

// Warning: This expression and conditions has to be EXACTLY the same as the one used in the index
// or else the index will not be used leading to unnecessarily long query times.
export const urlSearchVector = `
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
`

export const urlSearchConditions = `urls.state = '${StorableUrlState.Active}' AND urls.description != ''`
17 changes: 17 additions & 0 deletions src/server/models/url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { sequelize } from '../util/sequelize'
import { IdType } from '../../types/server/models'
import { DEV_ENV, emailValidator, ogHostname } from '../config'
import { StorableUrlState } from '../repositories/enums'
import { urlSearchVector } from './search'

interface UrlBaseType extends IdType {
readonly shortUrl: string
Expand Down Expand Up @@ -150,6 +151,22 @@ export const Url = <UrlTypeStatic>sequelize.define(
unique: false,
fields: ['userId'],
},
{
name: 'urls_weighted_search_idx',
unique: false,
using: 'GIN',
fields: [
// Type definition on sequelize seems to be inaccurate.
// @ts-ignore
Sequelize.literal(`(${urlSearchVector})`),
],
where: {
state: ACTIVE,
description: {
[Sequelize.Op.ne]: '',
},
},
},
],
},
)
Expand Down
37 changes: 15 additions & 22 deletions src/server/repositories/UrlRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ import { QueryTypes } from 'sequelize'
import { Url, UrlType } from '../models/url'
import { NotFoundError } from '../util/error'
import { redirectClient } from '../redis'
import { logger, redirectExpiry } from '../config'
import {
logger,
redirectExpiry,
searchDescriptionWeight,
searchShortUrlWeight,
} from '../config'
import { sequelize } from '../util/sequelize'
import { DependencyIds } from '../constants'
import { FileVisibility, S3Interface } from '../services/aws'
Expand All @@ -14,8 +19,10 @@ import { StorableFile, StorableUrl, UrlsPaginated } from './types'
import { StorableUrlState } from './enums'
import { Mapper } from '../mappers/Mapper'
import { SearchResultsSortOrder } from '../../shared/search'
import { urlSearchConditions, urlSearchVector } from '../models/search'

const { Public, Private } = FileVisibility

/**
* A url repository that handles access to the data store of Urls.
* The following implementation uses Sequelize, AWS S3 and Redis.
Expand Down Expand Up @@ -148,23 +155,11 @@ export class UrlRepository implements UrlRepositoryInterface {
) => Promise<UrlsPaginated> = async (query, order, limit, offset) => {
const { tableName } = Url

// Warning: This expression has to be EXACTLY the same as the one used in the index
// or else the index will not be used leading to unnecessarily long query times.
const urlVector = `
setweight(to_tsvector('english', ${tableName}."shortUrl"), 'A') ||
setweight(to_tsvector('english', ${tableName}."description"), 'B')
`
const count = await this.getPlainTextSearchResultsCount(
tableName,
urlVector,
query,
)
const urlVector = urlSearchVector

const rankingAlgorithm = this.getRankingAlgorithm(
order,
urlVector,
tableName,
)
const count = await this.getPlainTextSearchResultsCount(tableName, query)

const rankingAlgorithm = this.getRankingAlgorithm(order, tableName)

const urlsModel = await this.getRelevantUrls(
tableName,
Expand Down Expand Up @@ -254,7 +249,7 @@ export class UrlRepository implements UrlRepositoryInterface {
const rawQuery = `
SELECT ${tableName}.*
FROM ${tableName}, plainto_tsquery($query) query
WHERE query @@ (${urlVector}) AND state = '${StorableUrlState.Active}'
WHERE query @@ (${urlVector}) AND ${urlSearchConditions}
ORDER BY (${rankingAlgorithm}) DESC
LIMIT $limit
OFFSET $offset`
Expand All @@ -273,7 +268,6 @@ export class UrlRepository implements UrlRepositoryInterface {

private getRankingAlgorithm(
order: SearchResultsSortOrder,
urlVector: string,
tableName: string,
) {
let rankingAlgorithm
Expand All @@ -284,7 +278,7 @@ export class UrlRepository implements UrlRepositoryInterface {
// the normalization option that specifies whether and how
// a document's length should impact its rank. It works as a bit mask.
// 1 divides the rank by 1 + the logarithm of the document length
const textRanking = `ts_rank_cd(${urlVector}, query, 1)`
const textRanking = `ts_rank_cd('{0, 0, ${searchDescriptionWeight}, ${searchShortUrlWeight}}',${urlSearchVector}, query, 1)`
rankingAlgorithm = `${textRanking} * log(${tableName}.clicks + 1)`
}
break
Expand All @@ -302,13 +296,12 @@ export class UrlRepository implements UrlRepositoryInterface {

private async getPlainTextSearchResultsCount(
tableName: string,
urlVector: string,
query: string,
) {
const rawCountQuery = `
SELECT count(*)
FROM ${tableName}, plainto_tsquery($query) query
WHERE query @@ (${urlVector}) AND state = '${StorableUrlState.Active}'
WHERE query @@ (${urlSearchVector}) AND ${urlSearchConditions}
`
const [{ count: countString }] = await sequelize.query(rawCountQuery, {
bind: {
Expand Down
2 changes: 2 additions & 0 deletions test/server/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,6 @@ jest.mock('../../src/server/config', () => ({
s3Bucket: 'file-staging.go.gov.sg',
linksToRotate: 'testlink1,testlink2,testlink3',
sentryDns: 'mocksentry.com',
searchShortUrlWeight: 1,
searchDescriptionWeight: 0.4,
}))
32 changes: 16 additions & 16 deletions test/server/respositories/UrlRepository.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ describe('UrlRepository tests', () => {
SELECT count(*)
FROM urls, plainto_tsquery($query) query
WHERE query @@ (
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
) AND state = 'ACTIVE'
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
) AND urls.state = 'ACTIVE' AND urls.description != ''
`,
{ bind: { query: 'query' }, raw: true, type: QueryTypes.SELECT },
)
Expand All @@ -114,9 +114,9 @@ describe('UrlRepository tests', () => {
SELECT urls.*
FROM urls, plainto_tsquery($query) query
WHERE query @@ (
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
) AND state = 'ACTIVE'
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
) AND urls.state = 'ACTIVE' AND urls.description != ''
ORDER BY (urls.clicks) DESC
LIMIT $limit
OFFSET $offset`,
Expand All @@ -142,13 +142,13 @@ describe('UrlRepository tests', () => {
SELECT urls.*
FROM urls, plainto_tsquery($query) query
WHERE query @@ (
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
) AND state = 'ACTIVE'
ORDER BY (ts_rank_cd(
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
, query, 1) * log(urls.clicks + 1)) DESC
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
) AND urls.state = 'ACTIVE' AND urls.description != ''
ORDER BY (ts_rank_cd('{0, 0, 0.4, 1}',
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
, query, 1) * log(urls.clicks + 1)) DESC
LIMIT $limit
OFFSET $offset`,
{
Expand All @@ -173,9 +173,9 @@ describe('UrlRepository tests', () => {
SELECT urls.*
FROM urls, plainto_tsquery($query) query
WHERE query @@ (
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
) AND state = 'ACTIVE'
setweight(to_tsvector('english', urls."shortUrl"), 'A') ||
setweight(to_tsvector('english', urls."description"), 'B')
) AND urls.state = 'ACTIVE' AND urls.description != ''
ORDER BY (urls."createdAt") DESC
LIMIT $limit
OFFSET $offset`,
Expand Down

0 comments on commit 5c24975

Please sign in to comment.