Skip to content

Commit

Permalink
Merge pull request #159 from guardian/pf/ap-world-bucket
Browse files Browse the repository at this point in the history
Initial fingerpost-based AP world bucket
  • Loading branch information
bryophyta authored Feb 26, 2025
2 parents 4319828 + 3497837 commit 8a451de
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 11 deletions.
29 changes: 28 additions & 1 deletion newswires/app/conf/SearchBuckets.scala
Original file line number Diff line number Diff line change
Expand Up @@ -659,10 +659,37 @@ object SearchBuckets {
)
)

// format: off
/**
* Main config table for AP world ('NY:for') bucket in Fip system.
* (nb. 'NY' here is a Fip header, and doesn't seem to stand for New York)
> ; Category Codes
> 2 JC=a* >w4apapi#NY:for
> 2 JC=d* >w4apapi#NY:fea
> 2 JC=e* >w4apapi#NY:fea
> 2 JC=f* >w4apapi#NY:fin
> 2 JC=i* >w4apapi#NY:for
> 2 JC=s* >w4apapi#NY:spt
> 2 JC=t* >w4apapi#NY:fea
> 2 JC=w* >w4apapi#NY:for
> ; Default
> 2 JC=* >w4apapi#NY:for
* The fingerpost system runs top to bottom, and '>' tells it to stop once it finds a match, so an item with
* category code 'JC:ae' would be bucketed as 'NY:for' and not 'NY:fea', and an item with category code 'JC:ew'
* would be bucketed as 'NY:fea' rather than 'NY:for'.
* Rather than replicating that exactly, we're inclined to exclude sports, entertainment, finance, and technology
* news from this bucket even if they also carry e.g. the 'a' (US news) code, because they're likely to be less
* relevant to the International desk.
* However, we should remain open to changing this in response to user feedback.
*/
// format: on
// Search preset for the AP world bucket: restricted to supplier "AP", with the keyword "World news"
// and AP category codes i/a/w as inclusion criteria and category codes s/e/f as exclusions.
// How the inclusion criteria are combined (AND vs OR) is decided by the query builder, not here.
// NOTE(review): the Fip notes preceding this val also mention excluding technology news, but only
// the s/e/f codes are listed in categoryCodesExcl — confirm whether that omission is intentional.
private val ApWorld = SearchParams(
  text = None,
  suppliersIncl = List("AP"),
  keywordIncl = List("World news"),
  subjectsIncl = Nil, // trailing comma was missing, which made the argument list unparseable
  categoryCodesIncl = List("apCat:i", "apCat:a", "apCat:w"),
  categoryCodesExcl = List("apCat:s", "apCat:e", "apCat:f")
)

private val ReutersWorld = SearchParams(
Expand Down
6 changes: 5 additions & 1 deletion newswires/app/controllers/QueryController.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class QueryController(
suppliers: List[String],
subjects: List[String],
subjectsExcl: List[String],
categoryCode: List[String],
categoryCodeExcl: List[String],
maybeBeforeId: Option[Int],
maybeSinceId: Option[Int]
): Action[AnyContent] = apiAuthAction { request: UserRequest[AnyContent] =>
Expand All @@ -57,7 +59,9 @@ class QueryController(
.map(_.toList)
.getOrElse(Nil) ++ suppliersToExcludeByDefault,
subjectsIncl = subjects,
subjectsExcl = subjectsExcl
subjectsExcl = subjectsExcl,
categoryCodesIncl = categoryCode,
categoryCodesExcl = categoryCodeExcl
)

val mergedParams = bucket.map(_ merge queryParams).getOrElse(queryParams)
Expand Down
41 changes: 35 additions & 6 deletions newswires/app/db/FingerpostWireEntry.scala
Original file line number Diff line number Diff line change
Expand Up @@ -258,20 +258,49 @@ object FingerpostWireEntry
)
}

// Inclusion filter on category codes: None when no codes were requested, otherwise a clause using
// the `&&` operator between the row's category codes and the requested ones.
// NOTE(review): presumably category_codes is a Postgres text[] and `&&` is array overlap
// ("shares at least one element") — confirm against the table schema.
val categoryCodesInclQuery = search.categoryCodesIncl match {
  case Nil => None
  case categoryCodes =>
    Some(
      sqls"${syn.categoryCodes} && ${textArray(categoryCodes)}"
    )
}

// Exclusion filter on category codes: None when nothing is excluded, otherwise a NOT EXISTS
// sub-select (under a separate alias) that looks up the same row by id and checks whether its
// category codes match any excluded code via `&&`.
val categoryCodesExclQuery = search.categoryCodesExcl match {
  case Nil => None
  case categoryCodesExcl =>
    // Separate alias so the sub-select can be correlated with the outer `syn` row.
    val cce = this.syntax("categoryCodesExcl")
    val doesContainCategoryCodes =
      sqls"${cce.categoryCodes} && ${textArray(categoryCodesExcl)}"

    Some(
      sqls"""|NOT EXISTS (
             | SELECT FROM ${FingerpostWireEntry as cce}
             | WHERE ${syn.id} = ${cce.id}
             | AND $doesContainCategoryCodes
             |)""".stripMargin
    )
}

// Keywords, subjects and category codes are treated as the same "axis" to search on: whichever
// of the three inclusion clauses are present get OR-ed together into a single clause.
// (This replaces the earlier two-way `keywordsOrSubjectsQuery`.)
val clausesJoinedWithOr =
  List(keywordsQuery, subjectsQuery, categoryCodesInclQuery).flatten match {
    case Nil => None
    case clauses =>
      Some(sqls.joinWithOr(clauses: _*))
  }

// Every optional clause that applies to the search; absent (None) clauses are dropped by flatten.
val commonWhereClauses = List(
  clausesJoinedWithOr,
  keywordsExclQuery,
  subjectsExclQuery,
  search.text.map(query =>
    sqls"websearch_to_tsquery('english', $query) @@ ${FingerpostWireEntry.syn.column("combined_textsearch")}"
  ),
  sourceFeedsQuery,
  sourceFeedsExclQuery,
  // NOTE(review): categoryCodesInclQuery is already part of clausesJoinedWithOr. If the clauses
  // in this list are AND-ed together downstream, listing it again here reduces
  // (keywords OR subjects OR categoryCodes) AND categoryCodes to just the categoryCodes match,
  // so keyword/subject inclusions stop widening the result whenever category codes are given.
  // Kept to preserve current behaviour — confirm whether this entry should be dropped.
  categoryCodesInclQuery,
  categoryCodesExclQuery
).flatten

val dataOnlyWhereClauses = List(
Expand Down
8 changes: 6 additions & 2 deletions newswires/app/db/SearchParams.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ case class SearchParams(
suppliersIncl: List[String] = Nil,
suppliersExcl: List[String] = Nil,
subjectsIncl: List[String] = Nil,
subjectsExcl: List[String] = Nil
subjectsExcl: List[String] = Nil,
categoryCodesIncl: List[String] = Nil,
categoryCodesExcl: List[String] = Nil
) {
def merge(o: SearchParams): SearchParams = {
val mergedText = (text, o.text) match {
Expand All @@ -21,7 +23,9 @@ case class SearchParams(
suppliersIncl = suppliersIncl ++ o.suppliersIncl,
suppliersExcl = suppliersExcl ++ o.suppliersExcl,
subjectsIncl = subjectsIncl ++ o.subjectsIncl,
subjectsExcl = subjectsExcl ++ o.subjectsExcl
subjectsExcl = subjectsExcl ++ o.subjectsExcl,
categoryCodesIncl = categoryCodesIncl ++ o.categoryCodesIncl,
categoryCodesExcl = categoryCodesExcl ++ o.categoryCodesExcl
)
}
}
2 changes: 2 additions & 0 deletions newswires/client/src/sharedTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ export const QuerySchema = z.object({
keywordsExcl: z.ostring(),
subjects: z.array(z.string()).optional(),
subjectsExcl: z.array(z.string()).optional(),
categoryCode: z.array(z.string()).optional(),
categoryCodeExcl: z.array(z.string()).optional(),
bucket: z.ostring(),
});

Expand Down
6 changes: 6 additions & 0 deletions newswires/client/src/urlState.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ export const defaultQuery: Query = {
subjects: [],
subjectsExcl: [],
bucket: undefined,
categoryCode: [],
categoryCodeExcl: [],
};

export const defaultConfig: Config = Object.freeze({
Expand All @@ -31,6 +33,8 @@ export function urlToConfig(location: {
const keywordsExcl = urlSearchParams.get('keywordsExcl') ?? undefined;
const subjects = urlSearchParams.getAll('subjects');
const subjectsExcl = urlSearchParams.getAll('subjectsExcl');
const categoryCode = urlSearchParams.getAll('categoryCode');
const categoryCodeExcl = urlSearchParams.getAll('categoryCodeExcl');
const bucket = urlSearchParams.get('bucket') ?? undefined;
const query: Query = {
q:
Expand All @@ -43,6 +47,8 @@ export function urlToConfig(location: {
keywordsExcl,
subjects,
subjectsExcl,
categoryCode,
categoryCodeExcl,
bucket,
};

Expand Down
2 changes: 1 addition & 1 deletion newswires/conf/routes
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
GET / controllers.ViteController.index()
GET /feed controllers.ViteController.index()
GET /item/*id controllers.ViteController.item(id: String)
# Wire search. categoryCode / categoryCodeExcl are passed to the controller as the category-code include / exclude lists.
GET /api/search controllers.QueryController.query(q: Option[String], keywords: Option[String], supplier: List[String], subjects: List[String], subjectsExcl: List[String], categoryCode: List[String], categoryCodeExcl: List[String], beforeId: Option[Int], sinceId: Option[Int])
GET /api/keywords controllers.QueryController.keywords(inLastHours: Option[Int], limit:Option[Int])
GET /api/item/:id controllers.QueryController.item(id: Int, q: Option[String])
PUT /api/item/:id/composerId controllers.QueryController.linkToComposer(id: Int)
Expand Down

0 comments on commit 8a451de

Please sign in to comment.