From 9fa3b24f1e505cbbf572b05614632f1c883fdf96 Mon Sep 17 00:00:00 2001 From: Pete F Date: Thu, 13 Feb 2025 10:34:23 +0000 Subject: [PATCH 1/2] Initial fingerpost-based AP world bucket --- newswires/app/conf/SearchBuckets.scala | 29 ++++++++++++- .../app/controllers/QueryController.scala | 6 ++- newswires/app/db/FingerpostWireEntry.scala | 41 ++++++++++++++++--- newswires/app/db/SearchParams.scala | 8 +++- .../client/src/context/SearchContext.tsx | 1 - newswires/client/src/sharedTypes.ts | 2 + newswires/client/src/urlState.ts | 6 +++ newswires/conf/routes | 2 +- 8 files changed, 83 insertions(+), 12 deletions(-) diff --git a/newswires/app/conf/SearchBuckets.scala b/newswires/app/conf/SearchBuckets.scala index f012c1ef..08749a34 100644 --- a/newswires/app/conf/SearchBuckets.scala +++ b/newswires/app/conf/SearchBuckets.scala @@ -659,10 +659,37 @@ object SearchBuckets { ) ) + // format: off + /** + * Main config table for AP world ('NY:for') bucket in Fip system. + * (nb. 'NY' here is a Fip header, and doesn't seem to stand for New York) + + > ; Category Codes + > 2 JC=a* >w4apapi#NY:for + > 2 JC=d* >w4apapi#NY:fea + > 2 JC=e* >w4apapi#NY:fea + > 2 JC=f* >w4apapi#NY:fin + > 2 JC=i* >w4apapi#NY:for + > 2 JC=s* >w4apapi#NY:spt + > 2 JC=t* >w4apapi#NY:fea + > 2 JC=w* >w4apapi#NY:for + > ; Default + > 2 JC=* >w4apapi#NY:for + + * The fingerpost system runs top to bottom, and '>' tells it to stop once it finds a match, so an item with + * category code 'JC:ae' would be bucketed as 'NY:for' and not 'NY:fea', and an item with category code 'JC:ew' + * would be bucketed as 'NY:fea' rather than 'NY:for'. + * We're inclined to exclude sports, entertainment, finance, and technology news from this bucket instead, even + * if they have e.g. the 'a' (US news) code, because they're likely to be less relevant to the International desk. + * However, we should remain open to changing this in response to user feedback. 
+ */ + // format: on private val ApWorld = SearchParams( text = None, + suppliersIncl = List("AP"), keywordIncl = List("World news"), - subjectsIncl = Nil + categoryCodesIncl = List("apCat:i", "apCat:a", "apCat:w"), + categoryCodesExcl = List("apCat:s", "apCat:e", "apCat:f") ) private val ReutersWorld = SearchParams( diff --git a/newswires/app/controllers/QueryController.scala b/newswires/app/controllers/QueryController.scala index 505af762..4e16bc87 100644 --- a/newswires/app/controllers/QueryController.scala +++ b/newswires/app/controllers/QueryController.scala @@ -37,6 +37,8 @@ class QueryController( suppliers: List[String], subjects: List[String], subjectsExcl: List[String], + categoryCode: List[String], + categoryCodeExcl: List[String], maybeBeforeId: Option[Int], maybeSinceId: Option[Int] ): Action[AnyContent] = apiAuthAction { request: UserRequest[AnyContent] => @@ -50,7 +52,9 @@ class QueryController( suppliersExcl = request.queryString.get("supplierExcl").map(_.toList).getOrElse(Nil), subjectsIncl = subjects, - subjectsExcl = subjectsExcl + subjectsExcl = subjectsExcl, + categoryCodesIncl = categoryCode, + categoryCodesExcl = categoryCodeExcl ) val mergedParams = bucket.map(_ merge queryParams).getOrElse(queryParams) diff --git a/newswires/app/db/FingerpostWireEntry.scala b/newswires/app/db/FingerpostWireEntry.scala index b6dfe961..d405a175 100644 --- a/newswires/app/db/FingerpostWireEntry.scala +++ b/newswires/app/db/FingerpostWireEntry.scala @@ -258,20 +258,49 @@ object FingerpostWireEntry ) } - // grr annoying but broadly I think subjects and keywords are the same "axis" to search on - val keywordsOrSubjectsQuery = (keywordsQuery, subjectsQuery) match { - case (Some(kwq), Some(subq)) => Some(sqls"$kwq OR $subq") - case _ => keywordsQuery orElse subjectsQuery + val categoryCodesInclQuery = search.categoryCodesIncl match { + case Nil => None + case categoryCodes => + Some( + sqls"${syn.categoryCodes} && array[${categoryCodes.map(code => 
sqls"$code")}]::text[]" + ) + } + + val categoryCodesExclQuery = search.categoryCodesExcl match { + case Nil => None + case categoryCodesExcl => + val cce = this.syntax("categoryCodesExcl") + val doesContainCategoryCodes = + sqls"${cce.categoryCodes} && array[${categoryCodesExcl.map(code => sqls"$code")}]::text[]" + + Some( + sqls"""|NOT EXISTS ( + | SELECT FROM ${FingerpostWireEntry as cce} + | WHERE ${syn.id} = ${cce.id} + | AND $doesContainCategoryCodes + |)""".stripMargin + ) } + + // grr annoying but broadly I think subjects(/categoryCodes) and keywords are the same "axis" to search on + val clausesJoinedWithOr = + List(keywordsQuery, subjectsQuery, categoryCodesInclQuery).flatten match { + case Nil => None + case clauses => + Some(sqls.joinWithOr(clauses: _*)) + } + val commonWhereClauses = List( - keywordsOrSubjectsQuery, + clausesJoinedWithOr, keywordsExclQuery, subjectsExclQuery, search.text.map(query => sqls"websearch_to_tsquery('english', $query) @@ ${FingerpostWireEntry.syn.column("combined_textsearch")}" ), sourceFeedsQuery, - sourceFeedsExclQuery + sourceFeedsExclQuery, + categoryCodesInclQuery, + categoryCodesExclQuery ).flatten val dataOnlyWhereClauses = List( diff --git a/newswires/app/db/SearchParams.scala b/newswires/app/db/SearchParams.scala index 80a70656..e508423b 100644 --- a/newswires/app/db/SearchParams.scala +++ b/newswires/app/db/SearchParams.scala @@ -7,7 +7,9 @@ case class SearchParams( suppliersIncl: List[String] = Nil, suppliersExcl: List[String] = Nil, subjectsIncl: List[String] = Nil, - subjectsExcl: List[String] = Nil + subjectsExcl: List[String] = Nil, + categoryCodesIncl: List[String] = Nil, + categoryCodesExcl: List[String] = Nil ) { def merge(o: SearchParams): SearchParams = { val mergedText = (text, o.text) match { @@ -21,7 +23,9 @@ case class SearchParams( suppliersIncl = suppliersIncl ++ o.suppliersIncl, suppliersExcl = suppliersExcl ++ o.suppliersExcl, subjectsIncl = subjectsIncl ++ o.subjectsIncl, - subjectsExcl = 
subjectsExcl ++ o.subjectsExcl + subjectsExcl = subjectsExcl ++ o.subjectsExcl, + categoryCodesIncl = categoryCodesIncl ++ o.categoryCodesIncl, + categoryCodesExcl = categoryCodesExcl ++ o.categoryCodesExcl ) } } diff --git a/newswires/client/src/context/SearchContext.tsx b/newswires/client/src/context/SearchContext.tsx index 69a0388c..a6e0198f 100644 --- a/newswires/client/src/context/SearchContext.tsx +++ b/newswires/client/src/context/SearchContext.tsx @@ -5,7 +5,6 @@ import { useContext, useEffect, useReducer, - useRef, useState, } from 'react'; import { z } from 'zod'; diff --git a/newswires/client/src/sharedTypes.ts b/newswires/client/src/sharedTypes.ts index 1e32ceb6..0c6e4c6d 100644 --- a/newswires/client/src/sharedTypes.ts +++ b/newswires/client/src/sharedTypes.ts @@ -58,6 +58,8 @@ export const QuerySchema = z.object({ keywordsExcl: z.ostring(), subjects: z.array(z.string()).optional(), subjectsExcl: z.array(z.string()).optional(), + categoryCode: z.array(z.string()).optional(), + categoryCodeExcl: z.array(z.string()).optional(), bucket: z.ostring(), }); diff --git a/newswires/client/src/urlState.ts b/newswires/client/src/urlState.ts index 4863e1ba..07061633 100644 --- a/newswires/client/src/urlState.ts +++ b/newswires/client/src/urlState.ts @@ -9,6 +9,8 @@ export const defaultQuery: Query = { subjects: [], subjectsExcl: [], bucket: undefined, + categoryCode: [], + categoryCodeExcl: [], }; export const defaultConfig: Config = Object.freeze({ @@ -31,6 +33,8 @@ export function urlToConfig(location: { const keywordsExcl = urlSearchParams.get('keywordsExcl') ?? undefined; const subjects = urlSearchParams.getAll('subjects'); const subjectsExcl = urlSearchParams.getAll('subjectsExcl'); + const categoryCode = urlSearchParams.getAll('categoryCode'); + const categoryCodeExcl = urlSearchParams.getAll('categoryCodeExcl'); const bucket = urlSearchParams.get('bucket') ?? 
undefined; const query: Query = { q: @@ -43,6 +47,8 @@ export function urlToConfig(location: { keywordsExcl, subjects, subjectsExcl, + categoryCode, + categoryCodeExcl, bucket, }; diff --git a/newswires/conf/routes b/newswires/conf/routes index 00289bcf..e692e8af 100644 --- a/newswires/conf/routes +++ b/newswires/conf/routes @@ -7,7 +7,7 @@ GET / controllers.ViteController.index() GET /feed controllers.ViteController.index() GET /item/*id controllers.ViteController.item(id: String) -GET /api/search controllers.QueryController.query(q: Option[String], keywords: Option[String], supplier: List[String], subjects: List[String], subjectsExcl: List[String], beforeId: Option[Int], sinceId: Option[Int]) +GET /api/search controllers.QueryController.query(q: Option[String], keywords: Option[String], supplier: List[String], subjects: List[String], subjectsExcl: List[String], categoryCode: List[String], categoryCodeExcl: List[String], beforeId: Option[Int], sinceId: Option[Int]) GET /api/keywords controllers.QueryController.keywords(inLastHours: Option[Int], limit:Option[Int]) GET /api/item/:id controllers.QueryController.item(id: Int, q: Option[String]) PUT /api/item/:id/composerId controllers.QueryController.linkToComposer(id: Int) From 34978376547106e03396b1976d1aa7225a17164f Mon Sep 17 00:00:00 2001 From: Pete F Date: Mon, 24 Feb 2025 14:21:38 +0000 Subject: [PATCH 2/2] Refactor: use textArray helper --- newswires/app/db/FingerpostWireEntry.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/newswires/app/db/FingerpostWireEntry.scala b/newswires/app/db/FingerpostWireEntry.scala index d405a175..df639669 100644 --- a/newswires/app/db/FingerpostWireEntry.scala +++ b/newswires/app/db/FingerpostWireEntry.scala @@ -262,7 +262,7 @@ object FingerpostWireEntry case Nil => None case categoryCodes => Some( - sqls"${syn.categoryCodes} && array[${categoryCodes.map(code => sqls"$code")}]::text[]" + sqls"${syn.categoryCodes} && ${textArray(categoryCodes)}" ) } @@ 
-271,7 +271,7 @@ object FingerpostWireEntry case categoryCodesExcl => val cce = this.syntax("categoryCodesExcl") val doesContainCategoryCodes = - sqls"${cce.categoryCodes} && array[${categoryCodesExcl.map(code => sqls"$code")}]::text[]" + sqls"${cce.categoryCodes} && ${textArray(categoryCodesExcl)}" Some( sqls"""|NOT EXISTS (