From 30e04363599a5874095ead6f1a49fb36d9a23544 Mon Sep 17 00:00:00 2001 From: dwithana Date: Wed, 29 May 2024 16:47:58 -0400 Subject: [PATCH 1/5] Parse search service from manifest, [WIP]: parse response --- public/manifests/dev/search-response.js | 315 ++++++++++++++++++++++++ src/components/Transcript/Transcript.js | 22 +- src/context/manifest-context.js | 2 +- src/services/iiif-parser.js | 14 +- src/services/search.js | 51 +++- src/services/transcript-parser.js | 23 ++ src/services/utility-helpers.js | 14 +- src/services/utility-helpers.test.js | 16 ++ 8 files changed, 434 insertions(+), 23 deletions(-) create mode 100644 public/manifests/dev/search-response.js diff --git a/public/manifests/dev/search-response.js b/public/manifests/dev/search-response.js new file mode 100644 index 00000000..9fbfa893 --- /dev/null +++ b/public/manifests/dev/search-response.js @@ -0,0 +1,315 @@ +export default { + "@context": "http://iiif.io/api/search/2/context.json", + "id": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/search?q=%22Mr.%20Bungle%22", + "type": "AnnotationPage", + "items": [ + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/2bc45ce5-03ff-43a1-a46c-6f0d4ee3bb43", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "In the puppet show, \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e came to the boys' room on his way to lunch.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:00:36.100,00:00:41.300" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/c1ba21f8-6238-4520-88b0-0c16477b03de", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "But \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e didn't stop to wash his hands or comb his hair.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:00:46.300,00:00:51.100" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/c8b79c82-1fae-4804-b281-a4f65747bc32", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Then, instead of getting into line at the lunchroom, \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e pushed everyone aside and went right to the front.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:00:57.900,00:01:05.700" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/f3ce4814-2b1f-4322-9c3f-a4ed67488bb6", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Then, in the lunchroom, \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e was so clumsy and impolite that he knocked over everything. And no one wanted to sit next to him.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:01:11.900,00:01:22.000" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/37cfcfe2-c6fa-42eb-9fe2-622fdb500c16", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "The children knew that even though \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e was funny to watch, he wouldn't be much fun to eat with.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:01:30.300,00:01:36.300" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/923d96ea-cbb3-40c3-80ca-5a60dfc6ece5", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Phil knew that a \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e wouldn't have many friends. He wouldn't want to be like \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:01:36.400,00:01:42.500" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/2bcb3b8c-8bb8-4fc8-b777-e9b1ae07d0ba", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "No, they were a little dirty. Phil remembered that \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e didn't wash his hands.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:02:16.500,00:02:22.800" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/84daa40a-9456-471e-a018-9b32d1f296cd", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Phil didn't want to be like \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:02:25.800,00:02:28.000" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/299203e1-81f4-4c0b-a51c-cc18db3eead0", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "But Phil didn't want to break into line as \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e did.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:03:29.600,00:03:33.100" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/7c31f978-485d-47f5-a696-533fbceaa6c0", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Phil had good manners. He didn't want to be like \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e in the lunchroom.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:04:56.700,00:05:01.000" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/394354a1-68c9-40fe-b67d-21b711834da4", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "She wouldn't smile at a \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:05:17.100,00:05:19.400" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/d9550279-03ba-4b95-bb73-17931c727a67", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "He knew his friends would like a noisy \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e at their table.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:05:30.100,00:05:33.600" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/1bb31753-f823-438d-b5d0-dc41f73fd122", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Phil would rather be like Freddy than like \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:06:18.200,00:06:21.500" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/c71681d7-8775-4db1-8a7f-529b6b184caa", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "You shouldn't run in the lunchroom. Only \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e would do that.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:06:38.100,00:06:42.300" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/327fad42-fa37-45e9-a5c4-4e12a91cc2d5", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Phil and his friends wouldn't like to have a \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e at their table.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:06:45.800,00:06:49.400" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/3ecd0a89-31b6-4239-93ac-2bdd62bbcc5c", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Only a \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e would eat his dessert before he'd finished the rest of his lunch.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:07:03.900,00:07:07.900" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/a69cd5a2-3749-4183-bac8-6845f9b68152", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "And Phil wan't a \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:07:08.200,00:07:10.200" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/b69d9166-9afd-496d-9d83-b84e02e46f03", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Phil thought a \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e must have sat there.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:07:53.900,00:07:56.500" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/964a023b-03f1-4b0e-8002-dc684f54debc", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "But Phil didn't want to be like \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e so he put his chair neatly into place.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:07:58.200,00:08:04.500" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/46774d36-af38-46b9-b063-6b58c2905e0a", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "No \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e sat here!", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:08:17.000,00:08:19.500" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/65f227bc-dada-454f-bc9c-6fc0ac1c8684", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Phil was certain that \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e wouldn't put his paper in the waste basket and his empty carton on the milk tray.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:08:31.900,00:08:37.700" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/91d29499-75ca-4ac7-a027-cfe8ab9404b2", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "\u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e probably wouldn't bother to put his lunch tray in the right place either.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:08:38.300,00:08:42.600" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/c7b38639-4fad-49f3-b6d4-b3ccd9833ffe", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "No one here was a \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e. And no one wanted to be.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:08:57.400,00:09:02.200" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/649ab34c-c4a6-445a-a106-421b98dff62b", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Are you like \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e? \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e is ashamed because he spoils lunchtime.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:09:02.800,00:09:09.200" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/05aedec4-9b3d-43d4-97d4-c8a1587fe6d8", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "Don't be like \u003cem\u003eMr\u003c/em\u003e. \u003cem\u003eBungle\u003c/em\u003e.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:09:10.500,00:09:12.000" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/55630159-ca6a-4a51-afbe-2f4c8dab4daa", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "It was called \"Mister \u003cem\u003eBungle\u003c/em\u003e Goes to Lunch\".", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:00:26.700,00:00:31.500" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/4960018b-2a56-4447-91f5-502721f76acf", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "She hoped there weren't any \u003cem\u003eMr\u003c/em\u003e. Bungles in this room.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:01:49.200,00:01:52.500" + }, + { + "id": "https://avalon-dev.dlib.indiana.edu/media_objects/fj236208t/manifest/canvas/ng451h49f/search/80270769-0a10-42a7-92fc-e5d86d7c8272", + "type": "Annotation", + "motivation": "supplementing", + "body": { + "type": "TextualBody", + "value": "\u003cem\u003eMr\u003c/em\u003e. Bungle's hair was messy too.", + "format": "text/plain" + }, + "target": "https://avalon-dev.dlib.indiana.edu/master_files/ng451h49f/supplemental_files/309/transcripts#t=00:02:22.900,00:02:25.600" + } + ] +}; diff --git a/src/components/Transcript/Transcript.js b/src/components/Transcript/Transcript.js index eef49e5a..66c25f32 100644 --- a/src/components/Transcript/Transcript.js +++ b/src/components/Transcript/Transcript.js @@ -2,7 +2,6 @@ import React from 'react'; import cx from 'classnames'; import PropTypes from 'prop-types'; import throttle from 'lodash/throttle'; -import { timeToHHmmss } from '@Services/utility-helpers'; import { readSupplementingAnnotations, parseTranscriptData, @@ -11,7 +10,8 @@ import { TRANSCRIPT_CUE_TYPES, } from '@Services/transcript-parser'; import TranscriptMenu from './TranscriptMenu/TranscriptMenu'; -import { useFilteredTranscripts, useFocusedMatch, useSearchOpts } from '../../services/search'; +import { useFilteredTranscripts, useFocusedMatch, useSearchOpts } from '@Services/search'; +import { timeToHHmmss } from '@Services/utility-helpers'; import './Transcript.scss'; const NO_TRANSCRIPTS_MSG = 'No valid Transcript(s) found, please check again.'; @@ -245,10 +245,19 @@ const Transcript = ({ playerID, manifestUrl, search = {}, transcripts = [] }) => }); const [searchQuery, setSearchQuery] = React.useState(initialSearchQuery); + const [_canvasIndex, _setCanvasIndex] = React.useState(-1); + const canvasIndexRef = React.useRef(_canvasIndex); + const setCanvasIndex = (c) => { + abortController.abort(); + canvasIndexRef.current = c; + _setCanvasIndex(c); // force re-render + }; + const searchResults = useFilteredTranscripts({ ...searchOpts, query: searchQuery, - transcripts: transcript + transcripts: transcript, + canvasIndex: canvasIndexRef.current }); const { focusedMatchId, setFocusedMatchId, focusedMatchIndex, setFocusedMatchIndex } = useFocusedMatch({ searchResults }); @@ -262,13 +271,6 @@ const Transcript = ({ playerID, manifestUrl, search = {}, transcripts = [] }) => }; const abortController = new AbortController(); - const [_canvasIndex, _setCanvasIndex] = React.useState(-1); - const canvasIndexRef = React.useRef(_canvasIndex); - const setCanvasIndex = (c) => { - abortController.abort(); - canvasIndexRef.current = c; - _setCanvasIndex(c); // force re-render - }; const playerIntervalRef = React.useRef(null); const playerRef = React.useRef(null); diff --git a/src/context/manifest-context.js b/src/context/manifest-context.js index 6cfa4911..bffddf98 100644 --- a/src/context/manifest-context.js +++ b/src/context/manifest-context.js @@ -1,6 +1,6 @@ import React from 'react'; -const ManifestStateContext = React.createContext(); +export const ManifestStateContext = React.createContext(); const ManifestDispatchContext = React.createContext(); /** diff --git a/src/services/iiif-parser.js b/src/services/iiif-parser.js index d1b13bc4..30ad0f52 100644 --- a/src/services/iiif-parser.js +++ b/src/services/iiif-parser.js @@ -1,10 +1,9 @@ -import { LabelValuePair, parseManifest, PropertyValue } from 'manifesto.js'; +import { parseManifest, PropertyValue } from 'manifesto.js'; import mimeDb from 'mime-db'; import sanitizeHtml from 'sanitize-html'; import { GENERIC_EMPTY_MANIFEST_MESSAGE, GENERIC_ERROR_MESSAGE, - checkSrcRange, getAnnotations, getLabelValue, getMediaFragment, @@ -717,3 +716,14 @@ export function getStructureRanges(manifest, isPlaylist = false) { return { structures, timespans, markRoot }; } } + +export function getSearchService(manifest, canvasIndex) { + let canvases = parseSequences(manifest)[0].getCanvases(); + if (canvases !== undefined && canvases[canvasIndex] != undefined) { + const canvas = canvases[canvasIndex]; + const searchService = canvas.getServices()?.length > 0 + ? canvas.getServices()[0].id + : null; + return searchService; + } +} diff --git a/src/services/search.js b/src/services/search.js index cb5e0bbe..4ceeb1f5 100644 --- a/src/services/search.js +++ b/src/services/search.js @@ -1,5 +1,9 @@ import { useRef, useEffect, useState, useMemo, useCallback, useContext } from 'react'; import { PlayerStateContext, PlayerDispatchContext } from '../context/player-context'; +import { ManifestStateContext } from '../context/manifest-context'; +import { getSearchService } from './iiif-parser'; +import { parseSearchResponse } from './transcript-parser'; +import searchResponse from '../../public/manifests/dev/search-response'; export const defaultMatcherFactory = (items) => { const mappedItems = items.map(item => item.text.toLocaleLowerCase()); @@ -37,21 +41,25 @@ export const defaultSearchOpts = { matchesOnly: false }; -export const useSearchOpts = (opts) => (opts && opts.isSearchable - ? { ...defaultSearchOpts, ...opts, enabled: true } - : { ...defaultSearchOpts, enabled: false } -); +export const useSearchOpts = (opts) => { + return (opts && opts.isSearchable + ? { ...defaultSearchOpts, ...opts, enabled: true } + : { ...defaultSearchOpts, enabled: false } + ); +}; export function useFilteredTranscripts({ query, sorter = defaultSearchOpts.sorter, enabled = true, transcripts, + canvasIndex, showMarkers = defaultSearchOpts.showMarkers, matchesOnly = defaultSearchOpts.matchesOnly, matcherFactory = defaultSearchOpts.matcherFactory }) { const [searchResults, setSearchResults] = useState({ results: {}, ids: [], matchingIds: [] }); + const [searchService, setSearchService] = useState(); const abortControllerRef = useRef(null); const { matcher, itemsWithIds, itemsIndexed } = useMemo(() => { @@ -65,11 +73,42 @@ export function useFilteredTranscripts({ ...acc, [item.id]: item }), {}); - const matcher = matcherFactory(itemsWithIds); + let matcher = matcherFactory(itemsWithIds); + // if (searchService != null && searchService != undefined) { + // matcher = contentSearchFactory(); + // } return { matcher, itemsWithIds, itemsIndexed }; }, [transcripts, matcherFactory]); const playerDispatch = useContext(PlayerDispatchContext); + const manifestState = useContext(ManifestStateContext); + + let debounceTimer; + + function contentSearchFactory() { + return async (query, abortController) => { + try { + console.log(abortController.signal); + const res = await fetch(`${searchService}?q=${query}`, { + signal: abortController.signal + }); + const json = await res.json(); + return res.results; + } catch (e) { + console.error(e); + return []; + } + }; + }; + + // Parse seachService from the Canvas + useEffect(() => { + const { manifest } = manifestState; + if (manifest) { + let serviceId = getSearchService(manifest, canvasIndex); + setSearchService(serviceId); + } + }, [canvasIndex]); useEffect(() => { if (!itemsWithIds.length) { @@ -99,6 +138,8 @@ export function useFilteredTranscripts({ ...acc, [match.id]: match }), {}); + parseSearchResponse(searchResponse); + console.log(filtered); const sortedMatchIds = sorter([...filtered], true).map(item => item.id); if (matchesOnly) { setSearchResults({ diff --git a/src/services/transcript-parser.js b/src/services/transcript-parser.js index 6fa872fd..39cf05c7 100644 --- a/src/services/transcript-parser.js +++ b/src/services/transcript-parser.js @@ -759,3 +759,26 @@ function parseTimedTextLine({ times, line, tag }, isSRT) { return null; } } + +export const parseSearchResponse = (response) => { + if (!response || response === undefined) return; + if (response.items?.length > 0) { + let items = response.items; + let searchHits = []; + items.map((item) => { + const anno = new Annotation(item); + if (anno.getMotivation() != 'supplementing') return; + const { start, end } = getMediaFragment(anno.getTarget()); + const value = anno.getBody()[0].getProperty('value'); + const [preHit, withSearchhit] = value.split(/(.*)/s); + const [searchHit, postHit] = withSearchhit.split(/(.*)<\/em>/s); + console.log(preHit); + // console.log(value.substring(0, value.indexOf(''))); + // console.log(value.substring(value.indexOf(''), value.lastIndexOf(''))); + // console.log(value.substring(value.lastIndexOf(''), value.length)); + searchHits.push({ + + }); + }); + } +}; diff --git a/src/services/utility-helpers.js b/src/services/utility-helpers.js index d8312e65..f9cf54a4 100644 --- a/src/services/utility-helpers.js +++ b/src/services/utility-helpers.js @@ -249,15 +249,19 @@ export function fileDownload(fileUrl, fileName, fileExt = '', machineGenerated = * @param {number} duration - duration of the current canvas * @return {Object} - Representing the media fragment ie. { start: 3287.0, end: 3590.0 }, or undefined */ -export function getMediaFragment(uri, duration) { +export function getMediaFragment(uri, duration = 0) { if (uri !== undefined) { const fragment = uri.split('#t=')[1]; if (fragment !== undefined) { - const splitFragment = fragment.split(','); - if (splitFragment[1] == undefined) { - splitFragment[1] = duration; + let [start, end] = fragment.split(','); + if (end === undefined) { + end = duration.toString(); } - return { start: Number(splitFragment[0]), end: Number(splitFragment[1]) }; + let timestampRegex = /([0-9]*:){1,2}([0-9]{2})(?:(\.[0-9]{2,3})*)/g; + return { + start: start.match(timestampRegex) ? timeToS(start) : Number(start), + end: end.match(timestampRegex) ? timeToS(end) : Number(end) + }; } else { return undefined; } diff --git a/src/services/utility-helpers.test.js b/src/services/utility-helpers.test.js index 91b746f8..00d65fe4 100644 --- a/src/services/utility-helpers.test.js +++ b/src/services/utility-helpers.test.js @@ -66,6 +66,22 @@ describe('util helper', () => { it('returns undefined when invalid uri is given', () => { expect(util.getMediaFragment(undefined, 1985)).toBeUndefined(); }); + + it('returns time in seconds when hh:mm:ss.ms format time string is given', () => { + expect(util.getMediaFragment( + 'http://example.com/sample/manifest/canvas#t=00:07:53.900,00:07:56.500' + )).toEqual({ + start: 473.9, end: 476.5 + }); + }); + + it('returns time in seconds when hh:mm:ss format time string is given', () => { + expect(util.getMediaFragment( + 'http://example.com/sample/manifest/canvas#t=00:07:53,00:07:56' + )).toEqual({ + start: 473, end: 476 + }); + }); }); describe('getResourceItems()', () => { From a66117730246bbb71ed9d14560c9751c7388c7ba Mon Sep 17 00:00:00 2001 From: dwithana Date: Thu, 30 May 2024 16:22:16 -0400 Subject: [PATCH 2/5] Parse search response into expected format to be displayed --- src/services/iiif-parser.js | 26 +++++++++++--- src/services/search.js | 33 ++++++++--------- src/services/transcript-parser.js | 60 ++++++++++++++++++++++++------- 3 files changed, 83 insertions(+), 36 deletions(-) diff --git a/src/services/iiif-parser.js b/src/services/iiif-parser.js index 30ad0f52..735a79bc 100644 --- a/src/services/iiif-parser.js +++ b/src/services/iiif-parser.js @@ -717,13 +717,29 @@ export function getStructureRanges(manifest, isPlaylist = false) { } } +/** + * Read 'services' block in the relevant Canvas and returns the + * id of the service typed 'SearchService2' to enable content + * search + * @param {Object} manifest + * @param {Number} canvasIndex index of the current Canvas + * @returns + */ export function getSearchService(manifest, canvasIndex) { let canvases = parseSequences(manifest)[0].getCanvases(); - if (canvases !== undefined && canvases[canvasIndex] != undefined) { - const canvas = canvases[canvasIndex]; - const searchService = canvas.getServices()?.length > 0 - ? canvas.getServices()[0].id + if (canvases === undefined || canvases[canvasIndex] === undefined) return null; + + let searchService = null; + const canvas = canvases[canvasIndex]; + const services = canvas.getServices(); + if (services && services.length > 0) { + const searchServices = services.filter( + s => s.getProperty('type') === 'SearchService2' + ); + searchService = searchServices?.length > 0 + ? searchServices[0].id : null; - return searchService; } + return searchService; + } diff --git a/src/services/search.js b/src/services/search.js index 4ceeb1f5..bd1bb678 100644 --- a/src/services/search.js +++ b/src/services/search.js @@ -2,8 +2,7 @@ import { useRef, useEffect, useState, useMemo, useCallback, useContext } from 'r import { PlayerStateContext, PlayerDispatchContext } from '../context/player-context'; import { ManifestStateContext } from '../context/manifest-context'; import { getSearchService } from './iiif-parser'; -import { parseSearchResponse } from './transcript-parser'; -import searchResponse from '../../public/manifests/dev/search-response'; +import { getMatchedParts, parseSearchResponse } from './transcript-parser'; export const defaultMatcherFactory = (items) => { const mappedItems = items.map(item => item.text.toLocaleLowerCase()); @@ -13,11 +12,7 @@ export const defaultMatcherFactory = (items) => { const matchOffset = mappedText.indexOf(qStr); if (matchOffset !== -1) { const matchedItem = items[idx]; - const matchParts = [ - matchedItem.text.slice(0, matchOffset), - matchedItem.text.slice(matchOffset, matchOffset + qStr.length), - matchedItem.text.slice(matchOffset + qStr.length) - ]; + const matchParts = getMatchedParts(matchOffset, matchedItem.text, qStr); return [ ...results, @@ -74,29 +69,31 @@ export function useFilteredTranscripts({ [item.id]: item }), {}); let matcher = matcherFactory(itemsWithIds); - // if (searchService != null && searchService != undefined) { - // matcher = contentSearchFactory(); - // } + if (searchService != null && searchService != undefined) { + matcher = contentSearchFactory(); + } return { matcher, itemsWithIds, itemsIndexed }; }, [transcripts, matcherFactory]); const playerDispatch = useContext(PlayerDispatchContext); const manifestState = useContext(ManifestStateContext); - let debounceTimer; - function contentSearchFactory() { return async (query, abortController) => { try { - console.log(abortController.signal); const res = await fetch(`${searchService}?q=${query}`, { - signal: abortController.signal + signal: abortController.signal, }); const json = await res.json(); - return res.results; + const results = json.items?.length > 0 + ? parseSearchResponse(json, query, itemsWithIds) + : []; + return results; } catch (e) { - console.error(e); - return []; + if (e.name !== 'AbortError') { + console.error(e); + return []; + } } }; }; @@ -138,8 +135,6 @@ export function useFilteredTranscripts({ ...acc, [match.id]: match }), {}); - parseSearchResponse(searchResponse); - console.log(filtered); const sortedMatchIds = sorter([...filtered], true).map(item => item.id); if (matchesOnly) { setSearchResults({ diff --git a/src/services/transcript-parser.js b/src/services/transcript-parser.js index 39cf05c7..91144e9f 100644 --- a/src/services/transcript-parser.js +++ b/src/services/transcript-parser.js @@ -44,6 +44,7 @@ export const TRANSCRIPT_TYPES = { export const TRANSCRIPT_CUE_TYPES = { note: 'NOTE', timedCue: 'TIMED_CUE', + nonTimedLine: 'NON_TIMED_LINE' }; /** @@ -760,25 +761,60 @@ function parseTimedTextLine({ times, line, tag }, isSRT) { } } -export const parseSearchResponse = (response) => { - if (!response || response === undefined) return; +export const parseSearchResponse = (response, query, transcripts) => { + if (!response || response === undefined) return []; + + const qStr = query.trim().toLocaleLowerCase(); + let searchHits = []; if (response.items?.length > 0) { let items = response.items; - let searchHits = []; items.map((item) => { const anno = new Annotation(item); + // Exclude annotations without supplementing motivation if (anno.getMotivation() != 'supplementing') return; - const { start, end } = getMediaFragment(anno.getTarget()); + + // Read time offsets and text of the search hit + const timeRange = getMediaFragment(anno.getTarget()); const value = anno.getBody()[0].getProperty('value'); - const [preHit, withSearchhit] = value.split(/(.*)/s); - const [searchHit, postHit] = withSearchhit.split(/(.*)<\/em>/s); - console.log(preHit); - // console.log(value.substring(0, value.indexOf(''))); - // console.log(value.substring(value.indexOf(''), value.lastIndexOf(''))); - // console.log(value.substring(value.lastIndexOf(''), value.length)); - searchHits.push({ - }); + // Replace all HTML tags + const mappedText = value.replace(/<\/?[^>]+>/gi, ''); + + let start = 0, end = 0; + let transcirptId = undefined; + let hit = {}; + if (timeRange != undefined) { + // For timed-text + start = timeRange.start; end = timeRange.end; + transcirptId = transcripts.findIndex((t) => t.begin == start && t.end == end); + hit.tag = TRANSCRIPT_CUE_TYPES.timedCue; + } else { + // For non timed-text + transcirptId = transcripts.findIndex((t) => t.text === mappedText); + hit.tag = TRANSCRIPT_CUE_TYPES.nonTimedLine; + } + const matchOffset = mappedText.toLocaleLowerCase().indexOf(qStr); + if (matchOffset !== -1 && transcirptId != undefined) { + const matchParts = getMatchedParts(matchOffset, mappedText, qStr); + + searchHits.push({ + ...hit, + begin: start, + end: end, + id: transcirptId, + match: matchParts, + text: value, + }); + } }); } + return searchHits; +}; + +export const getMatchedParts = (offset, text, query) => { + return [ + text.slice(0, offset), + text.slice(offset, offset + query.length), + text.slice(offset + query.length) + ]; }; From 1ee3ea6a4a9cc51e2b6944f0eedb918e14f386af Mon Sep 17 00:00:00 2001 From: dwithana Date: Fri, 31 May 2024 17:36:28 -0400 Subject: [PATCH 3/5] Get hit counts for each transcript --- src/components/Transcript/Transcript.js | 9 +- .../TranscriptMenu/TranscriptSearch.js | 2 + .../TranscriptMenu/TranscriptSelector.js | 2 +- src/context/player-context.js | 2 +- src/services/iiif-parser.js | 34 +++--- src/services/search.js | 107 ++++++++++++------ src/services/transcript-parser.js | 102 +++++++++++------ 7 files changed, 171 insertions(+), 87 deletions(-) diff --git a/src/components/Transcript/Transcript.js b/src/components/Transcript/Transcript.js index 66c25f32..72fcd6d2 100644 --- a/src/components/Transcript/Transcript.js +++ b/src/components/Transcript/Transcript.js @@ -10,7 +10,7 @@ import { TRANSCRIPT_CUE_TYPES, } from '@Services/transcript-parser'; import TranscriptMenu from './TranscriptMenu/TranscriptMenu'; -import { useFilteredTranscripts, useFocusedMatch, useSearchOpts } from '@Services/search'; +import { useFilteredTranscripts, useFocusedMatch, useSearchOpts, useSearchCounts } from '@Services/search'; import { timeToHHmmss } from '@Services/utility-helpers'; import './Transcript.scss'; @@ -257,11 +257,14 @@ const Transcript = ({ playerID, manifestUrl, search = {}, transcripts = [] }) => ...searchOpts, query: searchQuery, transcripts: transcript, - canvasIndex: canvasIndexRef.current + canvasIndex: canvasIndexRef.current, + selectedTranscript: transcriptInfo.id, }); const { focusedMatchId, setFocusedMatchId, focusedMatchIndex, setFocusedMatchIndex } = useFocusedMatch({ searchResults }); + const { tanscriptHitCounts } = useSearchCounts({ searchResults, canvasTranscripts }); + const [isEmpty, setIsEmpty] = React.useState(true); const [_autoScrollEnabled, _setAutoScrollEnabled] = React.useState(true); const autoScrollEnabledRef = React.useRef(_autoScrollEnabled); @@ -464,7 +467,7 @@ const Transcript = ({ playerID, manifestUrl, search = {}, transcripts = [] }) => 0 && transcriptInfo.tError != NO_SUPPORT} setAutoScrollEnabled={setAutoScrollEnabled} diff --git a/src/components/Transcript/TranscriptMenu/TranscriptSearch.js b/src/components/Transcript/TranscriptMenu/TranscriptSearch.js index 0a097904..5586f505 100644 --- a/src/components/Transcript/TranscriptMenu/TranscriptSearch.js +++ b/src/components/Transcript/TranscriptMenu/TranscriptSearch.js @@ -102,6 +102,8 @@ export const TranscriptSearch = ({ onClick={() => { setSearchQuery(null); if (searchInputRef.current) searchInputRef.current.value = ''; + // Set focus to the search input field + searchInputRef.current.focus(); }} > diff --git a/src/components/Transcript/TranscriptMenu/TranscriptSelector.js b/src/components/Transcript/TranscriptMenu/TranscriptSelector.js index 437da7a9..e755ebb4 100644 --- a/src/components/Transcript/TranscriptMenu/TranscriptSelector.js +++ b/src/components/Transcript/TranscriptMenu/TranscriptSelector.js @@ -34,7 +34,7 @@ const TranscriptSelector = ({ > {transcriptData.map((t, i) => ( ))} diff --git a/src/context/player-context.js b/src/context/player-context.js index 353daf7e..eab35aaf 100644 --- a/src/context/player-context.js +++ b/src/context/player-context.js @@ -1,6 +1,6 @@ import React from 'react'; -export const PlayerStateContext = React.createContext(); +const PlayerStateContext = React.createContext(); export const PlayerDispatchContext = React.createContext(); /** diff --git a/src/services/iiif-parser.js b/src/services/iiif-parser.js index 735a79bc..c0f27851 100644 --- a/src/services/iiif-parser.js +++ b/src/services/iiif-parser.js @@ -718,28 +718,36 @@ export function getStructureRanges(manifest, isPlaylist = false) { } /** - * Read 'services' block in the relevant Canvas and returns the - * id of the service typed 'SearchService2' to enable content + * Read 'services' block in the Manifest or in relevant Canvas. Services listed + * at the manifest-level takes precedence. + * Returns the id of the service typed 'SearchService2' to enable content * search * @param {Object} manifest * @param {Number} canvasIndex index of the current Canvas * @returns */ export function getSearchService(manifest, canvasIndex) { - let canvases = parseSequences(manifest)[0].getCanvases(); - if (canvases === undefined || canvases[canvasIndex] === undefined) return null; - let searchService = null; - const canvas = canvases[canvasIndex]; - const services = canvas.getServices(); - if (services && services.length > 0) { - const searchServices = services.filter( + const manifestServices = parseManifest(manifest).getServices(); + if (manifestServices && manifestServices?.length > 0) { + let searchServices = manifestServices.filter( s => s.getProperty('type') === 'SearchService2' ); - searchService = searchServices?.length > 0 - ? searchServices[0].id - : null; + searchService = searchServices?.length > 0 ? searchServices[0].id : null; + } else { + let canvases = parseSequences(manifest)[0].getCanvases(); + if (canvases === undefined || canvases[canvasIndex] === undefined) return null; + + const canvas = canvases[canvasIndex]; + const services = canvas.getServices(); + if (services && services.length > 0) { + const searchServices = services.filter( + s => s.getProperty('type') === 'SearchService2' + ); + searchService = searchServices?.length > 0 + ? searchServices[0].id + : null; + } } return searchService; - } diff --git a/src/services/search.js b/src/services/search.js index bd1bb678..4686716a 100644 --- a/src/services/search.js +++ b/src/services/search.js @@ -1,8 +1,9 @@ import { useRef, useEffect, useState, useMemo, useCallback, useContext } from 'react'; -import { PlayerStateContext, PlayerDispatchContext } from '../context/player-context'; +import { PlayerDispatchContext } from '../context/player-context'; import { ManifestStateContext } from '../context/manifest-context'; import { getSearchService } from './iiif-parser'; -import { getMatchedParts, parseSearchResponse } from './transcript-parser'; +import { getMatchedParts, parseContentSearchResponse } from './transcript-parser'; +import { getByTestId } from '@testing-library/react'; export const defaultMatcherFactory = (items) => { const mappedItems = items.map(item => item.text.toLocaleLowerCase()); @@ -22,7 +23,30 @@ export const defaultMatcherFactory = (items) => { return results; } }, []); - return matchedItems; + return { matchedTranscriptLines: matchedItems, hitCounts: [] }; + }; +}; + +const contentSearchFactory = (searchService, items) => { + return async (query, abortController) => { + try { + const res = await fetch(`${searchService}?q=${query}`, + { signal: abortController.signal } + ); + const json = await res.json(); + let results = []; + if (json.items?.length > 0) { + const parsed = parseContentSearchResponse(json, query, items); + // results = parsed.matchedTranscriptLines; + return parsed; + } + return { matchedTranscriptLines: [], hitCounts: [] }; + } catch (e) { + if (e.name !== 'AbortError') { + console.error(e); + return []; + } + } }; }; @@ -53,7 +77,7 @@ export function useFilteredTranscripts({ matchesOnly = defaultSearchOpts.matchesOnly, matcherFactory = defaultSearchOpts.matcherFactory }) { - const [searchResults, setSearchResults] = useState({ results: {}, ids: [], matchingIds: [] }); + const [searchResults, setSearchResults] = useState({ results: {}, ids: [], matchingIds: [], counts: [] }); const [searchService, setSearchService] = useState(); const abortControllerRef = useRef(null); @@ -70,7 +94,7 @@ export function useFilteredTranscripts({ }), {}); let matcher = matcherFactory(itemsWithIds); if (searchService != null && searchService != undefined) { - matcher = contentSearchFactory(); + matcher = contentSearchFactory(searchService, itemsWithIds); } return { matcher, itemsWithIds, itemsIndexed }; }, [transcripts, matcherFactory]); @@ -78,27 +102,7 @@ export function useFilteredTranscripts({ const playerDispatch = useContext(PlayerDispatchContext); const manifestState = useContext(ManifestStateContext); - function contentSearchFactory() { - return async (query, abortController) => { - try { - const res = await fetch(`${searchService}?q=${query}`, { - signal: abortController.signal, - }); - const json = await res.json(); - const results = json.items?.length > 0 - ? parseSearchResponse(json, query, itemsWithIds) - : []; - return results; - } catch (e) { - if (e.name !== 'AbortError') { - console.error(e); - return []; - } - } - }; - }; - - // Parse seachService from the Canvas + // Parse searchService from the Canvas/Manifest useEffect(() => { const { manifest } = manifestState; if (manifest) { @@ -107,6 +111,13 @@ export function useFilteredTranscripts({ } }, [canvasIndex]); + useEffect(() => { + // abort any existing search operations + if (abortControllerRef.current) { + abortControllerRef.current.abort('Cancelling content search request'); + } + }, [query]); + useEffect(() => { if (!itemsWithIds.length) { if (playerDispatch) playerDispatch({ type: 'setSearchMarkers', payload: [] }); @@ -124,18 +135,16 @@ export function useFilteredTranscripts({ } const abortController = new AbortController(); - // abort any existing search operations - if (abortControllerRef.current && !abortControllerRef.current.signal.aborted) abortControllerRef.current.abort(); abortControllerRef.current = abortController; - (Promise.resolve(matcher(query, abortController)) - .then((filtered) => { - if (abortController.signal.aborted) return; - const matchingItemsIndexed = filtered.reduce((acc, match) => ({ + (Promise.resolve(matcher(query, abortControllerRef.current)) + .then(({ matchedTranscriptLines, hitCounts }) => { + if (abortController.signal.aborted || matchedTranscriptLines == undefined) return; + const matchingItemsIndexed = matchedTranscriptLines.reduce((acc, match) => ({ ...acc, [match.id]: match }), {}); - const sortedMatchIds = sorter([...filtered], true).map(item => item.id); + const sortedMatchIds = sorter([...matchedTranscriptLines], true).map(item => item.id); if (matchesOnly) { setSearchResults({ results: matchingItemsIndexed, @@ -155,6 +164,12 @@ export function useFilteredTranscripts({ matchingIds: sortedMatchIds }; setSearchResults(searchResults); + if (hitCounts?.length > 0) { + setSearchResults({ + ...searchResults, + counts: hitCounts, + }); + } if (playerDispatch) { if (showMarkers) { @@ -189,6 +204,32 @@ export function useFilteredTranscripts({ return searchResults; } +export const useSearchCounts = ({ searchResults, canvasTranscripts }) => { + const [resultCount, setResultCount] = useState(null); + + if (!searchResults?.counts || canvasTranscripts?.length === 0) { + return { tanscriptHitCounts: canvasTranscripts, resultCount, setResultCount }; + } + + const hitCounts = searchResults.counts; + + let canvasTranscriptsWithCount = []; + canvasTranscripts.map((ct) => { + ct.numberOfHits = hitCounts.find((h) => h.transcriptURL === ct.url).numberOfHits; + canvasTranscriptsWithCount.push(ct); + }); + + const setResultsNavCount = useCallback((transcriptUrl) => { + const hitCounts = searchResults.counts; + if (!(hitCounts === undefined || hitCounts?.length === 0)) { + const currentCount = hitCounts.find(c => c.transcriptURL === transcriptUrl).numberOfHits; + setResultCount(currentCount); + } + }, []); + + console.log(canvasTranscriptsWithCount); + return { tanscriptHitCounts: canvasTranscriptsWithCount, resultCount, setResultsNavCount }; +}; export const useFocusedMatch = ({ searchResults }) => { const [focusedMatchIndex, setFocusedMatchIndex] = useState(null); diff --git a/src/services/transcript-parser.js b/src/services/transcript-parser.js index 91144e9f..94eee9fc 100644 --- a/src/services/transcript-parser.js +++ b/src/services/transcript-parser.js @@ -11,6 +11,7 @@ import { identifySupplementingAnnotation, parseSequences, } from './utility-helpers'; +import { getCanvasId } from './iiif-parser'; // ENum for supported transcript MIME types const TRANSCRIPT_MIME_TYPES = { @@ -347,6 +348,8 @@ async function parseWordFile(response) { .convertToHtml({ arrayBuffer: arrayBuffer }) .then(function (result) { tData = result.value; + }).catch(err => { + console.error(err); }); return tData; } @@ -760,11 +763,10 @@ function parseTimedTextLine({ times, line, tag }, isSRT) { return null; } } - -export const parseSearchResponse = (response, query, transcripts) => { +export const parseContentSearchResponse = (response, query, trancripts) => { if (!response || response === undefined) return []; - const qStr = query.trim().toLocaleLowerCase(); + let hitCounts = []; let searchHits = []; if (response.items?.length > 0) { let items = response.items; @@ -773,42 +775,58 @@ export const parseSearchResponse = (response, query, transcripts) => { // Exclude annotations without supplementing motivation if (anno.getMotivation() != 'supplementing') return; - // Read time offsets and text of the search hit - const timeRange = getMediaFragment(anno.getTarget()); + const target = anno.getTarget(); + const targetURI = getCanvasId(target); const value = anno.getBody()[0].getProperty('value'); - - // Replace all HTML tags - const mappedText = value.replace(/<\/?[^>]+>/gi, ''); - - let start = 0, end = 0; - let transcirptId = undefined; - let hit = {}; - if (timeRange != undefined) { - // For timed-text - start = timeRange.start; end = timeRange.end; - transcirptId = transcripts.findIndex((t) => t.begin == start && t.end == end); - hit.tag = TRANSCRIPT_CUE_TYPES.timedCue; - } else { - // For non timed-text - transcirptId = transcripts.findIndex((t) => t.text === mappedText); - hit.tag = TRANSCRIPT_CUE_TYPES.nonTimedLine; - } - const matchOffset = mappedText.toLocaleLowerCase().indexOf(qStr); - if (matchOffset !== -1 && transcirptId != undefined) { - const matchParts = getMatchedParts(matchOffset, mappedText, qStr); - - searchHits.push({ - ...hit, - begin: start, - end: end, - id: transcirptId, - match: matchParts, - text: value, - }); - } + searchHits.push({ target, targetURI, value }); }); } - return searchHits; + for (const [key, value] of Object.entries(Object.groupBy(searchHits, ({ targetURI }) => targetURI))) { + hitCounts.push({ transcriptURL: key, numberOfHits: value.length }); + } + const matchedTranscriptLines = getMatchedTranscriptLines(searchHits, query, trancripts); + return { matchedTranscriptLines, hitCounts }; +}; + +export const getMatchedTranscriptLines = (searchHits, query, transcripts) => { + const qStr = query.trim().toLocaleLowerCase(); + let transcriptLines = []; + searchHits.map((item) => { + const { target, value } = item; + // Read time offsets and text of the search hit + const timeRange = getMediaFragment(target); + + // Replace all HTML tags + const mappedText = value.replace(/<\/?[^>]+>/gi, ''); + + let start = 0, end = 0; + let transcirptId = undefined; + let hit = {}; + if (timeRange != undefined) { + // For timed-text + start = timeRange.start; end = timeRange.end; + transcirptId = transcripts.findIndex((t) => t.begin == start && t.end == end); + hit.tag = TRANSCRIPT_CUE_TYPES.timedCue; + } else { + // For non timed-text + transcirptId = transcripts.findIndex((t) => t.text === mappedText); + hit.tag = TRANSCRIPT_CUE_TYPES.nonTimedLine; + } + const matchOffset = mappedText.toLocaleLowerCase().indexOf(qStr); + if (matchOffset !== -1 && transcirptId != undefined) { + const matchParts = getMatchedParts(matchOffset, mappedText, qStr); + + transcriptLines.push({ + ...hit, + begin: start, + end: end, + id: transcirptId, + match: matchParts, + text: value, + }); + } + }); + return transcriptLines; }; export const getMatchedParts = (offset, text, query) => { @@ -818,3 +836,15 @@ export const getMatchedParts = (offset, text, query) => { text.slice(offset + query.length) ]; }; + +// TODO:: Could be used for marking search hits in Word Doc transcripts? +// export const splitIntoElements = (htmlContent) => { +// // Create a temporary DOM element to parse the HTML +// const tempDiv = document.createElement('div'); +// tempDiv.innerHTML = htmlContent; +// console.log(tempDiv); + +// // Convert child nodes into an array +// const elements = Array.from(tempDiv.childNodes); +// return elements; +// }; From 7272cd5ce93c502ef89e35211e03360815c466e7 Mon Sep 17 00:00:00 2001 From: dwithana Date: Thu, 6 Jun 2024 16:25:08 -0700 Subject: [PATCH 4/5] Display correct results and hit counts, fix failing tests --- src/components/Transcript/Transcript.js | 2 +- src/components/Transcript/Transcript.scss | 4 +- src/components/Transcript/Transcript.test.js | 167 +++++++++++++-- .../TranscriptMenu/TranscriptSelector.js | 11 +- src/services/iiif-parser.test.js | 22 ++ src/services/search.js | 191 ++++++++++-------- src/services/search.test.js | 11 +- src/services/transcript-parser.js | 45 ++++- src/services/transcript-parser.test.js | 55 +++++ src/test_data/lunchroom-manners.js | 6 + src/test_data/transcript-annotation.js | 12 ++ 11 files changed, 408 insertions(+), 118 deletions(-) diff --git a/src/components/Transcript/Transcript.js b/src/components/Transcript/Transcript.js index 72fcd6d2..cf337097 100644 --- a/src/components/Transcript/Transcript.js +++ b/src/components/Transcript/Transcript.js @@ -258,7 +258,7 @@ const Transcript = ({ playerID, manifestUrl, search = {}, transcripts = [] }) => query: searchQuery, transcripts: transcript, canvasIndex: canvasIndexRef.current, - selectedTranscript: transcriptInfo.id, + selectedTranscript: transcriptInfo.tUrl, }); const { focusedMatchId, setFocusedMatchId, focusedMatchIndex, setFocusedMatchIndex } = useFocusedMatch({ searchResults }); diff --git a/src/components/Transcript/Transcript.scss b/src/components/Transcript/Transcript.scss index 22f5c057..4f9d6eab 100644 --- a/src/components/Transcript/Transcript.scss +++ b/src/components/Transcript/Transcript.scss @@ -6,7 +6,7 @@ div.transcript_content { - height: 20em; + height: 19em; overflow-y: auto; p { @@ -184,4 +184,4 @@ a.ramp--transcript_item { 100% { opacity: 0; } -} \ No newline at end of file +} diff --git a/src/components/Transcript/Transcript.test.js b/src/components/Transcript/Transcript.test.js index a0f636b9..09c358f6 100644 --- a/src/components/Transcript/Transcript.test.js +++ b/src/components/Transcript/Transcript.test.js @@ -2,15 +2,21 @@ import React from 'react'; import { act, fireEvent, render, screen, waitFor } from '@testing-library/react'; import Transcript from './Transcript'; import * as transcriptParser from '@Services/transcript-parser'; +import { withManifestAndPlayerProvider } from '@Services/testing-helpers'; +import lunchroomManners from '@TestData/lunchroom-manners'; + describe('Transcript component', () => { - let originalError; + let originalError, originalLogger; beforeEach(() => { originalError = console.error; console.error = jest.fn(); + originalLogger = console.log; + console.log = jest.fn(); }); afterAll(() => { console.error = originalError; + console.log = originalLogger; }); describe('with valid transcript data', () => { @@ -61,12 +67,19 @@ describe('Transcript component', () => { .spyOn(transcriptParser, 'parseTranscriptData') .mockReturnValue(parsedData); + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props, + }); + render( ); + await act(() => Promise.resolve()); }); test('renders successfully', async () => { @@ -141,12 +154,19 @@ describe('Transcript component', () => { .spyOn(transcriptParser, 'parseTranscriptData') .mockReturnValue(parsedData); + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props, + }); + render( ); + await act(() => Promise.resolve()); }); test('renders successfully', async () => { @@ -208,12 +228,19 @@ describe('Transcript component', () => { .spyOn(transcriptParser, 'parseTranscriptData') .mockReturnValue(parsedData); + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props, + }); + render( ); + await act(() => Promise.resolve()); }); @@ -287,12 +314,20 @@ describe('Transcript component', () => { .spyOn(transcriptParser, 'parseTranscriptData') .mockReturnValue(parsedData); + + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props, + }); + render( ); + await act(() => Promise.resolve()); await waitFor(() => { @@ -328,12 +363,20 @@ describe('Transcript component', () => { .spyOn(transcriptParser, 'parseTranscriptData') .mockReturnValue(parsedData); + + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props, + }); + render( ); + await act(() => Promise.resolve()); await waitFor(() => { @@ -347,11 +390,20 @@ describe('Transcript component', () => { describe('renders a message for', () => { test('an empty list of transcripts', () => { + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + playerID: "player-id", + transcripts: [], + }); + render( - + ); + expect(screen.queryByTestId('transcript_nav')).toBeInTheDocument(); expect(screen.queryByTestId('transcript_content_0')).toBeInTheDocument(); expect(screen.queryByTestId('no-transcript')).toBeInTheDocument(); @@ -370,12 +422,20 @@ describe('Transcript component', () => { }, ], }; + + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); + render( ); + await act(() => Promise.resolve()); expect(screen.queryByTestId('transcript-selector')).not.toBeInTheDocument(); expect(screen.queryByTestId('transcript_content_0')).toBeInTheDocument(); @@ -408,12 +468,20 @@ describe('Transcript component', () => { const parseTranscriptMock = jest .spyOn(transcriptParser, 'parseTranscriptData') .mockReturnValue(parsedData); + + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); + render( ); + await act(() => Promise.resolve()); await waitFor(() => { expect(parseTranscriptMock).toHaveBeenCalled(); @@ -450,12 +518,19 @@ describe('Transcript component', () => { .spyOn(transcriptParser, 'parseTranscriptData') .mockReturnValue(parsedData); + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); + render( ); + await act(() => Promise.resolve()); await waitFor(() => { expect(parseTranscriptMock).toHaveBeenCalled(); @@ -490,12 +565,20 @@ describe('Transcript component', () => { tUrl: 'https://example.com/transcript_image.png', tType: transcriptParser.TRANSCRIPT_TYPES.noSupport, }); + + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); + render( ); + await act(() => Promise.resolve()); await waitFor(() => { @@ -532,13 +615,19 @@ describe('Transcript component', () => { tType: transcriptParser.TRANSCRIPT_TYPES.noSupport, }); + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); render( ); + await act(() => Promise.resolve()); await waitFor(() => { @@ -576,12 +665,19 @@ describe('Transcript component', () => { .spyOn(transcriptParser, 'parseTranscriptData') .mockReturnValue(parsedData); + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); + render( ); + await act(() => Promise.resolve()); await waitFor(() => { @@ -618,13 +714,19 @@ describe('Transcript component', () => { tType: transcriptParser.TRANSCRIPT_TYPES.invalidTimedText, }); + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); render( ); + await act(() => Promise.resolve()); await waitFor(() => { @@ -689,12 +791,19 @@ describe('Transcript component', () => { tFileExt: 'vtt', }); + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); + render( ); + await act(() => Promise.resolve()); await waitFor(() => { @@ -726,12 +835,19 @@ describe('Transcript component', () => { .spyOn(transcriptParser, 'parseTranscriptData') .mockReturnValue({}); + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); + render( ); + await act(() => Promise.resolve()); await waitFor(() => { @@ -760,12 +876,20 @@ describe('Transcript component', () => { const readSupplementingAnnotationsMock = jest .spyOn(transcriptParser, 'readSupplementingAnnotations'); + + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); + render( ); + await act(() => Promise.resolve()); await waitFor(() => { @@ -787,12 +911,19 @@ describe('Transcript component', () => { transcripts: [], }; + const TranscriptWithState = withManifestAndPlayerProvider(Transcript, { + initialManifestState: { manifest: lunchroomManners, canvasIndex: 0 }, + initialPlayerState: {}, + ...props + }); + render( ); + await act(() => Promise.resolve()); await waitFor(() => { diff --git a/src/components/Transcript/TranscriptMenu/TranscriptSelector.js b/src/components/Transcript/TranscriptMenu/TranscriptSelector.js index e755ebb4..f9c3254d 100644 --- a/src/components/Transcript/TranscriptMenu/TranscriptSelector.js +++ b/src/components/Transcript/TranscriptMenu/TranscriptSelector.js @@ -2,7 +2,6 @@ import React from 'react'; import PropTypes from 'prop-types'; import TranscriptDownloader from './TranscriptDownloader'; - const MACHINE_GEN_MESSAGE = 'Machine-generated transcript may contain errors.'; const TranscriptSelector = ({ @@ -17,6 +16,10 @@ const TranscriptSelector = ({ selectTranscript(event.target.value); }; + const buildOptionText = (title, numberOfHits) => { + return `${title} (${numberOfHits ? numberOfHits : ''})`; + }; + if (transcriptData) { const result = [
{transcriptData.map((t, i) => ( - ))} diff --git a/src/services/iiif-parser.test.js b/src/services/iiif-parser.test.js index 00ca03e9..76adf990 100644 --- a/src/services/iiif-parser.test.js +++ b/src/services/iiif-parser.test.js @@ -702,4 +702,26 @@ describe('iiif-parser', () => { expect(firstStructCanvas.canvasDuration).toEqual(32); }); }); + + describe('getSearchService()', () => { + test('returns null for a manifest without services', () => { + expect(iiifParser.getSearchService(singleCanvasManifest)).toBeNull(); + }); + + test('returns an id for a manifest with manifest-level search service', () => { + expect(iiifParser.getSearchService(lunchroomManifest)).toEqual( + 'http://example.com/manifest/search' + ); + }); + + test('returns an id for a manifest with canvas-level search service', () => { + expect(iiifParser.getSearchService(manifest, 0)).toEqual( + 'http://example.com/sample/transcript-annotation/canvas/1/search' + ); + }); + + test('returns null when service type is not equal to SearchService2', () => { + expect(iiifParser.getSearchService(manifest, 1)).toBeNull(); + }); + }); }); diff --git a/src/services/search.js b/src/services/search.js index 4686716a..dd6e2e0e 100644 --- a/src/services/search.js +++ b/src/services/search.js @@ -2,8 +2,7 @@ import { useRef, useEffect, useState, useMemo, useCallback, useContext } from 'r import { PlayerDispatchContext } from '../context/player-context'; import { ManifestStateContext } from '../context/manifest-context'; import { getSearchService } from './iiif-parser'; -import { getMatchedParts, parseContentSearchResponse } from './transcript-parser'; -import { getByTestId } from '@testing-library/react'; +import { getMatchedParts, getMatchedTranscriptLines, parseContentSearchResponse } from './transcript-parser'; export const defaultMatcherFactory = (items) => { const mappedItems = items.map(item => item.text.toLocaleLowerCase()); @@ -23,28 +22,26 @@ export const defaultMatcherFactory = (items) => { return results; } }, []); - return { matchedTranscriptLines: matchedItems, hitCounts: [] }; + return { matchedTranscriptLines: matchedItems, hitCounts: [], allSearchHits: null }; }; }; -const contentSearchFactory = (searchService, items) => { +const contentSearchFactory = (searchService, items, selectedTranscript) => { return async (query, abortController) => { try { const res = await fetch(`${searchService}?q=${query}`, { signal: abortController.signal } ); const json = await res.json(); - let results = []; if (json.items?.length > 0) { - const parsed = parseContentSearchResponse(json, query, items); - // results = parsed.matchedTranscriptLines; + const parsed = parseContentSearchResponse(json, query, items, selectedTranscript); return parsed; } - return { matchedTranscriptLines: [], hitCounts: [] }; + return { matchedTranscriptLines: [], hitCounts: [], allSearchHits: null }; } catch (e) { if (e.name !== 'AbortError') { console.error(e); - return []; + return { matchedTranscriptLines: [], hitCounts: [], allSearchHits: null }; } } }; @@ -73,12 +70,14 @@ export function useFilteredTranscripts({ enabled = true, transcripts, canvasIndex, + selectedTranscript, showMarkers = defaultSearchOpts.showMarkers, matchesOnly = defaultSearchOpts.matchesOnly, matcherFactory = defaultSearchOpts.matcherFactory }) { const [searchResults, setSearchResults] = useState({ results: {}, ids: [], matchingIds: [], counts: [] }); const [searchService, setSearchService] = useState(); + const [allSearchResults, setAllSearchResults] = useState(null); const abortControllerRef = useRef(null); const { matcher, itemsWithIds, itemsIndexed } = useMemo(() => { @@ -94,7 +93,7 @@ export function useFilteredTranscripts({ }), {}); let matcher = matcherFactory(itemsWithIds); if (searchService != null && searchService != undefined) { - matcher = contentSearchFactory(searchService, itemsWithIds); + matcher = contentSearchFactory(searchService, itemsWithIds, selectedTranscript); } return { matcher, itemsWithIds, itemsIndexed }; }, [transcripts, matcherFactory]); @@ -131,104 +130,123 @@ export function useFilteredTranscripts({ matchingIds: [], ids: sortedIds }); + setAllSearchResults(null); return; } - const abortController = new AbortController(); - abortControllerRef.current = abortController; + if (allSearchResults != null) { + const transcriptSearchResults = allSearchResults[selectedTranscript]; + const searchHits = getMatchedTranscriptLines(transcriptSearchResults, query, itemsWithIds); + markMatchedItems(searchHits, searchResults?.counts, allSearchResults); + } else { + const abortController = new AbortController(); + abortControllerRef.current = abortController; - (Promise.resolve(matcher(query, abortControllerRef.current)) - .then(({ matchedTranscriptLines, hitCounts }) => { - if (abortController.signal.aborted || matchedTranscriptLines == undefined) return; - const matchingItemsIndexed = matchedTranscriptLines.reduce((acc, match) => ({ - ...acc, - [match.id]: match - }), {}); - const sortedMatchIds = sorter([...matchedTranscriptLines], true).map(item => item.id); - if (matchesOnly) { - setSearchResults({ - results: matchingItemsIndexed, - ids: sortedMatchIds, - matchingIds: sortedMatchIds - }); - } else { - const joinedIndexed = { - ...itemsIndexed, - ...matchingItemsIndexed - }; - const sortedItemIds = sorter(Object.values(joinedIndexed), false).map(item => item.id); + (Promise.resolve(matcher(query, abortControllerRef.current)) + .then(({ matchedTranscriptLines, hitCounts, allSearchHits }) => { + if (abortController.signal.aborted) return; + markMatchedItems(matchedTranscriptLines, hitCounts, allSearchHits); + }) + .catch(e => { + console.error('search failed', e, query, transcripts); + }) + ); + } - const searchResults = { - results: joinedIndexed, - ids: sortedItemIds, - matchingIds: sortedMatchIds - }; - setSearchResults(searchResults); - if (hitCounts?.length > 0) { - setSearchResults({ - ...searchResults, - counts: hitCounts, - }); - } + }, [matcher, query, enabled, sorter, matchesOnly, showMarkers, playerDispatch, selectedTranscript]); + + /** + * Generic function to prepare a list of search hits to be displayed in the transcript + * component either from a reponse from a content search API call (using content search factory) + * across multiple transcripts or a single JS search using the default matcher factory. + * @param {Array} matchedTranscriptLines an array of matched transcript lines with ids + * @param {Array} hitCounts search hit counts for each transcript in the selected canvas + * @param {Object} allSearchHits a map of search hits grouped by transcript + * @returns + */ + const markMatchedItems = (matchedTranscriptLines, hitCounts = [], allSearchHits = null) => { + if (matchedTranscriptLines === undefined) return; + const matchingItemsIndexed = matchedTranscriptLines.reduce((acc, match) => ({ + ...acc, + [match.id]: match + }), {}); + const sortedMatchIds = sorter([...matchedTranscriptLines], true).map(item => item.id); + if (matchesOnly) { + setSearchResults({ + results: matchingItemsIndexed, + ids: sortedMatchIds, + matchingIds: sortedMatchIds + }); + } else { + const joinedIndexed = { + ...itemsIndexed, + ...matchingItemsIndexed + }; + const sortedItemIds = sorter(Object.values(joinedIndexed), false).map(item => item.id); - if (playerDispatch) { - if (showMarkers) { - let nextMarkers = []; - if ( - searchResults.matchingIds.length < 25 - || (query?.length >= 4 && searchResults.matchingIds.length < 45) - ) { - // ^^ don't show a bazillion markers if we're searching for a short string ^^ - nextMarkers = searchResults.matchingIds.map(id => { - const result = searchResults.results[id]; - return { - time: result.begin, - text: '', - class: 'ramp--track-marker--search' - }; - }); - } - playerDispatch({ type: 'setSearchMarkers', payload: nextMarkers }); - } else { - playerDispatch({ type: 'setSearchMarkers', payload: [] }); - } + const searchResults = { + results: joinedIndexed, + ids: sortedItemIds, + matchingIds: sortedMatchIds + }; + setSearchResults(searchResults); + if (hitCounts?.length > 0) { + setSearchResults({ + ...searchResults, + counts: hitCounts, + }); + } + setAllSearchResults(allSearchHits); + + if (playerDispatch) { + if (showMarkers) { + let nextMarkers = []; + if ( + searchResults.matchingIds.length < 25 + || (query?.length >= 4 && searchResults.matchingIds.length < 45) + ) { + // ^^ don't show a bazillion markers if we're searching for a short string ^^ + nextMarkers = searchResults.matchingIds.map(id => { + const result = searchResults.results[id]; + return { + time: result.begin, + text: '', + class: 'ramp--track-marker--search' + }; + }); } + playerDispatch({ type: 'setSearchMarkers', payload: nextMarkers }); + } else { + playerDispatch({ type: 'setSearchMarkers', payload: [] }); } - }) - .catch(e => { - console.error('search failed', e, query, transcripts); - }) - ); - }, [matcher, query, enabled, sorter, matchesOnly, showMarkers, playerDispatch]); + } + } + }; return searchResults; } +/** + * Calculate the search hit count for each transcript in the canvas, when use type-in a search + * query + * @param {Object.searchResults} searchResults search result object from useFilteredTranscripts hook + * @param {Object.canvasTranscripts} canvasTranscripts a list of all the transcripts in the canvas + * @returns a list of all transcripts in the canvas with number of search hits for each transcript + */ export const useSearchCounts = ({ searchResults, canvasTranscripts }) => { - const [resultCount, setResultCount] = useState(null); - if (!searchResults?.counts || canvasTranscripts?.length === 0) { - return { tanscriptHitCounts: canvasTranscripts, resultCount, setResultCount }; + return { tanscriptHitCounts: canvasTranscripts }; } const hitCounts = searchResults.counts; let canvasTranscriptsWithCount = []; canvasTranscripts.map((ct) => { - ct.numberOfHits = hitCounts.find((h) => h.transcriptURL === ct.url).numberOfHits; - canvasTranscriptsWithCount.push(ct); + const numberOfHits = hitCounts.find((h) => h.transcriptURL === ct.url).numberOfHits; + canvasTranscriptsWithCount.push({ ...ct, numberOfHits }); }); - const setResultsNavCount = useCallback((transcriptUrl) => { - const hitCounts = searchResults.counts; - if (!(hitCounts === undefined || hitCounts?.length === 0)) { - const currentCount = hitCounts.find(c => c.transcriptURL === transcriptUrl).numberOfHits; - setResultCount(currentCount); - } - }, []); - - console.log(canvasTranscriptsWithCount); - return { tanscriptHitCounts: canvasTranscriptsWithCount, resultCount, setResultsNavCount }; + return { tanscriptHitCounts: canvasTranscriptsWithCount }; }; export const useFocusedMatch = ({ searchResults }) => { @@ -245,6 +263,7 @@ export const useFocusedMatch = ({ searchResults }) => { setFocusedMatchIndex(null); } }, [searchResults.matchingIds]); + useEffect(() => { if (!searchResults.matchingIds.length && focusedMatchIndex !== null) { setFocusedMatchIndex(null); diff --git a/src/services/search.test.js b/src/services/search.test.js index 11c24667..a64d4cc0 100644 --- a/src/services/search.test.js +++ b/src/services/search.test.js @@ -1,7 +1,8 @@ import React, { useEffect } from 'react'; import { PlayerProvider } from '../context/player-context'; import { useFilteredTranscripts, defaultMatcherFactory } from './search'; -import { render, screen, waitFor } from '@testing-library/react'; +import { render, waitFor } from '@testing-library/react'; +import { ManifestProvider } from '../context/manifest-context'; const transcriptsFixture = [ { id: 0, text: 'The party has begun.' }, @@ -45,7 +46,9 @@ describe('useFilteredTranscripts', () => { }; const Component = ( - + + + ); return { resultRef, Component }; @@ -120,7 +123,7 @@ describe('useFilteredTranscripts', () => { expect(resultRef.current.matchingIds).toEqual([4, 1, 5, 7]); }); - }) + }); }); describe('default behavior', () => { test('when the search query is null, all results are returned with 0 matches', async () => { @@ -151,7 +154,7 @@ describe('useFilteredTranscripts', () => { 'I believe that on the first night I went to ', 'Gatsby', '\'s house' - ]) + ]); }); expect(resultRef.current.results[4].match).toEqual([ 'and somehow they ended up at ', diff --git a/src/services/transcript-parser.js b/src/services/transcript-parser.js index 94eee9fc..f4c85b30 100644 --- a/src/services/transcript-parser.js +++ b/src/services/transcript-parser.js @@ -763,7 +763,18 @@ function parseTimedTextLine({ times, line, tag }, isSRT) { return null; } } -export const parseContentSearchResponse = (response, query, trancripts) => { + +/** + * Parse the content search response from the search service, and then use it to calculate + * number of search hits for each transcripts, and create a list of matched transcript + * lines for the search in the current transcript + * @param {Object} response JSON response from content search API + * @param {String} query search query from transcript search + * @param {Array} trancripts content of the displayed transcript with ids + * @param {String} selectedTranscript url of the selected transcript + * @returns a list of matched transcript lines for the current search + */ +export const parseContentSearchResponse = (response, query, trancripts, selectedTranscript) => { if (!response || response === undefined) return []; let hitCounts = []; @@ -781,16 +792,32 @@ export const parseContentSearchResponse = (response, query, trancripts) => { searchHits.push({ target, targetURI, value }); }); } - for (const [key, value] of Object.entries(Object.groupBy(searchHits, ({ targetURI }) => targetURI))) { + // Group search responses by transcript + const allSearchHits = Object.groupBy(searchHits, ({ targetURI }) => targetURI); + + // Calculate search hit count for each transcript in the Canvas + for (const [key, value] of Object.entries(allSearchHits)) { hitCounts.push({ transcriptURL: key, numberOfHits: value.length }); } - const matchedTranscriptLines = getMatchedTranscriptLines(searchHits, query, trancripts); - return { matchedTranscriptLines, hitCounts }; + + // Get all the matching transcript lines with the query in the current transcript + const matchedTranscriptLines = getMatchedTranscriptLines(allSearchHits[selectedTranscript], query, trancripts); + return { matchedTranscriptLines, hitCounts, allSearchHits }; }; +/** + * Create a list matched transcript lines for the current search for the displayed transcript + * @param {Array} searchHits a list of matched transcript lines with ids from the current transcript + * @param {String} query search query + * @param {Array} transcripts list of all the transcript lines from the current transcript + * @returns a list of matched transcrip lines in the current transcript + */ export const getMatchedTranscriptLines = (searchHits, query, transcripts) => { const qStr = query.trim().toLocaleLowerCase(); let transcriptLines = []; + + if (searchHits === undefined) return; + searchHits.map((item) => { const { target, value } = item; // Read time offsets and text of the search hit @@ -829,6 +856,16 @@ export const getMatchedTranscriptLines = (searchHits, query, transcripts) => { return transcriptLines; }; +// FIXME:: When there are 2 hits in the same transcript text/cue, only the first +// match is highlighted. +/** + * Generic function to split the matched transcript text into 3 parts where the output is in + * the format [text before search query, search query, text after search query] + * @param {Number} offset character offset to the query string in the matched transcript text/cue + * @param {String} text matched transcript text/cue + * @param {String} query current search query + * @returns a list of parts of the given matched transcript text/cue + */ export const getMatchedParts = (offset, text, query) => { return [ text.slice(0, offset), diff --git a/src/services/transcript-parser.test.js b/src/services/transcript-parser.test.js index 9050a986..27401799 100644 --- a/src/services/transcript-parser.test.js +++ b/src/services/transcript-parser.test.js @@ -808,4 +808,59 @@ describe('transcript-parser', () => { }); }); }); + + describe('getMatchedTranscriptLines()', () => { + const transcripts = [ + { + id: 0, begin: 71.9, end: 82, tag: "TIMED_CUE", + text: 'Then, in the lunchroom, Mr. Bungle was so \rclumsy and impolite that he knocked over \reverything. And no one wanted to sit next \rto him.\r' + }, + { + id: 1, begin: 83.5, end: 89, tag: "TIMED_CUE", + text: 'And when he finally knocked his own tray \roff the table, that was the end of the puppet \rshow.\r' + }, + { + id: 2, begin: 90.3, end: 96.3, tag: "TIMED_CUE", + text: 'The children knew that even though Mr. Bungle \rwas funny to watch, he wouldn\'t be much fun \rto eat with.\r' + }, + { + id: 3, begin: 96.4, end: 102.5, tag: "TIMED_CUE", + text: 'Phil knew that a Mr. Bungle wouldn\'t have \rmany friends. He wouldn\'t want to be like \rMr. Bungle.\r' + }, + { + id: 4, begin: 103.9, end: 109.1, tag: "TIMED_CUE", + text: 'Later Miss Brown said it was time to for \rthe children who ate in the cafeteria to \rgo to lunch.\r' + }, + { + id: 5, begin: 109.2, end: 112.5, tag: "TIMED_CUE", + text: 'She hoped there weren\'t any Mr. Bungles in \rthis room.\r' + }, + { + id: 6, begin: 118.5, end: 123.2, tag: "TIMED_CUE", + text: 'Phil stopped to return a book to Miss Brown \rwhile his friends went on to the lunchroom.\r' + }, + ]; + const searchHits = [ + { + target: "http://example.com/canvas/1/transcript/1/transcripts#t=00:01:36.400,00:01:42.500", + targetURI: "http://example.com/canvas/1/transcript/1/transcripts", + value: "Phil knew that a Mr. Bungle wouldn't have many friends. He wouldn't want to be like Mr. Bungle." + }, + { + target: "http://example.com/canvas/1/transcript/1/transcripts#t=00:01:58.500,00:02:03.200", + targetURI: "http://example.com/canvas/1/transcrip/1/transcripts", + value: "Phil stopped to return a book to Miss Brown while his friends went on to the lunchroom." + }, + ]; + const matchedTranscriptLines = transcriptParser.getMatchedTranscriptLines(searchHits, 'phil', transcripts); + expect(matchedTranscriptLines).toHaveLength(2); + expect(matchedTranscriptLines[0]).toEqual({ + id: 3, + begin: 96.4, + end: 102.5, + tag: "TIMED_CUE", + text: "Phil knew that a Mr. Bungle wouldn't have many friends. He wouldn't want to be like Mr. Bungle.", + match: ["", "Phil", " knew that a Mr. Bungle wouldn't have many friends. He wouldn't want to be like Mr. Bungle."] + }); + }); }); diff --git a/src/test_data/lunchroom-manners.js b/src/test_data/lunchroom-manners.js index 8e556bfc..0037ed01 100644 --- a/src/test_data/lunchroom-manners.js +++ b/src/test_data/lunchroom-manners.js @@ -8,6 +8,12 @@ export default { label: { en: ['Beginning Responsibility: Lunchroom Manners'], }, + service: [ + { + type: "SearchService2", + id: "http://example.com/manifest/search" + } + ], metadata: [ { label: { none: ["Title"] }, diff --git a/src/test_data/transcript-annotation.js b/src/test_data/transcript-annotation.js index 47e20667..3f0ba5f8 100644 --- a/src/test_data/transcript-annotation.js +++ b/src/test_data/transcript-annotation.js @@ -116,6 +116,12 @@ export default { ], }, ], + service: [ + { + type: "SearchService2", + id: "http://example.com/sample/transcript-annotation/canvas/1/search" + } + ], }, { id: 'https://example.com/sample/transcript-annotation/canvas/2', @@ -161,6 +167,12 @@ export default { } ] }, + service: [ + { + type: "SearchService", + id: "http://example.com/sample/transcript-annotation/canvas/2/search" + } + ], }, ], structures: [ From 007039e70e32a27083f8a407c451b32648b796bf Mon Sep 17 00:00:00 2001 From: dwithana Date: Thu, 6 Jun 2024 16:35:40 -0700 Subject: [PATCH 5/5] Fix failing test --- .../Transcript/TranscriptMenu/TranscriptSelector.js | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/components/Transcript/TranscriptMenu/TranscriptSelector.js b/src/components/Transcript/TranscriptMenu/TranscriptSelector.js index f9c3254d..996af2c4 100644 --- a/src/components/Transcript/TranscriptMenu/TranscriptSelector.js +++ b/src/components/Transcript/TranscriptMenu/TranscriptSelector.js @@ -16,10 +16,6 @@ const TranscriptSelector = ({ selectTranscript(event.target.value); }; - const buildOptionText = (title, numberOfHits) => { - return `${title} (${numberOfHits ? numberOfHits : ''})`; - }; - if (transcriptData) { const result = [
{transcriptData.map((t, i) => ( ))}