Skip to content

Commit

Permalink
Fix parsing for timestamps with commas as decimal separators
Browse files Browse the repository at this point in the history
  • Loading branch information
Dananji committed Jul 2, 2024
1 parent 7465d96 commit a91f4d1
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 4 deletions.
8 changes: 6 additions & 2 deletions src/services/transcript-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ const TRANSCRIPT_MIME_TYPES = {
docx: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document']
};

/**
 * Matches a WebVTT timestamp at the start of a string: an optional `hh:`
 * prefix, then `mm:ss`, then a mandatory fractional part introduced by `.`.
 *
 * Intentionally NOT global: these regexes are shared module-level constants,
 * and with the `g` flag `test()`/`exec()` resume from `lastIndex`, so an
 * anchored pattern would fail on every second call against a valid timestamp.
 */
export const VTT_TIMESTAMP_REGEX = /^(?:\d{2}:)?\d{2}:\d{2}(?:\.\d+)/;
// SRT allows using comma for milliseconds while WebVTT does not
export const SRT_TIMESTAMP_REGEX = /^(?:\d{2}:)?\d{2}:\d{2}(?:[.,]\d+)/;

const TRANSCRIPT_MIME_EXTENSIONS = [
{ type: TRANSCRIPT_MIME_TYPES.json, ext: 'json' },
{ type: TRANSCRIPT_MIME_TYPES.webvtt, ext: 'vtt' },
Expand Down Expand Up @@ -732,9 +736,9 @@ function parseTimedTextLine({ times, line, tag }, isSRT) {
let timestampRegex;
if (isSRT) {
// SRT allows using comma for milliseconds while WebVTT does not
timestampRegex = /^(?:\d{2}:)?\d{2}:\d{2}(?:[.,]\d+)/g;
timestampRegex = SRT_TIMESTAMP_REGEX;
} else {
timestampRegex = /^(?:\d{2}:)?\d{2}:\d{2}(?:\.\d+)/;
timestampRegex = VTT_TIMESTAMP_REGEX;
}

switch (tag) {
Expand Down
14 changes: 12 additions & 2 deletions src/services/utility-helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -253,11 +253,21 @@ export function getMediaFragment(uri, duration = 0) {
if (uri !== undefined) {
const fragment = uri.split('#t=')[1];
if (fragment !== undefined) {
let [start, end] = fragment.split(',');
let start, end;
/**
* If the times are in a string format (hh:mm:ss) check for comma-separated decimals.
* Some SRT captions use a comma to separate milliseconds.
*/
const timestampRegex = /([0-9]*:){1,2}([0-9]{2})(?:((\.|\,)[0-9]{2,3})?)/g;
if (fragment.includes(':') && [...fragment.matchAll(/\,/g)]?.length > 1) {
const times = [...fragment.matchAll(timestampRegex)];
[start, end] = times?.length == 2 ? [times[0][0], times[1][0]] : [0, 0];
} else {
[start, end] = fragment.split(',');
}
if (end === undefined) {
end = duration.toString();
}
let timestampRegex = /([0-9]*:){1,2}([0-9]{2})(?:(\.[0-9]{2,3})*)/g;
return {
start: start.match(timestampRegex) ? timeToS(start) : Number(start),
end: end.match(timestampRegex) ? timeToS(end) : Number(end)
Expand Down
32 changes: 32 additions & 0 deletions src/services/utility-helpers.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,45 @@ describe('util helper', () => {
});
});

// Both times use a comma before the milliseconds, so the fragment contains
// three commas: two decimal separators plus the start/end delimiter.
// Expected start: 7 * 60 + 53.9 = 473.9 seconds.
it('returns time in seconds when hh:mm:ss,ms format time string is given', () => {
expect(util.getMediaFragment(
'http://example.com/sample/manifest/canvas#t=00:07:53,900,00:07:56,500'
)).toEqual({
start: 473.9, end: 476.5
});
});

// Start uses '.' and end uses ',' as the decimal separator; the fragment still
// has more than one comma and must resolve to the same values as above.
it('returns time in seconds when hh:mm:ss format with mixed decimal formating is given', () => {
expect(util.getMediaFragment(
'http://example.com/sample/manifest/canvas#t=00:07:53.900,00:07:56,500'
)).toEqual({
start: 473.9, end: 476.5
});
});

// No fractional part: the single comma is only the start/end delimiter.
it('returns time in seconds when hh:mm:ss format time string is given', () => {
expect(util.getMediaFragment(
'http://example.com/sample/manifest/canvas#t=00:07:53,00:07:56'
)).toEqual({
start: 473, end: 476
});
});

// Same comma-decimal case as the first test, but without the hours component.
it('returns time in seconds when mm:ss,ms format time string is given', () => {
expect(util.getMediaFragment(
'http://example.com/sample/manifest/canvas#t=07:53,900,07:56,500'
)).toEqual({
start: 473.9, end: 476.5
});
});

// Mixed '.' / ',' decimal separators without the hours component.
it('returns time in seconds when mm:ss,ms format with mixed decimal formatting is given', () => {
expect(util.getMediaFragment(
'http://example.com/sample/manifest/canvas#t=07:53.900,07:56,500'
)).toEqual({
start: 473.9, end: 476.5
});
});
});

describe('getResourceItems()', () => {
Expand Down

0 comments on commit a91f4d1

Please sign in to comment.