Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change parser to use http response info to distinguish file types #111

Merged
merged 1 commit into from
Dec 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/build/bundle.1cbd777f.js

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ object-assign

/*! @name @videojs/http-streaming @version 2.2.4 @license Apache-2.0 */

/*! @name @videojs/vhs-utils @version 1.3.0 @license MIT */

/*! @name @videojs/vhs-utils @version 2.2.1 @license MIT */

/*! @name aes-decrypter @version 3.1.0 @license Apache-2.0 */
Expand Down
2 changes: 0 additions & 2 deletions docs/build/bundle.7863e271.js

This file was deleted.

2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@
</head>
<body>
<div id="rsg-root"></div>
<script src="build/bundle.7863e271.js"></script>
<script src="build/bundle.1cbd777f.js"></script>
</body>
</html>
100 changes: 59 additions & 41 deletions src/services/transcript-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,54 @@ export async function parseTranscriptData(url, canvasIndex) {
if (!url) {
return null;
}
const isValid =
url.match(
/(http(s)?:\/\/.)[-a-zA-Z0-9.:]*\/(.*\/\.html|.*\.txt|.*\.json|.*\.vtt|.*\.[a-zA-z])/g
) !== null;

if (!isValid) {
// validate url
let newUrl = '';
try {
newUrl = new URL(url);
} catch (_) {
return null;
}

let extension = url.split('.').reverse()[0];
// Use .doc extension for both .docx and .doc for ease of understanding
extension = extension == 'docx' || extension == 'doc' ? 'doc' : extension;
let fileType = null;
let fileData = null;

switch (extension) {
case 'json':
let jsonData = await fetchJSONFile(tUrl);
// get file type
await fetch(url).then((response) => {
fileType = response.headers.get('Content-Type');
fileData = response;
});

switch (fileType.split(';')[0]) {
case 'application/json':
let jsonData = await fileData.json();
let manifest = parseManifest(jsonData);
if (manifest) {
return parseManifestTranscript(jsonData, url, canvasIndex);
} else {
tData = parseJSONData(jsonData);
return { tData, tUrl };
}
case 'vtt':
tData = await parseWebVTT(url);
return { tData, tUrl: url };
case 'txt':
tData = fetchTextFile(url);
return { tData: null, tUrl: url };
case 'doc':
tData = await parseWordFile(url);
case 'text/plain':
let textData = await fileData.text();
let textLines = textData.split('\n');
if (textLines.length == 0) {
return { tData: [], tUrl: url };
}
const isWebVTT = validateWebVTT(textLines[0]);
if (isWebVTT) {
tData = parseWebVTT(textData);
return { tData, tUrl: url };
} else {
return { tData: null, tUrl: url };
}
// for .doc files
case 'application/msword':
tData = await parseWordFile(fileData);
return { tData: [tData], tUrl: url };
// for .docx files
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
tData = await parseWordFile(fileData);
return { tData: [tData], tUrl: url };
default:
return { tData: [], tUrl: url };
Expand All @@ -62,9 +79,8 @@ export async function parseTranscriptData(url, canvasIndex) {
* @param {String} url url of the word document
* @returns {Array} html markdown for the word document contents
*/
async function parseWordFile(url) {
async function parseWordFile(response) {
let tData = null;
const response = await fetch(url);
const data = await response.blob();
let arrayBuffer = new File([data], name, {
type: response.headers.get('content-type'),
Expand Down Expand Up @@ -170,7 +186,9 @@ async function parseExternalAnnotations(annotation) {
/** When external file contains text data */
if (tType === 'Text') {
if (tBody.getFormat() === 'text/vtt') {
tData = await parseWebVTT(tUrl);
await fetch(tUrl)
.then((response) => response.text())
.then((data) => (tData = parseWebVTT(data)));
} else {
await fetch(tUrl)
.then((response) => response.text())
Expand Down Expand Up @@ -281,26 +299,26 @@ function createTData(annotations) {
* text: 'Transcript text sample'
* }
*/
export async function parseWebVTT(fileURL) {
export function parseWebVTT(fileData) {
let tData = [];
await fetch(fileURL)
.then((response) => response.text())
.then((data) => {
const lines = cleanWebVTT(data);
let firstLine = lines.shift();
const valid = validateWebVTT(firstLine);
if (!valid) {
console.error('Invalid WebVTT file');
return;
}
const groups = groupWebVTTLines(lines);
groups.map((t) => {
let line = parseWebVTTLine(t);
if (line) {
tData.push(line);
}
});
});
// await fetch(fileURL)
// .then((response) => response.text())
// .then((data) => {
const lines = cleanWebVTT(fileData);
const firstLine = lines.shift();
const valid = validateWebVTT(firstLine);
if (!valid) {
console.error('Invalid WebVTT file');
return [];
}
const groups = groupWebVTTLines(lines);
groups.map((t) => {
let line = parseWebVTTLine(t);
if (line) {
tData.push(line);
}
});
// });
return tData;
}

Expand Down
Loading