From a514a1a1591ff57dcf0a05d7e6f9897328befbbd Mon Sep 17 00:00:00 2001 From: Ryuu Mitsuki Date: Tue, 20 Aug 2024 18:12:44 +0700 Subject: [PATCH 1/5] feat(url-utils): Add `url-utils` module This module provides utility static class for working with YouTube URLs more easy and reliable, offering static methods for URL validation and video ID extraction. This module is aimed to improve the YouTube URL validation and more strict rather using only regular expression. --- lib/url-utils.js | 264 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 lib/url-utils.js diff --git a/lib/url-utils.js b/lib/url-utils.js new file mode 100644 index 0000000..4486182 --- /dev/null +++ b/lib/url-utils.js @@ -0,0 +1,264 @@ +/** + * A module provides utilities for working with YouTube URLs. + * + * @module url-utils + * @requires utils + * @author Ryuu Mitsuki (https://github.com/mitsuki31) + * @license MIT + * @since 1.1.0 + */ + +'use strict'; + +const { isNullOrUndefined } = require('./utils'); + + +/** + * @classdesc Represents an error that occurred during video ID extraction. + * @extends {Error} + * @global + */ +class IDExtractorError extends Error {} + +/** + * @classdesc A static class that contains utilities for working with YouTube URLs. + * + * @class + * @hideconstructor + * @public + * @since 1.1.0 + */ +function URLUtils() { + throw new Error('Cannot create new instance of static class'); +} + +/** + * A list containing valid known YouTube domains. + * + * @type {Readonly} + * @static + * @readonly + * @public + */ +URLUtils.VALID_YOUTUBE_DOMAINS = Object.freeze([ + // ! NOTE: Any modification that affecting list orders will + // ! need to update the `BASIC_YOUTUBE_DOMAINS` property. 
+ 'www.youtube.com', // Normal + 'm.youtube.com', // Normal (typically in YouTube mobile) + 'youtube.com', // Alternative (but will be redirected) + 'youtubekids.com', // YouTube Kids + 'music.youtube.com', // YouTube Music + 'gaming.youtube.com', // YouTube Gaming + 'youtu.be' // Shortened +]); + +/** + * A list containing YouTube domains that basically most used for downloading. + * + * @type {Readonly} + * @static + * @readonly + * @public + * @see {@link module:url-utils~URLUtils.VALID_YOUTUBE_DOMAINS URLUtils.VALID_YOUTUBE_DOMAINS} + */ +URLUtils.BASIC_YOUTUBE_DOMAINS = Object.freeze([ + ...URLUtils.VALID_YOUTUBE_DOMAINS.slice(0, 3), + URLUtils.VALID_YOUTUBE_DOMAINS[4], + URLUtils.VALID_YOUTUBE_DOMAINS[URLUtils.VALID_YOUTUBE_DOMAINS.length - 1] +]); + +/** + * Maximum length of YouTube video ID. + * + * According to YouTube API documentation V3, the `videoId` is a string but + * does not specified about the length of video IDs was used. However, communities + * says that YouTube video IDs have: + * + * - Exactly 11 characters. + * - Allowed characters and symbols: `[A-Za-z0-9_-]` + * + * More details: + * + * @type {number} + * @static + * @readonly + * @default + */ +URLUtils.MAX_ID_LENGTH = 0x0B; + +/** + * A regular expression for matching the YouTube video ID. + * + * This regular expression will match exactly 11 characters and can be more. + * If you want strictly parse the YouTube video ID, use {@link + * module:url-utils~URLUtils.VIDEO_ID_STRICT_REGEX `VIDEO_ID_STRICT_REGEX`} instead. + * + * @type {RegExp} + * @static + * @readonly + * @see {@link module:url-utils~URLUtils.VIDEO_ID_STRICT_REGEX URLUtils.VIDEO_ID_STRICT_REGEX} + * @see {@link module:url-utils~URLUtils.MAX_ID_LENGTH URLUtils.MAX_ID_LENGTH} + */ +URLUtils.VIDEO_ID_REGEX = new RegExp(`[A-Za-z0-9_-]{${URLUtils.MAX_ID_LENGTH}}`); + +/** + * A regular expression for strictly matching the YouTube video ID. 
+ * + * @type {RegExp} + * @static + * @readonly + * @see {@link module:url-utils~URLUtils.VIDEO_ID_REGEX URLUtils.VIDEO_ID_REGEX} + * @see {@link module:url-utils~URLUtils.MAX_ID_LENGTH URLUtils.MAX_ID_LENGTH} + */ +URLUtils.VIDEO_ID_STRICT_REGEX = new RegExp(`^[A-Za-z0-9_-]{${URLUtils.MAX_ID_LENGTH}}$`); + +/** + * A regular expression for matching the YouTube video (excluding video ID). + * + * @type {RegExp} + * @static + * @readonly + * @see {@link module:url-utils~URLUtils.YTURL_WITH_ID_REGEX URLUtils.YTURL_WITH_ID_REGEX} + * @see {@link module:url-utils~URLUtils.BASIC_YOUTUBE_DOMAINS URLUtils.BASIC_YOUTUBE_DOMAINS} + */ +URLUtils.YTURL_REGEX = new RegExp(`https?:\\/\\/(${ + URLUtils.BASIC_YOUTUBE_DOMAINS.reduce((acc, val) => { + val = val.replace(/\./g, '\\.'); + acc += !(acc.length || 0) ? val : `|${val}`; + acc += (val !== 'youtu\\.be') ? '\\/watch\\?v=' : '\\/'; + return acc; + }, '') +})`); + +/** + * A regular expression for matching the YouTube video. + * + * @type {RegExp} + * @static + * @readonly + * @see {@link module:url-utils~URLUtils.YTURL_REGEX URLUtils.YTURL_REGEX} + * @see {@link module:url-utils~URLUtils.BASIC_YOUTUBE_DOMAINS URLUtils.BASIC_YOUTUBE_DOMAINS} + */ +URLUtils.YTURL_WITH_ID_REGEX = new RegExp( + `${URLUtils.YTURL_REGEX.source}${URLUtils.VIDEO_ID_REGEX.source}` +); + +/** + * Extracts the YouTube video ID from given YouTube URL. + * + * The YouTube video ID have exactly 11 characters with allowed + * symbols are `A-Z`, `a-z`, `0-9`, `_`, and `-`. + * + * Allowed YouTube domains to extract: + * - `www.youtube.com` + * - `m.youtube.com` + * - `youtube.com` + * - `youtu.be` + * - `music.youtube.com` + * + * @param {string | URL} url - The URL to be evaluate. + * @returns {string} A string with 11 characters representing the video ID. + * + * @throws {TypeError} If a given URL is not a string or `URL` object. + * @throws {URIError} If the host name of given URL is invalid YouTube domain. 
+ * @throws {IDExtractorError} If the extractor is unable to extract the video ID; + * this can happen due to an incorrect YouTube URL. + * + * @static + * @public + */ +URLUtils.extractVideoId = function (url) { + url = (typeof url === 'string') ? String.prototype.trim.apply(url) : url; + if (isNullOrUndefined(url) + // Only accept a URL with type either of string or URL object + || (url && typeof url !== 'string') + && (url && !(url instanceof URL)) + ) { + throw new TypeError(`Given URL is invalid type, got ${typeof url}`); + } + + const parsedUrl = (url instanceof URL) ? url : new URL(url); + let videoId; + + // Throw if the host name is not a valid YouTube domain + if (!isNullOrUndefined(parsedUrl.hostname) + && !URLUtils.VALID_YOUTUBE_DOMAINS.includes(parsedUrl.hostname)) { + throw new URIError( + `Not a valid YouTube domain: \x1b[33m${parsedUrl.hostname}\x1b[0m` + ); + } + + if (URLUtils.YTURL_WITH_ID_REGEX.test(url)) { + videoId = parsedUrl.searchParams.get('v'); // can be null + } + + if (isNullOrUndefined(videoId)) { + const paths = parsedUrl.pathname.split('/'); + videoId = (parsedUrl.hostname === 'youtu.be') ? paths[1] : paths[2]; + } + + // No usable ID: guard first since `validateId` throws `TypeError` on non-strings + if (typeof videoId !== 'string' || !URLUtils.validateId(videoId)) { + throw new IDExtractorError( + `Unable to extract video ID from URL: \x1b[33m${url}\x1b[0m` + ); + } + + return videoId; +}; + +/** + * Validates the given YouTube video ID. + * + * @param {string} id - The video ID to validate. + * @returns {boolean} `true` if the given ID correctly represents + * the YouTube video ID; otherwise `false`. + * + * @static + * @public + */ +URLUtils.validateId = function (id) { + id = (typeof id === 'string') ? 
String.prototype.trim.apply(id) : id; + if (isNullOrUndefined(id) || typeof id !== 'string') { + throw new TypeError(`Given ID is not a string, got ${typeof id}`); + } + return !isNullOrUndefined(id) && URLUtils.VIDEO_ID_STRICT_REGEX.test(id); +}; + +/** + * Validates the given YouTube URL and optionally validates its video ID. + * + * @param {string | URL} url - The YouTube URL to validate. + * @param {boolean} [withId=true] - Whether to also validate the video ID within the URL. + * If `false`, the function will only validate the URL's domain name. + * @returns {boolean} `true` if the given URL is a valid YouTube URL; otherwise `false`. + * + * @static + * @public + */ +URLUtils.validateUrl = function (url, withId=true) { + url = (typeof url === 'string') ? String.prototype.trim.apply(url) : url; + if (isNullOrUndefined(url) + // Only accept a URL with type either of string or URL object + || (url && typeof url !== 'string') + && (url && !(url instanceof URL)) + ) { + throw new TypeError(`Given URL is invalid type, got ${typeof url}`); + } + + let result = false; + const parsedUrl = (url instanceof URL) ? url : new URL(url); + + result = URLUtils.YTURL_REGEX.test(url); + if (withId) { + try { + result = result && !!URLUtils.extractVideoId(parsedUrl); + } catch (_err) { + result = result && false; + } + } + + return result; +}; + +module.exports = Object.freeze(URLUtils); From 0afa0ca0ba8e2819b375294022766d8f50270403 Mon Sep 17 00:00:00 2001 From: Ryuu Mitsuki Date: Tue, 20 Aug 2024 18:20:53 +0700 Subject: [PATCH 2/5] refactor(ytmp3): Use another URL validation Replace the regex-based validation to function-based instead (`URLUtils.validateUrl`). 
--- lib/ytmp3.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/ytmp3.js b/lib/ytmp3.js index 8e48e04..c6e9d7c 100644 --- a/lib/ytmp3.js +++ b/lib/ytmp3.js @@ -33,7 +33,7 @@ * @version 1.0.0 * @requires audioconv * @requires utils - * @requires yt-urlfmt + * @requires url-utils * @author Ryuu Mitsuki (https://github.com/mitsuki31) * @license MIT * @since 1.0.0 @@ -63,7 +63,7 @@ const { convertAudio, defaultOptions: defaultAudioConvOptions } = require('./audioconv'); -const { VIDEO: VIDEO_URL } = require('./yt-urlfmt'); +const URLUtils = require('./url-utils'); /** * The video information object. @@ -164,12 +164,12 @@ function validateYTURL(url, verbose=false) { throw new TypeError(`Invalid type of URL: ${typeof url}`); } - // Test that the given URL is valid and extract it - url = ((typeof url === 'string') ? (new URL(url)) : url).href; + // Parse the given URL string + url = (typeof url === 'string') ? new URL(url) : url; verbose && process.stdout.write( `${log.INFO_PREFIX} Validating URL, please wait...`); - if (VIDEO_URL.test(url) || ytdl.validateURL(url)) { + if (URLUtils.validateUrl(url)) { verbose && process.stdout.write( `\n${log.DONE_PREFIX} \x1b[92m\u2714\x1b[0m URL is valid\n`); } else { From 8f59aa3bf1b82a30dfbc3ec7050a428c1036a555 Mon Sep 17 00:00:00 2001 From: Ryuu Mitsuki Date: Tue, 20 Aug 2024 18:24:02 +0700 Subject: [PATCH 3/5] refactor(utils): Add alias and exports refactor * Added an alias to exports: `log` aliased from `logger` * Re-sorted the module's members and documentations --- lib/utils.js | 57 ++++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/lib/utils.js b/lib/utils.js index 6288d8e..b66b79f 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -61,6 +61,7 @@ const path = require('node:path'); * @since 1.0.0 */ +// region Constants const FrozenProperty = { writable: false, @@ -76,6 +77,8 @@ const OUTDIR = path.join(ROOTDIR, 'download'); const 
LOGDIR = path.join(os.homedir(), '.ytmp3-js', 'logs'); +// region Utilities Function + /** * Synchronously checks whether the specified directory path is exist, * creates new if not exist with asynchronous operation. @@ -102,13 +105,6 @@ function createDirIfNotExistSync(dirpath) { if (!fs.existsSync(dirpath)) fs.mkdirSync(dirpath, { recursive: true }); } -/** - * **Logger Namespace** - * @namespace module:utils~Logger - * @public - * @since 1.0.0 - */ - /** * Checks if a given value is null or undefined. * @@ -144,6 +140,13 @@ function isObject(x) { ); } +/** + * **Logger Namespace** + * @namespace module:utils~Logger + * @public + * @since 1.0.0 + */ + /** * A custom logger object for the **YTMP3** project with ANSI color codes. * @@ -260,6 +263,24 @@ Object.defineProperties(logger, { } }); +/** + * Drops null and undefined values from the input object. + * + * @param {Object} obj - The input object to filter null and undefined values from. + * @return {Object} The filtered object without null and undefined values. + * + * @public + * @since 1.0.0 + */ +function dropNullAndUndefined(obj) { + return Object.keys(obj).reduce((acc, key) => { + if (!isNullOrUndefined(obj[key])) acc[key] = obj[key]; + return acc; + }, {}); +} + + +// region Utilities Class class ProgressBar { /** @@ -380,30 +401,14 @@ class ProgressBar { } -/** - * Drops null and undefined values from the input object. - * - * @param {Object} obj - The input object to filter null and undefined values from. - * @return {Object} The filtered object without null and undefined values. 
- * - * @public - * @since 1.0.0 - */ -function dropNullAndUndefined(obj) { - return Object.keys(obj).reduce((acc, key) => { - if (!isNullOrUndefined(obj[key])) acc[key] = obj[key]; - return acc; - }, {}); -} - - module.exports = Object.freeze({ ROOTDIR, OUTDIR, LOGDIR, logger, + log: logger, // alias for `logger` isNullOrUndefined, isObject, - ProgressBar, createDirIfNotExist, createDirIfNotExistSync, - dropNullAndUndefined + dropNullAndUndefined, + ProgressBar }); From 28520c0a00aa78469551249d3d07826e357c4394 Mon Sep 17 00:00:00 2001 From: Ryuu Mitsuki Date: Tue, 20 Aug 2024 20:51:37 +0700 Subject: [PATCH 4/5] feat(error): Add `error` module * Added `error` module containing all custom error classes * Moved the `IDExtractionError` and `UnknownOptionError` class to `error` module * Resolve unused variable within try-catch block in `URLUtils.validateUrl` method --- lib/config.js | 11 +---------- lib/error.js | 33 +++++++++++++++++++++++++++++++++ lib/url-utils.js | 9 ++------- 3 files changed, 36 insertions(+), 17 deletions(-) create mode 100644 lib/error.js diff --git a/lib/config.js b/lib/config.js index b005f00..ed6e66a 100644 --- a/lib/config.js +++ b/lib/config.js @@ -92,6 +92,7 @@ const { const { resolveOptions: resolveACOptions } = require('./audioconv'); +const { UnknownOptionError } = require('./error'); /** * An array containing all known configuration file's extension names. @@ -118,16 +119,6 @@ const KNOWN_OPTIONS = [ 'downloadOptions', 'audioConverterOptions' ]; */ const ERR_FORMAT = '%s\n\tat \x1b[90m%s\n\x1b[1;91m%s\x1b[0m\n'; -/** - * A class represents the error that occurred due to defining an unknown option - * in the configuration object and may throw during configuration validation. - * - * @public - * @extends Error - * @since 1.0.0 - */ -class UnknownOptionError extends Error {} - /** * Resolves the configuration for YTMP3-JS from a given configuration object. 
* diff --git a/lib/error.js b/lib/error.js new file mode 100644 index 0000000..4ebe246 --- /dev/null +++ b/lib/error.js @@ -0,0 +1,33 @@ +/** + * A module contains all custom error classes for **YTMP3-JS** project. + * + * @author Ryuu Mitsuki (https://github.com/mitsuki31) + * @license MIT + * @since 1.1.0 + */ + +'use strict'; + +/** + * @classdesc Represents an error that occurred during video ID extraction. + * + * @extends Error + * @global + * @since 1.1.0 + */ +class IDExtractorError extends Error {} + +/** + * @classdesc A class represents the error that occurred due to defining an unknown + * option in the configuration object and may throw during configuration validation. + * + * @extends Error + * @global + * @since 1.0.0 + */ +class UnknownOptionError extends Error {} + +module.exports = { + IDExtractorError, + UnknownOptionError +}; diff --git a/lib/url-utils.js b/lib/url-utils.js index 4486182..8ba7553 100644 --- a/lib/url-utils.js +++ b/lib/url-utils.js @@ -11,15 +11,9 @@ 'use strict'; const { isNullOrUndefined } = require('./utils'); +const { IDExtractorError } = require('./error'); -/** - * @classdesc Represents an error that occurred during video ID extraction. - * @extends {Error} - * @global - */ -class IDExtractorError extends Error {} - /** * @classdesc A static class that contains utilities for working with YouTube URLs. 
* @@ -253,6 +247,7 @@ URLUtils.validateUrl = function (url, withId=true) { if (withId) { try { result = result && !!URLUtils.extractVideoId(parsedUrl); + // eslint-disable-next-line no-unused-vars } catch (_err) { result = result && false; } From 6fb1ccd201dd33f4cec30145abafb57ce7f6bbb8 Mon Sep 17 00:00:00 2001 From: Ryuu Mitsuki Date: Tue, 20 Aug 2024 21:22:54 +0700 Subject: [PATCH 5/5] test(url-utils): Add suite test for `url-utils` module --- test/unittest/url-utils.spec.mjs | 114 +++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 test/unittest/url-utils.spec.mjs diff --git a/test/unittest/url-utils.spec.mjs b/test/unittest/url-utils.spec.mjs new file mode 100644 index 0000000..39c64ed --- /dev/null +++ b/test/unittest/url-utils.spec.mjs @@ -0,0 +1,114 @@ +import assert from 'node:assert'; + +import URLUtils from '../../lib/url-utils.js'; + +describe('module:url-utils', function () { + describe('.URLUtils', function () { + const testMessages = { + constructor: [ + 'should throw an error when attempt to create new instance' + ], + extractVideoId: [ + 'should return the video ID of given YouTube URL if it is valid', + 'should throw a `TypeError` if given URL is neither a string or URL object', + 'should throw a `IDExtractorError` if an incorrect YouTube URL are given', + "should throw a `URIError` if the given URL's domain is not a YouTube domain" + ], + validateUrl: [ + 'should return true if the given URL is valid', + "should return false if the given URL's domain name is not a YouTube domain", + 'should return false if the given URL is valid but with invalid video ID', + 'should return true if the given URL is valid but with invalid video ID and `withId` disabled', + 'should throw `TypeError` if the given URL is neither a string or URL object' + ], + validateId: [ + 'should return true if the given ID is valid', + 'should return false if the given ID is invalid', + 'should throw `TypeError` if the given ID is not a string' + ] 
+ }; + + describe('#constructor', function () { + it(testMessages.constructor[0], function () { + assert.throws(() => URLUtils(), Error); + assert.throws(() => new URLUtils(), Error); + }); + }); + + describe('#extractVideoId', function () { + const id = 'abcdeQWERTY'; // Valid video ID always have 11 characters + const url = `https://www.youtube.com/watch?v=${id}`; + + it(testMessages.extractVideoId[0], function () { + assert.strictEqual(URLUtils.extractVideoId(url), id); + }); + + it(testMessages.extractVideoId[1], function () { + assert.throws(() => URLUtils.extractVideoId(123), TypeError); + assert.throws(() => URLUtils.extractVideoId([]), TypeError); + assert.throws(() => URLUtils.extractVideoId(true), TypeError); + assert.throws(() => URLUtils.extractVideoId(0n), TypeError); + assert.throws(() => URLUtils.extractVideoId(-Infinity), TypeError); + }); + + it(testMessages.extractVideoId[2], function () { + assert.throws(() => URLUtils.extractVideoId('https://youtu.be/watch?v=abc'), + (err) => err.constructor.name === 'IDExtractorError'); + }); + + it(testMessages.extractVideoId[3], function () { + assert.throws(() => URLUtils.extractVideoId('https://open.spotify.com'), URIError); + }); + }); + + describe('#validateUrl', function () { + const exampleValidUrl = 'https://m.youtube.com/watch?v=12345-abcde'; + const exampleInvalidUrl = 'https://youtu.be/12-=~56'; + + it(testMessages.validateUrl[0], function () { + assert.ok(URLUtils.validateUrl(exampleValidUrl)); + }); + + it(testMessages.validateUrl[1], function () { + assert.equal(URLUtils.validateUrl('https://www.google.com/'), false); + }); + + it(testMessages.validateUrl[2], function () { + assert.equal(URLUtils.validateUrl(exampleInvalidUrl), false); + }); + + it(testMessages.validateUrl[3], function () { + assert.ok(URLUtils.validateUrl(exampleInvalidUrl, false)); + }); + + it(testMessages.validateUrl[4], function () { + assert.throws(() => URLUtils.validateUrl(123), TypeError); + assert.throws(() => 
URLUtils.validateUrl([]), TypeError); + 
assert.throws(() => URLUtils.validateUrl(0n), TypeError); + assert.throws(() => URLUtils.validateUrl(-Infinity), TypeError); + assert.throws(() => URLUtils.validateUrl(/abc/), TypeError); + }); + }); + + describe('#validateId', function () { + const exampleValidId = '_1234-zxcvO'; // Valid ID always have 11 characters + const exampleInvalidId = '123~V_'; + + it(testMessages.validateId[0], function () { + assert.ok(URLUtils.validateId(exampleValidId)); + }); + + it(testMessages.validateId[1], function () { + assert.equal(URLUtils.validateId(exampleInvalidId), false); + }); + + it(testMessages.validateId[2], function () { + assert.throws(() => URLUtils.validateId(/_ba$/), TypeError); + assert.throws(() => URLUtils.validateId(0x12345), TypeError); + assert.throws(() => URLUtils.validateId(999n), TypeError); + assert.throws(() => URLUtils.validateId(Infinity), TypeError); + assert.throws(() => URLUtils.validateId({}), TypeError); + }); + }); + }); +});