Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor YouTube URL validator and introduce utility modules #29

Merged
merged 5 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions lib/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ const {
const {
resolveOptions: resolveACOptions
} = require('./audioconv');
const { UnknownOptionError } = require('./error');

/**
* An array containing all known configuration file's extension names.
Expand All @@ -118,16 +119,6 @@ const KNOWN_OPTIONS = [ 'downloadOptions', 'audioConverterOptions' ];
*/
const ERR_FORMAT = '%s\n\tat \x1b[90m%s\n\x1b[1;91m%s\x1b[0m\n';

/**
 * Represents the error raised when an unknown option is defined in the
 * configuration object; it may be thrown during configuration validation.
 *
 * @public
 * @extends Error
 * @since 1.0.0
 */
class UnknownOptionError extends Error {
  /**
   * @param {...*} args - Arguments forwarded unchanged to the `Error`
   *                      constructor (a message and, optionally, an options object).
   */
  constructor(...args) {
    super(...args);
    // An empty subclass inherits `name` from `Error.prototype` ('Error'), which
    // makes `String(err)` and `err.name` checks indistinguishable from a plain Error.
    this.name = 'UnknownOptionError';
  }
}

/**
* Resolves the configuration for YTMP3-JS from a given configuration object.
*
Expand Down
33 changes: 33 additions & 0 deletions lib/error.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/**
* A module contains all custom error classes for **YTMP3-JS** project.
*
* @author Ryuu Mitsuki (https://github.com/mitsuki31)
* @license MIT
* @since 1.1.0
*/

'use strict';

/**
 * @classdesc Represents an error that occurred during video ID extraction.
 *
 * @extends Error
 * @global
 * @since 1.1.0
 */
class IDExtractorError extends Error {
  /**
   * @param {...*} args - Arguments forwarded unchanged to the `Error`
   *                      constructor (a message and, optionally, an options object).
   */
  constructor(...args) {
    super(...args);
    // An empty subclass inherits `name` from `Error.prototype` ('Error'), which
    // makes `String(err)` and `err.name` checks indistinguishable from a plain Error.
    this.name = 'IDExtractorError';
  }
}

/**
 * @classdesc Represents the error raised when an unknown option is defined in
 * the configuration object; it may be thrown during configuration validation.
 *
 * @extends Error
 * @global
 * @since 1.0.0
 */
class UnknownOptionError extends Error {
  /**
   * @param {...*} args - Arguments forwarded unchanged to the `Error`
   *                      constructor (a message and, optionally, an options object).
   */
  constructor(...args) {
    super(...args);
    // An empty subclass inherits `name` from `Error.prototype` ('Error'), which
    // makes `String(err)` and `err.name` checks indistinguishable from a plain Error.
    this.name = 'UnknownOptionError';
  }
}

exports = {
IDExtractorError,
UnknownOptionError
};
259 changes: 259 additions & 0 deletions lib/url-utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
/**
* A module provides utilities for working with YouTube URLs.
*
* @module url-utils
* @requires utils
* @author Ryuu Mitsuki (https://github.com/mitsuki31)
* @license MIT
* @since 1.1.0
*/

'use strict';

const { isNullOrUndefined } = require('./utils');
const { IDExtractorError } = require('./error');


/**
 * @classdesc A static, non-instantiable holder of utilities for working with
 * YouTube URLs. Calling it, with or without `new`, always throws.
 *
 * @class
 * @hideconstructor
 * @public
 * @since 1.1.0
 */
function URLUtils() {
  const reason = 'Cannot create new instance of static class';
  throw new Error(reason);
}

/**
* A frozen list of the host names that are accepted as valid YouTube domains.
*
* The position of each entry matters: `BASIC_YOUTUBE_DOMAINS` is derived from
* this list, so keep both in sync when adding, removing or reordering entries.
*
* @type {Readonly<string[]>}
* @static
* @readonly
* @public
*/
URLUtils.VALID_YOUTUBE_DOMAINS = Object.freeze([
// ! NOTE: Any modification that affects the order of this list
// ! requires updating the `BASIC_YOUTUBE_DOMAINS` property as well.
'www.youtube.com', // Normal
'm.youtube.com', // Normal (typically used by YouTube mobile)
'youtube.com', // Alternative (will be redirected)
'youtubekids.com', // YouTube Kids
'music.youtube.com', // YouTube Music
'gaming.youtube.com', // YouTube Gaming
'youtu.be' // Shortened
]);

/**
 * A list containing the YouTube domains that are most commonly used for downloading.
 *
 * The entries are selected by name and kept only if they are also present in
 * `VALID_YOUTUBE_DOMAINS`, so this list is always a subset of the valid domains
 * and does not depend on the order of that list.
 *
 * @type {Readonly<string[]>}
 * @static
 * @readonly
 * @public
 * @see {@link module:url-utils~URLUtils.VALID_YOUTUBE_DOMAINS URLUtils.VALID_YOUTUBE_DOMAINS}
 */
URLUtils.BASIC_YOUTUBE_DOMAINS = Object.freeze(
  [
    'www.youtube.com',
    'm.youtube.com',
    'youtube.com',
    'music.youtube.com',
    'youtu.be'
  ].filter((domain) => URLUtils.VALID_YOUTUBE_DOMAINS.includes(domain))
);

/**
 * The length of a YouTube video ID.
 *
 * The YouTube Data API v3 documentation describes `videoId` as a string without
 * specifying its length. According to the community, however, a video ID has:
 *
 * - Exactly 11 characters.
 * - Only characters from the set `[A-Za-z0-9_-]`.
 *
 * More details: <https://webapps.stackexchange.com/a/101153>
 *
 * @type {number}
 * @static
 * @readonly
 * @default
 */
URLUtils.MAX_ID_LENGTH = 11;

/**
 * A regular expression for matching the YouTube video ID.
 *
 * The pattern is not anchored: it matches a run of `MAX_ID_LENGTH` valid ID
 * characters anywhere in the input, so a longer string containing such a run
 * matches too. To check that a string is exactly a video ID, use {@link
 * module:url-utils~URLUtils.VIDEO_ID_STRICT_REGEX `VIDEO_ID_STRICT_REGEX`} instead.
 *
 * @type {RegExp}
 * @static
 * @readonly
 * @see {@link module:url-utils~URLUtils.VIDEO_ID_STRICT_REGEX URLUtils.VIDEO_ID_STRICT_REGEX}
 * @see {@link module:url-utils~URLUtils.MAX_ID_LENGTH URLUtils.MAX_ID_LENGTH}
 */
URLUtils.VIDEO_ID_REGEX = new RegExp(
  // Character class followed by an exact repetition count
  ['[A-Za-z0-9_-]', '{', URLUtils.MAX_ID_LENGTH, '}'].join('')
);

/**
 * A regular expression for strictly matching the YouTube video ID: the whole
 * input must consist of exactly `MAX_ID_LENGTH` valid ID characters.
 *
 * @type {RegExp}
 * @static
 * @readonly
 * @see {@link module:url-utils~URLUtils.VIDEO_ID_REGEX URLUtils.VIDEO_ID_REGEX}
 * @see {@link module:url-utils~URLUtils.MAX_ID_LENGTH URLUtils.MAX_ID_LENGTH}
 */
URLUtils.VIDEO_ID_STRICT_REGEX = new RegExp(
  // Same pattern as the loose variant, anchored at both ends
  `^${URLUtils.VIDEO_ID_REGEX.source}$`
);

/**
 * A regular expression for matching the leading part of a YouTube video URL,
 * up to but excluding the video ID.
 *
 * Every domain in `BASIC_YOUTUBE_DOMAINS` becomes one alternative: `youtu.be`
 * is followed by `/`, every other domain by `/watch?v=`.
 *
 * @type {RegExp}
 * @static
 * @readonly
 * @see {@link module:url-utils~URLUtils.YTURL_WITH_ID_REGEX URLUtils.YTURL_WITH_ID_REGEX}
 * @see {@link module:url-utils~URLUtils.BASIC_YOUTUBE_DOMAINS URLUtils.BASIC_YOUTUBE_DOMAINS}
 */
URLUtils.YTURL_REGEX = new RegExp(`https?:\\/\\/(${
  URLUtils.BASIC_YOUTUBE_DOMAINS
    .map((domain) => {
      // Escape the dots so they match literally instead of "any character"
      const escapedDomain = domain.replace(/\./g, '\\.');
      const pathPrefix = (domain === 'youtu.be') ? '\\/' : '\\/watch\\?v=';
      return `${escapedDomain}${pathPrefix}`;
    })
    .join('|')
})`);

/**
 * A regular expression for matching a YouTube video URL including its video ID.
 *
 * It is the concatenation of the `YTURL_REGEX` and `VIDEO_ID_REGEX` patterns.
 *
 * @type {RegExp}
 * @static
 * @readonly
 * @see {@link module:url-utils~URLUtils.YTURL_REGEX URLUtils.YTURL_REGEX}
 * @see {@link module:url-utils~URLUtils.BASIC_YOUTUBE_DOMAINS URLUtils.BASIC_YOUTUBE_DOMAINS}
 */
URLUtils.YTURL_WITH_ID_REGEX = new RegExp(
  [URLUtils.YTURL_REGEX.source, URLUtils.VIDEO_ID_REGEX.source].join('')
);

/**
 * Extracts the YouTube video ID from given YouTube URL.
 *
 * The YouTube video ID has exactly 11 characters; the allowed characters
 * are `A-Z`, `a-z`, `0-9`, `_`, and `-`.
 *
 * The host name must be one of `URLUtils.VALID_YOUTUBE_DOMAINS`, for example:
 *   - `www.youtube.com`
 *   - `m.youtube.com`
 *   - `youtube.com`
 *   - `youtu.be`
 *   - `music.youtube.com`
 *
 * The ID is looked up in the `v` query parameter first (wherever it appears in
 * the query string) and then in the path: the first path segment for `youtu.be`,
 * the second one for every other host.
 *
 * @param {string | URL} url - The URL to be evaluated.
 * @returns {string} A string with 11 characters representing the video ID.
 *
 * @throws {TypeError} If the given URL is neither a string nor a `URL` object,
 *                     or if it is a string that cannot be parsed as a URL.
 * @throws {URIError} If the host name of the given URL is not a valid YouTube domain.
 * @throws {IDExtractorError} If the extractor is unable to extract the video ID,
 *                            which can happen due to an incorrect YouTube URL.
 *
 * @static
 * @public
 */
URLUtils.extractVideoId = function (url) {
  const input = (typeof url === 'string') ? url.trim() : url;

  // Only accept a URL with type either of string or URL object
  if (typeof input !== 'string' && !(input instanceof URL)) {
    throw new TypeError(`Given URL is invalid type, got ${typeof input}`);
  }

  // `new URL()` itself throws a TypeError for an unparsable string
  const parsedUrl = (input instanceof URL) ? input : new URL(input);

  // Throw if the host name is not a valid YouTube domain
  if (!URLUtils.VALID_YOUTUBE_DOMAINS.includes(parsedUrl.hostname)) {
    throw new URIError(
      `Not a valid YouTube domain: \x1b[33m${parsedUrl.hostname}\x1b[0m`
    );
  }

  // Read `v` from the parsed query so that it is found even when it is not the
  // first parameter (e.g. `watch?feature=share&v=<id>`)
  const idFromQuery = parsedUrl.searchParams.get('v');  // can be null

  const segments = parsedUrl.pathname.split('/');
  const idFromPath = (parsedUrl.hostname === 'youtu.be')
    ? segments[1]
    : segments[2];  // can be undefined

  // Keep only string candidates: `validateId` throws a TypeError for anything
  // else, which would mask the documented `IDExtractorError` below
  const videoId = [idFromQuery, idFromPath]
    .filter((candidate) => typeof candidate === 'string')
    .map((candidate) => candidate.trim())
    .find((candidate) => URLUtils.validateId(candidate));

  // If still no video ID, throw an error
  if (videoId === undefined) {
    throw new IDExtractorError(
      `Unable to extract video ID from URL: \x1b[33m${input}\x1b[0m`
    );
  }

  return videoId;
};

/**
 * Checks whether the given string is a well-formed YouTube video ID.
 *
 * Leading and trailing whitespace is ignored; the remaining text must match
 * `URLUtils.VIDEO_ID_STRICT_REGEX`.
 *
 * @param {string} id - The video ID to validate.
 * @returns {boolean} `true` if the given ID correctly represents
 *                    the YouTube video ID; otherwise `false`.
 *
 * @throws {TypeError} If the given ID is not a string.
 *
 * @static
 * @public
 */
URLUtils.validateId = function (id) {
  const candidate = (typeof id === 'string') ? id.trim() : id;

  if (isNullOrUndefined(candidate) || typeof candidate !== 'string') {
    throw new TypeError(`Given ID is not a string, got ${typeof candidate}`);
  }

  // The guard above guarantees a non-null string at this point
  return URLUtils.VIDEO_ID_STRICT_REGEX.test(candidate);
};

/**
 * Validates the given YouTube URL and optionally validates its video ID.
 *
 * A URL is considered valid when its host name is one of
 * `URLUtils.VALID_YOUTUBE_DOMAINS` and it matches `URLUtils.YTURL_REGEX`.
 * A string that cannot be parsed as a URL is reported as invalid (`false`)
 * rather than raising an error.
 *
 * @param {string | URL} url - The YouTube URL to validate.
 * @param {boolean} [withId=true] - Whether to also validate the video ID within the URL.
 *                                  If `false`, only the domain and the URL shape are checked.
 * @returns {boolean} `true` if the given URL is a valid YouTube URL; otherwise `false`.
 *
 * @throws {TypeError} If the given URL is neither a string nor a `URL` object.
 *
 * @static
 * @public
 */
URLUtils.validateUrl = function (url, withId = true) {
  const input = (typeof url === 'string') ? url.trim() : url;

  // Only accept a URL with type either of string or URL object
  if (typeof input !== 'string' && !(input instanceof URL)) {
    throw new TypeError(`Given URL is invalid type, got ${typeof input}`);
  }

  let parsedUrl;
  try {
    parsedUrl = (input instanceof URL) ? input : new URL(input);
  // eslint-disable-next-line no-unused-vars
  } catch (_err) {
    // An unparsable string is simply not a valid URL
    return false;
  }

  // `YTURL_REGEX` is not anchored, so on its own it would also accept a foreign
  // host that merely carries a YouTube URL in its path or query string
  if (!URLUtils.VALID_YOUTUBE_DOMAINS.includes(parsedUrl.hostname)) {
    return false;
  }

  if (!URLUtils.YTURL_REGEX.test(input)) {
    return false;
  }

  if (!withId) {
    return true;
  }

  try {
    return Boolean(URLUtils.extractVideoId(parsedUrl));
  // eslint-disable-next-line no-unused-vars
  } catch (_err) {
    // Any extraction failure means the URL carries no valid video ID
    return false;
  }
};

// Export the static class frozen, so that consumers cannot add, remove or
// reassign its own properties (the freeze is shallow).
module.exports = Object.freeze(URLUtils);
Loading