WordPress · scruffian · Jun 6, 2023 · May 26, 2023 · May 26, 2023 · May 26, 2023
@@ -1,7 +1,7 @@
 /**
  * WordPress dependencies
  */
-import { isURL } from '@wordpress/url';
+import { getProtocol, isValidProtocol, isValidFragment } from '@wordpress/url';
 
 /**
  * Determines whether a given value could be a URL. Note this does not
@@ -15,6 +15,43 @@ import { isURL } from '@wordpress/url';
  * @return {boolean} whether or not the value is potentially a URL.
  */
 export default function isURLLike( val ) {
-	const isInternal = val?.startsWith( '#' );
-	return isURL( val ) || ( val && val.includes( 'www.' ) ) || isInternal;
+	const hasSpaces = val.includes( ' ' );
+
+	if ( hasSpaces ) {
+		return false;
+	}
+
+	const protocol = getProtocol( val );
+	const protocolIsValid = isValidProtocol( protocol );
+
+	const mayBeTLD = hasPossibleTLD( val );
+
+	const isWWW = val?.startsWith( 'www.' );
+
+	const isInternal = val?.startsWith( '#' ) && isValidFragment( val );
+
+	return protocolIsValid || isWWW || isInternal || mayBeTLD;
+}
+
+/**
+ * Checks if a given URL has a valid Top-Level Domain (TLD).
+ *
+ * @param {string} url       - The URL to check.
+ * @param {number} maxLength - The maximum length of the TLD.
+ * @return {boolean} Returns true if the URL has a valid TLD, false otherwise.
+ */
+function hasPossibleTLD( url, maxLength = 6 ) {
+	// Clean the URL by removing anything after the first occurrence of "?" or "#".
+	const cleanedURL = url.split( /[?#]/ )[ 0 ];
+
+	// Regular expression explanation:
+	// - (?<=\S)                  : Positive lookbehind assertion to ensure there is at least one non-whitespace character before the TLD
+	// - \.                       : Matches a literal dot (.)
+	// - [a-zA-Z_]{2,maxLength}   : Matches 2 to maxLength letters or underscores, representing the TLD
+	// - (?:\/|$)                 : Non-capturing group that matches either a forward slash (/) or the end of the string
+	const regex = new RegExp(
+		`(?<=\\S)\\.(?:[a-zA-Z_]{2,${ maxLength }})(?:\\/|$)`
+	);
+
+	return regex.test( cleanedURL );
 }
@@ -0,0 +1,66 @@
+/**
+ * Internal dependencies
+ */
+import isURLLike from '../is-url-like';
+
+describe( 'isURLLike', () => {
+	it.each( [ 'https://wordpress.org', 'http://wordpress.org' ] )(
+		'returns true for a string that starts with an http(s) protocol',
+		( testString ) => {
+			expect( isURLLike( testString ) ).toBe( true );
+		}
+	);
+
+	it.each( [
+		'hello world',
+		'https://   has spaces even though starts with protocol',
+		'www. wordpress . org',
+	] )(
+		'returns false for any string with spaces (e.g. "%s")',
+		( testString ) => {
+			expect( isURLLike( testString ) ).toBe( false );
+		}
+	);
+
+	it( 'returns false for a string without a protocol or a TLD', () => {
+		expect( isURLLike( 'somedirectentryhere' ) ).toBe( false );
+	} );
+
+	it( 'returns true for a string beginning with www.', () => {
+		expect( isURLLike( 'www.wordpress.org' ) ).toBe( true );
+	} );
+
+	it.each( [ 'mailto:test@wordpress.org', 'tel:123456' ] )(
+		'returns true for common protocols',
+		( testString ) => {
+			expect( isURLLike( testString ) ).toBe( true );
+		}
+	);
+
+	it( 'returns true for internal anchor ("hash") links.', () => {
+		expect( isURLLike( '#someinternallink' ) ).toBe( true );
+	} );
+
+	// use .each to test multiple cases
+	it.each( [
+		[ true, 'http://example.com' ],
+		[ true, 'https://test.co.uk?query=param' ],
+		[ true, 'ftp://openai.ai?param=value#section' ],
+		[ true, 'example.com' ],
+		[ true, 'http://example.com?query=param#section' ],
+		[ true, 'https://test.co.uk/some/path' ],
+		[ true, 'ftp://openai.ai/some/path' ],
+		[ true, 'example.org/some/path' ],
+		[ true, 'example_test.tld' ],
+		[ true, 'example_test.com' ],
+		[ false, 'example' ],
+		[ false, '.com' ],
+		[ true, '_test.com' ],
+		[ true, 'http://example_test.com' ],
+	] )(
+		'returns %s when testing against string "%s" for a valid TLD',
+		( expected, testString ) => {
+			expect( isURLLike( testString ) ).toBe( expected );
+		}
+	);
+} );
@@ -51,46 +51,23 @@ const handleEntitySearch = async (
 	val,
 	suggestionsQuery,
 	fetchSearchSuggestions,
-	directEntryHandler,
 	withCreateSuggestion,
-	withURLSuggestion,
 	pageOnFront
 ) => {
 	const { isInitialSuggestions } = suggestionsQuery;
-	let resultsIncludeFrontPage = false;
 
-	let results = await Promise.all( [
-		fetchSearchSuggestions( val, suggestionsQuery ),
-		directEntryHandler( val ),
-	] );
+	const results = await fetchSearchSuggestions( val, suggestionsQuery );
 
 	// Identify front page and update type to match.
-	results[ 0 ] = results[ 0 ].map( ( result ) => {
+	results.map( ( result ) => {
 		if ( Number( result.id ) === pageOnFront ) {
-			resultsIncludeFrontPage = true;
 			result.isFrontPage = true;
 			return result;
 		}
 
 		return result;
 	} );
 
-	const couldBeURL = ! val.includes( ' ' );
-
-	// If it's potentially a URL search then concat on a URL search suggestion
-	// just for good measure. That way once the actual results run out we always
-	// have a URL option to fallback on.
-	if (
-		! resultsIncludeFrontPage &&
-		couldBeURL &&
-		withURLSuggestion &&
-		! isInitialSuggestions
-	) {
-		results = results[ 0 ].concat( results[ 1 ] );
-	} else {
-		results = results[ 0 ];
-	}
-
 	// If displaying initial suggestions just return plain results.
 	if ( isInitialSuggestions ) {
 		return results;
@@ -150,12 +127,18 @@ export default function useSearchHandler(
 						val,
 						{ ...suggestionsQuery, isInitialSuggestions },
 						fetchSearchSuggestions,
-						directEntryHandler,
 						withCreateSuggestion,
 						withURLSuggestion,
 						pageOnFront
 				  );
 		},
-		[ directEntryHandler, fetchSearchSuggestions, withCreateSuggestion ]
+		[
+			directEntryHandler,
+			fetchSearchSuggestions,
+			pageOnFront,
+			suggestionsQuery,
+			withCreateSuggestion,
+			withURLSuggestion,
+		]
 	);
 }