Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve (and relax) search vs direct URL entry detection in Link Control #51011

Merged
merged 10 commits into from
Jun 6, 2023
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* WordPress dependencies
*/
import { isURL } from '@wordpress/url';
import { getProtocol, isValidProtocol, isValidFragment } from '@wordpress/url';

/**
* Determines whether a given value could be a URL. Note this does not
Expand All @@ -15,6 +15,43 @@ import { isURL } from '@wordpress/url';
* @return {boolean} whether or not the value is potentially a URL.
*/
export default function isURLLike( val ) {
const isInternal = val?.startsWith( '#' );
return isURL( val ) || ( val && val.includes( 'www.' ) ) || isInternal;
const hasSpaces = val.includes( ' ' );

if ( hasSpaces ) {
return false;
}

const protocol = getProtocol( val );
const protocolIsValid = isValidProtocol( protocol );

const mayBeTLD = hasPossibleTLD( val );

const isWWW = val?.startsWith( 'www.' );

const isInternal = val?.startsWith( '#' ) && isValidFragment( val );

return protocolIsValid || isWWW || isInternal || mayBeTLD;
}

/**
* Checks if a given URL has a valid Top-Level Domain (TLD).
*
* @param {string} url - The URL to check.
* @param {number} maxLength - The maximum length of the TLD.
* @return {boolean} Returns true if the URL has a valid TLD, false otherwise.
*/
function hasPossibleTLD( url, maxLength = 6 ) {
getdave marked this conversation as resolved.
Show resolved Hide resolved
// Clean the URL by removing anything after the first occurrence of "?" or "#".
const cleanedURL = url.split( /[?#]/ )[ 0 ];

// Regular expression explanation:
// - (?<=\S) : Positive lookbehind assertion to ensure there is at least one non-whitespace character before the TLD
// - \. : Matches a literal dot (.)
// - [a-zA-Z_]{2,maxLength} : Matches 2 to maxLength letters or underscores, representing the TLD
// - (?:\/|$) : Non-capturing group that matches either a forward slash (/) or the end of the string
const regex = new RegExp(
`(?<=\\S)\\.(?:[a-zA-Z_]{2,${ maxLength }})(?:\\/|$)`
);

return regex.test( cleanedURL );
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/**
* Internal dependencies
*/
import isURLLike from '../is-url-like';

describe( 'isURLLike', () => {
it.each( [ 'https://wordpress.org', 'http://wordpress.org' ] )(
'returns true for a string that starts with an http(s) protocol',
( testString ) => {
expect( isURLLike( testString ) ).toBe( true );
}
);

it.each( [
'hello world',
'https:// has spaces even though starts with protocol',
'www. wordpress . org',
] )(
'returns false for any string with spaces (e.g. "%s")',
( testString ) => {
expect( isURLLike( testString ) ).toBe( false );
}
);

it( 'returns false for a string without a protocol or a TLD', () => {
expect( isURLLike( 'somedirectentryhere' ) ).toBe( false );
} );

it( 'returns true for a string beginning with www.', () => {
expect( isURLLike( 'www.wordpress.org' ) ).toBe( true );
} );

it.each( [ 'mailto:test@wordpress.org', 'tel:123456' ] )(
'returns true for common protocols',
( testString ) => {
expect( isURLLike( testString ) ).toBe( true );
}
);

it( 'returns true for internal anchor ("hash") links.', () => {
expect( isURLLike( '#someinternallink' ) ).toBe( true );
} );

// use .each to test multiple cases
it.each( [
[ true, 'http://example.com' ],
[ true, 'https://test.co.uk?query=param' ],
[ true, 'ftp://openai.ai?param=value#section' ],
[ true, 'example.com' ],
[ true, 'http://example.com?query=param#section' ],
[ true, 'https://test.co.uk/some/path' ],
[ true, 'ftp://openai.ai/some/path' ],
[ true, 'example.org/some/path' ],
[ true, 'example_test.tld' ],
[ true, 'example_test.com' ],
[ false, 'example' ],
[ false, '.com' ],
[ true, '_test.com' ],
[ true, 'http://example_test.com' ],
] )(
'returns %s when testing against string "%s" for a valid TLD',
( expected, testString ) => {
expect( isURLLike( testString ) ).toBe( expected );
}
);
} );
Original file line number Diff line number Diff line change
Expand Up @@ -51,46 +51,23 @@ const handleEntitySearch = async (
val,
suggestionsQuery,
fetchSearchSuggestions,
directEntryHandler,
withCreateSuggestion,
withURLSuggestion,
pageOnFront
) => {
const { isInitialSuggestions } = suggestionsQuery;
let resultsIncludeFrontPage = false;

let results = await Promise.all( [
fetchSearchSuggestions( val, suggestionsQuery ),
directEntryHandler( val ),
] );
const results = await fetchSearchSuggestions( val, suggestionsQuery );

// Identify front page and update type to match.
results[ 0 ] = results[ 0 ].map( ( result ) => {
results.map( ( result ) => {
if ( Number( result.id ) === pageOnFront ) {
resultsIncludeFrontPage = true;
result.isFrontPage = true;
return result;
}

return result;
} );

const couldBeURL = ! val.includes( ' ' );

// If it's potentially a URL search then concat on a URL search suggestion
// just for good measure. That way once the actual results run out we always
// have a URL option to fallback on.
if (
! resultsIncludeFrontPage &&
couldBeURL &&
withURLSuggestion &&
! isInitialSuggestions
) {
results = results[ 0 ].concat( results[ 1 ] );
} else {
results = results[ 0 ];
}

// If displaying initial suggestions just return plain results.
if ( isInitialSuggestions ) {
return results;
Expand Down Expand Up @@ -150,12 +127,18 @@ export default function useSearchHandler(
val,
{ ...suggestionsQuery, isInitialSuggestions },
fetchSearchSuggestions,
directEntryHandler,
withCreateSuggestion,
withURLSuggestion,
pageOnFront
);
},
[ directEntryHandler, fetchSearchSuggestions, withCreateSuggestion ]
[
directEntryHandler,
fetchSearchSuggestions,
pageOnFront,
suggestionsQuery,
withCreateSuggestion,
withURLSuggestion,
]
);
}