From 43d532dc34f1493cc95f704a1a2e49c385fbe2c7 Mon Sep 17 00:00:00 2001 From: Herr Vigg Date: Sun, 13 Sep 2020 15:37:05 +0200 Subject: [PATCH] Extend language code format (#880) Fixes #836. Fixes partially #668. Major refactoring: language code format now handled with a unique regex. The new format allows 2 or 3-letter (ISO 639-2 and 639-3), lower case. Upper case values are only allowed for legacy codes but not for new entries. A migration of DB will be required before enforcing to lower case. URL checks remain case-insensitive (unchanged). --- admin/js/common.js | 9 +++---- admin/js/common.min.js | 2 +- admin/qtx_admin.php | 8 ++++--- admin/qtx_admin_options_update.php | 20 ++++++++++++---- admin/qtx_admin_settings.php | 10 +++++--- admin/qtx_admin_utils.php | 5 +++- admin/qtx_admin_utils_db.php | 14 +++++++---- modules/wp-seo/qwpseo-activation.php | 6 +++-- modules/wp-seo/qwpseo-admin.php | 3 ++- qtranslate.php | 2 +- qtranslate_core.php | 35 +++++++++++++++++----------- qtranslate_frontend.php | 3 ++- qtranslate_options.php | 3 +++ qtranslate_utils.php | 4 +++- 14 files changed, 83 insertions(+), 41 deletions(-) diff --git a/admin/js/common.js b/admin/js/common.js index 5d40d400..851ce5e4 100644 --- a/admin/js/common.js +++ b/admin/js/common.js @@ -32,7 +32,8 @@ var qTranslateConfig = window.qTranslateConfig; * since 3.2.7 */ qtranxj_get_split_blocks = function (text) { - var split_regex = /(||\[:[a-z]{2}]|\[:]|{:[a-z]{2}}|{:})/gi; + var regex = '(||\\[:lang]|\\[:]|{:lang}|{:})'.replace(/lang/g, qTranslateConfig.lang_code_format); + var split_regex = new RegExp(regex, "gi"); // Most browsers support RegExp.prototype[@@split]()... except IE if ('a~b'.split(/(~)/).length === 3) { @@ -81,9 +82,9 @@ qtranxj_split_blocks = function (blocks) { } return result; } - var clang_regex = //gi; - var blang_regex = /\[:([a-z]{2})]/gi; - var slang_regex = /{:([a-z]{2})}/gi; // @since 3.3.6 swirly brackets + var clang_regex = new RegExp(''.replace(/lang/g, qTranslateConfig.lang_code_format), 'gi'); + var blang_regex = new RegExp('\\[:(lang)]'.replace(/lang/g, qTranslateConfig.lang_code_format), 'gi'); + var slang_regex = new RegExp('{:(lang)}'.replace(/lang/g, qTranslateConfig.lang_code_format), 'gi'); var lang = false; var matches; for (var i = 0; i < blocks.length; ++i) { diff --git a/admin/js/common.min.js b/admin/js/common.min.js index 62254c46..e3e912de 100644 --- a/admin/js/common.min.js +++ b/admin/js/common.min.js @@ -1 +1 @@ -var qTranslateConfig=window.qTranslateConfig;function qtranxj_ce(n,t,e,a){var i=document.createElement(n);if(t)for(var s in t)i[s]=t[s];return e&&(a&&e.firstChild?e.insertBefore(i,e.firstChild):e.appendChild(i)),i}qtranxj_get_split_blocks=function(n){return n.xsplit(/(||\[:[a-z]{2}]|\[:]|{:[a-z]{2}}|{:})/gi)},qtranxj_split=function(n){var t=qtranxj_get_split_blocks(n);return qtranxj_split_blocks(t)},qtranxj_split_blocks=function(n){var t=new Object;for(var e in qTranslateConfig.language_config)t[e]="";if(!n||!n.length)return t;if(1===n.length){var a=n[0];for(var e in qTranslateConfig.language_config)t[e]+=a;return t}for(var i,s=//gi,o=/\[:([a-z]{2})]/gi,r=/{:([a-z]{2})}/gi,l=(e=!1,0);l
-

ISO Language Code for the Language you want to insert. (Example: en)', 'qtranslate' ) . '
' . __( 'The language code is used in language tags and in URLs. It is case sensitive. Use of lower case for the language code is preferable, but not required. The code may be arbitrary chosen by site owner, although it is preferable to use already commonly accepted code if available. Once a language code is created and entries for this language are made, it is difficult to change it, please make a careful decision.', 'qtranslate' ) ?>

+ size="3" maxlength="3"/> +

', '' ); + echo '
'; + echo _( 'The language code is used in language tags and in URLs. The code may be arbitrary chosen by site owner, although it is preferable to use already commonly accepted code if available. Once a language code is created and entries for this language are made, it is difficult to change it, please make a careful decision.', 'qtranslate' ) + ?>

@@ -346,7 +350,7 @@ private function add_general_section() { if ( $url_mode == QTX_URL_DOMAINS ) : ?>
$#ism", $block, $matches ) ) { + if ( preg_match( "#^$#ism", $block, $matches ) ) { $lang_closed = false; $lang = $matches[1]; $text .= '[:' . $lang . ']'; continue; - } elseif ( preg_match( "#^\[:([a-z]{2})]$#ism", $block, $matches ) ) { + } elseif ( preg_match( "#^\[:($lang_code)]$#ism", $block, $matches ) ) { $lang_closed = false; $lang = $matches[1]; $text .= '[:' . $lang . ']'; @@ -428,9 +430,11 @@ function qtranxf_extract_languages( $text, $lang2keep ) { $s = ''; $current_language = false; $eol = false; + $lang_code = QTX_LANG_CODE_FORMAT; + foreach ( $blocks as $block ) { // detect c-tags - if ( preg_match( "#^$#ism", $block, $matches ) ) { + if ( preg_match( "#^$#ism", $block, $matches ) ) { $current_language = $matches[1]; if ( isset( $lang2keep[ $current_language ] ) ) { $s .= $block; @@ -438,7 +442,7 @@ function qtranxf_extract_languages( $text, $lang2keep ) { } continue; // detect b-tags - } elseif ( preg_match( "#^\[:([a-z]{2})]$#ism", $block, $matches ) ) { + } elseif ( preg_match( "#^\[:($lang_code)]$#ism", $block, $matches ) ) { $current_language = $matches[1]; if ( isset( $lang2keep[ $current_language ] ) ) { $s .= $block; @@ -446,7 +450,7 @@ function qtranxf_extract_languages( $text, $lang2keep ) { } continue; // detect s-tags @since 3.3.6 swirly bracket encoding added - } elseif ( preg_match( "#^{:([a-z]{2})}$#ism", $block, $matches ) ) { + } elseif ( preg_match( "#^{:($lang_code)}$#ism", $block, $matches ) ) { $current_language = $matches[1]; if ( isset( $lang2keep[ $current_language ] ) ) { $s .= $block; diff --git a/modules/wp-seo/qwpseo-activation.php b/modules/wp-seo/qwpseo-activation.php index ab080209..77db657a 100644 --- a/modules/wp-seo/qwpseo-activation.php +++ b/modules/wp-seo/qwpseo-activation.php @@ -17,9 +17,11 @@ /* function qwpseo_set_encoding_s($value){ + $lang_code = QTX_LANG_CODE_FORMAT; + if(is_string($value)){ - $value = preg_replace('//ism', '{:$1}', $value); - $value = preg_replace('/\\[:([a-z]{2})\\]/ism', '{:$1}', $value); + $value = preg_replace('//ism', '{:$1}', $value); + $value = preg_replace('/\\[:($lang_code)\\]/ism', '{:$1}', $value); $value = preg_replace('/\\[:\\]|/ism', '{:}', $value); }elseif(is_array($value)){ foreach($value as $k => $v){ diff --git a/modules/wp-seo/qwpseo-admin.php b/modules/wp-seo/qwpseo-admin.php index 521f33ef..445c544a 100644 --- a/modules/wp-seo/qwpseo-admin.php +++ b/modules/wp-seo/qwpseo-admin.php @@ -228,7 +228,8 @@ function qwpseo_xmlsitemaps_config() { * @since 1.1 */ function qwpseo_encode_swirly( $value ) { - $value = preg_replace( '#\[:([a-z]{2}|)]#i', '{:$1}', $value ); + $lang_code = QTX_LANG_CODE_FORMAT; + $value = preg_replace( '#\[:($lang_code|)]#i', '{:$1}', $value ); return $value; } diff --git a/qtranslate.php b/qtranslate.php index 2a7aca5a..3baebd74 100644 --- a/qtranslate.php +++ b/qtranslate.php @@ -54,7 +54,7 @@ * Designed as interface for other plugin integration. The documentation is available at * https://github.com/qtranslate/qtranslate-xt/wiki/Integration-Guide/ */ -define( 'QTX_VERSION', '3.8.1' ); +define( 'QTX_VERSION', '3.9.0.dev.1' ); if ( ! defined( 'QTRANSLATE_FILE' ) ) { define( 'QTRANSLATE_FILE', __FILE__ ); diff --git a/qtranslate_core.php b/qtranslate_core.php index ec2fa35e..67504271 100644 --- a/qtranslate_core.php +++ b/qtranslate_core.php @@ -260,6 +260,7 @@ function qtranxf_parse_language_info( &$url_info, $link = false ) { return false; // url is not from this WP installation } + $lang_code = QTX_LANG_CODE_FORMAT; $doredirect = false; // parse URL lang @@ -267,11 +268,11 @@ function qtranxf_parse_language_info( &$url_info, $link = false ) { $url_mode = $q_config['url_mode']; switch ( $url_mode ) { case QTX_URL_PATH: - if ( ! empty( $url_info['wp-path'] ) && preg_match( '!^/([a-z]{2})(/|$)!i', $url_info['wp-path'], $match ) ) { + if ( ! empty( $url_info['wp-path'] ) && preg_match( "!^/($lang_code)(/|$)!i", $url_info['wp-path'], $match ) ) { $lang = qtranxf_resolveLangCase( $match[1], $doredirect ); if ( $lang ) { $url_info['lang_url'] = $lang; - $url_info['wp-path'] = substr( $url_info['wp-path'], 3 ); + $url_info['wp-path'] = substr( $url_info['wp-path'], strlen( $lang ) + 1 ); $url_info['doing_front_end'] = true; } } @@ -279,11 +280,11 @@ function qtranxf_parse_language_info( &$url_info, $link = false ) { case QTX_URL_DOMAIN: if ( ! empty( $url_info['host'] ) ) { - if ( preg_match( '#^([a-z]{2})\.#i', $url_info['host'], $match ) ) { + if ( preg_match( "#^($lang_code)\.#i", $url_info['host'], $match ) ) { $lang = qtranxf_resolveLangCase( $match[1], $doredirect ); if ( $lang ) { $url_info['lang_url'] = $lang; - $url_info['host'] = substr( $url_info['host'], 3 ); + $url_info['host'] = substr( $url_info['host'], strlen( $lang ) + 1 ); $url_info['doing_front_end'] = true; } } @@ -336,7 +337,7 @@ function qtranxf_parse_language_info( &$url_info, $link = false ) { } else if ( isset( $_POST['lang'] ) ) { $query_lang = qtranxf_resolveLangCase( $_POST['lang'], $doredirect ); } - } elseif ( ! empty( $url_info['query'] ) && preg_match( '/(^|&|&|&|\?)lang=([a-z]{2})/i', $url_info['query'], $match ) ) { + } elseif ( ! empty( $url_info['query'] ) && preg_match( '/(^|&|&|&|\?)lang=($lang_code)/i', $url_info['query'], $match ) ) { // checked for query mode, see https://github.com/qTranslate-Team/qtranslate-x/issues/288 $query_lang = qtranxf_resolveLangCase( $match[2], $doredirect ); } @@ -910,7 +911,8 @@ function qtranxf_url_del_language( &$urlinfo ) { switch ( $url_mode ) { case QTX_URL_PATH: // might already have language information - if ( ! empty( $urlinfo['wp-path'] ) && preg_match( '!^/([a-z]{2})(/|$)!i', $urlinfo['wp-path'], $match ) ) { + $lang_code = QTX_LANG_CODE_FORMAT; + if ( ! empty( $urlinfo['wp-path'] ) && preg_match( "!^/($lang_code)(/|$)!i", $urlinfo['wp-path'], $match ) ) { if ( qtranxf_isEnabled( $match[1] ) ) { // found language information, remove it $urlinfo['wp-path'] = substr( $urlinfo['wp-path'], 3 ); @@ -1154,7 +1156,8 @@ function qtranxf_convertURLs( $url, $lang = '', $forceadmin = false, $showDefaul * @since 3.3.6 swirly bracket encoding added */ function qtranxf_get_language_blocks( $text ) { - $split_regex = "#(||\[:[a-z]{2}\]|\[:\]|\{:[a-z]{2}\}|\{:\})#ism"; + $lang_code = QTX_LANG_CODE_FORMAT; + $split_regex = "#(||\[:$lang_code\]|\[:\]|\{:$lang_code\}|\{:\})#ism"; return preg_split( $split_regex, $text, - 1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); } @@ -1170,22 +1173,26 @@ function qtranxf_split( $text ) { **/ function qtranxf_split_blocks( $blocks, &$found = array() ) { global $q_config; + $result = array(); foreach ( $q_config['enabled_languages'] as $language ) { $result[ $language ] = ''; } + $current_language = false; + $lang_code = QTX_LANG_CODE_FORMAT; + foreach ( $blocks as $block ) { // detect c-tags - if ( preg_match( "#^$#ism", $block, $matches ) ) { + if ( preg_match( "#^$#ism", $block, $matches ) ) { $current_language = $matches[1]; continue; // detect b-tags - } elseif ( preg_match( "#^\[:([a-z]{2})]$#ism", $block, $matches ) ) { + } elseif ( preg_match( "#^\[:($lang_code)]$#ism", $block, $matches ) ) { $current_language = $matches[1]; continue; // detect s-tags @since 3.3.6 swirly bracket encoding added - } elseif ( preg_match( "#^{:([a-z]{2})}$#ism", $block, $matches ) ) { + } elseif ( preg_match( "#^{:($lang_code)}$#ism", $block, $matches ) ) { $current_language = $matches[1]; continue; } @@ -1226,17 +1233,19 @@ function qtranxf_split_blocks( $blocks, &$found = array() ) { function qtranxf_split_languages( $blocks ) { $result = array(); $current_language = false; + $lang_code = QTX_LANG_CODE_FORMAT; + foreach ( $blocks as $block ) { // detect c-tags - if ( preg_match( "#^$#ism", $block, $matches ) ) { + if ( preg_match( "#^$#ism", $block, $matches ) ) { $current_language = $matches[1]; continue; // detect b-tags - } elseif ( preg_match( "#^\[:([a-z]{2})]$#ism", $block, $matches ) ) { + } elseif ( preg_match( "#^\[:($lang_code)]$#ism", $block, $matches ) ) { $current_language = $matches[1]; continue; // detect s-tags @since 3.3.6 swirly bracket encoding added - } elseif ( preg_match( "#^{:([a-z]{2})}$#ism", $block, $matches ) ) { + } elseif ( preg_match( "#^{:($lang_code)}$#ism", $block, $matches ) ) { $current_language = $matches[1]; continue; } diff --git a/qtranslate_frontend.php b/qtranslate_frontend.php index 5735dcd4..578d39f3 100644 --- a/qtranslate_frontend.php +++ b/qtranslate_frontend.php @@ -882,7 +882,8 @@ function qtranxf_convertBlogInfoURL( $url, $what ) { * Moved here from qtranslate_hooks.php and modified. */ function qtranxf_pagenum_link( $url ) { - $url_fixed = preg_replace( '#\?lang=[a-z]{2}/#i', '/', $url ); //kind of ugly fix for function get_pagenum_link in /wp-includes/link-template.php. Maybe we should cancel filter 'bloginfo_url' instead? + $lang_code = QTX_LANG_CODE_FORMAT; + $url_fixed = preg_replace( "#\?lang=$lang_code/#i", '/', $url ); //kind of ugly fix for function get_pagenum_link in /wp-includes/link-template.php. Maybe we should cancel filter 'bloginfo_url' instead? return qtranxf_convertURL( $url_fixed ); } diff --git a/qtranslate_options.php b/qtranslate_options.php index 87f8e791..5e4b8c1d 100644 --- a/qtranslate_options.php +++ b/qtranslate_options.php @@ -51,6 +51,9 @@ define( 'QTX_IGNORE_FILE_TYPES', 'gif,jpg,jpeg,png,svg,pdf,swf,tif,rar,zip,7z,mpg,divx,mpeg,avi,css,js,mp3,mp4,apk' ); +// Language code format: ISO 639-1 (2 alpha), 639-2 or 639-3 (3 alpha) +define( 'QTX_LANG_CODE_FORMAT', '[a-z]{2,3}' ); + global $q_config; global $qtranslate_options; diff --git a/qtranslate_utils.php b/qtranslate_utils.php index deb8f804..f36d55eb 100644 --- a/qtranslate_utils.php +++ b/qtranslate_utils.php @@ -412,7 +412,9 @@ function qtranxf_external_host( $host ) { } function qtranxf_isMultilingual( $str ) { - return preg_match( '/|\[:[a-z]{2}]|{:[a-z]{2}}/im', $str ); + $lang_code = QTX_LANG_CODE_FORMAT; + + return preg_match( "/|\[:$lang_code]|{:$lang_code}/im", $str ); } function qtranxf_is_multilingual_deep( $value ) {