Skip to content

Commit

Permalink
Extend language code format (#880)
Browse files Browse the repository at this point in the history
Fixes #836.  Fixes partially #668.
Major refactoring: language code format now handled with a unique regex.
The new format allows 2-letter (ISO 639-1) or 3-letter (ISO 639-2 and 639-3) codes, in lower case.
Upper case values are only allowed for legacy codes but not for new entries.
A DB migration will be required before lower case can be enforced.
URL checks remain case-insensitive (unchanged).
  • Loading branch information
herrvigg authored Sep 13, 2020
1 parent 91c51ac commit 43d532d
Show file tree
Hide file tree
Showing 14 changed files with 83 additions and 41 deletions.
9 changes: 5 additions & 4 deletions admin/js/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ var qTranslateConfig = window.qTranslateConfig;
* since 3.2.7
*/
qtranxj_get_split_blocks = function (text) {
var split_regex = /(<!--:[a-z]{2}-->|<!--:-->|\[:[a-z]{2}]|\[:]|{:[a-z]{2}}|{:})/gi;
var regex = '(<!--:lang-->|<!--:-->|\\[:lang]|\\[:]|{:lang}|{:})'.replace(/lang/g, qTranslateConfig.lang_code_format);
var split_regex = new RegExp(regex, "gi");

// Most browsers support RegExp.prototype[@@split]()... except IE
if ('a~b'.split(/(~)/).length === 3) {
Expand Down Expand Up @@ -81,9 +82,9 @@ qtranxj_split_blocks = function (blocks) {
}
return result;
}
var clang_regex = /<!--:([a-z]{2})-->/gi;
var blang_regex = /\[:([a-z]{2})]/gi;
var slang_regex = /{:([a-z]{2})}/gi; // @since 3.3.6 swirly brackets
var clang_regex = new RegExp('<!--:(lang)-->'.replace(/lang/g, qTranslateConfig.lang_code_format), 'gi');
var blang_regex = new RegExp('\\[:(lang)]'.replace(/lang/g, qTranslateConfig.lang_code_format), 'gi');
var slang_regex = new RegExp('{:(lang)}'.replace(/lang/g, qTranslateConfig.lang_code_format), 'gi');
var lang = false;
var matches;
for (var i = 0; i < blocks.length; ++i) {
Expand Down
2 changes: 1 addition & 1 deletion admin/js/common.min.js

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions admin/qtx_admin.php
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ function qtranxf_collect_translations_posted() {
}
qtranxf_collect_translations( $request['qtranslate-fields'], $request, $edit_lang );
unset( $request['qtranslate-fields'] );
$url_encoded = http_build_query( $request );
$url_encoded = http_build_query( $request );
$_REQUEST[ $name ] = $url_encoded;
if ( isset( $_POST[ $name ] ) ) {
$_POST[ $name ] = $url_encoded;
Expand Down Expand Up @@ -226,7 +226,7 @@ function qtranxf_get_admin_page_config() {
*/
$admin_config = apply_filters( 'i18n_admin_config', $admin_config );

$page_configs = qtranxf_parse_page_config( $admin_config, $pagenow, $url_query );
$page_configs = qtranxf_parse_page_config( $admin_config, $pagenow, $url_query );
$q_config['i18n-cache']['admin_page_configs'] = $page_configs;

return $page_configs;
Expand Down Expand Up @@ -363,7 +363,7 @@ function qtranxf_get_admin_page_config_post_type( $post_type ) {
if ( file_exists( $src ) ) {
// absolute path was given
if ( qtranxf_startsWith( $src, $content_dir ) ) {
$fp = substr( $src, strlen( $content_dir ) );
$fp = substr( $src, strlen( $content_dir ) );
$page_config['js'][ $key ]['src'] = $fp;
continue;
}
Expand Down Expand Up @@ -459,6 +459,8 @@ function qtranxf_add_admin_footer_js() {
$config['hide_lsb_copy_content'] = true;
}

$config['lang_code_format'] = QTX_LANG_CODE_FORMAT;

// Last chance to customize Java script variable qTranslateConfig
$config = apply_filters( 'qtranslate_admin_page_config', $config );

Expand Down
20 changes: 15 additions & 5 deletions admin/qtx_admin_options_update.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ function qtranxf_editConfig() {
}

$errors = &$q_config['url_info']['errors'];
$warnings = &$q_config['url_info']['warnings'];
$messages = &$q_config['url_info']['messages'];

$q_config['posted'] = array();
Expand Down Expand Up @@ -58,8 +59,13 @@ function qtranxf_editConfig() {
if ( $_POST['language_name'] == '' ) {
$errors[] = __( 'The Language must have a name!', 'qtranslate' );
}
if ( strlen( $lang ) != 2 ) {
$errors[] = __( 'Language Code has to be 2 characters long!', 'qtranslate' );
if ( ! preg_match( '/^' . QTX_LANG_CODE_FORMAT . '$/', $lang ) ) {
// TODO: still allow 2-letter upper case for existing values, keep only case-sensitive check once legacy fixed in DB
if ( ! empty ( $original_lang ) && $lang === $original_lang && preg_match( '/^[a-z]{2}$/i', $lang ) ) {
$warnings[] = sprintf( _( 'The 2-letter language code "%s" should be lower case (ISO 639-1). Upper case is still allowed for legacy codes but not for new entries.', 'qtranslate' ), $lang );
} else {
$errors[] = __( 'Invalid language code!', 'qtranslate' );
}
}
$langs = array();
qtranxf_load_languages( $langs );
Expand Down Expand Up @@ -195,9 +201,13 @@ function qtranxf_editConfig() {
}
} elseif ( isset( $_GET['edit'] ) ) {
$lang = sanitize_text_field( $_GET['edit'] );
$lang = preg_replace( '/[^a-z]/i', '', $lang );
if ( strlen( $lang ) != 2 ) {
$errors[] = __( 'Language Code has to be 2 characters long!', 'qtranslate' );
if ( ! preg_match( '/^' . QTX_LANG_CODE_FORMAT . '$/', $lang ) ) {
// TODO: still allow 2-letter upper case for existing values, keep only case-sensitive check once legacy fixed in DB
if ( preg_match( '/^[a-z]{2}$/i', $lang ) ) {
$warnings[] = sprintf( _( 'The 2-letter language code "%s" should be lower case (ISO 639-1). Upper case is still allowed for legacy codes but not for new entries.', 'qtranslate' ), $lang );
} else {
$errors[] = __( 'Invalid language code!', 'qtranslate' );
}
}
$original_lang = $lang;
$language_code = $lang;
Expand Down
10 changes: 7 additions & 3 deletions admin/qtx_admin_settings.php
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,12 @@ class="add:the-list: validate">
<div class="form-field">
<label for="language_code"><?php _e( 'Language Code', 'qtranslate' ) ?><br/></label>
<input name="language_code" id="language_code" type="text" value="<?php echo $language_code; ?>"
size="2" maxlength="2"/>
<p class="qtranxs-notes"><?php echo __( '2-Letter <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm#2letter">ISO Language Code</a> for the Language you want to insert. (Example: en)', 'qtranslate' ) . '<br/>' . __( 'The language code is used in language tags and in URLs. It is case sensitive. Use of lower case for the language code is preferable, but not required. The code may be arbitrary chosen by site owner, although it is preferable to use already commonly accepted code if available. Once a language code is created and entries for this language are made, it is difficult to change it, please make a careful decision.', 'qtranslate' ) ?></p>
size="3" maxlength="3"/>
<p class="qtranxs-notes"><?php
printf( _( 'Language %sISO 639 code%s, two-letter (ISO 639-1) or three-letter (ISO 639-2 and 639-3), lower case. (Examples: en, fr, zh, nds)', 'qtranslate' ), '<a href="https://en.wikipedia.org/wiki/ISO_639">', '</a>' );
echo '<br/>';
echo _( 'The language code is used in language tags and in URLs. The code may be arbitrary chosen by site owner, although it is preferable to use already commonly accepted code if available. Once a language code is created and entries for this language are made, it is difficult to change it, please make a careful decision.', 'qtranslate' )
?></p>
</div>
<div class="form-field">
<label for="language_flag"><?php _e( 'Flag', 'qtranslate' ) ?></label>
Expand Down Expand Up @@ -346,7 +350,7 @@ private function add_general_section() {
if ( $url_mode == QTX_URL_DOMAINS ) : ?>
<div style="margin: 10px 0">
<?php
$home_info = qtranxf_get_home_info();
$home_info = qtranxf_get_home_info();
$home_host = $home_info['host'];
foreach ( $q_config['enabled_languages'] as $lang ) {
$id = 'language_domain_' . $lang;
Expand Down
5 changes: 4 additions & 1 deletion admin/qtx_admin_utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,11 @@ function qtranxf_detect_admin_language( $url_info ) {
* Detect language from $_POST['WPLANG'].
*/
if ( isset( $_POST['WPLANG'] ) ) {
// User is switching the language using "Site Language" field on page /wp-admin/options-general.php
// User is switching the language in WordPress using "Site Language" field on page /wp-admin/options-general.php
// The value of WPLANG corresponds to a WP locale such as fr_FR or empty for default (en_US)
$wplang = sanitize_text_field( $_POST['WPLANG'] );
if ( empty( $wplang ) ) {
// TODO check for default locale other than en_US in WordPress
$wplang = 'en';
}
foreach ( $q_config['enabled_languages'] as $language ) {
Expand All @@ -93,6 +95,7 @@ function qtranxf_detect_admin_language( $url_info ) {
break;
}
if ( ! $lang ) {
// TODO extend language code check and resolve, in case the WP locale is not enabled in qTranslate
$lang = substr( $wplang, 0, 2 );
$lang = qtranxf_resolveLangCase( $lang, $cs );
}
Expand Down
14 changes: 9 additions & 5 deletions admin/qtx_admin_utils_db.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,15 @@ function qtranxf_convert_to_b( $text ) {
$text = '';
$lang = false;
$lang_closed = true;
$lang_code = QTX_LANG_CODE_FORMAT;

foreach ( $blocks as $block ) {
if ( preg_match( "#^<!--:([a-z]{2})-->$#ism", $block, $matches ) ) {
if ( preg_match( "#^<!--:($lang_code)-->$#ism", $block, $matches ) ) {
$lang_closed = false;
$lang = $matches[1];
$text .= '[:' . $lang . ']';
continue;
} elseif ( preg_match( "#^\[:([a-z]{2})]$#ism", $block, $matches ) ) {
} elseif ( preg_match( "#^\[:($lang_code)]$#ism", $block, $matches ) ) {
$lang_closed = false;
$lang = $matches[1];
$text .= '[:' . $lang . ']';
Expand Down Expand Up @@ -428,25 +430,27 @@ function qtranxf_extract_languages( $text, $lang2keep ) {
$s = '';
$current_language = false;
$eol = false;
$lang_code = QTX_LANG_CODE_FORMAT;

foreach ( $blocks as $block ) {
// detect c-tags
if ( preg_match( "#^<!--:([a-z]{2})-->$#ism", $block, $matches ) ) {
if ( preg_match( "#^<!--:($lang_code)-->$#ism", $block, $matches ) ) {
$current_language = $matches[1];
if ( isset( $lang2keep[ $current_language ] ) ) {
$s .= $block;
$eol = true;
}
continue;
// detect b-tags
} elseif ( preg_match( "#^\[:([a-z]{2})]$#ism", $block, $matches ) ) {
} elseif ( preg_match( "#^\[:($lang_code)]$#ism", $block, $matches ) ) {
$current_language = $matches[1];
if ( isset( $lang2keep[ $current_language ] ) ) {
$s .= $block;
$eol = true;
}
continue;
// detect s-tags @since 3.3.6 swirly bracket encoding added
} elseif ( preg_match( "#^{:([a-z]{2})}$#ism", $block, $matches ) ) {
} elseif ( preg_match( "#^{:($lang_code)}$#ism", $block, $matches ) ) {
$current_language = $matches[1];
if ( isset( $lang2keep[ $current_language ] ) ) {
$s .= $block;
Expand Down
6 changes: 4 additions & 2 deletions modules/wp-seo/qwpseo-activation.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@

/*
function qwpseo_set_encoding_s($value){
$lang_code = QTX_LANG_CODE_FORMAT;
if(is_string($value)){
$value = preg_replace('/<!--:([a-z]{2})-->/ism', '{:$1}', $value);
$value = preg_replace('/\\[:([a-z]{2})\\]/ism', '{:$1}', $value);
$value = preg_replace('/<!--:($lang_code)-->/ism', '{:$1}', $value);
$value = preg_replace('/\\[:($lang_code)\\]/ism', '{:$1}', $value);
$value = preg_replace('/\\[:\\]|<!--:-->/ism', '{:}', $value);
}elseif(is_array($value)){
foreach($value as $k => $v){
Expand Down
3 changes: 2 additions & 1 deletion modules/wp-seo/qwpseo-admin.php
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ function qwpseo_xmlsitemaps_config() {
* @since 1.1
*/
function qwpseo_encode_swirly( $value ) {
	// Converts square-bracket language tags ('[:xx]' and the closing '[:]') in $value
	// to their swirly-bracket form ('{:xx}' / '{:}') and returns the result.
	//
	// The pattern is built by concatenation on purpose: PHP does not interpolate
	// variables inside single-quoted strings, so a literal '$lang_code' in the
	// pattern would never match a language code.
	// The empty alternative in the capture group makes the closing tag '[:]' match too.
	$pattern = '#\[:(' . QTX_LANG_CODE_FORMAT . '|)]#i';

	return preg_replace( $pattern, '{:$1}', $value );
}
Expand Down
2 changes: 1 addition & 1 deletion qtranslate.php
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
* Designed as interface for other plugin integration. The documentation is available at
* https://github.com/qtranslate/qtranslate-xt/wiki/Integration-Guide/
*/
define( 'QTX_VERSION', '3.8.1' );
define( 'QTX_VERSION', '3.9.0.dev.1' );

if ( ! defined( 'QTRANSLATE_FILE' ) ) {
define( 'QTRANSLATE_FILE', __FILE__ );
Expand Down
35 changes: 22 additions & 13 deletions qtranslate_core.php
Original file line number Diff line number Diff line change
Expand Up @@ -260,30 +260,31 @@ function qtranxf_parse_language_info( &$url_info, $link = false ) {
return false; // url is not from this WP installation
}

$lang_code = QTX_LANG_CODE_FORMAT;
$doredirect = false;

// parse URL lang
if ( ! is_admin() || $link ) {
$url_mode = $q_config['url_mode'];
switch ( $url_mode ) {
case QTX_URL_PATH:
if ( ! empty( $url_info['wp-path'] ) && preg_match( '!^/([a-z]{2})(/|$)!i', $url_info['wp-path'], $match ) ) {
if ( ! empty( $url_info['wp-path'] ) && preg_match( "!^/($lang_code)(/|$)!i", $url_info['wp-path'], $match ) ) {
$lang = qtranxf_resolveLangCase( $match[1], $doredirect );
if ( $lang ) {
$url_info['lang_url'] = $lang;
$url_info['wp-path'] = substr( $url_info['wp-path'], 3 );
$url_info['wp-path'] = substr( $url_info['wp-path'], strlen( $lang ) + 1 );
$url_info['doing_front_end'] = true;
}
}
break;

case QTX_URL_DOMAIN:
if ( ! empty( $url_info['host'] ) ) {
if ( preg_match( '#^([a-z]{2})\.#i', $url_info['host'], $match ) ) {
if ( preg_match( "#^($lang_code)\.#i", $url_info['host'], $match ) ) {
$lang = qtranxf_resolveLangCase( $match[1], $doredirect );
if ( $lang ) {
$url_info['lang_url'] = $lang;
$url_info['host'] = substr( $url_info['host'], 3 );
$url_info['host'] = substr( $url_info['host'], strlen( $lang ) + 1 );
$url_info['doing_front_end'] = true;
}
}
Expand Down Expand Up @@ -336,7 +337,7 @@ function qtranxf_parse_language_info( &$url_info, $link = false ) {
} else if ( isset( $_POST['lang'] ) ) {
$query_lang = qtranxf_resolveLangCase( $_POST['lang'], $doredirect );
}
} elseif ( ! empty( $url_info['query'] ) && preg_match( '/(^|&|&amp;|&#038;|\?)lang=([a-z]{2})/i', $url_info['query'], $match ) ) {
} elseif ( ! empty( $url_info['query'] ) && preg_match( '/(^|&|&amp;|&#038;|\?)lang=($lang_code)/i', $url_info['query'], $match ) ) {
// checked for query mode, see https://github.com/qTranslate-Team/qtranslate-x/issues/288
$query_lang = qtranxf_resolveLangCase( $match[2], $doredirect );
}
Expand Down Expand Up @@ -910,7 +911,8 @@ function qtranxf_url_del_language( &$urlinfo ) {
switch ( $url_mode ) {
case QTX_URL_PATH:
// might already have language information
if ( ! empty( $urlinfo['wp-path'] ) && preg_match( '!^/([a-z]{2})(/|$)!i', $urlinfo['wp-path'], $match ) ) {
$lang_code = QTX_LANG_CODE_FORMAT;
if ( ! empty( $urlinfo['wp-path'] ) && preg_match( "!^/($lang_code)(/|$)!i", $urlinfo['wp-path'], $match ) ) {
if ( qtranxf_isEnabled( $match[1] ) ) {
// found language information, remove it
$urlinfo['wp-path'] = substr( $urlinfo['wp-path'], 3 );
Expand Down Expand Up @@ -1154,7 +1156,8 @@ function qtranxf_convertURLs( $url, $lang = '', $forceadmin = false, $showDefaul
* @since 3.3.6 swirly bracket encoding added
*/
function qtranxf_get_language_blocks( $text ) {
	// Regex fragment matching one language code; embedded in every opening tag below.
	$lang_code = QTX_LANG_CODE_FORMAT;
	// Opening and closing language tags in the three supported encodings:
	// HTML comments, square brackets and swirly brackets.
	$tag_pattern = "#(<!--:$lang_code-->|<!--:-->|\[:$lang_code\]|\[:\]|\{:$lang_code\}|\{:\})#ism";
	// Keep the tags themselves in the output and drop empty chunks between them.
	$split_flags = PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE;

	return preg_split( $tag_pattern, $text, - 1, $split_flags );
}
Expand All @@ -1170,22 +1173,26 @@ function qtranxf_split( $text ) {
**/
function qtranxf_split_blocks( $blocks, &$found = array() ) {
global $q_config;

$result = array();
foreach ( $q_config['enabled_languages'] as $language ) {
$result[ $language ] = '';
}

$current_language = false;
$lang_code = QTX_LANG_CODE_FORMAT;

foreach ( $blocks as $block ) {
// detect c-tags
if ( preg_match( "#^<!--:([a-z]{2})-->$#ism", $block, $matches ) ) {
if ( preg_match( "#^<!--:($lang_code)-->$#ism", $block, $matches ) ) {
$current_language = $matches[1];
continue;
// detect b-tags
} elseif ( preg_match( "#^\[:([a-z]{2})]$#ism", $block, $matches ) ) {
} elseif ( preg_match( "#^\[:($lang_code)]$#ism", $block, $matches ) ) {
$current_language = $matches[1];
continue;
// detect s-tags @since 3.3.6 swirly bracket encoding added
} elseif ( preg_match( "#^{:([a-z]{2})}$#ism", $block, $matches ) ) {
} elseif ( preg_match( "#^{:($lang_code)}$#ism", $block, $matches ) ) {
$current_language = $matches[1];
continue;
}
Expand Down Expand Up @@ -1226,17 +1233,19 @@ function qtranxf_split_blocks( $blocks, &$found = array() ) {
function qtranxf_split_languages( $blocks ) {
$result = array();
$current_language = false;
$lang_code = QTX_LANG_CODE_FORMAT;

foreach ( $blocks as $block ) {
// detect c-tags
if ( preg_match( "#^<!--:([a-z]{2})-->$#ism", $block, $matches ) ) {
if ( preg_match( "#^<!--:($lang_code)-->$#ism", $block, $matches ) ) {
$current_language = $matches[1];
continue;
// detect b-tags
} elseif ( preg_match( "#^\[:([a-z]{2})]$#ism", $block, $matches ) ) {
} elseif ( preg_match( "#^\[:($lang_code)]$#ism", $block, $matches ) ) {
$current_language = $matches[1];
continue;
// detect s-tags @since 3.3.6 swirly bracket encoding added
} elseif ( preg_match( "#^{:([a-z]{2})}$#ism", $block, $matches ) ) {
} elseif ( preg_match( "#^{:($lang_code)}$#ism", $block, $matches ) ) {
$current_language = $matches[1];
continue;
}
Expand Down
3 changes: 2 additions & 1 deletion qtranslate_frontend.php
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,8 @@ function qtranxf_convertBlogInfoURL( $url, $what ) {
* Moved here from qtranslate_hooks.php and modified.
*/
function qtranxf_pagenum_link( $url ) {
	$lang_code = QTX_LANG_CODE_FORMAT;
	// Kind of ugly fix for function get_pagenum_link in /wp-includes/link-template.php:
	// a "?lang=<code>/" segment inside the URL is collapsed into a plain "/".
	// Maybe we should cancel filter 'bloginfo_url' instead?
	$without_lang_query = preg_replace( "#\?lang=$lang_code/#i", '/', $url );

	return qtranxf_convertURL( $without_lang_query );
}
Expand Down
3 changes: 3 additions & 0 deletions qtranslate_options.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@

define( 'QTX_IGNORE_FILE_TYPES', 'gif,jpg,jpeg,png,svg,pdf,swf,tif,rar,zip,7z,mpg,divx,mpeg,avi,css,js,mp3,mp4,apk' );

// Language code format: ISO 639-1 (2 alpha), 639-2 or 639-3 (3 alpha)
define( 'QTX_LANG_CODE_FORMAT', '[a-z]{2,3}' );


global $q_config;
global $qtranslate_options;
Expand Down
4 changes: 3 additions & 1 deletion qtranslate_utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,9 @@ function qtranxf_external_host( $host ) {
}

function qtranxf_isMultilingual( $str ) {
	// Tells whether $str contains at least one opening language tag, in any of the
	// three encodings: '<!--:xx-->', '[:xx]' or '{:xx}'. Closing tags alone do not count.
	// Returns the preg_match() result: 1 on a match, 0 otherwise, false on regex error.
	//
	// The language code part comes from QTX_LANG_CODE_FORMAT so that every code
	// accepted elsewhere in the plugin (not only 2-letter ones) is detected here.
	$code = QTX_LANG_CODE_FORMAT;

	return preg_match( '/<!--:' . $code . '-->|\[:' . $code . ']|{:' . $code . '}/im', $str );
}

function qtranxf_is_multilingual_deep( $value ) {
Expand Down

0 comments on commit 43d532d

Please sign in to comment.