From 4c68a1881f4efba5cf900fc19dafbef66e913a05 Mon Sep 17 00:00:00 2001 From: Alessandro Astone Date: Tue, 20 Aug 2024 15:02:50 +0200 Subject: [PATCH 1/2] Set forgiving encoding fallback when parsing Po file Before reading the expected encoding from the file itself, we first try to read it as UTF-8 so we can at least get to read the line that specifies the encoding. For some reason, $PerlIO::encoding::fallback is set to FB_CROAK when we reach this part of the code so the program would instead exit at the first unknown character. Unless we were requested a specific encoding, do not even print any warning about it. --- lib/Locale/Po4a/Po.pm | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/Locale/Po4a/Po.pm b/lib/Locale/Po4a/Po.pm index e5d025359..166aef66c 100644 --- a/lib/Locale/Po4a/Po.pm +++ b/lib/Locale/Po4a/Po.pm @@ -318,7 +318,9 @@ sub read { my $filename = shift or croak wrap_mod( "po4a::po", dgettext( "po4a", "Please provide a non-null filename" ) ); - my $charset = shift // 'UTF-8'; + my $charset = shift; + my $did_request_charset = defined($charset); + $charset //= 'UTF-8'; $charset = 'UTF-8' if $charset eq "CHARSET"; warn "Read $filename with encoding: $charset" if $debug{'encoding'}; @@ -340,6 +342,12 @@ sub read { unless ( $? == 0 ); } + # Allow reading with the wrong encoding. + # We will read the expected encoding from the file itself later. + use Encode qw(:fallback_all); + use PerlIO::encoding; + $PerlIO::encoding::fallback = $did_request_charset ? 
WARN_ON_ERR : FB_DEFAULT; + my $fh; if ( $filename eq '-' ) { $fh = *STDIN; @@ -366,6 +374,10 @@ sub read { $self->read( $filename, $detected_charset, $checkvalidity ); return; } + } elsif ( !$did_request_charset ) { + # Read again, this time explicitly requesting UTF-8 encoding so that warnings are printed if the charset does not match + $self->read( $filename, 'UTF-8', $checkvalidity ); + return; } if ( $pofile =~ m/^\N{BOM}/ ) { # UTF-8 BOM detected From 9f89cdd3a073a20be9106ae288ba06a0579b46de Mon Sep 17 00:00:00 2001 From: Alessandro Astone Date: Tue, 3 Sep 2024 12:35:45 +0200 Subject: [PATCH 2/2] Add test case for charset=iso-8859-1 --- t/charset.t | 7 ++++++ t/charset/implicit-iso8859/_iso8859.en.pod | 18 +++++++++++++++ t/charset/implicit-iso8859/_iso8859.up.pod | 18 +++++++++++++++ t/charset/implicit-iso8859/_output | 5 ++++ t/charset/implicit-iso8859/iso8859.en.po | 27 ++++++++++++++++++++++ t/charset/implicit-iso8859/iso8859.pod | 4 ++++ t/charset/implicit-iso8859/iso8859.pot | 27 ++++++++++++++++++++++ t/charset/implicit-iso8859/iso8859.up.po | 27 ++++++++++++++++++++++ t/charset/implicit-iso8859/po4a.conf | 5 ++++ 9 files changed, 138 insertions(+) create mode 100644 t/charset/implicit-iso8859/_iso8859.en.pod create mode 100644 t/charset/implicit-iso8859/_iso8859.up.pod create mode 100644 t/charset/implicit-iso8859/_output create mode 100644 t/charset/implicit-iso8859/iso8859.en.po create mode 100644 t/charset/implicit-iso8859/iso8859.pod create mode 100644 t/charset/implicit-iso8859/iso8859.pot create mode 100644 t/charset/implicit-iso8859/iso8859.up.po create mode 100644 t/charset/implicit-iso8859/po4a.conf diff --git a/t/charset.t b/t/charset.t index e35a85353..afca52c8a 100644 --- a/t/charset.t +++ b/t/charset.t @@ -53,6 +53,13 @@ push @tests, 'format' => 'yaml', 'options' => "-M UTF-8", 'input' => "charset/yaml/utf8.yaml", + }, + { + 'doc' => 'implicit encoding: iso8859', + 'po4a.conf' => 'charset/implicit-iso8859/po4a.conf', + 'closed_path' => 
'charset/*/', + 'options' => '--keep 0', + 'expected_files' => 'iso8859.pot iso8859.en.po iso8859.up.pod ', }; run_all_tests(@tests); diff --git a/t/charset/implicit-iso8859/_iso8859.en.pod b/t/charset/implicit-iso8859/_iso8859.en.pod new file mode 100644 index 000000000..0d0d76495 --- /dev/null +++ b/t/charset/implicit-iso8859/_iso8859.en.pod @@ -0,0 +1,18 @@ + + ***************************************************** + * GENERATED FILE, DO NOT EDIT * + * THIS IS NO SOURCE FILE, BUT RESULT OF COMPILATION * + ***************************************************** + +This file was generated by po4a(7). Do not store it (in VCS, for example), +but store the PO file used as source file by po4a-translate. + +In fact, consider this as a binary, and the PO file as a regular .c file: +If the PO get lost, keeping this translation up-to-date will be harder. + +=encoding UTF-8 + +=head1 Test title + +blebleble lalala + diff --git a/t/charset/implicit-iso8859/_iso8859.up.pod b/t/charset/implicit-iso8859/_iso8859.up.pod new file mode 100644 index 000000000..f7795ffbc --- /dev/null +++ b/t/charset/implicit-iso8859/_iso8859.up.pod @@ -0,0 +1,18 @@ + + ***************************************************** + * GENERATED FILE, DO NOT EDIT * + * THIS IS NO SOURCE FILE, BUT RESULT OF COMPILATION * + ***************************************************** + +This file was generated by po4a(7). Do not store it (in VCS, for example), +but store the PO file used as source file by po4a-translate. + +In fact, consider this as a binary, and the PO file as a regular .c file: +If the PO get lost, keeping this translation up-to-date will be harder. + +=encoding iso-8859-1 + +=head1 S + +S + diff --git a/t/charset/implicit-iso8859/_output b/t/charset/implicit-iso8859/_output new file mode 100644 index 000000000..2d509ae7e --- /dev/null +++ b/t/charset/implicit-iso8859/_output @@ -0,0 +1,5 @@ +Updating iso8859.pot: (2 entries) +Updating iso8859.en.po: 2 translated messages. 
+Updating iso8859.nb.po: 2 translated messages. +iso8859.en.pod is 100% translated (2 strings). +iso8859.nb.pod is 100% translated (2 strings). diff --git a/t/charset/implicit-iso8859/iso8859.en.po b/t/charset/implicit-iso8859/iso8859.en.po new file mode 100644 index 000000000..52307f9af --- /dev/null +++ b/t/charset/implicit-iso8859/iso8859.en.po @@ -0,0 +1,27 @@ +# English translations for implicit-iso package +# Copyright (C) 2024 Free Software Foundation, Inc. +# This file is distributed under the same license as the implicit-iso package. +# Automatically generated, 2024. +# +msgid "" +msgstr "" +"Project-Id-Version: implicit-iso 8859\n" +"POT-Creation-Date: 2024-09-03 11:19+0200\n" +"PO-Revision-Date: 2024-09-03 11:19+0200\n" +"Last-Translator: Automatically generated\n" +"Language-Team: none\n" +"Language: en\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" + +#. type: =head1 +#: iso8859.pod:1 +msgid "Test title" +msgstr "Test title" + +#. type: textblock +#: iso8859.pod:3 +msgid "blebleble lalala" +msgstr "blebleble lalala" diff --git a/t/charset/implicit-iso8859/iso8859.pod b/t/charset/implicit-iso8859/iso8859.pod new file mode 100644 index 000000000..7941d680e --- /dev/null +++ b/t/charset/implicit-iso8859/iso8859.pod @@ -0,0 +1,4 @@ +=head1 Test title + +blebleble +lalala diff --git a/t/charset/implicit-iso8859/iso8859.pot b/t/charset/implicit-iso8859/iso8859.pot new file mode 100644 index 000000000..fb38d2c95 --- /dev/null +++ b/t/charset/implicit-iso8859/iso8859.pot @@ -0,0 +1,27 @@ +# SOME DESCRIPTIVE TITLE +# Copyright (C) YEAR Free Software Foundation, Inc. +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2024-09-25 09:50+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#. type: =head1 +#: iso8859.pod:1 +msgid "Test title" +msgstr "" + +#. type: textblock +#: iso8859.pod:3 +msgid "blebleble lalala" +msgstr "" diff --git a/t/charset/implicit-iso8859/iso8859.up.po b/t/charset/implicit-iso8859/iso8859.up.po new file mode 100644 index 000000000..7d7c02ccb --- /dev/null +++ b/t/charset/implicit-iso8859/iso8859.up.po @@ -0,0 +1,27 @@ +# Norwegian Bokmal translations for implicit-iso package +# Copyright (C) 2024 Free Software Foundation, Inc. +# This file is distributed under the same license as the implicit-iso package. +# Automatically generated, 2024. +# +msgid "" +msgstr "" +"Project-Id-Version: implicit-iso 8859\n" +"POT-Creation-Date: 2024-09-03 10:10+0200\n" +"PO-Revision-Date: 2024-09-03 10:10+0200\n" +"Last-Translator: Automatically generated\n" +"Language-Team: none\n" +"Language: nb\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=iso-8859-1\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" + +#. type: =head1 +#: iso8859.pod:1 +msgid "Test title" +msgstr "Tést title" + +#. type: textblock +#: iso8859.pod:3 +msgid "blebleble lalala" +msgstr "blèbleble làlala" diff --git a/t/charset/implicit-iso8859/po4a.conf b/t/charset/implicit-iso8859/po4a.conf new file mode 100644 index 000000000..e75e7196a --- /dev/null +++ b/t/charset/implicit-iso8859/po4a.conf @@ -0,0 +1,5 @@ +[po4a_paths] iso8859.pot en:iso8859.en.po up:iso8859.up.po + +[options] opt:"--msgmerge-opt --silent" + +[type:pod] iso8859.pod en:iso8859.en.pod up:iso8859.up.pod