Normalize soft breaks to space

This change makes the extracted messages ignore any line wrapping done for readability of the Markdown source. A paragraph wrapped over two lines ("This is" followed by "a paragraph." on the next line) and the same paragraph written on a single line ("This is a paragraph.") now become the same message in the PO file. This makes it possible for people to freely reformat the source files without having to worry about invalidating existing translations. Part of #19.
google · mgeisler · May 2, 2023 · May 1, 2023 · May 1, 2023 · fa5b20156a2fb38846caed5b587a723df50d8b9f
commit fa5b20156a2fb38846caed5b587a723df50d8b9f
diff --git a/src/bin/mdbook-gettext.rs b/src/bin/mdbook-gettext.rs
@@ -210,20 +210,8 @@ mod tests {
     fn test_translate_multiple_paragraphs_extra_newlines() {
         // Notice how the translated paragraphs have more lines.
         let catalog = create_catalog(&[
-            (
-                "first\n\
-                 paragraph",
-                "FIRST\n\
-                 TRANSLATED\n\
-                 PARAGRAPH",
-            ),
-            (
-                "last\n\
-                 paragraph",
-                "LAST\n\
-                 TRANSLATED\n\
-                 PARAGRAPH",
-            ),
+            ("first paragraph", "FIRST TRANSLATED PARAGRAPH"),
+            ("last paragraph", "LAST TRANSLATED PARAGRAPH"),
         ]);
         // Paragraph separation is normalized when translating.
         assert_eq!(
@@ -236,13 +224,9 @@ mod tests {
                  paragraph\n",
                 &catalog
             ),
-            "FIRST\n\
-             TRANSLATED\n\
-             PARAGRAPH\n\
+            "FIRST TRANSLATED PARAGRAPH\n\
              \n\
-             LAST\n\
-             TRANSLATED\n\
-             PARAGRAPH"
+             LAST TRANSLATED PARAGRAPH"
         );
     }
 

diff --git a/src/bin/mdbook-xgettext.rs b/src/bin/mdbook-xgettext.rs
@@ -216,8 +216,7 @@ mod tests {
             &[
                 "The Foo Chapter",
                 "How to Foo",
-                "The first paragraph about Foo.\n\
-                 Still the first paragraph."
+                "The first paragraph about Foo. Still the first paragraph.",
             ]
         );
 

diff --git a/src/lib.rs b/src/lib.rs
@@ -47,7 +47,7 @@ use pulldown_cmark_to_cmark::{cmark_resume_with_options, Options, State};
 ///     vec![
 ///         (1, Event::Start(Tag::Paragraph)),
 ///         (1, Event::Text("Hello,".into())),
-///         (1, Event::SoftBreak),
+///         (1, Event::Text(" ".into())),
 ///         (2, Event::Text("world!".into())),
 ///         (1, Event::End(Tag::Paragraph)),
 ///     ]
@@ -75,6 +75,10 @@ pub fn extract_events<'a>(text: &'a str, state: Option<State<'static>>) -> Vec<(
             .into_offset_iter()
             .map(|(event, range)| {
                 let lineno = offsets.partition_point(|&o| o < range.start) + 1;
+                let event = match event {
+                    Event::SoftBreak => Event::Text(" ".into()),
+                    _ => event,
+                };
                 (lineno, event)
             })
             .collect(),
@@ -285,8 +289,8 @@ pub fn reconstruct_markdown(
 /// assert_eq!(
 ///     messages,
 ///     vec![
-///         (1, "Hello, this is a\nlist in a quote.".into()),
-///         (4, "This is the second\nparagraph.".into()),
+///         (1, "Hello, this is a list in a quote.".into()),
+///         (4, "This is the second paragraph.".into()),
 ///     ],
 /// );
 /// ```
@@ -349,7 +353,7 @@ mod tests {
              \n\
              Second paragraph.",
             vec![
-                (1, "This is\nthe first\nparagraph.🦀"),
+                (1, "This is the first paragraph.🦀"),
                 (5, "Second paragraph."),
             ],
         );
@@ -363,7 +367,7 @@ mod tests {
              \n\
              This is the\n\
              first paragraph.",
-            vec![(4, "This is the\nfirst paragraph.")],
+            vec![(4, "This is the first paragraph.")],
         );
     }
 
@@ -374,7 +378,7 @@ mod tests {
              a paragraph.\n\
              \n\
              \n",
-            vec![(1, "This is\na paragraph.")],
+            vec![(1, "This is a paragraph.")],
         );
     }
 
@@ -594,7 +598,7 @@ The document[^1] text.
 "#,
             vec![
                 (1, "Item 1."),
-                (2, "Item 2,\ntwo lines."),
+                (2, "Item 2, two lines."),
                 (5, "Sub 1."),
                 (6, "Sub 2."),
             ],