diff --git a/src/main/java/org/wordinator/xml2docx/generator/DocxConstants.java b/src/main/java/org/wordinator/xml2docx/generator/DocxConstants.java index daa4f8d..3d6d069 100644 --- a/src/main/java/org/wordinator/xml2docx/generator/DocxConstants.java +++ b/src/main/java/org/wordinator/xml2docx/generator/DocxConstants.java @@ -123,6 +123,7 @@ public final class DocxConstants { public static final QName QNAME_XSLT_FORMAT_ATT = new QName("", "xslt-format"); // Elements: + public static final QName QNAME_BODY_ELEM = new QName(SIMPLE_WP_NS, "body"); public static final QName QNAME_COLS_ELEM = new QName(SIMPLE_WP_NS, "cols"); public static final QName QNAME_COL_ELEM = new QName(SIMPLE_WP_NS, "col"); public static final QName QNAME_CORE_PROPERTIES_ELEM = new QName(SIMPLE_WP_NS, "core-properties"); @@ -137,6 +138,7 @@ public final class DocxConstants { public static final QName QNAME_W_P_ELEM = new QName(OO_WPML_NS, "p"); public static final QName QNAME_R_ELEM = new QName(OO_WPML_NS, "r"); public static final QName QNAME_ROW_ELEM = new QName(SIMPLE_WP_NS, "row"); + public static final QName QNAME_SECTION_ELEM = new QName(SIMPLE_WP_NS, "section"); public static final QName QNAME_T_ELEM = new QName(OO_WPML_NS, "t"); // w:t -- text element public static final QName QNAME_TABLE_ELEM = new QName(SIMPLE_WP_NS, "table"); // w:table -- table element public static final QName QNAME_THEAD_ELEM = new QName(SIMPLE_WP_NS, "thead"); diff --git a/src/main/java/org/wordinator/xml2docx/generator/DocxGenerator.java b/src/main/java/org/wordinator/xml2docx/generator/DocxGenerator.java index 088d13d..9fa85c1 100644 --- a/src/main/java/org/wordinator/xml2docx/generator/DocxGenerator.java +++ b/src/main/java/org/wordinator/xml2docx/generator/DocxGenerator.java @@ -41,7 +41,9 @@ import org.apache.poi.ss.formula.eval.NotImplementedException; import org.apache.poi.util.Units; import org.apache.poi.wp.usermodel.HeaderFooterType; +import org.apache.poi.xwpf.usermodel.BodyElementType; import org.apache.poi.xwpf.usermodel.BreakType; +import org.apache.poi.xwpf.usermodel.IBodyElement; import org.apache.poi.xwpf.usermodel.ParagraphAlignment; import org.apache.poi.xwpf.usermodel.UnderlinePatterns; import org.apache.poi.xwpf.usermodel.XWPFAbstractFootnoteEndnote; @@ -451,7 +453,7 @@ private void constructDoc(XWPFDocument doc, XmlObject xml) throws DocxGeneration } cursor.pop(); cursor.push(); - cursor.toChild(new QName(DocxConstants.SIMPLE_WP_NS, "body")); + cursor.toChild(DocxConstants.QNAME_BODY_ELEM); setDocSettings(doc, xml); handleBody(doc, cursor.getObject()); @@ -464,13 +466,79 @@ private void constructDoc(XWPFDocument doc, XmlObject xml) throws DocxGeneration } else { CTDocument1 document = doc.getDocument(); CTBody body = (document.isSetBody() ? document.getBody() : document.addNewBody()); - @SuppressWarnings("unused") - CTSectPr sectPr = (body.isSetSectPr() ? body.getSectPr() : body.addNewSectPr()); + if (body.isSetSectPr()) { + body.getSectPr(); + } else { + body.addNewSectPr(); + } // At this point let Word fill in the details. } cursor.pop(); + // if the document has multiple sections we need to move the section + // properties from the last paragraph to directly within the body + XWPFParagraph lastPara = getLastParagraph(doc); + if (hasMultipleSections(xml) && lastPara != null && lastPara.getCTPPr().isSetSectPr()) { + CTSectPr sectPr = lastPara.getCTPPr().getSectPr(); + CTBody body = doc.getDocument().getBody(); + mergeSectPrs(body.getSectPr(), sectPr); + lastPara.getCTPPr().unsetSectPr(); + } + } + + private boolean hasMultipleSections(XmlObject xml) { + XmlCursor cursor = xml.newCursor(); + cursor.toFirstChild(); // go to root element + + if (!cursor.toChild(DocxConstants.QNAME_BODY_ELEM)) { + return false; + } + if (!cursor.toFirstChild()) { + return false; + } + int sections = cursor.getName().equals(DocxConstants.QNAME_SECTION_ELEM) ? 1 : 0; + while (cursor.toNextSibling() && sections < 2) { + if (cursor.getName().equals(DocxConstants.QNAME_SECTION_ELEM)) { + sections++; + } + } + return sections >= 2; + } + + private XWPFParagraph getLastParagraph(XWPFDocument doc) { + XWPFParagraph lastPara = null; + for (IBodyElement elem : doc.getBodyElements()) { + if (elem.getElementType() == BodyElementType.PARAGRAPH) { + lastPara = (XWPFParagraph) elem; + } + } + return lastPara; + } + + // this method does not merge all section properties, but I hope it + // does merge those that wordinator actually sets + private void mergeSectPrs(CTSectPr toSectPr, CTSectPr fromSectPr) { + if (fromSectPr.isSetPgMar()) { + toSectPr.setPgMar(fromSectPr.getPgMar()); + } + if (fromSectPr.isSetPgSz()) { + toSectPr.setPgSz(fromSectPr.getPgSz()); + } + if (fromSectPr.isSetPgNumType()) { + toSectPr.setPgNumType(fromSectPr.getPgNumType()); + } + + for (CTHdrFtrRef ref : fromSectPr.getHeaderReferenceList()) { + int ix = toSectPr.getHeaderReferenceList().size(); + toSectPr.insertNewHeaderReference(ix); + toSectPr.setHeaderReferenceArray(ix, ref); + } + for (CTHdrFtrRef ref : fromSectPr.getFooterReferenceList()) { + int ix = toSectPr.getFooterReferenceList().size(); + toSectPr.insertNewFooterReference(ix); + toSectPr.setFooterReferenceArray(ix, ref); + } } /** diff --git a/src/test/java/org/wordinator/xml2docx/TestDocxGenerator.java b/src/test/java/org/wordinator/xml2docx/TestDocxGenerator.java index 81b0735..189d052 100644 --- a/src/test/java/org/wordinator/xml2docx/TestDocxGenerator.java +++ b/src/test/java/org/wordinator/xml2docx/TestDocxGenerator.java @@ -39,11 +39,14 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFldChar; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPageMar; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPageNumber; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPageSz; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; import org.openxmlformats.schemas.wordprocessingml.x2006.main.STFldCharType; -// import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.STNumberFormat; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.STPageOrientation; import org.wordinator.xml2docx.generator.DocxConstants; import org.wordinator.xml2docx.generator.DocxGenerator; @@ -123,8 +126,8 @@ public void testMakeDocxWithSections() throws Exception { CTSectPr docSectPr = doc.getDocument().getBody().getSectPr(); assertNotNull("Expected to find a docSectPr element", docSectPr); - assertEquals("Expected 3 headers", 3, docSectPr.getHeaderReferenceList().size()); - assertEquals("Expected 3 footers", 3, docSectPr.getFooterReferenceList().size()); + assertEquals("Expected 6 headers", 6, docSectPr.getHeaderReferenceList().size()); + assertEquals("Expected 6 footers", 6, docSectPr.getFooterReferenceList().size()); // Document-level headers and footers: XWPFHeaderFooterPolicy hfPolicy = doc.getHeaderFooterPolicy(); @@ -824,6 +827,41 @@ public void testNestedTableParaBeforeTable() throws Exception { assertEquals(BodyElementType.PARAGRAPH, elem.getElementType()); } + @Test + public void testMultiSectionPageProps() throws Exception { + // verifies the solution to issues #68 and #117 + XWPFDocument doc = convert("simplewp/simplewpml-multisection-01.swpx", "out/multisection-01.docx"); + + List contents = doc.getBodyElements(); + assertEquals(2, contents.size()); + + Iterator it = contents.iterator(); + + IBodyElement elem = it.next(); + assertEquals(BodyElementType.PARAGRAPH, elem.getElementType()); + XWPFParagraph p = (XWPFParagraph) elem; + assertEquals("This is the first page numbered in Roman lower-case", p.getText()); + assertTrue("first para lacks section properties", p.getCTPPr().isSetSectPr()); + + elem = it.next(); + assertEquals(BodyElementType.PARAGRAPH, elem.getElementType()); + p = (XWPFParagraph) elem; + assertEquals("This is the first page numbered in decimal", p.getText()); + assertFalse("second para has section properties", p.getCTPPr().isSetSectPr()); + + CTSectPr sectPr = doc.getDocument().getBody().getSectPr(); + CTPageNumber pgNum = sectPr.getPgNumType(); + assertEquals(BigInteger.valueOf(1), pgNum.getStart()); + assertEquals(STNumberFormat.Enum.forString("decimal"), pgNum.getFmt()); + + // FIXME: check header & footer (a bit tricky) + + CTPageSz pageSz = sectPr.getPgSz(); + assertEquals(STPageOrientation.Enum.forString("portrait"), pageSz.getOrient()); + assertEquals(BigInteger.valueOf(11906), pageSz.getW()); + assertEquals(BigInteger.valueOf(16838), pageSz.getH()); + } + // ===== INTERNAL UTILITIES private XWPFDocument convert(String infile, String outfile) throws Exception { diff --git a/src/test/resources/simplewp/simplewpml-multisection-01.swpx b/src/test/resources/simplewp/simplewpml-multisection-01.swpx new file mode 100644 index 0000000..b3e985a --- /dev/null +++ b/src/test/resources/simplewp/simplewpml-multisection-01.swpx @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + This is the first page numbered in Roman lower-case + + + + + + + + + + + + + + + + This is the first page numbered in decimal + + + +