Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/issue 117 last section properties #153

Merged
merged 5 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ public final class DocxConstants {
public static final QName QNAME_XSLT_FORMAT_ATT = new QName("", "xslt-format");

// Elements:
public static final QName QNAME_BODY_ELEM = new QName(SIMPLE_WP_NS, "body");
public static final QName QNAME_COLS_ELEM = new QName(SIMPLE_WP_NS, "cols");
public static final QName QNAME_COL_ELEM = new QName(SIMPLE_WP_NS, "col");
public static final QName QNAME_CORE_PROPERTIES_ELEM = new QName(SIMPLE_WP_NS, "core-properties");
Expand All @@ -137,6 +138,7 @@ public final class DocxConstants {
public static final QName QNAME_W_P_ELEM = new QName(OO_WPML_NS, "p");
public static final QName QNAME_R_ELEM = new QName(OO_WPML_NS, "r");
public static final QName QNAME_ROW_ELEM = new QName(SIMPLE_WP_NS, "row");
public static final QName QNAME_SECTION_ELEM = new QName(SIMPLE_WP_NS, "section");
public static final QName QNAME_T_ELEM = new QName(OO_WPML_NS, "t"); // w:t -- text element
public static final QName QNAME_TABLE_ELEM = new QName(SIMPLE_WP_NS, "table"); // w:table -- table element
public static final QName QNAME_THEAD_ELEM = new QName(SIMPLE_WP_NS, "thead");
Expand Down
74 changes: 71 additions & 3 deletions src/main/java/org/wordinator/xml2docx/generator/DocxGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@
import org.apache.poi.ss.formula.eval.NotImplementedException;
import org.apache.poi.util.Units;
import org.apache.poi.wp.usermodel.HeaderFooterType;
import org.apache.poi.xwpf.usermodel.BodyElementType;
import org.apache.poi.xwpf.usermodel.BreakType;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.ParagraphAlignment;
import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
import org.apache.poi.xwpf.usermodel.XWPFAbstractFootnoteEndnote;
Expand Down Expand Up @@ -451,7 +453,7 @@ private void constructDoc(XWPFDocument doc, XmlObject xml) throws DocxGeneration
}
cursor.pop();
cursor.push();
cursor.toChild(new QName(DocxConstants.SIMPLE_WP_NS, "body"));
cursor.toChild(DocxConstants.QNAME_BODY_ELEM);
setDocSettings(doc, xml);
handleBody(doc, cursor.getObject());

Expand All @@ -464,13 +466,79 @@ private void constructDoc(XWPFDocument doc, XmlObject xml) throws DocxGeneration
} else {
CTDocument1 document = doc.getDocument();
CTBody body = (document.isSetBody() ? document.getBody() : document.addNewBody());
@SuppressWarnings("unused")
CTSectPr sectPr = (body.isSetSectPr() ? body.getSectPr() : body.addNewSectPr());
if (body.isSetSectPr()) {
body.getSectPr();
} else {
body.addNewSectPr();
}
// At this point let Word fill in the details.

}
cursor.pop();

// if the document has multiple sections we need to move the section
// properties from the last paragraph to directly within the body
XWPFParagraph lastPara = getLastParagraph(doc);
if (hasMultipleSections(xml) && lastPara != null && lastPara.getCTPPr().isSetSectPr()) {
CTSectPr sectPr = lastPara.getCTPPr().getSectPr();
CTBody body = doc.getDocument().getBody();
mergeSectPrs(body.getSectPr(), sectPr);
lastPara.getCTPPr().unsetSectPr();
}
}

/**
 * Reports whether the body of the input document contains at least two
 * section elements as direct children.
 *
 * <p>Only direct children of the root element's body element are examined;
 * counting stops as soon as a second section has been seen.</p>
 *
 * @param xml the parsed input document
 * @return {@code true} if two or more section elements are found directly
 *         within the body, {@code false} otherwise (including when there is
 *         no body element or the body has no child elements)
 */
private boolean hasMultipleSections(XmlObject xml) {
  XmlCursor cursor = xml.newCursor();
  try {
    cursor.toFirstChild(); // go to root element

    if (!cursor.toChild(DocxConstants.QNAME_BODY_ELEM)) {
      return false;
    }
    if (!cursor.toFirstChild()) {
      return false;
    }

    int sections = 0;
    do {
      // Constant on the left so a cursor position without a name cannot throw.
      if (DocxConstants.QNAME_SECTION_ELEM.equals(cursor.getName())) {
        sections++;
      }
    } while (sections < 2 && cursor.toNextSibling());

    return sections >= 2;
  } finally {
    // Cursors hold on to resources of the underlying XML store and must be
    // released explicitly on every exit path, including the early returns.
    cursor.dispose();
  }
}

/**
 * Finds the paragraph that comes last among the document's body elements.
 *
 * @param doc the document whose body elements are searched
 * @return the last body element of type {@code PARAGRAPH}, or {@code null}
 *         if the document has no paragraph among its body elements
 */
private XWPFParagraph getLastParagraph(XWPFDocument doc) {
  // Scan from the end: the first paragraph met this way is the last one
  // in document order, so we can return immediately.
  for (int i = doc.getBodyElements().size() - 1; i >= 0; i--) {
    IBodyElement candidate = doc.getBodyElements().get(i);
    if (candidate.getElementType() == BodyElementType.PARAGRAPH) {
      return (XWPFParagraph) candidate;
    }
  }
  return null;
}

/**
 * Copies selected section properties from one section-properties element
 * into another.
 *
 * <p>This method does not merge all section properties, but I hope it
 * does merge those that wordinator actually sets. What is handled:</p>
 * <ul>
 *   <li>page margins, page size and page-number type: each is copied only
 *       when it is set on {@code fromSectPr}, replacing whatever
 *       {@code toSectPr} had;</li>
 *   <li>header and footer references: every reference on
 *       {@code fromSectPr} is appended after the references already
 *       present on {@code toSectPr}.</li>
 * </ul>
 *
 * <p>NOTE(review): references already present on {@code toSectPr} are not
 * removed, so after the merge it can hold more references than
 * {@code fromSectPr} did. Confirm that consumers of the generated document
 * resolve such accumulated references the way we intend.</p>
 *
 * @param toSectPr   the section properties that receive the values
 * @param fromSectPr the section properties the values are read from
 */
private void mergeSectPrs(CTSectPr toSectPr, CTSectPr fromSectPr) {
// Single-valued page settings: overwrite only when the source defines them.
if (fromSectPr.isSetPgMar()) {
toSectPr.setPgMar(fromSectPr.getPgMar());
}
if (fromSectPr.isSetPgSz()) {
toSectPr.setPgSz(fromSectPr.getPgSz());
}
if (fromSectPr.isSetPgNumType()) {
toSectPr.setPgNumType(fromSectPr.getPgNumType());
}

// Header references: create a new slot at the end of the target's list
// (index == current size), then fill that slot from the source reference.
for (CTHdrFtrRef ref : fromSectPr.getHeaderReferenceList()) {
int ix = toSectPr.getHeaderReferenceList().size();
toSectPr.insertNewHeaderReference(ix);
toSectPr.setHeaderReferenceArray(ix, ref);
}
// Footer references: same append-then-fill approach as for headers.
for (CTHdrFtrRef ref : fromSectPr.getFooterReferenceList()) {
int ix = toSectPr.getFooterReferenceList().size();
toSectPr.insertNewFooterReference(ix);
toSectPr.setFooterReferenceArray(ix, ref);
}
}

/**
Expand Down
44 changes: 41 additions & 3 deletions src/test/java/org/wordinator/xml2docx/TestDocxGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,14 @@
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFldChar;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPageMar;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPageNumber;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPageSz;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STFldCharType;
// import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STNumberFormat;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STPageOrientation;
import org.wordinator.xml2docx.generator.DocxConstants;
import org.wordinator.xml2docx.generator.DocxGenerator;

Expand Down Expand Up @@ -123,8 +126,8 @@ public void testMakeDocxWithSections() throws Exception {

CTSectPr docSectPr = doc.getDocument().getBody().getSectPr();
assertNotNull("Expected to find a docSectPr element", docSectPr);
assertEquals("Expected 3 headers", 3, docSectPr.getHeaderReferenceList().size());
assertEquals("Expected 3 footers", 3, docSectPr.getFooterReferenceList().size());
assertEquals("Expected 6 headers", 6, docSectPr.getHeaderReferenceList().size());
assertEquals("Expected 6 footers", 6, docSectPr.getFooterReferenceList().size());

// Document-level headers and footers:
XWPFHeaderFooterPolicy hfPolicy = doc.getHeaderFooterPolicy();
Expand Down Expand Up @@ -824,6 +827,41 @@ public void testNestedTableParaBeforeTable() throws Exception {
assertEquals(BodyElementType.PARAGRAPH, elem.getElementType());
}

/**
 * Verifies the solution to issues #68 and #117: in a document with two
 * sections, the first paragraph keeps its own section properties while the
 * properties of the last section end up on the body rather than on the
 * last paragraph.
 */
@Test
public void testMultiSectionPageProps() throws Exception {
  XWPFDocument doc = convert("simplewp/simplewpml-multisection-01.swpx", "out/multisection-01.docx");

  // The fixture holds one paragraph per section, so exactly two body elements.
  List<IBodyElement> bodyElements = doc.getBodyElements();
  assertEquals(2, bodyElements.size());

  // First section: its paragraph carries the section properties.
  IBodyElement firstElem = bodyElements.get(0);
  assertEquals(BodyElementType.PARAGRAPH, firstElem.getElementType());
  XWPFParagraph firstPara = (XWPFParagraph) firstElem;
  assertEquals("This is the first page numbered in Roman lower-case", firstPara.getText());
  assertTrue("first para lacks section properties", firstPara.getCTPPr().isSetSectPr());

  // Last section: its paragraph must not carry section properties.
  IBodyElement secondElem = bodyElements.get(1);
  assertEquals(BodyElementType.PARAGRAPH, secondElem.getElementType());
  XWPFParagraph secondPara = (XWPFParagraph) secondElem;
  assertEquals("This is the first page numbered in decimal", secondPara.getText());
  assertFalse("second para has section properties", secondPara.getCTPPr().isSetSectPr());

  // The body-level section properties describe the last section.
  CTSectPr bodySectPr = doc.getDocument().getBody().getSectPr();

  CTPageNumber pageNumbering = bodySectPr.getPgNumType();
  assertEquals(BigInteger.valueOf(1), pageNumbering.getStart());
  assertEquals(STNumberFormat.Enum.forString("decimal"), pageNumbering.getFmt());

  // FIXME: check header & footer (a bit tricky)

  CTPageSz pageSize = bodySectPr.getPgSz();
  assertEquals(STPageOrientation.Enum.forString("portrait"), pageSize.getOrient());
  assertEquals(BigInteger.valueOf(11906), pageSize.getW());
  assertEquals(BigInteger.valueOf(16838), pageSize.getH());
}

// ===== INTERNAL UTILITIES

private XWPFDocument convert(String infile, String outfile) throws Exception {
Expand Down
37 changes: 37 additions & 0 deletions src/test/resources/simplewp/simplewpml-multisection-01.swpx
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Test fixture for multi-section documents (loaded by testMultiSectionPageProps).

  Contains two sections of type "nextPage", each with a single paragraph:
    1. page numbers start at 1 in lowerRoman format
    2. page numbers start at 1 in decimal format
  Both sections declare a header holding a page-number reference and a
  portrait page size of 210mm x 297mm (A4).
-->
<wp:document xmlns:wp="urn:ns:wordinator:simplewpml">
<wp:body>
<wp:section type="nextPage">
<wp:page-sequence-properties>
<wp:page-number-properties start="1" format="lowerRoman"/>
<wp:headers-and-footers>
<wp:header>
<wp:p style="Normal">
<wp:page-number-ref/>
</wp:p>
</wp:header>
</wp:headers-and-footers>
<wp:page-size orient="portrait" width="210mm" height="297mm"/>
</wp:page-sequence-properties>
<wp:body>
<wp:p><wp:run>This is the first page numbered in Roman lower-case</wp:run></wp:p>
</wp:body>
</wp:section>
<wp:section type="nextPage">
<wp:page-sequence-properties>
<wp:page-number-properties start="1" format="decimal"/>
<wp:headers-and-footers>
<wp:header>
<wp:p style="Normal">
<wp:page-number-ref/>
</wp:p>
</wp:header>
</wp:headers-and-footers>
<wp:page-size orient="portrait" width="210mm" height="297mm"/>
</wp:page-sequence-properties>
<wp:body>
<wp:p><wp:run>This is the first page numbered in decimal</wp:run></wp:p>
</wp:body>
</wp:section>
</wp:body>
</wp:document>