Skip to content

Commit

Permalink
Merge pull request #15363 from ckeditor/ck/15333-metadata-retained-when-copying-from-word-in-windows
Browse files Browse the repository at this point in the history

Fix (paste-from-office): Content from Word documents should be pasted correctly (without HTML styles tag content) on Windows systems. Closes #15333. Closes #9002.
  • Loading branch information
niegowski authored Nov 23, 2023
2 parents f73874c + f9568e7 commit 18a88ed
Show file tree
Hide file tree
Showing 8 changed files with 1,168 additions and 1 deletion.
4 changes: 4 additions & 0 deletions packages/ckeditor5-paste-from-office/src/filters/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ export function parseHtml( htmlString: string, stylesProcessor: StylesProcessor
// Remove Word specific "if comments" so content inside is not omitted by the parser.
htmlString = htmlString.replace( /<!--\[if gte vml 1]>/g, '' );

// Clean the <head> section of MS Windows specific tags. See https://github.com/ckeditor/ckeditor5/issues/15333.
// The regular expression matches the <o:SmartTagType> tag with optional attributes (with or without values).
htmlString = htmlString.replace( /<o:SmartTagType(?:\s+[^\s>=]+(?:="[^"]*")?)*\s*\/?>/gi, '' );

const normalizedHtml = normalizeSpacing( cleanContentAfterBody( htmlString ) );

// Parse htmlString as native Document object.
Expand Down
16 changes: 16 additions & 0 deletions packages/ckeditor5-paste-from-office/tests/_data/other/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
*/

import smartTags from './smart-tags/input.html';
import smartTagsModel from './smart-tags/model.html';

// Source HTML for the `smartTags` case, imported from `./smart-tags/input.html`.
const input = { smartTags };

// Model counterpart for the `smartTags` case, imported from `./smart-tags/model.html`.
const model = { smartTags: smartTagsModel };

/**
 * Fixture set exposing the `smartTags` case under matching keys in `input` and `model`.
 */
export const fixtures = { input, model };
Binary file not shown.
1,023 changes: 1,023 additions & 0 deletions packages/ckeditor5-paste-from-office/tests/_data/other/smart-tags/input.html

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<paragraph><$text bold="true" fontColor="red">A/K: </$text></paragraph>
<paragraph><$text bold="true">T ID: 1</$text></paragraph>
<paragraph></paragraph>
<paragraph><$text bold="true" fontColor="black">ID 1 –adXY</$text></paragraph>
<paragraph>M for F Der Transactions with XXXX SYXE dated 06th March 2013</paragraph>
<paragraph></paragraph>
<paragraph>Type of Entity: XXXX company</paragraph>
<paragraph>Jurisdiction: Germany</paragraph>
<paragraph><$text bold="true">C: </$text>Addendum covering </paragraph>
<paragraph>One-Way: Two Way </paragraph>
<paragraph>Go: German </paragraph>
<paragraph>Sub: N/A</paragraph>
<paragraph>Ind: zero </paragraph>
<paragraph>Thresholds: R: zero</paragraph>
<paragraph> Co: zero</paragraph>
<paragraph>Minimum Transfer Amount: R: EUR1million</paragraph>
<paragraph> Counterparty: EUR1million</paragraph>
<paragraph>Rounding: R: EUR1million</paragraph>
<paragraph> Co: EUR1million</paragraph>
<paragraph></paragraph>
<paragraph>M re to EUR 2,000 (and rounding to EUR 1,000) if either party's rating falls below C/C2</paragraph>
<paragraph><$text bold="true">El 2-day C: </$text>Yes</paragraph>
<paragraph></paragraph>
<paragraph><$text bold="true" fontColor="black">ID 1 –addfSE</$text></paragraph>
<paragraph>M A for F D Transactions with XXXX SETXY dated 06th March 2013</paragraph>
<paragraph></paragraph>
<paragraph>Type of Entity: XXX company</paragraph>
<paragraph>Jurisdiction: Germany</paragraph>
<paragraph><$text bold="true" fontColor="black">ID 1 – fSETXX</$text></paragraph>
<paragraph>Master Agreement for Financial Derivatives Transactions with XXXX STXY dated 06th March 2013</paragraph>
<paragraph></paragraph>
<paragraph>Type of Entity: XXX company</paragraph>
<paragraph>Jurisdiction: Germany</paragraph>
<paragraph></paragraph>
<paragraph></paragraph>
<paragraph></paragraph>
<paragraph><$text bold="true" fontColor="black">GID XXXXX –adE</$text></paragraph>
<paragraph><$text fontColor="black">Master Agreement for Financial Derivatives Transactions with XYZ COMPANY SE dated 06th March 2013</$text></paragraph>
<paragraph></paragraph>
<paragraph><$text fontColor="black">Type of Entity: XXXYYY company</$text></paragraph>
<paragraph><$text fontColor="black">Jurisdiction: Germany</$text></paragraph>
<paragraph></paragraph>
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { fixtures as table } from '../_data/table/index.js';
import { fixtures as pageBreak } from '../_data/page-break/index.js';
import { fixtures as fontWithoutTableProperties } from '../_data/font-without-table-properties/index';
import { fixtures as googleDocsBrParagraphs } from '../_data/paste-from-google-docs/br-paragraph/index';
import { fixtures as smartTags } from '../_data/other/index';

// Generic fixtures.
export const generic = {
Expand All @@ -29,7 +30,8 @@ export const generic = {
'google-docs-br-paragraphs': googleDocsBrParagraphs,
table,
'page-break': pageBreak,
'font-without-table-properties': fontWithoutTableProperties
'font-without-table-properties': fontWithoutTableProperties,
'smart-tags': smartTags
};

// Browser specific fixtures.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ describe( 'PasteFromOffice - integration', () => {
}
} );

// Generates integration tests for the 'smart-tags' fixture set.
// NOTE(review): Bold and FontColor are presumably enabled because the expected model for this
// fixture uses `bold` and `fontColor` text attributes — confirm against the fixture's model.html.
generateIntegrationTests( {
input: 'smart-tags',
editorConfig: {
plugins: [ Clipboard, Paragraph, Bold, PasteFromOffice, FontColor ]
}
} );

function generateIntegrationTests( config ) {
const commonIntegrationConfig = {
type: 'integration',
Expand Down
73 changes: 73 additions & 0 deletions packages/ckeditor5-paste-from-office/tests/filters/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,79 @@ describe( 'PasteFromOffice - filters', () => {

expect( body.getChild( 0 ).name ).to.equal( 'p' );
} );

// Regression coverage for https://github.com/ckeditor/ckeditor5/issues/15333.
describe( 'should remove MS Windows specific tags to prevent incorrect parsing of HTML', () => {
	// Places the given markup inside the <head> of a minimal document whose body holds a single
	// `<p>foo</p>`, parses it with `parseHtml()` and asserts that the paragraph is the only body content.
	function expectOnlyParagraphInBody( ...headTags ) {
		const html = [
			'<html>',
			'<head>',
			...headTags,
			'</head>',
			'<body>',
			'<p>foo</p>',
			'</body>',
			'</html>'
		].join( '' );
		const { body: parsedBody, bodyString: serializedBody } = parseHtml( html );

		expect( parsedBody ).to.instanceof( DocumentFragment );
		expect( parsedBody.childCount ).to.equal( 1 );
		expect( serializedBody ).to.equal( '<p>foo</p>' );
	}

	it( 'should remove <o:SmartTagType> empty tag (with or without `/` at the end)', () => {
		expectOnlyParagraphInBody(
			'<o:SmartTagType/>',
			'<o:SmartTagType>'
		);
	} );

	it( 'should remove <o:SmartTagType> empty tag with white space before the ending', () => {
		expectOnlyParagraphInBody(
			'<o:SmartTagType />',
			'<o:SmartTagType >'
		);
	} );

	it( 'should remove <o:SmartTagType> tag with attributes (with and without values)', () => {
		expectOnlyParagraphInBody(
			'<o:SmartTagType namespaceuri="foo:bar:smarttags" baz />'
		);
	} );

	it( 'should remove <o:SmartTagType> tag with attributes containing `>`', () => {
		expectOnlyParagraphInBody(
			'<o:SmartTagType namespaceuri="foo>bar>smarttags" />'
		);
	} );
} );
} );
} );
} );

0 comments on commit 18a88ed

Please sign in to comment.