Implemented stripTags() function

panzerdp · Dec 26, 2016 · 688a363 · 688a363
1 parent 99a08ff
commit 688a363
Show file tree

Hide file tree

Showing 16 changed files with 1,020 additions and 99 deletions.
diff --git a/dist/voca.js b/dist/voca.js
diff --git a/dist/voca.min.js b/dist/voca.min.js
diff --git a/dist/voca.min.js.map b/dist/voca.min.js.map
diff --git a/package.json b/package.json
@@ -49,14 +49,14 @@
   },
   "devDependencies": {
     "babel-cli": "6.18.0",
-    "babel-core": "6.20.0",
+    "babel-core": "6.21.0",
     "babel-eslint": "7.1.1",
     "babel-plugin-add-module-exports": "0.2.1",
     "babel-plugin-module-resolver": "2.4.0",
-    "babel-plugin-transform-es2015-block-scoping": "6.20.0",
+    "babel-plugin-transform-es2015-block-scoping": "6.21.0",
     "babel-plugin-transform-es2015-destructuring": "6.19.0",
     "babel-plugin-transform-es2015-modules-commonjs": "6.18.0",
-    "babel-plugin-transform-es2015-parameters": "6.18.0",
+    "babel-plugin-transform-es2015-parameters": "6.21.0",
     "babel-plugin-transform-es2015-shorthand-properties": "6.18.0",
     "babel-plugin-transform-es2015-spread": "6.8.0",
     "babel-plugin-transform-object-assign": "6.8.0",
@@ -65,7 +65,7 @@
     "chai": "3.5.0",
     "codecov.io": "0.1.6",
     "eslint": "3.12.2",
-    "ghooks": "1.3.2",
+    "ghooks": "2.0.0",
     "glob": "7.1.1",
     "grunt": "1.0.1",
     "grunt-contrib-connect": "1.0.2",
@@ -74,10 +74,10 @@
     "jsdoc": "3.4.3",
     "mkdirp": "0.5.1",
     "mocha": "3.2.0",
-    "rollup": "0.37.0",
+    "rollup": "0.38.0",
     "rollup-plugin-babel": "2.7.1",
     "rollup-plugin-uglify": "1.0.1",
-    "source-map-support": "0.4.6"
+    "source-map-support": "0.4.8"
   },
   "dependencies": {},
   "config": {

diff --git a/src/helper/reg_exp/const.js b/src/helper/reg_exp/const.js
@@ -76,6 +76,14 @@ export const REGEXP_UNICODE_CHARACTER = new RegExp(
 ([\\n\\r\\u2028\\u2029])|\
 (.)', 'g');
 
+/**
+ * Regular expression to match whitespaces
+ *
+ * @type {RegExp}
+ * @ignore
+ */
+export const REGEXP_WHITESPACE = new RegExp('[' + whitespace + ']');
+
 /**
  * Regular expression to match whitespaces from the left side
  *

diff --git a/src/helper/string/has_substring_at_index.js b/src/helper/string/has_substring_at_index.js
@@ -0,0 +1,18 @@
+/**
+ * Checks whether `subject` contains `substring` at a specific `index`.
+ *
+ * When `lookBehind` is true the match has to end at `index`; otherwise it has to start at `index`.
+ * The part extracted from `subject` is lower-cased before the comparison, while `substring` is
+ * compared as given, so callers are expected to pass it in lower case.
+ *
+ * @ignore
+ * @param {string} subject The subject to search in.
+ * @param {string} substring The substring to search for.
+ * @param {number} index The index where the substring has to end (look behind) or start (look ahead).
+ * @param {boolean} [lookBehind=true] Whether to look behind (true) or ahead (false).
+ * @return {boolean} Returns a boolean whether the substring exists.
+ */
+export default function hasSubstringAtIndex(subject, substring, index, lookBehind = true) {
+  const startIndex = lookBehind ? index - substring.length + 1 : index;
+  if (startIndex < 0) {
+    // There are not enough characters before `index`. Bail out explicitly because a negative
+    // start position would otherwise be counted from the end of `subject`.
+    return false;
+  }
+  const extractedSubstring = subject.slice(startIndex, startIndex + substring.length);
+  return extractedSubstring.toLowerCase() === substring;
+}
diff --git a/src/helper/strip/tag/parse_tag_list.js → src/helper/strip/parse_tag_list.js b/src/helper/strip/tag/parse_tag_list.js → src/helper/strip/parse_tag_list.js
@@ -11,7 +11,7 @@ export default function parseTagList(tags) {
   const tagsList = [];
   let match;
   while ((match = REGEXP_TAG_LIST.exec(tags)) !== null) {
-    tagsList.push(match[0]);
+    tagsList.push(match[1]);
   }
   return tagsList;
 }
diff --git a/src/helper/strip/parse_tag_name.js b/src/helper/strip/parse_tag_name.js
@@ -0,0 +1,42 @@
+import { REGEXP_WHITESPACE } from 'helper/reg_exp/const';
+
+const STATE_START_TAG = 0;
+const STATE_NON_WHITESPACE = 1;
+const STATE_DONE = 2;
+
+/**
+ * Parses the tag name from html content.
+ *
+ * The content is scanned character by character: `<` is skipped, whitespace before the name is
+ * ignored, and the name ends at the first whitespace after it, at `>`, or at the end of the input.
+ * Characters are lower-cased and `/` is never added to the name, so `</B>` yields `b`.
+ *
+ * @param {string} tagContent The tag content
+ * @return {string} Returns the tag name
+ */
+export default function parseTagName(tagContent) {
+  let state = STATE_START_TAG;
+  let tagName = '';
+  let index = 0;
+  const length = tagContent.length;
+  // Also stop at the end of the input: content without a closing `>` or trailing whitespace
+  // would otherwise be read past its last character and throw a TypeError.
+  while (state !== STATE_DONE && index < length) {
+    const char = tagContent[index++].toLowerCase();
+    switch (char) {
+      case '<':
+        break;
+      case '>':
+        state = STATE_DONE;
+        break;
+      default:
+        if (REGEXP_WHITESPACE.test(char)) {
+          // Whitespace only terminates the name once a non-whitespace character was seen
+          if (state === STATE_NON_WHITESPACE) {
+            state = STATE_DONE;
+          }
+        } else {
+          if (state === STATE_START_TAG) {
+            state = STATE_NON_WHITESPACE;
+          }
+          if (char !== '/') {
+            tagName += char;
+          }
+        }
+        break;
+    }
+  }
+  return tagName;
+}
diff --git a/src/strip/strip_tags.js b/src/strip/strip_tags.js
@@ -1,13 +1,15 @@
-/* eslint-disable */
-import parseTagList from 'helper/strip/tag/parse_tag_list';
 import coerceToString from 'helper/string/coerce_to_string';
-import isNil from 'helper/object/is_nil';
-import toString from 'helper/string/to_string';
-import trimLeft from 'manipulate/trim_left';
-import trimRight from 'manipulate/trim_right';
+import hasSubstringAtIndex from 'helper/string/has_substring_at_index';
+import parseTagList from 'helper/strip/parse_tag_list';
+import parseTagName from 'helper/strip/parse_tag_name';
+
+const STATE_OUTPUT = 0;
+const STATE_HTML = 1;
+const STATE_EXCLAMATION = 2;
+const STATE_COMMENT = 3;
 
 /**
- * Strips HTML tags from `subject`.
+ * Strips HTML tags from `subject`.
  *
  * @function stripTags
  * @static
@@ -25,14 +27,126 @@ import trimRight from 'manipulate/trim_right';
  * // => 'Earth'
  */
 export default function trim(subject, allowableTags, replacement) {
-  const subjectString = coerceToString(subject);
-  if (subjectString === '') {
+  // NOTE(review): the local name `trim` differs from the documented `stripTags`; the tests in this
+  // commit import the default export, so renaming looks safe — confirm before changing it.
+  subject = coerceToString(subject);
+  if (subject === '') {
     return '';
   }
   if (!Array.isArray(allowableTags)) {
-    allowableTags = parseTagList(coerceToString(allowableTags))
+    const allowableTagsString = coerceToString(allowableTags);
+    allowableTags = allowableTagsString === '' ? [] : parseTagList(allowableTagsString);
   }
   const replacementString = coerceToString(replacement);
+  const length = subject.length;
+  const hasAllowableTags = allowableTags.length > 0;
+  const hasSubstring = hasSubstringAtIndex.bind(null, subject);
+  // Current parser state: plain output, inside a tag, after `<!`, or inside a `<!--` comment
+  let state = STATE_OUTPUT;
+  // Number of `<` characters seen while already inside a tag and not yet closed by `>`
+  let depth = 0;
+  let output = '';
+  // Raw text of the tag being read; only collected when an allow-list is in use
+  let tagContent = '';
+  // The quote character that opened the attribute value being read, or null
+  let quote = null;
+  for (let index = 0; index < length; index++) {
+    const char = subject[index];
+    // Whether `char` is appended to the buffer of the current state after the switch
+    let advance = false;
+    switch (char) {
+      case '<':
+        if (quote) {
+          break;
+        }
+        if (hasSubstring('< ', index, false)) {
+          // `<` followed by a space does not open a tag
+          advance = true;
+          break;
+        }
+        if (state === STATE_OUTPUT) {
+          advance = true;
+          state = STATE_HTML;
+          break;
+        }
+        if (state === STATE_HTML) {
+          depth++;
+          break;
+        }
+        advance = true;
+        break;
+      case '!':
+        if (state === STATE_HTML && hasSubstring('<!', index)) {
+          state = STATE_EXCLAMATION;
+          break;
+        }
+        advance = true;
+        break;
+      case '-':
+        if (state === STATE_EXCLAMATION && hasSubstring('!--', index)) {
+          state = STATE_COMMENT;
+          break;
+        }
+        advance = true;
+        break;
+      case '"':
+      case "'":
+        if (state === STATE_HTML) {
+          if (quote === char) {
+            quote = null;
+          } else if (!quote) {
+            quote = char;
+          }
+        }
+        advance = true;
+        break;
+      case 'E':
+      case 'e':
+        // `<!doctype` is handled like a regular tag from here on
+        if (state === STATE_EXCLAMATION && hasSubstring('doctype', index)) {
+          state = STATE_HTML;
+          break;
+        }
+        advance = true;
+        break;
+      case '>':
+        if (depth > 0) {
+          depth--;
+          break;
+        }
+        if (quote) {
+          break;
+        }
+        if (state === STATE_HTML) {
+          quote = null;
+          state = STATE_OUTPUT;
+          if (hasAllowableTags) {
+            tagContent += '>';
+            const tagName = parseTagName(tagContent);
+            if (allowableTags.indexOf(tagName.toLowerCase()) !== -1) {
+              output += tagContent;
+            } else {
+              // A tag outside the allow-list is stripped, so it gets the replacement as well
+              output += replacementString;
+            }
+            tagContent = '';
+          } else {
+            // The replacement belongs to the result; appending it to `tagContent` discarded it
+            output += replacementString;
+          }
+          break;
+        }
+        if (state === STATE_EXCLAMATION || (state === STATE_COMMENT && hasSubstring('-->', index))) {
+          quote = null;
+          state = STATE_OUTPUT;
+          tagContent = '';
+          break;
+        }
+        advance = true;
+        break;
+      default:
+        advance = true;
+    }
+    if (advance) {
+      switch (state) {
+        case STATE_OUTPUT:
+          output += char;
+          break;
+        case STATE_HTML:
+          if (hasAllowableTags) {
+            tagContent += char;
+          }
+          break;
+      }
+    }
+  }
 
-
+  return output;
 }
diff --git a/test/helper/strip/parse_tag_name.js b/test/helper/strip/parse_tag_name.js
@@ -0,0 +1,11 @@
+import { expect } from 'chai';
+import parseTagName from 'helper/strip/parse_tag_name';
+
+// Unit tests for the parseTagName() helper.
+describe('parseTagName', function() {
+
+  it('should parse the tag name from markup', function () {
+    // Self-closing tag whose attribute value contains nested quotes
+    expect(parseTagName("<img title=\"foo 'bar'\"/>")).to.be.equal('img');
+    // Whitespace between `<` and the tag name
+    expect(parseTagName("<  b>Wonderful world</b>")).to.be.equal('b');
+  });
+
+});
diff --git a/test/index.js b/test/index.js
@@ -38,6 +38,7 @@ import './escape/unescape_html';
 import './helper/boolean/coerce_to_boolean';
 import './helper/number/coerce_to_number';
 import './helper/reg_exp/append_flag_to_reg_exp';
+import './helper/strip/parse_tag_name';
 
 //format
 import './format/sprintf';

diff --git a/test/modules_common.js b/test/modules_common.js
@@ -344,6 +344,13 @@ describe('CommonJS modules', function() {
     expect(words('*gravity***can****cross&&dimensions++')).to.eql(['gravity', 'can', 'cross', 'dimensions']);
   });
 
+  // Strip
+
+  it('should require stripTags()', function() {
+    const stripTags = require('../dist_mod/strip_tags');
+    expect(stripTags('<b>Welcome</b>')).to.equal('Welcome');
+  });
+
   // Util
 
   it('should require version()', function() {

diff --git a/test/modules_es2015.js b/test/modules_es2015.js
@@ -92,6 +92,10 @@ import graphemes from '../dist_mod/graphemes';
 import split from '../dist_mod/split';
 import words from '../dist_mod/words';
 
+// Strip
+
+import stripTags from '../dist_mod/strip_tags';
+
 // Util
 
 import version from '../dist_mod/version';
@@ -380,6 +384,10 @@ describe('CommonJS modules', function() {
     expect(words('*gravity***can****cross&&dimensions++')).to.eql(['gravity', 'can', 'cross', 'dimensions']);
   });
 
+  it('should require stripTags()', function() {
+    expect(stripTags('<b>Welcome</b>')).to.equal('Welcome');
+  });
+
   // Util
 
   it('should require version()', function() {