Skip to content

Commit

Permalink
New parser, serializer, API, and package name
Browse files Browse the repository at this point in the history
* Renames the package from "content-type-parser" to "whatwg-mimetype", as MIME type is the more general concept, and this is now implementing part of the WHATWG MIME Sniffing standard
* Replaces the parser and serializer with the newly-specified one from whatwg/mimesniff@cc81ec4. This closes #3 as regular expressions are no longer used.
* Overhauls the API to more or less match what is proposed in whatwg/mimesniff#43. Notably, the invariants of the MIME type model are now maintained more aggressively, and the parameters exist on a separate Map-like data structure. Also removes the isText() method, as it's much less interesting than the other two.
* Switches from Mocha to Jest, and brings in the appropriate web platform test data files.

All of this helps close #1, as it's now clear that this project has its own direction which is more standards-based and merging it with another project doesn't make much sense.
  • Loading branch information
domenic authored Dec 10, 2017
1 parent 00a5e0c commit 26c539a
Show file tree
Hide file tree
Showing 20 changed files with 4,834 additions and 343 deletions.
1 change: 1 addition & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/coverage/**
80 changes: 62 additions & 18 deletions .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@
},
"rules": {
// Possible errors
"comma-dangle": ["error", "never"],
"for-direction": "error",
"getter-return": "error",
"no-await-in-loop": "error",
"no-compare-neg-zero": "error",
"no-cond-assign": ["error", "except-parens"],
"no-console": "error",
"no-constant-condition": "error",
"no-constant-condition": ["error", { "checkLoops": false }],
"no-control-regex": "off",
"no-debugger": "error",
"no-dupe-args": "error",
Expand All @@ -28,13 +31,15 @@
"no-inner-declarations": "off",
"no-invalid-regexp": "error",
"no-irregular-whitespace": "error",
"no-negated-in-lhs": "error",
"no-obj-calls": "error",
"no-prototype-builtins": "error",
"no-regex-spaces": "error",
"no-sparse-arrays": "error",
"no-template-curly-in-string": "error",
"no-unexpected-multiline": "error",
"no-unreachable": "error",
"no-unsafe-finally": "off",
"no-unsafe-negation": "error",
"use-isnan": "error",
"valid-jsdoc": "off",
"valid-typeof": "error",
Expand All @@ -43,6 +48,7 @@
"accessor-pairs": "error",
"array-callback-return": "error",
"block-scoped-var": "off",
"class-methods-use-this": "off",
"complexity": "off",
"consistent-return": "error",
"curly": ["error", "all"],
Expand All @@ -56,7 +62,7 @@
"no-case-declarations": "error",
"no-div-regex": "off",
"no-else-return": "error",
"no-empty-function": "error",
"no-empty-function": "off",
"no-empty-pattern": "error",
"no-eq-null": "error",
"no-eval": "error",
Expand All @@ -65,18 +71,18 @@
"no-extra-label": "error",
"no-fallthrough": "error",
"no-floating-decimal": "error",
"no-global-assign": "error",
"no-implicit-coercion": "error",
"no-implicit-globals": "error",
"no-implied-eval": "error",
"no-invalid-this": "error",
"no-implied-eval": "off",
"no-invalid-this": "off", // meh
"no-iterator": "error",
"no-labels": ["error", { "allowLoop": true }],
"no-lone-blocks": "error",
"no-loop-func": "off",
"no-magic-numbers": "off",
"no-multi-spaces": "error",
"no-multi-str": "error",
"no-native-reassign": "error",
"no-new": "error",
"no-new-func": "error",
"no-new-wrappers": "error",
Expand All @@ -86,7 +92,9 @@
"no-process-env": "error",
"no-proto": "error",
"no-redeclare": "error",
"no-restricted-properties": "off",
"no-return-assign": ["error", "except-parens"],
"no-return-await": "error",
"no-script-url": "off",
"no-self-assign": "error",
"no-self-compare": "error",
Expand All @@ -98,10 +106,13 @@
"no-useless-call": "error",
"no-useless-concat": "error",
"no-useless-escape": "error",
"no-useless-return": "error",
"no-void": "error",
"no-warning-comments": "off",
"no-with": "error",
"prefer-promise-reject-errors": "error",
"radix": ["error", "as-needed"],
"require-await": "error",
"vars-on-top": "off",
"wrap-iife": ["error", "outside"],
"yoda": ["error", "never"],
Expand All @@ -121,90 +132,117 @@
"no-undef-init": "error",
"no-undefined": "off",
"no-unused-vars": "error",
"no-use-before-define": ["error", "nofunc"],
"no-use-before-define": "off",

// Node.js and CommonJS
"callback-return": "off",
"global-require": "error",
"handle-callback-err": "error",
"no-buffer-constructor": "error",
"no-mixed-requires": ["error", true],
"no-new-require": "error",
"no-path-concat": "error",
"no-process-exit": "error",
"no-restricted-imports": "off",
"no-restricted-modules": "off",
"no-sync": "off",

// Stylistic Issues
"array-bracket-newline": ["error", { "multiline": true }],
"array-bracket-spacing": ["error", "never"],
"array-element-newline": ["off"],
"block-spacing": ["error", "always"],
"brace-style": ["error", "1tbs", { "allowSingleLine": false }],
"camelcase": ["error", { "properties": "always" }],
"capitalized-comments": "off",
"comma-dangle": ["error", "never"],
"comma-spacing": ["error", { "before": false, "after": true }],
"comma-style": ["error", "last"],
"computed-property-spacing": ["error", "never"],
"consistent-this": "off",
"eol-last": "error",
"func-call-spacing": ["error", "never"],
"func-name-matching": ["error"],
"func-names": "off",
"func-style": ["error", "declaration"],
"function-paren-newline": ["error", "multiline"],
"id-blacklist": "off",
"id-length": "off",
"id-match": "off",
"indent": ["error", 2, { "SwitchCase": 1 }],
"indent": ["error", 2, { "SwitchCase": 1, "CallExpression": {"arguments": "first"}, "FunctionExpression": {"parameters": "first"}, "ignoredNodes": ["ConditionalExpression"] }],
"jsx-quotes": "off",
"key-spacing": ["error", { "beforeColon": false, "afterColon": true, "mode": "strict" }],
"keyword-spacing": ["error", { "before": true, "after": true }],
"line-comment-position": "off",
"linebreak-style": ["error", "unix"],
"lines-around-comment": "off",
"max-depth": "off",
"max-len": ["error", 120, { "ignoreUrls": true }],
"max-lines": "off",
"max-nested-callbacks": "off",
"max-params": "off",
"max-statements": "off",
"max-statements-per-line": ["error", { "max": 1 }],
"new-cap": ["error", { "capIsNewExceptions": ["USVString"] }],
"multiline-ternary": ["error", "always-multiline"],
"new-cap": ["error", { "capIsNewExceptions": ["USVString", "DOMString"] }],
"new-parens": "error",
"newline-after-var": "off",
"newline-before-return": "off",
"newline-per-chained-call": "off",
"no-array-constructor": "error",
"no-bitwise": "off",
"no-continue": "off",
"no-inline-comments": "off",
"no-lonely-if": "error",
"no-mixed-operators": [
"error",
{
"groups": [
["&", "|", "^", "~", "<<", ">>", ">>>"],
["==", "!=", "===", "!==", ">", ">=", "<", "<="],
["&&", "||"],
["in", "instanceof"]
]
}
],
"no-mixed-spaces-and-tabs": "error",
"no-multi-assign": "off",
"no-multiple-empty-lines": "error",
"no-negated-condition": "off",
"no-nested-ternary": "error",
"no-new-object": "error",
"no-plusplus": "off",
"no-restricted-syntax": "off",
"no-spaced-func": "error",
"no-tabs": "error",
"no-ternary": "off",
"no-trailing-spaces": "error",
"no-underscore-dangle": "off",
"no-unneeded-ternary": "error",
"no-whitespace-before-property": "error",
"nonblock-statement-body-position": "error",
"object-curly-newline": ["error", { "consistent": true }],
"object-curly-spacing": ["error", "always"],
"object-property-newline": "off",
"one-var": ["error", "never"],
"one-var-declaration-per-line": ["error", "initializations"],
"operator-assignment": ["error", "always"],
"operator-linebreak": ["error", "after"],
"padded-blocks": ["error", "never"],
"padding-line-between-statements": "off",
"quote-props": ["error", "as-needed"],
"quotes": ["error", "double", { "avoidEscape": true, "allowTemplateLiterals": true }],
"require-jsdoc": "off",
"semi": ["error", "always"],
"semi-spacing": "error",
"sort-imports": "off",
"semi-style": "error",
"sort-keys": "off",
"sort-vars": "off",
"space-before-blocks": ["error", "always"],
"space-before-function-paren": ["error", { "anonymous": "always", "named": "never" }],
"space-in-parens": ["error", "never"],
"space-infix-ops": "error",
"space-unary-ops": ["error", { "words": true, "nonwords": false }],
"spaced-comment": ["error", "always", { "markers": ["///"] }],
"switch-colon-spacing": "error",
"template-tag-spacing": "error",
"unicode-bom": "error",
"wrap-regex": "off",

// ECMAScript 6
Expand All @@ -219,18 +257,24 @@
"no-dupe-class-members": "error",
"no-duplicate-imports": "error",
"no-new-symbol": "error",
"no-restricted-imports": "off",
"no-this-before-super": "error",
"no-useless-computed-key": "error",
"no-useless-constructor": "error",
"no-useless-rename": "error",
"no-var": "error",
"object-shorthand": "error",
"prefer-arrow-callback": "error",
"prefer-const": ["error", { "ignoreReadBeforeAssign": true }],
"prefer-reflect": "off",
"prefer-const": ["error", { "ignoreReadBeforeAssign": true, "destructuring": "all" }],
"prefer-destructuring": ["error", { "VariableDeclarator": { "array": false, "object": true }, "AssignmentExpression": { "array": false, "object": false } }, { "enforceForRenamedProperties": false }],
"prefer-numeric-literals": "error",
"prefer-rest-params": "off",
"prefer-spread": "off", // TODO with new Node versions
"prefer-spread": "error",
"prefer-template": "off",
"require-yield": "error",
"rest-spread-spacing": "error",
"sort-imports": "off",
"symbol-description": "error",
"template-curly-spacing": ["error", "never"],
"yield-star-spacing": ["error", "after"]
}
Expand Down
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# lint requires lf line endings
*.js text eol=lf
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
/node_modules/
/npm-debug.log

/coverage/
/test/web-platform-tests/*
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright © 2016 Domenic Denicola <d@domenic.me>
Copyright © 2017 Domenic Denicola <d@domenic.me>

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

Expand Down
106 changes: 72 additions & 34 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,60 +1,98 @@
# Parse `Content-Type` Header Strings
# Parse, serialize, and manipulate MIME types

This package will parse the [`Content-Type`](https://tools.ietf.org/html/rfc7231#section-3.1.1.1) header field into an introspectable data structure, whose parameters can be manipulated:
This package will parse [MIME types](https://mimesniff.spec.whatwg.org/#understanding-mime-types) into a structured format, which can then be manipulated and serialized:

```js
const contentTypeParser = require("content-type-parser");
const MIMEType = require("whatwg-mimetype");

const contentType = contentTypeParser(`Text/HTML;Charset="utf-8"`);
const mimeType = new MIMEType(`Text/HTML;Charset="utf-8"`);

console.assert(contentType.toString() === "text/html;charset=utf-8");
console.assert(mimeType.toString() === "text/html;charset=utf-8");

console.assert(contentType.type === "text");
console.assert(contentType.subtype === "html");
console.assert(contentType.get("charset") === "utf-8");
console.assert(mimeType.type === "text");
console.assert(mimeType.subtype === "html");
console.assert(mimeType.essence === "text/html");
console.assert(mimeType.parameters.get("charset") === "utf-8");

contentType.set("charset", "windows-1252");
console.assert(contentType.get("charset") === "windows-1252");
console.assert(contentType.toString() === "text/html;charset=windows-1252");
mimeType.parameters.set("charset", "windows-1252");
console.assert(mimeType.parameters.get("charset") === "windows-1252");
console.assert(mimeType.toString() === "text/html;charset=windows-1252");

console.assert(contentType.isHTML() === true);
console.assert(contentType.isXML() === false);
console.assert(contentType.isText() === true);
console.assert(mimeType.isHTML() === true);
console.assert(mimeType.isXML() === false);
```

Note how parsing will lowercase the type, subtype, and parameter name tokens (but not parameter values).
Parsing is a fairly complex process; see [the specification](https://mimesniff.spec.whatwg.org/#parsing-a-mime-type) for details (and similarly [for serialization](https://mimesniff.spec.whatwg.org/#serializing-a-mime-type)).

If the passed string cannot be parsed as a content-type, `contentTypeParser` will return `null`.
If the passed string cannot be parsed as a MIME type, the `MIMEType` constructor will throw.

## `ContentType` instance API
This package's algorithms conform to those of the WHATWG [MIME Sniffing Standard](https://mimesniff.spec.whatwg.org/), and are aligned up to commit [cc81ec4](https://github.com/whatwg/mimesniff/commit/cc81ec48288944562c4554069da1d74a71e199fb).

This package's main module's default export will return an instance of the `ContentType` class, which has the following public APIs:
## `MIMEType` API

This package's main module's default export is a class, `MIMEType`. Its constructor takes a string which it will attempt to parse into a MIME type; if parsing fails, an `Error` will be thrown.

### Properties

- `type`: the top-level media type, e.g. `"text"`
- `subtype`: the subtype, e.g. `"html"`
- `parameterList`: an array of `{ separator, key, value }` pairs representing the parameters. The `separator` field contains any whitespace, not just the `;` character.
- `type`: the MIME type's [type](https://mimesniff.spec.whatwg.org/#mime-type-type), e.g. `"text"`
- `subtype`: the MIME type's [subtype](https://mimesniff.spec.whatwg.org/#mime-type-subtype), e.g. `"html"`
- `essence`: the MIME type's [essence](https://mimesniff.spec.whatwg.org/#mime-type-essence), e.g. `"text/html"`
- `parameters`: an instance of `MIMETypeParameters`, containing this MIME type's [parameters](https://mimesniff.spec.whatwg.org/#mime-type-parameters)

`type` and `subtype` can be changed. They will be validated to be non-empty and only contain [HTTP token code points](https://mimesniff.spec.whatwg.org/#http-token-code-point).

`essence` is only a getter, and cannot be changed.

### Parameter manipulation
`parameters` is also a getter, but the contents of the `MIMETypeParameters` object are mutable, as described below.

In general you should not directly manipulate `parameterList`. Instead, use the following APIs:
### Methods

- `get("key")`: returns the value of the parameter with the given key, or `undefined` if no such parameter is present
- `set("key", "value")`: adds the given key/value pair to the parameter list, or overwrites the existing value if an entry already existed
- `toString()`: serializes the MIME type to a string
- `isHTML()`: returns true if this instance represents [an HTML MIME type](https://mimesniff.spec.whatwg.org/#html-mime-type)
- `isXML()`: returns true if this instance represents [an XML MIME type](https://mimesniff.spec.whatwg.org/#xml-mime-type)

Both of these will lowercase the keys.
_Note: the `isHTML()` and `isXML()` methods are speculative, and may be removed or changed in future major versions. See [whatwg/mimesniff#48](https://github.com/whatwg/mimesniff/issues/48) for brainstorming in this area. Currently we implement these mainly because they are useful in jsdom._

### MIME type tests
## `MIMETypeParameters` API

- `isHTML()`: returns true if this instance's MIME type is [the HTML MIME type](https://html.spec.whatwg.org/multipage/infrastructure.html#html-mime-type), `"text/html"`
- `isXML()`: returns true if this instance's MIME type is [an XML MIME type](https://html.spec.whatwg.org/multipage/infrastructure.html#xml-mime-type)
- `isText()`: returns true if this instance's top-level media type is `"text"`
The `MIMETypeParameters` class, instances of which are returned by `mimeType.parameters`, has a surface API equivalent to that of a [JavaScript `Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map).

### Serialization
However, `MIMETypeParameters` methods will always interpret their arguments as appropriate for MIME types, so e.g. parameter names will be lowercased, and attempting to set invalid characters will throw.

- `toString()` will return a canonicalized representation of the content-type, re-built from the parsed components
Some examples:

```js
const mimeType = new MIMEType(`x/x;a=b;c=D;E="F"`);

// Logs:
// a b
// c D
// e F
for (const [name, value] of mimeType.parameters) {
console.log(name, value);
}

console.assert(mimeType.parameters.has("a"));
console.assert(mimeType.parameters.has("A"));
console.assert(mimeType.parameters.get("A") === "b");

mimeType.parameters.set("Q", "X");
console.assert(mimeType.parameters.get("q") === "X");
console.assert(mimeType.toString() === "x/x;a=b;c=D;e=F;q=X");

// Throws:
mimeType.parameters.set("@", "x");
```

## Raw parsing/serialization APIs

If you want primitives on which to build your own API, you can get direct access to the parsing and serialization algorithms as follows:

```js
const parse = require("whatwg-mimetype/parser");
const serialize = require("whatwg-mimetype/serialize");
```

## Credits
`parse(string)` returns an object containing the `type` and `subtype` strings, plus `parameters`, which is a `Map`. This is roughly our equivalent of the spec's [MIME type record](https://mimesniff.spec.whatwg.org/#mime-type). If parsing fails, it instead returns `null`.

This package was originally based on the excellent work of [@nicolashenry](https://github.com/nicolashenry), [in jsdom](https://github.com/tmpvar/jsdom/blob/16fd85618f2705d181232f6552125872a37164bc/lib/jsdom/living/helpers/headers.js). It has since been pulled out into this separate package.
`serialize(record)` operates on such an object, giving back a string according to the serialization algorithm.
Loading

0 comments on commit 26c539a

Please sign in to comment.