From a57dc06adf53d9f3f46c844470474283cd26d8fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Zasso?= Date: Sat, 27 Mar 2021 14:26:39 +0100 Subject: [PATCH] doc: improve Buffer's encoding documentation - Add a paragraph about case-insensitivity of encoding options. - Document "utf-8", "utf-16le" and "ucs-2" aliases. - Always use "utf8" in documentation for defaults and examples. PR-URL: https://github.com/nodejs/node/pull/37945 Reviewed-By: Antoine du Hamel Reviewed-By: Colin Ihrig Reviewed-By: James M Snell Reviewed-By: Luigi Pinca --- doc/api/buffer.md | 30 +++++++++++++++++------------- doc/api/http.md | 4 ++-- doc/api/stream.md | 2 +- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/doc/api/buffer.md b/doc/api/buffer.md index 3334c260125338..086df183c59d85 100644 --- a/doc/api/buffer.md +++ b/doc/api/buffer.md @@ -79,17 +79,21 @@ console.log(Buffer.from('fhqwhgads', 'utf16le')); // Prints: <Buffer 66 00 68 00 71 00 77 00 68 00 67 00 61 00 64 00 73 00> ``` +Node.js buffers accept all case variations of encoding strings that they +receive. For example, UTF-8 can be specified as `'utf8'`, `'UTF8'` or `'uTf8'`. + The character encodings currently supported by Node.js are the following: -* `'utf8'`: Multi-byte encoded Unicode characters. Many web pages and other - document formats use [UTF-8][]. This is the default character encoding. - When decoding a `Buffer` into a string that does not exclusively contain - valid UTF-8 data, the Unicode replacement character `U+FFFD` � will be used - to represent those errors. +* `'utf8'` (alias: `'utf-8'`): Multi-byte encoded Unicode characters. Many web + pages and other document formats use [UTF-8][]. This is the default character + encoding. When decoding a `Buffer` into a string that does not exclusively + contain valid UTF-8 data, the Unicode replacement character `U+FFFD` � will be + used to represent those errors. -* `'utf16le'`: Multi-byte encoded Unicode characters. Unlike `'utf8'`, each - character in the string will be encoded using either 2 or 4 bytes. 
- Node.js only supports the [little-endian][endianness] variant of [UTF-16][]. +* `'utf16le'` (alias: `'utf-16le'`): Multi-byte encoded Unicode characters. + Unlike `'utf8'`, each character in the string will be encoded using either 2 + or 4 bytes. Node.js only supports the [little-endian][endianness] variant of + [UTF-16][]. * `'latin1'`: Latin-1 stands for [ISO-8859-1][]. This character encoding only supports the Unicode characters from `U+0000` to `U+00FF`. Each character is @@ -132,11 +136,11 @@ The following legacy character encodings are also supported: * `'binary'`: Alias for `'latin1'`. See [binary strings][] for more background on this topic. The name of this encoding can be very misleading, as all of the encodings listed here convert between strings and binary data. For converting - between strings and `Buffer`s, typically `'utf-8'` is the right choice. + between strings and `Buffer`s, typically `'utf8'` is the right choice. -* `'ucs2'`: Alias of `'utf16le'`. UCS-2 used to refer to a variant of UTF-16 - that did not support characters that had code points larger than U+FFFF. - In Node.js, these code points are always supported. +* `'ucs2'`, `'ucs-2'`: Aliases of `'utf16le'`. UCS-2 used to refer to a variant + of UTF-16 that did not support characters that had code points larger than + U+FFFF. In Node.js, these code points are always supported. ```js Buffer.from('1ag', 'hex'); @@ -900,7 +904,7 @@ Returns `true` if `encoding` is the name of a supported character encoding, or `false` otherwise. 
```js -console.log(Buffer.isEncoding('utf-8')); +console.log(Buffer.isEncoding('utf8')); // Prints: true console.log(Buffer.isEncoding('hex')); diff --git a/doc/api/http.md b/doc/api/http.md index 93e754b82f484e..ff24f36d1eb36f 100644 --- a/doc/api/http.md +++ b/doc/api/http.md @@ -2323,7 +2323,7 @@ changes: --> * `chunk` {string | Buffer} -* `encoding` {string} Optional, **Default**: `utf-8` +* `encoding` {string} Optional, **Default**: `utf8` * `callback` {Function} Optional * Returns: {this} @@ -2565,7 +2565,7 @@ changes: --> * `chunk` {string | Buffer} -* `encoding` {string} **Default**: `utf-8` +* `encoding` {string} **Default**: `utf8` * `callback` {Function} * Returns {boolean} diff --git a/doc/api/stream.md b/doc/api/stream.md index 26a04be24c6f35..4e35340c04812d 100644 --- a/doc/api/stream.md +++ b/doc/api/stream.md @@ -2759,7 +2759,7 @@ const fs = require('fs'); pipeline( fs.createReadStream('object.json') - .setEncoding('utf-8'), + .setEncoding('utf8'), new Transform({ decodeStrings: false, // Accept string input rather than Buffers construct(callback) {