From e8f9e6c45bd79c4e903cbfc1ccf885e7c6f1ad70 Mon Sep 17 00:00:00 2001 From: Jonathan Goldwasser Date: Mon, 30 Aug 2021 17:46:22 +0200 Subject: [PATCH] feat(core): normalize line endings in asset hash calculation (#16276) Replace CRLF with LF so asset hashes are identical across platforms. The hash still includes the size but it is now the size after converting line endings. Addresses #14555 (closes it?) ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license* --- packages/@aws-cdk/core/lib/fs/fingerprint.ts | 52 ++++++++++++++++--- packages/@aws-cdk/core/test/fs/eol/lf.txt | 2 + .../core/test/fs/fs-fingerprint.test.ts | 24 +++++++++ 3 files changed, 71 insertions(+), 7 deletions(-) create mode 100644 packages/@aws-cdk/core/test/fs/eol/lf.txt diff --git a/packages/@aws-cdk/core/lib/fs/fingerprint.ts b/packages/@aws-cdk/core/lib/fs/fingerprint.ts index 50a81fd53982d..7ba7214109b2c 100644 --- a/packages/@aws-cdk/core/lib/fs/fingerprint.ts +++ b/packages/@aws-cdk/core/lib/fs/fingerprint.ts @@ -9,14 +9,18 @@ const BUFFER_SIZE = 8 * 1024; const CTRL_SOH = '\x01'; const CTRL_SOT = '\x02'; const CTRL_ETX = '\x03'; +const CR = '\r'; +const LF = '\n'; +const CRLF = `${CR}${LF}`; /** * Produces fingerprint based on the contents of a single file or an entire directory tree. * + * Line endings are converted from CRLF to LF. + * * The fingerprint will also include: * 1. An extra string if defined in `options.extra`. - * 2. The set of exclude patterns, if defined in `options.exclude` - * 3. The symlink follow mode value. + * 2. The symlink follow mode value. * * @param fileOrDirectory The directory or file to fingerprint * @param options Fingerprinting options @@ -60,7 +64,7 @@ export function fingerprint(fileOrDirectory: string, options: FingerprintOptions _hashField(hash, `link:${relativePath}`, linkTarget); } } else if (stat.isFile()) { - _hashField(hash, `file:${relativePath}`, _contentFingerprint(realPath, stat)); + _hashField(hash, `file:${relativePath}`, contentFingerprint(realPath)); } else if (stat.isDirectory()) { for (const item of fs.readdirSync(realPath).sort()) { _processFileOrDirectory(path.join(symbolicPath, item), false, path.join(realPath, item)); @@ -71,20 +75,54 @@ export function fingerprint(fileOrDirectory: string, options: FingerprintOptions } } -function _contentFingerprint(file: string, stat: fs.Stats): string { +export function contentFingerprint(file: string): string { const hash = crypto.createHash('sha256'); const buffer = Buffer.alloc(BUFFER_SIZE); // eslint-disable-next-line no-bitwise const fd = fs.openSync(file, fs.constants.O_DSYNC | fs.constants.O_RDONLY | fs.constants.O_SYNC); + let size = 0; + let isBinary = false; + let lastStr = ''; + let read = 0; try { - let read = 0; while ((read = fs.readSync(fd, buffer, 0, BUFFER_SIZE, null)) !== 0) { - hash.update(buffer.slice(0, read)); + const slicedBuffer = buffer.slice(0, read); + + // Detect if file is binary by checking the first 8k bytes for the + // null character (git like implementation) + if (size === 0) { + isBinary = slicedBuffer.indexOf(0) !== -1; + } + + let dataBuffer = slicedBuffer; + if (!isBinary) { // Line endings normalization (CRLF -> LF) + const str = buffer.slice(0, read).toString(); + + // We are going to normalize line endings to LF. So if the current + // buffer ends with CR, it could be that the next one starts with + // LF so we need to save it for later use. + if (new RegExp(`${CR}$`).test(str)) { + lastStr += str; + continue; + } + + const data = lastStr + str; + const normalizedData = data.replace(new RegExp(CRLF, 'g'), LF); + dataBuffer = Buffer.from(normalizedData); + lastStr = ''; + } + + size += dataBuffer.length; + hash.update(dataBuffer); + } + + if (lastStr) { + hash.update(Buffer.from(lastStr)); } } finally { fs.closeSync(fd); } - return `${stat.size}:${hash.digest('hex')}`; + return `${size}:${hash.digest('hex')}`; } function _hashField(hash: crypto.Hash, header: string, value: string | Buffer | DataView) { diff --git a/packages/@aws-cdk/core/test/fs/eol/lf.txt b/packages/@aws-cdk/core/test/fs/eol/lf.txt new file mode 100644 index 0000000000000..f41bc690ba927 --- /dev/null +++ b/packages/@aws-cdk/core/test/fs/eol/lf.txt @@ -0,0 +1,2 @@ +hello word +this a new line! diff --git a/packages/@aws-cdk/core/test/fs/fs-fingerprint.test.ts b/packages/@aws-cdk/core/test/fs/fs-fingerprint.test.ts index 6a589b3ba159e..be093c32cbffc 100644 --- a/packages/@aws-cdk/core/test/fs/fs-fingerprint.test.ts +++ b/packages/@aws-cdk/core/test/fs/fs-fingerprint.test.ts @@ -3,6 +3,7 @@ import * as os from 'os'; import * as path from 'path'; import { nodeunitShim, Test } from 'nodeunit-shim'; import { FileSystem, SymlinkFollowMode } from '../../lib/fs'; +import { contentFingerprint } from '../../lib/fs/fingerprint'; nodeunitShim({ files: { @@ -155,4 +156,27 @@ nodeunitShim({ test.done(); }, }, + + eol: { + 'normalizes line endings'(test: Test) { + // GIVEN + const lf = path.join(__dirname, 'eol', 'lf.txt'); + const crlf = path.join(__dirname, 'eol', 'crlf.txt'); + fs.writeFileSync(crlf, fs.readFileSync(lf, 'utf8').replace(/\n/g, '\r\n')); + + const lfStat = fs.statSync(lf); + const crlfStat = fs.statSync(crlf); + + // WHEN + const crlfHash = contentFingerprint(crlf); + const lfHash = contentFingerprint(lf); + + // THEN + test.notEqual(crlfStat.size, lfStat.size); // Difference in size due to different line endings + test.deepEqual(crlfHash, lfHash); // Same hash + + fs.unlinkSync(crlf); + test.done(); + }, + }, });