Skip to content

Commit

Permalink
feat(core): normalize line endings in asset hash calculation (aws#16276)
Browse files Browse the repository at this point in the history
Replace CRLF with LF so asset hashes are identical across platforms.

The hash still includes the size but it is now the size after converting
line endings.

Addresses aws#14555 (closes it?)

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
jogold authored and david-doyle-as24 committed Sep 7, 2021
1 parent f30466b commit e8f9e6c
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 7 deletions.
52 changes: 45 additions & 7 deletions packages/@aws-cdk/core/lib/fs/fingerprint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,18 @@ const BUFFER_SIZE = 8 * 1024;
// ASCII control characters (code points 0x01, 0x02 and 0x03).
const CTRL_SOH = '\x01';
const CTRL_SOT = '\x02';
const CTRL_ETX = '\x03';
// Line-ending characters: carriage return, line feed, and the two-character
// CRLF sequence built from them.
const CR = '\r';
const LF = '\n';
const CRLF = `${CR}${LF}`;

/**
* Produces fingerprint based on the contents of a single file or an entire directory tree.
*
* Line endings are converted from CRLF to LF.
*
* The fingerprint will also include:
* 1. An extra string if defined in `options.extra`.
* 2. The set of exclude patterns, if defined in `options.exclude`
* 3. The symlink follow mode value.
* 2. The symlink follow mode value.
*
* @param fileOrDirectory The directory or file to fingerprint
* @param options Fingerprinting options
Expand Down Expand Up @@ -60,7 +64,7 @@ export function fingerprint(fileOrDirectory: string, options: FingerprintOptions
_hashField(hash, `link:${relativePath}`, linkTarget);
}
} else if (stat.isFile()) {
_hashField(hash, `file:${relativePath}`, _contentFingerprint(realPath, stat));
_hashField(hash, `file:${relativePath}`, contentFingerprint(realPath));
} else if (stat.isDirectory()) {
for (const item of fs.readdirSync(realPath).sort()) {
_processFileOrDirectory(path.join(symbolicPath, item), false, path.join(realPath, item));
Expand All @@ -71,20 +75,54 @@ export function fingerprint(fileOrDirectory: string, options: FingerprintOptions
}
}

/**
 * Computes a content fingerprint for a single file.
 *
 * The file is read in chunks of `BUFFER_SIZE` bytes. If the first chunk
 * contains a NUL byte the file is considered binary (git-like heuristic) and
 * is hashed verbatim. Otherwise every CRLF sequence is replaced by LF before
 * hashing, so that a text file yields the same fingerprint whatever line
 * endings it was checked out with. A CR that is not followed by LF is kept.
 *
 * Normalization is done on raw bytes, never on decoded strings: the bytes
 * 0x0D and 0x0A cannot occur inside a multi-byte UTF-8 sequence, so this is
 * safe and avoids corrupting characters that straddle a chunk boundary.
 *
 * @param file Path of the file to fingerprint
 * @returns `<size>:<sha256 hex digest>` where `size` is the number of bytes
 * that were hashed (i.e. the size after line ending normalization)
 */
export function contentFingerprint(file: string): string {
  const hash = crypto.createHash('sha256');
  const buffer = Buffer.alloc(BUFFER_SIZE);
  const crByte = CR.charCodeAt(0);
  const lfByte = LF.charCodeAt(0);
  const crBuffer = Buffer.from(CR);
  // eslint-disable-next-line no-bitwise
  const fd = fs.openSync(file, fs.constants.O_DSYNC | fs.constants.O_RDONLY | fs.constants.O_SYNC);
  let size = 0;
  let isBinary = false;
  let isFirstChunk = true;
  // True when the previous chunk ended with a CR that has been withheld until
  // we know whether the next chunk starts with LF.
  let pendingCr = false;

  // Feeds bytes to the hash and accounts for them in the reported size.
  const emit = (data: Buffer): void => {
    size += data.length;
    hash.update(data);
  };

  try {
    let read = 0;
    while ((read = fs.readSync(fd, buffer, 0, BUFFER_SIZE, null)) !== 0) {
      const chunk = buffer.subarray(0, read);

      // Detect if file is binary by checking the first chunk only for the
      // null character (git like implementation)
      if (isFirstChunk) {
        isBinary = chunk.indexOf(0) !== -1;
        isFirstChunk = false;
      }

      if (isBinary) {
        emit(chunk);
        continue;
      }

      // Resolve the CR withheld from the previous chunk: it is dropped when
      // it turns out to be the first half of a CRLF, kept otherwise.
      if (pendingCr) {
        if (chunk[0] !== lfByte) {
          emit(crBuffer);
        }
        pendingCr = false;
      }

      // A trailing CR may be the first half of a CRLF split across chunks,
      // so withhold it until the next chunk (or end of file) is seen.
      let end = chunk.length;
      if (chunk[end - 1] === crByte) {
        pendingCr = true;
        end -= 1;
      }

      // Emit the chunk, skipping the CR of every CRLF pair. A match can never
      // involve the withheld trailing CR because no byte follows it.
      let start = 0;
      let crlfIndex = chunk.indexOf(CRLF, start);
      while (crlfIndex !== -1) {
        emit(chunk.subarray(start, crlfIndex));
        start = crlfIndex + 1; // resume at the LF
        crlfIndex = chunk.indexOf(CRLF, start);
      }
      emit(chunk.subarray(start, end));
    }

    // The file ended with a lone CR: it is content, not half of a CRLF.
    if (pendingCr) {
      emit(crBuffer);
    }
  } finally {
    fs.closeSync(fd);
  }
  return `${size}:${hash.digest('hex')}`;
}

function _hashField(hash: crypto.Hash, header: string, value: string | Buffer | DataView) {
Expand Down
2 changes: 2 additions & 0 deletions packages/@aws-cdk/core/test/fs/eol/lf.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
hello word
this a new line!
24 changes: 24 additions & 0 deletions packages/@aws-cdk/core/test/fs/fs-fingerprint.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import * as os from 'os';
import * as path from 'path';
import { nodeunitShim, Test } from 'nodeunit-shim';
import { FileSystem, SymlinkFollowMode } from '../../lib/fs';
import { contentFingerprint } from '../../lib/fs/fingerprint';

nodeunitShim({
files: {
Expand Down Expand Up @@ -155,4 +156,27 @@ nodeunitShim({
test.done();
},
},

eol: {
  // Verifies that a file fingerprints identically with LF and CRLF line endings.
  'normalizes line endings'(test: Test) {
    // GIVEN
    const lf = path.join(__dirname, 'eol', 'lf.txt');
    const crlf = path.join(__dirname, 'eol', 'crlf.txt');
    // The CRLF fixture is generated from the LF one on the fly.
    fs.writeFileSync(crlf, fs.readFileSync(lf, 'utf8').replace(/\n/g, '\r\n'));

    try {
      const lfStat = fs.statSync(lf);
      const crlfStat = fs.statSync(crlf);

      // WHEN
      const crlfHash = contentFingerprint(crlf);
      const lfHash = contentFingerprint(lf);

      // THEN
      test.notEqual(crlfStat.size, lfStat.size); // Difference in size due to different line endings
      test.deepEqual(crlfHash, lfHash); // Same hash
    } finally {
      // Always remove the generated fixture so a failure above does not
      // leave a stray file in the source tree.
      fs.unlinkSync(crlf);
    }

    test.done();
  },
},
});

0 comments on commit e8f9e6c

Please sign in to comment.