diff --git a/helpers/import.js b/helpers/import.js index 8db01d9..bc17871 100644 --- a/helpers/import.js +++ b/helpers/import.js @@ -39,99 +39,86 @@ function importTable(gtfs, tableName) { */ function processGtfsTable(gtfs, fileContent, tableName, indexKeys) { - let table = (indexKeys.setOfItems) ? new Set() : new Map(); - - Papa.parse(fileContent, { + const parsedFileContent = Papa.parse(fileContent, { delimiter: ',', - header: true, skipEmptyLines: true, - step: (row) => { // streams the CSV by row - const item = processGtfsTableRow(gtfs, tableName, row, indexKeys); - if (!item) { - return; - } - - if (indexKeys.indexKey) { - table.set(item[indexKeys.indexKey], item); - } else if (indexKeys.firstIndexKey && indexKeys.secondIndexKey) { - if (table.has(item[indexKeys.firstIndexKey]) === false) { - table.set(item[indexKeys.firstIndexKey], new Map()); - } - - table.get(item[indexKeys.firstIndexKey]).set(item[indexKeys.secondIndexKey], item); - } else if (indexKeys.singleton) { - table = item; - } else if (indexKeys.setOfItems) { - table.add(item); - } - }, }); - return table; -} + if (parsedFileContent.errors.length) { + let errorMessage = `Invalid rows in table ${tableName}:\n`; -function processGtfsTableRow(gtfs, tableName, row, indexKeys) { - let processedRow = JSON.parse(JSON.stringify(row)); - const rowAsCsv = Papa.unparse(processedRow); - - const errorsInRow = processedRow.errors; - if (errorsInRow.length) { - let errorMessage = `Invalid row in table ${tableName}: - -Line: ${errorsInRow[0].row} -${rowAsCsv}\n\n`; - errorsInRow.forEach((error) => { - errorMessage += `Issue: ${error.message}`; + parsedFileContent.errors.forEach((error) => { + errorMessage += `Line: ${error.row} +Issue: ${error.message} +Row: ${parsedFileContent.data[error.row].join(',')}`; }); - const errorTypes = new Set(errorsInRow.map(error => error.type)); - if (gtfs._shouldThrow === true && !errorTypes.has('FieldMismatch')) { + if (gtfs._shouldThrow === true) { throw new Error(errorMessage); } - - errorMessage += '\nError in CSV was fixed by parser.'; - process.notices.addWarning('Invalid CSV', errorMessage); - processedRow = Papa.parse(rowAsCsv, { // fix FieldMismatch errors (TooFewFields / TooManyFields) - delimiter: ',', - header: true, - }); } + const [keys, ...rows] = parsedFileContent.data; + + checkThatKeysIncludeIndexKeys(keys, indexKeys, tableName); + + const trimmedKeys = keys.map(key => key.trim()); + const GtfsRow = createGtfsClassForKeys(trimmedKeys); + + return processGtfsTableRows(gtfs, tableName, trimmedKeys, rows, indexKeys, GtfsRow); +} + +function processGtfsTableRows(gtfs, tableName, keys, rows, indexKeys, GtfsRow) { + let table = (indexKeys.setOfItems) ? new Set() : new Map(); + const regexPatternObjects = gtfs._regexPatternObjectsByTableName.get(tableName); - if (regexPatternObjects) { - processedRow = applyRegexPatternObjectsByTableName(regexPatternObjects, rowAsCsv, processedRow, tableName); - } - const rowObject = {}; - for (const [field, value] of Object.entries(processedRow.data[0])) { - rowObject[field.trim()] = value.trim(); - } + rows.forEach((row) => { + if (regexPatternObjects) { + row = applyRegexPatternObjectsByTableName(regexPatternObjects, keys, row, tableName); + } + + const trimmedRow = row.map(value => value.trim()); + const gtfsRow = new GtfsRow(trimmedRow); + + if (indexKeys.indexKey) { + table.set(gtfsRow[indexKeys.indexKey], gtfsRow); + } else if (indexKeys.firstIndexKey && indexKeys.secondIndexKey) { + if (table.has(gtfsRow[indexKeys.firstIndexKey]) === false) { + table.set(gtfsRow[indexKeys.firstIndexKey], new Map()); + } - checkThatKeysIncludeIndexKeys(Object.keys(rowObject), indexKeys, tableName); + table.get(gtfsRow[indexKeys.firstIndexKey]).set(gtfsRow[indexKeys.secondIndexKey], gtfsRow); + } else if (indexKeys.singleton) { + table = gtfsRow; + } else if (indexKeys.setOfItems) { + table.add(gtfsRow); + } + }); - return createGtfsObjectFromSimpleObject(rowObject); + return table; } -function applyRegexPatternObjectsByTableName(regexPatternObjects, rowAsCsv, row, tableName) { - let modifiedRowAsCsv; - let modifiedRow = JSON.parse(JSON.stringify(row)); +function applyRegexPatternObjectsByTableName(regexPatternObjects, keys, row, tableName) { + const rowStringified = String(row); + let modifiedRowStringified = rowStringified; regexPatternObjects.forEach(({ regex, pattern }) => { - modifiedRowAsCsv = rowAsCsv.replace(regex, pattern || ''); + modifiedRowStringified = rowStringified.replace(regex, pattern || ''); - if (modifiedRowAsCsv !== rowAsCsv) { + if (modifiedRowStringified !== rowStringified) { process.notices.addInfo( 'Applying Changes on Raw GTFS', `Applying regex replace to table: "${tableName}". regex: "${regex}".` ); - modifiedRow = Papa.parse(modifiedRowAsCsv, { - delimiter: ',', - header: true, - }); } }); - return modifiedRow; + const parsedModifiedRow = Papa.parse(`${keys}\n${modifiedRowStringified}`, { + delimiter: ',', + }); + + return parsedModifiedRow.data[1]; } function checkThatKeysIncludeIndexKeys(sortedKeys, indexKeys, tableName) { @@ -140,19 +127,19 @@ function checkThatKeysIncludeIndexKeys(sortedKeys, indexKeys, tableName) { if (deepness === 1 && sortedKeys.includes(indexKeys.indexKey) === false && indexKeys.indexKey !== 'agency_id') { /* Field agency_id is optional in table agency.txt according to the specification. */ throw new Error( - `Keys of table ${tableName} do not contain the index key: ${indexKeys.indexKey}.\n` + - ` The values are: ${JSON.stringify(indexKeys.indexKey)}` + `Keys of table ${tableName} do not contain the index key: ${indexKeys.indexKey}.\n` + + ` The values are: ${JSON.stringify(indexKeys.indexKey)}` ); } if ( - deepness === 2 && - (sortedKeys.includes(indexKeys.firstIndexKey) === false || sortedKeys.includes(indexKeys.secondIndexKey) === false) + deepness === 2 + && (sortedKeys.includes(indexKeys.firstIndexKey) === false || sortedKeys.includes(indexKeys.secondIndexKey) === false) ) { throw new Error( - `Keys of table ${tableName} do not contain the index keys: ` + - `${indexKeys.firstIndexKey} and ${indexKeys.secondIndexKey}.\n` + - ` The values are: ${JSON.stringify(indexKeys.indexKey)}` + `Keys of table ${tableName} do not contain the index keys: ` + + `${indexKeys.firstIndexKey} and ${indexKeys.secondIndexKey}.\n` + + ` The values are: ${JSON.stringify(indexKeys.indexKey)}` ); } } @@ -199,7 +186,7 @@ function createGtfsClassForKeys(sortedKeys) { return jsonObj; }; - // eslint-disable-next-line func-names + // eslint-disable-next-line func-names GtfsRow.prototype.toJSON = function () { return JSON.stringify(this.toSimpleObject()); }; diff --git a/tests/tests.js b/tests/tests.js index 71961fd..1e6ade7 100644 --- a/tests/tests.js +++ b/tests/tests.js @@ -98,7 +98,7 @@ describe('Tests on GTFS', () => { // Fixes field using regexPatternObjectsByTableName const regexPatternObjectsByTableName = new Map([[ - 'stops', [{ regex: /,"Some ""other"" stop",/g, pattern: ',"Some stop",' }], + 'stops', [{ regex: /,Some "other" stop,/g, pattern: ',Some stop,' }], ]]); const gtfsWithFix = new Gtfs(path, { regexPatternObjectsByTableName });