Skip to content

Commit

Permalink
#2019 importer parsing units made case sensitive (#2024)
Browse files Browse the repository at this point in the history
* initial implementation

* incomplete unit test

* correction

* adding test

* test corrected

* creating new functions

* test corrected
  • Loading branch information
georgeDaskalakis authored Jun 6, 2023
1 parent 50a7aa0 commit e3435e9
Show file tree
Hide file tree
Showing 17 changed files with 371 additions and 242 deletions.
5 changes: 5 additions & 0 deletions src/OSPSuite.Infrastructure.Import/Core/ColumnInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ public bool IsMeasurement
{
get => !IsBase && !IsAuxiliary;
}

/// <summary>
/// Tells whether <paramref name="dimensionForUnit"/> is contained in <c>SupportedDimensions</c>.
/// </summary>
/// <param name="dimensionForUnit">The dimension to look up.</param>
/// <returns><c>true</c> when <c>SupportedDimensions</c> contains the dimension, otherwise <c>false</c>.</returns>
public bool SupportsDimension(IDimension dimensionForUnit) =>
    SupportedDimensions.Contains(dimensionForUnit);
}

public class ColumnInfoCache : Cache<string, ColumnInfo>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ public abstract class AbstractColumnsDataFormat : IDataFormat

public IList<string> ExcelColumnNames { get; protected set; } = new List<string>();

protected readonly IDimensionFactory _dimensionFactory;

/// <summary>
/// Stores <paramref name="dimensionFactory"/> in <c>_dimensionFactory</c> for later use by this class and its subclasses.
/// </summary>
/// <param name="dimensionFactory">The dimension factory kept by this format.</param>
protected AbstractColumnsDataFormat(IDimensionFactory dimensionFactory) =>
    _dimensionFactory = dimensionFactory;

public double SetParameters(DataSheet rawDataSheet, ColumnInfoCache columnInfos, IReadOnlyList<MetaDataCategory> metaDataCategories)
{
if (NotCompatible(rawDataSheet, columnInfos))
Expand All @@ -26,6 +33,16 @@ public double SetParameters(DataSheet rawDataSheet, ColumnInfoCache columnInfos,
return 1 + setParameters(rawDataSheet, columnInfos, metaDataCategories);
}

/// <summary>
/// Returns the entries of <c>Parameters</c> that are of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The parameter type to filter by.</typeparam>
/// <returns>A sequence produced by <c>OfType</c>, i.e. only the entries that are of type <typeparamref name="T"/>.</returns>
public IEnumerable<T> GetParameters<T>() where T : DataFormatParameter =>
    Parameters.OfType<T>();

/// <summary>
/// Finds the first entry of <c>Parameters</c> that is of type <typeparamref name="T"/> and whose
/// <c>ColumnName</c> equals <paramref name="columnName"/>.
/// </summary>
/// <typeparam name="T">The parameter type to search for.</typeparam>
/// <param name="columnName">The column name to match.</param>
/// <returns>The first matching parameter, or the default value of <typeparamref name="T"/> when none matches.</returns>
public T GetColumnByName<T>(string columnName) where T : DataFormatParameter =>
    Parameters
        .OfType<T>()
        .FirstOrDefault(parameter => parameter.ColumnName == columnName);

protected bool NotCompatible(DataSheet dataSheet, ColumnInfoCache columnInfos)
{
return (dataSheet.GetHeaders()
Expand All @@ -52,7 +69,7 @@ private double setParameters(DataSheet dataSheet, ColumnInfoCache columnInfos, I

private void setDimensionsForMappings(ColumnInfoCache columnInfos)
{
foreach (var parameter in Parameters.OfType<MappingDataFormatParameter>())
foreach (var parameter in GetParameters<MappingDataFormatParameter>())
{
var mappedColumn = parameter.MappedColumn;

Expand All @@ -74,10 +91,9 @@ private void setDimensionsForMappings(ColumnInfoCache columnInfos)
mappedColumn.Dimension = null;
else
{
var supportedDimensions = concreteColumnInfo.SupportedDimensions;
var dimensionForUnit = supportedDimensions.FirstOrDefault(x => x.SupportsUnit(mappedColumn.Unit.SelectedUnit, ignoreCase: true));
var dimensionForUnit = _dimensionFactory.DimensionForUnit(mappedColumn.Unit.SelectedUnit);

if (dimensionForUnit == null)
if (dimensionForUnit == null || !concreteColumnInfo.SupportsDimension(dimensionForUnit))
mappedColumn.Unit = new UnitDescription(UnitDescription.InvalidUnit);
else
mappedColumn.Dimension = dimensionForUnit;
Expand All @@ -87,7 +103,7 @@ private void setDimensionsForMappings(ColumnInfoCache columnInfos)

private void setSecondaryColumnUnit(ColumnInfoCache columnInfos)
{
var mappings = Parameters.OfType<MappingDataFormatParameter>().ToList();
var mappings = GetParameters<MappingDataFormatParameter>().ToList();
foreach (var column in columnInfos.Where(c => !c.IsAuxiliary))
{
foreach (var relatedColumn in columnInfos.RelatedColumnsFrom(column.Name))
Expand Down Expand Up @@ -158,8 +174,9 @@ protected virtual void ExtractQualifiedHeadings(List<string> keys, List<string>

protected string ValidateUnit(string unit, IReadOnlyList<IDimension> supportedDimensions)
{
var dimensionForUnit = supportedDimensions.FirstOrDefault(x => x.SupportsUnit(unit, ignoreCase: true));
if (dimensionForUnit == null)
var dimensionForUnit = _dimensionFactory.DimensionForUnit(unit);

if (dimensionForUnit == null || !supportedDimensions.Contains(dimensionForUnit))
return UnitDescription.InvalidUnit;

//We know it exists here as it was found previously
Expand All @@ -174,8 +191,7 @@ protected virtual void ExtractNonQualifiedHeadings(List<string> keys, List<strin
var headerKey = keys.FirstOrDefault
(h =>
dataSheet.GetColumnDescription(h).Level == ColumnDescription.MeasurementLevel.Numeric &&
Parameters
.OfType<MappingDataFormatParameter>()
GetParameters<MappingDataFormatParameter>()
.All(m => m.ColumnName != h)
);
if (headerKey == null) continue;
Expand Down Expand Up @@ -272,7 +288,7 @@ private Dictionary<ExtendedColumn, IList<SimulationPoint>> parseMappings(IEnumer
var dictionary = new Dictionary<ExtendedColumn, IList<SimulationPoint>>();

//Add time mapping
var mappingParameters = Parameters.OfType<MappingDataFormatParameter>().ToList();
var mappingParameters = GetParameters<MappingDataFormatParameter>().ToList();

var dataSet = rawDataSet.ToList();
foreach (var columnInfo in columnInfos)
Expand All @@ -281,7 +297,7 @@ private Dictionary<ExtendedColumn, IList<SimulationPoint>> parseMappings(IEnumer
if (currentParameter == null) continue;
Func<MappingDataFormatParameter, DataSheet, UnformattedRow, SimulationPoint> mappingsParser =
currentParameter.MappedColumn.LloqColumn == null
? (Func<MappingDataFormatParameter, DataSheet, UnformattedRow, SimulationPoint>) parseMappingOnSameColumn
? (Func<MappingDataFormatParameter, DataSheet, UnformattedRow, SimulationPoint>)parseMappingOnSameColumn
: parseMappingOnSameGivenColumn;

dictionary.Add
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ public class DataFormatHeadersWithUnits : AbstractColumnsDataFormat
{
public override string Name => "Headers with units";
public override string Description => "https://github.com/Open-Systems-Pharmacology/OSPSuite.Core/issues/639";

/// <summary>
/// Creates the format and passes <paramref name="dimensionFactory"/> on to the base class constructor.
/// </summary>
/// <param name="dimensionFactory">Dimension factory forwarded to the base constructor.</param>
public DataFormatHeadersWithUnits(IDimensionFactory dimensionFactory) : base(dimensionFactory)
{
}
protected override string ExtractLLOQ(string description, DataSheet dataSheet, List<string> keys, ref double rank)
{
if (dataSheet.GetColumn(description).Any(element => element.Trim().StartsWith("<")))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ public class DataFormatNonmem : AbstractColumnsDataFormat
public override string Name => "Nonmem";
public override string Description => "https://github.com/Open-Systems-Pharmacology/OSPSuite.Core/issues/797";

/// <summary>
/// Creates the format and passes <paramref name="dimensionFactory"/> on to the base class constructor.
/// </summary>
/// <param name="dimensionFactory">Dimension factory forwarded to the base constructor.</param>
public DataFormatNonmem(IDimensionFactory dimensionFactory) : base(dimensionFactory)
{
}

protected override string ExtractLLOQ(string description, DataSheet dataSheet, List<string> keys, ref double rank)
{
var lloqKey = dataSheet.GetHeaders().FindHeader(description + "_LLOQ");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@ namespace OSPSuite.Infrastructure.Import.Core.DataFormat
public class MixColumnsDataFormat : AbstractColumnsDataFormat
{
public override string Name => "Mixin";
public override string Description => "https://github.com/Open-Systems-Pharmacology/OSPSuite.Core/issues/639\rhttps://github.com/Open-Systems-Pharmacology/OSPSuite.Core/issues/797";

public override string Description =>
"https://github.com/Open-Systems-Pharmacology/OSPSuite.Core/issues/639\rhttps://github.com/Open-Systems-Pharmacology/OSPSuite.Core/issues/797";

/// <summary>
/// Creates the format and passes <paramref name="dimensionFactory"/> on to the base class constructor.
/// </summary>
/// <param name="dimensionFactory">Dimension factory forwarded to the base constructor.</param>
public MixColumnsDataFormat(IDimensionFactory dimensionFactory) : base(dimensionFactory)
{
}

protected override string ExtractLLOQ(string description, DataSheet dataSheet, List<string> keys, ref double rank)
{
Expand All @@ -31,7 +37,8 @@ protected override string ExtractLLOQ(string description, DataSheet dataSheet, L
return lloqKey;
}

protected override UnitDescription ExtractUnits(string description, DataSheet dataSheet, List<string> keys, IReadOnlyList<IDimension> supportedDimensions, ref double rank)
protected override UnitDescription ExtractUnits(string description, DataSheet dataSheet, List<string> keys,
IReadOnlyList<IDimension> supportedDimensions, ref double rank)
{
var (_, unit) = UnitExtractor.ExtractNameAndUnit(description);

Expand Down
2 changes: 2 additions & 0 deletions src/OSPSuite.Infrastructure.Import/Core/IDataFormat.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,7 @@ public interface IDataFormat
IList<string> ExcelColumnNames { get; }
IEnumerable<ParsedDataSet> Parse(DataSheet dataSheet, ColumnInfoCache columnInfos);
UnitDescription ExtractUnitDescriptions(string description, IReadOnlyList<IDimension> supportedDimensions);
/// <summary>
/// Looks up a parameter of type <typeparamref name="T"/> by its column name.
/// </summary>
/// <typeparam name="T">The parameter type to search for.</typeparam>
/// <param name="columnName">The column name to match.</param>
/// <returns>The matching parameter. NOTE(review): the behavior when nothing matches is up to the implementation — confirm.</returns>
T GetColumnByName<T>(string columnName) where T : DataFormatParameter;
/// <summary>
/// Returns the parameters of this format that are of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The parameter type to filter by.</typeparam>
/// <returns>The parameters of type <typeparamref name="T"/>.</returns>
IEnumerable<T> GetParameters<T>() where T : DataFormatParameter;
}
}
4 changes: 2 additions & 2 deletions src/OSPSuite.Infrastructure.Import/Services/Importer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -316,13 +316,13 @@ DataImporterSettings dataImporterSettings
}

dataSourceFile.Format.CopyParametersFromConfiguration(configuration);
var mappings = dataSourceFile.Format.Parameters.OfType<MetaDataFormatParameter>().Select(md => new MetaDataMappingConverter()
var mappings = dataSourceFile.Format.GetParameters<MetaDataFormatParameter>().Select(md => new MetaDataMappingConverter()
{
Id = md.MetaDataId,
Index = sheetName => md.IsColumn ? dataSourceFile.DataSheets.GetDataSheetByName(sheetName).GetColumnDescription(md.ColumnName).Index : -1
}).Union
(
dataSourceFile.Format.Parameters.OfType<GroupByDataFormatParameter>().Select(md => new MetaDataMappingConverter()
dataSourceFile.Format.GetParameters<GroupByDataFormatParameter>().Select(md => new MetaDataMappingConverter()
{
Id = md.ColumnName,
Index = sheetName => dataSourceFile.DataSheets.GetDataSheetByName(sheetName).GetColumnDescription(md.ColumnName).Index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -540,10 +540,10 @@ private IEnumerable<string>
.Where
(
cn =>
_format.Parameters.OfType<MappingDataFormatParameter>().All(p =>
_format.GetParameters<MappingDataFormatParameter>().All(p =>
p.ColumnName != cn && p.MappedColumn?.Unit?.ColumnName != cn && p.MappedColumn?.LloqColumn != cn) &&
_format.Parameters.OfType<MetaDataFormatParameter>().All(p => p.ColumnName != cn) &&
_format.Parameters.OfType<GroupByDataFormatParameter>().All(p => p.ColumnName != cn)
_format.GetParameters<MetaDataFormatParameter>().All(p => p.ColumnName != cn) &&
_format.GetParameters<GroupByDataFormatParameter>().All(p => p.ColumnName != cn)
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
using System.Collections.Generic;
using System.Data;
using System.Linq;
using MathNet.Numerics;
using OSPSuite.Assets;
using OSPSuite.Core.Extensions;
using OSPSuite.Core.Import;
Expand Down Expand Up @@ -51,7 +50,7 @@ public DataTable GetSheet(string tabName)
public void ImportDataForConfirmation()
{
var sheets = ImportedSheets.AddNotExistingSheets(_dataSourceFile.DataSheets);

if (sheets.Count == 0)
return;

Expand Down Expand Up @@ -82,7 +81,8 @@ public void ImportDataForConfirmation(string sheetName)
return;

OnImportSheets.Invoke(this,
new ImportSheetsEventArgs { DataSourceFile = _dataSourceFile, SheetNames = sheets.GetDataSheetNames(), Filter = GetActiveFilterCriteria() });
new ImportSheetsEventArgs
{ DataSourceFile = _dataSourceFile, SheetNames = sheets.GetDataSheetNames(), Filter = GetActiveFilterCriteria() });
}

public string GetFilter()
Expand Down Expand Up @@ -147,16 +147,15 @@ private void setMetaDataWithManualInput()
if (!metaData.AllowsManualInput)
continue;

var parameter = _dataSourceFile.Format.Parameters.OfType<MetaDataFormatParameter>().FirstOrDefault(p => p.ColumnName == metaData.Name);
var parameter = _dataSourceFile.Format.GetColumnByName<MetaDataFormatParameter>(metaData.Name);

if (parameter != null)
continue;

parameter = new MetaDataFormatParameter(null, metaData.Name, false);

if (_dataSourceFile.Format.Parameters.Any(p => (p as MetaDataFormatParameter)?.MetaDataId == parameter.MetaDataId))
if (_dataSourceFile.Format.Parameters.Any(p => (p as MetaDataFormatParameter)?.MetaDataId == parameter.MetaDataId))
continue;
if (_dataSourceFile.Format.GetParameters<MetaDataFormatParameter>().Any(p => p.MetaDataId == parameter.MetaDataId))
continue;

_dataSourceFile.Format.Parameters.Add(parameter);
return;
Expand All @@ -168,7 +167,7 @@ private void setDefaultMetaData()
foreach (var metaData in _metaDataCategories)
{
if (!metaData.SelectDefaultValue || metaData.DefaultValue == null) continue;
var parameter = _dataSourceFile.Format.Parameters.OfType<MetaDataFormatParameter>().FirstOrDefault(p => p.ColumnName == metaData.Name);
var parameter = _dataSourceFile.Format.GetColumnByName<MetaDataFormatParameter>(metaData.Name);
if (parameter == null)
{
parameter = new MetaDataFormatParameter(metaData.DefaultValue.ToString(), metaData.Name, false);
Expand Down
24 changes: 15 additions & 9 deletions src/OSPSuite.Presentation/Presenters/Importer/ImporterPresenter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ private void plotDataSet(object sender, DataSetSelectedEventArgs e)
catch (TimeNotStrictlyMonotoneException timeNonMonotoneException)
{
var errors = new ParseErrors();
errors.Add(_dataSource.DataSetAt(e.Index), new NonMonotonicalTimeParseErrorDescription(Error.ErrorWhenPlottingDataRepository(e.Index, timeNonMonotoneException.Message)));
errors.Add(_dataSource.DataSetAt(e.Index),
new NonMonotonicalTimeParseErrorDescription(Error.ErrorWhenPlottingDataRepository(e.Index, timeNonMonotoneException.Message)));
_importerDataPresenter.SetTabMarks(errors);
_confirmationPresenter.SetViewingStateToError(timeNonMonotoneException.Message);
}
Expand Down Expand Up @@ -184,7 +185,7 @@ public void ImportData(object sender, EventArgs e)

var configuration = UpdateAndGetConfiguration();
configuration.Id = id;
OnTriggerImport.Invoke(this, new ImportTriggeredEventArgs {DataRepositories = dataRepositories});
OnTriggerImport.Invoke(this, new ImportTriggeredEventArgs { DataRepositories = dataRepositories });
}

private void loadSheetsFromDataPresenter(object sender, ImportSheetsEventArgs args)
Expand Down Expand Up @@ -217,16 +218,17 @@ private void loadSheets(IDataSourceFile dataSourceFile, IReadOnlyList<string> sh
}

var sheets = dataSourceFile.DataSheets.GetDataSheetsByName(sheetNames);
var dataMappings = dataSourceFile.Format.Parameters.OfType<MetaDataFormatParameter>().Where(p => p.ColumnName != null).Select(md =>
var dataMappings = dataSourceFile.Format.GetParameters<MetaDataFormatParameter>().Where(p => p.ColumnName != null).Select(md =>
new MetaDataMappingConverter()
{
Id = md.MetaDataId,
Index = sheetName => md.IsColumn ? dataSourceFile.DataSheets.GetDataSheetByName(sheetName).GetColumnDescription(md.ColumnName).Index : -1
Index = sheetName =>
md.IsColumn ? dataSourceFile.DataSheets.GetDataSheetByName(sheetName).GetColumnDescription(md.ColumnName).Index : -1
}).ToList();

var mappings = dataMappings.Union
(
dataSourceFile.Format.Parameters.OfType<GroupByDataFormatParameter>().Select(md => new MetaDataMappingConverter()
dataSourceFile.Format.GetParameters<GroupByDataFormatParameter>().Select(md => new MetaDataMappingConverter()
{
//in case of a duplicate name coming from an excel column used as a grouping by with the same name as a metaData, we add a suffix
Id = dataMappings.ExistsById(md.ColumnName) ? md.ColumnName + Constants.ImporterConstants.GroupingBySuffix : md.ColumnName,
Expand Down Expand Up @@ -336,7 +338,8 @@ public bool SetSourceFile(string path)

public void SaveConfiguration()
{
var fileName = _dialogCreator.AskForFileToSave(Captions.Importer.SaveConfiguration, Constants.Filter.XML_FILE_FILTER, Constants.DirectoryKey.OBSERVED_DATA);
var fileName = _dialogCreator.AskForFileToSave(Captions.Importer.SaveConfiguration, Constants.Filter.XML_FILE_FILTER,
Constants.DirectoryKey.OBSERVED_DATA);

if (string.IsNullOrEmpty(fileName))
return;
Expand All @@ -361,11 +364,13 @@ private void openFile(string configurationFileName)
private void applyConfiguration(ImporterConfiguration configuration)
{
var excelColumnNames = _columnMappingPresenter.GetAllAvailableExcelColumns();
var listOfNonExistingColumns = configuration.Parameters.Where(parameter => !excelColumnNames.Contains(parameter.ColumnName) && parameter.ComesFromColumn()).ToList();
var listOfNonExistingColumns = configuration.Parameters
.Where(parameter => !excelColumnNames.Contains(parameter.ColumnName) && parameter.ComesFromColumn()).ToList();

if (listOfNonExistingColumns.Any())
{
var confirm = _dialogCreator.MessageBoxYesNo(Captions.Importer.ConfirmDroppingExcelColumns(string.Join("\n", listOfNonExistingColumns.Select(x => x.ColumnName))));
var confirm = _dialogCreator.MessageBoxYesNo(
Captions.Importer.ConfirmDroppingExcelColumns(string.Join("\n", listOfNonExistingColumns.Select(x => x.ColumnName))));

if (confirm == ViewResult.No)
return;
Expand All @@ -377,7 +382,8 @@ private void applyConfiguration(ImporterConfiguration configuration)
}

var mappings = configuration.Parameters.OfType<MappingDataFormatParameter>();
var listOfNonExistingUnitColumns = mappings.Where(parameter => !parameter.MappedColumn.Unit.ColumnName.IsNullOrEmpty() && !excelColumnNames.Contains(parameter.MappedColumn.Unit.ColumnName)).ToList();
var listOfNonExistingUnitColumns = mappings.Where(parameter =>
!parameter.MappedColumn.Unit.ColumnName.IsNullOrEmpty() && !excelColumnNames.Contains(parameter.MappedColumn.Unit.ColumnName)).ToList();
foreach (var element in listOfNonExistingUnitColumns)
{
element.MappedColumn.Unit = new UnitDescription();
Expand Down
9 changes: 7 additions & 2 deletions src/OSPSuite.R/Services/DataImporterTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,13 @@ public interface IDataImporterTask
void SaveConfiguration(ImporterConfiguration configuration, string path);
IReadOnlyList<DataRepository> ImportExcelFromConfiguration(string configurationPath, string dataPath = null);
IReadOnlyList<DataRepository> ImportExcelFromConfiguration(ImporterConfiguration configuration, string dataPath);
IReadOnlyList<DataRepository> ImportCsvFromConfiguration(string configurationPath, string dataPath, char columnSeparator, char decimalSeparator = '.');
IReadOnlyList<DataRepository> ImportCsvFromConfiguration(ImporterConfiguration configuration, string dataPath, char columnSeparator, char decimalSeparator = '.');

IReadOnlyList<DataRepository> ImportCsvFromConfiguration(string configurationPath, string dataPath, char columnSeparator,
char decimalSeparator = '.');

IReadOnlyList<DataRepository> ImportCsvFromConfiguration(ImporterConfiguration configuration, string dataPath, char columnSeparator,
char decimalSeparator = '.');

MappingDataFormatParameter GetTime(ImporterConfiguration configuration);
MappingDataFormatParameter GetMeasurement(ImporterConfiguration configuration);
MappingDataFormatParameter GetError(ImporterConfiguration configuration);
Expand Down
Loading

0 comments on commit e3435e9

Please sign in to comment.