Skip to content

Commit

Permalink
Check valid csv metadata and fix #64
Browse files Browse the repository at this point in the history
Consistently check for valid csv metadata and show a warning before running menu items, and use the known datetime format to fix issue #64
  • Loading branch information
BdR76 committed May 26, 2023
1 parent e71d566 commit ccf92cf
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 70 deletions.
26 changes: 5 additions & 21 deletions CSVLintNppPlugin/CsvLint/CsvAnalyze.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,6 @@ namespace CSVLint
{
class CsvAnalyze
{
/// <summary>
/// Plain holder of per-column statistics (widths, datatype counters, decimal and date details).
/// NOTE(review): the only references visible here are commented-out
/// <c>List&lt;CsvColumStats&gt;</c> declarations next to <c>List&lt;CsvAnalyzeColumn&gt;</c>,
/// so this class looks superseded by CsvAnalyzeColumn - confirm before relying on it.
/// </summary>
private class CsvColumStats
{
// column name, empty until set
public string Name = "";
// starts at a large sentinel; presumably lowered as shorter values are seen - confirm against usage
public int MinWidth = 9999;
// starts at zero; presumably raised as longer values are seen - confirm against usage
public int MaxWidth = 0;
// counters per detected value type, all starting at zero
public int CountString = 0;
public int CountInteger = 0;
public int CountDecimal = 0;
// presumably how often a comma resp. a point was seen as decimal separator - TODO confirm
public int CountDecimalComma = 0;
public int CountDecimalPoint = 0;
public int DecimalDigMax = 0; // maximum digits, example "1234.5" = 4 digits
public int DecimalDecMax = 0; // maximum decimals, example "123.45" = 2 decimals
public int CountDateTime = 0;
// '\0' is the initial value; presumably means "no date separator detected yet" - TODO confirm
public char DateSep = '\0';
// presumably the maximum value seen in the first, second and third date part - TODO confirm
public int DateMax1 = 0;
public int DateMax2 = 0;
public int DateMax3 = 0;
}

/// <summary>
/// Infer CSV definition from data; determine separators, column names, datatypes etc
/// </summary>
Expand Down Expand Up @@ -349,7 +330,6 @@ public static CsvDefinition InferFromData(bool autodetect, char mansep, string m

// examine data and keep statistics for each column
List<CsvAnalyzeColumn> colstats = new List<CsvAnalyzeColumn>();
//List<CsvColumStats> colstats = new List<CsvColumStats>();
lineContent = 0;

// skip any comment lines
Expand Down Expand Up @@ -512,7 +492,6 @@ public static void StatisticalReportData(CsvDefinition csvdef)
// examine data and keep statistics for each column
List<CsvAnalyzeColumn> colstats = new List<CsvAnalyzeColumn>();

//List<CsvColumStats> colstats = new List<CsvColumStats>();
int lineCount = 0;
bool fixedwidth = csvdef.Separator == '\0';

Expand Down Expand Up @@ -542,6 +521,11 @@ public static void StatisticalReportData(CsvDefinition csvdef)
if (i > colstats.Count - 1)
{
colstats.Add(new CsvAnalyzeColumn(i));
// if datetime field, the datetime format is already known
if ( (i < csvdef.Fields.Count) && (csvdef.Fields[i].DataType == ColumnType.DateTime) )
{
colstats[i].DateTimeFormatKnown(csvdef.Fields[i].Mask);
}
}

// next value to evaluate
Expand Down
19 changes: 19 additions & 0 deletions CSVLintNppPlugin/CsvLint/CsvAnalyzeColumn.cs
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,25 @@ public void KeepMinMaxDecimal(string value, char dec)
}
}

/// <summary>
/// Registers a datetime format that is already known for this column before any values are analyzed.
/// When running the Analyze Data Report the csvdef (and thus the mask of each DateTime field) is
/// available up front, so the day/month order does not have to be guessed from the data. This avoids
/// a day/Month vs Month/day mixup when determining the minimum and maximum date.
/// </summary>
/// <param name="knownformat">
/// Datetime mask, for example "yyyy-MM-dd", "dd-MM-yyyy" or "MM/dd/yyyy".
/// A null or empty mask is ignored and leaves the column state untouched.
/// </param>
public void DateTimeFormatKnown(string knownformat)
{
    // no usable mask: keep the current (unknown) state; this also guards against a null mask
    if (string.IsNullOrEmpty(knownformat))
    {
        return;
    }

    // position of the first day, month and year placeholder; the char overloads compare ordinally,
    // so the result does not depend on the current culture. A missing placeholder yields -1.
    int posDay = knownformat.IndexOf('d');
    int posMonth = knownformat.IndexOf('M');
    int posYear = knownformat.IndexOf('y');

    // determine YMD, DMY or MDY
    bool dayBeforeYear = posDay < posYear;    // so not "yyyy-MM-dd"
    bool monthBeforeDay = posMonth < posDay;  // example "MM/dd/yyyy"

    // 0=unknown, 1=YMD, 2=DMY, 3=MDY
    int order;
    if (!dayBeforeYear)
    {
        order = 1;
    }
    else if (monthBeforeDay)
    {
        order = 3;
    }
    else
    {
        order = 2;
    }

    // when datetime format is already known
    this.stat_dat_dmy = order;
    this.stat_dat_format = knownformat;
}

public void KeepMinMaxDateTime(string value, int ddmax1, int ddmax2, int ddmax3, int datatype)
{
// TODO: this is not optimal, could still miss some minimum/maximum dates when initially assuming incorrect format
Expand Down
119 changes: 70 additions & 49 deletions CSVLintNppPlugin/Main.cs
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,19 @@ public static CsvDefinition GetCurrentCsvDef()
return FileCsvDef.TryGetValue(filename, out CsvDefinition result) ? result : null;
}

/// <summary>
/// Checks whether usable csv metadata is available and, if not, tells the user how to get it.
/// The metadata is rejected when there is no definition at all, when it has no fields, or when it
/// consists of exactly one String column with a MaxWidth of 9999 or more.
/// </summary>
/// <param name="csvdef">Csv definition of the current file; may be null when none is registered.</param>
/// <param name="errmsg">Short description of the attempted action, inserted after "Cannot " in the warning.</param>
/// <returns>True when the metadata can be used; false after the warning message box was shown.</returns>
public static bool CheckValidCsvDef(CsvDefinition csvdef, string errmsg)
{
    // GetCurrentCsvDef returns null when no definition is stored for the file,
    // and a definition without any fields is just as unusable
    bool missing = (csvdef == null) || (csvdef.Fields.Count == 0);

    // a single String column with MaxWidth >= 9999 is also treated as "no real metadata"
    bool singleTextColumn = !missing
        && (csvdef.Fields.Count == 1)
        && (csvdef.Fields[0].DataType == ColumnType.String)
        && (csvdef.Fields[0].MaxWidth >= 9999);

    if (missing || singleTextColumn)
    {
        // show warning message and solution
        string warning = string.Format("Cannot {0} without valid csv metadata\nOpen the CSV Lint window, press [Detect columns] and try again.", errmsg);
        MessageBox.Show(warning, "Missing csv metadata", MessageBoxButtons.OK, MessageBoxIcon.Information);
        return false;
    }

    return true;
}

public static void RemoveCSVdef(IntPtr buffer_id)
{
// Notepad++ closes a file, also remove the definition from list
Expand Down Expand Up @@ -580,29 +593,33 @@ internal static void convertData()
// get dictionary
CsvDefinition csvdef = GetCurrentCsvDef();

// show split column dialog
var frmparam = new DataConvertForm();
frmparam.InitialiseSetting();
// check if valid csv metadata
if (CheckValidCsvDef(csvdef, "convert data"))
{
// show split column dialog
var frmparam = new DataConvertForm();
frmparam.InitialiseSetting();

DialogResult r = frmparam.ShowDialog();
DialogResult r = frmparam.ShowDialog();

// clear up
frmparam.Dispose();
// clear up
frmparam.Dispose();

// return true (OK) or false (Cancel)
if (r == DialogResult.OK)
{
switch (Main.Settings.DataConvertType)
// return true (OK) or false (Cancel)
if (r == DialogResult.OK)
{
case 1: // XML
CsvEdit.ConvertToXML(csvdef);
break;
case 2: // JSON
CsvEdit.ConvertToJSON(csvdef);
break;
default: // case 0: SQL
CsvEdit.ConvertToSQL(csvdef);
break;
switch (Main.Settings.DataConvertType)
{
case 1: // XML
CsvEdit.ConvertToXML(csvdef);
break;
case 2: // JSON
CsvEdit.ConvertToJSON(csvdef);
break;
default: // case 0: SQL
CsvEdit.ConvertToSQL(csvdef);
break;
}
}
}
}
Expand All @@ -611,33 +628,37 @@ internal static void generateMetaData()
{
// get dictionary
CsvDefinition csvdef = GetCurrentCsvDef();

// show metadata options
var frmparam = new MetaDataGenerateForm();
frmparam.InitialiseSetting();

DialogResult r = frmparam.ShowDialog();

// clear up
frmparam.Dispose();

// return true (OK) or false (Cancel)
if (r == DialogResult.OK)

// check if valid csv metadata
if (CheckValidCsvDef(csvdef, "generate script"))
{
switch (Main.Settings.MetadataType)
// show metadata options
var frmparam = new MetaDataGenerateForm();
frmparam.InitialiseSetting();

DialogResult r = frmparam.ShowDialog();

// clear up
frmparam.Dispose();

// return true (OK) or false (Cancel)
if (r == DialogResult.OK)
{
case 1: // schema JSON
CsvGenerateCode.GenerateSchemaJSON(csvdef);
break;
case 2: // Python
CsvGenerateCode.GeneratePythonPanda(csvdef);
break;
case 3: // R - script
CsvGenerateCode.GenerateRScript(csvdef);
break;
default: // case 0: schema ini
CsvGenerateCode.GenerateSchemaIni(csvdef);
break;
switch (Main.Settings.MetadataType)
{
case 1: // schema JSON
CsvGenerateCode.GenerateSchemaJSON(csvdef);
break;
case 2: // Python
CsvGenerateCode.GeneratePythonPanda(csvdef);
break;
case 3: // R - script
CsvGenerateCode.GenerateRScript(csvdef);
break;
default: // case 0: schema ini
CsvGenerateCode.GenerateSchemaIni(csvdef);
break;
}
}
}
}
Expand All @@ -646,9 +667,9 @@ internal static void AnalyseDataReport()
{
// get dictionary
CsvDefinition csvdef = GetCurrentCsvDef();
// check if valid dictionary
if (csvdef.Fields.Count > 0)

// check if valid csv metadata
if (CheckValidCsvDef(csvdef, "run Analyze Data Report"))
{
// validate data
CsvAnalyze.StatisticalReportData(csvdef);
Expand All @@ -660,8 +681,8 @@ internal static void CountUniqueValues()
// get dictionary
CsvDefinition csvdef = GetCurrentCsvDef();

// check if valid dictionary
if (csvdef.Fields.Count > 0)
// check if valid csv metadata
if (CheckValidCsvDef(csvdef, "count unique values"))
{
// show unique values parameters form
var frmunq = new UniqueValuesForm();
Expand Down
Binary file modified CSVLintNppPlugin/bin/Release-x64/CSVLint.dll
Binary file not shown.
Binary file modified CSVLintNppPlugin/bin/Release/CSVLint.dll
Binary file not shown.

0 comments on commit ccf92cf

Please sign in to comment.