From ec0bd26d17fd0ccb5d26ab0e1948b2a03c959aa7 Mon Sep 17 00:00:00 2001 From: "INTERNAL\\stitova" Date: Fri, 22 Nov 2024 18:41:43 -0800 Subject: [PATCH 1/6] updating docs for vector migration by genetics changes. --- .../vector-migration-by-genetics-input.csv | 18 +++ docs/csv/vector-migration-by-sex-input.csv | 9 ++ .../software-migration-creation-vector.rst | 104 ++++++++++++++++++ docs/emod/software-migration-vector.rst | 70 ++++++++---- docs/emod/software-migration.rst | 3 +- docs/json/vector-migration-metadata.json | 12 ++ 6 files changed, 191 insertions(+), 25 deletions(-) create mode 100644 docs/csv/vector-migration-by-genetics-input.csv create mode 100644 docs/csv/vector-migration-by-sex-input.csv create mode 100644 docs/emod/software-migration-creation-vector.rst create mode 100644 docs/json/vector-migration-metadata.json diff --git a/docs/csv/vector-migration-by-genetics-input.csv b/docs/csv/vector-migration-by-genetics-input.csv new file mode 100644 index 0000000..d21951f --- /dev/null +++ b/docs/csv/vector-migration-by-genetics-input.csv @@ -0,0 +1,18 @@ +FromNodeID,ToNodeID,[],"[[""X1"",""Y2""]]","[[""a1"", ""a1""],[""b1"",""b0""], [""X1"", ""X1""]]","[[""*"", ""a0""], [""X1"", ""Y1""]]","[[""a1"", ""a1""], [""b1"", ""b1""]]" +5,1,0.1,0,0,0,0 +5,2,0,0.1,0,0,0 +5,3,0,0,0.1,0,0 +5,4,0,0,0,0.1,0 +5,6,0,0,0,0,0.1 +5,7,0.1,0.1,0,0,0 +5,8,0.1,0,0.1,0.05,0.01 +5,9,0,0.1,0,0,0.1 +1,2,1,0,0,0,0 +1,3,0,1,0,0,0 +1,4,0,0,1,0,0 +1,6,0,0,0,1,0 +3,6,0,0,0,0,0.5 +3,7,0,0.5,0,0,0 +3,8,0.5,0,0,0.0,0.5 +3,9,0,0.5,0,0,0.0 + diff --git a/docs/csv/vector-migration-by-sex-input.csv b/docs/csv/vector-migration-by-sex-input.csv new file mode 100644 index 0000000..ff266dc --- /dev/null +++ b/docs/csv/vector-migration-by-sex-input.csv @@ -0,0 +1,9 @@ +FromNodeID,ToNodeID,RateMales,RateFemales +5,1,0.1,0.02 +5,2,0.1,0.02 +5,3,0.1,0.02 +5,4,0.1,0.02 +5,6,0,0.02 +5,7,0,0.02 +5,8,0.1,0 +5,9,0.1,0 diff --git a/docs/emod/software-migration-creation-vector.rst 
b/docs/emod/software-migration-creation-vector.rst new file mode 100644 index 0000000..145fb8e --- /dev/null +++ b/docs/emod/software-migration-creation-vector.rst @@ -0,0 +1,104 @@ +==================================== +How to create vector migration files +==================================== + +You can create the JSON metadata and binary migration files needed by |EMOD_s| to run simulations +from CSV data using Python scripts provided by |IDM_s|. You can assign the same +probability of migration to each vector in a node or you can assign different migration rates based on gender or +genetics of the vector. + +.. note:: + + The **IdReference** must match the value in the demographics file. Each node can be connected to a + maximum of 100 destination nodes. The bin.json metadata file will be created without a valid +  **IdReference** with expectations that the user will set it themselves. + + +Create from CSV input +===================== + +This script converts a CSV formatted txt file to an EMOD binary-formatted migration file. +It also creates the required metadata file. + +The CSV file can have several column configurations: + +1. Header (optional): FromNodeID, ToNodeID, Rate (Average # of Trips Per Day) +If the csv/text file has three columns with no headers, this is the format we assume. +This can be used for human and vector migration. The Rate is for any/all agents regardless of sex or age. + +2. Header (optional): FromNodeID, ToNodeID, RateMales, RateFemales +If the csv/text file has four columns with no headers, this is the format we assume. +RateMales are rates for male migration, RateFemales for female migration and are Average # of Trips Per Day. + +3. Header (required): FromNodeID, ToNodeID, [], arrays denoting Allele_Combinations +Allele_Combinations example: [["a1", "a1"], ["b1", "b1"]]; [["X1","Y2"]]; [["*", "a0"], ["X1", "Y1"]] +Due to use of commas in headers, it is best to use Excel to create them (or look at a sample text csv). 
+This is to support VECTOR_MIGRATION_BY_GENETICS. Headers are required for this csv file. +The first (empty, []) array is used as a "default rate" if the vector's genetics doesn't match any of the +Allele_Combinations. The other column headers denote the rate that the vector will travel at if it matches the +Allele_Combination listed. Vectors are checked against Allele_Combinations from most-specific, to least-specific, +regardless of the order in the csv file. Allele_Combinations can, but don't have to, include sex-alleles. Without +specified sex-alleles, any vector that matches the alleles regardless of sex will travel at that rate. + +The FromNodeIDs and ToNodeIDs are the external ID's found in the demographics file. +Each node ID in the migration file must exist in the demographics file. +One can have node ID's in the demographics that don't exist in the migration file. + +The CSV file does not have to have the same number of entries for each FromNodeID. +The script will find the FromNodeID that has the most and use that for the +DestinationsPerNode. The binary file will have DestinationsPerNode entries +per node. + +#. Run the `convert_csv_to_bin_vector_migration.py `_ script using the command format below:: + + python convert_csv_to_bin_vector_migration.py [input-migration-csv] + + +This will create both the metadata and binary file needed by |EMOD_s|. + +Example Input files +------------------- + +.. literalinclude:: ../csv/vector-migration-by-genetics-input.csv +.. literalinclude:: ../csv/vector-migration-by-sex-input.csv + + +JSON metadata file +================== + +The metadata file is a JSON-formatted file that includes a metadata section and a node offsets +section. The **Metadata** section contains a JSON object with parameters, some of which are +strictly informational and some of which are used by |exe_s|. However, the informational ones may +still be important to understand the provenance and meaning of the data. 
+ +Vector Migration Metadata File Parameters +------------------------------------------ + +Vector migration does not do age-based migration and does not differentiate the migration type since there +is only one migration file per species, therefore the parameters pertaining to those options are not included, +and if included, are ignored. The omitted parameters are: MigrationType, AgesYears, InterpolationType. + +The following parameters can be included in the by-gender or by-genetics migration metadata file: + +.. csv-table:: + :header: Parameter, Data type, Description + :widths: 10,5,20 + + IdReference, string, "(Used by |EMOD_s|.) A unique, user-selected string that indicates the method used by |EMOD_s| for generating **NodeID** values in the input files. For more information, see :doc:`software-inputs`." + DateCreated, string, Date and time the file was generated. + Tool, string, The script used to create the file. + DatavalueCount, integer, "(Used by |EMOD_s|.) The number of outbound data values per node (max 100). The number must be the same across every node in the binary file." + GenderDataType, enum, "Denotes whether data is provided for each gender separately, is the same for both, or depends on vector genetics. Accepted values are ONE_FOR_BOTH_GENDERS, ONE_FOR_EACH_GENDER, VECTOR_MIGRATION_BY_GENETICS." + AlleleCombinations, array, "An array of Allele_Combinations, starting with an emtpy array to mark the default migration rate." + NodeCount, integer, "(Used by |EMOD_s|.) The number of nodes to expect in this file." + NodeOffsets, string, "(Used by |EMOD_s|.) A string that is **NodeCount** :math:`\times` 16 characters long. For each node, the first 8 characters are the origin **NodeID** in hexadecimal. The second 8 characters are the byte offset in hex to the location in the binary file where the destination **NodeIDs** and migration rates appear." + + +Example +------- + +.. 
literalinclude:: ../json/vector-migration-metadata.json + :language: json + + + diff --git a/docs/emod/software-migration-vector.rst b/docs/emod/software-migration-vector.rst index 88a01ee..3d3e271 100644 --- a/docs/emod/software-migration-vector.rst +++ b/docs/emod/software-migration-vector.rst @@ -1,28 +1,28 @@ -====================== -Vector migration files -====================== +================= +Vector Migration +================= Vector migration file describes the rate of migration of vectors *out* of a geographic :term:`node` -analogously to human migration (see :doc:`software-migration` for more information), although vector -migration supports gender parameters, it does not support migration by age and age-based migration -in the migration file will cause an error. Vector migration is one way, such that each trip made by -a vector is independent of previous trips made by the vector. For **Vector_Sampling_Type** set to -"TRACK_ALL_VECTORS" or "SAMPLE_IND_VECTORS", the rates in the file are used to control whether or not -a female vector will migrate: the rate specified is used to get a "time to leave on trip" value -from an exponential distribution. If the value is less than one day, then the female vector will migrate. -For male vectors (who are always in cohorts/compartments), and for female vectors when -**Vector_Sampling_Type** is set to "VECTOR_COMPARTMENTS_NUMBER" or "VECTOR_COMPARTMENTS_PERCENT", -the rates in the file are used to calculate what fraction of the population is traveling out of the node -on that day based on a total rate of travel out of that node and the traveling vectors are distributed -to their destination nodes in proportion of rates to those nodes to the total outbound rate. 
- -If default geography is used (the configuration parameter **Enable_Demographics_Builtin** is set to 1, +analogously to human migration (see :doc:`software-migration` for more information), the vector +model does not support migration by age and age-based migration in the migration file will cause an +error. Vector migration does support migration by gender as well as migration based on genetics (see below). +Vector migration is one way, such that each trip made by a vector is independent of previous trips made +by the vector. For **Vector_Sampling_Type** set to "TRACK_ALL_VECTORS" or "SAMPLE_IND_VECTORS", +the rates in the file are used to control whether or not a female vector will migrate: the rate specified +is used to get a "time to leave on trip" value from an exponential distribution. If the value is less than +one day, then the female vector will migrate. For male vectors (who are always in cohorts/compartments), +and for female vectors when **Vector_Sampling_Type** is set to "VECTOR_COMPARTMENTS_NUMBER" or +"VECTOR_COMPARTMENTS_PERCENT", the rates in the file are used to calculate what fraction of the population +is traveling out of the node on that day based on a total rate of travel out of that node and the traveling +vectors are distributed to their destination nodes in proportion of rates to those nodes to the total +outbound rate. + +Note: If default geography is used (the configuration parameter **Enable_Demographics_Builtin** is set to 1, and **Default_Geography_Initial_Node_Population** and **Default_Geography_Torus_Size** are configured), vector migration will be built internally and vectors will automatically migrate. It is a known issue, please see https://github.com/InstituteforDiseaseModeling/EMOD/issues/43 -Vectors "LOCAL_MIGRATION" MigrationType for all their migration needs, but are not limited to the default -maximum data values of 8 (destinations). 
+Vectors do not have a "MigrationType" as each species use only one file for all their migration needs. Each vector species has its own **Vector_Migration_Filename**, if it is left as an empty string, no migration will happen for that species. The **Vector_Migration_Modifier_Equation** and its parameters @@ -30,10 +30,32 @@ can influence female vector migration to particular nodes over others, while **x a multiplier affects the migration rates for both genders. See :doc:`parameter-configuration` for more information on the parameters governing vector migration. -Migration Files -================== +Vector Migration Using Genetics +=============================== + +Vectors have a type of migration not available to humans set with "GenderDataType": VECTOR_MIGRATION_BY_GENETICS in the +migration metadata file (usually a .bin.json file). + +The way this migration works is that you can define a migration rate for each defined Allele_Combination. +Allele_Combinations example: [ [], [["a1", "a1"], ["b1", "b1"]], [["X1","Y2"]], [["*", "a0"], ["X1", "Y1"]]] +The first (empty, []) array is used as a "default rate" if the vector's genetics doesn't match any of the +Allele_Combinations. The other column Allele_Combinations will be associated with the rate that the vector will use +if it matches with that Allele_Combination. Vectors are checked against Allele_Combinations from most-specific, +to least-specific, regardless of the order in the Allele_Combinations parameter. Allele_Combinations can, +but don't have to, include sex-alleles. Without specified sex-alleles, any vector that matches the alleles regardless +of sex will travel at that rate. Please see Vector Migration File creation for more information. + + +Vector Migration Files +====================== + +The Binary file structure for the vector migration files is the same as it is for human files. 
+The Allele_Combinations array, when present for vector VECTOR_MIGRATION_BY_GENETICS, is used in the same capacity as +AgesYears array would be to maintain the same structure of the file. Please see migration file creation +for more details. -Vectors use the same migration files as humans, with two caveats: +.. toctree:: + :maxdepth: 3 + :titlesonly: -1. Vectors do not migrate by-age, so multiple AgeBins in the by-age by-gender migration file will cause an error. -2. Vector migration only uses MigrationType "LOCAL_MIGRATION" all other migration types will cause an error. + software-migration-creation-vector diff --git a/docs/emod/software-migration.rst b/docs/emod/software-migration.rst index ab5da9f..9fd2028 100644 --- a/docs/emod/software-migration.rst +++ b/docs/emod/software-migration.rst @@ -118,5 +118,6 @@ The following image shows how a binary file with a **DatavalueCount** value of 8 :maxdepth: 3 :titlesonly: - software-migration-vector software-migration-creation + software-migration-vector + diff --git a/docs/json/vector-migration-metadata.json b/docs/json/vector-migration-metadata.json new file mode 100644 index 0000000..367a97a --- /dev/null +++ b/docs/json/vector-migration-metadata.json @@ -0,0 +1,12 @@ +{ + "Metadata": { + "IdReference": "9-nodes", + "DateCreated": "Thu Nov 21 17:41:47 2024", + "Tool": "convert_csv_to_bin_vector_migration.py", + "DatavalueCount": 8, + "GenderDataType": "VECTOR_MIGRATION_BY_GENETICS", + "AlleleCombinations": [[], [["X1","Y2"]], [["a1","a1"], ["b1","b0"], ["X1","X1"]], [["*","a0"], ["X1","Y1"]], [["a1","a1"],["b1","b1"]]], + "NodeCount": 3 + }, + "NodeOffsets": "0000000500000000000000010000006000000003000000C0" +} \ No newline at end of file From 95c732f2cdcdc9220ad29c4b8b16259a5338a492 Mon Sep 17 00:00:00 2001 From: "INTERNAL\\stitova" Date: Sat, 14 Dec 2024 13:52:31 -0800 Subject: [PATCH 2/6] updates per code review. 
--- docs/emod/model-migration.rst | 2 +- .../software-migration-creation-vector.rst | 72 ++-- docs/emod/software-migration-vector.rst | 5 +- docs/json/vector-migration-metadata.json | 3 +- .../{vector_migration => migration}/README.md | 0 .../__init__.py | 0 .../convert_csv_to_bin_vector_migration.py | 238 +++++++++++++ .../migration/convert_json_to_bin.py | 331 ++++++++++++++++++ .../migration/convert_txt_to_bin.py | 135 +++++++ .../vector_migration.py | 0 examples-container/microsporidia/example.py | 2 +- .../example.py | 2 +- 12 files changed, 757 insertions(+), 33 deletions(-) rename emodpy_malaria/{vector_migration => migration}/README.md (100%) rename emodpy_malaria/{vector_migration => migration}/__init__.py (100%) create mode 100644 emodpy_malaria/migration/convert_csv_to_bin_vector_migration.py create mode 100644 emodpy_malaria/migration/convert_json_to_bin.py create mode 100644 emodpy_malaria/migration/convert_txt_to_bin.py rename emodpy_malaria/{vector_migration => migration}/vector_migration.py (100%) diff --git a/docs/emod/model-migration.rst b/docs/emod/model-migration.rst index c1cdbdf..757949f 100644 --- a/docs/emod/model-migration.rst +++ b/docs/emod/model-migration.rst @@ -9,7 +9,7 @@ nodes without disease and introduce disease transmission into that node. Nodes a can represent everything from individual households to entire countries or anything in between. Therefore, to include migration in a simulation, you must define multiple nodes. -At each time step, individuals in each node have a defined probability of migrating out of their +At each time step, individuals in each node have a defined rate of migration out of their current node to another. You can also define the average length of time individuals will stay in their destination node before migrating again. 
If you are using timesteps longer than one day and the time to next migration falls between timesteps, individuals will migrate at the following diff --git a/docs/emod/software-migration-creation-vector.rst b/docs/emod/software-migration-creation-vector.rst index 145fb8e..e8f5873 100644 --- a/docs/emod/software-migration-creation-vector.rst +++ b/docs/emod/software-migration-creation-vector.rst @@ -24,14 +24,31 @@ The CSV file can have several column configurations: 1. Header (optional): FromNodeID, ToNodeID, Rate (Average # of Trips Per Day) If the csv/text file has three columns with no headers, this is the format we assume. -This can be used for human and vector migration. The Rate is for any/all agents regardless of sex or age. + +.. csv-table:: + :header: Parameter, Data type, Min, Max, Default, Description + :widths: 10,5,5,5,5,20 + + FromNodeID, integer, 1, 2147480000, NA,"NodeID, matching NodeIDs in demographics file, from which the vector/human will travel." + ToNodeID, integer, 1, 2147480000, NA,"NodeID, matching NodeIDs in demographics file, to which the vector/human will travel." + Rate, float, 0, 3.40282e+38, NA, "Rate at which the all the vectors/humans will travel from the FromNodeID to ToNodeID." + 2. Header (optional): FromNodeID, ToNodeID, RateMales, RateFemales If the csv/text file has four columns with no headers, this is the format we assume. -RateMales are rates for male migration, RateFemales for female migration and are Average # of Trips Per Day. + +.. csv-table:: + :header: Parameter, Data type, Min, Max, Default, Description + :widths: 10,5,5,5,5,20 + + FromNodeID, integer, 1, 2147480000, NA, "NodeID, matching NodeIDs in demographics file, from which the vector/human will travel." + ToNodeID, integer, 1, 2147480000, NA,"NodeID, matching NodeIDs in demographics file, to which the vector/human will travel." + RateMales, float,0, 3.40282e+38, NA, "Rate at which the vector/human of male sex will travel from the FromNodeID to ToNodeID." 
+ RateFemales, float, 0, 3.40282e+38, NA, "Rate at which the vector/human of female sex will travel from the FromNodeID to ToNodeID." + 3. Header (required): FromNodeID, ToNodeID, [], arrays denoting Allele_Combinations -Allele_Combinations example: [["a1", "a1"], ["b1", "b1"]]; [["X1","Y2"]]; [["*", "a0"], ["X1", "Y1"]] +Allele_Combinations example: [["a1", "a1"], ["b1", "b1"]] or [["X1","Y2"]] or [["*", "a0"], ["X1", "Y1"]] Due to use of commas in headers, it is best to use Excel to create them (or look at a sample text csv). This is to support VECTOR_MIGRATION_BY_GENETICS. Headers are required for this csv file. The first (empty, []) array is used as a "default rate" if the vector's genetics doesn't match any of the @@ -40,16 +57,19 @@ Allele_Combination listed. Vectors are checked against Allele_Combinations from regardless of the order in the csv file. Allele_Combinations can, but don't have to, include sex-alleles. Without specified sex-alleles, any vector that matches the alleles regardless of sex will travel at that rate. -The FromNodeIDs and ToNodeIDs are the external ID's found in the demographics file. -Each node ID in the migration file must exist in the demographics file. -One can have node ID's in the demographics that don't exist in the migration file. +.. csv-table:: + :header: Parameter, Data type, Min, Max, Default, Description + :widths: 10,5,5,5,5,20 + + FromNodeID, integer, 1, 2147480000, NA, "NodeID, matching NodeIDs in demographics file, from which the vector/human will travel." + ToNodeID, integer, 1, 2147480000, NA, "NodeID, matching NodeIDs in demographics file, to which the vector/human will travel." + [], float, 0, 3.40282e+38, NA, "Default rate at which the vector that doesn't match any other allele combinations will travel from the FromNodeID to ToNodeID." 
+ "[['a1', 'a1'], ['b1', 'b1']]", float, 0, 3.40282e+38, NA, "Rate at which the vector that matches this and not a more-specific allele combination will travel from the FromNodeID to ToNodeID." + "[['*', 'a0'], ['X1', 'Y1']]", float, 0, 3.40282e+38, NA,"Rate at which the vector that matches this and not a more-specific allele combination will travel from the FromNodeID to ToNodeID." + "[['X1','Y2']]", float, 0, 3.40282e+38, NA,"Rate at which the vector that matches this and not a more-specific allele combination will travel from the FromNodeID to ToNodeID." -The CSV file does not have to have the same number of entries for each FromNodeID. -The script will find the FromNodeID that has the most and use that for the -DestinationsPerNode. The binary file will have DestinationsPerNode entries -per node. -#. Run the `convert_csv_to_bin_vector_migration.py `_ script using the command format below:: +#. Run the `convert_csv_to_bin_vector_migration.py `_ script using the format below: python convert_csv_to_bin_vector_migration.py [input-migration-csv] @@ -67,31 +87,27 @@ JSON metadata file ================== The metadata file is a JSON-formatted file that includes a metadata section and a node offsets -section. The **Metadata** section contains a JSON object with parameters, some of which are -strictly informational and some of which are used by |exe_s|. However, the informational ones may -still be important to understand the provenance and meaning of the data. +section. The **Metadata** section contains a JSON object with parameters that help |EMOD_s| interpret the migration +binary file. The users are encouraged to add their own parameters to the section to remind themselves about the source, +reason, purpose of the binary file and the data it contains. Non-required parameters are ignored. 
+ Vector Migration Metadata File Parameters ------------------------------------------ -Vector migration does not do age-based migration and does not differentiate the migration type since there -is only one migration file per species, therefore the parameters pertaining to those options are not included, -and if included, are ignored. The omitted parameters are: MigrationType, AgesYears, InterpolationType. - -The following parameters can be included in the by-gender or by-genetics migration metadata file: - .. csv-table:: :header: Parameter, Data type, Description :widths: 10,5,20 - IdReference, string, "(Used by |EMOD_s|.) A unique, user-selected string that indicates the method used by |EMOD_s| for generating **NodeID** values in the input files. For more information, see :doc:`software-inputs`." - DateCreated, string, Date and time the file was generated. - Tool, string, The script used to create the file. - DatavalueCount, integer, "(Used by |EMOD_s|.) The number of outbound data values per node (max 100). The number must be the same across every node in the binary file." - GenderDataType, enum, "Denotes whether data is provided for each gender separately, is the same for both, or depends on vector genetics. Accepted values are ONE_FOR_BOTH_GENDERS, ONE_FOR_EACH_GENDER, VECTOR_MIGRATION_BY_GENETICS." - AlleleCombinations, array, "An array of Allele_Combinations, starting with an emtpy array to mark the default migration rate." - NodeCount, integer, "(Used by |EMOD_s|.) The number of nodes to expect in this file." - NodeOffsets, string, "(Used by |EMOD_s|.) A string that is **NodeCount** :math:`\times` 16 characters long. For each node, the first 8 characters are the origin **NodeID** in hexadecimal. The second 8 characters are the byte offset in hex to the location in the binary file where the destination **NodeIDs** and migration rates appear." + IdReference, string, "Required. 
A unique id to match demographics, climate, and migration files that work together." + DatavalueCount, integer, "Required. The number of outbound data values per node (max 100). The number must be the same across every node in the binary file." + GenderDataType, enum, "Required. Denotes whether data is provided for each gender separately, is the same for both, or depends on vector genetics. Accepted values are ONE_FOR_BOTH_GENDERS, ONE_FOR_EACH_GENDER, VECTOR_MIGRATION_BY_GENETICS." + AlleleCombinations, array, "Required for GenderDataType: VECTOR_MIGRATION_BY_GENETICS. An array of Allele_Combinations, starting with an empty array to mark the default migration rate." + NodeCount, integer, "Required. The number of 'from' nodes in the data. Used to verify the size of NodeOffsets: 16*NodeCount must equal the number of characters in NodeOffsets." + NodeOffsets, string, "Required. A string of NodeCount x 16 characters: for each 'from' node, 8 hexadecimal characters for the NodeID followed by 8 hexadecimal characters giving the byte offset to that node's data in the binary file." + DateCreated, string, Date and time the file was generated by the script. Informational for user only. + Tool, string, The script used to create the file. Informational for user only. + Project, string, Example of a user-created parameter. Informational for user only. Example diff --git a/docs/emod/software-migration-vector.rst b/docs/emod/software-migration-vector.rst index 3d3e271..6764bc0 100644 --- a/docs/emod/software-migration-vector.rst +++ b/docs/emod/software-migration-vector.rst @@ -15,7 +15,10 @@ and for female vectors when **Vector_Sampling_Type** is set to "VECTOR_COMPARTME "VECTOR_COMPARTMENTS_PERCENT", the rates in the file are used to calculate what fraction of the population is traveling out of the node on that day based on a total rate of travel out of that node and the traveling vectors are distributed to their destination nodes in proportion of rates to those nodes to the total -outbound rate. +outbound rate. 
total_fraction_traveling out of a node is 1.0 - exp( -1.0 * m_TotalRate ), where m_TotalRate is +the sum of all rates out of this node. Then fraction of vectors traveling to each node is (rate to node) / m_TotalRate * total_fraction_traveling +Please see https://github.com/EMOD-Hub/EMOD/blob/529bd11b19b5b10d49fab445dea29ee4ebd65740/Eradication/MigrationInfoVector.cpp#L335 for further details. + Note: If default geography is used (the configuration parameter **Enable_Demographics_Builtin** is set to 1, and **Default_Geography_Initial_Node_Population** and **Default_Geography_Torus_Size** are configured), diff --git a/docs/json/vector-migration-metadata.json b/docs/json/vector-migration-metadata.json index 367a97a..78bfc6c 100644 --- a/docs/json/vector-migration-metadata.json +++ b/docs/json/vector-migration-metadata.json @@ -6,7 +6,8 @@ "DatavalueCount": 8, "GenderDataType": "VECTOR_MIGRATION_BY_GENETICS", "AlleleCombinations": [[], [["X1","Y2"]], [["a1","a1"], ["b1","b0"], ["X1","X1"]], [["*","a0"], ["X1","Y1"]], [["a1","a1"],["b1","b1"]]], - "NodeCount": 3 + "NodeCount": 3, + "Project": "Migration based on Dr. Acula research." 
}, "NodeOffsets": "0000000500000000000000010000006000000003000000C0" } \ No newline at end of file diff --git a/emodpy_malaria/vector_migration/README.md b/emodpy_malaria/migration/README.md similarity index 100% rename from emodpy_malaria/vector_migration/README.md rename to emodpy_malaria/migration/README.md diff --git a/emodpy_malaria/vector_migration/__init__.py b/emodpy_malaria/migration/__init__.py similarity index 100% rename from emodpy_malaria/vector_migration/__init__.py rename to emodpy_malaria/migration/__init__.py diff --git a/emodpy_malaria/migration/convert_csv_to_bin_vector_migration.py b/emodpy_malaria/migration/convert_csv_to_bin_vector_migration.py new file mode 100644 index 0000000..5e971f6 --- /dev/null +++ b/emodpy_malaria/migration/convert_csv_to_bin_vector_migration.py @@ -0,0 +1,238 @@ +# convert_csv_to_bin_vector_migration.py +# ----------------------------------------------------------------------------- +# This script converts a CSV formatted txt file to an EMOD binary-formatted migration file. +# It also creates the required metadata file. +# +# The CSV file can have several configuration of columns: +# +# 1) Headers: FromNodeID, ToNodeID, Rate (Average # of Trips Per Day) +# If the csv/text file does not have column headers and three entries, this is the format we assume. +# This can be used for human and vector migration. The Rate is for any/all agents regardless of sex or age. +# +# 2) Headers: FromNodeID, ToNodeID, RateMales, RateFemales +# If the csv/text file does not have column headers and four entries, this is the format we assume. +# RateMales are rates for male migration, RateFemales for female migration and are Average # of Trips Per Day. +# This can be used for human and vector migration when using sex-based migration without age. 
+# +# 3) Headers: FromNodeID, ToNodeID, [], arrays denoting Allele_Combinations +# Allele_Combinations example: [["a1", "a1"], ["b1", "b1"]]; [["X1","Y2"]]; [["*", "a0"], ["X1", "Y1"]] +# Due to use of commas in headers, it is best to use Excel to create them (or look at a sample text csv) +# This is to support VECTOR_MIGRATION_BY_GENETICS. +# Headers are required for this csv file. +# The first (empty, []) array is used as a "default rate" if the vector's genetics doesn't match any of the +# Allele_Combinations. The other column headers denote the rate that the vector will travel at if it matches the +# Allele_Combination listed. Vectors are checked against Allele_Combinations from most-specific, to least-specific, +# regardless of the order in the csv file. Allele_Combinations can, but don't have to, include sex-alleles. Without +# specified sex-alleles, any vector that matches the alleles regardless of sex will travel at that rate. +# +# The FromNodeIDs and ToNodeIDs are the external ID's found in the demographics file. +# Each node ID in the migration file must exist in the demographics file. +# One can have node ID's in the demographics that don't exist in the migration file. +# +# The CSV file does not have to have the same number of entries for each FromNodeID. +# The script will find the FromNodeID that has the most and use that for the +# DestinationsPerNode. The binary file will have DestinationsPerNode entries +# per node. 
# -----------------------------------------------------------------------------
# convert_csv_to_bin_vector_migration.py
#
# Converts a CSV file describing vector migration rates into the EMOD
# binary-formatted migration file and the required JSON metadata file.
# See the docs for the three supported column configurations (plain rate,
# per-sex rates, or per-Allele_Combination rates).
# -----------------------------------------------------------------------------

import collections
import datetime
import json
import os
import struct
import sys
import pandas as pd
from enum import Enum
import ast


class GenderDataType(Enum):
    """Value written to the metadata 'GenderDataType' field (mirrors EMOD's enum)."""
    SAME_FOR_BOTH_GENDERS = "SAME_FOR_BOTH_GENDERS"
    ONE_FOR_EACH_GENDER = "ONE_FOR_EACH_GENDER"
    VECTOR_MIGRATION_BY_GENETICS = "VECTOR_MIGRATION_BY_GENETICS"


class MetaData:
    """Accumulates everything needed to write the .bin file and its .json metadata."""

    def __init__(self):
        self.node_count = 0                  # number of distinct FromNodeIDs
        self.offset_str = ""                 # hex NodeOffsets string for the metadata file
        self.max_destinations_per_node = 0   # width of each node's record in the bin file
        self.gender_data_type = None         # GenderDataType, decided from the CSV columns
        self.filename_out = ""               # output .bin path
        self.has_headers = False             # True when the CSV has a header row
        self.num_columns = 0                 # number of columns in the CSV
        self.allele_combinations = []        # parsed Allele_Combinations headers (genetics mode)
        self.data_df = None                  # pandas DataFrame holding the CSV contents
        self.ref_id = None                   # IdReference for the metadata file


def ShowUsage():
    """Print the command-line usage string."""
    print('\nUsage: %s [input-migration-csv] [idreference]' % os.path.basename(sys.argv[0]))


# -----------------------------------------------------------------------------
# WriteMetadataFile
# -----------------------------------------------------------------------------
def WriteMetadataFile(metadata):
    """Write the <filename_out>.json metadata file describing the binary layout."""
    output_json = collections.OrderedDict([])

    output_json["Metadata"] = {}
    output_json["Metadata"]["IdReference"] = metadata.ref_id
    output_json["Metadata"]["DateCreated"] = datetime.datetime.now().ctime()
    output_json["Metadata"]["Tool"] = os.path.basename(sys.argv[0])
    output_json["Metadata"]["DatavalueCount"] = metadata.max_destinations_per_node
    output_json["Metadata"]["GenderDataType"] = metadata.gender_data_type.value
    if metadata.allele_combinations:
        # Only present for VECTOR_MIGRATION_BY_GENETICS input.
        output_json["Metadata"]["AlleleCombinations"] = metadata.allele_combinations
    output_json["Metadata"]["NodeCount"] = metadata.node_count
    output_json["NodeOffsets"] = metadata.offset_str

    metadata_filename = metadata.filename_out + ".json"
    with open(metadata_filename, 'w') as file_out_json:
        json.dump(output_json, file_out_json, indent=4)


def is_number(value):
    """Return True if *value* parses as a float (used to detect a missing header row)."""
    try:
        float(value)
        return True
    except (ValueError, TypeError):  # TypeError: e.g. None or a list
        return False


def GetSummaryData(metadata):
    """Validate the CSV layout, decide the GenderDataType, and fill in the
    summary fields of *metadata* (num_columns, gender_data_type,
    allele_combinations, max_destinations_per_node, node_count, offset_str).

    Raises ValueError when the column configuration is not one of the three
    supported formats or when a FromNodeID has duplicate ToNodeIDs.
    """
    data_df = metadata.data_df
    # Derive headers from the DataFrame itself (the original read a module-level
    # global that only existed when run as a script).
    headers = data_df.columns.tolist()
    metadata.num_columns = len(data_df.columns)
    if metadata.num_columns < 3:
        raise ValueError(f"There are {metadata.num_columns} columns in the file, but we expect at least three. "
                         f"Please review comments for expected column configurations and try again.")
    if not metadata.has_headers:  # no column headers: infer format from the column count
        if metadata.num_columns == 3:
            print(f"File doesn't seem to have headers, and with {metadata.num_columns} columns, "
                  "we are assuming 'FromNodeID', 'ToNodeID', 'Rate' column configuration.")
            metadata.gender_data_type = GenderDataType.SAME_FOR_BOTH_GENDERS
        elif metadata.num_columns == 4:
            print(f"File doesn't seem to have headers, and with {metadata.num_columns} columns, "
                  "we are assuming 'FromNodeID', 'ToNodeID', 'RateMales', 'RateFemales' column configuration.")
            metadata.gender_data_type = GenderDataType.ONE_FOR_EACH_GENDER
        else:
            raise ValueError(f"File doesn't seem to have headers, and with {metadata.num_columns} columns, it is not "
                             f"obvious what the column configuration should be. If you are trying to create a "
                             f"VECTOR_MIGRATION_BY_GENETICS file, please add headers as shown in the comments.")
    else:  # has headers: force the user to use one of the three supported formats
        if 'FromNodeID' not in headers[0] or 'ToNodeID' not in headers[1]:
            raise ValueError(f"With headers, we expect first two column headers to be 'FromNodeID', 'ToNodeID', but "
                             f"they are {headers[0]} and {headers[1]}.")
        elif metadata.num_columns == 3:
            if 'Rate' not in headers[2]:
                raise ValueError(f"With headers and {metadata.num_columns}, we expect the third column to be 'Rate', "
                                 f"but it is {headers[2]}, please check and correct.")
            else:
                metadata.gender_data_type = GenderDataType.SAME_FOR_BOTH_GENDERS
        elif metadata.num_columns > 3:
            if "[]" in headers[2]:
                # Genetics format: third column is the "[]" default-rate column and the
                # remaining headers are Allele_Combinations arrays, e.g. [["a1","a1"]].
                metadata.gender_data_type = GenderDataType.VECTOR_MIGRATION_BY_GENETICS
                for alleles in headers[2:]:
                    metadata.allele_combinations.append(ast.literal_eval(alleles))
            elif metadata.num_columns == 4:
                if 'RateMales' in headers[2] and 'RateFemales' in headers[3]:
                    metadata.gender_data_type = GenderDataType.ONE_FOR_EACH_GENDER
                else:
                    raise ValueError(f"With column headers and {metadata.num_columns} and not Allele_Combinations in "
                                     f"headers, we expect 'RateMales' in third column and 'RateFemales' in fourth "
                                     f"column, but they are"
                                     f" {headers[2]} and {headers[3]}, please check and correct.")
            else:
                raise ValueError(f"File has column headers with {metadata.num_columns} columns, but does not have the "
                                 f"expected headers. Please review the headers expected in the comments, correct, "
                                 f"and try again.")

    # -------------------------------------------------------------------------
    # Find the list of nodes that vectors can migrate from.
    # Also find the maximum number of destination nodes over all from-nodes;
    # this max determines the fixed-width layout of the binary data.
    # -------------------------------------------------------------------------
    from_node_id_list = data_df[data_df.columns[0]].unique().tolist()
    for from_node_id in from_node_id_list:
        to_node_id_list = data_df[data_df.iloc[:, 0] == from_node_id].iloc[:, 1]
        if len(to_node_id_list) != len(to_node_id_list.unique()):
            raise ValueError(f"For 'FromNodeID' = {from_node_id}, there are non-unique 'ToNodeIDs'.")
        if len(to_node_id_list) > metadata.max_destinations_per_node:
            metadata.max_destinations_per_node = len(to_node_id_list)

    # -------------------------------------------------------------------
    # Create the NodeOffsets string: for each from-node, its ID in hex
    # followed by the byte offset of its data within the bin file.
    # -------------------------------------------------------------------
    for from_node_id in from_node_id_list:
        metadata.offset_str += '%0.8X' % from_node_id
        metadata.offset_str += '%0.8X' % (metadata.node_count * metadata.max_destinations_per_node * 12)  # 12 = sizeof(uint32_t) + sizeof(double)
        metadata.node_count += 1


# -----------------------------------------------------------------------------
# WriteBinFile
# -----------------------------------------------------------------------------
def WriteBinFile(metadata):
    """Write the binary migration file: one data section per rate column, each
    containing max_destinations_per_node (uint32 id, double rate) pairs per
    from-node, zero-padded.  Assumes GetSummaryData() already validated the data.
    """
    data_df = metadata.data_df
    from_node_id_list = data_df[data_df.columns[0]].unique().tolist()
    with open(metadata.filename_out, 'wb') as bin_file:
        for data_index in range(2, metadata.num_columns):
            for from_node_id in from_node_id_list:
                # Select this node's rows once (the original re-filtered the
                # DataFrame for every to-node).
                node_rows = data_df[data_df.iloc[:, 0] == from_node_id]
                to_node_id_list = node_rows.iloc[:, 1].tolist()
                if len(to_node_id_list) != len(set(to_node_id_list)):
                    raise ValueError(f"For FromNodeID {from_node_id}, there are multiple rates for the same "
                                     f"ToNodeID in column index {data_index}.")
                rate_list = node_rows.iloc[:, data_index].tolist()

                # Initialize fixed-width arrays with zeros, then fill.
                to_node_id_array = [0] * metadata.max_destinations_per_node
                rates_array = [0.0] * metadata.max_destinations_per_node
                for index, (to_node_id, rate) in enumerate(zip(to_node_id_list, rate_list)):
                    to_node_id_array[index] = to_node_id
                    rates_array[index] = rate

                # 'I' (uint32) + 'd' (double) to match EMOD's expected record layout.
                bin_file.write(struct.pack('I' * len(to_node_id_array), *to_node_id_array))
                bin_file.write(struct.pack('d' * len(rates_array), *rates_array))


if __name__ == "__main__":
    if not (len(sys.argv) == 2 or len(sys.argv) == 3):
        ShowUsage()
        exit(0)

    filename_in = sys.argv[1]
    # BUG FIX: the optional idreference is the *second* argument (argv[2]);
    # the original read argv[1] again, silently using the input filename
    # as the IdReference.
    id_ref = sys.argv[2] if len(sys.argv) == 3 else "temp_ref_id"

    meta_data = MetaData()
    meta_data.ref_id = id_ref
    meta_data.data_df = pd.read_csv(filename_in)
    headers = meta_data.data_df.columns.tolist()
    if is_number(headers[0]):  # first "header" is data => the file has no header row
        meta_data.data_df = pd.read_csv(filename_in, header=None)
    else:
        meta_data.has_headers = True  # False by default
    # os.path.splitext is safe for paths containing dots (split('.')[0] was not).
    meta_data.filename_out = os.path.splitext(filename_in)[0] + ".bin"

    GetSummaryData(meta_data)
    WriteBinFile(meta_data)
    WriteMetadataFile(meta_data)

    print(f"max_destinations_per_node = {meta_data.max_destinations_per_node}")
    print(f"Finished converting {filename_in} to {meta_data.filename_out} and .json metadata file.")
    if len(sys.argv) == 2:
        print(f"IdReference in {meta_data.filename_out}.json file is set to a temporary value, please update it"
              f" to match your demographics.")
# It also creates the required metadata file.
#
# The JSON file allows the user to specify different rates for different ages
# and genders.
#
# The output binary file has one or two Gender Data sections depending on whether
# the JSON file has different data for each gender. Each Gender Data section has
# one Age Data section for each age specified in the JSON file. Each Age Data
# section has one Node Data section for each node that individuals can migrate
# from. Each Node Data section has one chunk of data
# [1-uint32_t (4-bytes) plus 1-double (8-bytes)]
# for each destination where each Node Data section has DestinationsPerNode chunks.
# In other words, each Node Data section is 12-bytes times DestinationsPerNode.
# -----------------------------------------------------------------------------

import sys, os, json, collections, struct, datetime

# -----------------------------------------------------------------------------
# Age Limits
# -----------------------------------------------------------------------------
AGE_Min = 0.0
AGE_Max = 125.0


def CheckAge(age):
    """Exit with an error message if *age* (years) is outside [AGE_Min, AGE_Max]."""
    if age < AGE_Min:
        print(f"Invalid age={age} < {AGE_Min}")
        sys.exit(-1)
    if age > AGE_Max:
        print(f"Invalid age={age} > {AGE_Max}")
        sys.exit(-1)


def CheckAgeArray(ages_years):
    """Exit with an error unless *ages_years* is a non-empty, non-decreasing list of valid ages."""
    errmsg = JSON_AgesYears + " must be an array of ages in years and in increasing order."
    if len(ages_years) == 0:
        print(errmsg)
        sys.exit(-1)

    prev = 0.0
    for age in ages_years:
        CheckAge(age)
        if age < prev:  # equal consecutive ages are tolerated; only a decrease is rejected
            print(errmsg)
            sys.exit(-1)
        prev = age


# -----------------------------------------------------------------------------
# GenderDataTypes
# -----------------------------------------------------------------------------
GDT_SAME_FOR_BOTH_GENDERS = "SAME_FOR_BOTH_GENDERS"
GDT_ONE_FOR_EACH_GENDER = "ONE_FOR_EACH_GENDER"

GenderDataTypes = [GDT_SAME_FOR_BOTH_GENDERS, GDT_ONE_FOR_EACH_GENDER]


def CheckGenderDataType(gdt):
    """Exit with an error if *gdt* is not a recognized GenderDataType."""
    # Membership test instead of the original found-flag loop (which shadowed builtin 'type').
    if gdt not in GenderDataTypes:
        print(f"Invalid GenderDataType = {gdt}")
        sys.exit(-1)


# -----------------------------------------------------------------------------
# InterpolationTypes
# -----------------------------------------------------------------------------
InterpolationTypes = ["LINEAR_INTERPOLATION", "PIECEWISE_CONSTANT"]


def CheckInterpolationType(interp_type):
    """Exit with an error if *interp_type* is not a recognized InterpolationType."""
    if interp_type not in InterpolationTypes:
        print(f"Invalid InterpolationType = {interp_type}")
        sys.exit(-1)


# -----------------------------------------------------------------------------
# MigrationTypes
# -----------------------------------------------------------------------------
MigrationTypes = ["LOCAL_MIGRATION", "AIR_MIGRATION", "REGIONAL_MIGRATION", "SEA_MIGRATION"]


def CheckMigrationType(mig_type):
    """Exit with an error if *mig_type* is not a recognized MigrationType."""
    if mig_type not in MigrationTypes:
        print(f"Invalid MigrationType = {mig_type}")
        sys.exit(-1)


# -----------------------------------------------------------------------------
# JSON Element Names
# -----------------------------------------------------------------------------
# NOTE: The indention below indicates where the tag is used in the JSON

JSON_IdRef = "IdReference"
JSON_InterpType = "Interpolation_Type"
JSON_GenderDataType = "Gender_Data_Type"
JSON_AgesYears = "Ages_Years"
JSON_NodeData = "Node_Data"
JSON_ND_FromNodeId = "From_Node_ID"
JSON_ND_RateData = "Rate_Data"
JSON_RD_ToNodeId = "To_Node_ID"
JSON_RD_RatesBoth = "Avg_Num_Trips_Per_Day_Both"
JSON_RD_RatesMale = "Avg_Num_Trips_Per_Day_Male"
JSON_RD_RatesFemale = "Avg_Num_Trips_Per_Day_Female"


def CheckInJson(fn, data, key):
    """Exit with an error if *key* is missing from the JSON object *data* (from file *fn*)."""
    if key not in data:
        print(f"Could not find {key} in file {fn}.")
        sys.exit(-1)


def CheckRatesSize(num_ages, rd_data, key):
    """Exit with an error unless rd_data[key] has exactly *num_ages* rate values."""
    if len(rd_data[key]) != num_ages:
        print(f"{JSON_AgesYears} has {num_ages} values and one of the {key} has {len(rd_data[key])} values. "
              f"They must have the same number.")
        sys.exit(-1)


# -----------------------------------------------------------------------------
# ReadJson
# -----------------------------------------------------------------------------
def ReadJson(json_fn):
    """Load the input JSON file and validate its structure; exits on any problem.

    Returns the parsed JSON data on success.
    """
    with open(json_fn, 'r') as json_file:
        json_data = json.load(json_file)

    for key in (JSON_IdRef, JSON_InterpType, JSON_GenderDataType, JSON_AgesYears, JSON_NodeData):
        CheckInJson(json_fn, json_data, key)

    CheckInterpolationType(json_data[JSON_InterpType])
    CheckGenderDataType(json_data[JSON_GenderDataType])
    CheckAgeArray(json_data[JSON_AgesYears])

    if len(json_data[JSON_NodeData]) == 0:
        print(f"{JSON_NodeData} has no elements so there would be no migration data.")
        sys.exit(-1)

    num_ages = len(json_data[JSON_AgesYears])

    for nd_data in json_data[JSON_NodeData]:
        CheckInJson(json_fn, nd_data, JSON_ND_FromNodeId)
        CheckInJson(json_fn, nd_data, JSON_ND_RateData)

        if len(nd_data[JSON_ND_RateData]) == 0:
            print(f"{JSON_ND_RateData} has no elements so there would be no migration data.")
            sys.exit(-1)

        for rd_data in nd_data[JSON_ND_RateData]:
            CheckInJson(json_fn, rd_data, JSON_RD_ToNodeId)

            if json_data[JSON_GenderDataType] == GDT_ONE_FOR_EACH_GENDER:
                # Per-gender rates: both male and female arrays must be present and sized.
                for rates_key in (JSON_RD_RatesMale, JSON_RD_RatesFemale):
                    CheckInJson(json_fn, rd_data, rates_key)
                    CheckRatesSize(num_ages, rd_data, rates_key)
            else:
                CheckInJson(json_fn, rd_data, JSON_RD_RatesBoth)
                CheckRatesSize(num_ages, rd_data, JSON_RD_RatesBoth)

    return json_data


# -----------------------------------------------------------------------------
# SummaryData
# -----------------------------------------------------------------------------
class SummaryData:
    """Layout summary used by both the bin writer and the metadata writer."""

    def __init__(self, nodeCount, offsetStr, maxDestinations):
        self.num_nodes = nodeCount                       # number of from-nodes
        self.offset_str = offsetStr                      # hex NodeOffsets string
        self.max_destinations_per_node = maxDestinations  # fixed record width


# -----------------------------------------------------------------------------
# GetSummaryData
# -----------------------------------------------------------------------------
def GetSummaryData(json_data):
    """Scan Node_Data and build the SummaryData (node count, offsets, max destinations)."""
    from_node_id_list = []

    # -------------------------------------------------------------------------
    # Find the nodes individuals can migrate from, and the maximum number of
    # destinations over all of them; the max fixes the binary record width.
    # -------------------------------------------------------------------------
    max_destinations = 0
    for node_data in json_data[JSON_NodeData]:
        from_node_id_list.append(int(node_data[JSON_ND_FromNodeId]))
        destinations = len(node_data[JSON_ND_RateData])
        if destinations > max_destinations:
            max_destinations = destinations

    print(f"max_destinations = {max_destinations}")

    # -------------------------------------------------------------------
    # Create NodeOffsets string: each from-node's ID in hex followed by the
    # byte offset of its data within the bin file.
    # -------------------------------------------------------------------
    offset_str = ""
    nodecount = 0
    for from_node_id in from_node_id_list:
        offset_str += '%0.8X' % from_node_id
        offset_str += '%0.8X' % (nodecount * max_destinations * 12)  # 12 = sizeof(uint32_t) + sizeof(double)
        nodecount += 1

    return SummaryData(nodecount, offset_str, max_destinations)


# -----------------------------------------------------------------------------
# WriteBinFile
# -----------------------------------------------------------------------------
def WriteBinFile(bin_fn, json_data, summary):
    """Write the binary file: one Gender Data section, or two when rates differ by gender."""
    with open(bin_fn, 'wb') as bin_file:
        if json_data[JSON_GenderDataType] == GDT_ONE_FOR_EACH_GENDER:
            WriteBinFileGender(bin_file, json_data, summary, JSON_RD_RatesMale)
            WriteBinFileGender(bin_file, json_data, summary, JSON_RD_RatesFemale)
        else:
            WriteBinFileGender(bin_file, json_data, summary, JSON_RD_RatesBoth)


# -----------------------------------------------------------------------------
# WriteBinFileGender
# -----------------------------------------------------------------------------
def WriteBinFileGender(bin_file, json_data, summary, rates_key):
    """Write one Gender Data section: per age, per node, a zero-padded block of
    (uint32 destination id, double rate) pairs of width max_destinations_per_node.
    """
    for age_index in range(len(json_data[JSON_AgesYears])):
        for node_data in json_data[JSON_NodeData]:
            # Initialize fixed-width arrays with zeros, then fill from Rate_Data.
            array_id = [0] * summary.max_destinations_per_node
            array_rt = [0] * summary.max_destinations_per_node
            for index, rate_data in enumerate(node_data[JSON_ND_RateData]):
                array_id[index] = int(rate_data[JSON_RD_ToNodeId])
                array_rt[index] = rate_data[rates_key][age_index]

            bin_file.write(struct.pack('I' * len(array_id), *array_id))
            bin_file.write(struct.pack('d' * len(array_rt), *array_rt))


# -----------------------------------------------------------------------------
# WriteMetadataFile
# -----------------------------------------------------------------------------
def WriteMetadataFile(metadata_fn, mig_type, json_data, rate_data):
    """Write the JSON metadata file describing the binary file's layout."""
    output_json = collections.OrderedDict([])

    output_json["Metadata"] = {}
    output_json["Metadata"]["IdReference"] = json_data[JSON_IdRef]
    output_json["Metadata"]["DateCreated"] = datetime.datetime.now().ctime()
    output_json["Metadata"]["Tool"] = os.path.basename(sys.argv[0])
    output_json["Metadata"]["DatavalueCount"] = rate_data.max_destinations_per_node
    output_json["Metadata"]["MigrationType"] = mig_type
    output_json["Metadata"]["GenderDataType"] = json_data[JSON_GenderDataType]
    output_json["Metadata"]["InterpolationType"] = json_data[JSON_InterpType]
    output_json["Metadata"]["AgesYears"] = json_data[JSON_AgesYears]
    output_json["Metadata"]["NodeCount"] = rate_data.num_nodes
    output_json["NodeOffsets"] = rate_data.offset_str

    with open(metadata_fn, 'w') as file:
        json.dump(output_json, file, indent=4)


# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("\nUsage: %s [input-json] [output-bin] [migration-type]" % os.path.basename(sys.argv[0]))
        sys.exit(0)

    json_fn = sys.argv[1]
    bin_fn = sys.argv[2]
    mig_type = sys.argv[3]

    metadata_fn = bin_fn + ".json"

    CheckMigrationType(mig_type)

    json_data = ReadJson(json_fn)

    summary = GetSummaryData(json_data)

    WriteBinFile(bin_fn, json_data, summary)
    WriteMetadataFile(metadata_fn, mig_type, json_data, summary)

    print(f"Finished converting {json_fn} to {bin_fn} and {metadata_fn}")
# The script will find the From_Node that has the most and use that for the
# DestinationsPerNode. The binary file will have DestinationsPerNode entries
# per node.
# -----------------------------------------------------------------------------

import sys, os, json, collections, struct, datetime

# Valid values for the [migration-type] command-line argument.
MigrationType = ["LOCAL_MIGRATION", "AIR_MIGRATION", "REGIONAL_MIGRATION", "SEA_MIGRATION"]


def ShowUsage():
    """Print the command-line usage string."""
    print('\nUsage: %s [input-migration-csv] [output-bin] [migration-type] [idreference]' % os.path.basename(sys.argv[0]))


def ParseMigrationRates(lines):
    """Parse 'FromNodeID,ToNodeID,Rate' CSV lines.

    *lines* is any iterable of strings (e.g. an open file).  Blank lines are
    skipped.  Returns a tuple
        (net, net_rate, node_id_list, max_destinations_per_node)
    where net maps FromNodeID -> [ToNodeID, ...], net_rate maps
    FromNodeID -> [rate, ...], node_id_list lists the FromNodeIDs in
    first-seen order, and max_destinations_per_node is the largest number of
    destinations of any from-node.
    """
    net = {}
    net_rate = {}
    node_id_list = []

    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue  # tolerate blank/trailing lines instead of crashing
        fields = stripped.split(',')
        from_id = int(float(fields[0]))
        to_id = int(float(fields[1]))
        rate = float(fields[2])
        if from_id not in net:
            net[from_id] = []
            net_rate[from_id] = []
            node_id_list.append(from_id)
        net[from_id].append(to_id)
        net_rate[from_id].append(rate)

    # BUG FIX: the original only updated the max when the FromNodeID changed
    # while streaming the file, so the destination count of the *last* node
    # group was never considered.  Computing the max over the accumulated
    # dictionary also handles non-contiguous FromNodeID groups correctly.
    max_destinations_per_node = 0
    for destinations in net.values():
        if len(destinations) > max_destinations_per_node:
            max_destinations_per_node = len(destinations)

    return net, net_rate, node_id_list, max_destinations_per_node


if __name__ == "__main__":
    if len(sys.argv) != 5:
        ShowUsage()
        sys.exit(0)

    filename = sys.argv[1]
    outfilename = sys.argv[2]
    mig_type = sys.argv[3]
    id_ref = sys.argv[4]

    if mig_type not in MigrationType:
        print("Invalid MigrationType = " + mig_type)
        sys.exit(-1)

    # ----------------------------
    # collect data from CSV file
    # ----------------------------
    with open(filename) as fopen:
        net, net_rate, node_id_list, max_destinations_per_node = ParseMigrationRates(fopen)

    # ---------------
    # Write bin file: each node gets max_destinations_per_node
    # (uint32 id, double rate) pairs, zero padded.
    # ---------------
    with open(outfilename, 'wb') as fout:
        for ID in net:
            ID_write = [0] * max_destinations_per_node
            ID_rate_write = [0.0] * max_destinations_per_node
            for i in range(len(net[ID])):
                ID_write[i] = net[ID][i]
                ID_rate_write[i] = net_rate[ID][i]
            # The type needs to be 'I' because Linux handles 'L' differently than Windows.
            fout.write(struct.pack('I' * len(ID_write), *ID_write))
            fout.write(struct.pack('d' * len(ID_rate_write), *ID_rate_write))

    # -------------------------------------------------------------------
    # Create NodeOffsets string
    # This contains the location of each From Node's data in the bin file
    # -------------------------------------------------------------------
    offset_str = ""
    nodecount = 0
    for ID in net:
        offset_str += '%0.8X' % ID
        offset_str += '%0.8X' % (nodecount * max_destinations_per_node * 12)  # 12 -> sizeof(uint32_t) + sizeof(double)
        nodecount += 1

    # -------------------
    # Write Metadata file
    # -------------------
    migjson = collections.OrderedDict([])
    migjson['Metadata'] = {}

    if os.name == "nt":
        migjson['Metadata']['Author'] = os.environ['USERNAME']
    else:
        migjson['Metadata']['Author'] = os.environ['USER']

    migjson['Metadata']['NodeCount'] = len(node_id_list)
    migjson['Metadata']['IdReference'] = id_ref
    migjson['Metadata']['DateCreated'] = datetime.datetime.now().ctime()
    migjson['Metadata']['Tool'] = os.path.basename(sys.argv[0])
    migjson['Metadata']['DatavalueCount'] = max_destinations_per_node
    migjson['Metadata']['MigrationType'] = mig_type
    migjson['NodeOffsets'] = offset_str

    with open(outfilename + ".json", 'w') as file:
        json.dump(migjson, file, indent=4)
a/examples-container/microsporidia/example.py +++ b/examples-container/microsporidia/example.py @@ -14,7 +14,7 @@ import emodpy.emod_task as emod_task from emodpy.utils import EradicationBambooBuilds from emodpy.bamboo import get_model_files -from emodpy_malaria.vector_migration.vector_migration import from_demographics_and_gravity_params +from emodpy_malaria.migration.vector_migration import from_demographics_and_gravity_params import manifest diff --git a/examples/migration_vector_and_microsporidia/example.py b/examples/migration_vector_and_microsporidia/example.py index 0d5de5b..05b62e1 100644 --- a/examples/migration_vector_and_microsporidia/example.py +++ b/examples/migration_vector_and_microsporidia/example.py @@ -12,7 +12,7 @@ # emodpy import emodpy.emod_task as emod_task -from emodpy_malaria.vector_migration.vector_migration import from_demographics_and_gravity_params +from emodpy_malaria.migration.vector_migration import from_demographics_and_gravity_params from emodpy_malaria.vector_config import add_vector_migration, ModifierEquationType import manifest From 11121307d1dbdec82e427c6a350ebb4bd8e07286 Mon Sep 17 00:00:00 2001 From: "INTERNAL\\stitova" Date: Mon, 16 Dec 2024 18:45:03 -0800 Subject: [PATCH 3/6] per-review changes so far --- .../software-migration-creation-vector.rst | 50 ++++++++++--------- docs/emod/software-migration-vector.rst | 13 +---- docs/emod/software-migration.rst | 3 ++ emodpy_malaria/migration/README.md | 9 ++-- 4 files changed, 37 insertions(+), 38 deletions(-) diff --git a/docs/emod/software-migration-creation-vector.rst b/docs/emod/software-migration-creation-vector.rst index e8f5873..17c0a37 100644 --- a/docs/emod/software-migration-creation-vector.rst +++ b/docs/emod/software-migration-creation-vector.rst @@ -3,26 +3,28 @@ How to create vector migration files ==================================== You can create the JSON metadata and binary migration files needed by |EMOD_s| to run simulations -from CSV ata using Python scripts 
provided by |IDM_s|. You can assign the same -probability of migration to each vector in a node or you can assign different migration rates based on gender or -genetics of the vector. +from CSV data using Python script below. You can assign the same probability of migration to +each vector in a node or you can assign different migration rates based on gender or genetics of the vector. + +#. Run the `convert_csv_to_bin_vector_migration.py `_ script using the format below: + + python convert_csv_to_bin_vector_migration.py [input-migration-csv] + .. note:: - The **IdReference** must match the value in the demographics file. Each node can be connected a - maximum of 100 destination nodes. The bin.json metadata file will be created without a valid + The **IdReference** must match the value in the demographics file. The bin.json metadata file will be created without a valid **IdReference** with expectations that the user will set it themselves. -Create from CSV input -===================== - -This script converts a CSV formatted txt file to an EMOD binary-formatted migration file. -It also creates the required metadata file. +CSV Input Configurations +======================== +Below are different csv file input configurations you can use to create vector migration. -The CSV file can have several column configurations: +One rate for all vectors +------------------------ -1. Header (optional): FromNodeID, ToNodeID, Rate (Average # of Trips Per Day) +Header (optional): FromNodeID, ToNodeID, Rate (Average # of Trips Per Day) If the csv/text file has three columns with no headers, this is the format we assume. .. csv-table:: @@ -33,8 +35,10 @@ If the csv/text file has three columns with no headers, this is the format we as ToNodeID, integer, 1, 2147480000, NA,"NodeID, matching NodeIDs in demographics file, to which the vector/human will travel." Rate, float, 0, 3.40282e+38, NA, "Rate at which the all the vectors/humans will travel from the FromNodeID to ToNodeID." 
+Different rates for male and female vectors +------------------------------------------- -2. Header (optional): FromNodeID, ToNodeID, RateMales, RateFemales +Header (optional): FromNodeID, ToNodeID, RateMales, RateFemales If the csv/text file has four columns with no headers, this is the format we assume. .. csv-table:: @@ -46,8 +50,13 @@ If the csv/text file has four columns with no headers, this is the format we ass RateMales, float,0, 3.40282e+38, NA, "Rate at which the vector/human of male sex will travel from the FromNodeID to ToNodeID." RateFemales, float, 0, 3.40282e+38, NA, "Rate at which the vector/human of female sex will travel from the FromNodeID to ToNodeID." +.. literalinclude:: ../csv/vector-migration-by-sex-input.csv + -3. Header (required): FromNodeID, ToNodeID, [], arrays denoting Allele_Combinations +Different rates depending on genetics of the vector +--------------------------------------------------- + +Header (required): FromNodeID, ToNodeID, [], arrays denoting Allele_Combinations Allele_Combinations example: [["a1", "a1"], ["b1", "b1"]] or [["X1","Y2"]] or [["*", "a0"], ["X1", "Y1"]] Due to use of commas in headers, it is best to use Excel to create them (or look at a sample text csv). This is to support VECTOR_MIGRATION_BY_GENETICS. Headers are required for this csv file. @@ -69,18 +78,13 @@ specified sex-alleles, any vector that matches the alleles regardless of sex wil "[['X1','Y2']]", float, 0, 3.40282e+38, NA,"Rate at which the vector that matches this and not a more-specific allele combination will travel from the FromNodeID to ToNodeID." -#. Run the `convert_csv_to_bin_vector_migration.py `_ script using the format below: - - python convert_csv_to_bin_vector_migration.py [input-migration-csv] - +.. literalinclude:: ../csv/vector-migration-by-genetics-input.csv -This will create both the metadata and binary file needed by |EMOD_s|. -Example Input files -------------------- +Migration binary file +===================== -.. 
literalinclude:: ../csv/vector-migration-by-genetics-input.csv -.. literalinclude:: ../csv/vector-migration-by-sex-input.csv +For information, see :ref:`binary_migration_file`. JSON metadata file diff --git a/docs/emod/software-migration-vector.rst b/docs/emod/software-migration-vector.rst index 6764bc0..6aee3eb 100644 --- a/docs/emod/software-migration-vector.rst +++ b/docs/emod/software-migration-vector.rst @@ -7,18 +7,7 @@ analogously to human migration (see :doc:`software-migration` for more informati model does not support migration by age and age-based migration in the migration file will cause an error. Vector migration does support migration by gender as well as migration based on genetics (see below). Vector migration is one way, such that each trip made by a vector is independent of previous trips made -by the vector. For **Vector_Sampling_Type** set to "TRACK_ALL_VECTORS" or "SAMPLE_IND_VECTORS", -the rates in the file are used to control whether or not a female vector will migrate: the rate specified -is used to get a "time to leave on trip" value from an exponential distribution. If the value is less than -one day, then the female vector will migrate. For male vectors (who are always in cohorts/compartments), -and for female vectors when **Vector_Sampling_Type** is set to "VECTOR_COMPARTMENTS_NUMBER" or -"VECTOR_COMPARTMENTS_PERCENT", the rates in the file are used to calculate what fraction of the population -is traveling out of the node on that day based on a total rate of travel out of that node and the traveling -vectors are distributed to their destination nodes in proportion of rates to those nodes to the total -outbound rate. total_fraction_traveling out of a node is 1.0 - exp( -1.0 * m_TotalRate ), where m_TotalRate is -the sum of all rates out of this node. 
Then fraction of vectors traveling to each node is (rate to node) / m_TotalRate * total_fraction_traveling -Please see https://github.com/EMOD-Hub/EMOD/blob/529bd11b19b5b10d49fab445dea29ee4ebd65740/Eradication/MigrationInfoVector.cpp#L335 for further details. - +by the vector. Note: If default geography is used (the configuration parameter **Enable_Demographics_Builtin** is set to 1, and **Default_Geography_Initial_Node_Population** and **Default_Geography_Torus_Size** are configured), diff --git a/docs/emod/software-migration.rst b/docs/emod/software-migration.rst index 9fd2028..fa29f52 100644 --- a/docs/emod/software-migration.rst +++ b/docs/emod/software-migration.rst @@ -98,6 +98,9 @@ Example .. literalinclude:: ../json/migration-metadata.json :language: json + +.. _binary_migration_file: + Binary file =========== diff --git a/emodpy_malaria/migration/README.md b/emodpy_malaria/migration/README.md index 23c77f9..20e471e 100644 --- a/emodpy_malaria/migration/README.md +++ b/emodpy_malaria/migration/README.md @@ -1,7 +1,10 @@ # Migration -This submodule provides scripts for creating and reading vector migration input files that are directly ingested by the DTK (EMOD) -for determining how individuals migrate between nodes over time during a simulation. Going forward, all reading and writing of -these migration control files should be done via this submodule. +This submodule provides scripts for creating and reading vector migration input files that are directly ingested by +EMOD for determining how individuals migrate between nodes over time during a simulation. Going forward, all reading +and writing of these migration control files should be done via this submodule. + +The script files convert from human-readable files (txt, json, csv) to binary migration files. Please see documentation +for more details. 
From b23375d8544e61045c449b4c2280492632cbb96d Mon Sep 17 00:00:00 2001 From: "INTERNAL\\stitova" Date: Tue, 17 Dec 2024 12:50:09 -0800 Subject: [PATCH 4/6] updating tables --- .../software-migration-creation-vector.rst | 69 +++++++++++++++++-- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/docs/emod/software-migration-creation-vector.rst b/docs/emod/software-migration-creation-vector.rst index 17c0a37..0910f09 100644 --- a/docs/emod/software-migration-creation-vector.rst +++ b/docs/emod/software-migration-creation-vector.rst @@ -35,6 +35,26 @@ If the csv/text file has three columns with no headers, this is the format we as ToNodeID, integer, 1, 2147480000, NA,"NodeID, matching NodeIDs in demographics file, to which the vector/human will travel." Rate, float, 0, 3.40282e+38, NA, "Rate at which the all the vectors/humans will travel from the FromNodeID to ToNodeID." +Example: + +.. csv-table:: + :header: FromNodeID, ToNodeID, Rate + :widths: 5,5,5 + + 5,1,0.1 + 5,2,0.1 + 5,3,0.1 + 5,4,0.1 + 5,6,0 + 5,7,0 + 5,8,0.1 + 5,9,0.1 + +Actual csv: + +.. literalinclude:: ../csv/migration-input-file-simple.csv + + Different rates for male and female vectors ------------------------------------------- @@ -50,6 +70,23 @@ If the csv/text file has four columns with no headers, this is the format we ass RateMales, float,0, 3.40282e+38, NA, "Rate at which the vector/human of male sex will travel from the FromNodeID to ToNodeID." RateFemales, float, 0, 3.40282e+38, NA, "Rate at which the vector/human of female sex will travel from the FromNodeID to ToNodeID." +Example: + +.. csv-table:: + :header: FromNodeID, ToNodeID, RateMales, RateFemales + :widths: 5,5,5,5 + + 5,1,0.1,0.02 + 5,2,0.1,0.02 + 5,3,0.1,0.02 + 5,4,0.1,0.02 + 5,6,0,0.02 + 5,7,0,0.02 + 5,8,0.1,0 + 5,9,0.1,0 + +Actual csv: + .. literalinclude:: ../csv/vector-migration-by-sex-input.csv @@ -64,7 +101,8 @@ The first (empty, []) array is used as a "default rate" if the vector's genetics Allele_Combinations. 
The other column headers denote the rate that the vector will travel at if it matches the Allele_Combination listed. Vectors are checked against Allele_Combinations from most-specific, to least-specific, regardless of the order in the csv file. Allele_Combinations can, but don't have to, include sex-alleles. Without -specified sex-alleles, any vector that matches the alleles regardless of sex will travel at that rate. +specified sex-alleles, any vector that matches the alleles regardless of sex will travel at that rate. Use '*' as a +wildcard if the second allele does not matter and can be matched with anything. .. csv-table:: :header: Parameter, Data type, Min, Max, Default, Description @@ -73,10 +111,33 @@ specified sex-alleles, any vector that matches the alleles regardless of sex wil FromNodeID, integer, 1, 2147480000, NA, "NodeID, matching NodeIDs in demographics file, from which the vector/human will travel." ToNodeID, integer, 1, 2147480000, NA, "NodeID, matching NodeIDs in demographics file, to which the vector/human will travel." [], float, 0, 3.40282e+38, NA, "Default rate at which the vector that doesn't match any other allele combinations will travel from the FromNodeID to ToNodeID." - "[['a1', 'a1'], ['b1', 'b1']]", float, 0, 3.40282e+38, NA, "Rate at which the vector that matches this and not a more-specific allele combination will travel from the FromNodeID to ToNodeID." - "[['*', 'a0'], ['X1', 'Y1']]", float, 0, 3.40282e+38, NA,"Rate at which the vector that matches this and not a more-specific allele combination will travel from the FromNodeID to ToNodeID." - "[['X1','Y2']]", float, 0, 3.40282e+38, NA,"Rate at which the vector that matches this and not a more-specific allele combination will travel from the FromNodeID to ToNodeID." + User-defined Allele Combination, float, 0, 3.40282e+38, NA, "Rate at which the vector that matches this and not a more-specific allele combination will travel from the FromNodeID to ToNodeID." + +Example: +.. 
csv-table:: + :header: FromNodeID, ToNodeID, [], "[['a1', 'a1'], ['b1', 'b1']]", "[['*', 'a0'], ['X1', 'Y1']]", "[['X1','Y2']]" + :widths: 5,5,5,5,5,5 + + 5,1,0.1,0,0,0 + 5,2,0,0.1,0,0 + 5,3,0,0,0.1,0 + 5,4,0,0,0,0.1 + 5,6,0,0,0,0 + 5,7,0.1,0.1,0,0 + 5,8,0.1,0,0.1,0.05 + 5,9,0,0.1,0,0 + 1,2,1,0,0,0 + 1,3,0,1,0,0 + 1,4,0,0,1,0 + 1,6,0,0,0,1 + 3,6,0,0,0,0 + 3,7,0,0.5,0,0 + 3,8,0.5,0,0,0.0 + 3,9,0,0.5,0,0 + + +Actual csv: .. literalinclude:: ../csv/vector-migration-by-genetics-input.csv From ba52436720b9063ef3f3e4f3afeb67f7f0cd83d4 Mon Sep 17 00:00:00 2001 From: "INTERNAL\\stitova" Date: Tue, 17 Dec 2024 12:58:18 -0800 Subject: [PATCH 5/6] udpating descriptions --- docs/emod/software-migration-creation-vector.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/emod/software-migration-creation-vector.rst b/docs/emod/software-migration-creation-vector.rst index 0910f09..faedf5e 100644 --- a/docs/emod/software-migration-creation-vector.rst +++ b/docs/emod/software-migration-creation-vector.rst @@ -170,9 +170,9 @@ Vector Migration Metadata File Parameters AlleleCombinations, array, "Required for GenderDataType: VECTOR_MIGRATION_BY_GENETICS. An array of Allele_Combinations, starting with an emtpy array to mark the default migration rate." NodeCount, integer, "Required. The number of 'from' nodes in the data. Used to verify size NodeOffsets - 16*NodeCount = # chars in NodeOffsets." NodeOffsets, string, "Required. The number of rates/'to' nodes for each 'from' node. Max of 100." - DateCreated, string, Date and time the file was generated by the script. Informational for user only. - Tool, string, The script used to create the file. Informational for user only. - Project, string, Example of a user-created parameter. Informational for user only. + DateCreated , string, (Informational for user only) Date and time the file was generated by the script. + Tool, string, (Informational for user only) The script used to create the file. 
+ User-created parameter, string, (Informational for user only) Example of a user-created parameter Example From fa987a7d10ef4c39502679acdb5c506acfd9f0bb Mon Sep 17 00:00:00 2001 From: "INTERNAL\\stitova" Date: Tue, 17 Dec 2024 17:35:34 -0800 Subject: [PATCH 6/6] pep8 and some updates. I realize there are multiple Enum redefinitions, we will eventually be importing them from the migration file, but they aren't there yet. --- .../software-migration-creation-vector.rst | 6 +- docs/emod/software-migration-creation.rst | 8 +- emodpy_malaria/migration/__main__.py | 0 .../convert_csv_to_bin_vector_migration.py | 38 +-- .../migration/convert_json_to_bin.py | 297 +++++++++--------- .../migration/convert_txt_to_bin.py | 108 ++++--- 6 files changed, 232 insertions(+), 225 deletions(-) create mode 100644 emodpy_malaria/migration/__main__.py diff --git a/docs/emod/software-migration-creation-vector.rst b/docs/emod/software-migration-creation-vector.rst index faedf5e..7a5f30a 100644 --- a/docs/emod/software-migration-creation-vector.rst +++ b/docs/emod/software-migration-creation-vector.rst @@ -6,15 +6,15 @@ You can create the JSON metadata and binary migration files needed by |EMOD_s| t from CSV data using Python script below. You can assign the same probability of migration to each vector in a node or you can assign different migration rates based on gender or genetics of the vector. -#. Run the `convert_csv_to_bin_vector_migration.py `_ script using the format below: +#. Run the 'convert_csv_to_bin_vector_migration.py' script using the format below:: - python convert_csv_to_bin_vector_migration.py [input-migration-csv] + python -m emodpy_malaria.migration.convert_csv_to_bin_vector_migration [input-migration-csv] [idreference(optional)] .. note:: The **IdReference** must match the value in the demographics file. The bin.json metadata file will be created without a valid - **IdReference** with expectations that the user will set it themselves. 
+ **IdReference** with expectations that the user will set it themselves if that argument is not passed in. CSV Input Configurations diff --git a/docs/emod/software-migration-creation.rst b/docs/emod/software-migration-creation.rst index f09b555..f99207b 100644 --- a/docs/emod/software-migration-creation.rst +++ b/docs/emod/software-migration-creation.rst @@ -41,9 +41,9 @@ To use the same average migration rate for every individual in a node, create th Rate The average number of trips per day. -#. Run the `convert_txt_to_bin.py `_ script using the command format below:: +#. Run the 'convert_txt_to_bin.py' script using the command format below:: - python convert_txt_to_bin.py [input-migration-csv] [output-bin] [migration-type] [idreference] + python -m emodpy_malaria.migration.convert_txt_to_bin [input-migration-csv] [output-bin] [migration-type] [idreference] This will create both the metadata and binary file needed by |EMOD_s|. @@ -61,9 +61,9 @@ To vary the average migration rate based on age and/or gender, create the migrat #. Create a JSON file with the structure described in the sections below. -#. Run the `convert_json_to_bin.py `_ script using the command format below:: +#. Run the 'convert_json_to_bin.py' script using the command format below:: - python convert_json_to_bin.py [input-json] [output-bin] [migration-type] + python -m emodpy_malaria.migration.convert_json_to_bin [input-json] [output-bin] [migration-type] This will create both the metadata and binary file needed by |EMOD_s|. 
diff --git a/emodpy_malaria/migration/__main__.py b/emodpy_malaria/migration/__main__.py new file mode 100644 index 0000000..e69de29 diff --git a/emodpy_malaria/migration/convert_csv_to_bin_vector_migration.py b/emodpy_malaria/migration/convert_csv_to_bin_vector_migration.py index 5e971f6..8a0e7a3 100644 --- a/emodpy_malaria/migration/convert_csv_to_bin_vector_migration.py +++ b/emodpy_malaria/migration/convert_csv_to_bin_vector_migration.py @@ -52,6 +52,7 @@ class GenderDataType(Enum): ONE_FOR_EACH_GENDER = "ONE_FOR_EACH_GENDER" VECTOR_MIGRATION_BY_GENETICS = "VECTOR_MIGRATION_BY_GENETICS" + class MetaData: def __init__(self): self.node_count = 0 @@ -59,20 +60,16 @@ def __init__(self): self.max_destinations_per_node = 0 self.gender_data_type = None self.filename_out = "" - self.has_headers = False self.num_columns = 0 self.allele_combinations = [] self.data_df = None self.ref_id = None -def ShowUsage(): - print('\nUsage: %s [input-migration-csv] [idreference]' % os.path.basename(sys.argv[0])) - # ----------------------------------------------------------------------------- # WriteMetadataFile # ----------------------------------------------------------------------------- -def WriteMetadataFile(metadata): +def write_metadata_file(metadata): output_json = collections.OrderedDict([]) output_json["Metadata"] = {} @@ -99,16 +96,16 @@ def is_number(value): return False -def GetSummaryData(metadata): +def get_summary_data(metadata): # ---------------------------- # collect data from CSV file # ---------------------------- data_df = metadata.data_df metadata.num_columns = len(data_df.iloc[0]) if metadata.num_columns < 3: - raise ValueError(f"There are {metadata.num_columns} in the file, but we expect at least three. Please review comments" - f" for expected column configurations and try again.") - if not metadata.has_headers: # no column headers + raise ValueError(f"There are {metadata.num_columns} in the file, but we expect at least three. 
" + f"Please review comments for expected column configurations and try again.") + if is_number(data_df.columns[0]): # no column headers if metadata.num_columns == 3: print(f"File doesn't seem to have headers, and with {metadata.num_columns} columns, " "we are assuming 'FromNodeID', 'ToNodeID', 'Rate' column configuration.") @@ -122,6 +119,7 @@ def GetSummaryData(metadata): f"obvious what the column configuration should be. If you are trying to create a " f"VECTOR_MIGRATION_BY_GENETICS file, please add headers as shown in the comments.") else: # has headers, force user to use one of the three formats + headers = data_df.columns if 'FromNodeID' not in headers[0] or 'ToNodeID' not in headers[1]: raise ValueError(f"With headers, we expect first two column headers to be 'FromNodeID', 'ToNodeID', but " f"they are {headers[0]} and {headers[1]}.") @@ -173,10 +171,11 @@ def GetSummaryData(metadata): # return metadata + # ----------------------------------------------------------------------------- # WriteBinFileGender # ----------------------------------------------------------------------------- -def WriteBinFile(metadata): +def write_bin_file(metadata): bin_file = open(metadata.filename_out, 'wb') data_df = metadata.data_df from_node_id_list = data_df[data_df.columns[0]].unique().tolist() @@ -206,30 +205,23 @@ def WriteBinFile(metadata): bin_file.close() - if __name__ == "__main__": if not (len(sys.argv) == 2 or len(sys.argv) == 3): - ShowUsage() + print('\nUsage: %s [input-migration-csv] [idreference(optional)]' % os.path.basename(sys.argv[0])) exit(0) filename_in = sys.argv[1] - id_ref = "temp_ref_id" + id_ref = "temp_id_reference" if len(sys.argv) == 3: - id_ref = sys.argv[1] + id_ref = sys.argv[2] meta_data = MetaData() meta_data.ref_id = id_ref meta_data.data_df = pd.read_csv(filename_in) - headers = meta_data.data_df.columns.tolist() - if is_number(headers[0]): # no headers - meta_data.data_df = pd.read_csv(filename_in, header=None) - else: - meta_data.has_headers 
= True # False by default meta_data.filename_out = filename_in.split(".")[0] + ".bin" - - GetSummaryData(meta_data) - WriteBinFile(meta_data) - WriteMetadataFile(meta_data) + get_summary_data(meta_data) + write_bin_file(meta_data) + write_metadata_file(meta_data) print(f"max_destinations_per_node = {meta_data.max_destinations_per_node}") print(f"Finished converting {filename_in} to {meta_data.filename_out} and +.json metadata file.") diff --git a/emodpy_malaria/migration/convert_json_to_bin.py b/emodpy_malaria/migration/convert_json_to_bin.py index d6688cf..1f51559 100644 --- a/emodpy_malaria/migration/convert_json_to_bin.py +++ b/emodpy_malaria/migration/convert_json_to_bin.py @@ -1,6 +1,6 @@ # convert_json_to_bin.py # ----------------------------------------------------------------------------- -# This script converts a JSON formated txt file to an EMOD binary-formatted migration file. +# This script converts a JSON formatted txt file to an EMOD binary-formatted migration file. # It also creates the required metadata file. # # The JSON file allows the user to specify different rates for different ages @@ -12,11 +12,18 @@ # section has one Node Data section for each node that individuals can migrate # from. Each Node Data section has one chunk of data # [1-unint32_t (4-bytes) plus 1-double (8-bytes)] -# for each destination where each Node Data section has DestinationsPerNode chuncks. +# for each destination where each Node Data section has DestinationsPerNode chunks. 
# In other words, each Node Data section is 12-bytes times DestinationsPerNode # ----------------------------------------------------------------------------- -import sys, os, json, collections, struct, datetime + +import collections +import datetime +import json +import os +import struct +import sys +from enum import Enum # ----------------------------------------------------------------------------- # Age Limits @@ -24,191 +31,192 @@ AGE_Min = 0.0 AGE_Max = 125.0 + # ----------------------------------------------------------------------------- # CheckAge # ----------------------------------------------------------------------------- -def CheckAge( age ): - if( age < AGE_Min ): +def check_age(age): + if age < AGE_Min: print(f"Invalid age={age} < {AGE_Min}") exit(-1) - if( age > AGE_Max ): + if age > AGE_Max: print(f"Invalid age={age} > {AGE_Max}") exit(-1) + # ----------------------------------------------------------------------------- # CheckAgeArray # ----------------------------------------------------------------------------- -def CheckAgeArray( ages_years ): +def check_ages_array(ages_years): errmsg = JSON_AgesYears + " must be an array of ages in years and in increasing order." 
- if( len( ages_years ) == 0 ): + if len(ages_years) == 0: print(errmsg) exit(-1) prev = 0.0 for age in ages_years: - CheckAge( age ) - if( age < prev ): + check_age(age) + if age < prev: print(errmsg) exit(-1) prev = age + # ----------------------------------------------------------------------------- -# GenderDataTypes +# Enum Types # ----------------------------------------------------------------------------- -GDT_SAME_FOR_BOTH_GENDERS = "SAME_FOR_BOTH_GENDERS" -GDT_ONE_FOR_EACH_GENDER = "ONE_FOR_EACH_GENDER" +class GenderDataType(Enum): + SAME_FOR_BOTH_GENDERS = "SAME_FOR_BOTH_GENDERS" + ONE_FOR_EACH_GENDER = "ONE_FOR_EACH_GENDER" + VECTOR_MIGRATION_BY_GENETICS = "VECTOR_MIGRATION_BY_GENETICS" + + +class InterpolationTypes(Enum): + LINEAR_INTERPOLATION = "LINEAR_INTERPOLATION" + PIECEWISE_CONSTANT = "PIECEWISE_CONSTANT" + + +class MigrationTypes(Enum): + LOCAL_MIGRATION = "LOCAL_MIGRATION" + AIR_MIGRATION = "AIR_MIGRATION" + REGIONAL_MIGRATION = "REGIONAL_MIGRATION" + SEA_MIGRATION = "SEA_MIGRATION" -GenderDataTypes = [] -GenderDataTypes.append( GDT_SAME_FOR_BOTH_GENDERS ) -GenderDataTypes.append( GDT_ONE_FOR_EACH_GENDER ) # ----------------------------------------------------------------------------- # CheckGenderDataType # ----------------------------------------------------------------------------- -def CheckGenderDataType( gdt ): - found = False - for type in GenderDataTypes: - found |= (type == gdt) - - if( not found ): - print(f"Invalid GenderDataType = {gdt}") +def check_gender_data_type(gdt): + if gdt not in GenderDataType: + print(f"Invalid GenderDataType = {gdt}, valid GenderDataTypes are: " + f"{GenderDataType.SAME_FOR_BOTH_GENDERS}, {GenderDataType.ONE_FOR_EACH_GENDER}, " + f"{GenderDataType.VECTOR_MIGRATION_BY_GENETICS} (only for vector migration).") exit(-1) -# ----------------------------------------------------------------------------- -# InterpolationTypes -# ----------------------------------------------------------------------------- 
-InterpolationTypes = [] -InterpolationTypes.append( "LINEAR_INTERPOLATION" ) -InterpolationTypes.append( "PIECEWISE_CONSTANT" ) # ----------------------------------------------------------------------------- # CheckInterpolationType # ----------------------------------------------------------------------------- -def CheckInterpolationType( interp_type ): - found = False - for type in InterpolationTypes: - found |= (type == interp_type) - - if( not found ): - print(f"Invalid InterpolationType = {interp_type}") +def check_interpolation_type(interp_type): + if interp_type not in InterpolationTypes: + print(f"Invalid InterpolationType = {interp_type}, valid InterpolationTypes are: " + f"{InterpolationTypes.LINEAR_INTERPOLATION}, {InterpolationTypes.PIECEWISE_CONSTANT}.") exit(-1) -# ----------------------------------------------------------------------------- -# MigrationTypes -# ----------------------------------------------------------------------------- -MigrationTypes = [] -MigrationTypes.append( "LOCAL_MIGRATION" ) -MigrationTypes.append( "AIR_MIGRATION" ) -MigrationTypes.append( "REGIONAL_MIGRATION" ) -MigrationTypes.append( "SEA_MIGRATION" ) # ----------------------------------------------------------------------------- # CheckMigrationType # ----------------------------------------------------------------------------- -def CheckMigrationType( mig_type ): - found = False - for type in MigrationTypes: - found |= (type == mig_type) - - if( not found ): - print(f"Invalid MigrationType = {mig_type}") +def check_migration_type(mig_type): + if mig_type not in MigrationTypes: + print(f"Invalid MigrationType = {mig_type}, valid MigrationTypes are: " + f"{MigrationTypes.LOCAL_MIGRATION}, {MigrationTypes.REGIONAL_MIGRATION}," + f"{MigrationTypes.SEA_MIGRATION}, {MigrationTypes.AIR_MIGRATION}.") exit(-1) + # ----------------------------------------------------------------------------- # JSON Element Names # 
----------------------------------------------------------------------------- # NOTE: The indention below indicates where the tag is used in the JSON -JSON_IdRef = "IdReference" -JSON_InterpType = "Interpolation_Type" +JSON_IdRef = "IdReference" +JSON_InterpType = "Interpolation_Type" JSON_GenderDataType = "Gender_Data_Type" -JSON_AgesYears = "Ages_Years" -JSON_NodeData = "Node_Data" -JSON_ND_FromNodeId = "From_Node_ID" -JSON_ND_RateData = "Rate_Data" -JSON_RD_ToNodeId = "To_Node_ID" -JSON_RD_RatesBoth = "Avg_Num_Trips_Per_Day_Both" -JSON_RD_RatesMale = "Avg_Num_Trips_Per_Day_Male" -JSON_RD_RatesFemale = "Avg_Num_Trips_Per_Day_Female" +JSON_AgesYears = "Ages_Years" +JSON_NodeData = "Node_Data" +JSON_ND_FromNodeId = "From_Node_ID" +JSON_ND_RateData = "Rate_Data" +JSON_RD_ToNodeId = "To_Node_ID" +JSON_RD_RatesBoth = "Avg_Num_Trips_Per_Day_Both" +JSON_RD_RatesMale = "Avg_Num_Trips_Per_Day_Male" +JSON_RD_RatesFemale = "Avg_Num_Trips_Per_Day_Female" + # ----------------------------------------------------------------------------- # CheckInJson # ----------------------------------------------------------------------------- -def CheckInJson( fn, data, key ): - if( not key in data ): +def check_in_json(fn, data, key): + if key not in data: print(f"Could not find {key} in file {fn}.") exit(-1) + # ----------------------------------------------------------------------------- # CheckRatesSize # ----------------------------------------------------------------------------- -def CheckRatesSize( num_ages, rd_data, key ): - if( len( rd_data[ key ] ) != num_ages ): - print (f"{JSON_AgesYears} has {num_ages} values and one of the {key} has {len( rd_data[ key ] )} values. They must have the same number.") +def check_rates_size(num_ages, rd_data, key): + if len(rd_data[key]) != num_ages: + print( + f"{JSON_AgesYears} has {num_ages} values and one of the {key} has {len(rd_data[key])} values. 
" + f" They must have the same number.") exit(-1) + # ----------------------------------------------------------------------------- # ReadJson # ----------------------------------------------------------------------------- -def ReadJson( json_fn ): - json_file = open( json_fn,'r') - json_data = json.load( json_file ) +def read_json(json_fn): + json_file = open(json_fn, 'r') + json_data = json.load(json_file) json_file.close() - CheckInJson( json_fn, json_data, JSON_IdRef ) - CheckInJson( json_fn, json_data, JSON_InterpType ) - CheckInJson( json_fn, json_data, JSON_GenderDataType ) - CheckInJson( json_fn, json_data, JSON_AgesYears ) - CheckInJson( json_fn, json_data, JSON_NodeData ) + check_in_json(json_fn, json_data, JSON_IdRef) + check_in_json(json_fn, json_data, JSON_InterpType) + check_in_json(json_fn, json_data, JSON_GenderDataType) + check_in_json(json_fn, json_data, JSON_AgesYears) + check_in_json(json_fn, json_data, JSON_NodeData) - CheckInterpolationType( json_data[ JSON_InterpType ] ) - CheckGenderDataType( json_data[ JSON_GenderDataType ] ) - CheckAgeArray( json_data[ JSON_AgesYears ] ) + check_interpolation_type(json_data[JSON_InterpType]) + check_gender_data_type(json_data[JSON_GenderDataType]) + check_ages_array(json_data[JSON_AgesYears]) - if( len( json_data[ JSON_NodeData ] ) == 0 ): + if len(json_data[JSON_NodeData]) == 0: print(f"{JSON_NodeData} has no elements so there would be no migration data.") exit(-1) - num_ages = len( json_data[ JSON_AgesYears ] ) + num_ages = len(json_data[JSON_AgesYears]) - for nd_data in json_data[ JSON_NodeData ]: - CheckInJson( json_fn, nd_data, JSON_ND_FromNodeId ) - CheckInJson( json_fn, nd_data, JSON_ND_RateData ) + for nd_data in json_data[JSON_NodeData]: + check_in_json(json_fn, nd_data, JSON_ND_FromNodeId) + check_in_json(json_fn, nd_data, JSON_ND_RateData) - if( len( nd_data[ JSON_ND_RateData ] ) == 0 ): + if len(nd_data[JSON_ND_RateData]) == 0: print(f"{JSON_ND_RateData} has no elements so there would be no 
migration data.") exit(-1) - for rd_data in nd_data[ JSON_ND_RateData ]: - CheckInJson( json_fn, rd_data, JSON_RD_ToNodeId ) + for rd_data in nd_data[JSON_ND_RateData]: + check_in_json(json_fn, rd_data, JSON_RD_ToNodeId) - if( json_data[ JSON_GenderDataType ] == GDT_ONE_FOR_EACH_GENDER ): - CheckInJson( json_fn, rd_data, JSON_RD_RatesMale ) - CheckInJson( json_fn, rd_data, JSON_RD_RatesFemale ) + if json_data[JSON_GenderDataType] == GenderDataType.ONE_FOR_EACH_GENDER.value: + check_in_json(json_fn, rd_data, JSON_RD_RatesMale) + check_in_json(json_fn, rd_data, JSON_RD_RatesFemale) - CheckRatesSize( num_ages, rd_data, JSON_RD_RatesMale ) - CheckRatesSize( num_ages, rd_data, JSON_RD_RatesFemale ) + check_rates_size(num_ages, rd_data, JSON_RD_RatesMale) + check_rates_size(num_ages, rd_data, JSON_RD_RatesFemale) else: - CheckInJson( json_fn, rd_data, JSON_RD_RatesBoth ) + check_in_json(json_fn, rd_data, JSON_RD_RatesBoth) - CheckRatesSize( num_ages, rd_data, JSON_RD_RatesBoth ) + check_rates_size(num_ages, rd_data, JSON_RD_RatesBoth) return json_data + # ----------------------------------------------------------------------------- # SummaryData # ----------------------------------------------------------------------------- class SummaryData: - def __init__( self, nodeCount, offsetStr, maxDestinations ): - self.num_nodes = nodeCount - self.offset_str = offsetStr - self.max_destinations_per_node = maxDestinations + def __init__(self, node_count, offset_str, max_destinations_per_node): + self.num_nodes = node_count + self.offset_str = offset_str + self.max_destinations_per_node = max_destinations_per_node + # ----------------------------------------------------------------------------- # GetSummaryData # ----------------------------------------------------------------------------- -def GetSummaryData( json_data ): +def get_summary_data(json_data): from_node_id_list = [] # ------------------------------------------------------------------------- @@ -217,10 +225,10 @@ def 
GetSummaryData( json_data ): # This max is used in determine the layout of the binary data. # ------------------------------------------------------------------------- max_destinations = 0 - for node_data in json_data[ JSON_NodeData ]: - from_node_id_list.append( int( node_data[ JSON_ND_FromNodeId ] ) ) - destinations = len( node_data[ JSON_ND_RateData ] ) - if( destinations > max_destinations ): + for node_data in json_data[JSON_NodeData]: + from_node_id_list.append(int(node_data[JSON_ND_FromNodeId])) + destinations = len(node_data[JSON_ND_RateData]) + if destinations > max_destinations: max_destinations = destinations print(f"max_destinations = {max_destinations}") @@ -234,98 +242,99 @@ def GetSummaryData( json_data ): for from_node_id in from_node_id_list: offset_str += '%0.8X' % from_node_id - offset_str += '%0.8X' % (nodecount * max_destinations * 12) # 12 -> sizeof(uint32_t) + sizeof(double) + offset_str += '%0.8X' % (nodecount * max_destinations * 12) # 12 -> sizeof(uint32_t) + sizeof(double) nodecount += 1 - summary = SummaryData( nodecount, offset_str, max_destinations ) + return SummaryData(nodecount, offset_str, max_destinations) - return summary # ----------------------------------------------------------------------------- # WriteBinFile # ----------------------------------------------------------------------------- -def WriteBinFile( bin_fn, json_data, summary ): - bin_file = open( bin_fn, 'wb' ) - - if( json_data[ JSON_GenderDataType ] == GDT_ONE_FOR_EACH_GENDER ): - WriteBinFileGender( bin_file, json_data, summary, JSON_RD_RatesMale ) - WriteBinFileGender( bin_file, json_data, summary, JSON_RD_RatesFemale ) +def write_bin_file(bin_fn, json_data, summary): + bin_file = open(bin_fn, 'wb') + + if json_data[JSON_GenderDataType] == GenderDataType.ONE_FOR_EACH_GENDER.value: + write_bin_file_gender(bin_file, json_data, summary, JSON_RD_RatesMale) + write_bin_file_gender(bin_file, json_data, summary, JSON_RD_RatesFemale) else: - WriteBinFileGender( 
bin_file, json_data, summary, JSON_RD_RatesBoth ) + write_bin_file_gender(bin_file, json_data, summary, JSON_RD_RatesBoth) bin_file.close() + # ----------------------------------------------------------------------------- # WriteBinFileGender # ----------------------------------------------------------------------------- -def WriteBinFileGender( bin_file, json_data, summary, rates_key ): - for age_index in range( len( json_data[ JSON_AgesYears ] ) ): - for node_data in json_data[ JSON_NodeData ]: +def write_bin_file_gender(bin_file, json_data, summary, rates_key): + for age_index in range(len(json_data[JSON_AgesYears])): + for node_data in json_data[JSON_NodeData]: array_id = [] array_rt = [] - + # Initialize with zeros - for i in range( summary.max_destinations_per_node ): + for i in range(summary.max_destinations_per_node): array_id.append(0) array_rt.append(0) # Populate arrays with data index = 0 - for rate_data in node_data[ JSON_ND_RateData ] : - array_id[ index ] = int( rate_data[ JSON_RD_ToNodeId ] ) - array_rt[ index ] = rate_data[ rates_key ][ age_index ] + for rate_data in node_data[JSON_ND_RateData]: + array_id[index] = int(rate_data[JSON_RD_ToNodeId]) + array_rt[index] = rate_data[rates_key][age_index] index += 1 # Format data into binary - bin_data_id = struct.pack( 'I'*len(array_id), *array_id ) - bin_data_rt = struct.pack( 'd'*len(array_rt), *array_rt ) + bin_data_id = struct.pack('I' * len(array_id), *array_id) + bin_data_rt = struct.pack('d' * len(array_rt), *array_rt) + + bin_file.write(bin_data_id) + bin_file.write(bin_data_rt) - bin_file.write( bin_data_id ) - bin_file.write( bin_data_rt ) # ----------------------------------------------------------------------------- # WriteMetadataFile # ----------------------------------------------------------------------------- -def WriteMetadataFile( metadata_fn, mig_type, json_data, rate_data ): +def write_metadata_file(metadata_fn, mig_type, json_data, rate_data): output_json = 
collections.OrderedDict([]) output_json["Metadata"] = {} - output_json["Metadata"]["IdReference" ] = json_data[ JSON_IdRef ] - output_json["Metadata"]["DateCreated" ] = datetime.datetime.now().ctime() - output_json["Metadata"]["Tool" ] = os.path.basename(sys.argv[0]) - output_json["Metadata"]["DatavalueCount" ] = rate_data.max_destinations_per_node - output_json["Metadata"]["MigrationType" ] = mig_type - output_json["Metadata"]["GenderDataType" ] = json_data[ JSON_GenderDataType ] - output_json["Metadata"]["InterpolationType" ] = json_data[ JSON_InterpType ] - output_json["Metadata"]["AgesYears" ] = json_data[ JSON_AgesYears ] - output_json["Metadata"]["NodeCount" ] = rate_data.num_nodes + output_json["Metadata"]["IdReference"] = json_data[JSON_IdRef] + output_json["Metadata"]["DateCreated"] = datetime.datetime.now().ctime() + output_json["Metadata"]["Tool"] = os.path.basename(sys.argv[0]) + output_json["Metadata"]["DatavalueCount"] = rate_data.max_destinations_per_node + output_json["Metadata"]["MigrationType"] = mig_type + output_json["Metadata"]["GenderDataType"] = json_data[JSON_GenderDataType] + output_json["Metadata"]["InterpolationType"] = json_data[JSON_InterpType] + output_json["Metadata"]["AgesYears"] = json_data[JSON_AgesYears] + output_json["Metadata"]["NodeCount"] = rate_data.num_nodes output_json["NodeOffsets"] = rate_data.offset_str - with open( metadata_fn, 'w') as file: - json.dump( output_json, file, indent=4 ) + with open(metadata_fn, 'w') as file: + json.dump(output_json, file, indent=4) + # ----------------------------------------------------------------------------- # Main # ----------------------------------------------------------------------------- if __name__ == "__main__": if len(sys.argv) != 4: - print ("\nUsage: %s [input-json] [output-bin] [migration-type]" % os.path.basename(sys.argv[0])) + print("\nUsage: %s [input-json] [output-bin] [migration-type]" % os.path.basename(sys.argv[0])) exit(0) - json_fn = sys.argv[1] - bin_fn = 
sys.argv[2] + json_fn = sys.argv[1] + bin_fn = sys.argv[2] mig_type = sys.argv[3] metadata_fn = bin_fn + ".json" - CheckMigrationType( mig_type ) + check_migration_type(mig_type) - json_data = ReadJson( json_fn ) + json_data = read_json(json_fn) - summary = GetSummaryData( json_data ) + summary = get_summary_data(json_data) - WriteBinFile( bin_fn, json_data, summary ) - WriteMetadataFile( metadata_fn, mig_type, json_data, summary ) + write_bin_file(bin_fn, json_data, summary) + write_metadata_file(metadata_fn, mig_type, json_data, summary) print(f"Finished converting {json_fn} to {bin_fn} and {metadata_fn}") - diff --git a/emodpy_malaria/migration/convert_txt_to_bin.py b/emodpy_malaria/migration/convert_txt_to_bin.py index cd0344a..049a9ea 100644 --- a/emodpy_malaria/migration/convert_txt_to_bin.py +++ b/emodpy_malaria/migration/convert_txt_to_bin.py @@ -1,11 +1,11 @@ # convert_txt_to_bin.py # ----------------------------------------------------------------------------- -# This script converts a CSV formated txt file to an EMOD binary-formatted migration file. +# This script converts a CSV formatted txt file to an EMOD binary-formatted migration file. # It also creates the required metadata file. # # The CSV file has three columns # From_Node_ID, To_Node_ID, Rate (Average # of Trips Per Day) -# where the node ID's are the external ID's found in the demographics file. +# where the node IDs are the external IDs found in the demographics file. # Each node ID in the migration file must exist in the demographics file. # One can have node ID's in the demographics that don't exist in the migration file. # @@ -15,43 +16,50 @@ # per node. 
# ----------------------------------------------------------------------------- -import sys, os, json, collections, struct, datetime +import collections +import datetime +import json +import os +import struct +import sys +from enum import Enum -MigrationType = [] -MigrationType.append( "LOCAL_MIGRATION" ) -MigrationType.append( "AIR_MIGRATION" ) -MigrationType.append( "REGIONAL_MIGRATION" ) -MigrationType.append( "SEA_MIGRATION" ) +class MigrationTypes(Enum): + LOCAL_MIGRATION = "LOCAL_MIGRATION" + AIR_MIGRATION = "AIR_MIGRATION" + REGIONAL_MIGRATION = "REGIONAL_MIGRATION" + SEA_MIGRATION = "SEA_MIGRATION" + + +def show_usage(): + print('\nUsage: %s [input-migration-csv] [output-bin] [migration-type] ' + '[idreference]' % os.path.basename(sys.argv[0])) -def ShowUsage(): - print ('\nUsage: %s [input-migration-csv] [output-bin] [migration-type] [idreference]' % os.path.basename(sys.argv[0])) if __name__ == "__main__": if len(sys.argv) != 5: - ShowUsage() + show_usage() exit(0) - filename = sys.argv[1] + filename = sys.argv[1] outfilename = sys.argv[2] - mig_type = sys.argv[3] - id_ref = sys.argv[4] + mig_type = sys.argv[3] + id_ref = sys.argv[4] - mig_type_found = False - for mig in MigrationType: - mig_type_found |= (mig == mig_type) - - if( not mig_type_found ): - print ("Invalid MigrationType = " + mig_type) + if mig_type not in MigrationTypes.__members__: + print(f"Invalid MigrationType = {mig_type}, valid MigrationTypes are: " + f"{MigrationTypes.LOCAL_MIGRATION.value}, {MigrationTypes.REGIONAL_MIGRATION.value}, " + f"{MigrationTypes.SEA_MIGRATION.value}, {MigrationTypes.AIR_MIGRATION.value}.") exit(-1) max_destinations_per_node = 0 destinations_per_node = 0 - fopen=open(filename) - fout=open(outfilename,'wb') - net={} - net_rate={} + fopen = open(filename) + fout = open(outfilename, 'wb') + net = {} + net_rate = {} # ---------------------------- # collect data from CSV file @@ -59,20 +67,20 @@ def ShowUsage(): node_id_list = [] prev_id = -1 for line in fopen: - s=line.strip().split(',') - 
ID1=int(float(s[0])) - ID2=int(float(s[1])) - rate=float(s[2]) + s = line.strip().split(',') + ID1 = int(float(s[0])) + ID2 = int(float(s[1])) + rate = float(s[2]) if ID1 not in net: - net[ID1]=[] - net_rate[ID1]=[] + net[ID1] = [] + net_rate[ID1] = [] net[ID1].append(ID2) net_rate[ID1].append(rate) if prev_id != ID1: - if( destinations_per_node > max_destinations_per_node ): + if destinations_per_node > max_destinations_per_node: max_destinations_per_node = destinations_per_node node_id_list.append(ID1) - print (prev_id, max_destinations_per_node) + print(prev_id, max_destinations_per_node) prev_id = ID1 destinations_per_node = 0 destinations_per_node += 1 @@ -81,17 +89,17 @@ def ShowUsage(): # Write bin file # --------------- for ID in net: - ID_write=[] - ID_rate_write=[] + ID_write = [] + ID_rate_write = [] for i in range(max_destinations_per_node): ID_write.append(0) ID_rate_write.append(0) for i in range(len(net[ID])): - ID_write[i]=net[ID][i] - ID_rate_write[i]=net_rate[ID][i] - #The type needs to be 'I' because Linux handles 'L' differently than Windows. - s_write=struct.pack('I'*len(ID_write), *ID_write) - s_rate_write=struct.pack('d'*len(ID_rate_write),*ID_rate_write) + ID_write[i] = net[ID][i] + ID_rate_write[i] = net_rate[ID][i] + # The type needs to be 'I' because Linux handles 'L' differently than Windows. 
+ s_write = struct.pack('I' * len(ID_write), *ID_write) + s_rate_write = struct.pack('d' * len(ID_rate_write), *ID_rate_write) fout.write(s_write) fout.write(s_rate_write) @@ -107,7 +115,7 @@ def ShowUsage(): for ID in net: offset_str += '%0.8X' % ID - offset_str += '%0.8X' % (nodecount * max_destinations_per_node * 12) # 12 -> sizeof(uint32_t) + sizeof(double) + offset_str += '%0.8X' % (nodecount * max_destinations_per_node * 12) # 12 -> sizeof(uint32_t) + sizeof(double) nodecount += 1 # ------------------- @@ -115,21 +123,19 @@ def ShowUsage(): # ------------------- migjson = collections.OrderedDict([]) migjson['Metadata'] = {} - + if os.name == "nt": migjson['Metadata']['Author'] = os.environ['USERNAME'] else: migjson['Metadata']['Author'] = os.environ['USER'] - - migjson['Metadata']['NodeCount' ] = len(node_id_list) - migjson['Metadata']['IdReference' ] = id_ref - migjson['Metadata']['DateCreated' ] = datetime.datetime.now().ctime() - migjson['Metadata']['Tool' ] = os.path.basename(sys.argv[0]) - migjson['Metadata']['DatavalueCount' ] = max_destinations_per_node - migjson['Metadata']['MigrationType' ] = mig_type + + migjson['Metadata']['NodeCount'] = len(node_id_list) + migjson['Metadata']['IdReference'] = id_ref + migjson['Metadata']['DateCreated'] = datetime.datetime.now().ctime() + migjson['Metadata']['Tool'] = os.path.basename(sys.argv[0]) + migjson['Metadata']['DatavalueCount'] = max_destinations_per_node + migjson['Metadata']['MigrationType'] = mig_type migjson['NodeOffsets'] = offset_str - with open(outfilename+".json", 'w') as file: + with open(outfilename + ".json", 'w') as file: json.dump(migjson, file, indent=4) - -