Merge pull request #395 from EnergySystemsModellingLab/keep-input-data-ordered

Keep (some) input data ordered in MUSE2
EnergySystemsModellingLab · Feb 26, 2025 · 7733c8d · 7733c8d
2 parents 0c12284 + 4dd7e1b
commit 7733c8d
Show file tree

Hide file tree

Showing 17 changed files with 107 additions and 86 deletions.
diff --git a/src/agent.rs b/src/agent.rs
@@ -4,12 +4,16 @@ use crate::commodity::Commodity;
 use crate::process::Process;
 use crate::region::RegionSelection;
 use crate::time_slice::TimeSliceID;
+use indexmap::IndexMap;
 use serde::Deserialize;
 use serde_string_enum::DeserializeLabeledStringEnum;
 use std::collections::HashSet;
 use std::ops::RangeInclusive;
 use std::rc::Rc;
 
+/// A map of [`Agent`]s, keyed by agent ID
+pub type AgentMap = IndexMap<Rc<str>, Agent>;
+
 /// An agent in the simulation
 #[derive(Debug, Clone, PartialEq)]
 pub struct Agent {

diff --git a/src/commodity.rs b/src/commodity.rs
@@ -1,11 +1,15 @@
 #![allow(missing_docs)]
 use crate::input::*;
 use crate::time_slice::{TimeSliceID, TimeSliceLevel};
+use indexmap::IndexMap;
 use serde::Deserialize;
 use serde_string_enum::DeserializeLabeledStringEnum;
 use std::collections::HashMap;
 use std::rc::Rc;
 
+/// A map of [`Commodity`]s, keyed by commodity ID
+pub type CommodityMap = IndexMap<Rc<str>, Rc<Commodity>>;
+
 /// A commodity within the simulation. Represents a substance (e.g. CO2) or form of energy (e.g.
 /// electricity) that can be produced and/or consumed by technologies in the model.
 #[derive(PartialEq, Debug, Deserialize)]

diff --git a/src/input.rs b/src/input.rs
@@ -3,6 +3,7 @@ use crate::agent::AssetPool;
 use crate::model::{Model, ModelFile};
 use anyhow::{ensure, Context, Result};
 use float_cmp::approx_eq;
+use indexmap::IndexMap;
 use itertools::Itertools;
 use serde::de::{Deserialize, DeserializeOwned, Deserializer};
 use std::collections::{HashMap, HashSet};
@@ -115,16 +116,19 @@ impl IDCollection for HashSet<Rc<str>> {
     }
 }
 
-/// Read a CSV file of items with IDs
-pub fn read_csv_id_file<T>(file_path: &Path) -> Result<HashMap<Rc<str>, T>>
+/// Read a CSV file of items with IDs.
+///
+/// As this function is only ever used for top-level CSV files (i.e. the ones which actually define
+/// the IDs for a given type), we use an ordered map to maintain the order in the input files.
+pub fn read_csv_id_file<T>(file_path: &Path) -> Result<IndexMap<Rc<str>, T>>
 where
     T: HasID + DeserializeOwned,
 {
-    fn fill_and_validate_map<T>(file_path: &Path) -> Result<HashMap<Rc<str>, T>>
+    fn fill_and_validate_map<T>(file_path: &Path) -> Result<IndexMap<Rc<str>, T>>
     where
         T: HasID + DeserializeOwned,
     {
-        let mut map = HashMap::new();
+        let mut map = IndexMap::new();
         for record in read_csv::<T>(file_path)? {
             let id = record.get_id();
 

diff --git a/src/input/agent.rs b/src/input/agent.rs
@@ -1,12 +1,12 @@
 //! Code for reading in agent-related data from CSV files.
 use super::*;
-use crate::agent::{Agent, DecisionRule, SearchSpace};
-use crate::commodity::Commodity;
-use crate::process::Process;
+use crate::agent::{Agent, AgentMap, DecisionRule, SearchSpace};
+use crate::commodity::CommodityMap;
+use crate::process::ProcessMap;
 use crate::region::RegionSelection;
 use anyhow::{ensure, Context, Result};
 use serde::Deserialize;
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::path::Path;
 use std::rc::Rc;
 
@@ -54,10 +54,10 @@ struct AgentRaw {
 /// A map of Agents, with the agent ID as the key
 pub fn read_agents(
     model_dir: &Path,
-    commodities: &HashMap<Rc<str>, Rc<Commodity>>,
-    processes: &HashMap<Rc<str>, Rc<Process>>,
+    commodities: &CommodityMap,
+    processes: &ProcessMap,
     region_ids: &HashSet<Rc<str>>,
-) -> Result<HashMap<Rc<str>, Agent>> {
+) -> Result<AgentMap> {
     let process_ids = processes.keys().cloned().collect();
     let mut agents = read_agents_file(model_dir, commodities, &process_ids)?;
     let agent_ids = agents.keys().cloned().collect();
@@ -86,9 +86,9 @@ pub fn read_agents(
 /// A map of Agents, with the agent ID as the key
 pub fn read_agents_file(
     model_dir: &Path,
-    commodities: &HashMap<Rc<str>, Rc<Commodity>>,
+    commodities: &CommodityMap,
     process_ids: &HashSet<Rc<str>>,
-) -> Result<HashMap<Rc<str>, Agent>> {
+) -> Result<AgentMap> {
     let file_path = model_dir.join(AGENT_FILE_NAME);
     let agents_csv = read_csv(&file_path)?;
     read_agents_file_from_iter(agents_csv, commodities, process_ids)
@@ -98,13 +98,13 @@ pub fn read_agents_file(
 /// Read agents info from an iterator.
 fn read_agents_file_from_iter<I>(
     iter: I,
-    commodities: &HashMap<Rc<str>, Rc<Commodity>>,
+    commodities: &CommodityMap,
     process_ids: &HashSet<Rc<str>>,
-) -> Result<HashMap<Rc<str>, Agent>>
+) -> Result<AgentMap>
 where
     I: Iterator<Item = AgentRaw>,
 {
-    let mut agents = HashMap::new();
+    let mut agents = AgentMap::new();
     for agent_raw in iter {
         let commodity = commodities
             .get(agent_raw.commodity_id.as_str())
@@ -149,7 +149,7 @@ where
 mod tests {
     use super::*;
     use crate::agent::DecisionRule;
-    use crate::commodity::{CommodityCostMap, CommodityType, DemandMap};
+    use crate::commodity::{Commodity, CommodityCostMap, CommodityType, DemandMap};
     use crate::region::RegionSelection;
     use crate::time_slice::TimeSliceLevel;
     use std::iter;
@@ -191,7 +191,7 @@ mod tests {
             regions: RegionSelection::default(),
             objectives: Vec::new(),
         };
-        let expected = HashMap::from_iter([("agent".into(), agent_out)]);
+        let expected = AgentMap::from_iter(iter::once(("agent".into(), agent_out)));
         let actual =
             read_agents_file_from_iter(iter::once(agent), &commodities, &process_ids).unwrap();
         assert_eq!(actual, expected);

diff --git a/src/input/agent/objective.rs b/src/input/agent/objective.rs
@@ -1,6 +1,6 @@
 //! Code for reading the agent objectives CSV file.
 use super::super::*;
-use crate::agent::{Agent, AgentObjective, DecisionRule};
+use crate::agent::{Agent, AgentMap, AgentObjective, DecisionRule};
 use anyhow::{ensure, Context, Result};
 use std::collections::HashMap;
 use std::path::Path;
@@ -21,7 +21,7 @@ define_id_getter! {Agent}
 /// A map of Agents, with the agent ID as the key
 pub fn read_agent_objectives(
     model_dir: &Path,
-    agents: &HashMap<Rc<str>, Agent>,
+    agents: &AgentMap,
 ) -> Result<HashMap<Rc<str>, Vec<AgentObjective>>> {
     let file_path = model_dir.join(AGENT_OBJECTIVES_FILE_NAME);
     let agent_objectives_csv = read_csv(&file_path)?;
@@ -31,7 +31,7 @@ pub fn read_agent_objectives(
 
 fn read_agent_objectives_from_iter<I>(
     iter: I,
-    agents: &HashMap<Rc<str>, Agent>,
+    agents: &AgentMap,
 ) -> Result<HashMap<Rc<str>, Vec<AgentObjective>>>
 where
     I: Iterator<Item = AgentObjective>,
@@ -164,7 +164,7 @@ mod tests {
             costs: CommodityCostMap::new(),
             demand: DemandMap::new(),
         });
-        let agents: HashMap<_, _> = [(
+        let agents = [(
             "agent".into(),
             Agent {
                 id: "agent".into(),

diff --git a/src/input/asset.rs b/src/input/asset.rs
@@ -1,11 +1,11 @@
 //! Code for reading [Asset]s from a CSV file.
 use crate::agent::Asset;
 use crate::input::*;
-use crate::process::Process;
+use crate::process::ProcessMap;
 use anyhow::{ensure, Context, Result};
 use itertools::Itertools;
 use serde::Deserialize;
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::path::Path;
 use std::rc::Rc;
 
@@ -35,7 +35,7 @@ struct AssetRaw {
 pub fn read_assets(
     model_dir: &Path,
     agent_ids: &HashSet<Rc<str>>,
-    processes: &HashMap<Rc<str>, Rc<Process>>,
+    processes: &ProcessMap,
     region_ids: &HashSet<Rc<str>>,
 ) -> Result<Vec<Asset>> {
     let file_path = model_dir.join(ASSETS_FILE_NAME);
@@ -59,7 +59,7 @@ pub fn read_assets(
 fn read_assets_from_iter<I>(
     iter: I,
     agent_ids: &HashSet<Rc<str>>,
-    processes: &HashMap<Rc<str>, Rc<Process>>,
+    processes: &ProcessMap,
     region_ids: &HashSet<Rc<str>>,
 ) -> Result<Vec<Asset>>
 where
@@ -92,7 +92,7 @@ where
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::process::{ProcessCapacityMap, ProcessParameter};
+    use crate::process::{Process, ProcessCapacityMap, ProcessParameter};
     use crate::region::RegionSelection;
     use itertools::assert_equal;
     use std::iter;

diff --git a/src/input/commodity.rs b/src/input/commodity.rs
@@ -1,9 +1,9 @@
 //! Code for reading in commodity-related data from CSV files.
-use crate::commodity::Commodity;
+use crate::commodity::{Commodity, CommodityMap};
 use crate::input::*;
 use crate::time_slice::TimeSliceInfo;
 use anyhow::Result;
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::path::Path;
 use std::rc::Rc;
 
@@ -32,7 +32,7 @@ pub fn read_commodities(
     region_ids: &HashSet<Rc<str>>,
     time_slice_info: &TimeSliceInfo,
     milestone_years: &[u32],
-) -> Result<HashMap<Rc<str>, Rc<Commodity>>> {
+) -> Result<CommodityMap> {
     let commodities = read_csv_id_file::<Commodity>(&model_dir.join(COMMODITY_FILE_NAME))?;
     let commodity_ids = commodities.keys().cloned().collect();
     let mut costs = read_commodity_costs(

diff --git a/src/input/process.rs b/src/input/process.rs
@@ -1,7 +1,7 @@
 //! Code for reading process-related information from CSV files.
-use crate::commodity::{Commodity, CommodityType};
+use crate::commodity::{Commodity, CommodityMap, CommodityType};
 use crate::input::*;
-use crate::process::{Process, ProcessCapacityMap, ProcessFlow, ProcessParameter};
+use crate::process::{Process, ProcessCapacityMap, ProcessFlow, ProcessMap, ProcessParameter};
 use crate::region::RegionSelection;
 use crate::time_slice::TimeSliceInfo;
 use anyhow::Result;
@@ -55,11 +55,11 @@ define_id_getter! {ProcessDescription}
 /// This function returns a map of processes, with the IDs as keys.
 pub fn read_processes(
     model_dir: &Path,
-    commodities: &HashMap<Rc<str>, Rc<Commodity>>,
+    commodities: &CommodityMap,
     region_ids: &HashSet<Rc<str>>,
     time_slice_info: &TimeSliceInfo,
     year_range: &RangeInclusive<u32>,
-) -> Result<HashMap<Rc<str>, Rc<Process>>> {
+) -> Result<ProcessMap> {
     let file_path = model_dir.join(PROCESSES_FILE_NAME);
     let descriptions = read_csv_id_file::<ProcessDescription>(&file_path)?;
     let process_ids = HashSet::from_iter(descriptions.keys().cloned());
@@ -83,7 +83,7 @@ pub fn read_processes(
 
 /// Perform consistency checks for commodity flows.
 fn validate_commodities(
-    commodities: &HashMap<Rc<str>, Rc<Commodity>>,
+    commodities: &CommodityMap,
     flows: &HashMap<Rc<str>, Vec<ProcessFlow>>,
 ) -> anyhow::Result<()> {
     for (commodity_id, commodity) in commodities {
@@ -128,7 +128,7 @@ fn create_process_map<I>(
     mut flows: HashMap<Rc<str>, Vec<ProcessFlow>>,
     mut parameters: HashMap<Rc<str>, ProcessParameter>,
     mut regions: HashMap<Rc<str>, RegionSelection>,
-) -> Result<HashMap<Rc<str>, Rc<Process>>>
+) -> Result<ProcessMap>
 where
     I: Iterator<Item = ProcessDescription>,
 {
@@ -159,7 +159,7 @@ where
 
             Ok((description.id, process.into()))
         })
-        .process_results(|iter| iter.collect())
+        .try_collect()
 }
 
 #[cfg(test)]
@@ -303,7 +303,7 @@ mod tests {
             demand: DemandMap::new(),
         });
 
-        let commodities: HashMap<Rc<str>, Rc<Commodity>> = [
+        let commodities: CommodityMap = [
             (Rc::clone(&commodity_sed.id), Rc::clone(&commodity_sed)),
             (
                 Rc::clone(&commodity_non_sed.id),

diff --git a/src/input/process/flow.rs b/src/input/process/flow.rs
@@ -1,6 +1,6 @@
 //! Code for reading process flows file
 use super::define_process_id_getter;
-use crate::commodity::Commodity;
+use crate::commodity::CommodityMap;
 use crate::input::*;
 use crate::process::{FlowType, ProcessFlow};
 use anyhow::{ensure, Context, Result};
@@ -30,7 +30,7 @@ define_process_id_getter! {ProcessFlowRaw}
 pub fn read_process_flows(
     model_dir: &Path,
     process_ids: &HashSet<Rc<str>>,
-    commodities: &HashMap<Rc<str>, Rc<Commodity>>,
+    commodities: &CommodityMap,
 ) -> Result<HashMap<Rc<str>, Vec<ProcessFlow>>> {
     let file_path = model_dir.join(PROCESS_FLOWS_FILE_NAME);
     let process_flow_csv = read_csv(&file_path)?;
@@ -42,7 +42,7 @@ pub fn read_process_flows(
 fn read_process_flows_from_iter<I>(
     iter: I,
     process_ids: &HashSet<Rc<str>>,
-    commodities: &HashMap<Rc<str>, Rc<Commodity>>,
+    commodities: &CommodityMap,
 ) -> Result<HashMap<Rc<str>, Vec<ProcessFlow>>>
 where
     I: Iterator<Item = ProcessFlowRaw>,
@@ -151,14 +151,14 @@ fn validate_pac_flows(flows: &HashMap<Rc<str>, Vec<ProcessFlow>>) -> Result<()>
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::commodity::{CommodityCostMap, CommodityType, DemandMap};
+    use crate::commodity::{Commodity, CommodityCostMap, CommodityType, DemandMap};
     use crate::time_slice::TimeSliceLevel;
     use std::iter;
 
     #[test]
     fn test_read_process_flows_from_iter_good() {
         let process_ids = ["id1".into(), "id2".into()].into_iter().collect();
-        let commodities: HashMap<Rc<str>, Rc<Commodity>> = ["commodity1", "commodity2"]
+        let commodities: CommodityMap = ["commodity1", "commodity2"]
             .into_iter()
             .map(|id| {
                 let commodity = Commodity {

diff --git a/src/input/region.rs b/src/input/region.rs
@@ -1,6 +1,6 @@
 //! Code for reading region-related information from CSV files.
 use super::*;
-use crate::region::{Region, RegionSelection};
+use crate::region::{Region, RegionMap, RegionSelection};
 use anyhow::{anyhow, ensure, Context, Result};
 use serde::de::DeserializeOwned;
 use std::collections::{HashMap, HashSet};
@@ -37,7 +37,7 @@ pub(crate) use define_region_id_getter;
 /// # Returns
 ///
 /// A [`RegionMap`] with the parsed regions data or an error. The keys are region IDs.
-pub fn read_regions(model_dir: &Path) -> Result<HashMap<Rc<str>, Region>> {
+pub fn read_regions(model_dir: &Path) -> Result<RegionMap> {
     read_csv_id_file(&model_dir.join(REGIONS_FILE_NAME))
 }
 
@@ -164,7 +164,7 @@ AP,Asia Pacific"
         let regions = read_regions(dir.path()).unwrap();
         assert_eq!(
             regions,
-            HashMap::from([
+            RegionMap::from([
                 (
                     "NA".into(),
                     Region {