From 29f12e25e8a74fa46fe41e3e322651bb45ce5ffa Mon Sep 17 00:00:00 2001
From: Milad Ebrahimipour <milad.ebrahimipour@rapidsilicon.com>
Date: Thu, 31 Aug 2023 01:37:05 -0400
Subject: [PATCH 1/6] Adding congestion-aware placement

---
 vpr/src/place/move_generator.h |   2 +
 vpr/src/place/place.cpp        | 340 ++++++++++++++++++++++++++-------
 vpr/src/place/place_util.cpp   |   5 +
 vpr/src/place/place_util.h     |   3 +
 4 files changed, 280 insertions(+), 70 deletions(-)
diff --git a/vpr/src/place/move_generator.h b/vpr/src/place/move_generator.h
index 378d86c8bf1..74ac8dfd00e 100644
--- a/vpr/src/place/move_generator.h
+++ b/vpr/src/place/move_generator.h
@@ -12,9 +12,11 @@ struct MoveOutcomeStats {
     float delta_cost_norm = std::numeric_limits<float>::quiet_NaN();
     float delta_bb_cost_norm = std::numeric_limits<float>::quiet_NaN();
     float delta_timing_cost_norm = std::numeric_limits<float>::quiet_NaN();
+    float delta_cong_cost_norm = std::numeric_limits<float>::quiet_NaN();
 
     float delta_bb_cost_abs = std::numeric_limits<float>::quiet_NaN();
     float delta_timing_cost_abs = std::numeric_limits<float>::quiet_NaN();
+    float delta_cong_cost_abs = std::numeric_limits<float>::quiet_NaN();
 
     e_move_result outcome = ABORTED;
     float elapsed_time = std::numeric_limits<float>::quiet_NaN();
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 1b9a6508010..b201705a69f 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -171,6 +171,11 @@ static const float cross_count[50] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828,
                                       2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671,
                                       2.7933};
 
+double cong_matrix[400][400];     // per-tile routing-demand map [x][y]; update_move_nets() commits cong_matrix_new into it
+double cong_matrix_new[400][400]; // working copy edited per proposed move; reset_move_nets() restores it from cong_matrix
+// NOTE(review): both maps are fixed at 400x400 but are indexed up to grid.width()/height() without a bounds check
+float congestion_tradeoff = 1.0;  // the congestion cost term is weighted by (1 - congestion_tradeoff)
+
 std::unique_ptr<FILE, decltype(&vtr::fclose)> f_move_stats_file(nullptr,
                                                                 vtr::fclose);
 
@@ -266,7 +271,7 @@ static void alloc_and_load_for_fast_cost_update(float place_cost_exp);
 
 static void free_fast_cost_update();
 
-static double comp_bb_cost(e_cost_methods method);
+static double comp_bb_cost(e_cost_methods method,const t_place_algorithm& place_algorithm);
 
 static void update_move_nets(int num_nets_affected);
 static void reset_move_nets(int num_nets_affected);
@@ -323,7 +328,7 @@ static bool driven_by_moved_block(const ClusterNetId net,
 
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks);
 
-static e_move_result assess_swap(double delta_c, double t);
+static e_move_result assess_swap(double delta_c, double t,double cong_delta_c,double cost,const t_placer_opts& placer_opts);
 
 static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new);
 
@@ -335,7 +340,9 @@ static int find_affected_nets_and_update_costs(
     const PlacerCriticalities* criticalities,
     t_pl_blocks_to_be_moved& blocks_affected,
     double& bb_delta_c,
-    double& timing_delta_c);
+    double& timing_delta_c,
+    double& bb_cong_c,
+    const t_placer_opts& placer_opts);
 
 static void record_affected_net(const ClusterNetId net, int& num_affected_nets);
 
@@ -356,6 +363,9 @@ static void update_placement_cost_normalization_factors(t_placer_costs* costs, c
 static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
 
 static double get_net_cost(ClusterNetId net_id, t_bb* bb_ptr);
+static double get_cong_cost(double chan_width);
+static void get_cong_matrix(ClusterNetId net_id, t_bb* bb_ptr);
+static void update_cong_matrix(ClusterNetId net_id, t_bb* bb_ptr_old, t_bb* bb_ptr_new);
 
 static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_edges);
 
@@ -448,7 +458,7 @@ void try_place(const Netlist<>& net_list,
      * width of the widest channel.  Place_cost_exp says what exponent the   *
      * width should be taken to when calculating costs.  This allows a       *
      * greater bias for anisotropic architectures.                           */
-
+    
     /*
      * Currently, the functions that require is_flat as their parameter and are called during placement should
      * receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as
@@ -574,8 +584,12 @@ void try_place(const Netlist<>& net_list,
     /* Gets initial cost and loads bounding boxes. */
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
-        costs.bb_cost = comp_bb_cost(NORMAL);
+        costs.bb_cost = comp_bb_cost(NORMAL,placer_opts.place_algorithm);
 
+        if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+            costs.cong_cost = get_cong_cost(placer_opts.congestion_tradeoff);
+        }
+        // costs.cong_cost_norm = 1/ costs.cong_cost;
         first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
 
         num_connections = count_connections();
@@ -609,7 +623,7 @@ void try_place(const Netlist<>& net_list,
         placer_criticalities = std::make_unique<PlacerCriticalities>(
             cluster_ctx.clb_nlist, netlist_pin_lookup);
 
-        pin_timing_invalidator = make_net_pin_timing_invalidator(
+         pin_timing_invalidator = make_net_pin_timing_invalidator(
             placer_opts.timing_update_type,
             net_list,
             netlist_pin_lookup,
@@ -654,9 +668,12 @@ void try_place(const Netlist<>& net_list,
         VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE);
 
         /* Total cost is the same as wirelength cost normalized*/
-        costs.bb_cost = comp_bb_cost(NORMAL);
+        costs.bb_cost = comp_bb_cost(NORMAL,placer_opts.place_algorithm);
         costs.bb_cost_norm = 1 / costs.bb_cost;
-
+        if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+            costs.cong_cost = get_cong_cost(placer_opts.congestion_tradeoff);
+            costs.cong_cost_norm = 1/ costs.cong_cost;
+        }
         /* Timing cost and normalization factors are not used */
         costs.timing_cost = INVALID_COST;
         costs.timing_cost_norm = INVALID_COST;
@@ -773,20 +790,27 @@ void try_place(const Netlist<>& net_list,
     /* Set the temperature low to ensure that initial placement quality will be preserved */
     first_t = EPSILON;
 
-    t_annealing_state state(annealing_sched,
-                            first_t,
+    t_annealing_state state(annealing_sched, 
+                            first_t, 
                             first_rlim,
-                            first_move_lim,
+                            first_move_lim, 
                             first_crit_exponent,
                             device_ctx.grid.get_num_layers());
 
     /* Update the starting temperature for placement annealing to a more appropriate value */
+    if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+        congestion_tradeoff = 1.0;
+    }
     state.t = starting_t(&state, &costs, annealing_sched,
                          place_delay_model.get(), placer_criticalities.get(),
                          placer_setup_slacks.get(), timing_info.get(), *move_generator,
                          *manual_move_generator, pin_timing_invalidator.get(),
                          blocks_affected, placer_opts, noc_opts, move_type_stat);
-
+                        
+    if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+        congestion_tradeoff = 0.1;
+    }
+    double starting_tempreture = state.t ;
     if (!placer_opts.move_stats_file.empty()) {
         f_move_stats_file = std::unique_ptr<FILE, decltype(&vtr::fclose)>(
             vtr::fopen(placer_opts.move_stats_file.c_str(), "w"),
@@ -818,9 +842,16 @@ void try_place(const Netlist<>& net_list,
         //Table header
         VTR_LOG("\n");
         print_place_status_header();
+        bool congest_flag = false;
 
         /* Outer loop of the simulated annealing begins */
         do {
+            if(get_cong_cost(placer_opts.congestion_tradeoff)<=20 && congest_flag==false && placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+                congest_flag = true;
+                congestion_tradeoff = 1.0;
+                state.t = starting_tempreture;
+                // state.move_lim = state.move_lim_max;
+            }
             vtr::Timer temperature_timer;
 
             outer_loop_update_timing_info(placer_opts, noc_opts, &costs, num_connections,
@@ -848,6 +879,8 @@ void try_place(const Netlist<>& net_list,
                                           agent_state, placer_opts, false, current_move_generator);
 
             //do a complete inner loop iteration
+            // VTR_LOG("cong_cost is:%5.5f\n",);
+
             placement_inner_loop(&state, placer_opts, noc_opts,
                                  inner_recompute_limit,
                                  &stats, &costs, &moves_since_cost_recompute,
@@ -1168,8 +1201,8 @@ static void placement_inner_loop(const t_annealing_state* state,
 #ifdef VERBOSE
         VTR_LOG("t = %g  cost = %g   bb_cost = %g timing_cost = %g move = %d\n",
                 state->t, costs->cost, costs->bb_cost, costs->timing_cost, inner_iter);
-        if (fabs((costs->bb_cost) - comp_bb_cost(CHECK)) > (costs->bb_cost) * ERROR_TOL)
-            VPR_ERROR(VPR_ERROR_PLACE, "bb_cost is %g, comp_bb_cost is %g\n", costs->bb_cost, comp_bb_cost(CHECK));
+        if (fabs((costs->bb_cost) - comp_bb_cost(CHECK,placer_opts.place_algorithm)) > (costs->bb_cost) * ERROR_TOL)
+            VPR_ERROR(VPR_ERROR_PLACE, "bb_cost is %g, comp_bb_cost is %g\n", costs->bb_cost, comp_bb_cost(CHECK,placer_opts.place_algorithm));
             //"fabs((*bb_cost) - comp_bb_cost(CHECK)) > (*bb_cost) * ERROR_TOL");
 #endif
 
@@ -1212,6 +1245,10 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
                                          const PlacerCriticalities* criticalities,
                                          t_placer_costs* costs) {
     double new_bb_cost = recompute_bb_cost();
+    double new_cong_cost = 0.0;
+    if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+        new_cong_cost = get_cong_cost(placer_opts.congestion_tradeoff);
+    }
     if (fabs(new_bb_cost - costs->bb_cost) > costs->bb_cost * ERROR_TOL) {
         std::string msg = vtr::string_fmt(
             "in recompute_costs_from_scratch: new_bb_cost = %g, old bb_cost = %g\n",
@@ -1219,7 +1256,7 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
         VPR_ERROR(VPR_ERROR_PLACE, msg.c_str());
     }
     costs->bb_cost = new_bb_cost;
-
+    costs->cong_cost = new_cong_cost;
     if (placer_opts.place_algorithm.is_timing_driven()) {
         double new_timing_cost = 0.;
         comp_td_costs(delay_model, *criticalities, &new_timing_cost);
@@ -1370,6 +1407,8 @@ static void update_move_nets(int num_nets_affected) {
     /* update net cost functions and reset flags. */
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
+    auto& device_ctx = g_vpr_ctx.device();
+
 
     for (int inet_affected = 0; inet_affected < num_nets_affected;
          inet_affected++) {
@@ -1385,9 +1424,22 @@ static void update_move_nets(int num_nets_affected) {
         proposed_net_cost[net_id] = -1;
         bb_updated_before[net_id] = NOT_UPDATED_YET;
     }
+
+    for(int i=0;i<int(device_ctx.grid.width());i++){
+        for(int j=0;j<int(device_ctx.grid.height());j++){
+            cong_matrix[i][j] = cong_matrix_new[i][j];
+        }
+    }
 }
 
 static void reset_move_nets(int num_nets_affected) {
+    // VTR_LOG("in reset:\n\n");
+    auto& device_ctx = g_vpr_ctx.device();
+    for(int i=0;i<int(device_ctx.grid.width());i++){
+        for(int j=0;j<int(device_ctx.grid.height());j++){
+            cong_matrix_new[i][j] = cong_matrix[i][j];
+        }
+    }
     /* Reset the net cost function flags first. */
     for (int inet_affected = 0; inet_affected < num_nets_affected;
          inet_affected++) {
@@ -1456,6 +1508,7 @@ static e_move_result try_swap(const t_annealing_state* state,
     double delta_c = 0;        //Change in cost due to this swap.
     double bb_delta_c = 0;     //Change in the bounding box (wiring) cost.
     double timing_delta_c = 0; //Change in the timing cost (delay * criticality).
+    double cong_delta_c = 0;
 
     // Determine whether we need to force swap two router blocks
     bool router_block_move = false;
@@ -1528,8 +1581,8 @@ static e_move_result try_swap(const t_annealing_state* state,
         //delays and timing costs and store them in proposed_* data structures.
         int num_nets_affected = find_affected_nets_and_update_costs(
             place_algorithm, delay_model, criticalities, blocks_affected,
-            bb_delta_c, timing_delta_c);
-
+            bb_delta_c, timing_delta_c, cong_delta_c, placer_opts);
+        // VTR_LOG("bb_delta_c = %5.5f, timing_delta_c = %5.5f,cong_delta_c = %5.5f\n",bb_delta_c, timing_delta_c,cong_delta_c,placer_opts);
         //For setup slack analysis, we first do a timing analysis to get the newest
         //slack values resulted from the proposed block moves. If the move turns out
         //to be accepted, we keep the updated slack values and commit the block moves.
@@ -1565,6 +1618,12 @@ static e_move_result try_swap(const t_annealing_state* state,
             delta_c = (1 - timing_tradeoff) * bb_delta_c * costs->bb_cost_norm
                       + timing_tradeoff * timing_delta_c
                             * costs->timing_cost_norm;
+        } else if (place_algorithm == CONGESTION_AWARE_PLACE) {
+            /* Take delta_c as a combination of congestion, wiring and timing cost.
+             * Every term is normalized; congestion is weighted by (1 - congestion_tradeoff). */
+            delta_c =(1 - congestion_tradeoff)*(cong_delta_c * costs->cong_cost_norm)+ ((1 - timing_tradeoff) * bb_delta_c * costs->bb_cost_norm
+                      + timing_tradeoff * timing_delta_c
+                            * costs->timing_cost_norm);
         } else {
             VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE);
             delta_c = bb_delta_c * costs->bb_cost_norm;
@@ -1581,7 +1640,11 @@ static e_move_result try_swap(const t_annealing_state* state,
         }
 
         /* 1 -> move accepted, 0 -> rejected. */
-        move_outcome = assess_swap(delta_c, state->t);
+        if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+            move_outcome = assess_swap(delta_c, state->t, cong_delta_c, get_cong_cost(placer_opts.congestion_tradeoff), placer_opts);
+        } else{
+            move_outcome = assess_swap(delta_c, state->t, 0.0, 0.0, placer_opts);
+        }
 
         //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move.
 #ifndef NO_GRAPHICS
@@ -1593,6 +1656,7 @@ static e_move_result try_swap(const t_annealing_state* state,
         if (move_outcome == ACCEPTED) {
             costs->cost += delta_c;
             costs->bb_cost += bb_delta_c;
+            costs->cong_cost += cong_delta_c;
 
             if (place_algorithm == SLACK_TIMING_PLACE) {
                 /* Update the timing driven cost as usual */
@@ -1603,7 +1667,7 @@ static e_move_result try_swap(const t_annealing_state* state,
                 commit_setup_slacks(setup_slacks);
             }
 
-            if (place_algorithm == CRITICALITY_TIMING_PLACE) {
+            if (place_algorithm == CRITICALITY_TIMING_PLACE || place_algorithm == CONGESTION_AWARE_PLACE) {
                 costs->timing_cost += timing_delta_c;
 
                 /* Invalidates timing of modified connections for incremental *
@@ -1671,7 +1735,7 @@ static e_move_result try_swap(const t_annealing_state* state,
                     "The current setup slacks should be identical to the values before the try swap timing info update.");
             }
 
-            if (place_algorithm == CRITICALITY_TIMING_PLACE) {
+            if (place_algorithm == CRITICALITY_TIMING_PLACE || place_algorithm == CONGESTION_AWARE_PLACE) {
                 /* Unstage the values stored in proposed_* data structures */
                 revert_td_cost(blocks_affected);
             }
@@ -1690,9 +1754,11 @@ static e_move_result try_swap(const t_annealing_state* state,
                                                 * costs->bb_cost_norm;
         move_outcome_stats.delta_timing_cost_norm = timing_delta_c
                                                     * costs->timing_cost_norm;
+        move_outcome_stats.delta_cong_cost_norm = cong_delta_c * costs->cong_cost_norm;
 
         move_outcome_stats.delta_bb_cost_abs = bb_delta_c;
         move_outcome_stats.delta_timing_cost_abs = timing_delta_c;
+        move_outcome_stats.delta_cong_cost_abs = cong_delta_c;
 
         LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c,
                                (move_outcome ? "ACCEPTED" : "REJECTED"), "");
@@ -1755,14 +1821,24 @@ static int find_affected_nets_and_update_costs(
     const PlacerCriticalities* criticalities,
     t_pl_blocks_to_be_moved& blocks_affected,
     double& bb_delta_c,
-    double& timing_delta_c) {
+    double& timing_delta_c,
+    double& bb_cong_c,
+    const t_placer_opts& placer_opts) {    
+
     VTR_ASSERT_SAFE(bb_delta_c == 0.);
     VTR_ASSERT_SAFE(timing_delta_c == 0.);
     auto& cluster_ctx = g_vpr_ctx.clustering();
 
+    auto& place_move_ctx = g_placer_ctx.mutable_move();
+    
+
     int num_affected_nets = 0;
 
-    /* Go through all the blocks moved. */
+    double before_cost = 0.0;
+
+    if(place_algorithm == CONGESTION_AWARE_PLACE){
+        before_cost = get_cong_cost(placer_opts.congestion_tradeoff);
+    }
     for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
         ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num;
 
@@ -1782,6 +1858,7 @@ static int find_affected_nets_and_update_costs(
 
             /* Update the net bounding boxes. */
             update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin);
+            
 
             if (place_algorithm.is_timing_driven()) {
                 /* Determine the change in connection delay and timing cost. */
@@ -1797,11 +1874,21 @@ static int find_affected_nets_and_update_costs(
          inet_affected++) {
         ClusterNetId net_id = ts_nets_to_update[inet_affected];
 
+        if(place_algorithm == CONGESTION_AWARE_PLACE){
+            update_cong_matrix(net_id,&place_move_ctx.bb_coords[net_id],&ts_bb_coord_new[net_id]);
+        }
+
         proposed_net_cost[net_id] = get_net_cost(net_id,
                                                  &ts_bb_coord_new[net_id]);
         bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id];
     }
 
+    // bb_delta_c = get_cong_cost()-before_cost;
+
+    if(place_algorithm == CONGESTION_AWARE_PLACE){
+        bb_cong_c = get_cong_cost(placer_opts.congestion_tradeoff)-before_cost;
+    }
+    // VTR_LOG("cong cost is= %0.3f\n",bb_cong_c);
     return num_affected_nets;
 }
 
@@ -1994,10 +2081,13 @@ static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_
     if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {
         // in bounding box mode we only care about wirelength
         total_cost = costs->bb_cost * costs->bb_cost_norm;
+    } else if (placer_opts.place_algorithm == CONGESTION_AWARE_PLACE) {
+        // in congestion-aware mode we add the weighted congestion cost to the wirelength and timing costs
+        total_cost =(1-congestion_tradeoff) * (costs->cong_cost * costs->cong_cost_norm) + ((1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm));
     } else if (placer_opts.place_algorithm.is_timing_driven()) {
         // in timing mode we include both wirelength and timing costs
         total_cost = (1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm);
-    }
+    }  
 
     if (noc_opts.noc) {
         // in noc mode we include noc agggregate bandwidth and noc latency
@@ -2065,9 +2155,9 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) {
     return 1;
 }
 
-static e_move_result assess_swap(double delta_c, double t) {
+static e_move_result assess_swap(double delta_c, double t, double cong_delta_c, double cost, const t_placer_opts& placer_opts) {
     /* Returns: 1 -> move accepted, 0 -> rejected. */
-    if (delta_c <= 0) {
+    if (delta_c <= 0 && ((cong_delta_c <= 0) || (cost <= 20.0) || (placer_opts.place_algorithm != CONGESTION_AWARE_PLACE))) {
         return ACCEPTED;
     }
 
@@ -2077,7 +2167,8 @@ static e_move_result assess_swap(double delta_c, double t) {
 
     float fnum = vtr::frand();
     float prob_fac = std::exp(-delta_c / t);
-    if (prob_fac > fnum) {
+    float prob_fac_cong = std::exp(-cong_delta_c / t);
+    if (prob_fac > fnum && (prob_fac_cong>fnum || placer_opts.place_algorithm != CONGESTION_AWARE_PLACE)) {
         return ACCEPTED;
     }
 
@@ -2202,11 +2293,21 @@ static bool driven_by_moved_block(const ClusterNetId net,
  * are found via the non_updateable_bb routine, to provide a    *
  * cost which can be used to check the correctness of the       *
  * other routine.                                               */
-static double comp_bb_cost(e_cost_methods method) {
+static double comp_bb_cost(e_cost_methods method, const t_place_algorithm& place_algorithm) {
     double cost = 0;
     double expected_wirelength = 0.0;
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
+    auto& device_ctx = g_vpr_ctx.device();
+    // VTR_LOG("\n\n\nwidth = %d and height= %d\n\n\n",device_ctx.grid.width(), device_ctx.grid.height());
+    if(place_algorithm == CONGESTION_AWARE_PLACE){
+        for(int i = 0; i < int(device_ctx.grid.width()); i++){
+            for(int j = 0; j < int(device_ctx.grid.height()); j++){
+                cong_matrix[i][j] = 0.0;
+                // cong_matrix_new[i][j] = 0.0;
+            }
+        }
+    }
 
     for (auto net_id : cluster_ctx.clb_nlist.nets()) {       /* for each net ... */
         if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */
@@ -2221,6 +2322,10 @@ static double comp_bb_cost(e_cost_methods method) {
                                       &place_move_ctx.bb_coords[net_id]);
             }
 
+            if(place_algorithm == CONGESTION_AWARE_PLACE){
+                get_cong_matrix(net_id,&place_move_ctx.bb_coords[net_id]);
+            }
+
             net_cost[net_id] = get_net_cost(net_id,
                                             &place_move_ctx.bb_coords[net_id]);
             cost += net_cost[net_id];
@@ -2230,6 +2335,23 @@ static double comp_bb_cost(e_cost_methods method) {
         }
     }
 
+    if(place_algorithm == CONGESTION_AWARE_PLACE){
+        for(int i = 0; i < int(device_ctx.grid.width()); i++){
+            for(int j = 0; j < int(device_ctx.grid.height()); j++){
+                cong_matrix_new[i][j] = cong_matrix[i][j];
+            }
+        }
+    }
+
+    // cost = get_cong_cost();
+    if(place_algorithm == CONGESTION_AWARE_PLACE){
+        for(int i=0;i<int(device_ctx.grid.width());i++){
+            for(int j=0;j<int(device_ctx.grid.height());j++){
+                VTR_LOG("%4.0f\t",cong_matrix[i][j]);
+            }
+            VTR_LOG("\n");
+        }
+    }
     if (method == CHECK) {
         VTR_LOG("\n");
         VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n",
@@ -2471,7 +2593,7 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_
 static double wirelength_crossing_count(size_t fanout) {
     /* Get the expected "crossing count" of a net, based on its number *
      * of pins.  Extrapolate for very large nets.                      */
-
+    
     if (fanout > 50) {
         return 2.7933 + 0.02616 * (fanout - 50);
     } else {
@@ -2503,6 +2625,83 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr) {
     return (ncost);
 }
 
+static void update_cong_matrix(ClusterNetId net_id, t_bb* bb_ptr_old, t_bb* bb_ptr_new){
+    /* Moves the routing-demand estimate of one net from its old bounding   *
+     * box to its new one in cong_matrix_new, the proposed congestion map.  *
+     * The net's wirelength estimate is spread evenly over every tile of    *
+     * the box. Box limits are inclusive, so the loops run up to and        *
+     * including xmax/ymax; that matches the (max - min + 1) tile counts in *
+     * the divisor and keeps single-row/single-column nets accounted for.   */
+    /* The per-tile share is loop-invariant, so each estimate is evaluated once. */
+    const double old_share = get_net_wirelength_estimate(net_id,bb_ptr_old)/double((bb_ptr_old->xmax - bb_ptr_old->xmin + 1)*(bb_ptr_old->ymax - bb_ptr_old->ymin + 1));
+    const double new_share = get_net_wirelength_estimate(net_id,bb_ptr_new)/double((bb_ptr_new->xmax - bb_ptr_new->xmin + 1)*(bb_ptr_new->ymax - bb_ptr_new->ymin + 1));
+
+    for(int i = bb_ptr_old->xmin; i <= bb_ptr_old->xmax; i++){
+        for(int j = bb_ptr_old->ymin; j <= bb_ptr_old->ymax; j++){
+            cong_matrix_new[i][j] -= old_share;
+        }
+    }
+    for(int i = bb_ptr_new->xmin; i <= bb_ptr_new->xmax; i++){
+        for(int j = bb_ptr_new->ymin; j <= bb_ptr_new->ymax; j++){
+            cong_matrix_new[i][j] += new_share;
+        }
+    }
+}
+
+
+static void get_cong_matrix(ClusterNetId net_id, t_bb* bbptr) {
+    /* Adds the routing-demand estimate of one net to cong_matrix: the     *
+     * net's wirelength estimate is spread evenly over every tile of its   *
+     * bounding box.                                                       *
+     *                                                                     *
+     * Box limits are inclusive, so the loops run up to and including      *
+     * xmax/ymax; that matches the (max - min + 1) tile counts in the      *
+     * divisor and keeps single-row/single-column nets accounted for.      */
+    /* The per-tile share is loop-invariant, so the estimate is evaluated once. */
+    const double share = get_net_wirelength_estimate(net_id,bbptr)/double((bbptr->xmax - bbptr->xmin + 1)*(bbptr->ymax - bbptr->ymin + 1));
+
+    for(int i = bbptr->xmin; i <= bbptr->xmax; i++){
+        for(int j = bbptr->ymin; j <= bbptr->ymax; j++){
+            cong_matrix[i][j] += share;
+        }
+    }
+}
+
+
+static double get_cong_cost(double chan_width) {
+    /* Returns the total routing-demand overflow of the proposed congestion *
+     * map, cong_matrix_new: for every grid tile whose estimated demand is  *
+     * above chan_width, the excess over chan_width is added to the sum.    *
+     *                                                                      *
+     * chan_width: per-tile demand threshold; demand at or below it         *
+     *             contributes nothing.                                     *
+     *                                                                      *
+     * NOTE(review): the callers in this file pass                          *
+     * placer_opts.congestion_tradeoff as chan_width -- confirm that this   *
+     * option is really meant to hold the demand threshold.                 *
+     *                                                                      *
+     * This routine runs several times per proposed move, so it makes a     *
+     * single pass over the grid and computes nothing but the overflow.     */
+    auto& device_ctx = g_vpr_ctx.device();
+    const int grid_width = int(device_ctx.grid.width());
+    const int grid_height = int(device_ctx.grid.height());
+
+    /* Start from a small positive bias instead of zero, presumably        *
+     * because the result is used as a divisor elsewhere in this file      *
+     * (cong_cost_norm = 1 / cong_cost) and so should never be zero.       */
+    double overflow = 1e-4;
+    for(int i = 0; i < grid_width; i++){
+        for(int j = 0; j < grid_height; j++){
+            const double demand = cong_matrix_new[i][j];
+            if(demand > chan_width){
+                overflow += demand - chan_width;
+            }
+        }
+    }
+
+    return overflow;
+}
+
 static double get_net_cost(ClusterNetId net_id, t_bb* bbptr) {
     /* Finds the cost due to one net by looking at its coordinate bounding  *
      * box.                                                                 */
@@ -2938,7 +3137,7 @@ static int check_placement_costs(const t_placer_costs& costs,
     double bb_cost_check;
     double timing_cost_check;
 
-    bb_cost_check = comp_bb_cost(CHECK);
+    bb_cost_check = comp_bb_cost(CHECK, place_algorithm);
     if (fabs(bb_cost_check - costs.bb_cost) > costs.bb_cost * ERROR_TOL) {
         VTR_LOG_ERROR(
             "bb_cost_check: %g and bb_cost: %g differ in check_place.\n",
@@ -2987,31 +3186,31 @@ static int check_block_placement_consistency() {
                     VTR_LOG_ERROR(
                         "%d blocks were placed at grid location (%d,%d,%d), but location capacity is %d.\n",
                         place_ctx.grid_blocks.get_usage(tile_loc), i, j, layer_num,
-                        type->capacity);
+                    type->capacity);
+                error++;
+            }
+            int usage_check = 0;
+            for (int k = 0; k < type->capacity; k++) {
+                auto bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num});
+                if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum)
+                    continue;
+
+                auto logical_block = cluster_ctx.clb_nlist.block_type(bnum);
+                auto physical_tile = type;
+
+                if (physical_tile_type(bnum) != physical_tile) {
+                    VTR_LOG_ERROR(
+                        "Block %zu type (%s) does not match grid location (%zu,%zu, %d) type (%s).\n",
+                            size_t(bnum), logical_block->name, i, j, layer_num, physical_tile->name);
                     error++;
                 }
-                int usage_check = 0;
-                for (int k = 0; k < type->capacity; k++) {
-                    auto bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num});
-                    if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum)
-                        continue;
-
-                    auto logical_block = cluster_ctx.clb_nlist.block_type(bnum);
-                    auto physical_tile = type;
-
-                    if (physical_tile_type(bnum) != physical_tile) {
-                        VTR_LOG_ERROR(
-                            "Block %zu type (%s) does not match grid location (%zu,%zu, %d) type (%s).\n",
-                            size_t(bnum), logical_block->name, i, j, layer_num, physical_tile->name);
-                        error++;
-                    }
-
-                    auto& loc = place_ctx.block_locs[bnum].loc;
-                    if (loc.x != i || loc.y != j || loc.layer != layer_num
-                        || !is_sub_tile_compatible(physical_tile, logical_block,
-                                                   loc.sub_tile)) {
-                        VTR_LOG_ERROR(
-                            "Block %zu's location is (%d,%d,%d) but found in grid at (%zu,%zu,%d,%d).\n",
+
+                auto& loc = place_ctx.block_locs[bnum].loc;
+                if (loc.x != i || loc.y != j || loc.layer != layer_num
+                    || !is_sub_tile_compatible(physical_tile, logical_block,
+                                               loc.sub_tile)) {
+                    VTR_LOG_ERROR(
+                        "Block %zu's location is (%d,%d,%d) but found in grid at (%zu,%zu,%d,%d).\n",
                             size_t(bnum),
                             loc.x,
                             loc.y,
@@ -3020,23 +3219,23 @@ static int check_block_placement_consistency() {
                             tile_loc.y,
                             tile_loc.layer_num,
                             layer_num);
-                        error++;
-                    }
-                    ++usage_check;
-                    bdone[bnum]++;
+                    error++;
                 }
-                if (usage_check != place_ctx.grid_blocks.get_usage(tile_loc)) {
-                    VTR_LOG_ERROR(
-                        "%d block(s) were placed at location (%d,%d,%d), but location contains %d block(s).\n",
+                ++usage_check;
+                bdone[bnum]++;
+            }
+            if (usage_check != place_ctx.grid_blocks.get_usage(tile_loc)) {
+                VTR_LOG_ERROR(
+                    "%d block(s) were placed at location (%d,%d,%d), but location contains %d block(s).\n",
                         place_ctx.grid_blocks.get_usage(tile_loc),
                         tile_loc.x,
                         tile_loc.y,
                         tile_loc.layer_num,
                         usage_check);
-                    error++;
-                }
+                error++;
             }
         }
+        }
     }
 
     /* Check that every block exists in the device_ctx.grid and cluster_ctx.blocks arrays somewhere. */
@@ -3145,13 +3344,13 @@ static void update_screen_debug() {
 
 static void print_place_status_header() {
     VTR_LOG(
-        "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
+        "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------  ---------------\n");
     VTR_LOG(
-        "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha\n");
+        "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha  congestion_cost\n");
     VTR_LOG(
-        "      (sec)                                          (ns)       (ns)     (ns)                                                 \n");
+        "      (sec)                                          (ns)       (ns)     (ns)                                                                  \n");
     VTR_LOG(
-        "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
+        "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------  ---------------\n");
 }
 
 static void print_place_status(const t_annealing_state& state,
@@ -3167,11 +3366,11 @@ static void print_place_status(const t_annealing_state& state,
         "%7.1e "
         "%7.3f %10.2f %-10.5g "
         "%7.3f % 10.3g % 8.3f "
-        "%7.3f %7.4f %6.1f %8.2f",
+        "%7.3f %7.4f %6.1f %8.2f %5.3f",
         state.num_temps, elapsed_sec, state.t,
         stats.av_cost, stats.av_bb_cost, stats.av_timing_cost, 1e9 * cpd,
         1e9 * sTNS, 1e9 * sWNS, stats.success_rate, stats.std_dev,
-        state.rlim, state.crit_exponent);
+        state.rlim, state.crit_exponent, stats.av_cong_cost);
 
     pretty_print_uint(" ", tot_moves, 9, 3);
 
@@ -3265,6 +3464,7 @@ static void print_placement_move_types_stats(
     int count = 0;
     int num_of_avail_moves = move_type_stat.blk_type_moves.size() / get_num_agent_types();
 
+    // VTR_LOG("\n\nPercentage of different move types and block types:\n");
     //Print placement information for each block type
     for (auto itype : device_ctx.logical_block_types) {
         //Skip non-existing block types in the netlist
@@ -3321,10 +3521,10 @@ static void calculate_reward_and_process_outcome(
     } else if (reward_fun == WL_BIASED_RUNTIME_AWARE) {
         if (delta_c < 0) {
             float reward = -1
-                           * (move_outcome_stats.delta_cost_norm
-                              + (0.5 - timing_bb_factor)
+                        * (move_outcome_stats.delta_cost_norm
+                            + (0.5 - timing_bb_factor)
                                     * move_outcome_stats.delta_timing_cost_norm
-                              + timing_bb_factor
+                            + timing_bb_factor
                                     * move_outcome_stats.delta_bb_cost_norm);
             move_generator.process_outcome(reward, reward_fun);
         } else {
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 75ff2d2bf12..5ac864b49e7 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -65,6 +65,7 @@ static GridBlock init_grid_blocks() {
 void t_placer_costs::update_norm_factors() {
     if (place_algorithm.is_timing_driven()) {
         bb_cost_norm = 1 / bb_cost;
+        cong_cost_norm = 1/ cong_cost;
         //Prevent the norm factor from going to infinity
         timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST);
     } else {
@@ -289,6 +290,7 @@ void t_placer_statistics::reset() {
     av_cost = 0.;
     av_bb_cost = 0.;
     av_timing_cost = 0.;
+    av_cong_cost = 0.;
     sum_of_squares = 0.;
     success_sum = 0;
     success_rate = 0.;
@@ -301,6 +303,7 @@ void t_placer_statistics::single_swap_update(const t_placer_costs& costs) {
     av_cost += costs.cost;
     av_bb_cost += costs.bb_cost;
     av_timing_cost += costs.timing_cost;
+    av_cong_cost += costs.cong_cost;
     sum_of_squares += (costs.cost) * (costs.cost);
 }
 
@@ -310,10 +313,12 @@ void t_placer_statistics::calc_iteration_stats(const t_placer_costs& costs, int
         av_cost = costs.cost;
         av_bb_cost = costs.bb_cost;
         av_timing_cost = costs.timing_cost;
+        av_cong_cost = costs.cong_cost;
     } else {
         av_cost /= success_sum;
         av_bb_cost /= success_sum;
         av_timing_cost /= success_sum;
+        av_cong_cost /= success_sum;
     }
     success_rate = success_sum / float(move_lim);
     std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index cc903cf4f71..d2ce7eca12f 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -51,6 +51,8 @@ class t_placer_costs {
     double timing_cost = 0.;
     double bb_cost_norm = 0.;
     double timing_cost_norm = 0.;
+    double cong_cost = 0.;
+    double cong_cost_norm = 0.;
     double noc_aggregate_bandwidth_cost = 0.;
     double noc_aggregate_bandwidth_cost_norm = 0.;
     double noc_latency_cost = 0.;
@@ -190,6 +192,7 @@ class t_placer_statistics {
     double av_cost;
     double av_bb_cost;
     double av_timing_cost;
+    double av_cong_cost;
     double sum_of_squares;
     int success_sum;
     float success_rate;

From c30a95168dcfaebc021c55b389d5f407bb4981ee Mon Sep 17 00:00:00 2001
From: Milad Ebrahimipour <milad.ebrahimipour@rapidsilicon.com>
Date: Thu, 31 Aug 2023 01:48:06 -0400
Subject: [PATCH 2/6] Add congestion_aware place algorithm and --congest_tradeoff option

---
 vpr/src/base/SetupVPR.cpp     |  1 +
 vpr/src/base/read_options.cpp | 26 +++++++++++++++++++++-----
 vpr/src/base/read_options.h   |  1 +
 vpr/src/base/vpr_types.h      |  6 ++++--
 4 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index efef48ed4c1..8480591bcf4 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -641,6 +641,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts)
     PlacerOpts->recompute_crit_iter = Options.RecomputeCritIter;
 
     PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff;
+    PlacerOpts->congestion_tradeoff = Options.CongestionTradeoff;
 
     /* Depends on PlacerOpts->place_algorithm */
     PlacerOpts->delay_offset = Options.place_delay_offset;
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index ad935c44faa..e1048e82d1c 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -397,6 +397,8 @@ struct ParsePlaceAlgorithm {
             conv_value.set_value(CRITICALITY_TIMING_PLACE);
         } else if (str == "slack_timing") {
             conv_value.set_value(SLACK_TIMING_PLACE);
+        } else if (str == "congestion_aware"){
+            conv_value.set_value(CONGESTION_AWARE_PLACE);
         } else {
             std::stringstream msg;
             msg << "Invalid conversion from '" << str << "' to e_place_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")";
@@ -418,6 +420,8 @@ struct ParsePlaceAlgorithm {
             conv_value.set_value("bounding_box");
         } else if (val == CRITICALITY_TIMING_PLACE) {
             conv_value.set_value("criticality_timing");
+        } else if (val == CONGESTION_AWARE_PLACE) {
+            conv_value.set_value("congestion_aware");
         } else {
             VTR_ASSERT(val == SLACK_TIMING_PLACE);
             conv_value.set_value("slack_timing");
@@ -426,7 +430,7 @@ struct ParsePlaceAlgorithm {
     }
 
     std::vector<std::string> default_choices() {
-        return {"bounding_box", "criticality_timing", "slack_timing"};
+        return {"bounding_box", "criticality_timing", "slack_timing", "congestion_aware"};
     }
 };
 
@@ -1344,6 +1348,8 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
             "           Sets the routing congestion drawing state\n"
             "      * exit <int>\n"
             "           Exits VPR with specified exit code\n"
+            "      * set_congestion <int>\n"
+            "           Sets the routing congestion drawing state\n"
             "\n"
             "   Example:\n"
             "     'save_graphics place.png; \\\n"
@@ -1928,9 +1934,10 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
             "Controls which placement algorithm is used. Valid options:\n"
             " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified.\n"
             " * criticality_timing: Focuses on minimizing both the wirelength and the connection timing costs (criticality * delay).\n"
-            " * slack_timing: Focuses on improving the circuit slack values to reduce critical path delay.\n")
+            " * slack_timing: Focuses on improving the circuit slack values to reduce critical path delay.\n"
+            " * congestion_aware: Focuses on improving routability.\n")
         .default_value("criticality_timing")
-        .choices({"bounding_box", "criticality_timing", "slack_timing"})
+        .choices({"bounding_box", "criticality_timing", "slack_timing", "congestion_aware"})
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.PlaceQuenchAlgorithm, "--place_quench_algorithm")
@@ -1940,9 +1947,10 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
             "Valid options:\n"
             " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified.\n"
             " * criticality_timing: Focuses on minimizing both the wirelength and the connection timing costs (criticality * delay).\n"
-            " * slack_timing: Focuses on improving the circuit slack values to reduce critical path delay.\n")
+            " * slack_timing: Focuses on improving the circuit slack values to reduce critical path delay.\n"
+            " * congestion_aware: Focuses on improving routability.\n")
         .default_value("criticality_timing")
-        .choices({"bounding_box", "criticality_timing", "slack_timing"})
+        .choices({"bounding_box", "criticality_timing", "slack_timing", "congestion_aware"})
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_grp.add_argument(args.PlaceChanWidth, "--place_chan_width")
@@ -2130,6 +2138,14 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
             " 0.0 focuses completely on wirelength, 1.0 completely on timing")
         .default_value("0.5")
         .show_in(argparse::ShowIn::HELP_ONLY);
+    
+    place_timing_grp.add_argument(args.CongestionTradeoff, "--congest_tradeoff")
+        .help(
+            "Trade-off control the bouding value for the contestion matrix.\n"
+            " a value near routing channel width can be a good value.\n"
+            " a high value let the VPR to ignore the congestion aware placement and continue its own course of action.\n")
+        .default_value("1.0")
+        .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter")
         .help("Controls how many temperature updates occur between timing analysis during placement")
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 97645367680..d94c0396ec6 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -155,6 +155,7 @@ struct t_options {
 
     /* Timing-driven placement options only */
     argparse::ArgValue<float> PlaceTimingTradeoff;
+    argparse::ArgValue<float> CongestionTradeoff;
     argparse::ArgValue<int> RecomputeCritIter;
     argparse::ArgValue<int> inner_loop_recompute_divider;
     argparse::ArgValue<int> quench_recompute_divider;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 7b98cc2c0e0..363b8ef1c2d 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -971,7 +971,8 @@ struct t_annealing_sched {
 enum e_place_algorithm {
     BOUNDING_BOX_PLACE,
     CRITICALITY_TIMING_PLACE,
-    SLACK_TIMING_PLACE
+    SLACK_TIMING_PLACE,
+    CONGESTION_AWARE_PLACE
 };
 
 /**
@@ -1015,7 +1016,7 @@ class t_place_algorithm {
 
     ///@brief Check if the algorithm belongs to the timing driven category.
     inline bool is_timing_driven() const {
-        return algo == CRITICALITY_TIMING_PLACE || algo == SLACK_TIMING_PLACE;
+        return algo == CRITICALITY_TIMING_PLACE || algo == SLACK_TIMING_PLACE || algo== CONGESTION_AWARE_PLACE;
     }
 
     ///@brief Accessor: returns the underlying e_place_algorithm enum value.
@@ -1147,6 +1148,7 @@ struct t_placer_opts {
     t_place_algorithm place_algorithm;
     t_place_algorithm place_quench_algorithm;
     float timing_tradeoff;
+    float congestion_tradeoff;
     float place_cost_exp;
     int place_chan_width;
     enum e_pad_loc_type pad_loc_type;

From 73f9a7965794c5ac32228286697a60bb2db88ad3 Mon Sep 17 00:00:00 2001
From: behzadmehmood <behzadmehmood82@gmail.com>
Date: Wed, 31 Jul 2024 13:52:02 +0500
Subject: [PATCH 3/6] Move congestion matrices to net_cost_handler and export cost helpers

---
 vpr/src/place/net_cost_handler.cpp | 11 +++++------
 vpr/src/place/net_cost_handler.h   |  4 ++++
 vpr/src/place/place.cpp            |  3 ---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 31e15209611..98f35483fdc 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -54,6 +54,9 @@ enum class NetUpdateState {
 
 const int MAX_FANOUT_CROSSING_COUNT = 50;
 
+double cong_matrix[400][400];
+double cong_matrix_new[400][400];
+
 /**
  * @brief Crossing counts for nets with different #'s of pins.  From
  * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me).
@@ -458,10 +461,6 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bb);
  */
 static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb);
 
-static void get_cong_matrix(ClusterNetId net_id, const t_bb& bb);
-
-static double get_cong_cost(double chan_width)
-
 /**
  * @brief To mitigate round-off errors, every once in a while, the costs of nets are summed up from scratch.
  * This functions is called to do that for bb cost. It doesn't calculate the BBs from scratch, it would only add the costs again.
@@ -1846,7 +1845,7 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb) {
     return (ncost);
 }
 
-static void get_cong_matrix(ClusterNetId net_id, const t_bb& bb) {
+void get_cong_matrix(ClusterNetId net_id, const t_bb& bb) {
     /* Finds the cost due to one net by looking at its coordinate bounding  *
      * box.                                                                 */
     // auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -1865,7 +1864,7 @@ static void get_cong_matrix(ClusterNetId net_id, const t_bb& bb) {
 }
 
 
-static double get_cong_cost(double chan_width) {
+double get_cong_cost(double chan_width) {
     auto& device_ctx = g_vpr_ctx.device();
     double max = 0.0;
     double avg = 1e-4,var=0.0;
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index deeaad368ad..90543464711 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -147,3 +147,7 @@ void init_try_swap_net_cost_structs(size_t num_nets, bool cube_bb);
  * @brief Free (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count, and ts_nets_to_update data structures.
  */
 void free_try_swap_net_cost_structs();
+
+void get_cong_matrix(ClusterNetId net_id, const t_bb& bb);
+
+double get_cong_cost(double chan_width);
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 8cdd79af5a8..b1be42b2e25 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -111,9 +111,6 @@ static int num_swap_accepted = 0;
 static int num_swap_aborted = 0;
 static int num_ts_called = 0;
 
-double cong_matrix[400][400];
-double cong_matrix_new[400][400];
-
 float congestion_tradeoff = 1.0;
 
 std::unique_ptr<FILE, decltype(&vtr::fclose)> f_move_stats_file(nullptr,

From 2fd1be91cde981ff19128812686266d7304a5b35 Mon Sep 17 00:00:00 2001
From: behzadmehmood <behzadmehmood82@gmail.com>
Date: Wed, 21 Aug 2024 17:49:38 +0500
Subject: [PATCH 4/6] Updating fixed size arrays to NdMatrix

---
 vpr/src/place/net_cost_handler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 98f35483fdc..7109379dc7d 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -54,8 +54,8 @@ enum class NetUpdateState {
 
 const int MAX_FANOUT_CROSSING_COUNT = 50;
 
-double cong_matrix[400][400];
-double cong_matrix_new[400][400];
+vtr::NdMatrix<double, 2> cong_matrix;
+vtr::NdMatrix<double, 2> cong_matrix_new;
 
 /**
  * @brief Crossing counts for nets with different #'s of pins.  From

From c68068595fc4eab133c62c48353d2504d915a36b Mon Sep 17 00:00:00 2001
From: behzadmehmood <behzadmehmood82@gmail.com>
Date: Thu, 22 Aug 2024 14:53:15 +0500
Subject: [PATCH 5/6] Use float congestion matrices, guard cong_cost_norm, default --congest_tradeoff to 0.5

---
 vpr/src/base/read_options.cpp      |  2 +-
 vpr/src/place/net_cost_handler.cpp |  4 ++--
 vpr/src/place/place.cpp            | 22 +++++-----------------
 vpr/src/place/place_util.cpp       |  2 +-
 4 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 5e872836a3e..03d5d4165db 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2250,7 +2250,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
             "Trade-off control the bouding value for the contestion matrix.\n"
             " a value near routing channel width can be a good value.\n"
             " a high value let the VPR to ignore the congestion aware placement and continue its own course of action.\n")
-        .default_value("1.0")
+        .default_value("0.5")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter")
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 7109379dc7d..f1ae58b1efa 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -54,8 +54,8 @@ enum class NetUpdateState {
 
 const int MAX_FANOUT_CROSSING_COUNT = 50;
 
-vtr::NdMatrix<double, 2> cong_matrix;
-vtr::NdMatrix<double, 2> cong_matrix_new;
+static vtr::NdMatrix<float, 2> cong_matrix;
+static vtr::NdMatrix<float, 2> cong_matrix_new;
 
 /**
  * @brief Crossing counts for nets with different #'s of pins.  From
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index b1be42b2e25..229f0c43eea 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -273,7 +273,7 @@ static void invalidate_affected_connections(
 
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks);
 
-static e_move_result assess_swap(double delta_c, double t,double cong_delta_c,double cost,const t_placer_opts& placer_opts);
+static e_move_result assess_swap(double delta_c, double t, double cong_delta_c, double cost, const t_placer_opts& placer_opts);
 
 static void update_placement_cost_normalization_factors(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
 
@@ -2140,24 +2140,16 @@ static int check_block_placement_consistency() {
                 if (physical_tile_type(bnum) != physical_tile) {
                     VTR_LOG_ERROR(
                         "Block %zu type (%s) does not match grid location (%zu,%zu, %d) type (%s).\n",
-                            size_t(bnum), logical_block->name, i, j, layer_num, physical_tile->name);
+                        size_t(bnum), logical_block->name, i, j, layer_num, physical_tile->name);
                     error++;
                 }
 
                 auto& loc = place_ctx.block_locs[bnum].loc;
                 if (loc.x != i || loc.y != j || loc.layer != layer_num
-                    || !is_sub_tile_compatible(physical_tile, logical_block,
-                                               loc.sub_tile)) {
+                    || !is_sub_tile_compatible(physical_tile, logical_block, loc.sub_tile)) {
                     VTR_LOG_ERROR(
                         "Block %zu's location is (%d,%d,%d) but found in grid at (%zu,%zu,%d,%d).\n",
-                            size_t(bnum),
-                            loc.x,
-                            loc.y,
-                            loc.sub_tile,
-                            tile_loc.x,
-                            tile_loc.y,
-                            tile_loc.layer_num,
-                            layer_num);
+                        size_t(bnum), loc.x, loc.y, loc.sub_tile, tile_loc.x, tile_loc.y, tile_loc.layer_num, layer_num);
                     error++;
                 }
                 ++usage_check;
@@ -2166,11 +2158,7 @@ static int check_block_placement_consistency() {
             if (usage_check != place_ctx.grid_blocks.get_usage(tile_loc)) {
                 VTR_LOG_ERROR(
                     "%d block(s) were placed at location (%d,%d,%d), but location contains %d block(s).\n",
-                        place_ctx.grid_blocks.get_usage(tile_loc),
-                        tile_loc.x,
-                        tile_loc.y,
-                        tile_loc.layer_num,
-                        usage_check);
+                    place_ctx.grid_blocks.get_usage(tile_loc), tile_loc.x, tile_loc.y, tile_loc.layer_num, usage_check);
                 error++;
             }
         }
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 636fd987ff9..53c88d76cea 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -50,7 +50,7 @@ static GridBlock init_grid_blocks() {
 void t_placer_costs::update_norm_factors() {
     if (place_algorithm.is_timing_driven()) {
         bb_cost_norm = 1 / bb_cost;
-        cong_cost_norm = 1/ cong_cost;
+        if (cong_cost) cong_cost_norm = 1/ cong_cost;
         //Prevent the norm factor from going to infinity
         timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST);
     } else {

From 59188acb417eea1df59f6ef278b8ac9a90c9e3fe Mon Sep 17 00:00:00 2001
From: behzadmehmood <behzadmehmood82@gmail.com>
Date: Thu, 22 Aug 2024 17:45:06 +0500
Subject: [PATCH 6/6] correcting code format

---
 vpr/src/base/SetupVPR.cpp          |   2 -
 vpr/src/base/read_options.cpp      |  14 ++--
 vpr/src/base/vpr_types.h           |   2 +-
 vpr/src/place/net_cost_handler.cpp | 106 +++++++++++++-------------
 vpr/src/place/place.cpp            | 118 ++++++++++++++---------------
 vpr/src/place/place_util.cpp       |   2 +-
 6 files changed, 118 insertions(+), 126 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index a6878d9afa8..7c21d22ebdb 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -784,8 +784,6 @@ static void SetupNocOpts(const t_options& Options, t_noc_opts* NocOpts) {
     }
     NocOpts->noc_sat_routing_log_search_progress = Options.noc_sat_routing_log_search_progress;
     NocOpts->noc_placement_file_name = Options.noc_placement_file_name;
-
-
 }
 
 static void SetupServerOpts(const t_options& Options, t_server_opts* ServerOpts) {
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 03d5d4165db..9d2ffb11e12 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -398,7 +398,7 @@ struct ParsePlaceAlgorithm {
             conv_value.set_value(CRITICALITY_TIMING_PLACE);
         } else if (str == "slack_timing") {
             conv_value.set_value(SLACK_TIMING_PLACE);
-        } else if (str == "congestion_aware"){
+        } else if (str == "congestion_aware") {
             conv_value.set_value(CONGESTION_AWARE_PLACE);
         } else {
             std::stringstream msg;
@@ -2075,7 +2075,6 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value({"100"})
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-
     place_grp.add_argument(args.place_high_fanout_net, "--place_high_fanout_net")
         .help(
             "Sets the assumed high fanout net during placement. "
@@ -2244,7 +2243,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
             " 0.0 focuses completely on wirelength, 1.0 completely on timing")
         .default_value("0.5")
         .show_in(argparse::ShowIn::HELP_ONLY);
-    
+
     place_timing_grp.add_argument(args.CongestionTradeoff, "--congest_tradeoff")
         .help(
             "Trade-off control the bouding value for the contestion matrix.\n"
@@ -2913,13 +2912,13 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("0.25")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-	noc_grp.add_argument<double>(args.noc_centroid_weight, "--noc_centroid_weight")
+    noc_grp.add_argument<double>(args.noc_centroid_weight, "--noc_centroid_weight")
         .help(
             "Sets the minimum fraction of swaps attempted by the placer that are NoC blocks."
             "This value is an integer ranging from 0-100. 0 means NoC blocks will be moved at the same rate as other blocks. 100 means all swaps attempted by the placer are NoC router blocks.")
         .default_value("0")
         .show_in(argparse::ShowIn::HELP_ONLY);
-        
+
     noc_grp.add_argument<double>(args.noc_swap_percentage, "--noc_swap_percentage")
         .help(
             "Sets the minimum fraction of swaps attempted by the placer that are NoC blocks. "
@@ -2971,8 +2970,9 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
     auto& server_grp = parser.add_argument_group("server options");
 
     server_grp.add_argument<bool, ParseOnOff>(args.is_server_mode_enabled, "--server")
-        .help("Run in server mode."
-              "Accept client application connection and respond to requests." )
+        .help(
+            "Run in server mode. "
+            "Accept client application connection and respond to requests.")
         .action(argparse::Action::STORE_TRUE)
         .default_value("off");
 
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 6279ff30d9d..6326bd0c107 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1094,7 +1094,7 @@ class t_place_algorithm {
 
     ///@brief Check if the algorithm belongs to the timing driven category.
     inline bool is_timing_driven() const {
-        return algo == CRITICALITY_TIMING_PLACE || algo == SLACK_TIMING_PLACE || algo== CONGESTION_AWARE_PLACE;
+        return algo == CRITICALITY_TIMING_PLACE || algo == SLACK_TIMING_PLACE || algo == CONGESTION_AWARE_PLACE;
     }
 
     ///@brief Accessor: returns the underlying e_place_algorithm enum value.
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index f1ae58b1efa..9d2a9894101 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -299,8 +299,8 @@ static void update_layer_bb(ClusterNetId net_id,
                             bool is_output_pin);
 
 /**
-* @brief This function is called in update_layer_bb to update the net's bounding box incrementally if
-* the pin under consideration change layer.
+ * @brief This function is called in update_layer_bb to update the net's bounding box incrementally if
+ * the pin under consideration change layer.
  * @param net_id ID of the net which the moving pin belongs to
  * @param pin_old_loc Old location of the moving pin
  * @param pin_new_loc New location of the moving pin
@@ -509,8 +509,7 @@ void BBUpdater::get_non_updatable_bb(const ClusterNetId& net) {
         ::get_non_updatable_bb(net,
                                ts_info.ts_bb_coord_new[net],
                                ts_info.ts_layer_sink_pin_count[size_t(net)]);
-    }
-    else {
+    } else {
         ::get_non_updatable_layer_bb(net,
                                      ts_info.layer_ts_bb_coord_new[net],
                                      ts_info.ts_layer_sink_pin_count[size_t(net)]);
@@ -526,8 +525,7 @@ void BBUpdater::update_bb(ClusterNetId net_id, t_physical_tile_loc pin_old_loc,
                     pin_old_loc,
                     pin_new_loc,
                     is_driver);
-    }
-    else {
+    } else {
         ::update_layer_bb(net_id,
                           ts_info.layer_ts_bb_edge_new[net_id],
                           ts_info.layer_ts_bb_coord_new[net_id],
@@ -541,8 +539,7 @@ void BBUpdater::update_bb(ClusterNetId net_id, t_physical_tile_loc pin_old_loc,
 double BBUpdater::get_net_cost(const ClusterNetId net_id) {
     if (m_cube_bb) {
         return ::get_net_cost(net_id, ts_info.ts_bb_coord_new[net_id]);
-    }
-    else {
+    } else {
         return ::get_net_layer_bb_wire_cost(net_id, ts_info.layer_ts_bb_coord_new[net_id], ts_info.ts_layer_sink_pin_count[size_t(net_id)]);
     }
 }
@@ -1381,10 +1378,10 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
     VTR_ASSERT_SAFE(old_layer_num != new_layer_num);
 
     /*
-    This funcitn is called when BB per layer is used and when the moving block is moving from one layer to another.
-    Thus, we need to update bounding box on both "from" and "to" layer. Here, we update the bounding box on "from" or
-    "old_layer". Then, "add_block_to_bb" is called to update the bounding box on the new layer.
-    */
+     * This funcitn is called when BB per layer is used and when the moving block is moving from one layer to another.
+     * Thus, we need to update bounding box on both "from" and "to" layer. Here, we update the bounding box on "from" or
+     * "old_layer". Then, "add_block_to_bb" is called to update the bounding box on the new layer.
+     */
     if (x_old == curr_bb_coord[old_layer_num].xmax) {
         update_bb_edge(net_id,
                        bb_edge_new,
@@ -1489,10 +1486,10 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
     int y_new = new_pin_loc.y;
 
     /*
-    This function is called to only update the bounding box on the new layer from a block
-    moving to this layer from another layer. Thus, we only need to assess the effect of this
-    new block on the edges.
-    */
+     * This function is called to only update the bounding box on the new layer from a block
+     * moving to this layer from another layer. Thus, we only need to assess the effect of this
+     * new block on the edges.
+     */
 
     if (x_new > bb_coord_old.xmax) {
         bb_edge_new.xmax = 1;
@@ -1801,10 +1798,10 @@ static double get_net_layer_bb_wire_cost(ClusterNetId /* net_id */,
             continue;
         }
         /*
-        adjust the bounding box half perimeter by the wirelength correction
-        factor based on terminal count, which is 1 for the source + the number
-        of sinks on this layer.
-        */
+         * adjust the bounding box half perimeter by the wirelength correction
+         * factor based on terminal count, which is 1 for the source + the number
+         * of sinks on this layer.
+         */
         crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1);
 
         /* Could insert a check for xmin == xmax.  In that case, assume  *
@@ -1856,45 +1853,44 @@ void get_cong_matrix(ClusterNetId net_id, const t_bb& bb) {
 
     /* Cost = wire length along channel * cross_count / average      *
      * channel capacity.   Do this for x, then y direction and add.  */
-    for(int i=bb.xmin;i<bb.xmax;i++){
-        for(int j=bb.ymin;j<bb.ymax;j++){
-            cong_matrix[i][j] +=  get_net_wirelength_estimate(net_id,bb)/double((bb.xmax - bb.xmin + 1)*(bb.ymax - bb.ymin + 1));
+    for (int i = bb.xmin; i < bb.xmax; i++) {
+        for (int j = bb.ymin; j < bb.ymax; j++) {
+            cong_matrix[i][j] += get_net_wirelength_estimate(net_id, bb) / double((bb.xmax - bb.xmin + 1) * (bb.ymax - bb.ymin + 1));
         }
     }
 }
 
-
 double get_cong_cost(double chan_width) {
     auto& device_ctx = g_vpr_ctx.device();
     double max = 0.0;
-    double avg = 1e-4,var=0.0;
+    double avg = 1e-4, var = 0.0;
     double num = 0.0;
     double max_width = chan_width;
-    for(int i=0;i<int(device_ctx.grid.width());i++){
-        for(int j=0;j<int(device_ctx.grid.height());j++){
-            if(max<cong_matrix_new[i][j]){
+    for (int i = 0; i < int(device_ctx.grid.width()); i++) {
+        for (int j = 0; j < int(device_ctx.grid.height()); j++) {
+            if (max < cong_matrix_new[i][j]) {
                 max = cong_matrix_new[i][j];
             }
         }
     }
 
-    for(int i=0;i<int(device_ctx.grid.width());i++){
-        for(int j=0;j<int(device_ctx.grid.height());j++){
-            if(cong_matrix_new[i][j]>max_width){
-                avg+=cong_matrix_new[i][j]-max_width;
-                num+=1.0;
+    for (int i = 0; i < int(device_ctx.grid.width()); i++) {
+        for (int j = 0; j < int(device_ctx.grid.height()); j++) {
+            if (cong_matrix_new[i][j] > max_width) {
+                avg += cong_matrix_new[i][j] - max_width;
+                num += 1.0;
             }
         }
     }
 
-    for(int i=0;i<int(device_ctx.grid.width());i++){
-        for(int j=0;j<int(device_ctx.grid.height());j++){
-            double var_var=cong_matrix_new[i][j]-avg;
-            var_var = var_var*var_var;
+    for (int i = 0; i < int(device_ctx.grid.width()); i++) {
+        for (int j = 0; j < int(device_ctx.grid.height()); j++) {
+            double var_var = cong_matrix_new[i][j] - avg;
+            var_var = var_var * var_var;
             var += var_var;
         }
     }
-    var = var/double((device_ctx.grid.width()*device_ctx.grid.height()));
+    var = var / double((device_ctx.grid.width() * device_ctx.grid.height()));
     return avg;
 }
 
@@ -1957,7 +1953,7 @@ static double wirelength_crossing_count(size_t fanout) {
 }
 
 static void set_bb_delta_cost(double& bb_delta_c) {
-    for (const ClusterNetId ts_net: ts_info.ts_nets_to_update) {
+    for (const ClusterNetId ts_net : ts_info.ts_nets_to_update) {
         ClusterNetId net_id = ts_net;
 
         pl_net_cost.proposed_net_cost[net_id] = bb_updater.get_net_cost(net_id);
@@ -2017,9 +2013,9 @@ double comp_bb_cost(e_cost_methods method, const t_place_algorithm& place_algori
     auto& place_move_ctx = g_placer_ctx.mutable_move();
     auto& device_ctx = g_vpr_ctx.device();
     // VTR_LOG("\n\n\nwidth = %d and height= %d\n\n\n",device_ctx.grid.width(), device_ctx.grid.height());
-    if(place_algorithm == CONGESTION_AWARE_PLACE){
-        for(int i = 0; i < int(device_ctx.grid.width()); i++){
-            for(int j = 0; j < int(device_ctx.grid.height()); j++){
+    if (place_algorithm == CONGESTION_AWARE_PLACE) {
+        for (int i = 0; i < int(device_ctx.grid.width()); i++) {
+            for (int j = 0; j < int(device_ctx.grid.height()); j++) {
                 cong_matrix[i][j] = 0.0;
                 // cong_matrix_new[i][j] = 0.0;
             }
@@ -2042,7 +2038,7 @@ double comp_bb_cost(e_cost_methods method, const t_place_algorithm& place_algori
                                      place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
             }
 
-            if(place_algorithm == CONGESTION_AWARE_PLACE){
+            if (place_algorithm == CONGESTION_AWARE_PLACE) {
                 get_cong_matrix(net_id, place_move_ctx.bb_coords[net_id]);
             }
 
@@ -2053,19 +2049,19 @@ double comp_bb_cost(e_cost_methods method, const t_place_algorithm& place_algori
         }
     }
 
-    if(place_algorithm == CONGESTION_AWARE_PLACE){
-        for(int i = 0; i < int(device_ctx.grid.width()); i++){
-            for(int j = 0; j < int(device_ctx.grid.height()); j++){
+    if (place_algorithm == CONGESTION_AWARE_PLACE) {
+        for (int i = 0; i < int(device_ctx.grid.width()); i++) {
+            for (int j = 0; j < int(device_ctx.grid.height()); j++) {
                 cong_matrix_new[i][j] = cong_matrix[i][j];
             }
         }
     }
 
     // cost = get_cong_cost();
-    if(place_algorithm == CONGESTION_AWARE_PLACE){
-        for(int i=0;i<int(device_ctx.grid.width());i++){
-            for(int j=0;j<int(device_ctx.grid.height());j++){
-                VTR_LOG("%4.0f\t",cong_matrix[i][j]);
+    if (place_algorithm == CONGESTION_AWARE_PLACE) {
+        for (int i = 0; i < int(device_ctx.grid.width()); i++) {
+            for (int j = 0; j < int(device_ctx.grid.height()); j++) {
+                VTR_LOG("%4.0f\t", cong_matrix[i][j]);
             }
             VTR_LOG("\n");
         }
@@ -2173,7 +2169,7 @@ void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
 
     double new_bb_cost = recompute_bb_cost();
     double new_cong_cost = 0.0;
-    if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+    if (placer_opts.place_algorithm == CONGESTION_AWARE_PLACE) {
         new_cong_cost = get_cong_cost(placer_opts.congestion_tradeoff);
     }
     check_and_print_cost(new_bb_cost, costs->bb_cost, "bb_cost");
@@ -2240,10 +2236,10 @@ void alloc_and_load_chan_w_factors_for_place_cost(float place_cost_exp) {
     auto& device_ctx = g_vpr_ctx.device();
 
     /*
-    Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since subhigh must be greater than or
-    equal to sublow, we will only access the lower half of a matrix, but we allocate the whole matrix anyway
-    for simplicity so we can use the vtr utility matrix functions.
-    */
+     * Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since subhigh must be greater than or
+     * equal to sublow, we will only access the lower half of a matrix, but we allocate the whole matrix anyway
+     * for simplicity so we can use the vtr utility matrix functions.
+     */
 
     chanx_place_cost_fac.resize({device_ctx.grid.height(), device_ctx.grid.height() + 1});
     chany_place_cost_fac.resize({device_ctx.grid.width(), device_ctx.grid.width() + 1});
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 229f0c43eea..24e4791528d 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -361,7 +361,7 @@ void try_place(const Netlist<>& net_list,
      * width of the widest channel.  Place_cost_exp says what exponent the   *
      * width should be taken to when calculating costs.  This allows a       *
      * greater bias for anisotropic architectures.                           */
-    
+
     /*
      * Currently, the functions that require is_flat as their parameter and are called during placement should
      * receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as
@@ -498,13 +498,13 @@ void try_place(const Netlist<>& net_list,
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
         if (cube_bb) {
-            costs.bb_cost = comp_bb_cost(NORMAL,placer_opts.place_algorithm);
+            costs.bb_cost = comp_bb_cost(NORMAL, placer_opts.place_algorithm);
         } else {
             VTR_ASSERT_SAFE(!cube_bb);
             costs.bb_cost = comp_layer_bb_cost(NORMAL);
         }
 
-        if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+        if (placer_opts.place_algorithm == CONGESTION_AWARE_PLACE) {
             costs.cong_cost = get_cong_cost(placer_opts.congestion_tradeoff);
         }
         // costs.cong_cost_norm = 1/ costs.cong_cost;
@@ -541,7 +541,7 @@ void try_place(const Netlist<>& net_list,
         placer_criticalities = std::make_unique<PlacerCriticalities>(
             cluster_ctx.clb_nlist, netlist_pin_lookup);
 
-         pin_timing_invalidator = make_net_pin_timing_invalidator(
+        pin_timing_invalidator = make_net_pin_timing_invalidator(
             placer_opts.timing_update_type,
             net_list,
             netlist_pin_lookup,
@@ -587,15 +587,15 @@ void try_place(const Netlist<>& net_list,
 
         /* Total cost is the same as wirelength cost normalized*/
         if (cube_bb) {
-            costs.bb_cost = comp_bb_cost(NORMAL,placer_opts.place_algorithm);
+            costs.bb_cost = comp_bb_cost(NORMAL, placer_opts.place_algorithm);
         } else {
             VTR_ASSERT_SAFE(!cube_bb);
             costs.bb_cost = comp_layer_bb_cost(NORMAL);
         }
         costs.bb_cost_norm = 1 / costs.bb_cost;
-        if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+        if (placer_opts.place_algorithm == CONGESTION_AWARE_PLACE) {
             costs.cong_cost = get_cong_cost(placer_opts.congestion_tradeoff);
-            costs.cong_cost_norm = 1/ costs.cong_cost;
+            costs.cong_cost_norm = 1 / costs.cong_cost;
         }
         /* Timing cost and normalization factors are not used */
         costs.timing_cost = INVALID_COST;
@@ -716,15 +716,15 @@ void try_place(const Netlist<>& net_list,
     /* Set the temperature low to ensure that initial placement quality will be preserved */
     first_t = EPSILON;
 
-    t_annealing_state state(annealing_sched, 
-                            first_t, 
+    t_annealing_state state(annealing_sched,
+                            first_t,
                             first_rlim,
-                            first_move_lim, 
+                            first_move_lim,
                             first_crit_exponent,
                             device_ctx.grid.get_num_layers());
 
     /* Update the starting temperature for placement annealing to a more appropriate value */
-    if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+    if (placer_opts.place_algorithm == CONGESTION_AWARE_PLACE) {
         congestion_tradeoff = 1.0;
     }
     state.t = starting_t(&state, &costs, annealing_sched,
@@ -732,11 +732,11 @@ void try_place(const Netlist<>& net_list,
                          placer_setup_slacks.get(), timing_info.get(), *move_generator,
                          *manual_move_generator, pin_timing_invalidator.get(),
                          blocks_affected, placer_opts, noc_opts, move_type_stat);
-                        
-    if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+
+    if (placer_opts.place_algorithm == CONGESTION_AWARE_PLACE) {
         congestion_tradeoff = 0.1;
     }
-    double starting_tempreture = state.t ;
+    double starting_tempreture = state.t;
     if (!placer_opts.move_stats_file.empty()) {
         f_move_stats_file = std::unique_ptr<FILE, decltype(&vtr::fclose)>(
             vtr::fopen(placer_opts.move_stats_file.c_str(), "w"),
@@ -772,7 +772,7 @@ void try_place(const Netlist<>& net_list,
 
         /* Outer loop of the simulated annealing begins */
         do {
-            if(get_cong_cost(placer_opts.congestion_tradeoff)<=20 && congest_flag==false && placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+            if (get_cong_cost(placer_opts.congestion_tradeoff) <= 20 && congest_flag == false && placer_opts.place_algorithm == CONGESTION_AWARE_PLACE) {
                 congest_flag = true;
                 congestion_tradeoff = 1.0;
                 state.t = starting_tempreture;
@@ -1351,7 +1351,7 @@ static e_move_result try_swap(const t_annealing_state* state,
     if (manual_move_enabled) {
 #ifndef NO_GRAPHICS
         create_move_outcome = manual_move_display_and_propose(manual_move_generator, blocks_affected, proposed_action.move_type, rlim, placer_opts, criticalities);
-#else  //NO_GRAPHICS
+#else  //NO_GRAPHICS \
        //Cast to void to explicitly avoid warning.
         (void)manual_move_generator;
 #endif //NO_GRAPHICS
@@ -1404,7 +1404,7 @@ static e_move_result try_swap(const t_annealing_state* state,
         //
         //Also find all the pins affected by the swap, and calculates new connection
         //delays and timing costs and store them in proposed_* data structures.
-        find_affected_nets_and_update_costs(place_algorithm, delay_model, criticalities, 
+        find_affected_nets_and_update_costs(place_algorithm, delay_model, criticalities,
                                             blocks_affected, bb_delta_c, timing_delta_c);
 
         //For setup slack analysis, we first do a timing analysis to get the newest
@@ -1453,9 +1453,7 @@ static e_move_result try_swap(const t_annealing_state* state,
         } else if (place_algorithm == CONGESTION_AWARE_PLACE) {
             /* Take delta_c as a combination of timing and wiring cost. In
              * addition to `timing_tradeoff`, we normalize the cost values */
-            delta_c =(1 - congestion_tradeoff)*(cong_delta_c * costs->cong_cost_norm)+ ((1 - timing_tradeoff) * bb_delta_c * costs->bb_cost_norm
-                      + timing_tradeoff * timing_delta_c
-                            * costs->timing_cost_norm);
+            delta_c = (1 - congestion_tradeoff) * (cong_delta_c * costs->cong_cost_norm) + ((1 - timing_tradeoff) * bb_delta_c * costs->bb_cost_norm + timing_tradeoff * timing_delta_c * costs->timing_cost_norm);
         } else {
             VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE);
             VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
@@ -1475,9 +1473,9 @@ static e_move_result try_swap(const t_annealing_state* state,
         }
 
         /* 1 -> move accepted, 0 -> rejected. */
-        if(placer_opts.place_algorithm == CONGESTION_AWARE_PLACE){
+        if (placer_opts.place_algorithm == CONGESTION_AWARE_PLACE) {
             move_outcome = assess_swap(delta_c, state->t, cong_delta_c, get_cong_cost(placer_opts.congestion_tradeoff), placer_opts);
-        } else{
+        } else {
             move_outcome = assess_swap(delta_c, state->t, 0.0, 0.0, placer_opts);
         }
 
@@ -1702,11 +1700,11 @@ static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_
         total_cost = costs->bb_cost * costs->bb_cost_norm;
     } else if (placer_opts.place_algorithm == CONGESTION_AWARE_PLACE) {
         // in timing mode we include both wirelength and timing costs
-        total_cost =(1-congestion_tradeoff) * (costs->cong_cost * costs->cong_cost_norm) + ((1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm));
+        total_cost = (1 - congestion_tradeoff) * (costs->cong_cost * costs->cong_cost_norm) + ((1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm));
     } else if (placer_opts.place_algorithm.is_timing_driven()) {
         // in timing mode we include both wirelength and timing costs
         total_cost = (1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm);
-    }  
+    }
 
     if (noc_opts.noc) {
         // in noc mode we include noc aggregate bandwidth and noc latency
@@ -1789,7 +1787,7 @@ static e_move_result assess_swap(double delta_c, double t, double cong_delta_c,
     float fnum = vtr::frand();
     float prob_fac = std::exp(-delta_c / t);
     float prob_fac_cong = std::exp(-cong_delta_c / t);
-    if (prob_fac > fnum && (prob_fac_cong>fnum || placer_opts.place_algorithm != CONGESTION_AWARE_PLACE)) {
+    if (prob_fac > fnum && (prob_fac_cong > fnum || placer_opts.place_algorithm != CONGESTION_AWARE_PLACE)) {
         VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(hill climbing)\n");
         return ACCEPTED;
     }
@@ -2125,43 +2123,43 @@ static int check_block_placement_consistency() {
                     VTR_LOG_ERROR(
                         "%d blocks were placed at grid location (%d,%d,%d), but location capacity is %d.\n",
                         place_ctx.grid_blocks.get_usage(tile_loc), i, j, layer_num,
-                    type->capacity);
-                error++;
-            }
-            int usage_check = 0;
-            for (int k = 0; k < type->capacity; k++) {
-                auto bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num});
-                if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum)
-                    continue;
-
-                auto logical_block = cluster_ctx.clb_nlist.block_type(bnum);
-                auto physical_tile = type;
-
-                if (physical_tile_type(bnum) != physical_tile) {
-                    VTR_LOG_ERROR(
-                        "Block %zu type (%s) does not match grid location (%zu,%zu, %d) type (%s).\n",
-                        size_t(bnum), logical_block->name, i, j, layer_num, physical_tile->name);
+                        type->capacity);
                     error++;
                 }
-
-                auto& loc = place_ctx.block_locs[bnum].loc;
-                if (loc.x != i || loc.y != j || loc.layer != layer_num
-                    || !is_sub_tile_compatible(physical_tile, logical_block, loc.sub_tile)) {
+                int usage_check = 0;
+                for (int k = 0; k < type->capacity; k++) {
+                    auto bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num});
+                    if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum)
+                        continue;
+
+                    auto logical_block = cluster_ctx.clb_nlist.block_type(bnum);
+                    auto physical_tile = type;
+
+                    if (physical_tile_type(bnum) != physical_tile) {
+                        VTR_LOG_ERROR(
+                            "Block %zu type (%s) does not match grid location (%zu,%zu, %d) type (%s).\n",
+                            size_t(bnum), logical_block->name, i, j, layer_num, physical_tile->name);
+                        error++;
+                    }
+
+                    auto& loc = place_ctx.block_locs[bnum].loc;
+                    if (loc.x != i || loc.y != j || loc.layer != layer_num
+                        || !is_sub_tile_compatible(physical_tile, logical_block, loc.sub_tile)) {
+                        VTR_LOG_ERROR(
+                            "Block %zu's location is (%d,%d,%d) but found in grid at (%zu,%zu,%d,%d).\n",
+                            size_t(bnum), loc.x, loc.y, loc.sub_tile, tile_loc.x, tile_loc.y, tile_loc.layer_num, layer_num);
+                        error++;
+                    }
+                    ++usage_check;
+                    bdone[bnum]++;
+                }
+                if (usage_check != place_ctx.grid_blocks.get_usage(tile_loc)) {
                     VTR_LOG_ERROR(
-                        "Block %zu's location is (%d,%d,%d) but found in grid at (%zu,%zu,%d,%d).\n",
-                        size_t(bnum), loc.x, loc.y, loc.sub_tile, tile_loc.x, tile_loc.y, tile_loc.layer_num, layer_num);
+                        "%d block(s) were placed at location (%d,%d,%d), but location contains %d block(s).\n",
+                        place_ctx.grid_blocks.get_usage(tile_loc), tile_loc.x, tile_loc.y, tile_loc.layer_num, usage_check);
                     error++;
                 }
-                ++usage_check;
-                bdone[bnum]++;
             }
-            if (usage_check != place_ctx.grid_blocks.get_usage(tile_loc)) {
-                VTR_LOG_ERROR(
-                    "%d block(s) were placed at location (%d,%d,%d), but location contains %d block(s).\n",
-                    place_ctx.grid_blocks.get_usage(tile_loc), tile_loc.x, tile_loc.y, tile_loc.layer_num, usage_check);
-                error++;
-            }
-        }
         }
     }
 
@@ -2211,7 +2209,7 @@ int check_macro_placement_consistency() {
                 error++;
             }
         } // Finish going through all the members
-    } // Finish going through all the macros
+    }     // Finish going through all the macros
     return error;
 }
 
@@ -2464,10 +2462,10 @@ static void calculate_reward_and_process_outcome(
     } else if (reward_fun == WL_BIASED_RUNTIME_AWARE) {
         if (delta_c < 0) {
             float reward = -1
-                        * (move_outcome_stats.delta_cost_norm
-                            + (0.5 - timing_bb_factor)
+                           * (move_outcome_stats.delta_cost_norm
+                              + (0.5 - timing_bb_factor)
                                     * move_outcome_stats.delta_timing_cost_norm
-                            + timing_bb_factor
+                              + timing_bb_factor
                                     * move_outcome_stats.delta_bb_cost_norm);
             move_generator.process_outcome(reward, reward_fun.value());
         } else {
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 53c88d76cea..860855b0602 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -50,7 +50,7 @@ static GridBlock init_grid_blocks() {
 void t_placer_costs::update_norm_factors() {
     if (place_algorithm.is_timing_driven()) {
         bb_cost_norm = 1 / bb_cost;
-        if (cong_cost) cong_cost_norm = 1/ cong_cost;
+        if (cong_cost) cong_cost_norm = 1 / cong_cost;
         //Prevent the norm factor from going to infinity
         timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST);
     } else {