diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv
index ac23c43e..44a030a1 100644
--- a/src/fpnew_divsqrt_multi.sv
+++ b/src/fpnew_divsqrt_multi.sv
@@ -41,10 +41,6 @@ module fpnew_divsqrt_multi #(
   // Input Handshake
   input  logic                        in_valid_i,
   output logic                        in_ready_o,
-  output logic                        divsqrt_done_o,
-  input  logic                        simd_synch_done_i,
-  output logic                        divsqrt_ready_o,
-  input  logic                        simd_synch_rdy_i,
   input  logic                        flush_i,
   // Output signals
   output logic [WIDTH-1:0]            result_o,
@@ -170,11 +166,10 @@ module fpnew_divsqrt_multi #(
 
   logic in_ready;               // input handshake with upstream
   logic div_valid, sqrt_valid;  // input signalling with unit
-  logic unit_ready, unit_done, unit_done_q;  // status signals from unit instance
+  logic unit_ready, unit_done;  // status signals from unit instance
   logic op_starting;            // high in the cycle a new operation starts
   logic out_valid, out_ready;   // output handshake with downstream
   logic unit_busy;              // valid data in flight
-  logic simd_synch_done;
   // FSM states
   typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
   fsm_state_e state_q, state_d;
@@ -198,21 +193,8 @@ module fpnew_divsqrt_multi #(
   `FFL(result_aux_q,    inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0)
   `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0)
 
-  // Wait for other lanes only if the operation is vectorial
-  assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q;
-
-  // Valid synch with other lanes
-  // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes
-  // As soon as all the lanes are over, we can clear this FF and start with a new operation
-  `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni);
-  // Tell the other units that this unit has finished now or in the past
-  assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q;
-
-  // Ready synch with other lanes
-  // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
-  assign divsqrt_ready_o = in_ready;
-  // Upstream ready comes from sanitization FSM, and it is synched among all the lanes
-  assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? simd_synch_rdy_i : in_ready;
+  // Upstream ready comes from FSM
+  assign inp_pipe_ready[NUM_INP_REGS] = in_ready;
 
   // FSM to safely apply and receive data from DIVSQRT unit
   always_comb begin : flag_fsm
@@ -234,7 +216,7 @@ module fpnew_divsqrt_multi #(
       BUSY: begin
         unit_busy = 1'b1; // data in flight
         // If all the lanes are done with processing
-        if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin
+        if (unit_done) begin
           out_valid = 1'b1; // try to commit result downstream
           // If downstream accepts our result
           if (out_ready) begin
@@ -305,22 +287,6 @@ module fpnew_divsqrt_multi #(
   // Adjust result width and fix FP8
   assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;
 
-  // Hold the result when one lane has finished execution, except when all the lanes finish together,
-  // or the operation is not vectorial, and the result can be accepted downstream
-  assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready);
-  // The Hold register (load, no reset)
-  `FFLNR(held_result_q, adjusted_result, hold_en, clk_i)
-  `FFLNR(held_status_q, unit_status,     hold_en, clk_i)
-
-  // --------------
-  // Output Select
-  // --------------
-  logic [WIDTH-1:0]   result_d;
-  fpnew_pkg::status_t status_d;
-  // Prioritize hold register data
-  assign result_d = unit_done_q ? held_result_q : adjusted_result;
-  assign status_d = unit_done_q ? held_status_q : unit_status;
-
   // ----------------
   // Output Pipeline
   // ----------------
@@ -335,8 +301,8 @@ module fpnew_divsqrt_multi #(
   logic [0:NUM_OUT_REGS] out_pipe_ready;
 
   // Input stage: First element of pipeline is taken from inputs
-  assign out_pipe_result_q[0] = result_d;
-  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_result_q[0] = adjusted_result;
+  assign out_pipe_status_q[0] = unit_status;
   assign out_pipe_tag_q[0]    = result_tag_q;
   assign out_pipe_mask_q[0]   = result_mask_q;
   assign out_pipe_aux_q[0]    = result_aux_q;
diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv
index eff0620d..df781554 100644
--- a/src/fpnew_divsqrt_th_64_multi.sv
+++ b/src/fpnew_divsqrt_th_64_multi.sv
@@ -39,14 +39,9 @@ module fpnew_divsqrt_th_64_multi #(
   input  TagType                      tag_i,
   input  logic                        mask_i,
   input  AuxType                      aux_i,
-  input  logic                        vectorial_op_i,
   // Input Handshake
   input  logic                        in_valid_i,
   output logic                        in_ready_o,
-  output logic                        divsqrt_done_o,
-  input  logic                        simd_synch_done_i,
-  output logic                        divsqrt_ready_o,
-  input  logic                        simd_synch_rdy_i,
   input  logic                        flush_i,
   // Output signals
   output logic [WIDTH-1:0]            result_o,
@@ -95,7 +90,6 @@ module fpnew_divsqrt_th_64_multi #(
   TagType                [0:NUM_INP_REGS]                       inp_pipe_tag_q;
   logic                  [0:NUM_INP_REGS]                       inp_pipe_mask_q;
   AuxType                [0:NUM_INP_REGS]                       inp_pipe_aux_q;
-  logic                  [0:NUM_INP_REGS]                       inp_pipe_vec_op_q;
   logic                  [0:NUM_INP_REGS]                       inp_pipe_valid_q;
   // Ready signal is combinatorial for all stages
   logic [0:NUM_INP_REGS] inp_pipe_ready;
@@ -108,7 +102,6 @@ module fpnew_divsqrt_th_64_multi #(
   assign inp_pipe_tag_q[0]      = tag_i;
   assign inp_pipe_mask_q[0]     = mask_i;
   assign inp_pipe_aux_q[0]      = aux_i;
-  assign inp_pipe_vec_op_q[0]   = vectorial_op_i;
   assign inp_pipe_valid_q[0]    = in_valid_i;
   // Input stage: Propagate pipeline ready signal to upstream circuitry
   assign in_ready_o = inp_pipe_ready[0];
@@ -132,7 +125,6 @@ module fpnew_divsqrt_th_64_multi #(
     `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
     `FFL(inp_pipe_mask_q[i+1],     inp_pipe_mask_q[i],     reg_ena, '0)
     `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
-    `FFL(inp_pipe_vec_op_q[i+1],   inp_pipe_vec_op_q[i],   reg_ena, AuxType'('0))
   end
   // Output stage: assign selected pipe outputs to signals for later use
   assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
@@ -181,11 +173,11 @@ module fpnew_divsqrt_th_64_multi #(
 
   logic in_ready;               // input handshake with upstream
   logic div_valid, sqrt_valid;  // input signalling with unit
-  logic unit_ready, unit_done, unit_done_q;  // status signals from unit instance
+  logic unit_ready, unit_done;  // status signals from unit instance
   logic op_starting;            // high in the cycle a new operation starts
   logic out_valid, out_ready;   // output handshake with downstream
   logic unit_busy;              // valid data in flight
-  logic simd_synch_done;
+
   // FSM states
   typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
   fsm_state_e state_q, state_d;
@@ -200,29 +192,13 @@ module fpnew_divsqrt_th_64_multi #(
   TagType result_tag_q;
   logic result_mask_q;
   AuxType result_aux_q;
-  logic result_vec_op_q;
 
   // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst)
   `FFL(result_tag_q,    inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0)
   `FFL(result_mask_q,   inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0)
   `FFL(result_aux_q,    inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0)
-  `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0)
-
-  // Wait for other lanes only if the operation is vectorial
-  assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q;
-
-  // Valid synch with other lanes
-  // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes
-  // As soon as all the lanes are over, we can clear this FF and start with a new operation
-  `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni);
-  // Tell the other units that this unit has finished now or in the past
-  assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q;
 
-  // Ready synch with other lanes
-  // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
-  assign divsqrt_ready_o = in_ready;
-  // Upstream ready comes from sanitization FSM, and it is synched among all the lanes
-  assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? simd_synch_rdy_i : in_ready;
+  assign inp_pipe_ready[NUM_INP_REGS] = in_ready;
 
   // FSM to safely apply and receive data from DIVSQRT unit
   always_comb begin : flag_fsm
@@ -244,7 +220,7 @@ module fpnew_divsqrt_th_64_multi #(
       BUSY: begin
         unit_busy = 1'b1; // data in flight
         // If all the lanes are done with processing
-        if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin
+        if (unit_done) begin
           out_valid = 1'b1; // try to commit result downstream
           // If downstream accepts our result
           if (out_ready) begin
@@ -410,22 +386,6 @@ module fpnew_divsqrt_th_64_multi #(
 
   assign unit_ready = !vfdsu_dp_fdiv_busy;
 
-  // Hold the result when one lane has finished execution, except when all the lanes finish together,
-  // or the operation is not vectorial, and the result can be accepted downstream
-  assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready);
-  // The Hold register (load, no reset)
-  `FFLNR(held_result_q, unit_result, hold_en, clk_i)
-  `FFLNR(held_status_q, unit_status, hold_en, clk_i)
-
-  // --------------
-  // Output Select
-  // --------------
-  logic [WIDTH-1:0]   result_d;
-  fpnew_pkg::status_t status_d;
-  // Prioritize hold register data
-  assign result_d[WIDTH-1:0] = unit_done_q ? held_result_q[WIDTH-1:0] : unit_result[WIDTH-1:0];
-  assign status_d = unit_done_q ? held_status_q : unit_status;
-
   // ----------------
   // Output Pipeline
   // ----------------
@@ -440,8 +400,8 @@ module fpnew_divsqrt_th_64_multi #(
   logic [0:NUM_OUT_REGS] out_pipe_ready;
 
   // Input stage: First element of pipeline is taken from inputs
-  assign out_pipe_result_q[0] = result_d;
-  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_result_q[0] = unit_result;
+  assign out_pipe_status_q[0] = unit_status;
   assign out_pipe_tag_q[0]    = result_tag_q;
   assign out_pipe_mask_q[0]   = result_mask_q;
   assign out_pipe_aux_q[0]    = result_aux_q;
diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv
index e3974bea..e99bb3af 100644
--- a/src/fpnew_opgroup_multifmt_slice.sv
+++ b/src/fpnew_opgroup_multifmt_slice.sv
@@ -119,7 +119,6 @@ or on 16b inputs producing 32b outputs");
   logic                result_fmt_is_int, result_is_cpk;
   logic [1:0]          result_vec_op; // info for vectorial results (for packing)
 
-  logic simd_synch_rdy, simd_synch_done;
   fpnew_pkg::roundmode_e rnd_mode;
 
   // -----------
@@ -171,13 +170,13 @@ or on 16b inputs producing 32b outputs");
   // ------------ 
   if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_reduced_throughput_lanes
     // Reduced throughput specific lane signals
-    logic   [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes
+    logic   [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
     TagType [NUM_LANES-1:0]               lane_tags; // only the first one is actually used
     logic   [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used
     logic   [NUM_LANES-1:0]               lane_busy; // dito
 
     // Input side
-    assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
+    assign in_ready_o = (vectorial_op && EnableVectors) ? &lane_in_ready[NUM_DIVSQRT_LANES-1:0] : lane_in_ready[0]; // Vectorial: every instantiated DIVSQRT lane must be ready (unused lanes tie their ready low, so they are excluded); scalar: first lane only
 
     // ---------------
     // Generate Lanes
@@ -218,7 +217,7 @@ or on 16b inputs producing 32b outputs");
         logic [LANE_WIDTH-1:0]                   op_result;       // lane-local results
         fpnew_pkg::status_t                      op_status;
 
-        assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
+        assign in_valid = in_valid_i & ((lane == 0) | vectorial_op) & in_ready_o; // upper lanes only for vectors; gated by the slice-level ready so no lane sees valid while in_ready_o is low
 
         // Slice out the operands for this lane, upper bits are ignored in the unit
         always_comb begin : prepare_input
@@ -277,13 +276,8 @@ or on 16b inputs producing 32b outputs");
               .tag_i,
               .mask_i           ( simd_mask_i[lane]   ),
               .aux_i            ( in_aux              ),
-              .vectorial_op_i   ( vectorial_op        ), // synchronize only vectorial operations
               .in_valid_i       ( in_valid            ),
               .in_ready_o       ( lane_in_ready[lane] ),
-              .divsqrt_done_o   ( divsqrt_done[lane]  ),
-              .simd_synch_done_i( simd_synch_done     ),
-              .divsqrt_ready_o  ( divsqrt_ready[lane] ),
-              .simd_synch_rdy_i ( simd_synch_rdy      ),
               .flush_i,
               .result_o         ( op_result           ),
               .status_o         ( op_status           ),
@@ -313,13 +307,9 @@ or on 16b inputs producing 32b outputs");
               .tag_i,
               .mask_i           ( simd_mask_i[lane]   ),
               .aux_i            ( in_aux              ),
-              .vectorial_op_i   ( vectorial_op        ), // synchronize only vectorial operations
               .in_valid_i       ( in_valid            ),
               .in_ready_o       ( lane_in_ready[lane] ),
-              .divsqrt_done_o   ( divsqrt_done[lane]  ),
-              .simd_synch_done_i( simd_synch_done     ),
-              .divsqrt_ready_o  ( divsqrt_ready[lane] ),
-              .simd_synch_rdy_i ( simd_synch_rdy      ),
+
               .flush_i,
               .result_o         ( op_result           ),
               .status_o         ( op_status           ),
@@ -349,8 +339,6 @@ or on 16b inputs producing 32b outputs");
         assign lane_in_ready[lane]  = 1'b0; // unused lane
         assign lane_aux[lane]       = 1'b0; // unused lane
         assign lane_tags[lane]      = 1'b0; // unused lane
-        assign divsqrt_done[lane]   = 1'b0; // unused lane
-        assign divsqrt_ready[lane]  = 1'b0; // unused lane
         assign lane_busy[lane]      = 1'b0;
 
         // Signals in any kind of laned instance
@@ -390,21 +378,11 @@ or on 16b inputs producing 32b outputs");
       assign ifmt_slice_result[ifmt] = '0;
     end
 
-    if ((DivSqrtSel != fpnew_pkg::TH32) && (OpGroup == fpnew_pkg::DIVSQRT)) begin
-      // Synch lanes if there is more than one
-      assign simd_synch_rdy  = EnableVectors ? &divsqrt_ready[NUM_DIVSQRT_LANES-1:0] : divsqrt_ready[0];
-      assign simd_synch_done = EnableVectors ? &divsqrt_done[NUM_DIVSQRT_LANES-1:0]  : divsqrt_done[0];
-    end else begin
-      // Unused (TH32 divider only supported for scalar FP32 divsqrt)
-      assign simd_synch_rdy  = '0;
-      assign simd_synch_done = '0;
-    end
-
     // Group signals from all lanes
     assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones
     assign tag_o           = lane_tags[0];    // don't care about upper ones
     assign busy_o          = lane_busy[0];
-    assign out_valid_o     = lane_out_valid[0]; // don't care about upper ones
+    assign out_valid_o     = (result_is_vector && EnableVectors) ? &lane_out_valid[NUM_DIVSQRT_LANES-1:0] : lane_out_valid[0]; // Vectorial: wait for every instantiated DIVSQRT lane (unused lanes excluded from the reduction); scalar: first lane only
     assign out_aux         = lane_aux[0]; // don't care about upper ones
 
     // Lane is always non_conv