From 0656f0809208160f83a7dd1ae91d9f09b582ce35 Mon Sep 17 00:00:00 2001
From: Guoxiong Li <gli@openjdk.org>
Date: Thu, 11 Apr 2024 15:01:41 +0000
Subject: [PATCH 01/32] 8329469: Generational ZGC: Move the methods
 forwarding_[index|find|insert] from zRelocate.cpp to ZForwarding

Reviewed-by: stefank, eosterlund
---
 src/hotspot/share/gc/z/zForwarding.hpp        | 17 ++++++--
 src/hotspot/share/gc/z/zForwarding.inline.hpp | 39 ++++++++++++++---
 src/hotspot/share/gc/z/zRelocate.cpp          | 42 +++----------------
 3 files changed, 52 insertions(+), 46 deletions(-)
diff --git a/src/hotspot/share/gc/z/zForwarding.hpp b/src/hotspot/share/gc/z/zForwarding.hpp
index a99473322d44c..ee37508903f6c 100644
--- a/src/hotspot/share/gc/z/zForwarding.hpp
+++ b/src/hotspot/share/gc/z/zForwarding.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -83,6 +83,14 @@ class ZForwarding {
   ZForwardingEntry first(uintptr_t from_index, ZForwardingCursor* cursor) const;
   ZForwardingEntry next(ZForwardingCursor* cursor) const;
 
+  uintptr_t index(zoffset from_offset);
+
+  ZForwardingEntry find(uintptr_t from_index, ZForwardingCursor* cursor) const;
+  zaddress find(zoffset from_offset, ZForwardingCursor* cursor);
+
+  zoffset insert(uintptr_t from_index, zoffset to_offset, ZForwardingCursor* cursor);
+  zaddress insert(zoffset from_offset, zaddress to_addr, ZForwardingCursor* cursor);
+
   template <typename Function>
   void object_iterate_forwarded_via_livemap(Function function);
 
@@ -140,10 +148,11 @@ class ZForwarding {
   void mark_done();
   bool is_done() const;
 
-  zaddress find(zaddress_unsafe addr);
+  zaddress find(zaddress from_addr, ZForwardingCursor* cursor);
+  zaddress find(zaddress_unsafe from_addr, ZForwardingCursor* cursor);
+  zaddress find(zaddress_unsafe from_addr);
 
-  ZForwardingEntry find(uintptr_t from_index, ZForwardingCursor* cursor) const;
-  zoffset insert(uintptr_t from_index, zoffset to_offset, ZForwardingCursor* cursor);
+  zaddress insert(zaddress from_addr, zaddress to_addr, ZForwardingCursor* cursor);
 
   // Relocated remembered set fields support
   void relocated_remembered_fields_register(volatile zpointer* p);
diff --git a/src/hotspot/share/gc/z/zForwarding.inline.hpp b/src/hotspot/share/gc/z/zForwarding.inline.hpp
index b0cb67a70cdd1..eb5f4a36161ed 100644
--- a/src/hotspot/share/gc/z/zForwarding.inline.hpp
+++ b/src/hotspot/share/gc/z/zForwarding.inline.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -218,11 +218,8 @@ inline ZForwardingEntry ZForwarding::next(ZForwardingCursor* cursor) const {
   return at(cursor);
 }
 
-inline zaddress ZForwarding::find(zaddress_unsafe addr) {
-  const uintptr_t from_index = (ZAddress::offset(addr) - start()) >> object_alignment_shift();
-  ZForwardingCursor cursor;
-  const ZForwardingEntry entry = find(from_index, &cursor);
-  return entry.populated() ? ZOffset::address(to_zoffset(entry.to_offset())) : zaddress::null;
+inline uintptr_t ZForwarding::index(zoffset from_offset) { // Converts a from-offset into the from_index consumed by the index-based find()/insert()
+  return (from_offset - start()) >> object_alignment_shift(); // distance from start(), shifted right by object_alignment_shift()
 }
 
 inline ZForwardingEntry ZForwarding::find(uintptr_t from_index, ZForwardingCursor* cursor) const {
@@ -243,6 +240,25 @@ inline ZForwardingEntry ZForwarding::find(uintptr_t from_index, ZForwardingCurso
   return entry;
 }
 
+inline zaddress ZForwarding::find(zoffset from_offset, ZForwardingCursor* cursor) { // Offset-based lookup; returns the forwarded address or zaddress::null
+  const uintptr_t from_index = index(from_offset);
+  const ZForwardingEntry entry = find(from_index, cursor); // delegates to the index-based find(), passing the caller's cursor through
+  return entry.populated() ? ZOffset::address(to_zoffset(entry.to_offset())) : zaddress::null; // unpopulated entry => zaddress::null
+}
+
+inline zaddress ZForwarding::find(zaddress from_addr, ZForwardingCursor* cursor) { // zaddress overload of the offset-based find()
+  return find(ZAddress::offset(from_addr), cursor); // convert the address to its offset, then delegate
+}
+
+inline zaddress ZForwarding::find(zaddress_unsafe from_addr, ZForwardingCursor* cursor) { // zaddress_unsafe overload of the offset-based find()
+  return find(ZAddress::offset(from_addr), cursor); // convert the address to its offset, then delegate
+}
+
+inline zaddress ZForwarding::find(zaddress_unsafe from_addr) { // Lookup for callers that do not supply a cursor
+  ZForwardingCursor cursor; // local cursor; its value is not returned to the caller
+  return find(from_addr, &cursor);
+}
+
 inline zoffset ZForwarding::insert(uintptr_t from_index, zoffset to_offset, ZForwardingCursor* cursor) {
   const ZForwardingEntry new_entry(from_index, untype(to_offset));
   const ZForwardingEntry old_entry; // Empty
@@ -271,6 +287,17 @@ inline zoffset ZForwarding::insert(uintptr_t from_index, zoffset to_offset, ZFor
   }
 }
 
+inline zaddress ZForwarding::insert(zoffset from_offset, zaddress to_addr, ZForwardingCursor* cursor) { // Offset/address wrapper around the index-based insert()
+  const uintptr_t from_index = index(from_offset);
+  const zoffset to_offset = ZAddress::offset(to_addr);
+  const zoffset to_offset_final = insert(from_index, to_offset, cursor); // offset returned by the index-based insert(); need not equal to_offset
+  return ZOffset::address(to_offset_final); // hand the result back as an address rather than an offset
+}
+
+inline zaddress ZForwarding::insert(zaddress from_addr, zaddress to_addr, ZForwardingCursor* cursor) { // zaddress overload of the offset-based insert()
+  return insert(ZAddress::offset(from_addr), to_addr, cursor); // convert from_addr to its offset, then delegate
+}
+
 inline void ZForwarding::relocated_remembered_fields_register(volatile zpointer* p) {
   // Invariant: Page is being retained
   assert(ZGeneration::young()->is_phase_mark(), "Only called when");
diff --git a/src/hotspot/share/gc/z/zRelocate.cpp b/src/hotspot/share/gc/z/zRelocate.cpp
index 78efa7cdb12a1..b55a1863bdee3 100644
--- a/src/hotspot/share/gc/z/zRelocate.cpp
+++ b/src/hotspot/share/gc/z/zRelocate.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -52,35 +52,6 @@
 static const ZStatCriticalPhase ZCriticalPhaseRelocationStall("Relocation Stall");
 static const ZStatSubPhase ZSubPhaseConcurrentRelocateRememberedSetFlipPromotedYoung("Concurrent Relocate Remset FP", ZGenerationId::young);
 
-static uintptr_t forwarding_index(ZForwarding* forwarding, zoffset from_offset) {
-  return (from_offset - forwarding->start()) >> forwarding->object_alignment_shift();
-}
-
-static zaddress forwarding_find(ZForwarding* forwarding, zoffset from_offset, ZForwardingCursor* cursor) {
-  const uintptr_t from_index = forwarding_index(forwarding, from_offset);
-  const ZForwardingEntry entry = forwarding->find(from_index, cursor);
-  return entry.populated() ? ZOffset::address(to_zoffset(entry.to_offset())) : zaddress::null;
-}
-
-static zaddress forwarding_find(ZForwarding* forwarding, zaddress_unsafe from_addr, ZForwardingCursor* cursor) {
-  return forwarding_find(forwarding, ZAddress::offset(from_addr), cursor);
-}
-
-static zaddress forwarding_find(ZForwarding* forwarding, zaddress from_addr, ZForwardingCursor* cursor) {
-  return forwarding_find(forwarding, ZAddress::offset(from_addr), cursor);
-}
-
-static zaddress forwarding_insert(ZForwarding* forwarding, zoffset from_offset, zaddress to_addr, ZForwardingCursor* cursor) {
-  const uintptr_t from_index = forwarding_index(forwarding, from_offset);
-  const zoffset to_offset = ZAddress::offset(to_addr);
-  const zoffset to_offset_final = forwarding->insert(from_index, to_offset, cursor);
-  return ZOffset::address(to_offset_final);
-}
-
-static zaddress forwarding_insert(ZForwarding* forwarding, zaddress from_addr, zaddress to_addr, ZForwardingCursor* cursor) {
-  return forwarding_insert(forwarding, ZAddress::offset(from_addr), to_addr, cursor);
-}
-
 ZRelocateQueue::ZRelocateQueue()
   : _lock(),
     _queue(),
@@ -368,7 +339,7 @@ static zaddress relocate_object_inner(ZForwarding* forwarding, zaddress from_add
   ZUtils::object_copy_disjoint(from_addr, to_addr, size);
 
   // Insert forwarding
-  const zaddress to_addr_final = forwarding_insert(forwarding, from_addr, to_addr, cursor);
+  const zaddress to_addr_final = forwarding->insert(from_addr, to_addr, cursor);
 
   if (to_addr_final != to_addr) {
     // Already relocated, try undo allocation
@@ -382,7 +353,7 @@ zaddress ZRelocate::relocate_object(ZForwarding* forwarding, zaddress_unsafe fro
   ZForwardingCursor cursor;
 
   // Lookup forwarding
-  zaddress to_addr = forwarding_find(forwarding, from_addr, &cursor);
+  zaddress to_addr = forwarding->find(from_addr, &cursor);
   if (!is_null(to_addr)) {
     // Already relocated
     return to_addr;
@@ -409,8 +380,7 @@ zaddress ZRelocate::relocate_object(ZForwarding* forwarding, zaddress_unsafe fro
 }
 
 zaddress ZRelocate::forward_object(ZForwarding* forwarding, zaddress_unsafe from_addr) {
-  ZForwardingCursor cursor;
-  const zaddress to_addr = forwarding_find(forwarding, from_addr, &cursor);
+  const zaddress to_addr = forwarding->find(from_addr);
   assert(!is_null(to_addr), "Should be forwarded: " PTR_FORMAT, untype(from_addr));
   return to_addr;
 }
@@ -626,7 +596,7 @@ class ZRelocateWork : public StackObj {
 
     // Lookup forwarding
     {
-      const zaddress to_addr = forwarding_find(_forwarding, from_addr, &cursor);
+      const zaddress to_addr = _forwarding->find(from_addr, &cursor);
       if (!is_null(to_addr)) {
         // Already relocated
         increase_other_forwarded(size);
@@ -650,7 +620,7 @@ class ZRelocateWork : public StackObj {
     }
 
     // Insert forwarding
-    const zaddress to_addr = forwarding_insert(_forwarding, from_addr, allocated_addr, &cursor);
+    const zaddress to_addr = _forwarding->insert(from_addr, allocated_addr, &cursor);
     if (to_addr != allocated_addr) {
       // Already relocated, undo allocation
       _allocator->undo_alloc_object(to_page, to_addr, size);

From 31ee5108e059afae0a3809947adb7b91e19baec6 Mon Sep 17 00:00:00 2001
From: Cesar Soares Lucas <cslucas@openjdk.org>
Date: Thu, 11 Apr 2024 15:44:49 +0000
Subject: [PATCH 02/32] 8241503: C2: Share MacroAssembler between mach nodes
 during code emission

Reviewed-by: kvn, mdoerr, amitkumar, lucy
---
 src/hotspot/cpu/aarch64/aarch64.ad            |  232 +--
 src/hotspot/cpu/aarch64/aarch64_vector.ad     |   28 +-
 src/hotspot/cpu/aarch64/aarch64_vector_ad.m4  |   28 +-
 src/hotspot/cpu/aarch64/ad_encode.m4          |    9 +-
 .../cpu/aarch64/compiledIC_aarch64.cpp        |   14 +-
 .../gc/shenandoah/shenandoah_aarch64.ad       |   26 +-
 src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad     |   22 +-
 src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad     |   62 +-
 .../cpu/aarch64/sharedRuntime_aarch64.cpp     |    6 +-
 src/hotspot/cpu/arm/arm.ad                    |  188 +-
 src/hotspot/cpu/arm/compiledIC_arm.cpp        |    8 +-
 src/hotspot/cpu/ppc/compiledIC_ppc.cpp        |   10 +-
 .../cpu/ppc/gc/shenandoah/shenandoah_ppc.ad   |   16 +-
 src/hotspot/cpu/ppc/gc/x/x_ppc.ad             |   32 +-
 src/hotspot/cpu/ppc/gc/z/z_ppc.ad             |   58 +-
 src/hotspot/cpu/ppc/ppc.ad                    |  198 +-
 src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp     |    6 +-
 src/hotspot/cpu/riscv/compiledIC_riscv.cpp    |   14 +-
 .../riscv/gc/shenandoah/shenandoah_riscv.ad   |   24 +-
 src/hotspot/cpu/riscv/gc/x/x_riscv.ad         |   20 +-
 src/hotspot/cpu/riscv/gc/z/z_riscv.ad         |   52 +-
 src/hotspot/cpu/riscv/riscv.ad                |  105 +-
 src/hotspot/cpu/riscv/riscv_v.ad              |   18 +-
 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp |    6 +-
 src/hotspot/cpu/s390/compiledIC_s390.cpp      |   10 +-
 src/hotspot/cpu/s390/s390.ad                  |  208 +-
 src/hotspot/cpu/x86/assembler_x86.cpp         |   13 +
 .../cpu/x86/c2_intelJccErratum_x86.cpp        |    4 +-
 .../cpu/x86/c2_intelJccErratum_x86.hpp        |    6 +-
 src/hotspot/cpu/x86/compiledIC_x86.cpp        |   10 +-
 .../x86/gc/shenandoah/shenandoah_x86_32.ad    |    4 +-
 .../x86/gc/shenandoah/shenandoah_x86_64.ad    |    8 +-
 .../cpu/x86/gc/x/xBarrierSetAssembler_x86.cpp |    8 +-
 src/hotspot/cpu/x86/gc/x/x_x86_64.ad          |   24 +-
 .../cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp |   10 +-
 src/hotspot/cpu/x86/gc/z/z_x86_64.ad          |   46 +-
 src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp  |    6 +-
 src/hotspot/cpu/x86/x86.ad                    |   27 +-
 src/hotspot/cpu/x86/x86_32.ad                 | 1717 ++++++++---------
 src/hotspot/cpu/x86/x86_64.ad                 |  168 +-
 .../cpu/zero/c2_MacroAssembler_zero.hpp       |   31 +
 src/hotspot/cpu/zero/compiledIC_zero.cpp      |    2 +-
 src/hotspot/share/adlc/adlparse.cpp           |    8 -
 src/hotspot/share/adlc/output_c.cpp           |   19 +-
 src/hotspot/share/adlc/output_h.cpp           |    2 +-
 src/hotspot/share/asm/assembler.hpp           |   19 +-
 src/hotspot/share/code/compiledIC.hpp         |    3 +-
 .../share/jvmci/jvmciCodeInstaller.cpp        |    3 +-
 src/hotspot/share/opto/c2_CodeStubs.cpp       |    9 +-
 src/hotspot/share/opto/c2_CodeStubs.hpp       |    2 +-
 src/hotspot/share/opto/cfgnode.hpp            |    2 +-
 src/hotspot/share/opto/constantTable.cpp      |   46 +-
 src/hotspot/share/opto/constantTable.hpp      |    8 +-
 src/hotspot/share/opto/locknode.hpp           |    2 +-
 src/hotspot/share/opto/machnode.cpp           |    4 +-
 src/hotspot/share/opto/machnode.hpp           |   32 +-
 src/hotspot/share/opto/node.cpp               |    4 +-
 src/hotspot/share/opto/node.hpp               |    6 +-
 src/hotspot/share/opto/output.cpp             |   84 +-
 src/hotspot/share/opto/output.hpp             |    2 +-
 60 files changed, 1713 insertions(+), 2026 deletions(-)
 create mode 100644 src/hotspot/cpu/zero/c2_MacroAssembler_zero.hpp

diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 171ce00ae5664..2d13447573961 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -1148,8 +1148,8 @@ class HandlerImpl {
 
  public:
 
-  static int emit_exception_handler(CodeBuffer &cbuf);
-  static int emit_deopt_handler(CodeBuffer& cbuf);
+  static int emit_exception_handler(C2_MacroAssembler *masm);
+  static int emit_deopt_handler(C2_MacroAssembler* masm);
 
   static uint size_exception_handler() {
     return MacroAssembler::far_codestub_branch_size();
@@ -1602,7 +1602,7 @@ bool needs_acquiring_load_exclusive(const Node *n)
   return true;
 }
 
-#define __ _masm.
+#define __ masm->
 
 // advance declarations for helper functions to convert register
 // indices to register objects
@@ -1657,8 +1657,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
+void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   __ brk(0);
 }
 
@@ -1674,8 +1673,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
   }
 #endif
 
-  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
-    C2_MacroAssembler _masm(&cbuf);
+  void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
     for (int i = 0; i < _count; i++) {
       __ nop();
     }
@@ -1697,7 +1695,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
   ShouldNotReachHere();
 }
 
-void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
   // Empty encoding
 }
 
@@ -1751,9 +1749,8 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
-  C2_MacroAssembler _masm(&cbuf);
 
   // n.b. frame size includes space for return pc and rfp
   const int framesize = C->output()->frame_size_in_bytes();
@@ -1802,7 +1799,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
         guard = &stub->guard();
       }
       // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
-      bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
+      bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
     }
   }
 
@@ -1810,7 +1807,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
     Unimplemented();
   }
 
-  C->output()->set_frame_complete(cbuf.insts_size());
+  C->output()->set_frame_complete(__ offset());
 
   if (C->has_mach_constant_base_node()) {
     // NOTE: We set the table base offset here because users might be
@@ -1864,9 +1861,8 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
-  C2_MacroAssembler _masm(&cbuf);
   int framesize = C->output()->frame_slots() << LogBytesPerInt;
 
   __ remove_frame(framesize);
@@ -1938,7 +1934,7 @@ static enum RC rc_class(OptoReg::Name reg) {
   return rc_stack;
 }
 
-uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
+uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
   Compile* C = ra_->C;
 
   // Get registers to move.
@@ -1971,8 +1967,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
 
   if (bottom_type()->isa_vect() && !bottom_type()->isa_vectmask()) {
     uint ireg = ideal_reg();
-    if (ireg == Op_VecA && cbuf) {
-      C2_MacroAssembler _masm(cbuf);
+    if (ireg == Op_VecA && masm) {
       int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
         // stack->stack
@@ -1991,9 +1986,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
       } else {
         ShouldNotReachHere();
       }
-    } else if (cbuf) {
+    } else if (masm) {
       assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
-      C2_MacroAssembler _masm(cbuf);
       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
         // stack->stack
@@ -2020,8 +2014,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
         ShouldNotReachHere();
       }
     }
-  } else if (cbuf) {
-    C2_MacroAssembler _masm(cbuf);
+  } else if (masm) {
     switch (src_lo_rc) {
     case rc_int:
       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
@@ -2029,7 +2022,6 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
                    as_Register(Matcher::_regEncode[src_lo]));
         } else {
-            C2_MacroAssembler _masm(cbuf);
             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
                     as_Register(Matcher::_regEncode[src_lo]));
         }
@@ -2157,8 +2149,8 @@ void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  implementation(&cbuf, ra_, false, nullptr);
+void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  implementation(masm, ra_, false, nullptr);
 }
 
 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
@@ -2176,9 +2168,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
-
+void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
   int reg    = ra_->get_encode(this);
 
@@ -2217,10 +2207,8 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 }
 #endif
 
-void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 {
-  // This is the unverified entry point.
-  C2_MacroAssembler _masm(&cbuf);
   __ ic_check(InteriorEntryAlignment);
 }
 
@@ -2234,13 +2222,12 @@ uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 //=============================================================================
 
 // Emit exception handler code.
-int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
+int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
 {
   // mov rscratch1 #exception_blob_entry_point
   // br rscratch1
   // Note that the code buffer's insts_mark is always relative to insts.
   // That's why we must use the macroassembler to generate a handler.
-  C2_MacroAssembler _masm(&cbuf);
   address base = __ start_a_stub(size_exception_handler());
   if (base == nullptr) {
     ciEnv::current()->record_failure("CodeCache is full");
@@ -2254,11 +2241,10 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 }
 
 // Emit deopt handler code.
-int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
+int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm)
 {
   // Note that the code buffer's insts_mark is always relative to insts.
   // That's why we must use the macroassembler to generate a handler.
-  C2_MacroAssembler _masm(&cbuf);
   address base = __ start_a_stub(size_deopt_handler());
   if (base == nullptr) {
     ciEnv::current()->record_failure("CodeCache is full");
@@ -2677,7 +2663,6 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack,
 }
 
 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
-  C2_MacroAssembler _masm(&cbuf);                                       \
   {                                                                     \
     guarantee(INDEX == -1, "mode not permitted for volatile");          \
     guarantee(DISP == 0, "mode not permitted for volatile");            \
@@ -2722,7 +2707,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
   // Used for all non-volatile memory accesses.  The use of
   // $mem->opcode() to discover whether this pattern uses sign-extended
   // offsets is something of a kludge.
-  static void loadStore(C2_MacroAssembler masm, mem_insn insn,
+  static void loadStore(C2_MacroAssembler* masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int scale, int disp,
                         int size_in_memory)
@@ -2732,12 +2717,12 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
       /* Fix up any out-of-range offsets. */
       assert_different_registers(rscratch1, base);
       assert_different_registers(rscratch1, reg);
-      addr = masm.legitimize_address(addr, size_in_memory, rscratch1);
+      addr = __ legitimize_address(addr, size_in_memory, rscratch1);
     }
-    (masm.*insn)(reg, addr);
+    (masm->*insn)(reg, addr);
   }
 
-  static void loadStore(C2_MacroAssembler masm, mem_float_insn insn,
+  static void loadStore(C2_MacroAssembler* masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp,
                         int size_in_memory)
@@ -2760,23 +2745,23 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
       /* Fix up any out-of-range offsets. */
       assert_different_registers(rscratch1, base);
       Address addr = Address(base, disp);
-      addr = masm.legitimize_address(addr, size_in_memory, rscratch1);
-      (masm.*insn)(reg, addr);
+      addr = __ legitimize_address(addr, size_in_memory, rscratch1);
+      (masm->*insn)(reg, addr);
     } else {
       assert(disp == 0, "unsupported address mode: disp = %d", disp);
-      (masm.*insn)(reg, Address(base, as_Register(index), scale));
+      (masm->*insn)(reg, Address(base, as_Register(index), scale));
     }
   }
 
-  static void loadStore(C2_MacroAssembler masm, mem_vector_insn insn,
+  static void loadStore(C2_MacroAssembler* masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
   {
     if (index == -1) {
-      (masm.*insn)(reg, T, Address(base, disp));
+      (masm->*insn)(reg, T, Address(base, disp));
     } else {
       assert(disp == 0, "unsupported address mode");
-      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
+      (masm->*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
     }
   }
 
@@ -2821,7 +2806,6 @@ encode %{
 
   // catch all for unimplemented encodings
   enc_class enc_unimplemented %{
-    C2_MacroAssembler _masm(&cbuf);
     __ unimplemented("C2 catch all");
   %}
 
@@ -2831,7 +2815,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrsbw(iRegI dst, memory1 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
   %}
 
@@ -2839,7 +2823,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrsb(iRegI dst, memory1 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
   %}
 
@@ -2847,7 +2831,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrb(iRegI dst, memory1 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
   %}
 
@@ -2855,7 +2839,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrb(iRegL dst, memory1 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
   %}
 
@@ -2863,7 +2847,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrshw(iRegI dst, memory2 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
   %}
 
@@ -2871,7 +2855,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrsh(iRegI dst, memory2 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
   %}
 
@@ -2879,7 +2863,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrh(iRegI dst, memory2 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
   %}
 
@@ -2887,7 +2871,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrh(iRegL dst, memory2 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
   %}
 
@@ -2895,7 +2879,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrw(iRegI dst, memory4 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
   %}
 
@@ -2903,7 +2887,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrw(iRegL dst, memory4 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
   %}
 
@@ -2911,7 +2895,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrsw(iRegL dst, memory4 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
   %}
 
@@ -2919,7 +2903,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldr(iRegL dst, memory8 mem) %{
     Register dst_reg = as_Register($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldr, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
   %}
 
@@ -2927,7 +2911,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrs(vRegF dst, memory4 mem) %{
     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
   %}
 
@@ -2935,7 +2919,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_ldrd(vRegD dst, memory8 mem) %{
     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
   %}
 
@@ -2943,15 +2927,14 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_strb(iRegI src, memory1 mem) %{
     Register src_reg = as_Register($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::strb, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
   %}
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_strb0(memory1 mem) %{
-    C2_MacroAssembler _masm(&cbuf);
-    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
   %}
 
@@ -2959,15 +2942,14 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_strh(iRegI src, memory2 mem) %{
     Register src_reg = as_Register($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::strh, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
   %}
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_strh0(memory2 mem) %{
-    C2_MacroAssembler _masm(&cbuf);
-    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::strh, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
   %}
 
@@ -2975,15 +2957,14 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_strw(iRegI src, memory4 mem) %{
     Register src_reg = as_Register($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::strw, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
   %}
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_strw0(memory4 mem) %{
-    C2_MacroAssembler _masm(&cbuf);
-    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::strw, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
   %}
 
@@ -2994,20 +2975,18 @@ encode %{
     // we sometimes get asked to store the stack pointer into the
     // current thread -- we cannot do that directly on AArch64
     if (src_reg == r31_sp) {
-      C2_MacroAssembler _masm(&cbuf);
       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
       __ mov(rscratch2, sp);
       src_reg = rscratch2;
     }
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::str, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
   %}
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_str0(memory8 mem) %{
-    C2_MacroAssembler _masm(&cbuf);
-    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::str, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
   %}
 
@@ -3015,7 +2994,7 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_strs(vRegF src, memory4 mem) %{
     FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::strs, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
   %}
 
@@ -3023,16 +3002,15 @@ encode %{
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_strd(vRegD src, memory8 mem) %{
     FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::strd, src_reg, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
   %}
 
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
-      C2_MacroAssembler _masm(&cbuf);
       __ membar(Assembler::StoreStore);
-      loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
+      loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
   %}
 
@@ -3041,49 +3019,49 @@ encode %{
   // Vector loads and stores
   enc_class aarch64_enc_ldrvH(vReg dst, memory mem) %{
     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::H,
+    loadStore(masm, &MacroAssembler::ldr, dst_reg, MacroAssembler::H,
        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
 
   enc_class aarch64_enc_ldrvS(vReg dst, memory mem) %{
     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
+    loadStore(masm, &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
 
   enc_class aarch64_enc_ldrvD(vReg dst, memory mem) %{
     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
+    loadStore(masm, &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
 
   enc_class aarch64_enc_ldrvQ(vReg dst, memory mem) %{
     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
+    loadStore(masm, &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
 
   enc_class aarch64_enc_strvH(vReg src, memory mem) %{
     FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::H,
+    loadStore(masm, &MacroAssembler::str, src_reg, MacroAssembler::H,
        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
 
   enc_class aarch64_enc_strvS(vReg src, memory mem) %{
     FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
+    loadStore(masm, &MacroAssembler::str, src_reg, MacroAssembler::S,
        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
 
   enc_class aarch64_enc_strvD(vReg src, memory mem) %{
     FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
+    loadStore(masm, &MacroAssembler::str, src_reg, MacroAssembler::D,
        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
 
   enc_class aarch64_enc_strvQ(vReg src, memory mem) %{
     FloatRegister src_reg = as_FloatRegister($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
+    loadStore(masm, &MacroAssembler::str, src_reg, MacroAssembler::Q,
        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
 
@@ -3199,7 +3177,6 @@ encode %{
     // we sometimes get asked to store the stack pointer into the
     // current thread -- we cannot do that directly on AArch64
     if (src_reg == r31_sp) {
-      C2_MacroAssembler _masm(&cbuf);
       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
       __ mov(rscratch2, sp);
       src_reg = rscratch2;
@@ -3215,7 +3192,6 @@ encode %{
 
   enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
     {
-      C2_MacroAssembler _masm(&cbuf);
       FloatRegister src_reg = as_FloatRegister($src$$reg);
       __ fmovs(rscratch2, src_reg);
     }
@@ -3225,7 +3201,6 @@ encode %{
 
   enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
     {
-      C2_MacroAssembler _masm(&cbuf);
       FloatRegister src_reg = as_FloatRegister($src$$reg);
       __ fmovd(rscratch2, src_reg);
     }
@@ -3236,7 +3211,6 @@ encode %{
   // synchronized read/update encodings
 
   enc_class aarch64_enc_ldaxr(iRegL dst, memory8 mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register base = as_Register($mem$$base);
     int index = $mem$$index;
@@ -3265,7 +3239,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_stlxr(iRegLNoSp src, memory8 mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register src_reg = as_Register($src$$reg);
     Register base = as_Register($mem$$base);
     int index = $mem$$index;
@@ -3295,7 +3268,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
                Assembler::xword, /*acquire*/ false, /*release*/ true,
@@ -3303,7 +3275,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
                Assembler::word, /*acquire*/ false, /*release*/ true,
@@ -3311,7 +3282,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
                Assembler::halfword, /*acquire*/ false, /*release*/ true,
@@ -3319,7 +3289,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
                Assembler::byte, /*acquire*/ false, /*release*/ true,
@@ -3332,7 +3301,6 @@ encode %{
   // CompareAndSwap sequence to serve as a barrier on acquiring a
   // lock.
   enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
                Assembler::xword, /*acquire*/ true, /*release*/ true,
@@ -3340,7 +3308,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
                Assembler::word, /*acquire*/ true, /*release*/ true,
@@ -3348,7 +3315,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
                Assembler::halfword, /*acquire*/ true, /*release*/ true,
@@ -3356,7 +3322,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
                Assembler::byte, /*acquire*/ true, /*release*/ true,
@@ -3365,7 +3330,6 @@ encode %{
 
   // auxiliary used for CompareAndSwapX to set result register
   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register res_reg = as_Register($res$$reg);
     __ cset(res_reg, Assembler::EQ);
   %}
@@ -3373,7 +3337,6 @@ encode %{
   // prefetch encodings
 
   enc_class aarch64_enc_prefetchw(memory mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register base = as_Register($mem$$base);
     int index = $mem$$index;
     int scale = $mem$$scale;
@@ -3394,7 +3357,6 @@ encode %{
   /// mov envcodings
 
   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
-    C2_MacroAssembler _masm(&cbuf);
     uint32_t con = (uint32_t)$src$$constant;
     Register dst_reg = as_Register($dst$$reg);
     if (con == 0) {
@@ -3405,7 +3367,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     uint64_t con = (uint64_t)$src$$constant;
     if (con == 0) {
@@ -3416,7 +3377,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     address con = (address)$src$$constant;
     if (con == nullptr || con == (address)1) {
@@ -3442,24 +3402,20 @@ encode %{
   %}
 
   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     __ mov(dst_reg, zr);
   %}
 
   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     __ mov(dst_reg, (uint64_t)1);
   %}
 
   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
-    C2_MacroAssembler _masm(&cbuf);
     __ load_byte_map_base($dst$$Register);
   %}
 
   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     address con = (address)$src$$constant;
     if (con == nullptr) {
@@ -3472,13 +3428,11 @@ encode %{
   %}
 
   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     __ mov(dst_reg, zr);
   %}
 
   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     address con = (address)$src$$constant;
     if (con == nullptr) {
@@ -3493,7 +3447,6 @@ encode %{
   // arithmetic encodings
 
   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register src_reg = as_Register($src1$$reg);
     int32_t con = (int32_t)$src2$$constant;
@@ -3507,7 +3460,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register src_reg = as_Register($src1$$reg);
     int32_t con = (int32_t)$src2$$constant;
@@ -3521,7 +3473,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
@@ -3529,7 +3480,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
@@ -3537,7 +3487,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
@@ -3545,7 +3494,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
@@ -3555,14 +3503,12 @@ encode %{
   // compare instruction encodings
 
   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register reg1 = as_Register($src1$$reg);
     Register reg2 = as_Register($src2$$reg);
     __ cmpw(reg1, reg2);
   %}
 
   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register reg = as_Register($src1$$reg);
     int32_t val = $src2$$constant;
     if (val >= 0) {
@@ -3573,7 +3519,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register reg1 = as_Register($src1$$reg);
     uint32_t val = (uint32_t)$src2$$constant;
     __ movw(rscratch1, val);
@@ -3581,14 +3526,12 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register reg1 = as_Register($src1$$reg);
     Register reg2 = as_Register($src2$$reg);
     __ cmp(reg1, reg2);
   %}
 
   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register reg = as_Register($src1$$reg);
     int64_t val = $src2$$constant;
     if (val >= 0) {
@@ -3603,7 +3546,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register reg1 = as_Register($src1$$reg);
     uint64_t val = (uint64_t)$src2$$constant;
     __ mov(rscratch1, val);
@@ -3611,45 +3553,38 @@ encode %{
   %}
 
   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register reg1 = as_Register($src1$$reg);
     Register reg2 = as_Register($src2$$reg);
     __ cmp(reg1, reg2);
   %}
 
   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register reg1 = as_Register($src1$$reg);
     Register reg2 = as_Register($src2$$reg);
     __ cmpw(reg1, reg2);
   %}
 
   enc_class aarch64_enc_testp(iRegP src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register reg = as_Register($src$$reg);
     __ cmp(reg, zr);
   %}
 
   enc_class aarch64_enc_testn(iRegN src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register reg = as_Register($src$$reg);
     __ cmpw(reg, zr);
   %}
 
   enc_class aarch64_enc_b(label lbl) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label *L = $lbl$$label;
     __ b(*L);
   %}
 
   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label *L = $lbl$$label;
     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
   %}
 
   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label *L = $lbl$$label;
     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
   %}
@@ -3662,7 +3597,6 @@ encode %{
      Register result_reg = as_Register($result$$reg);
 
      Label miss;
-     C2_MacroAssembler _masm(&cbuf);
      __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                      nullptr, &miss,
                                      /*set_cond_codes:*/ true);
@@ -3673,8 +3607,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_java_static_call(method meth) %{
-    C2_MacroAssembler _masm(&cbuf);
-
     address addr = (address)$meth$$method;
     address call;
     if (!_method) {
@@ -3690,7 +3622,7 @@ encode %{
       __ nop();
       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
     } else {
-      int method_index = resolved_method_index(cbuf);
+      int method_index = resolved_method_index(masm);
       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                   : static_call_Relocation::spec(method_index);
       call = __ trampoline_call(Address(addr, rspec));
@@ -3701,10 +3633,10 @@ encode %{
       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
         // Calls of the same statically bound method can share
         // a stub to the interpreter.
-        cbuf.shared_stub_to_interp_for(_method, call - cbuf.insts_begin());
+        __ code()->shared_stub_to_interp_for(_method, call - __ begin());
       } else {
         // Emit stub for static call
-        address stub = CompiledDirectCall::emit_to_interp_stub(cbuf, call);
+        address stub = CompiledDirectCall::emit_to_interp_stub(masm, call);
         if (stub == nullptr) {
           ciEnv::current()->record_failure("CodeCache is full");
           return;
@@ -3721,8 +3653,7 @@ encode %{
   %}
 
   enc_class aarch64_enc_java_dynamic_call(method meth) %{
-    C2_MacroAssembler _masm(&cbuf);
-    int method_index = resolved_method_index(cbuf);
+    int method_index = resolved_method_index(masm);
     address call = __ ic_call((address)$meth$$method, method_index);
     if (call == nullptr) {
       ciEnv::current()->record_failure("CodeCache is full");
@@ -3735,7 +3666,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_call_epilog() %{
-    C2_MacroAssembler _masm(&cbuf);
     if (VerifyStackAtCalls) {
       // Check that stack depth is unchanged: find majik cookie on stack
       __ call_Unimplemented();
@@ -3743,8 +3673,6 @@ encode %{
   %}
 
   enc_class aarch64_enc_java_to_runtime(method meth) %{
-    C2_MacroAssembler _masm(&cbuf);
-
     // some calls to generated routines (arraycopy code) are scheduled
     // by C2 as runtime calls. if so we can call them using a br (they
     // will be in a reachable segment) otherwise we have to use a blr
@@ -3775,12 +3703,10 @@ encode %{
   %}
 
   enc_class aarch64_enc_rethrow() %{
-    C2_MacroAssembler _masm(&cbuf);
     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
   %}
 
   enc_class aarch64_enc_ret() %{
-    C2_MacroAssembler _masm(&cbuf);
 #ifdef ASSERT
     if (Compile::current()->max_vector_size() > 0) {
       __ verify_ptrue();
@@ -3790,13 +3716,11 @@ encode %{
   %}
 
   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register target_reg = as_Register($jump_target$$reg);
     __ br(target_reg);
   %}
 
   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register target_reg = as_Register($jump_target$$reg);
     // exception oop should be in r0
     // ret addr has been popped into lr
@@ -4565,7 +4489,7 @@ operand immP()
   interface(CONST_INTER);
 %}
 
-// Null Pointer Immediate
+// Null Pointer Immediate (pointer constant 0)
 operand immP0()
 %{
   predicate(n->get_ptr() == 0);
@@ -4673,7 +4597,7 @@ operand immN()
   interface(CONST_INTER);
 %}
 
-// Narrow Null Pointer Immediate
+// Narrow Null Pointer Immediate (narrow constant 0)
 operand immN0()
 %{
   predicate(n->get_narrowcon() == 0);
@@ -6768,7 +6692,7 @@ instruct loadConP0(iRegPNoSp dst, immP0 con)
   match(Set dst con);
 
   ins_cost(INSN_COST);
-  format %{ "mov  $dst, $con\t# null pointer" %}
+  format %{ "mov  $dst, $con\t# nullptr ptr" %}
 
   ins_encode(aarch64_enc_mov_p0(dst, con));
 
@@ -6782,7 +6706,7 @@ instruct loadConP1(iRegPNoSp dst, immP_1 con)
   match(Set dst con);
 
   ins_cost(INSN_COST);
-  format %{ "mov  $dst, $con\t# null pointer" %}
+  format %{ "mov  $dst, $con\t# nullptr ptr" %}
 
   ins_encode(aarch64_enc_mov_p1(dst, con));
 
@@ -6824,7 +6748,7 @@ instruct loadConN0(iRegNNoSp dst, immN0 con)
   match(Set dst con);
 
   ins_cost(INSN_COST);
-  format %{ "mov  $dst, $con\t# compressed null pointer" %}
+  format %{ "mov  $dst, $con\t# compressed nullptr ptr" %}
 
   ins_encode(aarch64_enc_mov_n0(dst, con));
 
@@ -7735,7 +7659,7 @@ instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
             "mov    $dst, $tmp\t# vector (1D)" %}
   ins_encode %{
     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
@@ -7776,7 +7700,7 @@ instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
             "mov    $dst, $tmp\t# vector (1D)" %}
   ins_encode %{
     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
@@ -16870,7 +16794,7 @@ instruct compressBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask,
             "mov    $dst, $tdst"
           %}
   ins_encode %{
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, $tsrc$$FloatRegister, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrs, $tsrc$$FloatRegister, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
     __ ldrs($tmask$$FloatRegister, $constantaddress($mask));
     __ sve_bext($tdst$$FloatRegister, __ S, $tsrc$$FloatRegister, $tmask$$FloatRegister);
@@ -16907,7 +16831,7 @@ instruct compressBitsL_memcon(iRegLNoSp dst, memory8 mem, immL mask,
             "mov    $dst, $tdst"
           %}
   ins_encode %{
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, $tsrc$$FloatRegister, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrd, $tsrc$$FloatRegister, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
     __ ldrd($tmask$$FloatRegister, $constantaddress($mask));
     __ sve_bext($tdst$$FloatRegister, __ D, $tsrc$$FloatRegister, $tmask$$FloatRegister);
@@ -16944,7 +16868,7 @@ instruct expandBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask,
             "mov    $dst, $tdst"
           %}
   ins_encode %{
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, $tsrc$$FloatRegister, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrs, $tsrc$$FloatRegister, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
     __ ldrs($tmask$$FloatRegister, $constantaddress($mask));
     __ sve_bdep($tdst$$FloatRegister, __ S, $tsrc$$FloatRegister, $tmask$$FloatRegister);
@@ -16982,7 +16906,7 @@ instruct expandBitsL_memcon(iRegINoSp dst, memory8 mem, immL mask,
             "mov    $dst, $tdst"
           %}
   ins_encode %{
-    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, $tsrc$$FloatRegister, $mem->opcode(),
+    loadStore(masm, &MacroAssembler::ldrd, $tsrc$$FloatRegister, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
     __ ldrd($tmask$$FloatRegister, $constantaddress($mask));
     __ sve_bdep($tdst$$FloatRegister, __ D, $tsrc$$FloatRegister, $tmask$$FloatRegister);
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index d611c14f403ab..467d6ec22508d 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -94,7 +94,7 @@ source %{
                                                              PRegister Pg, const Address &adr);
 
   // Predicated load/store, with optional ptrue to all elements of given predicate register.
-  static void loadStoreA_predicated(C2_MacroAssembler masm, bool is_store, FloatRegister reg,
+  static void loadStoreA_predicated(C2_MacroAssembler* masm, bool is_store, FloatRegister reg,
                                     PRegister pg, BasicType mem_elem_bt, BasicType vector_elem_bt,
                                     int opcode, Register base, int index, int size, int disp) {
     sve_mem_insn_predicate insn;
@@ -119,7 +119,7 @@ source %{
         ShouldNotReachHere();
       }
       int imm4 = disp / mesize / Matcher::scalable_vector_reg_size(vector_elem_bt);
-      (masm.*insn)(reg, Assembler::elemType_to_regVariant(vector_elem_bt), pg, Address(base, imm4));
+      (masm->*insn)(reg, Assembler::elemType_to_regVariant(vector_elem_bt), pg, Address(base, imm4));
     } else {
       assert(false, "unimplemented");
       ShouldNotReachHere();
@@ -422,7 +422,7 @@ instruct loadV(vReg dst, vmemA mem) %{
     BasicType bt = Matcher::vector_element_basic_type(this);
     uint length_in_bytes = Matcher::vector_length_in_bytes(this);
     assert(length_in_bytes == MaxVectorSize, "invalid vector length");
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), /* is_store */ false,
+    loadStoreA_predicated(masm, /* is_store */ false,
                           $dst$$FloatRegister, ptrue, bt, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -439,7 +439,7 @@ instruct storeV(vReg src, vmemA mem) %{
     BasicType bt = Matcher::vector_element_basic_type(this, $src);
     uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
     assert(length_in_bytes == MaxVectorSize, "invalid vector length");
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), /* is_store */ true,
+    loadStoreA_predicated(masm, /* is_store */ true,
                           $src$$FloatRegister, ptrue, bt, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -454,7 +454,7 @@ instruct loadV_masked(vReg dst, vmemA mem, pRegGov pg) %{
   format %{ "loadV_masked $dst, $pg, $mem" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), /* is_store */ false, $dst$$FloatRegister,
+    loadStoreA_predicated(masm, /* is_store */ false, $dst$$FloatRegister,
                           $pg$$PRegister, bt, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -467,7 +467,7 @@ instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{
   format %{ "storeV_masked $mem, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this, $src);
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), /* is_store */ true, $src$$FloatRegister,
+    loadStoreA_predicated(masm, /* is_store */ true, $src$$FloatRegister,
                           $pg$$PRegister, bt, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -4929,7 +4929,7 @@ instruct vloadmask_loadV(pReg dst, indirect mem, vReg tmp, rFlagsReg cr) %{
     BasicType bt = Matcher::vector_element_basic_type(this);
     uint length_in_bytes = Matcher::vector_length_in_bytes(this);
     assert(length_in_bytes == MaxVectorSize, "invalid vector length");
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, false, $tmp$$FloatRegister,
                           ptrue, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
     __ sve_cmp(Assembler::NE, $dst$$PRegister, __ elemType_to_regVariant(bt),
@@ -4950,7 +4950,7 @@ instruct vloadmask_loadV_masked(pReg dst, indirect mem, pRegGov pg,
     // Load valid mask values which are boolean type, and extend them to the
     // defined vector element type. Convert the vector to predicate.
     BasicType bt = Matcher::vector_element_basic_type(this);
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, false, $tmp$$FloatRegister,
                           $pg$$PRegister, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
     __ sve_cmp(Assembler::NE, $dst$$PRegister, __ elemType_to_regVariant(bt),
@@ -4977,7 +4977,7 @@ instruct vloadmask_loadVMasked(pReg dst, vmemA mem, pRegGov pg, vReg tmp, rFlags
     BasicType bt = Matcher::vector_element_basic_type(this);
     uint length_in_bytes = Matcher::vector_length_in_bytes(this);
     assert(length_in_bytes == MaxVectorSize, "invalid vector length");
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, false, $tmp$$FloatRegister,
                           ptrue, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
     __ sve_cmp(Assembler::NE, $dst$$PRegister, __ elemType_to_regVariant(bt),
@@ -5005,7 +5005,7 @@ instruct vloadmask_loadVMasked_masked(pReg dst, vmemA mem, pRegGov pg1, pRegGov
     BasicType bt = Matcher::vector_element_basic_type(this);
     uint length_in_bytes = Matcher::vector_length_in_bytes(this);
     assert(length_in_bytes == MaxVectorSize, "invalid vector length");
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, false, $tmp$$FloatRegister,
                           $pg2$$PRegister, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
     __ sve_cmp(Assembler::NE, $dst$$PRegister, __ elemType_to_regVariant(bt),
@@ -5030,7 +5030,7 @@ instruct storeV_vstoremask(indirect mem, pReg src, immI_gt_1 esize, vReg tmp) %{
     assert(type2aelembytes(bt) == (int)$esize$$constant, "unsupported type");
     Assembler::SIMD_RegVariant size = __ elemBytes_to_regVariant($esize$$constant);
     __ sve_cpy($tmp$$FloatRegister, size, $src$$PRegister, 1, false);
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, true, $tmp$$FloatRegister,
                           ptrue, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -5052,7 +5052,7 @@ instruct storeV_vstoremask_masked(indirect mem, pReg src, immI_gt_1 esize,
     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
     __ sve_cpy($tmp$$FloatRegister, size, $src$$PRegister, 1, false);
     __ sve_gen_mask_imm($pgtmp$$PRegister, bt, Matcher::vector_length(this, $src));
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, true, $tmp$$FloatRegister,
                           $pgtmp$$PRegister, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -5078,7 +5078,7 @@ instruct storeVMasked_vstoremask(vmemA mem, pReg src, pRegGov pg, immI_gt_1 esiz
     assert(type2aelembytes(bt) == (int)$esize$$constant, "unsupported type.");
     Assembler::SIMD_RegVariant size = __ elemBytes_to_regVariant($esize$$constant);
     __ sve_cpy($tmp$$FloatRegister, size, $src$$PRegister, 1, false);
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, true, $tmp$$FloatRegister,
                           ptrue, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -5105,7 +5105,7 @@ instruct storeVMasked_vstoremask_masked(vmemA mem, pReg src, pRegGov pg, immI_gt
     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
     __ sve_cpy($tmp$$FloatRegister, size, $src$$PRegister, 1, false);
     __ sve_gen_mask_imm($pgtmp$$PRegister, bt, Matcher::vector_length(this, $src));
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, true, $tmp$$FloatRegister,
                           $pgtmp$$PRegister, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index 5c4e13d432f51..d31a4e05799c2 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -84,7 +84,7 @@ source %{
                                                              PRegister Pg, const Address &adr);
 
   // Predicated load/store, with optional ptrue to all elements of given predicate register.
-  static void loadStoreA_predicated(C2_MacroAssembler masm, bool is_store, FloatRegister reg,
+  static void loadStoreA_predicated(C2_MacroAssembler* masm, bool is_store, FloatRegister reg,
                                     PRegister pg, BasicType mem_elem_bt, BasicType vector_elem_bt,
                                     int opcode, Register base, int index, int size, int disp) {
     sve_mem_insn_predicate insn;
@@ -109,7 +109,7 @@ source %{
         ShouldNotReachHere();
       }
       int imm4 = disp / mesize / Matcher::scalable_vector_reg_size(vector_elem_bt);
-      (masm.*insn)(reg, Assembler::elemType_to_regVariant(vector_elem_bt), pg, Address(base, imm4));
+      (masm->*insn)(reg, Assembler::elemType_to_regVariant(vector_elem_bt), pg, Address(base, imm4));
     } else {
       assert(false, "unimplemented");
       ShouldNotReachHere();
@@ -361,7 +361,7 @@ instruct loadV(vReg dst, vmemA mem) %{
     BasicType bt = Matcher::vector_element_basic_type(this);
     uint length_in_bytes = Matcher::vector_length_in_bytes(this);
     assert(length_in_bytes == MaxVectorSize, "invalid vector length");
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), /* is_store */ false,
+    loadStoreA_predicated(masm, /* is_store */ false,
                           $dst$$FloatRegister, ptrue, bt, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -378,7 +378,7 @@ instruct storeV(vReg src, vmemA mem) %{
     BasicType bt = Matcher::vector_element_basic_type(this, $src);
     uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
     assert(length_in_bytes == MaxVectorSize, "invalid vector length");
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), /* is_store */ true,
+    loadStoreA_predicated(masm, /* is_store */ true,
                           $src$$FloatRegister, ptrue, bt, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -393,7 +393,7 @@ instruct loadV_masked(vReg dst, vmemA mem, pRegGov pg) %{
   format %{ "loadV_masked $dst, $pg, $mem" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), /* is_store */ false, $dst$$FloatRegister,
+    loadStoreA_predicated(masm, /* is_store */ false, $dst$$FloatRegister,
                           $pg$$PRegister, bt, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -406,7 +406,7 @@ instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{
   format %{ "storeV_masked $mem, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this, $src);
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), /* is_store */ true, $src$$FloatRegister,
+    loadStoreA_predicated(masm, /* is_store */ true, $src$$FloatRegister,
                           $pg$$PRegister, bt, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -3321,7 +3321,7 @@ instruct vloadmask_loadV(pReg dst, indirect mem, vReg tmp, rFlagsReg cr) %{
     BasicType bt = Matcher::vector_element_basic_type(this);
     uint length_in_bytes = Matcher::vector_length_in_bytes(this);
     assert(length_in_bytes == MaxVectorSize, "invalid vector length");
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, false, $tmp$$FloatRegister,
                           ptrue, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
     __ sve_cmp(Assembler::NE, $dst$$PRegister, __ elemType_to_regVariant(bt),
@@ -3342,7 +3342,7 @@ instruct vloadmask_loadV_masked(pReg dst, indirect mem, pRegGov pg,
     // Load valid mask values which are boolean type, and extend them to the
     // defined vector element type. Convert the vector to predicate.
     BasicType bt = Matcher::vector_element_basic_type(this);
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, false, $tmp$$FloatRegister,
                           $pg$$PRegister, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
     __ sve_cmp(Assembler::NE, $dst$$PRegister, __ elemType_to_regVariant(bt),
@@ -3369,7 +3369,7 @@ instruct vloadmask_loadVMasked(pReg dst, vmemA mem, pRegGov pg, vReg tmp, rFlags
     BasicType bt = Matcher::vector_element_basic_type(this);
     uint length_in_bytes = Matcher::vector_length_in_bytes(this);
     assert(length_in_bytes == MaxVectorSize, "invalid vector length");
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, false, $tmp$$FloatRegister,
                           ptrue, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
     __ sve_cmp(Assembler::NE, $dst$$PRegister, __ elemType_to_regVariant(bt),
@@ -3397,7 +3397,7 @@ instruct vloadmask_loadVMasked_masked(pReg dst, vmemA mem, pRegGov pg1, pRegGov
     BasicType bt = Matcher::vector_element_basic_type(this);
     uint length_in_bytes = Matcher::vector_length_in_bytes(this);
     assert(length_in_bytes == MaxVectorSize, "invalid vector length");
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, false, $tmp$$FloatRegister,
                           $pg2$$PRegister, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
     __ sve_cmp(Assembler::NE, $dst$$PRegister, __ elemType_to_regVariant(bt),
@@ -3422,7 +3422,7 @@ instruct storeV_vstoremask(indirect mem, pReg src, immI_gt_1 esize, vReg tmp) %{
     assert(type2aelembytes(bt) == (int)$esize$$constant, "unsupported type");
     Assembler::SIMD_RegVariant size = __ elemBytes_to_regVariant($esize$$constant);
     __ sve_cpy($tmp$$FloatRegister, size, $src$$PRegister, 1, false);
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, true, $tmp$$FloatRegister,
                           ptrue, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -3444,7 +3444,7 @@ instruct storeV_vstoremask_masked(indirect mem, pReg src, immI_gt_1 esize,
     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
     __ sve_cpy($tmp$$FloatRegister, size, $src$$PRegister, 1, false);
     __ sve_gen_mask_imm($pgtmp$$PRegister, bt, Matcher::vector_length(this, $src));
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, true, $tmp$$FloatRegister,
                           $pgtmp$$PRegister, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -3470,7 +3470,7 @@ instruct storeVMasked_vstoremask(vmemA mem, pReg src, pRegGov pg, immI_gt_1 esiz
     assert(type2aelembytes(bt) == (int)$esize$$constant, "unsupported type.");
     Assembler::SIMD_RegVariant size = __ elemBytes_to_regVariant($esize$$constant);
     __ sve_cpy($tmp$$FloatRegister, size, $src$$PRegister, 1, false);
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, true, $tmp$$FloatRegister,
                           ptrue, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
@@ -3497,7 +3497,7 @@ instruct storeVMasked_vstoremask_masked(vmemA mem, pReg src, pRegGov pg, immI_gt
     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
     __ sve_cpy($tmp$$FloatRegister, size, $src$$PRegister, 1, false);
     __ sve_gen_mask_imm($pgtmp$$PRegister, bt, Matcher::vector_length(this, $src));
-    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, $tmp$$FloatRegister,
+    loadStoreA_predicated(masm, true, $tmp$$FloatRegister,
                           $pgtmp$$PRegister, T_BOOLEAN, bt, $mem->opcode(),
                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
   %}
diff --git a/src/hotspot/cpu/aarch64/ad_encode.m4 b/src/hotspot/cpu/aarch64/ad_encode.m4
index 4897998d8709e..008dbd2c9369c 100644
--- a/src/hotspot/cpu/aarch64/ad_encode.m4
+++ b/src/hotspot/cpu/aarch64/ad_encode.m4
@@ -29,7 +29,7 @@ define(choose, `loadStore($1, &MacroAssembler::$3, $2, $4,
   %}')dnl
 define(access, `
     $3Register $1_reg = as_$3Register($$1$$reg);
-    $4choose(C2_MacroAssembler(&cbuf), $1_reg,$2,$mem->opcode(),
+    $4choose(masm, $1_reg,$2,$mem->opcode(),
         as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp,$5)')dnl
 define(load,`
   // This encoding class is generated automatically from ad_encode.m4.
@@ -59,8 +59,7 @@ define(STORE0,`
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_$2`'0(memory$4 mem) %{
-    C2_MacroAssembler _masm(&cbuf);
-    choose(_masm,zr,$2,$mem->opcode(),
+    choose(masm,zr,$2,$mem->opcode(),
         as_$3Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp,$4)')dnl
 STORE(iRegI,strb,,,1)
 STORE0(iRegI,strb,,1)
@@ -72,7 +71,6 @@ STORE(iRegL,str,,
 `// we sometimes get asked to store the stack pointer into the
     // current thread -- we cannot do that directly on AArch64
     if (src_reg == r31_sp) {
-      C2_MacroAssembler _masm(&cbuf);
       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
       __ mov(rscratch2, sp);
       src_reg = rscratch2;
@@ -85,8 +83,7 @@ STORE(vRegD,strd,Float,,8)
   // This encoding class is generated automatically from ad_encode.m4.
   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
   enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
-      C2_MacroAssembler _masm(&cbuf);
       __ membar(Assembler::StoreStore);
-      loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
+      loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(),
                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
   %}
diff --git a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
index 23c08f11d1a8b..03ee729b7679a 100644
--- a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
@@ -34,10 +34,10 @@
 
 // ----------------------------------------------------------------------------
 
-#define __ _masm.
-address CompiledDirectCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
-  precond(cbuf.stubs()->start() != badAddress);
-  precond(cbuf.stubs()->end() != badAddress);
+#define __ masm->
+address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address mark) {
+  precond(__ code()->stubs()->start() != badAddress);
+  precond(__ code()->stubs()->end() != badAddress);
 
   // Stub is fixed up when the corresponding call is converted from
   // calling compiled code to calling interpreted code.
@@ -45,13 +45,9 @@ address CompiledDirectCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark)
   // jmp -4 # to self
 
   if (mark == nullptr) {
-    mark = cbuf.insts_mark();  // Get mark within main instrs section.
+    mark = __ inst_mark();  // Get mark within main instrs section.
   }
 
-  // Note that the code buffer's insts_mark is always relative to insts.
-  // That's why we must use the macroassembler to generate a stub.
-  MacroAssembler _masm(&cbuf);
-
   address base = __ start_a_stub(to_interp_stub_size());
   int offset = __ offset();
   if (base == nullptr) {
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad
index 0572e7d8d11cd..d5dcf7f953448 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad
@@ -29,20 +29,18 @@ source_hpp %{
 
 encode %{
   enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
-    MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
   %}
 
   enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
-    MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
   %}
 %}
@@ -77,7 +75,7 @@ instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, i
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
   %}
 
   ins_pipe(pipe_slow);
@@ -115,7 +113,7 @@ instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
   %}
 
   ins_pipe(pipe_slow);
@@ -131,7 +129,7 @@ instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldva
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    /*acquire*/ false, /*release*/ true, /*is_cae*/ true, $res$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -147,7 +145,7 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    /*acquire*/ false, /*release*/ true, /*is_cae*/ true, $res$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -164,7 +162,7 @@ instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN ol
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    /*acquire*/ true, /*release*/ true, /*is_cae*/ true, $res$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -181,7 +179,7 @@ instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP ol
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    /*acquire*/ true, /*release*/ true, /*is_cae*/ true, $res$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -199,7 +197,7 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
     // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -216,7 +214,7 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
     // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -235,7 +233,7 @@ instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN ol
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
     // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -254,7 +252,7 @@ instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP ol
     Register tmp = $tmp$$Register;
     __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
     // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
   %}
   ins_pipe(pipe_slow);
diff --git a/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad b/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad
index a8ef3ce9f13d6..c7c7165affb57 100644
--- a/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad
+++ b/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -31,7 +31,7 @@ source_hpp %{
 
 source %{
 
-static void x_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) {
+static void x_load_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) {
   if (barrier_data == XLoadBarrierElided) {
     return;
   }
@@ -42,7 +42,7 @@ static void x_load_barrier(MacroAssembler& _masm, const MachNode* node, Address
   __ bind(*stub->continuation());
 }
 
-static void x_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
+static void x_load_barrier_slow_path(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
   XLoadBarrierStubC2* const stub = XLoadBarrierStubC2::create(node, ref_addr, ref, tmp, XLoadBarrierStrong);
   __ b(*stub->entry());
   __ bind(*stub->continuation());
@@ -64,7 +64,7 @@ instruct xLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
   ins_encode %{
     const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
     __ ldr($dst$$Register, ref_addr);
-    x_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, barrier_data());
+    x_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, barrier_data());
   %}
 
   ins_pipe(iload_reg_mem);
@@ -83,7 +83,7 @@ instruct xLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg
 
   ins_encode %{
     __ ldar($dst$$Register, $mem$$Register);
-    x_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, barrier_data());
+    x_load_barrier(masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, barrier_data());
   %}
 
   ins_pipe(pipe_serial);
@@ -110,7 +110,7 @@ instruct xCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newva
       __ ldr(rscratch1, Address(rthread, XThreadLocalData::address_bad_mask_offset()));
       __ andr(rscratch1, rscratch1, rscratch2);
       __ cbz(rscratch1, good);
-      x_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */);
+      x_load_barrier_slow_path(masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */);
       __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
                  false /* acquire */, true /* release */, false /* weak */, rscratch2);
       __ cset($res$$Register, Assembler::EQ);
@@ -142,7 +142,7 @@ instruct xCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne
       __ ldr(rscratch1, Address(rthread, XThreadLocalData::address_bad_mask_offset()));
       __ andr(rscratch1, rscratch1, rscratch2);
       __ cbz(rscratch1, good);
-      x_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ );
+      x_load_barrier_slow_path(masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ );
       __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
                  true /* acquire */, true /* release */, false /* weak */, rscratch2);
       __ cset($res$$Register, Assembler::EQ);
@@ -171,7 +171,7 @@ instruct xCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP n
       __ ldr(rscratch1, Address(rthread, XThreadLocalData::address_bad_mask_offset()));
       __ andr(rscratch1, rscratch1, $res$$Register);
       __ cbz(rscratch1, good);
-      x_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */);
+      x_load_barrier_slow_path(masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */);
       __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
                  false /* acquire */, true /* release */, false /* weak */, $res$$Register);
       __ bind(good);
@@ -199,7 +199,7 @@ instruct xCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iReg
       __ ldr(rscratch1, Address(rthread, XThreadLocalData::address_bad_mask_offset()));
       __ andr(rscratch1, rscratch1, $res$$Register);
       __ cbz(rscratch1, good);
-      x_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */);
+      x_load_barrier_slow_path(masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */);
       __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
                  true /* acquire */, true /* release */, false /* weak */, $res$$Register);
       __ bind(good);
@@ -220,7 +220,7 @@ instruct xGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{
 
   ins_encode %{
     __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register);
-    x_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, barrier_data());
+    x_load_barrier(masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, barrier_data());
   %}
 
   ins_pipe(pipe_serial);
@@ -237,7 +237,7 @@ instruct xGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr)
 
   ins_encode %{
     __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register);
-    x_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, barrier_data());
+    x_load_barrier(masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, barrier_data());
   %}
   ins_pipe(pipe_serial);
 %}
diff --git a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad
index 23564a3f23c38..92181e2b6b908 100644
--- a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad
+++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad
@@ -33,40 +33,40 @@ source %{
 
 #include "gc/z/zBarrierSetAssembler.hpp"
 
-static void z_color(MacroAssembler& _masm, const MachNode* node, Register dst, Register src) {
+static void z_color(MacroAssembler* masm, const MachNode* node, Register dst, Register src) {
   assert_different_registers(src, dst);
   __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatStoreGoodBeforeMov);
   __ movzw(dst, barrier_Relocation::unpatched);
   __ orr(dst, dst, src, Assembler::LSL, ZPointerLoadShift);
 }
 
-static void z_uncolor(MacroAssembler& _masm, const MachNode* node, Register ref) {
+static void z_uncolor(MacroAssembler* masm, const MachNode* node, Register ref) {
   __ lsr(ref, ref, ZPointerLoadShift);
 }
 
-static void z_keep_alive_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
+static void z_keep_alive_load_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
   __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatMarkBadBeforeMov);
   __ movzw(tmp, barrier_Relocation::unpatched);
   __ tst(ref, tmp);
   ZLoadBarrierStubC2Aarch64* const stub = ZLoadBarrierStubC2Aarch64::create(node, ref_addr, ref);
   __ br(Assembler::NE, *stub->entry());
-  z_uncolor(_masm, node, ref);
+  z_uncolor(masm, node, ref);
   __ bind(*stub->continuation());
 }
 
-static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
-  Assembler::InlineSkippedInstructionsCounter skipped_counter(&_masm);
+static void z_load_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
+  Assembler::InlineSkippedInstructionsCounter skipped_counter(masm);
   const bool on_non_strong =
       ((node->barrier_data() & ZBarrierWeak) != 0) ||
       ((node->barrier_data() & ZBarrierPhantom) != 0);
 
   if (on_non_strong) {
-    z_keep_alive_load_barrier(_masm, node, ref_addr, ref, tmp);
+    z_keep_alive_load_barrier(masm, node, ref_addr, ref, tmp);
     return;
   }
 
   if (node->barrier_data() == ZBarrierElided) {
-    z_uncolor(_masm, node, ref);
+    z_uncolor(masm, node, ref);
     return;
   }
 
@@ -81,19 +81,19 @@ static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address
     __ b(*stub->entry());
     __ bind(good);
   }
-  z_uncolor(_masm, node, ref);
+  z_uncolor(masm, node, ref);
   __ bind(*stub->continuation());
 }
 
-static void z_store_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register rnew_zaddress, Register rnew_zpointer, Register tmp, bool is_atomic) {
-  Assembler::InlineSkippedInstructionsCounter skipped_counter(&_masm);
+static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register rnew_zaddress, Register rnew_zpointer, Register tmp, bool is_atomic) {
+  Assembler::InlineSkippedInstructionsCounter skipped_counter(masm);
   if (node->barrier_data() == ZBarrierElided) {
-    z_color(_masm, node, rnew_zpointer, rnew_zaddress);
+    z_color(masm, node, rnew_zpointer, rnew_zaddress);
   } else {
     bool is_native = (node->barrier_data() & ZBarrierNative) != 0;
     ZStoreBarrierStubC2Aarch64* const stub = ZStoreBarrierStubC2Aarch64::create(node, ref_addr, rnew_zaddress, rnew_zpointer, is_native, is_atomic);
     ZBarrierSetAssembler* bs_asm = ZBarrierSet::assembler();
-    bs_asm->store_barrier_fast(&_masm, ref_addr, rnew_zaddress, rnew_zpointer, tmp, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
+    bs_asm->store_barrier_fast(masm, ref_addr, rnew_zaddress, rnew_zpointer, tmp, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
   }
 }
 
@@ -113,7 +113,7 @@ instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
   ins_encode %{
     const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
     __ ldr($dst$$Register, ref_addr);
-    z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch1);
+    z_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch1);
   %}
 
   ins_pipe(iload_reg_mem);
@@ -133,7 +133,7 @@ instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg
   ins_encode %{
     const Address ref_addr = Address($mem$$Register);
     __ ldar($dst$$Register, $mem$$Register);
-    z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch1);
+    z_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch1);
   %}
 
   ins_pipe(pipe_serial);
@@ -150,7 +150,7 @@ instruct zStoreP(memory mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr)
   format %{ "movq    $mem, $src\t# ptr" %}
   ins_encode %{
     const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
-    z_store_barrier(_masm, this, ref_addr, $src$$Register, $tmp$$Register, rscratch2, false /* is_atomic */);
+    z_store_barrier(masm, this, ref_addr, $src$$Register, $tmp$$Register, rscratch2, false /* is_atomic */);
     __ str($tmp$$Register, ref_addr);
   %}
   ins_pipe(pipe_serial);
@@ -167,7 +167,7 @@ instruct zStorePVolatile(indirect mem, iRegP src, iRegPNoSp tmp, rFlagsReg cr)
   format %{ "movq    $mem, $src\t# ptr" %}
   ins_encode %{
     const Address ref_addr = Address($mem$$Register);
-    z_store_barrier(_masm, this, ref_addr, $src$$Register, $tmp$$Register, rscratch2, false /* is_atomic */);
+    z_store_barrier(masm, this, ref_addr, $src$$Register, $tmp$$Register, rscratch2, false /* is_atomic */);
     __ stlr($tmp$$Register, $mem$$Register);
   %}
   ins_pipe(pipe_serial);
@@ -187,8 +187,8 @@ instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newva
   ins_encode %{
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     Address ref_addr($mem$$Register);
-    z_store_barrier(_masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, rscratch2, true /* is_atomic */);
-    z_color(_masm, this, $oldval_tmp$$Register, $oldval$$Register);
+    z_store_barrier(masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, rscratch2, true /* is_atomic */);
+    z_color(masm, this, $oldval_tmp$$Register, $oldval$$Register);
     __ cmpxchg($mem$$Register, $oldval_tmp$$Register, $newval_tmp$$Register, Assembler::xword,
                false /* acquire */, true /* release */, false /* weak */, noreg);
     __ cset($res$$Register, Assembler::EQ);
@@ -211,8 +211,8 @@ instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne
   ins_encode %{
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     Address ref_addr($mem$$Register);
-    z_store_barrier(_masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, rscratch2, true /* is_atomic */);
-    z_color(_masm, this, $oldval_tmp$$Register, $oldval$$Register);
+    z_store_barrier(masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, rscratch2, true /* is_atomic */);
+    z_color(masm, this, $oldval_tmp$$Register, $oldval$$Register);
     __ cmpxchg($mem$$Register, $oldval_tmp$$Register, $newval_tmp$$Register, Assembler::xword,
                true /* acquire */, true /* release */, false /* weak */, noreg);
     __ cset($res$$Register, Assembler::EQ);
@@ -235,11 +235,11 @@ instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP n
   ins_encode %{
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     Address ref_addr($mem$$Register);
-    z_store_barrier(_masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, rscratch2, true /* is_atomic */);
-    z_color(_masm, this, $oldval_tmp$$Register, $oldval$$Register);
+    z_store_barrier(masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, rscratch2, true /* is_atomic */);
+    z_color(masm, this, $oldval_tmp$$Register, $oldval$$Register);
     __ cmpxchg($mem$$Register, $oldval_tmp$$Register, $newval_tmp$$Register, Assembler::xword,
                false /* acquire */, true /* release */, false /* weak */, $res$$Register);
-    z_uncolor(_masm, this, $res$$Register);
+    z_uncolor(masm, this, $res$$Register);
   %}
 
   ins_pipe(pipe_slow);
@@ -258,11 +258,11 @@ instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iReg
   ins_encode %{
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     Address ref_addr($mem$$Register);
-    z_store_barrier(_masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, rscratch2, true /* is_atomic */);
-    z_color(_masm, this, $oldval_tmp$$Register, $oldval$$Register);
+    z_store_barrier(masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, rscratch2, true /* is_atomic */);
+    z_color(masm, this, $oldval_tmp$$Register, $oldval$$Register);
     __ cmpxchg($mem$$Register, $oldval_tmp$$Register, $newval_tmp$$Register, Assembler::xword,
                true /* acquire */, true /* release */, false /* weak */, $res$$Register);
-    z_uncolor(_masm, this, $res$$Register);
+    z_uncolor(masm, this, $res$$Register);
   %}
 
   ins_pipe(pipe_slow);
@@ -278,9 +278,9 @@ instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{
   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
 
   ins_encode %{
-    z_store_barrier(_masm, this, Address($mem$$Register), $newv$$Register, $prev$$Register, rscratch2, true /* is_atomic */);
+    z_store_barrier(masm, this, Address($mem$$Register), $newv$$Register, $prev$$Register, rscratch2, true /* is_atomic */);
     __ atomic_xchg($prev$$Register, $prev$$Register, $mem$$Register);
-    z_uncolor(_masm, this, $prev$$Register);
+    z_uncolor(masm, this, $prev$$Register);
   %}
 
   ins_pipe(pipe_serial);
@@ -296,9 +296,9 @@ instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr)
   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
 
   ins_encode %{
-    z_store_barrier(_masm, this, Address($mem$$Register), $newv$$Register, $prev$$Register, rscratch2, true /* is_atomic */);
+    z_store_barrier(masm, this, Address($mem$$Register), $newv$$Register, $prev$$Register, rscratch2, true /* is_atomic */);
     __ atomic_xchgal($prev$$Register, $prev$$Register, $mem$$Register);
-    z_uncolor(_masm, this, $prev$$Register);
+    z_uncolor(masm, this, $prev$$Register);
   %}
 
   ins_pipe(pipe_serial);
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
index 97a10afde7ab2..b200fb4c4b0e0 100644
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -1108,8 +1108,7 @@ static void gen_continuation_enter(MacroAssembler* masm,
 
     __ b(exit);
 
-    CodeBuffer* cbuf = masm->code_section()->outer();
-    address stub = CompiledDirectCall::emit_to_interp_stub(*cbuf, tr_call);
+    address stub = CompiledDirectCall::emit_to_interp_stub(masm, tr_call);
     if (stub == nullptr) {
       fatal("CodeCache is full at gen_continuation_enter");
     }
@@ -1173,8 +1172,7 @@ static void gen_continuation_enter(MacroAssembler* masm,
       __ br(r1); // the exception handler
   }
 
-  CodeBuffer* cbuf = masm->code_section()->outer();
-  address stub = CompiledDirectCall::emit_to_interp_stub(*cbuf, tr_call);
+  address stub = CompiledDirectCall::emit_to_interp_stub(masm, tr_call);
   if (stub == nullptr) {
     fatal("CodeCache is full at gen_continuation_enter");
   }
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index 07c5b9532542c..638c48ad5aa31 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -105,8 +105,8 @@ class HandlerImpl {
 
  public:
 
-  static int emit_exception_handler(CodeBuffer &cbuf);
-  static int emit_deopt_handler(CodeBuffer& cbuf);
+  static int emit_exception_handler(C2_MacroAssembler *masm);
+  static int emit_deopt_handler(C2_MacroAssembler* masm);
 
   static uint size_exception_handler() {
     return ( 3 * 4 );
@@ -138,7 +138,7 @@ bool assert_not_var_shift(const Node *n) {
   return true;
 }
 
-#define __ _masm.
+#define __ masm->
 
 static FloatRegister reg_to_FloatRegister_object(int register_encoding);
 static Register reg_to_register_object(int register_encoding);
@@ -159,8 +159,7 @@ int MachNode::compute_padding(int current_offset) const {
 // REQUIRED FUNCTIONALITY
 
 // emit an interrupt that is caught by the debugger (for debugging compiler)
-void emit_break(CodeBuffer &cbuf) {
-  C2_MacroAssembler _masm(&cbuf);
+void emit_break(C2_MacroAssembler *masm) {
   __ breakpoint();
 }
 
@@ -170,8 +169,8 @@ void MachBreakpointNode::format( PhaseRegAlloc *, outputStream *st ) const {
 }
 #endif
 
-void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  emit_break(cbuf);
+void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  emit_break(masm);
 }
 
 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
@@ -179,16 +178,14 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 }
 
 
-void emit_nop(CodeBuffer &cbuf) {
-  C2_MacroAssembler _masm(&cbuf);
+void emit_nop(C2_MacroAssembler *masm) {
   __ nop();
 }
 
 
-void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) {
+void emit_call_reloc(C2_MacroAssembler *masm, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) {
   int ret_addr_offset0 = n->as_MachCall()->ret_addr_offset();
-  int call_site_offset = cbuf.insts()->mark_off();
-  C2_MacroAssembler _masm(&cbuf);
+  int call_site_offset = __ code()->insts()->mark_off();
   __ set_inst_mark(); // needed in emit_to_interp_stub() to locate the call
   address target = (address)m->method();
   assert(n->as_MachCall()->entry_point() == target, "sanity");
@@ -210,8 +207,8 @@ void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, Reloc
 
 //=============================================================================
 // REQUIRED FUNCTIONALITY for encoding
-void emit_lo(CodeBuffer &cbuf, int val) {  }
-void emit_hi(CodeBuffer &cbuf, int val) {  }
+void emit_lo(C2_MacroAssembler *masm, int val) {  }
+void emit_hi(C2_MacroAssembler *masm, int val) {  }
 
 
 //=============================================================================
@@ -232,10 +229,9 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
   ShouldNotReachHere();
 }
 
-void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
   Compile* C = ra_->C;
   ConstantTable& constant_table = C->output()->constant_table();
-  C2_MacroAssembler _masm(&cbuf);
 
   Register r = as_Register(ra_->get_encode(this));
   CodeSection* consts_section = __ code()->consts();
@@ -303,9 +299,8 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 }
 #endif
 
-void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
-  C2_MacroAssembler _masm(&cbuf);
 
   for (int i = 0; i < OptoPrologueNops; i++) {
     __ nop();
@@ -334,11 +329,11 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 
   if (C->stub_function() == nullptr) {
     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-    bs->nmethod_entry_barrier(&_masm);
+    bs->nmethod_entry_barrier(masm);
   }
 
   // offset from scratch buffer is not valid
-  if (strcmp(cbuf.name(), "Compile::Fill_buffer") == 0) {
+  if (strcmp(__ code()->name(), "Compile::Fill_buffer") == 0) {
     C->output()->set_frame_complete( __ offset() );
   }
 
@@ -379,8 +374,7 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 }
 #endif
 
-void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
+void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
 
   size_t framesize = C->output()->frame_size_in_bytes();
@@ -430,7 +424,7 @@ static inline bool is_iRegLd_memhd(OptoReg::Name src_first, OptoReg::Name src_se
   return (rlo&1)==0 && (rlo+1 == rhi) && is_memoryHD(offset);
 }
 
-uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
+uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm,
                                         PhaseRegAlloc *ra_,
                                         bool do_size,
                                         outputStream* st ) const {
@@ -463,14 +457,12 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
   // Bailout only for real instruction emit.
   // This requires a single comment change in shared code. ( see output.cpp "Normal" instruction case )
 
-  C2_MacroAssembler _masm(cbuf);
-
   // --------------------------------------
   // Check for mem-mem move.  Load into unused float registers and fall into
   // the float-store case.
   if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
     int offset = ra_->reg2offset(src_first);
-    if (cbuf && !is_memoryfp(offset)) {
+    if (masm && !is_memoryfp(offset)) {
       ra_->C->record_method_not_compilable("unable to handle large constant offsets");
       return 0;
     } else {
@@ -480,7 +472,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
         src_second    = OptoReg::Name(R_mem_copy_hi_num);
         src_first_rc  = rc_float;
         src_second_rc = rc_float;
-        if (cbuf) {
+        if (masm) {
           __ ldr_double(Rmemcopy, Address(SP, offset));
         } else if (!do_size) {
           st->print(LDR_DOUBLE "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
@@ -488,7 +480,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
       } else {
         src_first     = OptoReg::Name(R_mem_copy_lo_num);
         src_first_rc  = rc_float;
-        if (cbuf) {
+        if (masm) {
           __ ldr_float(Rmemcopy, Address(SP, offset));
         } else if (!do_size) {
           st->print(LDR_FLOAT "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
@@ -507,7 +499,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
   if (src_first_rc == rc_int && dst_first_rc == rc_int) {
     // Else normal reg-reg copy
     assert( src_second != dst_first, "smashed second before evacuating it" );
-    if (cbuf) {
+    if (masm) {
       __ mov(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
     } else if (!do_size) {
@@ -522,13 +514,13 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
   // Check for integer store
   if (src_first_rc == rc_int && dst_first_rc == rc_stack) {
     int offset = ra_->reg2offset(dst_first);
-    if (cbuf && !is_memoryI(offset)) {
+    if (masm && !is_memoryI(offset)) {
       ra_->C->record_method_not_compilable("unable to handle large constant offsets");
       return 0;
     } else {
       if (src_second_rc != rc_bad && is_iRegLd_memhd(src_first, src_second, offset)) {
         assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
-        if (cbuf) {
+        if (masm) {
           __ str_64(reg_to_register_object(Matcher::_regEncode[src_first]), Address(SP, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
@@ -538,7 +530,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
         }
         return size + 4;
       } else {
-        if (cbuf) {
+        if (masm) {
           __ str_32(reg_to_register_object(Matcher::_regEncode[src_first]), Address(SP, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
@@ -554,13 +546,13 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
   // Check for integer load
   if (dst_first_rc == rc_int && src_first_rc == rc_stack) {
     int offset = ra_->reg2offset(src_first);
-    if (cbuf && !is_memoryI(offset)) {
+    if (masm && !is_memoryI(offset)) {
       ra_->C->record_method_not_compilable("unable to handle large constant offsets");
       return 0;
     } else {
       if (src_second_rc != rc_bad && is_iRegLd_memhd(dst_first, dst_second, offset)) {
         assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
-        if (cbuf) {
+        if (masm) {
           __ ldr_64(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
@@ -570,7 +562,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
         }
         return size + 4;
       } else {
-        if (cbuf) {
+        if (masm) {
           __ ldr_32(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
@@ -587,7 +579,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
   if (src_first_rc == rc_float && dst_first_rc == rc_float) {
     if (src_second_rc != rc_bad) {
       assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
-      if (cbuf) {
+      if (masm) {
       __ mov_double(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
       } else if (!do_size) {
@@ -598,7 +590,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
       }
       return 4;
     }
-    if (cbuf) {
+    if (masm) {
       __ mov_float(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
     } else if (!do_size) {
@@ -613,14 +605,14 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
   // Check for float store
   if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
     int offset = ra_->reg2offset(dst_first);
-    if (cbuf && !is_memoryfp(offset)) {
+    if (masm && !is_memoryfp(offset)) {
       ra_->C->record_method_not_compilable("unable to handle large constant offsets");
       return 0;
     } else {
       // Further check for aligned-adjacent pair, so we can use a double store
       if (src_second_rc != rc_bad) {
         assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous");
-        if (cbuf) {
+        if (masm) {
           __ str_double(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(SP, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
@@ -630,7 +622,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
         }
         return size + 4;
       } else {
-        if (cbuf) {
+        if (masm) {
           __ str_float(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(SP, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
@@ -646,14 +638,14 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
   // Check for float load
   if (dst_first_rc == rc_float && src_first_rc == rc_stack) {
     int offset = ra_->reg2offset(src_first);
-    if (cbuf && !is_memoryfp(offset)) {
+    if (masm && !is_memoryfp(offset)) {
       ra_->C->record_method_not_compilable("unable to handle large constant offsets");
       return 0;
     } else {
       // Further check for aligned-adjacent pair, so we can use a double store
       if (src_second_rc != rc_bad) {
         assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous");
-        if (cbuf) {
+        if (masm) {
           __ ldr_double(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
@@ -663,7 +655,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
         }
         return size + 4;
       } else {
-        if (cbuf) {
+        if (masm) {
           __ ldr_float(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
@@ -683,7 +675,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
       assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
       assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
       assert(src_second_rc == rc_int && dst_second_rc == rc_float, "unsupported");
-      if (cbuf) {
+      if (masm) {
         __ fmdrr(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]), reg_to_register_object(Matcher::_regEncode[src_second]));
 #ifndef PRODUCT
       } else if (!do_size) {
@@ -693,7 +685,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
       }
       return size + 4;
     } else {
-      if (cbuf) {
+      if (masm) {
         __ fmsr(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
       } else if (!do_size) {
@@ -712,7 +704,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
       assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
       assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
       assert(src_second_rc == rc_float && dst_second_rc == rc_int, "unsupported");
-      if (cbuf) {
+      if (masm) {
         __ fmrrd(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
       } else if (!do_size) {
@@ -722,7 +714,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
       }
       return size + 4;
     } else {
-      if (cbuf) {
+      if (masm) {
         __ fmrs(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
       } else if (!do_size) {
@@ -746,7 +738,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
   // register (else it's a hi-bits-to-hi-bits copy which should have
   // happened already as part of a 64-bit move)
   if (src_second_rc == rc_int && dst_second_rc == rc_int) {
-    if (cbuf) {
+    if (masm) {
       __ mov(reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_register_object(Matcher::_regEncode[src_second]));
 #ifndef PRODUCT
     } else if (!do_size) {
@@ -763,11 +755,11 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
   if (src_second_rc == rc_int && dst_second_rc == rc_stack) {
     int offset = ra_->reg2offset(dst_second);
 
-    if (cbuf && !is_memoryP(offset)) {
+    if (masm && !is_memoryP(offset)) {
       ra_->C->record_method_not_compilable("unable to handle large constant offsets");
       return 0;
     } else {
-      if (cbuf) {
+      if (masm) {
         __ str(reg_to_register_object(Matcher::_regEncode[src_second]), Address(SP, offset));
 #ifndef PRODUCT
       } else if (!do_size) {
@@ -782,11 +774,11 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
   // Check for high word integer load
   if (dst_second_rc == rc_int && src_second_rc == rc_stack) {
     int offset = ra_->reg2offset(src_second);
-    if (cbuf && !is_memoryP(offset)) {
+    if (masm && !is_memoryP(offset)) {
       ra_->C->record_method_not_compilable("unable to handle large constant offsets");
       return 0;
     } else {
-      if (cbuf) {
+      if (masm) {
         __ ldr(reg_to_register_object(Matcher::_regEncode[dst_second]), Address(SP, offset));
 #ifndef PRODUCT
       } else if (!do_size) {
@@ -804,12 +796,12 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
 
 #ifndef PRODUCT
 void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
-  implementation(nullptr, ra_, false, st );
+  implementation( nullptr, ra_, false, st );
 }
 #endif
 
-void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  implementation( &cbuf, ra_, false, nullptr );
+void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  implementation( masm, ra_, false, nullptr );
 }
 
 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
@@ -823,8 +815,7 @@ void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
 }
 #endif
 
-void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
-  C2_MacroAssembler _masm(&cbuf);
+void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc * ) const {
   for(int i = 0; i < _count; i += 1) {
     __ nop();
   }
@@ -844,8 +835,7 @@ void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 }
 #endif
 
-void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
+void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
   int reg = ra_->get_encode(this);
   Register dst = reg_to_register_object(reg);
@@ -875,8 +865,7 @@ void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 }
 #endif
 
-void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
+void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   __ ic_check(InteriorEntryAlignment);
 }
 
@@ -888,9 +877,7 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 //=============================================================================
 
 // Emit exception handler code.
-int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
-  C2_MacroAssembler _masm(&cbuf);
-
+int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
   address base = __ start_a_stub(size_exception_handler());
   if (base == nullptr) {
     ciEnv::current()->record_failure("CodeCache is full");
@@ -909,11 +896,9 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
   return offset;
 }
 
-int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
+int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
   // Can't use any of the current frame's registers as we may have deopted
   // at a poll and everything can be live.
-  C2_MacroAssembler _masm(&cbuf);
-
   address base = __ start_a_stub(size_deopt_handler());
   if (base == nullptr) {
     ciEnv::current()->record_failure("CodeCache is full");
@@ -1208,13 +1193,25 @@ bool maybe_far_call(const MachCallNode *n) {
 // tertiary opcode.  Only the opcode sections which a particular instruction
 // needs for encoding need to be specified.
 encode %{
+  // Set instruction mark in MacroAssembler. This is used only in
+  // instructions that emit bytes directly to the CodeBuffer wrapped
+  // in the MacroAssembler. Should go away once all "instruct" are
+  // patched to emit bytes only using methods in MacroAssembler.
+  enc_class SetInstMark %{
+    __ set_inst_mark();
+  %}
+
+  enc_class ClearInstMark %{
+    __ clear_inst_mark();
+  %}
+
   enc_class call_epilog %{
     // nothing
   %}
 
   enc_class Java_To_Runtime (method meth) %{
     // CALL directly to the runtime
-    emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
+    emit_call_reloc(masm, as_MachCall(), $meth, runtime_call_Relocation::spec());
   %}
 
   enc_class Java_Static_Call (method meth) %{
@@ -1222,15 +1219,15 @@ encode %{
     // who we intended to call.
 
     if ( !_method) {
-      emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
+      emit_call_reloc(masm, as_MachCall(), $meth, runtime_call_Relocation::spec());
     } else {
-      int method_index = resolved_method_index(cbuf);
+      int method_index = resolved_method_index(masm);
       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                   : static_call_Relocation::spec(method_index);
-      emit_call_reloc(cbuf, as_MachCall(), $meth, rspec);
+      emit_call_reloc(masm, as_MachCall(), $meth, rspec);
 
       // Emit stubs for static call.
-      address stub = CompiledDirectCall::emit_to_interp_stub(cbuf);
+      address stub = CompiledDirectCall::emit_to_interp_stub(masm);
       if (stub == nullptr) {
         ciEnv::current()->record_failure("CodeCache is full");
         return;
@@ -1240,39 +1237,35 @@ encode %{
 
   enc_class save_last_PC %{
     // preserve mark
-    address mark = cbuf.insts()->mark();
-    debug_only(int off0 = cbuf.insts_size());
-    C2_MacroAssembler _masm(&cbuf);
+    address mark = __ inst_mark();
+    debug_only(int off0 = __ offset());
     int ret_addr_offset = as_MachCall()->ret_addr_offset();
     __ adr(LR, mark + ret_addr_offset);
     __ str(LR, Address(Rthread, JavaThread::last_Java_pc_offset()));
-    debug_only(int off1 = cbuf.insts_size());
+    debug_only(int off1 = __ offset());
     assert(off1 - off0 == 2 * Assembler::InstructionSize, "correct size prediction");
     // restore mark
-    cbuf.insts()->set_mark(mark);
+    __ set_inst_mark(mark);
   %}
 
   enc_class preserve_SP %{
     // preserve mark
-    address mark = cbuf.insts()->mark();
-    debug_only(int off0 = cbuf.insts_size());
-    C2_MacroAssembler _masm(&cbuf);
+    address mark = __ inst_mark();
+    debug_only(int off0 = __ offset());
     // FP is preserved across all calls, even compiled calls.
     // Use it to preserve SP in places where the callee might change the SP.
     __ mov(Rmh_SP_save, SP);
-    debug_only(int off1 = cbuf.insts_size());
+    debug_only(int off1 = __ offset());
     assert(off1 - off0 == 4, "correct size prediction");
     // restore mark
-    cbuf.insts()->set_mark(mark);
+    __ set_inst_mark(mark);
   %}
 
   enc_class restore_SP %{
-    C2_MacroAssembler _masm(&cbuf);
     __ mov(SP, Rmh_SP_save);
   %}
 
   enc_class Java_Dynamic_Call (method meth) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register R8_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
     assert(R8_ic_reg == Ricklass, "should be");
     __ set_inst_mark();
@@ -1281,9 +1274,9 @@ encode %{
     address  virtual_call_oop_addr = __ inst_mark();
     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
     // who we intended to call.
-    int method_index = resolved_method_index(cbuf);
+    int method_index = resolved_method_index(masm);
     __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
-    emit_call_reloc(cbuf, as_MachCall(), $meth, RelocationHolder::none);
+    emit_call_reloc(masm, as_MachCall(), $meth, RelocationHolder::none);
   %}
 
   enc_class LdReplImmI(immI src, regD dst, iRegI tmp, int cnt, int wth) %{
@@ -1300,7 +1293,6 @@ encode %{
         val |= (val << bit_width);
       }
     }
-    C2_MacroAssembler _masm(&cbuf);
 
     if (val == -1) {
       __ mvn($tmp$$Register, 0);
@@ -1317,7 +1309,6 @@ encode %{
     // Replicate float con 2 times and pack into vector (8 bytes) in regD.
     float fval = $src$$constant;
     int val = *((int*)&fval);
-    C2_MacroAssembler _masm(&cbuf);
 
     if (val == -1) {
       __ mvn($tmp$$Register, 0);
@@ -1332,7 +1323,6 @@ encode %{
 
   enc_class enc_String_Compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, iRegI tmp1, iRegI tmp2) %{
     Label Ldone, Lloop;
-    C2_MacroAssembler _masm(&cbuf);
 
     Register   str1_reg = $str1$$Register;
     Register   str2_reg = $str2$$Register;
@@ -1424,7 +1414,6 @@ encode %{
 
   enc_class enc_String_Equals(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, iRegI tmp2) %{
     Label Lchar, Lchar_loop, Ldone, Lequal;
-    C2_MacroAssembler _masm(&cbuf);
 
     Register   str1_reg = $str1$$Register;
     Register   str2_reg = $str2$$Register;
@@ -1486,7 +1475,6 @@ encode %{
 
   enc_class enc_Array_Equals(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI result) %{
     Label Ldone, Lloop, Lequal;
-    C2_MacroAssembler _masm(&cbuf);
 
     Register   ary1_reg = $ary1$$Register;
     Register   ary2_reg = $ary2$$Register;
@@ -8847,7 +8835,7 @@ instruct CallStaticJavaDirect( method meth ) %{
 
   ins_cost(CALL_COST);
   format %{ "CALL,static ==> " %}
-  ins_encode( Java_Static_Call( meth ), call_epilog );
+  ins_encode( SetInstMark, Java_Static_Call( meth ), call_epilog, ClearInstMark );
   ins_pipe(simple_call);
 %}
 
@@ -8861,7 +8849,7 @@ instruct CallStaticJavaHandle( method meth ) %{
 
   ins_cost(CALL_COST);
   format %{ "CALL,static/MethodHandle ==> " %}
-  ins_encode( preserve_SP, Java_Static_Call( meth ), restore_SP, call_epilog );
+  ins_encode( SetInstMark, preserve_SP, Java_Static_Call( meth ), restore_SP, call_epilog, ClearInstMark );
   ins_pipe(simple_call);
 %}
 
@@ -8873,7 +8861,7 @@ instruct CallDynamicJavaDirect( method meth ) %{
   ins_cost(CALL_COST);
   format %{ "MOV_OOP    (empty),R_R8\n\t"
             "CALL,dynamic  ; NOP ==> " %}
-  ins_encode( Java_Dynamic_Call( meth ), call_epilog );
+  ins_encode( SetInstMark, Java_Dynamic_Call( meth ), call_epilog, ClearInstMark );
   ins_pipe(call);
 %}
 
@@ -8883,8 +8871,8 @@ instruct CallRuntimeDirect(method meth) %{
   effect(USE meth);
   ins_cost(CALL_COST);
   format %{ "CALL,runtime" %}
-  ins_encode( Java_To_Runtime( meth ),
-              call_epilog );
+  ins_encode( SetInstMark, Java_To_Runtime( meth ),
+              call_epilog, ClearInstMark );
   ins_pipe(simple_call);
 %}
 
@@ -8895,8 +8883,8 @@ instruct CallLeafDirect(method meth) %{
   ins_cost(CALL_COST);
   format %{ "CALL,runtime leaf" %}
   // TODO: need save_last_PC here?
-  ins_encode( Java_To_Runtime( meth ),
-              call_epilog );
+  ins_encode( SetInstMark, Java_To_Runtime( meth ),
+              call_epilog, ClearInstMark );
   ins_pipe(simple_call);
 %}
 
@@ -8907,8 +8895,8 @@ instruct CallLeafNoFPDirect(method meth) %{
   ins_cost(CALL_COST);
   format %{ "CALL,runtime leaf nofp" %}
   // TODO: need save_last_PC here?
-  ins_encode( Java_To_Runtime( meth ),
-              call_epilog );
+  ins_encode( SetInstMark, Java_To_Runtime( meth ),
+              call_epilog, ClearInstMark );
   ins_pipe(simple_call);
 %}
 
diff --git a/src/hotspot/cpu/arm/compiledIC_arm.cpp b/src/hotspot/cpu/arm/compiledIC_arm.cpp
index 71389d2353d66..a2c37e2907cb6 100644
--- a/src/hotspot/cpu/arm/compiledIC_arm.cpp
+++ b/src/hotspot/cpu/arm/compiledIC_arm.cpp
@@ -34,20 +34,18 @@
 
 // ----------------------------------------------------------------------------
 #if COMPILER2_OR_JVMCI
-#define __ _masm.
+#define __ masm->
 // emit call stub, compiled java to interpreter
-address CompiledDirectCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
+address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address mark) {
   // Stub is fixed up when the corresponding call is converted from calling
   // compiled code to calling interpreted code.
   // set (empty), R9
   // b -1
 
   if (mark == nullptr) {
-    mark = cbuf.insts_mark();  // get mark within main instrs section
+    mark = __ inst_mark();  // get mark within main instrs section
   }
 
-  MacroAssembler _masm(&cbuf);
-
   address base = __ start_a_stub(to_interp_stub_size());
   if (base == nullptr) {
     return nullptr;  // CodeBuffer::expand failed
diff --git a/src/hotspot/cpu/ppc/compiledIC_ppc.cpp b/src/hotspot/cpu/ppc/compiledIC_ppc.cpp
index 355ac4815d551..0780837e8e5e3 100644
--- a/src/hotspot/cpu/ppc/compiledIC_ppc.cpp
+++ b/src/hotspot/cpu/ppc/compiledIC_ppc.cpp
@@ -77,18 +77,14 @@
 // Usage of r1 and r2 in the stubs allows to distinguish them.
 
 const int IC_pos_in_java_to_interp_stub = 8;
-#define __ _masm.
-address CompiledDirectCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark/* = nullptr*/) {
+#define __ masm->
+address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address mark/* = nullptr*/) {
 #ifdef COMPILER2
   if (mark == nullptr) {
     // Get the mark within main instrs section which is set to the address of the call.
-    mark = cbuf.insts_mark();
+    mark = __ inst_mark();
   }
 
-  // Note that the code buffer's insts_mark is always relative to insts.
-  // That's why we must use the macroassembler to generate a stub.
-  MacroAssembler _masm(&cbuf);
-
   // Start the stub.
   address stub = __ start_a_stub(CompiledDirectCall::to_interp_stub_size());
   if (stub == nullptr) {
diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad
index 4825ca9cf81cd..eb4894656e202 100644
--- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad
+++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoah_ppc.ad
@@ -44,7 +44,7 @@ instruct compareAndSwapP_shenandoah(iRegIdst res, indirect mem, iRegPsrc oldval,
   format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %}
   ins_encode %{
     ShenandoahBarrierSet::assembler()->cmpxchg_oop(
-        &_masm,
+        masm,
         $mem$$Register, $oldval$$Register, $newval$$Register,
         $tmp1$$Register, $tmp2$$Register,
         false, $res$$Register
@@ -65,7 +65,7 @@ instruct compareAndSwapN_shenandoah(iRegIdst res, indirect mem, iRegNsrc oldval,
   format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %}
   ins_encode %{
     ShenandoahBarrierSet::assembler()->cmpxchg_oop(
-        &_masm,
+        masm,
         $mem$$Register, $oldval$$Register, $newval$$Register,
         $tmp1$$Register, $tmp2$$Register,
         false, $res$$Register
@@ -86,7 +86,7 @@ instruct compareAndSwapP_acq_shenandoah(iRegIdst res, indirect mem, iRegPsrc old
   format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %}
   ins_encode %{
     ShenandoahBarrierSet::assembler()->cmpxchg_oop(
-        &_masm,
+        masm,
         $mem$$Register, $oldval$$Register, $newval$$Register,
         $tmp1$$Register, $tmp2$$Register,
         false, $res$$Register
@@ -112,7 +112,7 @@ instruct compareAndSwapN_acq_shenandoah(iRegIdst res, indirect mem, iRegNsrc old
   format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %}
   ins_encode %{
     ShenandoahBarrierSet::assembler()->cmpxchg_oop(
-        &_masm,
+        masm,
         $mem$$Register, $oldval$$Register, $newval$$Register,
         $tmp1$$Register, $tmp2$$Register,
         false, $res$$Register
@@ -137,7 +137,7 @@ instruct compareAndExchangeP_shenandoah(iRegPdst res, indirect mem, iRegPsrc old
   format %{ "CMPXCHGD $res, $mem, $oldval, $newval; as ptr; ptr" %}
   ins_encode %{
     ShenandoahBarrierSet::assembler()->cmpxchg_oop(
-        &_masm,
+        masm,
         $mem$$Register, $oldval$$Register, $newval$$Register,
         $tmp1$$Register, $tmp2$$Register,
         true, $res$$Register
@@ -157,7 +157,7 @@ instruct compareAndExchangeN_shenandoah(iRegNdst res, indirect mem, iRegNsrc old
   format %{ "CMPXCHGD $res, $mem, $oldval, $newval; as ptr; ptr" %}
   ins_encode %{
     ShenandoahBarrierSet::assembler()->cmpxchg_oop(
-        &_masm,
+        masm,
         $mem$$Register, $oldval$$Register, $newval$$Register,
         $tmp1$$Register, $tmp2$$Register,
         true, $res$$Register
@@ -177,7 +177,7 @@ instruct compareAndExchangePAcq_shenandoah(iRegPdst res, indirect mem, iRegPsrc
   format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as ptr; ptr" %}
   ins_encode %{
     ShenandoahBarrierSet::assembler()->cmpxchg_oop(
-        &_masm,
+        masm,
         $mem$$Register, $oldval$$Register, $newval$$Register,
         $tmp1$$Register, $tmp2$$Register,
         true, $res$$Register
@@ -202,7 +202,7 @@ instruct compareAndExchangeNAcq_shenandoah(iRegNdst res, indirect mem, iRegNsrc
   format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as ptr; ptr" %}
   ins_encode %{
     ShenandoahBarrierSet::assembler()->cmpxchg_oop(
-        &_masm,
+        masm,
         $mem$$Register, $oldval$$Register, $newval$$Register,
         $tmp1$$Register, $tmp2$$Register,
         true, $res$$Register
diff --git a/src/hotspot/cpu/ppc/gc/x/x_ppc.ad b/src/hotspot/cpu/ppc/gc/x/x_ppc.ad
index 644fb3def3838..b206b6593fb45 100644
--- a/src/hotspot/cpu/ppc/gc/x/x_ppc.ad
+++ b/src/hotspot/cpu/ppc/gc/x/x_ppc.ad
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
 // Copyright (c) 2021 SAP SE. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
@@ -32,7 +32,7 @@ source_hpp %{
 
 source %{
 
-static void x_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref,
+static void x_load_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref,
                            Register tmp, uint8_t barrier_data) {
   if (barrier_data == XLoadBarrierElided) {
     return;
@@ -45,14 +45,14 @@ static void x_load_barrier(MacroAssembler& _masm, const MachNode* node, Address
   __ bind(*stub->continuation());
 }
 
-static void x_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref,
+static void x_load_barrier_slow_path(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref,
                                      Register tmp) {
   XLoadBarrierStubC2* const stub = XLoadBarrierStubC2::create(node, ref_addr, ref, tmp, XLoadBarrierStrong);
   __ b(*stub->entry());
   __ bind(*stub->continuation());
 }
 
-static void x_compare_and_swap(MacroAssembler& _masm, const MachNode* node,
+static void x_compare_and_swap(MacroAssembler* masm, const MachNode* node,
                               Register res, Register mem, Register oldval, Register newval,
                               Register tmp_xchg, Register tmp_mask,
                               bool weak, bool acquire) {
@@ -70,7 +70,7 @@ static void x_compare_and_swap(MacroAssembler& _masm, const MachNode* node,
     __ beq(CCR0, skip_barrier);
 
     // CAS must have failed because pointer in memory is bad.
-    x_load_barrier_slow_path(_masm, node, Address(mem), tmp_xchg, res /* used as tmp */);
+    x_load_barrier_slow_path(masm, node, Address(mem), tmp_xchg, res /* used as tmp */);
 
     __ cmpxchgd(CCR0, tmp_xchg, oldval, newval, mem,
                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), res, nullptr, true, weak);
@@ -89,7 +89,7 @@ static void x_compare_and_swap(MacroAssembler& _masm, const MachNode* node,
   }
 }
 
-static void x_compare_and_exchange(MacroAssembler& _masm, const MachNode* node,
+static void x_compare_and_exchange(MacroAssembler* masm, const MachNode* node,
                                    Register res, Register mem, Register oldval, Register newval, Register tmp,
                                    bool weak, bool acquire) {
   // z-specific load barrier requires strong CAS operations.
@@ -104,7 +104,7 @@ static void x_compare_and_exchange(MacroAssembler& _masm, const MachNode* node,
     __ and_(tmp, tmp, res);
     __ beq(CCR0, skip_barrier);
 
-    x_load_barrier_slow_path(_masm, node, Address(mem), res, tmp);
+    x_load_barrier_slow_path(masm, node, Address(mem), res, tmp);
 
     __ cmpxchgd(CCR0, res, oldval, newval, mem,
                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), noreg, nullptr, true, weak);
@@ -138,7 +138,7 @@ instruct xLoadP(iRegPdst dst, memoryAlg4 mem, iRegPdst tmp, flagsRegCR0 cr0)
   ins_encode %{
     assert($mem$$index == 0, "sanity");
     __ ld($dst$$Register, $mem$$disp, $mem$$base$$Register);
-    x_load_barrier(_masm, this, Address($mem$$base$$Register, $mem$$disp), $dst$$Register, $tmp$$Register, barrier_data());
+    x_load_barrier(masm, this, Address($mem$$base$$Register, $mem$$disp), $dst$$Register, $tmp$$Register, barrier_data());
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -156,7 +156,7 @@ instruct xLoadP_acq(iRegPdst dst, memoryAlg4 mem, iRegPdst tmp, flagsRegCR0 cr0)
   format %{ "LD acq $dst, $mem" %}
   ins_encode %{
     __ ld($dst$$Register, $mem$$disp, $mem$$base$$Register);
-    x_load_barrier(_masm, this, Address($mem$$base$$Register, $mem$$disp), $dst$$Register, $tmp$$Register, barrier_data());
+    x_load_barrier(masm, this, Address($mem$$base$$Register, $mem$$disp), $dst$$Register, $tmp$$Register, barrier_data());
 
     // Uses the isync instruction as an acquire barrier.
     // This exploits the compare and the branch in the z load barrier (load, compare and branch, isync).
@@ -175,7 +175,7 @@ instruct xCompareAndSwapP(iRegIdst res, iRegPdst mem, iRegPsrc oldval, iRegPsrc
 
   format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %}
   ins_encode %{
-    x_compare_and_swap(_masm, this,
+    x_compare_and_swap(masm, this,
                                 $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register,
                                 $tmp_xchg$$Register, $tmp_mask$$Register,
                                 false /* weak */, false /* acquire */);
@@ -193,7 +193,7 @@ instruct xCompareAndSwapP_acq(iRegIdst res, iRegPdst mem, iRegPsrc oldval, iRegP
 
   format %{ "CMPXCHG acq $res, $mem, $oldval, $newval; as bool; ptr" %}
   ins_encode %{
-    x_compare_and_swap(_masm, this,
+    x_compare_and_swap(masm, this,
                                 $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register,
                                 $tmp_xchg$$Register, $tmp_mask$$Register,
                                 false /* weak */, true /* acquire */);
@@ -211,7 +211,7 @@ instruct xCompareAndSwapPWeak(iRegIdst res, iRegPdst mem, iRegPsrc oldval, iRegP
 
   format %{ "weak CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %}
   ins_encode %{
-    x_compare_and_swap(_masm, this,
+    x_compare_and_swap(masm, this,
                                 $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register,
                                 $tmp_xchg$$Register, $tmp_mask$$Register,
                                 true /* weak */, false /* acquire */);
@@ -229,7 +229,7 @@ instruct xCompareAndSwapPWeak_acq(iRegIdst res, iRegPdst mem, iRegPsrc oldval, i
 
   format %{ "weak CMPXCHG acq $res, $mem, $oldval, $newval; as bool; ptr" %}
   ins_encode %{
-    x_compare_and_swap(_masm, this,
+    x_compare_and_swap(masm, this,
                                 $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register,
                                 $tmp_xchg$$Register, $tmp_mask$$Register,
                                 true /* weak */, true /* acquire */);
@@ -250,7 +250,7 @@ instruct xCompareAndExchangeP(iRegPdst res, iRegPdst mem, iRegPsrc oldval, iRegP
 
   format %{ "CMPXCHG $res, $mem, $oldval, $newval; as ptr; ptr" %}
   ins_encode %{
-    x_compare_and_exchange(_masm, this,
+    x_compare_and_exchange(masm, this,
                                     $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, $tmp$$Register,
                                     false /* weak */, false /* acquire */);
   %}
@@ -270,7 +270,7 @@ instruct xCompareAndExchangeP_acq(iRegPdst res, iRegPdst mem, iRegPsrc oldval, i
 
   format %{ "CMPXCHG acq $res, $mem, $oldval, $newval; as ptr; ptr" %}
   ins_encode %{
-    x_compare_and_exchange(_masm, this,
+    x_compare_and_exchange(masm, this,
                                     $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, $tmp$$Register,
                                     false /* weak */, true /* acquire */);
   %}
@@ -286,7 +286,7 @@ instruct xGetAndSetP(iRegPdst res, iRegPdst mem, iRegPsrc newval, iRegPdst tmp,
   format %{ "GetAndSetP $res, $mem, $newval" %}
   ins_encode %{
     __ getandsetd($res$$Register, $newval$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
-    x_load_barrier(_masm, this, Address(noreg, (intptr_t) 0), $res$$Register, $tmp$$Register, barrier_data());
+    x_load_barrier(masm, this, Address(noreg, (intptr_t) 0), $res$$Register, $tmp$$Register, barrier_data());
 
     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
       __ isync();
diff --git a/src/hotspot/cpu/ppc/gc/z/z_ppc.ad b/src/hotspot/cpu/ppc/gc/z/z_ppc.ad
index 777e5a785a79d..017574d40ff8b 100644
--- a/src/hotspot/cpu/ppc/gc/z/z_ppc.ad
+++ b/src/hotspot/cpu/ppc/gc/z/z_ppc.ad
@@ -34,7 +34,7 @@ source %{
 
 #include "gc/z/zBarrierSetAssembler.hpp"
 
-static void z_color(MacroAssembler& _masm, Register dst, Register src) {
+static void z_color(MacroAssembler* masm, Register dst, Register src) {
   assert_different_registers(dst, src);
   __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatStoreGoodBits);
   __ li(dst, barrier_Relocation::unpatched); // Load color bits.
@@ -47,55 +47,55 @@ static void z_color(MacroAssembler& _masm, Register dst, Register src) {
   }
 }
 
-static void z_uncolor(MacroAssembler& _masm, Register ref) {
+static void z_uncolor(MacroAssembler* masm, Register ref) {
   __ srdi(ref, ref, ZPointerLoadShift);
 }
 
-static void check_color(MacroAssembler& _masm, Register ref, bool on_non_strong) {
+static void check_color(MacroAssembler* masm, Register ref, bool on_non_strong) {
   int relocFormat = on_non_strong ? ZBarrierRelocationFormatMarkBadMask
                                   : ZBarrierRelocationFormatLoadBadMask;
   __ relocate(barrier_Relocation::spec(), relocFormat);
   __ andi_(R0, ref, barrier_Relocation::unpatched);
 }
 
-static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref) {
-  Assembler::InlineSkippedInstructionsCounter skipped_counter(&_masm);
+static void z_load_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref) {
+  Assembler::InlineSkippedInstructionsCounter skipped_counter(masm);
   if (node->barrier_data() == ZBarrierElided) {
-    z_uncolor(_masm, ref);
+    z_uncolor(masm, ref);
   } else {
     const bool on_non_strong =
       ((node->barrier_data() & ZBarrierWeak) != 0) ||
       ((node->barrier_data() & ZBarrierPhantom) != 0);
 
-    check_color(_masm, ref, on_non_strong);
+    check_color(masm, ref, on_non_strong);
 
     ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref);
     __ bne_far(CCR0, *stub->entry(), MacroAssembler::bc_far_optimize_on_relocate);
 
-    z_uncolor(_masm, ref);
+    z_uncolor(masm, ref);
     __ bind(*stub->continuation());
   }
 }
 
-static void z_store_barrier(MacroAssembler& _masm, const MachNode* node, Register ref_base, intptr_t disp, Register rnew_zaddress, Register rnew_zpointer, bool is_atomic) {
-  Assembler::InlineSkippedInstructionsCounter skipped_counter(&_masm);
+static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Register ref_base, intptr_t disp, Register rnew_zaddress, Register rnew_zpointer, bool is_atomic) {
+  Assembler::InlineSkippedInstructionsCounter skipped_counter(masm);
   if (node->barrier_data() == ZBarrierElided) {
-    z_color(_masm, rnew_zpointer, rnew_zaddress);
+    z_color(masm, rnew_zpointer, rnew_zaddress);
   } else {
     bool is_native = (node->barrier_data() & ZBarrierNative) != 0;
     ZStoreBarrierStubC2* const stub = ZStoreBarrierStubC2::create(node, Address(ref_base, disp), rnew_zaddress, rnew_zpointer, is_native, is_atomic);
     ZBarrierSetAssembler* bs_asm = ZBarrierSet::assembler();
-    bs_asm->store_barrier_fast(&_masm, ref_base, disp, rnew_zaddress, rnew_zpointer, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
+    bs_asm->store_barrier_fast(masm, ref_base, disp, rnew_zaddress, rnew_zpointer, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
   }
 }
 
-static void z_compare_and_swap(MacroAssembler& _masm, const MachNode* node,
+static void z_compare_and_swap(MacroAssembler* masm, const MachNode* node,
                               Register res, Register mem, Register oldval, Register newval,
                               Register tmp1, Register tmp2, bool acquire) {
 
   Register rold_zpointer = tmp1, rnew_zpointer = tmp2;
-  z_store_barrier(_masm, node, mem, 0, newval, rnew_zpointer, true /* is_atomic */);
-  z_color(_masm, rold_zpointer, oldval);
+  z_store_barrier(masm, node, mem, 0, newval, rnew_zpointer, true /* is_atomic */);
+  z_color(masm, rold_zpointer, oldval);
   __ cmpxchgd(CCR0, R0, rold_zpointer, rnew_zpointer, mem,
               MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), res, nullptr, true,
               false /* we could support weak, but benefit is questionable */);
@@ -111,17 +111,17 @@ static void z_compare_and_swap(MacroAssembler& _masm, const MachNode* node,
   }
 }
 
-static void z_compare_and_exchange(MacroAssembler& _masm, const MachNode* node,
+static void z_compare_and_exchange(MacroAssembler* masm, const MachNode* node,
                                    Register res, Register mem, Register oldval, Register newval,
                                    Register tmp, bool acquire) {
 
   Register rold_zpointer = R0, rnew_zpointer = tmp;
-  z_store_barrier(_masm, node, mem, 0, newval, rnew_zpointer, true /* is_atomic */);
-  z_color(_masm, rold_zpointer, oldval);
+  z_store_barrier(masm, node, mem, 0, newval, rnew_zpointer, true /* is_atomic */);
+  z_color(masm, rold_zpointer, oldval);
   __ cmpxchgd(CCR0, res, rold_zpointer, rnew_zpointer, mem,
               MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), noreg, nullptr, true,
               false /* we could support weak, but benefit is questionable */);
-  z_uncolor(_masm, res);
+  z_uncolor(masm, res);
 
   if (acquire) {
     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
@@ -149,7 +149,7 @@ instruct zLoadP(iRegPdst dst, memoryAlg4 mem, flagsRegCR0 cr0)
   ins_encode %{
     assert($mem$$index == 0, "sanity");
     __ ld($dst$$Register, $mem$$disp, $mem$$base$$Register);
-    z_load_barrier(_masm, this, Address($mem$$base$$Register, $mem$$disp), $dst$$Register);
+    z_load_barrier(masm, this, Address($mem$$base$$Register, $mem$$disp), $dst$$Register);
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -167,7 +167,7 @@ instruct zLoadP_acq(iRegPdst dst, memoryAlg4 mem, flagsRegCR0 cr0)
   format %{ "LD acq $dst, $mem" %}
   ins_encode %{
     __ ld($dst$$Register, $mem$$disp, $mem$$base$$Register);
-    z_load_barrier(_masm, this, Address($mem$$base$$Register, $mem$$disp), $dst$$Register);
+    z_load_barrier(masm, this, Address($mem$$base$$Register, $mem$$disp), $dst$$Register);
 
     // Uses the isync instruction as an acquire barrier.
     // This exploits the compare and the branch in the z load barrier (load, compare and branch, isync).
@@ -186,7 +186,7 @@ instruct zStoreP(memoryAlg4 mem, iRegPsrc src, iRegPdst tmp, flagsRegCR0 cr0)
   ins_cost(2 * MEMORY_REF_COST);
   format %{ "std    $mem, $src\t# ptr" %}
   ins_encode %{
-    z_store_barrier(_masm, this, $mem$$base$$Register, $mem$$disp, $src$$Register, $tmp$$Register, false /* is_atomic */);
+    z_store_barrier(masm, this, $mem$$base$$Register, $mem$$disp, $src$$Register, $tmp$$Register, false /* is_atomic */);
     __ std($tmp$$Register, $mem$$disp, $mem$$base$$Register);
   %}
   ins_pipe(pipe_class_default);
@@ -200,7 +200,7 @@ instruct zStorePNull(memoryAlg4 mem, immP_0 zero, iRegPdst tmp, flagsRegCR0 cr0)
   ins_cost(MEMORY_REF_COST);
   format %{ "std    $mem, null\t# ptr" %}
   ins_encode %{
-    z_store_barrier(_masm, this, $mem$$base$$Register, $mem$$disp, noreg, $tmp$$Register, false /* is_atomic */);
+    z_store_barrier(masm, this, $mem$$base$$Register, $mem$$disp, noreg, $tmp$$Register, false /* is_atomic */);
     __ std($tmp$$Register, $mem$$disp, $mem$$base$$Register);
   %}
   ins_pipe(pipe_class_default);
@@ -217,7 +217,7 @@ instruct zCompareAndSwapP(iRegIdst res, iRegPdst mem, iRegPsrc oldval, iRegPsrc
 
   format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %}
   ins_encode %{
-    z_compare_and_swap(_masm, this,
+    z_compare_and_swap(masm, this,
                        $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register,
                        $tmp1$$Register, $tmp2$$Register,
                        false /* acquire */);
@@ -236,7 +236,7 @@ instruct zCompareAndSwapP_acq(iRegIdst res, iRegPdst mem, iRegPsrc oldval, iRegP
 
   format %{ "CMPXCHG acq $res, $mem, $oldval, $newval; as bool; ptr" %}
   ins_encode %{
-    z_compare_and_swap(_masm, this,
+    z_compare_and_swap(masm, this,
                        $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register,
                        $tmp1$$Register, $tmp2$$Register,
                        true /* acquire */);
@@ -257,7 +257,7 @@ instruct zCompareAndExchangeP(iRegPdst res, iRegPdst mem, iRegPsrc oldval, iRegP
 
   format %{ "CMPXCHG $res, $mem, $oldval, $newval; as ptr; ptr" %}
   ins_encode %{
-    z_compare_and_exchange(_masm, this,
+    z_compare_and_exchange(masm, this,
                            $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, $tmp$$Register,
                            false /* acquire */);
   %}
@@ -277,7 +277,7 @@ instruct zCompareAndExchangeP_acq(iRegPdst res, iRegPdst mem, iRegPsrc oldval, i
 
   format %{ "CMPXCHG acq $res, $mem, $oldval, $newval; as ptr; ptr" %}
   ins_encode %{
-    z_compare_and_exchange(_masm, this,
+    z_compare_and_exchange(masm, this,
                            $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, $tmp$$Register,
                            true /* acquire */);
   %}
@@ -293,9 +293,9 @@ instruct zGetAndSetP(iRegPdst res, iRegPdst mem, iRegPsrc newval, iRegPdst tmp,
   format %{ "GetAndSetP $res, $mem, $newval" %}
   ins_encode %{
     Register rnew_zpointer = $tmp$$Register, result = $res$$Register;
-    z_store_barrier(_masm, this, $mem$$Register, 0, $newval$$Register, rnew_zpointer, true /* is_atomic */);
+    z_store_barrier(masm, this, $mem$$Register, 0, $newval$$Register, rnew_zpointer, true /* is_atomic */);
     __ getandsetd(result, rnew_zpointer, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
-    z_uncolor(_masm, result);
+    z_uncolor(masm, result);
 
     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
       __ isync();
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index 773e60e3f4f91..cbe28deb51676 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -1079,7 +1079,7 @@ bool followed_by_acquire(const Node *load) {
   return false;
 }
 
-#define __ _masm.
+#define __ masm->
 
 // Tertiary op of a LoadP or StoreP encoding.
 #define REGP_OP true
@@ -1189,8 +1189,7 @@ int cmprb_Whitespace_reg_reg_prefixedNode::compute_padding(int current_offset) c
 //=============================================================================
 
 // Emit an interrupt that is caught by the debugger (for debugging compiler).
-void emit_break(CodeBuffer &cbuf) {
-  C2_MacroAssembler _masm(&cbuf);
+void emit_break(C2_MacroAssembler *masm) {
   __ illtrap();
 }
 
@@ -1200,8 +1199,8 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  emit_break(cbuf);
+void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  emit_break(masm);
 }
 
 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
@@ -1210,14 +1209,13 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 
 //=============================================================================
 
-void emit_nop(CodeBuffer &cbuf) {
-  C2_MacroAssembler _masm(&cbuf);
+void emit_nop(C2_MacroAssembler *masm) {
   __ nop();
 }
 
-static inline void emit_long(CodeBuffer &cbuf, int value) {
-  *((int*)(cbuf.insts_end())) = value;
-  cbuf.set_insts_end(cbuf.insts_end() + BytesPerInstWord);
+static inline void emit_long(C2_MacroAssembler *masm, int value) {
+  *((int*)(__ pc())) = value;
+  __ set_inst_end(__ pc() + BytesPerInstWord);
 }
 
 //=============================================================================
@@ -1237,7 +1235,7 @@ class CallStubImpl {
  public:
 
   // Emit call stub, compiled java to interpreter.
-  static void emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
+  static void emit_trampoline_stub(C2_MacroAssembler *masm, int destination_toc_offset, int insts_call_instruction_offset);
 
   // Size of call trampoline stub.
   // This doesn't need to be accurate to the byte, but it
@@ -1268,7 +1266,7 @@ source %{
 //   load the call target from the constant pool
 //   branch via CTR (LR/link still points to the call-site above)
 
-void CallStubImpl::emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
+void CallStubImpl::emit_trampoline_stub(C2_MacroAssembler *masm, int destination_toc_offset, int insts_call_instruction_offset) {
   address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset);
   if (stub == nullptr) {
     ciEnv::current()->record_out_of_memory_failure();
@@ -1299,7 +1297,7 @@ typedef struct {
 // - Add a relocation at the branch-and-link instruction.
 // - Emit a branch-and-link.
 // - Remember the return pc offset.
-EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
+EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler *masm, address entry_point, relocInfo::relocType rtype) {
   EmitCallOffsets offsets = { -1, -1 };
   const int start_offset = __ offset();
   offsets.insts_call_instruction_offset = __ offset();
@@ -1316,7 +1314,7 @@ EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler &_masm, address
   const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 
   // Emit the trampoline stub which will be related to the branch-and-link below.
-  CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
+  CallStubImpl::emit_trampoline_stub(masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
   if (ciEnv::current()->failing()) { return offsets; } // Code cache may be full.
   __ relocate(rtype);
 
@@ -1366,7 +1364,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
   nodes->push(m2);
 }
 
-void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
   // Is postalloc expanded.
   ShouldNotReachHere();
 }
@@ -1404,9 +1402,8 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
-  C2_MacroAssembler _masm(&cbuf);
 
   const long framesize = C->output()->frame_size_in_bytes();
   assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");
@@ -1556,10 +1553,10 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 
   if (C->stub_function() == nullptr) {
     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-    bs->nmethod_entry_barrier(&_masm, push_frame_temp);
+    bs->nmethod_entry_barrier(masm, push_frame_temp);
   }
 
-  C->output()->set_frame_complete(cbuf.insts_size());
+  C->output()->set_frame_complete(__ offset());
 }
 
 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
@@ -1588,9 +1585,8 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
-  C2_MacroAssembler _masm(&cbuf);
 
   const long framesize = ((long)C->output()->frame_slots()) << LogBytesPerInt;
   assert(framesize >= 0, "negative frame-size?");
@@ -1668,7 +1664,7 @@ static enum RC rc_class(OptoReg::Name reg) {
   return rc_stack;
 }
 
-static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int reg, int offset,
+static int ld_st_helper(C2_MacroAssembler *masm, const char *op_str, uint opcode, int reg, int offset,
                         bool do_print, Compile* C, outputStream *st) {
 
   assert(opcode == Assembler::LD_OPCODE   ||
@@ -1681,12 +1677,12 @@ static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int r
          opcode == Assembler::STFS_OPCODE,
          "opcode not supported");
 
-  if (cbuf) {
+  if (masm) {
     int d =
       (Assembler::LD_OPCODE == opcode || Assembler::STD_OPCODE == opcode) ?
         Assembler::ds(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/)
       : Assembler::d1(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); // Makes no difference in opt build.
-    emit_long(*cbuf, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP));
+    emit_long(masm, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP));
   }
 #ifndef PRODUCT
   else if (do_print) {
@@ -1699,7 +1695,7 @@ static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int r
   return 4; // size
 }
 
-uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
+uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
   Compile* C = ra_->C;
 
   // Get registers to move.
@@ -1729,8 +1725,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
       int src_offset = ra_->reg2offset(src_lo);
       int dst_offset = ra_->reg2offset(dst_lo);
-      if (cbuf) {
-        C2_MacroAssembler _masm(cbuf);
+      if (masm) {
         __ ld(R0, src_offset, R1_SP);
         __ std(R0, dst_offset, R1_SP);
         __ ld(R0, src_offset+8, R1_SP);
@@ -1742,8 +1737,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     else if (src_lo_rc == rc_vs && dst_lo_rc == rc_stack) {
       VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
       int dst_offset = ra_->reg2offset(dst_lo);
-      if (cbuf) {
-        C2_MacroAssembler _masm(cbuf);
+      if (masm) {
         __ addi(R0, R1_SP, dst_offset);
         __ stxvd2x(Rsrc, R0);
       }
@@ -1753,8 +1747,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vs) {
       VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
       int src_offset = ra_->reg2offset(src_lo);
-      if (cbuf) {
-        C2_MacroAssembler _masm(cbuf);
+      if (masm) {
         __ addi(R0, R1_SP, src_offset);
         __ lxvd2x(Rdst, R0);
       }
@@ -1764,8 +1757,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     else if (src_lo_rc == rc_vs && dst_lo_rc == rc_vs) {
       VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
       VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
-      if (cbuf) {
-        C2_MacroAssembler _masm(cbuf);
+      if (masm) {
         __ xxlor(Rdst, Rsrc, Rsrc);
       }
       size += 4;
@@ -1784,13 +1776,13 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     if (src_hi != OptoReg::Bad) {
       assert(src_hi_rc==rc_stack && dst_hi_rc==rc_stack,
              "expected same type of move for high parts");
-      size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE,  R0_num, src_offset, !do_size, C, st);
-      if (!cbuf && !do_size) st->print("\n\t");
-      size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st);
+      size += ld_st_helper(masm, "LD  ", Assembler::LD_OPCODE,  R0_num, src_offset, !do_size, C, st);
+      if (!masm && !do_size) st->print("\n\t");
+      size += ld_st_helper(masm, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st);
     } else {
-      size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st);
-      if (!cbuf && !do_size) st->print("\n\t");
-      size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st);
+      size += ld_st_helper(masm, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st);
+      if (!masm && !do_size) st->print("\n\t");
+      size += ld_st_helper(masm, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st);
     }
     return size;
   }
@@ -1808,8 +1800,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
       Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
       size = (Rsrc != Rdst) ? 4 : 0;
 
-      if (cbuf) {
-        C2_MacroAssembler _masm(cbuf);
+      if (masm) {
         if (size) {
           __ mr(Rdst, Rsrc);
         }
@@ -1832,9 +1823,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     if (src_hi != OptoReg::Bad) {
       assert(src_hi_rc==rc_int && dst_hi_rc==rc_stack,
              "expected same type of move for high parts");
-      size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st);
+      size += ld_st_helper(masm, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st);
     } else {
-      size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st);
+      size += ld_st_helper(masm, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st);
     }
     return size;
   }
@@ -1845,17 +1836,16 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     if (src_hi != OptoReg::Bad) {
       assert(dst_hi_rc==rc_int && src_hi_rc==rc_stack,
              "expected same type of move for high parts");
-      size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st);
+      size += ld_st_helper(masm, "LD  ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st);
     } else {
-      size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st);
+      size += ld_st_helper(masm, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st);
     }
     return size;
   }
 
   // Check for float reg-reg copy.
   if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
-    if (cbuf) {
-      C2_MacroAssembler _masm(cbuf);
+    if (masm) {
       FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
       FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
       __ fmr(Rdst, Rsrc);
@@ -1874,9 +1864,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     if (src_hi != OptoReg::Bad) {
       assert(src_hi_rc==rc_float && dst_hi_rc==rc_stack,
              "expected same type of move for high parts");
-      size += ld_st_helper(cbuf, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st);
+      size += ld_st_helper(masm, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st);
     } else {
-      size += ld_st_helper(cbuf, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st);
+      size += ld_st_helper(masm, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st);
     }
     return size;
   }
@@ -1887,9 +1877,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     if (src_hi != OptoReg::Bad) {
       assert(dst_hi_rc==rc_float && src_hi_rc==rc_stack,
              "expected same type of move for high parts");
-      size += ld_st_helper(cbuf, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st);
+      size += ld_st_helper(masm, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st);
     } else {
-      size += ld_st_helper(cbuf, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st);
+      size += ld_st_helper(masm, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st);
     }
     return size;
   }
@@ -1914,8 +1904,8 @@ void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  implementation(&cbuf, ra_, false, nullptr);
+void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  implementation(masm, ra_, false, nullptr);
 }
 
 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
@@ -1928,8 +1918,7 @@ void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
-  C2_MacroAssembler _masm(&cbuf);
+void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *) const {
   // _count contains the number of nops needed for padding.
   for (int i = 0; i < _count; i++) {
     __ nop();
@@ -1949,9 +1938,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
-
+void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
   int reg    = ra_->get_encode(this);
 
@@ -1974,10 +1961,8 @@ void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   // This is the unverified entry point.
-  C2_MacroAssembler _masm(&cbuf);
-
   __ ic_check(CodeEntryAlignment);
   // Argument is valid and klass is as expected, continue.
 }
@@ -1997,8 +1982,8 @@ class HandlerImpl {
 
  public:
 
-  static int emit_exception_handler(CodeBuffer &cbuf);
-  static int emit_deopt_handler(CodeBuffer& cbuf);
+  static int emit_exception_handler(C2_MacroAssembler *masm);
+  static int emit_deopt_handler(C2_MacroAssembler* masm);
 
   static uint size_exception_handler() {
     // The exception_handler is a b64_patchable.
@@ -2023,9 +2008,7 @@ public:
 
 source %{
 
-int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
-  C2_MacroAssembler _masm(&cbuf);
-
+int HandlerImpl::emit_exception_handler(C2_MacroAssembler *masm) {
   address base = __ start_a_stub(size_exception_handler());
   if (base == nullptr) {
     ciEnv::current()->record_failure("CodeCache is full");
@@ -2043,9 +2026,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
 
 // The deopt_handler is like the exception handler, but it calls to
 // the deoptimization blob instead of jumping to the exception blob.
-int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
-  C2_MacroAssembler _masm(&cbuf);
-
+int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
   address base = __ start_a_stub(size_deopt_handler());
   if (base == nullptr) {
     ciEnv::current()->record_failure("CodeCache is full");
@@ -2421,27 +2402,23 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 // needs for encoding need to be specified.
 encode %{
   enc_class enc_unimplemented %{
-    C2_MacroAssembler _masm(&cbuf);
     __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
   %}
 
   enc_class enc_untested %{
 #ifdef ASSERT
-    C2_MacroAssembler _masm(&cbuf);
     __ untested("Untested mach node encoding in AD file.");
 #else
 #endif
   %}
 
   enc_class enc_lbz(iRegIdst dst, memory mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
   %}
 
   // Load acquire.
   enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
     __ twi_0($dst$$Register);
@@ -2449,16 +2426,12 @@ encode %{
   %}
 
   enc_class enc_lhz(iRegIdst dst, memory mem) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
   %}
 
   // Load acquire.
   enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
     __ twi_0($dst$$Register);
@@ -2466,16 +2439,12 @@ encode %{
   %}
 
   enc_class enc_lwz(iRegIdst dst, memory mem) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
   %}
 
   // Load acquire.
   enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
     __ twi_0($dst$$Register);
@@ -2483,7 +2452,6 @@ encode %{
   %}
 
   enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     // Operand 'ds' requires 4-alignment.
     assert((Idisp & 0x3) == 0, "unaligned offset");
@@ -2492,7 +2460,6 @@ encode %{
 
   // Load acquire.
   enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     // Operand 'ds' requires 4-alignment.
     assert((Idisp & 0x3) == 0, "unaligned offset");
@@ -2502,14 +2469,11 @@ encode %{
   %}
 
   enc_class enc_lfd(RegF dst, memory mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
   %}
 
   enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     int toc_offset = 0;
 
     address const_toc_addr;
@@ -2531,9 +2495,6 @@ encode %{
   %}
 
   enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
-
-    C2_MacroAssembler _masm(&cbuf);
-
     if (!ra_->C->output()->in_scratch_emit_size()) {
       address const_toc_addr;
       // Create a non-oop constant, no relocation needed.
@@ -2765,8 +2726,6 @@ encode %{
   %}
 
   enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     int toc_offset = 0;
 
     intptr_t val = $src$$constant;
@@ -2799,8 +2758,6 @@ encode %{
   %}
 
   enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     if (!ra_->C->output()->in_scratch_emit_size()) {
       intptr_t val = $src$$constant;
       relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
@@ -2935,13 +2892,11 @@ encode %{
   %}
 
   enc_class enc_stw(iRegIsrc src, memory mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     __ stw($src$$Register, Idisp, $mem$$base$$Register);
   %}
 
   enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     // Operand 'ds' requires 4-alignment.
     assert((Idisp & 0x3) == 0, "unaligned offset");
@@ -2949,13 +2904,11 @@ encode %{
   %}
 
   enc_class enc_stfs(RegF src, memory mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
   %}
 
   enc_class enc_stfd(RegF src, memory mem) %{
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
   %}
@@ -3154,8 +3107,6 @@ encode %{
   %}
 
   enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     int cc        = $cmp$$cmpcode;
     int flags_reg = $crx$$reg;
     Label done;
@@ -3167,8 +3118,6 @@ encode %{
   %}
 
   enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     Label done;
     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
     // Branch if not (cmp crx).
@@ -3180,14 +3129,10 @@ encode %{
   // This enc_class is needed so that scheduler gets proper
   // input mapping for latency computation.
   enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     __ andc($dst$$Register, $src1$$Register, $src2$$Register);
   %}
 
   enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
-
-    C2_MacroAssembler _masm(&cbuf);
-
     Label done;
     __ cmpwi($crx$$CondRegister, $src$$Register, 0);
     __ li($dst$$Register, $zero$$constant);
@@ -3197,9 +3142,6 @@ encode %{
   %}
 
   enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
-
-    C2_MacroAssembler _masm(&cbuf);
-
     Label done;
     __ cmpdi($crx$$CondRegister, $src$$Register, 0);
     __ li($dst$$Register, $zero$$constant);
@@ -3209,8 +3151,6 @@ encode %{
   %}
 
   enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
     Label done;
     __ bso($crx$$CondRegister, done);
@@ -3219,8 +3159,6 @@ encode %{
   %}
 
   enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     Label done;
     __ bso($crx$$CondRegister, done);
     __ mffprd($dst$$Register, $src$$FloatRegister);
@@ -3228,8 +3166,6 @@ encode %{
   %}
 
   enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     Label d;   // dummy
     __ bind(d);
     Label* p = ($lbl$$label);
@@ -3257,8 +3193,6 @@ encode %{
   enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // The scheduler doesn't know about branch shortening, so we set the opcode
     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
-
-    C2_MacroAssembler _masm(&cbuf);
     Label d;    // dummy
     __ bind(d);
     Label* p = ($lbl$$label);
@@ -3333,7 +3267,6 @@ encode %{
     // Fake operand dst needed for PPC scheduler.
     assert($dst$$constant == 0x0, "dst must be 0x0");
 
-    C2_MacroAssembler _masm(&cbuf);
     // Mark the code position where the load from the safepoint
     // polling page was emitted as relocInfo::poll_type.
     __ relocate(relocInfo::poll_type);
@@ -3387,13 +3320,11 @@ encode %{
   //
   // Usage of r1 and r2 in the stubs allows to distinguish them.
   enc_class enc_java_static_call(method meth) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     address entry_point = (address)$meth$$method;
 
     if (!_method) {
       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
-      emit_call_with_trampoline_stub(_masm, entry_point, relocInfo::runtime_call_type);
+      emit_call_with_trampoline_stub(masm, entry_point, relocInfo::runtime_call_type);
       if (ciEnv::current()->failing()) { return; } // Code cache may be full.
     } else {
       // Remember the offset not the address.
@@ -3413,9 +3344,9 @@ encode %{
       const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 
       // Emit the trampoline stub which will be related to the branch-and-link below.
-      CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
+      CallStubImpl::emit_trampoline_stub(masm, entry_point_toc_offset, start_offset);
       if (ciEnv::current()->failing()) { return; } // Code cache may be full.
-      int method_index = resolved_method_index(cbuf);
+      int method_index = resolved_method_index(masm);
       __ relocate(_optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                   : static_call_Relocation::spec(method_index));
 
@@ -3423,11 +3354,12 @@ encode %{
       // Note: At this point we do not have the address of the trampoline
       // stub, and the entry point might be too far away for bl, so __ pc()
       // serves as dummy and the bl will be patched later.
-      cbuf.set_insts_mark();
+      __ set_inst_mark();
       __ bl(__ pc());  // Emits a relocation.
 
       // The stub for call to interpreter.
-      address stub = CompiledDirectCall::emit_to_interp_stub(cbuf);
+      address stub = CompiledDirectCall::emit_to_interp_stub(masm);
+      __ clear_inst_mark();
       if (stub == nullptr) {
         ciEnv::current()->record_failure("CodeCache is full");
         return;
@@ -3438,9 +3370,6 @@ encode %{
 
   // Second node of expanded dynamic call - the call.
   enc_class enc_java_dynamic_call_sched(method meth) %{
-
-    C2_MacroAssembler _masm(&cbuf);
-
     if (!ra_->C->output()->in_scratch_emit_size()) {
       // Create a call trampoline stub for the given method.
       const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
@@ -3450,7 +3379,7 @@ encode %{
         return;
       }
       const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
-      CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());
+      CallStubImpl::emit_trampoline_stub(masm, entry_point_const_toc_offset, __ offset());
       if (ra_->C->env()->failing()) { return; } // Code cache may be full.
 
       // Build relocation at call site with ic position as data.
@@ -3466,7 +3395,7 @@ encode %{
       const address virtual_call_oop_addr = __ addr_at(virtual_call_oop_addr_offset);
       assert(MacroAssembler::is_load_const_from_method_toc_at(virtual_call_oop_addr),
              "should be load from TOC");
-      int method_index = resolved_method_index(cbuf);
+      int method_index = resolved_method_index(masm);
       __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
     }
 
@@ -3541,7 +3470,6 @@ encode %{
   // Toc is only passed so that it can be used in ins_encode statement.
   // In the code we have to use $constanttablebase.
   enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
-    C2_MacroAssembler _masm(&cbuf);
     int start_offset = __ offset();
 
     Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
@@ -3564,7 +3492,7 @@ encode %{
       // CALL to fixup routine.  Fixup routine uses ScopeDesc info
       // to determine who we intended to call.
       __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
-      emit_call_with_trampoline_stub(_masm, (address)$meth$$method, relocInfo::none);
+      emit_call_with_trampoline_stub(masm, (address)$meth$$method, relocInfo::none);
       if (ciEnv::current()->failing()) { return; } // Code cache may be full.
       assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
              "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
@@ -3595,8 +3523,6 @@ encode %{
 
   // a runtime call
   enc_class enc_java_to_runtime_call (method meth) %{
-
-    C2_MacroAssembler _masm(&cbuf);
     const address start_pc = __ pc();
 
 #if defined(ABI_ELFv2)
@@ -3630,7 +3556,6 @@ encode %{
   // This enc_class is needed so that scheduler gets proper
   // input mapping for latency computation.
   enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
-    C2_MacroAssembler _masm(&cbuf);
     __ mtctr($src$$Register);
   %}
 
@@ -4085,7 +4010,7 @@ operand immN() %{
   interface(CONST_INTER);
 %}
 
-// Null Pointer Immediate
+// Null Pointer Immediate
 operand immN_0() %{
   predicate(n->get_narrowcon() == 0);
   match(ConN);
@@ -14581,8 +14506,9 @@ instruct RethrowException() %{
 
   format %{ "Jmp     rethrow_stub" %}
   ins_encode %{
-    cbuf.set_insts_mark();
+    __ set_inst_mark();
     __ b64_patchable((address)OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type);
+    __ clear_inst_mark();
   %}
   ins_pipe(pipe_class_call);
 %}
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index 66b19794b057e..666b46817f9e3 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -1760,8 +1760,7 @@ static void gen_continuation_enter(MacroAssembler* masm,
     __ b(L_exit);
 
     // static stub for the call above
-    CodeBuffer* cbuf = masm->code_section()->outer();
-    stub = CompiledDirectCall::emit_to_interp_stub(*cbuf, c2i_call_pc);
+    stub = CompiledDirectCall::emit_to_interp_stub(masm, c2i_call_pc);
     guarantee(stub != nullptr, "no space for static stub");
   }
 
@@ -1853,8 +1852,7 @@ static void gen_continuation_enter(MacroAssembler* masm,
   __ blr();
 
   // static stub for the call above
-  CodeBuffer* cbuf = masm->code_section()->outer();
-  stub = CompiledDirectCall::emit_to_interp_stub(*cbuf, call_pc);
+  stub = CompiledDirectCall::emit_to_interp_stub(masm, call_pc);
   guarantee(stub != nullptr, "no space for static stub");
 }
 
diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
index fdb2bcb06ff97..60dceb3ada734 100644
--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
@@ -35,23 +35,19 @@
 
 // ----------------------------------------------------------------------------
 
-#define __ _masm.
-address CompiledDirectCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
-  precond(cbuf.stubs()->start() != badAddress);
-  precond(cbuf.stubs()->end() != badAddress);
+#define __ masm->
+address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address mark) {
+  precond(__ code()->stubs()->start() != badAddress);
+  precond(__ code()->stubs()->end() != badAddress);
   // Stub is fixed up when the corresponding call is converted from
   // calling compiled code to calling interpreted code.
   // mv xmethod, 0
   // jalr -4 # to self
 
   if (mark == nullptr) {
-    mark = cbuf.insts_mark();  // Get mark within main instrs section.
+    mark = __ inst_mark();  // Get mark within main instrs section.
   }
 
-  // Note that the code buffer's insts_mark is always relative to insts.
-  // That's why we must use the macroassembler to generate a stub.
-  MacroAssembler _masm(&cbuf);
-
   address base = __ start_a_stub(to_interp_stub_size());
   int offset = __ offset();
   if (base == nullptr) {
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv.ad
index 6c855f23c2af1..81bcd3c1362af 100644
--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv.ad
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv.ad
@@ -41,7 +41,7 @@ instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, i
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::relaxed /* acquire */, Assembler::rl /* release */,
                                                    false /* is_cae */, $res$$Register);
   %}
@@ -62,7 +62,7 @@ instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, i
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::relaxed /* acquire */, Assembler::rl /* release */,
                                                    false /* is_cae */, $res$$Register);
   %}
@@ -84,7 +84,7 @@ instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::aq /* acquire */, Assembler::rl /* release */,
                                                    false /* is_cae */, $res$$Register);
   %}
@@ -106,7 +106,7 @@ instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::aq /* acquire */, Assembler::rl /* release */,
                                                    false /* is_cae */, $res$$Register);
   %}
@@ -126,7 +126,7 @@ instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldva
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::relaxed /* acquire */, Assembler::rl /* release */,
                                                    true /* is_cae */, $res$$Register);
   %}
@@ -146,7 +146,7 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::relaxed /* acquire */, Assembler::rl /* release */,
                                                    true /* is_cae */, $res$$Register);
   %}
@@ -168,7 +168,7 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
     // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::relaxed /* acquire */, Assembler::rl /* release */,
                                                    false /* is_cae */, $res$$Register);
   %}
@@ -189,7 +189,7 @@ instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN ol
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register);
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::aq /* acquire */, Assembler::rl /* release */,
                                                    true /* is_cae */, $res$$Register);
   %}
@@ -210,7 +210,7 @@ instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP ol
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register);
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::aq /* acquire */, Assembler::rl /* release */,
                                                    true /* is_cae */, $res$$Register);
   %}
@@ -230,7 +230,7 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva
   ins_encode %{
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::relaxed /* acquire */, Assembler::rl /* release */,
                                                    false /* is_cae */, $res$$Register);
   %}
@@ -253,7 +253,7 @@ instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN ol
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
     // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::aq /* acquire */, Assembler::rl /* release */,
                                                    false /* is_cae */, $res$$Register);
   %}
@@ -276,7 +276,7 @@ instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP ol
     Register tmp = $tmp$$Register;
     __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
     // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, $mem$$Register, tmp, $newval$$Register,
                                                    Assembler::aq /* acquire */, Assembler::rl /* release */,
                                                    false /* is_cae */, $res$$Register);
   %}
diff --git a/src/hotspot/cpu/riscv/gc/x/x_riscv.ad b/src/hotspot/cpu/riscv/gc/x/x_riscv.ad
index 3d0273109ace3..ef02f301c6aeb 100644
--- a/src/hotspot/cpu/riscv/gc/x/x_riscv.ad
+++ b/src/hotspot/cpu/riscv/gc/x/x_riscv.ad
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
 // Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
@@ -32,7 +32,7 @@ source_hpp %{
 
 source %{
 
-static void x_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) {
+static void x_load_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) {
   if (barrier_data == XLoadBarrierElided) {
     return;
   }
@@ -43,7 +43,7 @@ static void x_load_barrier(MacroAssembler& _masm, const MachNode* node, Address
   __ bind(*stub->continuation());
 }
 
-static void x_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
+static void x_load_barrier_slow_path(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
   XLoadBarrierStubC2* const stub = XLoadBarrierStubC2::create(node, ref_addr, ref, tmp, XLoadBarrierStrong);
   __ j(*stub->entry());
   __ bind(*stub->continuation());
@@ -65,7 +65,7 @@ instruct xLoadP(iRegPNoSp dst, memory mem, iRegPNoSp tmp)
   ins_encode %{
     const Address ref_addr (as_Register($mem$$base), $mem$$disp);
     __ ld($dst$$Register, ref_addr);
-    x_load_barrier(_masm, this, ref_addr, $dst$$Register, $tmp$$Register /* tmp */, barrier_data());
+    x_load_barrier(masm, this, ref_addr, $dst$$Register, $tmp$$Register /* tmp */, barrier_data());
   %}
 
   ins_pipe(iload_reg_mem);
@@ -94,7 +94,7 @@ instruct xCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newva
       __ ld(t0, Address(xthread, XThreadLocalData::address_bad_mask_offset()));
       __ andr(t0, t0, $tmp$$Register);
       __ beqz(t0, good);
-      x_load_barrier_slow_path(_masm, this, Address($mem$$Register), $tmp$$Register /* ref */, $res$$Register /* tmp */);
+      x_load_barrier_slow_path(masm, this, Address($mem$$Register), $tmp$$Register /* ref */, $res$$Register /* tmp */);
       __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
                  Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
                  true /* result_as_bool */);
@@ -128,7 +128,7 @@ instruct xCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne
       __ ld(t0, Address(xthread, XThreadLocalData::address_bad_mask_offset()));
       __ andr(t0, t0, $tmp$$Register);
       __ beqz(t0, good);
-      x_load_barrier_slow_path(_masm, this, Address($mem$$Register), $tmp$$Register /* ref */, $res$$Register /* tmp */);
+      x_load_barrier_slow_path(masm, this, Address($mem$$Register), $tmp$$Register /* ref */, $res$$Register /* tmp */);
       __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
                  Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
                  true /* result_as_bool */);
@@ -157,7 +157,7 @@ instruct xCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP n
       __ ld(t0, Address(xthread, XThreadLocalData::address_bad_mask_offset()));
       __ andr(t0, t0, $res$$Register);
       __ beqz(t0, good);
-      x_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, $tmp$$Register /* tmp */);
+      x_load_barrier_slow_path(masm, this, Address($mem$$Register), $res$$Register /* ref */, $tmp$$Register /* tmp */);
       __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
                  Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register);
       __ bind(good);
@@ -185,7 +185,7 @@ instruct xCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iReg
       __ ld(t0, Address(xthread, XThreadLocalData::address_bad_mask_offset()));
       __ andr(t0, t0, $res$$Register);
       __ beqz(t0, good);
-      x_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, $tmp$$Register /* tmp */);
+      x_load_barrier_slow_path(masm, this, Address($mem$$Register), $res$$Register /* ref */, $tmp$$Register /* tmp */);
       __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
                  Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register);
       __ bind(good);
@@ -206,7 +206,7 @@ instruct xGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp) %{
 
   ins_encode %{
     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
-    x_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, $tmp$$Register /* tmp */, barrier_data());
+    x_load_barrier(masm, this, Address(noreg, 0), $prev$$Register, $tmp$$Register /* tmp */, barrier_data());
   %}
 
   ins_pipe(pipe_serial);
@@ -223,7 +223,7 @@ instruct xGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp)
 
   ins_encode %{
     __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
-    x_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, $tmp$$Register /* tmp */, barrier_data());
+    x_load_barrier(masm, this, Address(noreg, 0), $prev$$Register, $tmp$$Register /* tmp */, barrier_data());
   %}
   ins_pipe(pipe_serial);
 %}
diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv.ad
index 29de823402980..4c94e504475ee 100644
--- a/src/hotspot/cpu/riscv/gc/z/z_riscv.ad
+++ b/src/hotspot/cpu/riscv/gc/z/z_riscv.ad
@@ -33,7 +33,7 @@ source_hpp %{
 source %{
 #include "gc/z/zBarrierSetAssembler.hpp"
 
-static void z_color(MacroAssembler& _masm, const MachNode* node, Register dst, Register src, Register tmp) {
+static void z_color(MacroAssembler* masm, const MachNode* node, Register dst, Register src, Register tmp) {
   assert_different_registers(dst, tmp);
 
   __ relocate(barrier_Relocation::spec(), [&] {
@@ -43,11 +43,11 @@ static void z_color(MacroAssembler& _masm, const MachNode* node, Register dst, R
   __ orr(dst, dst, tmp);
 }
 
-static void z_uncolor(MacroAssembler& _masm, const MachNode* node, Register ref) {
+static void z_uncolor(MacroAssembler* masm, const MachNode* node, Register ref) {
   __ srli(ref, ref, ZPointerLoadShift);
 }
 
-static void check_color(MacroAssembler& _masm, Register ref, bool on_non_strong, Register result) {
+static void check_color(MacroAssembler* masm, Register ref, bool on_non_strong, Register result) {
   int format = on_non_strong ? ZBarrierRelocationFormatMarkBadMask
                              : ZBarrierRelocationFormatLoadBadMask;
   __ relocate(barrier_Relocation::spec(), [&] {
@@ -56,35 +56,35 @@ static void check_color(MacroAssembler& _masm, Register ref, bool on_non_strong,
   __ andr(result, ref, result);
 }
 
-static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
+static void z_load_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
   const bool on_non_strong =
       ((node->barrier_data() & ZBarrierWeak) != 0) ||
       ((node->barrier_data() & ZBarrierPhantom) != 0);
 
   if (node->barrier_data() == ZBarrierElided) {
-    z_uncolor(_masm, node, ref);
+    z_uncolor(masm, node, ref);
     return;
   }
 
   ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref);
   Label good;
-  check_color(_masm, ref, on_non_strong, tmp);
+  check_color(masm, ref, on_non_strong, tmp);
   __ beqz(tmp, good);
   __ j(*stub->entry());
 
   __ bind(good);
-  z_uncolor(_masm, node, ref);
+  z_uncolor(masm, node, ref);
   __ bind(*stub->continuation());
 }
 
-static void z_store_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register rnew_zaddress, Register rnew_zpointer, Register tmp, bool is_atomic) {
+static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register rnew_zaddress, Register rnew_zpointer, Register tmp, bool is_atomic) {
   if (node->barrier_data() == ZBarrierElided) {
-    z_color(_masm, node, rnew_zpointer, rnew_zaddress, tmp);
+    z_color(masm, node, rnew_zpointer, rnew_zaddress, tmp);
   } else {
     bool is_native = (node->barrier_data() & ZBarrierNative) != 0;
     ZStoreBarrierStubC2* const stub = ZStoreBarrierStubC2::create(node, ref_addr, rnew_zaddress, rnew_zpointer, is_native, is_atomic);
     ZBarrierSetAssembler* bs_asm = ZBarrierSet::assembler();
-    bs_asm->store_barrier_fast(&_masm, ref_addr, rnew_zaddress, rnew_zpointer, tmp, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
+    bs_asm->store_barrier_fast(masm, ref_addr, rnew_zaddress, rnew_zpointer, tmp, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
   }
 }
 %}
@@ -103,7 +103,7 @@ instruct zLoadP(iRegPNoSp dst, memory mem, iRegPNoSp tmp)
   ins_encode %{
     const Address ref_addr(as_Register($mem$$base), $mem$$disp);
     __ ld($dst$$Register, ref_addr);
-    z_load_barrier(_masm, this, ref_addr, $dst$$Register, $tmp$$Register);
+    z_load_barrier(masm, this, ref_addr, $dst$$Register, $tmp$$Register);
   %}
 
   ins_pipe(iload_reg_mem);
@@ -120,7 +120,7 @@ instruct zStoreP(memory mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2)
   format %{ "sd    $mem, $src\t# ptr" %}
   ins_encode %{
     const Address ref_addr(as_Register($mem$$base), $mem$$disp);
-    z_store_barrier(_masm, this, ref_addr, $src$$Register, $tmp1$$Register, $tmp2$$Register, false /* is_atomic */);
+    z_store_barrier(masm, this, ref_addr, $src$$Register, $tmp1$$Register, $tmp2$$Register, false /* is_atomic */);
     __ sd($tmp1$$Register, ref_addr);
   %}
   ins_pipe(pipe_serial);
@@ -141,8 +141,8 @@ instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newva
   ins_encode %{
     guarantee($mem$$disp == 0, "impossible encoding");
     Address ref_addr($mem$$Register);
-    z_color(_masm, this, $oldval_tmp$$Register, $oldval$$Register, $tmp1$$Register);
-    z_store_barrier(_masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, $tmp1$$Register, true /* is_atomic */);
+    z_color(masm, this, $oldval_tmp$$Register, $oldval$$Register, $tmp1$$Register);
+    z_store_barrier(masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, $tmp1$$Register, true /* is_atomic */);
     __ cmpxchg($mem$$Register, $oldval_tmp$$Register, $newval_tmp$$Register, Assembler::int64, Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, true /* result_as_bool */);
   %}
 
@@ -164,8 +164,8 @@ instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne
   ins_encode %{
     guarantee($mem$$disp == 0, "impossible encoding");
     Address ref_addr($mem$$Register);
-    z_color(_masm, this, $oldval_tmp$$Register, $oldval$$Register, $tmp1$$Register);
-    z_store_barrier(_masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, $tmp1$$Register, true /* is_atomic */);
+    z_color(masm, this, $oldval_tmp$$Register, $oldval$$Register, $tmp1$$Register);
+    z_store_barrier(masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, $tmp1$$Register, true /* is_atomic */);
     __ cmpxchg($mem$$Register, $oldval_tmp$$Register, $newval_tmp$$Register, Assembler::int64, Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, true /* result_as_bool */);
   %}
 
@@ -185,10 +185,10 @@ instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP n
   ins_encode %{
     guarantee($mem$$disp == 0, "impossible encoding");
     Address ref_addr($mem$$Register);
-    z_color(_masm, this, $oldval_tmp$$Register, $oldval$$Register, $tmp1$$Register);
-    z_store_barrier(_masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, $tmp1$$Register, true /* is_atomic */);
+    z_color(masm, this, $oldval_tmp$$Register, $oldval$$Register, $tmp1$$Register);
+    z_store_barrier(masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, $tmp1$$Register, true /* is_atomic */);
     __ cmpxchg($mem$$Register, $oldval_tmp$$Register, $newval_tmp$$Register, Assembler::int64, Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register);
-    z_uncolor(_masm, this, $res$$Register);
+    z_uncolor(masm, this, $res$$Register);
   %}
 
   ins_pipe(pipe_slow);
@@ -207,10 +207,10 @@ instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iReg
   ins_encode %{
     guarantee($mem$$disp == 0, "impossible encoding");
     Address ref_addr($mem$$Register);
-    z_color(_masm, this, $oldval_tmp$$Register, $oldval$$Register, $tmp1$$Register);
-    z_store_barrier(_masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, $tmp1$$Register, true /* is_atomic */);
+    z_color(masm, this, $oldval_tmp$$Register, $oldval$$Register, $tmp1$$Register);
+    z_store_barrier(masm, this, ref_addr, $newval$$Register, $newval_tmp$$Register, $tmp1$$Register, true /* is_atomic */);
     __ cmpxchg($mem$$Register, $oldval_tmp$$Register, $newval_tmp$$Register, Assembler::int64, Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register);
-    z_uncolor(_masm, this, $res$$Register);
+    z_uncolor(masm, this, $res$$Register);
   %}
 
   ins_pipe(pipe_slow);
@@ -226,9 +226,9 @@ instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp) %{
   format %{ "atomic_xchg  $prev, $newv, [$mem], #@zGetAndSetP" %}
 
   ins_encode %{
-    z_store_barrier(_masm, this, Address($mem$$Register), $newv$$Register, $prev$$Register, $tmp$$Register, true /* is_atomic */);
+    z_store_barrier(masm, this, Address($mem$$Register), $newv$$Register, $prev$$Register, $tmp$$Register, true /* is_atomic */);
     __ atomic_xchg($prev$$Register, $prev$$Register, $mem$$Register);
-    z_uncolor(_masm, this, $prev$$Register);
+    z_uncolor(masm, this, $prev$$Register);
   %}
 
   ins_pipe(pipe_serial);
@@ -244,9 +244,9 @@ instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp)
   format %{ "atomic_xchg_acq  $prev, $newv, [$mem], #@zGetAndSetPAcq" %}
 
   ins_encode %{
-    z_store_barrier(_masm, this, Address($mem$$Register), $newv$$Register, $prev$$Register, $tmp$$Register, true /* is_atomic */);
+    z_store_barrier(masm, this, Address($mem$$Register), $newv$$Register, $prev$$Register, $tmp$$Register, true /* is_atomic */);
     __ atomic_xchgal($prev$$Register, $prev$$Register, $mem$$Register);
-    z_uncolor(_masm, this, $prev$$Register);
+    z_uncolor(masm, this, $prev$$Register);
   %}
   ins_pipe(pipe_serial);
 %}
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 56a2d71bb5f15..2c69486a0e595 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1060,8 +1060,8 @@ class HandlerImpl {
 
  public:
 
-  static int emit_exception_handler(CodeBuffer &cbuf);
-  static int emit_deopt_handler(CodeBuffer& cbuf);
+  static int emit_exception_handler(C2_MacroAssembler *masm);
+  static int emit_deopt_handler(C2_MacroAssembler* masm);
 
   static uint size_exception_handler() {
     return MacroAssembler::far_branch_size();
@@ -1207,7 +1207,7 @@ bool needs_acquiring_load_reserved(const Node *n)
   // so we can just return true here
   return true;
 }
-#define __ _masm.
+#define __ masm->
 
 // advance declarations for helper functions to convert register
 // indices to register objects
@@ -1291,8 +1291,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
+void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   __ ebreak();
 }
 
@@ -1308,9 +1307,8 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
   }
 #endif
 
-  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
-    C2_MacroAssembler _masm(&cbuf);
-    Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes.
+  void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
+    Assembler::CompressibleRegion cr(masm); // nops shall be 2-byte under RVC for alignment purposes.
     for (int i = 0; i < _count; i++) {
       __ nop();
     }
@@ -1332,7 +1330,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
   ShouldNotReachHere();
 }
 
-void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
   // Empty encoding
 }
 
@@ -1376,10 +1374,9 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   assert_cond(ra_ != nullptr);
   Compile* C = ra_->C;
-  C2_MacroAssembler _masm(&cbuf);
 
   // n.b. frame size includes space for return pc and fp
   const int framesize = C->output()->frame_size_in_bytes();
@@ -1387,7 +1384,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
   // insert a nop at the start of the prolog so we can patch in a
   // branch if we need to invalidate the method later
   {
-    Assembler::IncompressibleRegion ir(&_masm);  // keep the nop as 4 bytes for patching.
+    Assembler::IncompressibleRegion ir(masm);  // keep the nop as 4 bytes for patching.
     MacroAssembler::assert_alignment(__ pc());
     __ nop();  // 4 bytes
   }
@@ -1431,7 +1428,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
         guard = &stub->guard();
       }
       // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
-      bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
+      bs->nmethod_entry_barrier(masm, slow_path, continuation, guard);
     }
   }
 
@@ -1439,7 +1436,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
     Unimplemented();
   }
 
-  C->output()->set_frame_complete(cbuf.insts_size());
+  C->output()->set_frame_complete(__ offset());
 
   if (C->has_mach_constant_base_node()) {
     // NOTE: We set the table base offset here because users might be
@@ -1490,10 +1487,9 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   assert_cond(ra_ != nullptr);
   Compile* C = ra_->C;
-  C2_MacroAssembler _masm(&cbuf);
   assert_cond(C != nullptr);
   int framesize = C->output()->frame_size_in_bytes();
 
@@ -1567,7 +1563,7 @@ static enum RC rc_class(OptoReg::Name reg) {
   return rc_stack;
 }
 
-uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
+uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
   assert_cond(ra_ != nullptr);
   Compile* C = ra_->C;
 
@@ -1601,8 +1597,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
 
   if (bottom_type()->isa_vect() != nullptr) {
     uint ireg = ideal_reg();
-    if (ireg == Op_VecA && cbuf) {
-      C2_MacroAssembler _masm(cbuf);
+    if (ireg == Op_VecA && masm) {
       int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
         // stack to stack
@@ -1620,8 +1615,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
       } else {
         ShouldNotReachHere();
       }
-    } else if (bottom_type()->isa_vectmask() && cbuf) {
-      C2_MacroAssembler _masm(cbuf);
+    } else if (bottom_type()->isa_vectmask() && masm) {
       int vmask_size_in_bytes = Matcher::scalable_predicate_reg_slots() * 32 / 8;
       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
         // stack to stack
@@ -1640,8 +1634,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
         ShouldNotReachHere();
       }
     }
-  } else if (cbuf != nullptr) {
-    C2_MacroAssembler _masm(cbuf);
+  } else if (masm != nullptr) {
     switch (src_lo_rc) {
       case rc_int:
         if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
@@ -1753,8 +1746,8 @@ void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  implementation(&cbuf, ra_, false, nullptr);
+void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  implementation(masm, ra_, false, nullptr);
 }
 
 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
@@ -1773,9 +1766,8 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
-  Assembler::IncompressibleRegion ir(&_masm);  // Fixed length: see BoxLockNode::size()
+void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  Assembler::IncompressibleRegion ir(masm);  // Fixed length: see BoxLockNode::size()
 
   assert_cond(ra_ != nullptr);
   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
@@ -1820,10 +1812,9 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 }
 #endif
 
-void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 {
   // This is the unverified entry point.
-  C2_MacroAssembler _masm(&cbuf);
   __ ic_check(CodeEntryAlignment);
 
   // Verified entry point must be properly 4 bytes aligned for patching by NativeJump::patch_verified_entry().
@@ -1842,13 +1833,12 @@ uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 //=============================================================================
 
 // Emit exception handler code.
-int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
+int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
 {
   // auipc t0, #exception_blob_entry_point
   // jr (offset)t0
   // Note that the code buffer's insts_mark is always relative to insts.
   // That's why we must use the macroassembler to generate a handler.
-  C2_MacroAssembler _masm(&cbuf);
   address base = __ start_a_stub(size_exception_handler());
   if (base == nullptr) {
     ciEnv::current()->record_failure("CodeCache is full");
@@ -1862,11 +1852,8 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
 }
 
 // Emit deopt handler code.
-int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
+int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm)
 {
-  // Note that the code buffer's insts_mark is always relative to insts.
-  // That's why we must use the macroassembler to generate a handler.
-  C2_MacroAssembler _masm(&cbuf);
   address base = __ start_a_stub(size_deopt_handler());
   if (base == nullptr) {
     ciEnv::current()->record_failure("CodeCache is full");
@@ -2201,14 +2188,12 @@ encode %{
   // BEGIN Non-volatile memory access
 
   enc_class riscv_enc_mov_imm(iRegIorL dst, immIorL src) %{
-    C2_MacroAssembler _masm(&cbuf);
     int64_t con = (int64_t)$src$$constant;
     Register dst_reg = as_Register($dst$$reg);
     __ mv(dst_reg, con);
   %}
 
   enc_class riscv_enc_mov_p(iRegP dst, immP src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     address con = (address)$src$$constant;
     if (con == nullptr || con == (address)1) {
@@ -2227,18 +2212,15 @@ encode %{
   %}
 
   enc_class riscv_enc_mov_p1(iRegP dst) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     __ mv(dst_reg, 1);
   %}
 
   enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{
-    C2_MacroAssembler _masm(&cbuf);
     __ load_byte_map_base($dst$$Register);
   %}
 
   enc_class riscv_enc_mov_n(iRegN dst, immN src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     address con = (address)$src$$constant;
     if (con == nullptr) {
@@ -2251,13 +2233,11 @@ encode %{
   %}
 
   enc_class riscv_enc_mov_zero(iRegNorP dst) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     __ mv(dst_reg, zr);
   %}
 
   enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     address con = (address)$src$$constant;
     if (con == nullptr) {
@@ -2270,42 +2250,36 @@ encode %{
   %}
 
   enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
                /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
                /*result as bool*/ true);
   %}
 
   enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
                /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
                /*result as bool*/ true);
   %}
 
   enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
                /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
                /*result as bool*/ true);
   %}
 
   enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
                /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
                /*result as bool*/ true);
   %}
 
   enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
                /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
                /*result as bool*/ true);
   %}
 
   enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{
-    C2_MacroAssembler _masm(&cbuf);
     __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
                /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
                /*result as bool*/ true);
@@ -2314,13 +2288,11 @@ encode %{
   // compare and branch instruction encodings
 
   enc_class riscv_enc_j(label lbl) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label* L = $lbl$$label;
     __ j(*L);
   %}
 
   enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label* L = $lbl$$label;
     switch ($cmp$$cmpcode) {
       case(BoolTest::ge):
@@ -2344,7 +2316,6 @@ encode %{
 
     Label miss;
     Label done;
-    C2_MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                      nullptr, &miss);
     if ($primary) {
@@ -2363,8 +2334,7 @@ encode %{
   %}
 
   enc_class riscv_enc_java_static_call(method meth) %{
-    C2_MacroAssembler _masm(&cbuf);
-    Assembler::IncompressibleRegion ir(&_masm);  // Fixed length: see ret_addr_offset
+    Assembler::IncompressibleRegion ir(masm);  // Fixed length: see ret_addr_offset
 
     address addr = (address)$meth$$method;
     address call = nullptr;
@@ -2382,7 +2352,7 @@ encode %{
       __ nop();
       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
     } else {
-      int method_index = resolved_method_index(cbuf);
+      int method_index = resolved_method_index(masm);
       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                   : static_call_Relocation::spec(method_index);
       call = __ trampoline_call(Address(addr, rspec));
@@ -2394,10 +2364,10 @@ encode %{
       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
         // Calls of the same statically bound method can share
         // a stub to the interpreter.
-        cbuf.shared_stub_to_interp_for(_method, call - cbuf.insts_begin());
+        __ code()->shared_stub_to_interp_for(_method, call - (__ begin()));
       } else {
         // Emit stub for static call
-        address stub = CompiledDirectCall::emit_to_interp_stub(cbuf, call);
+        address stub = CompiledDirectCall::emit_to_interp_stub(masm, call);
         if (stub == nullptr) {
           ciEnv::current()->record_failure("CodeCache is full");
           return;
@@ -2409,9 +2379,8 @@ encode %{
   %}
 
   enc_class riscv_enc_java_dynamic_call(method meth) %{
-    C2_MacroAssembler _masm(&cbuf);
-    Assembler::IncompressibleRegion ir(&_masm);  // Fixed length: see ret_addr_offset
-    int method_index = resolved_method_index(cbuf);
+    Assembler::IncompressibleRegion ir(masm);  // Fixed length: see ret_addr_offset
+    int method_index = resolved_method_index(masm);
     address call = __ ic_call((address)$meth$$method, method_index);
     if (call == nullptr) {
       ciEnv::current()->record_failure("CodeCache is full");
@@ -2422,7 +2391,6 @@ encode %{
   %}
 
   enc_class riscv_enc_call_epilog() %{
-    C2_MacroAssembler _masm(&cbuf);
     if (VerifyStackAtCalls) {
       // Check that stack depth is unchanged: find majik cookie on stack
       __ call_Unimplemented();
@@ -2430,8 +2398,7 @@ encode %{
   %}
 
   enc_class riscv_enc_java_to_runtime(method meth) %{
-    C2_MacroAssembler _masm(&cbuf);
-    Assembler::IncompressibleRegion ir(&_masm);  // Fixed length: see ret_addr_offset
+    Assembler::IncompressibleRegion ir(masm);  // Fixed length: see ret_addr_offset
 
     // some calls to generated routines (arraycopy code) are scheduled
     // by C2 as runtime calls. if so we can call them using a jr (they
@@ -2463,7 +2430,6 @@ encode %{
   // arithmetic encodings
 
   enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register src1_reg = as_Register($src1$$reg);
     Register src2_reg = as_Register($src2$$reg);
@@ -2471,7 +2437,6 @@ encode %{
   %}
 
   enc_class riscv_enc_divuw(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register src1_reg = as_Register($src1$$reg);
     Register src2_reg = as_Register($src2$$reg);
@@ -2479,7 +2444,6 @@ encode %{
   %}
 
   enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register src1_reg = as_Register($src1$$reg);
     Register src2_reg = as_Register($src2$$reg);
@@ -2487,7 +2451,6 @@ encode %{
   %}
 
   enc_class riscv_enc_divu(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register src1_reg = as_Register($src1$$reg);
     Register src2_reg = as_Register($src2$$reg);
@@ -2495,7 +2458,6 @@ encode %{
   %}
 
   enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register src1_reg = as_Register($src1$$reg);
     Register src2_reg = as_Register($src2$$reg);
@@ -2503,7 +2465,6 @@ encode %{
   %}
 
   enc_class riscv_enc_moduw(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register src1_reg = as_Register($src1$$reg);
     Register src2_reg = as_Register($src2$$reg);
@@ -2511,7 +2472,6 @@ encode %{
   %}
 
   enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register src1_reg = as_Register($src1$$reg);
     Register src2_reg = as_Register($src2$$reg);
@@ -2519,7 +2479,6 @@ encode %{
   %}
 
   enc_class riscv_enc_modu(iRegI dst, iRegI src1, iRegI src2) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register dst_reg = as_Register($dst$$reg);
     Register src1_reg = as_Register($src1$$reg);
     Register src2_reg = as_Register($src2$$reg);
@@ -2527,13 +2486,11 @@ encode %{
   %}
 
   enc_class riscv_enc_tail_call(iRegP jump_target) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register target_reg = as_Register($jump_target$$reg);
     __ jr(target_reg);
   %}
 
   enc_class riscv_enc_tail_jmp(iRegP jump_target) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register target_reg = as_Register($jump_target$$reg);
     // exception oop should be in x10
     // ret addr has been popped into ra
@@ -2543,12 +2500,10 @@ encode %{
   %}
 
   enc_class riscv_enc_rethrow() %{
-    C2_MacroAssembler _masm(&cbuf);
     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
   %}
 
   enc_class riscv_enc_ret() %{
-    C2_MacroAssembler _masm(&cbuf);
     __ ret();
   %}
 
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
index 25ad0ba39ea22..36f4d499c38b4 100644
--- a/src/hotspot/cpu/riscv/riscv_v.ad
+++ b/src/hotspot/cpu/riscv/riscv_v.ad
@@ -30,19 +30,19 @@ opclass vmemA(indirect);
 
 source %{
 
-  static void loadStore(C2_MacroAssembler masm, bool is_store,
+  static void loadStore(C2_MacroAssembler* masm, bool is_store,
                         VectorRegister reg, BasicType bt, Register base,
                         uint vector_length, Assembler::VectorMask vm = Assembler::unmasked) {
     Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
-    masm.vsetvli_helper(bt, vector_length);
+    __ vsetvli_helper(bt, vector_length);
 
     if (is_store) {
-      masm.vsex_v(reg, base, sew, vm);
+      __ vsex_v(reg, base, sew, vm);
     } else {
       if (vm == Assembler::v0_t) {
-        masm.vxor_vv(reg, reg, reg);
+        __ vxor_vv(reg, reg, reg);
       }
-      masm.vlex_v(reg, base, sew, vm);
+      __ vlex_v(reg, base, sew, vm);
     }
   }
 
@@ -108,7 +108,7 @@ instruct loadV(vReg dst, vmemA mem) %{
   format %{ "loadV $dst, $mem\t# vector (rvv)" %}
   ins_encode %{
     VectorRegister dst_reg = as_VectorRegister($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), false, dst_reg,
+    loadStore(masm, false, dst_reg,
               Matcher::vector_element_basic_type(this), as_Register($mem$$base), Matcher::vector_length(this));
   %}
   ins_pipe(pipe_slow);
@@ -120,7 +120,7 @@ instruct storeV(vReg src, vmemA mem) %{
   format %{ "storeV $mem, $src\t# vector (rvv)" %}
   ins_encode %{
     VectorRegister src_reg = as_VectorRegister($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), true, src_reg,
+    loadStore(masm, true, src_reg,
               Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base), Matcher::vector_length(this, $src));
   %}
   ins_pipe(pipe_slow);
@@ -3154,7 +3154,7 @@ instruct loadV_masked(vReg dst, vmemA mem, vRegMask_V0 v0) %{
   format %{ "loadV_masked $dst, $mem, $v0" %}
   ins_encode %{
     VectorRegister dst_reg = as_VectorRegister($dst$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), false, dst_reg,
+    loadStore(masm, false, dst_reg,
               Matcher::vector_element_basic_type(this), as_Register($mem$$base),
               Matcher::vector_length(this), Assembler::v0_t);
   %}
@@ -3166,7 +3166,7 @@ instruct storeV_masked(vReg src, vmemA mem, vRegMask_V0 v0) %{
   format %{ "storeV_masked $mem, $src, $v0" %}
   ins_encode %{
     VectorRegister src_reg = as_VectorRegister($src$$reg);
-    loadStore(C2_MacroAssembler(&cbuf), true, src_reg,
+    loadStore(masm, true, src_reg,
               Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base),
               Matcher::vector_length(this, $src), Assembler::v0_t);
   %}
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
index 5945f9d5fe2ab..b84e31b40a7c6 100644
--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -974,8 +974,7 @@ static void gen_continuation_enter(MacroAssembler* masm,
 
     __ j(exit);
 
-    CodeBuffer* cbuf = masm->code_section()->outer();
-    address stub = CompiledDirectCall::emit_to_interp_stub(*cbuf, tr_call);
+    address stub = CompiledDirectCall::emit_to_interp_stub(masm, tr_call);
     if (stub == nullptr) {
       fatal("CodeCache is full at gen_continuation_enter");
     }
@@ -1040,8 +1039,7 @@ static void gen_continuation_enter(MacroAssembler* masm,
     __ jr(x11); // the exception handler
   }
 
-  CodeBuffer* cbuf = masm->code_section()->outer();
-  address stub = CompiledDirectCall::emit_to_interp_stub(*cbuf, tr_call);
+  address stub = CompiledDirectCall::emit_to_interp_stub(masm, tr_call);
   if (stub == nullptr) {
     fatal("CodeCache is full at gen_continuation_enter");
   }
diff --git a/src/hotspot/cpu/s390/compiledIC_s390.cpp b/src/hotspot/cpu/s390/compiledIC_s390.cpp
index 3adcfbc85f185..9c9073cb93d99 100644
--- a/src/hotspot/cpu/s390/compiledIC_s390.cpp
+++ b/src/hotspot/cpu/s390/compiledIC_s390.cpp
@@ -37,22 +37,18 @@
 // ----------------------------------------------------------------------------
 
 #undef  __
-#define __ _masm.
+#define __ masm->
 
-address CompiledDirectCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark/* = nullptr*/) {
+address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address mark/* = nullptr*/) {
 #ifdef COMPILER2
   // Stub is fixed up when the corresponding call is converted from calling
   // compiled code to calling interpreted code.
   if (mark == nullptr) {
     // Get the mark within main instrs section which is set to the address of the call.
-    mark = cbuf.insts_mark();
+    mark = __ inst_mark();
   }
   assert(mark != nullptr, "mark must not be null");
 
-  // Note that the code buffer's insts_mark is always relative to insts.
-  // That's why we must use the macroassembler to generate a stub.
-  MacroAssembler _masm(&cbuf);
-
   address stub = __ start_a_stub(CompiledDirectCall::to_interp_stub_size());
   if (stub == nullptr) {
     return nullptr;  // CodeBuffer::expand failed.
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index e2a11733cd6bc..28cac16864dee 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -584,7 +584,7 @@ source %{
 #define BIND(label)        __ bind(label); BLOCK_COMMENT(#label ":")
 #endif
 
-#define __ _masm.
+#define __ masm->
 
 #define Z_DISP_SIZE Immediate::is_uimm12((long)opnd_array(1)->disp(ra_,this,2)) ?  4 : 6
 #define Z_DISP3_SIZE 6
@@ -666,14 +666,12 @@ int CallLeafNoFPDirectNode::compute_padding(int current_offset) const {
   return (12 - current_offset) & 2;
 }
 
-void emit_nop(CodeBuffer &cbuf) {
-  C2_MacroAssembler _masm(&cbuf);
+void emit_nop(C2_MacroAssembler *masm) {
   __ z_nop();
 }
 
 // Emit an interrupt that is caught by the debugger (for debugging compiler).
-void emit_break(CodeBuffer &cbuf) {
-  C2_MacroAssembler _masm(&cbuf);
+void emit_break(C2_MacroAssembler *masm) {
   __ z_illtrap();
 }
 
@@ -683,51 +681,45 @@ void MachBreakpointNode::format(PhaseRegAlloc *, outputStream *os) const {
 }
 #endif
 
-void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  emit_break(cbuf);
+void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  emit_break(masm);
 }
 
 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
   return MachNode::size(ra_);
 }
 
-static inline void z_emit16(CodeBuffer &cbuf, long value) {
-  C2_MacroAssembler _masm(&cbuf);
+static inline void z_emit16(C2_MacroAssembler *masm, long value) {
   __ emit_instruction((unsigned long)value, 2);
 }
 
-static inline void z_emit32(CodeBuffer &cbuf, long value) {
-  C2_MacroAssembler _masm(&cbuf);
+static inline void z_emit32(C2_MacroAssembler *masm, long value) {
   __ emit_instruction((unsigned long)value, 4);
 }
 
-static inline void z_emit48(CodeBuffer &cbuf, long value) {
-  C2_MacroAssembler _masm(&cbuf);
+static inline void z_emit48(C2_MacroAssembler *masm, long value) {
   __ emit_instruction((unsigned long)value, 6);
 }
 
-static inline unsigned int z_emit_inst(CodeBuffer &cbuf, long value) {
+static inline unsigned int z_emit_inst(C2_MacroAssembler *masm, long value) {
   if (value < 0) {
     // There obviously has been an unintended sign extension (int->long). Revert it.
     value = (long)((unsigned long)((unsigned int)value));
   }
 
-  C2_MacroAssembler _masm(&cbuf);
   int len = __ emit_instruction((unsigned long)value, 0);
   return len;
 }
 
 // Check effective address (at runtime) for required alignment.
-static inline void z_assert_aligned(CodeBuffer &cbuf, int disp, Register index, Register base, int alignment) {
-  C2_MacroAssembler _masm(&cbuf);
-
+static inline void z_assert_aligned(C2_MacroAssembler *masm, int disp, Register index, Register base, int alignment) {
   __ z_lay(Z_R0, disp, index, base);
   __ z_nill(Z_R0, alignment-1);
   __ z_brc(Assembler::bcondEqual, +3);
   __ z_illtrap();
 }
 
-int emit_call_reloc(C2_MacroAssembler &_masm, intptr_t entry_point, relocInfo::relocType rtype,
+int emit_call_reloc(C2_MacroAssembler *masm, intptr_t entry_point, relocInfo::relocType rtype,
                     PhaseRegAlloc* ra_, bool is_native_call = false) {
   __ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
   address old_mark = __ inst_mark();
@@ -758,7 +750,7 @@ int emit_call_reloc(C2_MacroAssembler &_masm, intptr_t entry_point, relocInfo::r
   return (ret_off - start_off);
 }
 
-static int emit_call_reloc(C2_MacroAssembler &_masm, intptr_t entry_point, RelocationHolder const& rspec) {
+static int emit_call_reloc(C2_MacroAssembler *masm, intptr_t entry_point, RelocationHolder const& rspec) {
   __ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp().
   address old_mark = __ inst_mark();
   unsigned int start_off = __ offset();
@@ -790,8 +782,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
 
 // Even with PC-relative TOC addressing, we still need this node.
 // Float loads/stores do not support PC-relative addresses.
-void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
+void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
   Register Rtoc = as_Register(ra_->get_encode(this));
   __ load_toc(Rtoc);
 }
@@ -841,9 +832,8 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 }
 #endif
 
-void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
-  C2_MacroAssembler _masm(&cbuf);
 
   size_t framesize = C->output()->frame_size_in_bytes();
   size_t bangsize  = C->output()->bang_size_in_bytes();
@@ -892,10 +882,10 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 
   if (C->stub_function() == nullptr) {
     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-    bs->nmethod_entry_barrier(&_masm);
+    bs->nmethod_entry_barrier(masm);
   }
 
-  C->output()->set_frame_complete(cbuf.insts_size());
+  C->output()->set_frame_complete(__ offset());
 }
 
 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
@@ -921,8 +911,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
 }
 #endif
 
-void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
+void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
 
   // If this does safepoint polling, then do it here.
@@ -990,15 +979,15 @@ static enum RC rc_class(OptoReg::Name reg) {
 }
 
 // Returns size as obtained from z_emit_instr.
-static unsigned int z_ld_st_helper(CodeBuffer *cbuf, const char *op_str, unsigned long opcode,
+static unsigned int z_ld_st_helper(C2_MacroAssembler *masm, const char *op_str, unsigned long opcode,
                                    int reg, int offset, bool do_print, outputStream *os) {
 
-  if (cbuf) {
+  if (masm) {
     if (opcode > (1L<<32)) {
-      return z_emit_inst(*cbuf, opcode | Assembler::reg(Matcher::_regEncode[reg], 8, 48) |
+      return z_emit_inst(masm, opcode | Assembler::reg(Matcher::_regEncode[reg], 8, 48) |
                          Assembler::simm20(offset) | Assembler::reg(Z_R0, 12, 48) | Assembler::regz(Z_SP, 16, 48));
     } else {
-      return z_emit_inst(*cbuf, opcode | Assembler::reg(Matcher::_regEncode[reg], 8, 32) |
+      return z_emit_inst(masm, opcode | Assembler::reg(Matcher::_regEncode[reg], 8, 32) |
                          Assembler::uimm12(offset, 20, 32) | Assembler::reg(Z_R0, 12, 32) | Assembler::regz(Z_SP, 16, 32));
     }
   }
@@ -1011,9 +1000,8 @@ static unsigned int z_ld_st_helper(CodeBuffer *cbuf, const char *op_str, unsigne
   return (opcode > (1L << 32)) ? 6 : 4;
 }
 
-static unsigned int z_mvc_helper(CodeBuffer *cbuf, int len, int dst_off, int src_off, bool do_print, outputStream *os) {
-  if (cbuf) {
-    C2_MacroAssembler _masm(cbuf);
+static unsigned int z_mvc_helper(C2_MacroAssembler *masm, int len, int dst_off, int src_off, bool do_print, outputStream *os) {
+  if (masm) {
     __ z_mvc(dst_off, len-1, Z_SP, src_off, Z_SP);
   }
 
@@ -1026,7 +1014,7 @@ static unsigned int z_mvc_helper(CodeBuffer *cbuf, int len, int dst_off, int src
   return 6;
 }
 
-uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *os) const {
+uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream *os) const {
   // Get registers to move.
   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
@@ -1066,17 +1054,17 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
            "expected same type of move for high parts");
 
     if (src12 && dst12) {
-      return z_mvc_helper(cbuf, is64 ? 8 : 4, dst_offset, src_offset, print, os);
+      return z_mvc_helper(masm, is64 ? 8 : 4, dst_offset, src_offset, print, os);
     }
 
     int r0 = Z_R0_num;
     if (is64) {
-      return z_ld_st_helper(cbuf, "LG  ", LG_ZOPC, r0, src_offset, print, os) +
-             z_ld_st_helper(cbuf, "STG ", STG_ZOPC, r0, dst_offset, print, os);
+      return z_ld_st_helper(masm, "LG  ", LG_ZOPC, r0, src_offset, print, os) +
+             z_ld_st_helper(masm, "STG ", STG_ZOPC, r0, dst_offset, print, os);
     }
 
-    return z_ld_st_helper(cbuf, "LY   ", LY_ZOPC, r0, src_offset, print, os) +
-           z_ld_st_helper(cbuf, "STY  ", STY_ZOPC, r0, dst_offset, print, os);
+    return z_ld_st_helper(masm, "LY   ", LY_ZOPC, r0, src_offset, print, os) +
+           z_ld_st_helper(masm, "STY  ", STY_ZOPC, r0, dst_offset, print, os);
   }
 
   // Check for float->int copy. Requires a trip through memory.
@@ -1086,8 +1074,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
 
   // Check for integer reg-reg copy.
   if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
-    if (cbuf) {
-      C2_MacroAssembler _masm(cbuf);
+    if (masm) {
       Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
       Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
       __ z_lgr(Rdst, Rsrc);
@@ -1108,14 +1095,14 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
            "expected same type of move for high parts");
 
     if (is64) {
-      return z_ld_st_helper(cbuf, "STG ", STG_ZOPC, src_lo, dst_offset, print, os);
+      return z_ld_st_helper(masm, "STG ", STG_ZOPC, src_lo, dst_offset, print, os);
     }
 
     // else
     mnemo = dst12 ? "ST  " : "STY ";
     opc = dst12 ? ST_ZOPC : STY_ZOPC;
 
-    return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os);
+    return z_ld_st_helper(masm, mnemo, opc, src_lo, dst_offset, print, os);
   }
 
   // Check for integer load
@@ -1128,13 +1115,12 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     mnemo = is64 ? "LG  " : "LLGF";
     opc = is64 ? LG_ZOPC : LLGF_ZOPC;
 
-    return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os);
+    return z_ld_st_helper(masm, mnemo, opc, dst_lo, src_offset, print, os);
   }
 
   // Check for float reg-reg copy.
   if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
-    if (cbuf) {
-      C2_MacroAssembler _masm(cbuf);
+    if (masm) {
       FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
       FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
       __ z_ldr(Rdst, Rsrc);
@@ -1157,13 +1143,13 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     if (is64) {
       mnemo = dst12 ? "STD  " : "STDY ";
       opc = dst12 ? STD_ZOPC : STDY_ZOPC;
-      return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os);
+      return z_ld_st_helper(masm, mnemo, opc, src_lo, dst_offset, print, os);
     }
     // else
 
     mnemo = dst12 ? "STE  " : "STEY ";
     opc = dst12 ? STE_ZOPC : STEY_ZOPC;
-    return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os);
+    return z_ld_st_helper(masm, mnemo, opc, src_lo, dst_offset, print, os);
   }
 
   // Check for float load.
@@ -1174,13 +1160,13 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     if (is64) {
       mnemo = src12 ? "LD   " : "LDY  ";
       opc = src12 ? LD_ZOPC : LDY_ZOPC;
-      return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os);
+      return z_ld_st_helper(masm, mnemo, opc, dst_lo, src_offset, print, os);
     }
     // else
 
     mnemo = src12 ? "LE   " : "LEY  ";
     opc = src12 ? LE_ZOPC : LEY_ZOPC;
-    return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os);
+    return z_ld_st_helper(masm, mnemo, opc, dst_lo, src_offset, print, os);
   }
 
   // --------------------------------------------------------------------
@@ -1216,8 +1202,8 @@ void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
 }
 #endif
 
-void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  implementation(&cbuf, ra_, false, nullptr);
+void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  implementation(masm, ra_, false, nullptr);
 }
 
 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
@@ -1232,12 +1218,10 @@ void MachNopNode::format(PhaseRegAlloc *, outputStream *os) const {
 }
 #endif
 
-void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
-
+void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc * ra_) const {
   int rem_space = 0;
   if (!(ra_->C->output()->in_scratch_emit_size())) {
-    rem_space = cbuf.insts()->remaining();
+    rem_space = __ code()->insts()->remaining();
     if (rem_space <= _count*2 + 8) {
       tty->print("NopNode: _count = %3.3d, remaining space before = %d", _count, rem_space);
     }
@@ -1249,7 +1233,7 @@ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ra_) const {
 
   if (!(ra_->C->output()->in_scratch_emit_size())) {
     if (rem_space <= _count*2 + 8) {
-      int rem_space2 = cbuf.insts()->remaining();
+      int rem_space2 = __ code()->insts()->remaining();
       tty->print_cr(", after = %d", rem_space2);
     }
   }
@@ -1272,9 +1256,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
 #endif
 
 // Take care of the size function, if you make changes here!
-void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  C2_MacroAssembler _masm(&cbuf);
-
+void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
   int reg = ra_->get_encode(this);
   __ z_lay(as_Register(reg), offset, Z_SP);
@@ -1340,9 +1322,8 @@ void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
 }
 #endif
 
-void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   // This is Unverified Entry Point
-  C2_MacroAssembler _masm(&cbuf);
   __ ic_check(CodeEntryAlignment);
 }
 
@@ -1360,8 +1341,8 @@ source_hpp %{ // Header information of the source block.
 class HandlerImpl {
  public:
 
-  static int emit_exception_handler(CodeBuffer &cbuf);
-  static int emit_deopt_handler(CodeBuffer& cbuf);
+  static int emit_exception_handler(C2_MacroAssembler *masm);
+  static int emit_deopt_handler(C2_MacroAssembler* masm);
 
   static uint size_exception_handler() {
     return NativeJump::max_instruction_size();
@@ -1399,9 +1380,8 @@ source %{
 // 3) The handler will get patched such that it does not jump to the
 //   exception blob, but to an entry in the deoptimization blob being
 //   aware of the exception.
-int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
+int HandlerImpl::emit_exception_handler(C2_MacroAssembler *masm) {
   Register temp_reg = Z_R1;
-  C2_MacroAssembler _masm(&cbuf);
 
   address base = __ start_a_stub(size_exception_handler());
   if (base == nullptr) {
@@ -1422,8 +1402,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
 }
 
 // Emit deopt handler code.
-int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
-  C2_MacroAssembler _masm(&cbuf);
+int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
   address        base = __ start_a_stub(size_deopt_handler());
 
   if (base == nullptr) {
@@ -1701,13 +1680,11 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack,
 // needs for encoding need to be specified.
 encode %{
   enc_class enc_unimplemented %{
-    C2_MacroAssembler _masm(&cbuf);
     __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
   %}
 
   enc_class enc_untested %{
 #ifdef ASSERT
-    C2_MacroAssembler _masm(&cbuf);
     __ untested("Untested mach node encoding in AD file.");
 #endif
   %}
@@ -1715,21 +1692,21 @@ encode %{
   enc_class z_rrform(iRegI dst, iRegI src) %{
     assert((($primary >> 14) & 0x03) == 0, "Instruction format error");
     assert( ($primary >> 16)         == 0, "Instruction format error");
-    z_emit16(cbuf, $primary |
+    z_emit16(masm, $primary |
              Assembler::reg($dst$$reg,8,16) |
              Assembler::reg($src$$reg,12,16));
   %}
 
   enc_class z_rreform(iRegI dst1, iRegI src2) %{
     assert((($primary >> 30) & 0x03) == 2, "Instruction format error");
-    z_emit32(cbuf, $primary |
+    z_emit32(masm, $primary |
              Assembler::reg($dst1$$reg,24,32) |
              Assembler::reg($src2$$reg,28,32));
   %}
 
   enc_class z_rrfform(iRegI dst1, iRegI src2, iRegI src3) %{
     assert((($primary >> 30) & 0x03) == 2, "Instruction format error");
-    z_emit32(cbuf, $primary |
+    z_emit32(masm, $primary |
              Assembler::reg($dst1$$reg,24,32) |
              Assembler::reg($src2$$reg,28,32) |
              Assembler::reg($src3$$reg,16,32));
@@ -1737,21 +1714,21 @@ encode %{
 
   enc_class z_riform_signed(iRegI dst, immI16 src) %{
     assert((($primary>>30) & 0x03) == 2, "Instruction format error");
-    z_emit32(cbuf, $primary |
+    z_emit32(masm, $primary |
              Assembler::reg($dst$$reg,8,32) |
              Assembler::simm16($src$$constant,16,32));
   %}
 
   enc_class z_riform_unsigned(iRegI dst, uimmI16 src) %{
     assert((($primary>>30) & 0x03) == 2, "Instruction format error");
-    z_emit32(cbuf, $primary |
+    z_emit32(masm, $primary |
              Assembler::reg($dst$$reg,8,32) |
              Assembler::uimm16($src$$constant,16,32));
   %}
 
   enc_class z_rieform_d(iRegI dst1, iRegI src3, immI src2) %{
     assert((($primary>>46) & 0x03) == 3, "Instruction format error");
-    z_emit48(cbuf, $primary |
+    z_emit48(masm, $primary |
              Assembler::reg($dst1$$reg,8,48) |
              Assembler::reg($src3$$reg,12,48) |
              Assembler::simm16($src2$$constant,16,48));
@@ -1759,27 +1736,27 @@ encode %{
 
   enc_class z_rilform_signed(iRegI dst, immL32 src) %{
     assert((($primary>>46) & 0x03) == 3, "Instruction format error");
-    z_emit48(cbuf, $primary |
+    z_emit48(masm, $primary |
              Assembler::reg($dst$$reg,8,48) |
              Assembler::simm32($src$$constant,16,48));
   %}
 
   enc_class z_rilform_unsigned(iRegI dst, uimmL32 src) %{
     assert((($primary>>46) & 0x03) == 3, "Instruction format error");
-    z_emit48(cbuf, $primary |
+    z_emit48(masm, $primary |
              Assembler::reg($dst$$reg,8,48) |
              Assembler::uimm32($src$$constant,16,48));
   %}
 
   enc_class z_rsyform_const(iRegI dst, iRegI src1, immI src2) %{
-    z_emit48(cbuf, $primary |
+    z_emit48(masm, $primary |
              Assembler::reg($dst$$reg,8,48) |
              Assembler::reg($src1$$reg,12,48) |
              Assembler::simm20($src2$$constant));
   %}
 
   enc_class z_rsyform_reg_reg(iRegI dst, iRegI src, iRegI shft) %{
-    z_emit48(cbuf, $primary |
+    z_emit48(masm, $primary |
              Assembler::reg($dst$$reg,8,48) |
              Assembler::reg($src$$reg,12,48) |
              Assembler::reg($shft$$reg,16,48) |
@@ -1788,7 +1765,7 @@ encode %{
 
   enc_class z_rxform_imm_reg_reg(iRegL dst, immL con, iRegL src1, iRegL src2) %{
     assert((($primary>>30) & 0x03) == 1, "Instruction format error");
-    z_emit32(cbuf, $primary |
+    z_emit32(masm, $primary |
              Assembler::reg($dst$$reg,8,32) |
              Assembler::reg($src1$$reg,12,32) |
              Assembler::reg($src2$$reg,16,32) |
@@ -1797,14 +1774,14 @@ encode %{
 
   enc_class z_rxform_imm_reg(iRegL dst, immL con, iRegL src) %{
     assert((($primary>>30) & 0x03) == 1, "Instruction format error");
-    z_emit32(cbuf, $primary |
+    z_emit32(masm, $primary |
              Assembler::reg($dst$$reg,8,32) |
              Assembler::reg($src$$reg,16,32) |
              Assembler::uimm12($con$$constant,20,32));
   %}
 
   enc_class z_rxyform_imm_reg_reg(iRegL dst, immL con, iRegL src1, iRegL src2) %{
-    z_emit48(cbuf, $primary |
+    z_emit48(masm, $primary |
              Assembler::reg($dst$$reg,8,48) |
              Assembler::reg($src1$$reg,12,48) |
              Assembler::reg($src2$$reg,16,48) |
@@ -1812,7 +1789,7 @@ encode %{
   %}
 
   enc_class z_rxyform_imm_reg(iRegL dst, immL con, iRegL src) %{
-    z_emit48(cbuf, $primary |
+    z_emit48(masm, $primary |
              Assembler::reg($dst$$reg,8,48) |
              Assembler::reg($src$$reg,16,48) |
              Assembler::simm20($con$$constant));
@@ -1825,14 +1802,14 @@ encode %{
     int      con  = $src$$constant;
 
     assert(VM_Version::has_MemWithImmALUOps(), "unsupported CPU");
-    z_emit_inst(cbuf, $primary |
+    z_emit_inst(masm, $primary |
                 Assembler::regz(base,16,48) |
                 Assembler::simm20(disp) |
                 Assembler::simm8(con,8,48));
   %}
 
   enc_class z_silform(memoryRS mem, immI16 src) %{
-    z_emit_inst(cbuf, $primary |
+    z_emit_inst(masm, $primary |
                 Assembler::regz(reg_to_register_object($mem$$base),16,48) |
                 Assembler::uimm12($mem$$disp,20,48) |
                 Assembler::simm16($src$$constant,32,48));
@@ -1843,13 +1820,13 @@ encode %{
     Register Ridx = $mem$$index$$Register;
     if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0.
     if ($primary > (1L << 32)) {
-      z_emit_inst(cbuf, $primary |
+      z_emit_inst(masm, $primary |
                   Assembler::reg($dst$$reg, 8, 48) |
                   Assembler::uimm12($mem$$disp, 20, 48) |
                   Assembler::reg(Ridx, 12, 48) |
                   Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
     } else {
-      z_emit_inst(cbuf, $primary |
+      z_emit_inst(masm, $primary |
                   Assembler::reg($dst$$reg, 8, 32) |
                   Assembler::uimm12($mem$$disp, 20, 32) |
                   Assembler::reg(Ridx, 12, 32) |
@@ -1861,13 +1838,13 @@ encode %{
     Register Ridx = $mem$$index$$Register;
     if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0.
     if ($primary > (1L<<32)) {
-      z_emit_inst(cbuf, $primary |
+      z_emit_inst(masm, $primary |
                   Assembler::reg($dst$$reg, 8, 48) |
                   Assembler::simm20($mem$$disp) |
                   Assembler::reg(Ridx, 12, 48) |
                   Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
     } else {
-      z_emit_inst(cbuf, $primary |
+      z_emit_inst(masm, $primary |
                   Assembler::reg($dst$$reg, 8, 32) |
                   Assembler::uimm12($mem$$disp, 20, 32) |
                   Assembler::reg(Ridx, 12, 32) |
@@ -1881,22 +1858,21 @@ encode %{
     if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0.
 
     if (Displacement::is_shortDisp((long)$mem$$disp)) {
-      z_emit_inst(cbuf, $secondary |
+      z_emit_inst(masm, $secondary |
                   Assembler::reg($dst$$reg, 8, isize) |
                   Assembler::uimm12($mem$$disp, 20, isize) |
                   Assembler::reg(Ridx, 12, isize) |
                   Assembler::regz(reg_to_register_object($mem$$base), 16, isize));
     } else if (Displacement::is_validDisp((long)$mem$$disp)) {
-      z_emit_inst(cbuf, $primary |
+      z_emit_inst(masm, $primary |
                   Assembler::reg($dst$$reg, 8, 48) |
                   Assembler::simm20($mem$$disp) |
                   Assembler::reg(Ridx, 12, 48) |
                   Assembler::regz(reg_to_register_object($mem$$base), 16, 48));
     } else {
-        C2_MacroAssembler _masm(&cbuf);
         __ load_const_optimized(Z_R1_scratch, $mem$$disp);
         if (Ridx != Z_R0) { __ z_agr(Z_R1_scratch, Ridx); }
-        z_emit_inst(cbuf, $secondary |
+        z_emit_inst(masm, $secondary |
                     Assembler::reg($dst$$reg, 8, isize) |
                     Assembler::uimm12(0, 20, isize) |
                     Assembler::reg(Z_R1_scratch, 12, isize) |
@@ -1905,7 +1881,6 @@ encode %{
   %}
 
   enc_class z_enc_brul(Label lbl) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label* p = $lbl$$label;
 
     // 'p' is `nullptr' when this encoding class is used only to
@@ -1918,7 +1893,6 @@ encode %{
   %}
 
   enc_class z_enc_bru(Label lbl) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label* p = $lbl$$label;
 
     // 'p' is `nullptr' when this encoding class is used only to
@@ -1931,7 +1905,6 @@ encode %{
   %}
 
   enc_class z_enc_branch_con_far(cmpOp cmp, Label lbl) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label* p = $lbl$$label;
 
     // 'p' is `nullptr' when this encoding class is used only to
@@ -1944,7 +1917,6 @@ encode %{
   %}
 
   enc_class z_enc_branch_con_short(cmpOp cmp, Label lbl) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label* p = $lbl$$label;
 
     // 'p' is `nullptr' when this encoding class is used only to
@@ -1957,7 +1929,6 @@ encode %{
   %}
 
   enc_class z_enc_cmpb_regreg(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label* p = $lbl$$label;
 
     // 'p' is `nullptr' when this encoding class is used only to
@@ -1981,7 +1952,6 @@ encode %{
   %}
 
   enc_class z_enc_cmpb_regregFar(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label* p = $lbl$$label;
 
     // 'p' is `nullptr' when this encoding class is used only to
@@ -2007,7 +1977,6 @@ encode %{
   %}
 
   enc_class z_enc_cmpb_regimm(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label* p = $lbl$$label;
 
     // 'p' is `nullptr' when this encoding class is used only to
@@ -2032,7 +2001,6 @@ encode %{
   %}
 
   enc_class z_enc_cmpb_regimmFar(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{
-    C2_MacroAssembler _masm(&cbuf);
     Label* p = $lbl$$label;
 
     // 'p' is `nullptr' when this encoding class is used only to
@@ -2059,8 +2027,6 @@ encode %{
 
   // Call from Java to runtime.
   enc_class z_enc_java_to_runtime_call(method meth) %{
-    C2_MacroAssembler _masm(&cbuf);
-
     // Save return pc before call to the place where we need it, since
     // callee doesn't.
     unsigned int start_off = __ offset();
@@ -2087,36 +2053,37 @@ encode %{
   enc_class z_enc_java_static_call(method meth) %{
     // Call to fixup routine. Fixup routine uses ScopeDesc info to determine
     // whom we intended to call.
-    C2_MacroAssembler _masm(&cbuf);
     int ret_offset = 0;
 
     if (!_method) {
-      ret_offset = emit_call_reloc(_masm, $meth$$method,
+      ret_offset = emit_call_reloc(masm, $meth$$method,
                                    relocInfo::runtime_call_w_cp_type, ra_);
     } else {
-      int method_index = resolved_method_index(cbuf);
+      int method_index = resolved_method_index(masm);
       if (_optimized_virtual) {
-        ret_offset = emit_call_reloc(_masm, $meth$$method,
+        ret_offset = emit_call_reloc(masm, $meth$$method,
                                      opt_virtual_call_Relocation::spec(method_index));
       } else {
-        ret_offset = emit_call_reloc(_masm, $meth$$method,
+        ret_offset = emit_call_reloc(masm, $meth$$method,
                                      static_call_Relocation::spec(method_index));
       }
     }
     assert(__ inst_mark() != nullptr, "emit_call_reloc must set_inst_mark()");
 
     if (_method) { // Emit stub for static call.
-      address stub = CompiledDirectCall::emit_to_interp_stub(cbuf);
+      address stub = CompiledDirectCall::emit_to_interp_stub(masm);
       if (stub == nullptr) {
+        __ clear_inst_mark();
         ciEnv::current()->record_failure("CodeCache is full");
         return;
       }
     }
+
+    __ clear_inst_mark();
   %}
 
   // Java dynamic call
   enc_class z_enc_java_dynamic_call(method meth) %{
-    C2_MacroAssembler _masm(&cbuf);
     unsigned int start_off = __ offset();
 
     int vtable_index = this->_vtable_index;
@@ -2134,11 +2101,12 @@ encode %{
 
       // Call to fixup routine. Fixup routine uses ScopeDesc info
       // to determine who we intended to call.
-      int method_index = resolved_method_index(cbuf);
+      int method_index = resolved_method_index(masm);
       __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
       unsigned int ret_off = __ offset();
       assert(__ offset() - start_off == 6, "bad prelude len: %d", __ offset() - start_off);
-      ret_off += emit_call_reloc(_masm, $meth$$method, relocInfo::none, ra_);
+      ret_off += emit_call_reloc(masm, $meth$$method, relocInfo::none, ra_);
+      __ clear_inst_mark();
       assert(_method, "lazy_constant may be wrong when _method==null");
     } else {
       assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
@@ -2171,7 +2139,6 @@ encode %{
   %}
 
   enc_class z_enc_cmov_reg(cmpOp cmp, iRegI dst, iRegI src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register Rdst = reg_to_register_object($dst$$reg);
     Register Rsrc = reg_to_register_object($src$$reg);
 
@@ -2192,7 +2159,6 @@ encode %{
   %}
 
   enc_class z_enc_cmov_imm(cmpOp cmp, iRegI dst, immI16 src) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register Rdst = reg_to_register_object($dst$$reg);
     int      Csrc = $src$$constant;
     Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode;
@@ -2209,7 +2175,6 @@ encode %{
   %}
 
   enc_class z_enc_cctobool(iRegI res) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register Rres = reg_to_register_object($res$$reg);
 
     if (VM_Version::has_LoadStoreConditional()) {
@@ -2226,7 +2191,6 @@ encode %{
   %}
 
   enc_class z_enc_casI(iRegI compare_value, iRegI exchange_value, iRegP addr_ptr) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register Rcomp = reg_to_register_object($compare_value$$reg);
     Register Rnew  = reg_to_register_object($exchange_value$$reg);
     Register Raddr = reg_to_register_object($addr_ptr$$reg);
@@ -2235,7 +2199,6 @@ encode %{
   %}
 
   enc_class z_enc_casL(iRegL compare_value, iRegL exchange_value, iRegP addr_ptr) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register Rcomp = reg_to_register_object($compare_value$$reg);
     Register Rnew  = reg_to_register_object($exchange_value$$reg);
     Register Raddr = reg_to_register_object($addr_ptr$$reg);
@@ -2244,7 +2207,6 @@ encode %{
   %}
 
   enc_class z_enc_SwapI(memoryRSY mem, iRegI dst, iRegI tmp) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register Rdst = reg_to_register_object($dst$$reg);
     Register Rtmp = reg_to_register_object($tmp$$reg);
     guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
@@ -2260,7 +2222,6 @@ encode %{
   %}
 
   enc_class z_enc_SwapL(memoryRSY mem, iRegL dst, iRegL tmp) %{
-    C2_MacroAssembler _masm(&cbuf);
     Register Rdst = reg_to_register_object($dst$$reg);
     Register Rtmp = reg_to_register_object($tmp$$reg);
     guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
@@ -9558,9 +9519,10 @@ instruct RethrowException() %{
   // TODO: s390 port size(VARIABLE_SIZE);
   format %{ "Jmp    rethrow_stub" %}
   ins_encode %{
-    cbuf.set_insts_mark();
+    __ set_inst_mark();
     __ load_const_optimized(Z_R1_scratch, (address)OptoRuntime::rethrow_stub());
     __ z_br(Z_R1_scratch);
+    __ clear_inst_mark();
   %}
   ins_pipe(pipe_class_dummy);
 %}
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index bb04ae12fa834..0896fbd8bf5ee 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -4260,6 +4260,7 @@ void Assembler::vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
 
 void Assembler::vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx512_vbmi(), "");
+  InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_is_evex_instruction();
   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -4695,6 +4696,7 @@ void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
 
 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
+  InstructionMark im(this);
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -4712,6 +4714,7 @@ void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
 
 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
+  InstructionMark im(this);
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -4729,6 +4732,7 @@ void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
 
 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
+  InstructionMark im(this);
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -4746,6 +4750,7 @@ void Assembler::pextrb(Register dst, XMMRegister src, int imm8) {
 
 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
+  InstructionMark im(this);
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -4763,6 +4768,7 @@ void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
 
 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
+  InstructionMark im(this);
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -4787,6 +4793,7 @@ void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
 
 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
+  InstructionMark im(this);
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -4811,6 +4818,7 @@ void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
 
 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
+  InstructionMark im(this);
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -4828,6 +4836,7 @@ void Assembler::vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8
 
 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
+  InstructionMark im(this);
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -13318,21 +13327,25 @@ void Assembler::decq(Address dst) {
 }
 
 void Assembler::fxrstor(Address src) {
+  InstructionMark im(this);
   emit_int24(get_prefixq(src), 0x0F, (unsigned char)0xAE);
   emit_operand(as_Register(1), src, 0);
 }
 
 void Assembler::xrstor(Address src) {
+  InstructionMark im(this);
   emit_int24(get_prefixq(src), 0x0F, (unsigned char)0xAE);
   emit_operand(as_Register(5), src, 0);
 }
 
 void Assembler::fxsave(Address dst) {
+  InstructionMark im(this);
   emit_int24(get_prefixq(dst), 0x0F, (unsigned char)0xAE);
   emit_operand(as_Register(0), dst, 0);
 }
 
 void Assembler::xsave(Address dst) {
+  InstructionMark im(this);
   emit_int24(get_prefixq(dst), 0x0F, (unsigned char)0xAE);
   emit_operand(as_Register(4), dst, 0);
 }
diff --git a/src/hotspot/cpu/x86/c2_intelJccErratum_x86.cpp b/src/hotspot/cpu/x86/c2_intelJccErratum_x86.cpp
index fce09c31b9c9a..7b71a6bdbfc99 100644
--- a/src/hotspot/cpu/x86/c2_intelJccErratum_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_intelJccErratum_x86.cpp
@@ -114,13 +114,13 @@ int IntelJccErratum::compute_padding(uintptr_t current_offset, const MachNode* m
   }
 }
 
-#define __ _masm.
+#define __ _masm->
 
 uintptr_t IntelJccErratumAlignment::pc() {
   return (uintptr_t)__ pc();
 }
 
-IntelJccErratumAlignment::IntelJccErratumAlignment(MacroAssembler& masm, int jcc_size) :
+IntelJccErratumAlignment::IntelJccErratumAlignment(MacroAssembler* masm, int jcc_size) :
     _masm(masm),
     _start_pc(pc()) {
   if (!VM_Version::has_intel_jcc_erratum()) {
diff --git a/src/hotspot/cpu/x86/c2_intelJccErratum_x86.hpp b/src/hotspot/cpu/x86/c2_intelJccErratum_x86.hpp
index 415d8a99933e2..485a2d17c169b 100644
--- a/src/hotspot/cpu/x86/c2_intelJccErratum_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_intelJccErratum_x86.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,13 +54,13 @@ class IntelJccErratum : public AllStatic {
 
 class IntelJccErratumAlignment {
 private:
-  MacroAssembler& _masm;
+  MacroAssembler* _masm;
   uintptr_t       _start_pc;
 
   uintptr_t pc();
 
 public:
-  IntelJccErratumAlignment(MacroAssembler& masm, int jcc_size);
+  IntelJccErratumAlignment(MacroAssembler* masm, int jcc_size);
   ~IntelJccErratumAlignment();
 };
 
diff --git a/src/hotspot/cpu/x86/compiledIC_x86.cpp b/src/hotspot/cpu/x86/compiledIC_x86.cpp
index 95b41f62b6aab..e46f892b38802 100644
--- a/src/hotspot/cpu/x86/compiledIC_x86.cpp
+++ b/src/hotspot/cpu/x86/compiledIC_x86.cpp
@@ -34,21 +34,17 @@
 
 // ----------------------------------------------------------------------------
 
-#define __ _masm.
-address CompiledDirectCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
+#define __ masm->
+address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address mark) {
   // Stub is fixed up when the corresponding call is converted from
   // calling compiled code to calling interpreted code.
   // movq rbx, 0
   // jmp -5 # to self
 
   if (mark == nullptr) {
-    mark = cbuf.insts_mark();  // Get mark within main instrs section.
+    mark = __ inst_mark();  // Get mark within main instrs section.
   }
 
-  // Note that the code buffer's insts_mark is always relative to insts.
-  // That's why we must use the macroassembler to generate a stub.
-  MacroAssembler _masm(&cbuf);
-
   address base = __ start_a_stub(to_interp_stub_size());
   if (base == nullptr) {
     return nullptr;  // CodeBuffer::expand failed.
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad
index 8675a34324f4f..3cf82bf9fb197 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad
@@ -40,7 +40,7 @@ instruct compareAndSwapP_shenandoah(rRegI res,
   format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
 
   ins_encode %{
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
                                                    $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
                                                    false, // swap
                                                    $tmp1$$Register, $tmp2$$Register
@@ -61,7 +61,7 @@ instruct compareAndExchangeP_shenandoah(memory mem_ptr,
   format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
 
   ins_encode %{
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
                                                    noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
                                                    true,  // exchange
                                                    $tmp1$$Register, $tmp2$$Register
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad
index cb595f161686f..c580d21c9b869 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad
@@ -40,7 +40,7 @@ instruct compareAndSwapP_shenandoah(rRegI res,
   format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
 
   ins_encode %{
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
                                                    $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
                                                    false, // swap
                                                    $tmp1$$Register, $tmp2$$Register
@@ -61,7 +61,7 @@ instruct compareAndSwapN_shenandoah(rRegI res,
   format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
 
   ins_encode %{
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
                                                    $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
                                                    false, // swap
                                                    $tmp1$$Register, $tmp2$$Register
@@ -80,7 +80,7 @@ instruct compareAndExchangeN_shenandoah(memory mem_ptr,
   format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
 
   ins_encode %{
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
                                                    noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
                                                    true, // exchange
                                                    $tmp1$$Register, $tmp2$$Register
@@ -101,7 +101,7 @@ instruct compareAndExchangeP_shenandoah(memory mem_ptr,
   format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
 
   ins_encode %{
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm,
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
                                                    noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
                                                    true,  // exchange
                                                    $tmp1$$Register, $tmp2$$Register
diff --git a/src/hotspot/cpu/x86/gc/x/xBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/x/xBarrierSetAssembler_x86.cpp
index 38129a9fc81e5..4805b21308442 100644
--- a/src/hotspot/cpu/x86/gc/x/xBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/x/xBarrierSetAssembler_x86.cpp
@@ -375,7 +375,7 @@ OptoReg::Name XBarrierSetAssembler::refine_register(const Node* node, OptoReg::N
 }
 
 // We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
-extern void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
+extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                             int stack_offset, int reg, uint ireg, outputStream* st);
 
 #undef __
@@ -437,13 +437,15 @@ class XSaveLiveRegisters {
     const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
     const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
     _spill_offset -= reg_data._size;
-    vec_spill_helper(__ code(), false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
+    C2_MacroAssembler c2_masm(__ code());
+    vec_spill_helper(&c2_masm, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
   }
 
   void xmm_register_restore(const XMMRegisterData& reg_data) {
     const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
     const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
-    vec_spill_helper(__ code(), true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
+    C2_MacroAssembler c2_masm(__ code());
+    vec_spill_helper(&c2_masm, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
     _spill_offset += reg_data._size;
   }
 
diff --git a/src/hotspot/cpu/x86/gc/x/x_x86_64.ad b/src/hotspot/cpu/x86/gc/x/x_x86_64.ad
index c33a994a4b87a..116fb3cbc6d5e 100644
--- a/src/hotspot/cpu/x86/gc/x/x_x86_64.ad
+++ b/src/hotspot/cpu/x86/gc/x/x_x86_64.ad
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -33,34 +33,34 @@ source %{
 
 #include "c2_intelJccErratum_x86.hpp"
 
-static void x_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) {
+static void x_load_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) {
   if (barrier_data == XLoadBarrierElided) {
     return;
   }
   XLoadBarrierStubC2* const stub = XLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data);
   {
-    IntelJccErratumAlignment intel_alignment(_masm, 10 /* jcc_size */);
+    IntelJccErratumAlignment intel_alignment(masm, 10 /* jcc_size */);
     __ testptr(ref, Address(r15_thread, XThreadLocalData::address_bad_mask_offset()));
     __ jcc(Assembler::notZero, *stub->entry());
   }
   __ bind(*stub->continuation());
 }
 
-static void x_load_barrier_cmpxchg(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, Label& good) {
+static void x_load_barrier_cmpxchg(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, Label& good) {
   XLoadBarrierStubC2* const stub = XLoadBarrierStubC2::create(node, ref_addr, ref, tmp, XLoadBarrierStrong);
   {
-    IntelJccErratumAlignment intel_alignment(_masm, 10 /* jcc_size */);
+    IntelJccErratumAlignment intel_alignment(masm, 10 /* jcc_size */);
     __ testptr(ref, Address(r15_thread, XThreadLocalData::address_bad_mask_offset()));
     __ jcc(Assembler::zero, good);
   }
   {
-    IntelJccErratumAlignment intel_alignment(_masm, 5 /* jcc_size */);
+    IntelJccErratumAlignment intel_alignment(masm, 5 /* jcc_size */);
     __ jmp(*stub->entry());
   }
   __ bind(*stub->continuation());
 }
 
-static void x_cmpxchg_common(MacroAssembler& _masm, const MachNode* node, Register mem_reg, Register newval, Register tmp) {
+static void x_cmpxchg_common(MacroAssembler* masm, const MachNode* node, Register mem_reg, Register newval, Register tmp) {
   // Compare value (oldval) is in rax
    const Address mem = Address(mem_reg, 0);
 
@@ -73,7 +73,7 @@ static void x_cmpxchg_common(MacroAssembler& _masm, const MachNode* node, Regist
 
   if (node->barrier_data() != XLoadBarrierElided) {
     Label good;
-    x_load_barrier_cmpxchg(_masm, node, mem, rax, tmp, good);
+    x_load_barrier_cmpxchg(masm, node, mem, rax, tmp, good);
     __ movptr(rax, tmp);
     __ lock();
     __ cmpxchgptr(newval, mem);
@@ -96,7 +96,7 @@ instruct xLoadP(rRegP dst, memory mem, rFlagsReg cr)
 
   ins_encode %{
     __ movptr($dst$$Register, $mem$$Address);
-    x_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, barrier_data());
+    x_load_barrier(masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, barrier_data());
   %}
 
   ins_pipe(ialu_reg_mem);
@@ -112,7 +112,7 @@ instruct xCompareAndExchangeP(indirect mem, rax_RegP oldval, rRegP newval, rRegP
 
   ins_encode %{
     precond($oldval$$Register == rax);
-    x_cmpxchg_common(_masm, this, $mem$$Register, $newval$$Register, $tmp$$Register);
+    x_cmpxchg_common(masm, this, $mem$$Register, $newval$$Register, $tmp$$Register);
   %}
 
   ins_pipe(pipe_cmpxchg);
@@ -131,7 +131,7 @@ instruct xCompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp, rFla
 
   ins_encode %{
     precond($oldval$$Register == rax);
-    x_cmpxchg_common(_masm, this, $mem$$Register, $newval$$Register, $tmp$$Register);
+    x_cmpxchg_common(masm, this, $mem$$Register, $newval$$Register, $tmp$$Register);
     if (barrier_data() != XLoadBarrierElided) {
       __ cmpptr($tmp$$Register, rax);
     }
@@ -151,7 +151,7 @@ instruct xXChgP(indirect mem, rRegP newval, rFlagsReg cr) %{
 
   ins_encode %{
     __ xchgptr($newval$$Register, Address($mem$$Register, 0));
-    x_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, barrier_data());
+    x_load_barrier(masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, barrier_data());
   %}
 
   ins_pipe(pipe_cmpxchg);
diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
index 6cb16a09d5574..5f0b73bf23f7c 100644
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
@@ -356,7 +356,7 @@ static void emit_store_fast_path_check_c2(MacroAssembler* masm, Address ref_addr
   // This is a JCC erratum mitigation wrapper for calling the inner check
   int size = store_fast_path_check_size(masm, ref_addr, is_atomic, medium_path);
   // Emit JCC erratum mitigation nops with the right size
-  IntelJccErratumAlignment intel_alignment(*masm, size);
+  IntelJccErratumAlignment intel_alignment(masm, size);
   // Emit the JCC erratum mitigation guarded code
   emit_store_fast_path_check(masm, ref_addr, is_atomic, medium_path);
 #endif
@@ -1184,7 +1184,7 @@ OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::N
 }
 
 // We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
-extern void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
+extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                             int stack_offset, int reg, uint ireg, outputStream* st);
 
 #undef __
@@ -1246,13 +1246,15 @@ class ZSaveLiveRegisters {
     const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
     const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
     _spill_offset -= reg_data._size;
-    vec_spill_helper(__ code(), false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
+    C2_MacroAssembler c2_masm(__ code());
+    vec_spill_helper(&c2_masm, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
   }
 
   void xmm_register_restore(const XMMRegisterData& reg_data) {
     const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
     const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
-    vec_spill_helper(__ code(), true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
+    C2_MacroAssembler c2_masm(__ code());
+    vec_spill_helper(&c2_masm, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
     _spill_offset += reg_data._size;
   }
 
diff --git a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad
index 0cc2ea03b3537..1a4499c3d447d 100644
--- a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad
+++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad
@@ -34,66 +34,66 @@ source %{
 #include "c2_intelJccErratum_x86.hpp"
 #include "gc/z/zBarrierSetAssembler.hpp"
 
-static void z_color(MacroAssembler& _masm, const MachNode* node, Register ref) {
+static void z_color(MacroAssembler* masm, const MachNode* node, Register ref) { // Emits shlq then orq_imm32 on ref with unpatched operands, with a barrier relocation before the shift and after the or; node is not used in the body.
   __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatLoadGoodBeforeShl);
   __ shlq(ref, barrier_Relocation::unpatched);
   __ orq_imm32(ref, barrier_Relocation::unpatched);
   __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatStoreGoodAfterOr);
 }
 
-static void z_uncolor(MacroAssembler& _masm, const MachNode* node, Register ref) {
+static void z_uncolor(MacroAssembler* masm, const MachNode* node, Register ref) { // Emits a barrier relocation followed by shrq on ref with an unpatched shift operand; node is not used in the body.
   __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatLoadGoodBeforeShl);
   __ shrq(ref, barrier_Relocation::unpatched);
 }
 
-static void z_keep_alive_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref) {
+static void z_keep_alive_load_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref) { // Tests ref against an unpatched mask and jumps to a ZLoadBarrierStubC2 on notEqual; otherwise falls through to uncolor ref.
   __ Assembler::testl(ref, barrier_Relocation::unpatched);
   __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatMarkBadAfterTest);
 
   ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref);
   __ jcc(Assembler::notEqual, *stub->entry());
 
-  z_uncolor(_masm, node, ref);
+  z_uncolor(masm, node, ref); // fall-through path only; the stub's continuation label is bound after this
 
   __ bind(*stub->continuation());
 }
 
-static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref) {
-  Assembler::InlineSkippedInstructionsCounter skipped_counter(&_masm);
+static void z_load_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register ref) { // Weak/phantom loads use the keep-alive variant; all others uncolor ref and, unless the barrier is elided, branch to a stub on 'above'.
+  Assembler::InlineSkippedInstructionsCounter skipped_counter(masm); // masm is already a pointer, so no address-of is needed
   const bool on_non_strong =
       ((node->barrier_data() & ZBarrierWeak) != 0) ||
       ((node->barrier_data() & ZBarrierPhantom) != 0);
 
   if (on_non_strong) {
-    z_keep_alive_load_barrier(_masm, node, ref_addr, ref);
+    z_keep_alive_load_barrier(masm, node, ref_addr, ref); // emits its own test, stub branch and uncolor
     return;
   }
 
-  z_uncolor(_masm, node, ref);
+  z_uncolor(masm, node, ref); // emitted even when the barrier is elided; only the stub branch below is skipped
   if (node->barrier_data() == ZBarrierElided) {
     return;
   }
   ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref);
   {
-    IntelJccErratumAlignment intel_alignment(_masm, 6);
+    IntelJccErratumAlignment intel_alignment(masm, 6); // scoped around the jcc below; 6 is presumably that jcc's encoded size in bytes - TODO confirm
     __ jcc(Assembler::above, *stub->entry());
   }
   __ bind(*stub->continuation());
 }
 
-static void z_store_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register rnew_zaddress, Register rnew_zpointer, bool is_atomic) {
-  Assembler::InlineSkippedInstructionsCounter skipped_counter(&_masm);
+static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Address ref_addr, Register rnew_zaddress, Register rnew_zpointer, bool is_atomic) { // Elided barrier: copy rnew_zaddress into rnew_zpointer and color it (skipped for noreg). Otherwise: create a store stub and emit the fast-path check.
+  Assembler::InlineSkippedInstructionsCounter skipped_counter(masm); // masm is already a pointer, so no address-of is needed
   if (node->barrier_data() == ZBarrierElided) {
     if (rnew_zaddress != noreg) {
       // noreg means null; no need to color
       __ movptr(rnew_zpointer, rnew_zaddress);
-      z_color(_masm, node, rnew_zpointer);
+      z_color(masm, node, rnew_zpointer); // colors the copy in rnew_zpointer; rnew_zaddress is left as it was
     }
   } else {
     bool is_native = (node->barrier_data() & ZBarrierNative) != 0;
     ZStoreBarrierStubC2* const stub = ZStoreBarrierStubC2::create(node, ref_addr, rnew_zaddress, rnew_zpointer, is_native, is_atomic);
     ZBarrierSetAssembler* bs_asm = ZBarrierSet::assembler();
-    bs_asm->store_barrier_fast(&_masm, ref_addr, rnew_zaddress, rnew_zpointer, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
+    bs_asm->store_barrier_fast(masm, ref_addr, rnew_zaddress, rnew_zpointer, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation()); // stub entry and continuation labels are handed to the shared fast-path emitter
   }
 }
 
@@ -124,7 +124,7 @@ instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr)
 
   ins_encode %{
     __ movptr($dst$$Register, $mem$$Address);
-    z_load_barrier(_masm, this, $mem$$Address, $dst$$Register);
+    z_load_barrier(masm, this, $mem$$Address, $dst$$Register);
   %}
 
   ins_pipe(ialu_reg_mem);
@@ -156,7 +156,7 @@ instruct zStoreP(memory mem, any_RegP src, rRegP tmp, rFlagsReg cr)
   ins_cost(125); // XXX
   format %{ "movq    $mem, $src\t# ptr" %}
   ins_encode %{
-    z_store_barrier(_masm, this, $mem$$Address, $src$$Register, $tmp$$Register, false /* is_atomic */);
+    z_store_barrier(masm, this, $mem$$Address, $src$$Register, $tmp$$Register, false /* is_atomic */);
     __ movq($mem$$Address, $tmp$$Register);
   %}
   ins_pipe(ialu_mem_reg);
@@ -172,7 +172,7 @@ instruct zStorePNull(memory mem, immP0 zero, rRegP tmp, rFlagsReg cr)
   ins_cost(125); // XXX
   format %{ "movq    $mem, 0\t# ptr" %}
   ins_encode %{
-    z_store_barrier(_masm, this, $mem$$Address, noreg, $tmp$$Register, false /* is_atomic */);
+    z_store_barrier(masm, this, $mem$$Address, noreg, $tmp$$Register, false /* is_atomic */);
     // Store a colored null - barrier code above does not need to color
     __ movq($mem$$Address, barrier_Relocation::unpatched);
     // The relocation cant be fully after the mov, as that is the beginning of a random subsequent
@@ -194,11 +194,11 @@ instruct zCompareAndExchangeP(indirect mem, no_rax_RegP newval, rRegP tmp, rax_R
     assert_different_registers($oldval$$Register, $mem$$Register);
     assert_different_registers($oldval$$Register, $newval$$Register);
     const Address mem_addr = Address($mem$$Register, 0);
-    z_store_barrier(_masm, this, mem_addr, $newval$$Register, $tmp$$Register, true /* is_atomic */);
-    z_color(_masm, this, $oldval$$Register);
+    z_store_barrier(masm, this, mem_addr, $newval$$Register, $tmp$$Register, true /* is_atomic */);
+    z_color(masm, this, $oldval$$Register);
     __ lock();
     __ cmpxchgptr($tmp$$Register, mem_addr);
-    z_uncolor(_masm, this, $oldval$$Register);
+    z_uncolor(masm, this, $oldval$$Register);
   %}
 
   ins_pipe(pipe_cmpxchg);
@@ -218,8 +218,8 @@ instruct zCompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp, rax_
   ins_encode %{
     assert_different_registers($oldval$$Register, $mem$$Register);
     const Address mem_addr = Address($mem$$Register, 0);
-    z_store_barrier(_masm, this, mem_addr, $newval$$Register, $tmp$$Register, true /* is_atomic */);
-    z_color(_masm, this, $oldval$$Register);
+    z_store_barrier(masm, this, mem_addr, $newval$$Register, $tmp$$Register, true /* is_atomic */);
+    z_color(masm, this, $oldval$$Register);
     __ lock();
     __ cmpxchgptr($tmp$$Register, mem_addr);
     __ setb(Assembler::equal, $res$$Register);
@@ -239,10 +239,10 @@ instruct zXChgP(indirect mem, rRegP newval, rRegP tmp, rFlagsReg cr) %{
   ins_encode %{
     assert_different_registers($mem$$Register, $newval$$Register);
     const Address mem_addr = Address($mem$$Register, 0);
-    z_store_barrier(_masm, this, mem_addr, $newval$$Register, $tmp$$Register, true /* is_atomic */);
+    z_store_barrier(masm, this, mem_addr, $newval$$Register, $tmp$$Register, true /* is_atomic */);
     __ movptr($newval$$Register, $tmp$$Register);
     __ xchgptr($newval$$Register, mem_addr);
-    z_uncolor(_masm, this, $newval$$Register);
+    z_uncolor(masm, this, $newval$$Register);
   %}
 
   ins_pipe(pipe_cmpxchg);
diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
index c666f982d0f52..0c1dc865c78bf 100644
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
@@ -1441,8 +1441,7 @@ static void gen_continuation_enter(MacroAssembler* masm,
     // Make sure the call is patchable
     __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset);
     // Emit stub for static call
-    CodeBuffer* cbuf = masm->code_section()->outer();
-    address stub = CompiledDirectCall::emit_to_interp_stub(*cbuf, __ pc());
+    address stub = CompiledDirectCall::emit_to_interp_stub(masm, __ pc());
     if (stub == nullptr) {
       fatal("CodeCache is full at gen_continuation_enter");
     }
@@ -1478,8 +1477,7 @@ static void gen_continuation_enter(MacroAssembler* masm,
   __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset);
 
   // Emit stub for static call
-  CodeBuffer* cbuf = masm->code_section()->outer();
-  address stub = CompiledDirectCall::emit_to_interp_stub(*cbuf, __ pc());
+  address stub = CompiledDirectCall::emit_to_interp_stub(masm, __ pc());
   if (stub == nullptr) {
     fatal("CodeCache is full at gen_continuation_enter");
   }
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index 54306d02ea082..0b262bb9c37d4 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -1187,8 +1187,8 @@ class HandlerImpl {
 
  public:
 
-  static int emit_exception_handler(CodeBuffer &cbuf);
-  static int emit_deopt_handler(CodeBuffer& cbuf);
+  static int emit_exception_handler(C2_MacroAssembler *masm);
+  static int emit_deopt_handler(C2_MacroAssembler* masm);
 
   static uint size_exception_handler() {
     // NativeCall instruction size is the same as NativeJump.
@@ -1306,11 +1306,10 @@ int MachNode::compute_padding(int current_offset) const {
 
 // Emit exception handler code.
 // Stuff framesize into a register and call a VM stub routine.
-int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
+int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
 
   // Note that the code buffer's insts_mark is always relative to insts.
   // That's why we must use the macroassembler to generate a handler.
-  C2_MacroAssembler _masm(&cbuf);
   address base = __ start_a_stub(size_exception_handler());
   if (base == nullptr) {
     ciEnv::current()->record_failure("CodeCache is full");
@@ -1324,11 +1323,10 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
 }
 
 // Emit deopt handler code.
-int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
+int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 
   // Note that the code buffer's insts_mark is always relative to insts.
   // That's why we must use the macroassembler to generate a handler.
-  C2_MacroAssembler _masm(&cbuf);
   address base = __ start_a_stub(size_deopt_handler());
   if (base == nullptr) {
     ciEnv::current()->record_failure("CodeCache is full");
@@ -2523,14 +2521,13 @@ static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_
 }
 
 // Helper methods for MachSpillCopyNode::implementation().
-static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
+static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
   assert(ireg == Op_VecS || // 32bit vector
          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
          "no non-adjacent vector moves" );
-  if (cbuf) {
-    C2_MacroAssembler _masm(cbuf);
+  if (masm) {
     switch (ireg) {
     case Op_VecS: // copy whole register
     case Op_VecD:
@@ -2581,10 +2578,9 @@ static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
   }
 }
 
-void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
+void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st) {
-  if (cbuf) {
-    C2_MacroAssembler _masm(cbuf);
+  if (masm) {
     if (is_load) {
       switch (ireg) {
       case Op_VecS:
@@ -2742,8 +2738,7 @@ static inline jlong high_bit_set(BasicType bt) {
   }
 #endif
 
-  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
-    C2_MacroAssembler _masm(&cbuf);
+  void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { // Emits nop(_count) with the assembler passed in; no local C2_MacroAssembler is built any more.
     __ nop(_count);
   }
 
@@ -2757,8 +2752,7 @@ static inline jlong high_bit_set(BasicType bt) {
   }
 #endif
 
-  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
-    C2_MacroAssembler _masm(&cbuf);
+  void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { // Emits int3 with the assembler passed in; ra_ is not used and no local C2_MacroAssembler is built any more.
     __ int3();
   }
 
@@ -2771,7 +2765,6 @@ static inline jlong high_bit_set(BasicType bt) {
 encode %{
 
   enc_class call_epilog %{
-    C2_MacroAssembler _masm(&cbuf);
     if (VerifyStackAtCalls) {
       // Check that stack depth is unchanged: find majik cookie on stack
       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad
index 36ec4d283ed5f..39c09efc49ce3 100644
--- a/src/hotspot/cpu/x86/x86_32.ad
+++ b/src/hotspot/cpu/x86/x86_32.ad
@@ -252,7 +252,7 @@ source %{
 #define   RELOC_IMM32    Assembler::imm_operand
 #define   RELOC_DISP32   Assembler::disp32_operand
 
-#define __ _masm.
+#define __ masm->
 
 // How to find the high register of a Long pair, given the low register
 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
@@ -337,107 +337,107 @@ int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 }
 
 // EMIT_RM()
-void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
+void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) { // Emits one byte: f1 shifted left by 6, f2 shifted left by 3, f3 in the low bits, OR-ed together.
   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
-  cbuf.insts()->emit_int8(c);
+  __ emit_int8(c); // __ is defined as masm-> in this file
 }
 
 // EMIT_CC()
-void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
+void emit_cc(C2_MacroAssembler *masm, int f1, int f2) { // Emits the single byte (f1 | f2).
   unsigned char c = (unsigned char)( f1 | f2 );
-  cbuf.insts()->emit_int8(c);
+  __ emit_int8(c); // __ is defined as masm-> in this file
 }
 
 // EMIT_OPCODE()
-void emit_opcode(CodeBuffer &cbuf, int code) {
-  cbuf.insts()->emit_int8((unsigned char) code);
+void emit_opcode(C2_MacroAssembler *masm, int code) { // Emits code as one byte.
+  __ emit_int8((unsigned char) code); // the cast keeps only the low 8 bits of code
 }
 
 // EMIT_OPCODE() w/ relocation information
-void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
-  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
-  emit_opcode(cbuf, code);
+void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) { // Records a relocation of type reloc, then emits the opcode byte.
+  __ relocate(__ inst_mark() + offset, reloc); // relocation is placed at inst_mark() + offset; presumably the caller must have set the mark - TODO confirm
+  emit_opcode(masm, code); // delegates to the two-argument overload
 }
 
 // EMIT_D8()
-void emit_d8(CodeBuffer &cbuf, int d8) {
-  cbuf.insts()->emit_int8((unsigned char) d8);
+void emit_d8(C2_MacroAssembler *masm, int d8) { // Emits d8 as one byte.
+  __ emit_int8((unsigned char) d8); // the cast keeps only the low 8 bits of d8
 }
 
 // EMIT_D16()
-void emit_d16(CodeBuffer &cbuf, int d16) {
-  cbuf.insts()->emit_int16(d16);
+void emit_d16(C2_MacroAssembler *masm, int d16) { // Emits d16 through the assembler's emit_int16.
+  __ emit_int16(d16); // __ is defined as masm-> in this file
 }
 
 // EMIT_D32()
-void emit_d32(CodeBuffer &cbuf, int d32) {
-  cbuf.insts()->emit_int32(d32);
+void emit_d32(C2_MacroAssembler *masm, int d32) { // Emits d32 through the assembler's emit_int32.
+  __ emit_int32(d32); // __ is defined as masm-> in this file
 }
 
 // emit 32 bit value and construct relocation entry from relocInfo::relocType
-void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
+void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc, // Records a relocation (type reloc, given format) at inst_mark(), then emits d32.
         int format) {
-  cbuf.relocate(cbuf.insts_mark(), reloc, format);
-  cbuf.insts()->emit_int32(d32);
+  __ relocate(__ inst_mark(), reloc, format); // uses the assembler's current instruction mark as the relocation address
+  __ emit_int32(d32); // the 32-bit value itself
 }
 
 // emit 32 bit value and construct relocation entry from RelocationHolder
-void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
+void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec, // Same as the relocType overload but takes a ready-made RelocationHolder; ASSERT builds also check oop-typed values.
         int format) {
 #ifdef ASSERT
   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
   }
 #endif
-  cbuf.relocate(cbuf.insts_mark(), rspec, format);
-  cbuf.insts()->emit_int32(d32);
+  __ relocate(__ inst_mark(), rspec, format); // uses the assembler's current instruction mark as the relocation address
+  __ emit_int32(d32); // the 32-bit value itself
 }
 
 // Access stack slot for load or store
-void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
-  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
+void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) { // Emits opcode, an R/M byte and an ESP/ESP SIB byte, then disp as 8 bits if it fits in [-128, 127], else as 32 bits.
+  emit_opcode( masm, opcode );               // (e.g., FILD   [ESP+src])
   if( -128 <= disp && disp <= 127 ) {
-    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
-    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
-    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
+    emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte (mod = 01: 8-bit displacement follows)
+    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
+    emit_d8 (masm, disp);     // Displacement (8-bit)
   } else {
-    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
-    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
-    emit_d32(cbuf, disp);     // Displacement  // R/M byte
+    emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte (mod = 10: 32-bit displacement follows)
+    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
+    emit_d32(masm, disp);     // Displacement (32-bit)
   }
 }
 
    // rRegI ereg, memory mem) %{    // emit_reg_mem
-void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
+void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
   // There is no index & no scale, use form without SIB byte
   if ((index == 0x4) &&
       (scale == 0) && (base != ESP_enc)) {
     // If no displacement, mode is 0x0; unless base is [EBP]
     if ( (displace == 0) && (base != EBP_enc) ) {
-      emit_rm(cbuf, 0x0, reg_encoding, base);
+      emit_rm(masm, 0x0, reg_encoding, base);
     }
     else {                    // If 8-bit displacement, mode 0x1
       if ((displace >= -128) && (displace <= 127)
           && (disp_reloc == relocInfo::none) ) {
-        emit_rm(cbuf, 0x1, reg_encoding, base);
-        emit_d8(cbuf, displace);
+        emit_rm(masm, 0x1, reg_encoding, base);
+        emit_d8(masm, displace);
       }
       else {                  // If 32-bit displacement
         if (base == -1) { // Special flag for absolute address
-          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
+          emit_rm(masm, 0x0, reg_encoding, 0x5);
           // (manual lies; no SIB needed here)
           if ( disp_reloc != relocInfo::none ) {
-            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
+            emit_d32_reloc(masm, displace, disp_reloc, 1);
           } else {
-            emit_d32      (cbuf, displace);
+            emit_d32      (masm, displace);
           }
         }
         else {                // Normal base + offset
-          emit_rm(cbuf, 0x2, reg_encoding, base);
+          emit_rm(masm, 0x2, reg_encoding, base);
           if ( disp_reloc != relocInfo::none ) {
-            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
+            emit_d32_reloc(masm, displace, disp_reloc, 1);
           } else {
-            emit_d32      (cbuf, displace);
+            emit_d32      (masm, displace);
           }
         }
       }
@@ -446,28 +446,28 @@ void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int
   else {                      // Else, encode with the SIB byte
     // If no displacement, mode is 0x0; unless base is [EBP]
     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
-      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
-      emit_rm(cbuf, scale, index, base);
+      emit_rm(masm, 0x0, reg_encoding, 0x4);
+      emit_rm(masm, scale, index, base);
     }
     else {                    // If 8-bit displacement, mode 0x1
       if ((displace >= -128) && (displace <= 127)
           && (disp_reloc == relocInfo::none) ) {
-        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
-        emit_rm(cbuf, scale, index, base);
-        emit_d8(cbuf, displace);
+        emit_rm(masm, 0x1, reg_encoding, 0x4);
+        emit_rm(masm, scale, index, base);
+        emit_d8(masm, displace);
       }
       else {                  // If 32-bit displacement
         if (base == 0x04 ) {
-          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
-          emit_rm(cbuf, scale, index, 0x04);
+          emit_rm(masm, 0x2, reg_encoding, 0x4);
+          emit_rm(masm, scale, index, 0x04);
         } else {
-          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
-          emit_rm(cbuf, scale, index, base);
+          emit_rm(masm, 0x2, reg_encoding, 0x4);
+          emit_rm(masm, scale, index, base);
         }
         if ( disp_reloc != relocInfo::none ) {
-          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
+          emit_d32_reloc(masm, displace, disp_reloc, 1);
         } else {
-          emit_d32      (cbuf, displace);
+          emit_d32      (masm, displace);
         }
       }
     }
@@ -475,16 +475,16 @@ void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int
 }
 
 
-void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
+void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) { // Emits a two-byte register-to-register move, or nothing when source and destination encodings are equal.
   if( dst_encoding == src_encoding ) {
     // reg-reg copy, use an empty encoding
   } else {
-    emit_opcode( cbuf, 0x8B );
-    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
+    emit_opcode( masm, 0x8B ); // 0x8B: MOV r32, r/m32
+    emit_rm(masm, 0x3, dst_encoding, src_encoding ); // mod = 11 (register-direct): reg field = dst, r/m field = src
   }
 }
 
-void emit_cmpfp_fixup(MacroAssembler& _masm) {
+void emit_cmpfp_fixup(MacroAssembler* masm) {
   Label exit;
   __ jccb(Assembler::noParity, exit);
   __ pushf();
@@ -504,7 +504,7 @@ void emit_cmpfp_fixup(MacroAssembler& _masm) {
   __ bind(exit);
 }
 
-static void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
+static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
   Label done;
   __ movl(dst, -1);
   __ jcc(Assembler::parity, done);
@@ -527,7 +527,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
   ShouldNotReachHere();
 }
 
-void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const { // Intentionally emits nothing; masm and ra_ are unused.
   // Empty encoding
 }
 
@@ -607,16 +607,15 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 #endif
 
 
-void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
-  C2_MacroAssembler _masm(&cbuf);
 
   int framesize = C->output()->frame_size_in_bytes();
   int bangsize = C->output()->bang_size_in_bytes();
 
   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);
 
-  C->output()->set_frame_complete(cbuf.insts_size());
+  C->output()->set_frame_complete(__ offset());
 
   if (C->has_mach_constant_base_node()) {
     // NOTE: We set the table base offset here because users might be
@@ -664,18 +663,17 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 }
 #endif
 
-void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile *C = ra_->C;
-  MacroAssembler _masm(&cbuf);
 
   if (C->max_vector_size() > 16) {
     // Clear upper bits of YMM registers when current compiled code uses
     // wide vectors to avoid AVX <-> SSE transition penalty during call.
-    _masm.vzeroupper();
+    __ vzeroupper();
   }
   // If method set FPU control word, restore to standard control word
   if (C->in_24_bit_fp_mode()) {
-    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
+    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
   }
 
   int framesize = C->output()->frame_size_in_bytes();
@@ -686,16 +684,16 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 
   if (framesize >= 128) {
-    emit_opcode(cbuf, 0x81); // add  SP, #framesize
-    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
-    emit_d32(cbuf, framesize);
+    emit_opcode(masm, 0x81); // add  SP, #framesize
+    emit_rm(masm, 0x3, 0x00, ESP_enc);
+    emit_d32(masm, framesize);
   } else if (framesize) {
-    emit_opcode(cbuf, 0x83); // add  SP, #framesize
-    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
-    emit_d8(cbuf, framesize);
+    emit_opcode(masm, 0x83); // add  SP, #framesize
+    emit_rm(masm, 0x3, 0x00, ESP_enc);
+    emit_d8(masm, framesize);
   }
 
-  emit_opcode(cbuf, 0x58 | EBP_enc);
+  emit_opcode(masm, 0x58 | EBP_enc);
 
   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
     __ reserved_stack_check();
@@ -703,7 +701,6 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 
   if (do_polling() && C->is_method_compilation()) {
     Register thread = as_Register(EBX_enc);
-    MacroAssembler masm(&cbuf);
     __ get_thread(thread);
     Label dummy_label;
     Label* code_stub = &dummy_label;
@@ -712,7 +709,9 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
       C->output()->add_stub(stub);
       code_stub = &stub->entry();
     }
+    __ set_inst_mark();
     __ relocate(relocInfo::poll_return_type);
+    __ clear_inst_mark();
     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
   }
 }
@@ -749,11 +748,13 @@ static enum RC rc_class( OptoReg::Name reg ) {
   return rc_xmm;
 }
 
-static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
+static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg,
                         int opcode, const char *op_str, int size, outputStream* st ) {
-  if( cbuf ) {
-    emit_opcode  (*cbuf, opcode );
-    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
+  if( masm ) {
+    masm->set_inst_mark();
+    emit_opcode  (masm, opcode );
+    encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
+    masm->clear_inst_mark();
 #ifndef PRODUCT
   } else if( !do_size ) {
     if( size != 0 ) st->print("\n\t");
@@ -770,7 +771,7 @@ static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset
 }
 
 // Helper for XMM registers.  Extra opcode bits, limited syntax.
-static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
+static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
   int in_size_in_bits = Assembler::EVEX_32bit;
   int evex_encoding = 0;
@@ -778,11 +779,10 @@ static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
     in_size_in_bits = Assembler::EVEX_64bit;
     evex_encoding = Assembler::VEX_W;
   }
-  if (cbuf) {
-    MacroAssembler _masm(cbuf);
+  if (masm) {
     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
     //                          it maps more cases to single byte displacement
-    _masm.set_managed();
+    __ set_managed();
     if (reg_lo+1 == reg_hi) { // double move?
       if (is_load) {
         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
@@ -829,12 +829,11 @@ static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 }
 
 
-static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
-  if (cbuf) {
-    MacroAssembler _masm(cbuf);
+  if (masm) {
     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
-    _masm.set_managed();
+    __ set_managed();
     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                 as_XMMRegister(Matcher::_regEncode[src_lo]));
@@ -868,13 +867,12 @@ static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst
   return size + sz;
 }
 
-static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
   // 32-bit
-  if (cbuf) {
-    MacroAssembler _masm(cbuf);
+  if (masm) {
     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
-    _masm.set_managed();
+    __ set_managed();
     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
              as_Register(Matcher::_regEncode[src_lo]));
 #ifndef PRODUCT
@@ -886,13 +884,12 @@ static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int
 }
 
 
-static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                  int src_hi, int dst_hi, int size, outputStream* st ) {
   // 32-bit
-  if (cbuf) {
-    MacroAssembler _masm(cbuf);
+  if (masm) {
     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
-    _masm.set_managed();
+    __ set_managed();
     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
              as_XMMRegister(Matcher::_regEncode[src_lo]));
 #ifndef PRODUCT
@@ -903,10 +900,10 @@ static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int
   return (UseAVX> 2) ? 6 : 4;
 }
 
-static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
-  if( cbuf ) {
-    emit_opcode(*cbuf, 0x8B );
-    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
+static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
+  if( masm ) {
+    emit_opcode(masm, 0x8B );
+    emit_rm    (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 #ifndef PRODUCT
   } else if( !do_size ) {
     if( size != 0 ) st->print("\n\t");
@@ -916,12 +913,12 @@ static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, in
   return size+2;
 }
 
-static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
+static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                  int offset, int size, outputStream* st ) {
   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
-    if( cbuf ) {
-      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
-      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
+    if( masm ) {
+      emit_opcode( masm, 0xD9 );  // FLD (i.e., push it)
+      emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
 #ifndef PRODUCT
     } else if( !do_size ) {
       if( size != 0 ) st->print("\n\t");
@@ -943,20 +940,19 @@ static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int
     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
   }
 
-  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
+  return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
 }
 
 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
-static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
+static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 
-void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
+void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                             int stack_offset, int reg, uint ireg, outputStream* st);
 
-static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
+static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
-  if (cbuf) {
-    MacroAssembler _masm(cbuf);
+  if (masm) {
     switch (ireg) {
     case Op_VecS:
       __ pushl(Address(rsp, src_offset));
@@ -1032,7 +1028,7 @@ static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
   }
 }
 
-uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
+uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
   // Get registers to move
   OptoReg::Name src_second = ra_->get_reg_second(in(1));
   OptoReg::Name src_first = ra_->get_reg_first(in(1));
@@ -1061,15 +1057,15 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
       // mem -> mem
       int src_offset = ra_->reg2offset(src_first);
       int dst_offset = ra_->reg2offset(dst_first);
-      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
+      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
-      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
+      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
       int stack_offset = ra_->reg2offset(dst_first);
-      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
+      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
       int stack_offset = ra_->reg2offset(src_first);
-      vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
+      vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
     } else {
       ShouldNotReachHere();
     }
@@ -1081,16 +1077,16 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
     if( src_second == dst_first ) { // overlapping stack copy ranges
       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
-      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
-      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
+      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
+      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
     }
     // move low bits
-    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
-    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
+    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
+    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
-      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
-      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
+      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
+      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
     }
     return size;
   }
@@ -1098,41 +1094,41 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
   // --------------------------------------
   // Check for integer reg-reg copy
   if( src_first_rc == rc_int && dst_first_rc == rc_int )
-    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
+    size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);
 
   // Check for integer store
   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
-    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
+    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 
   // Check for integer load
   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
-    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
+    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 
   // Check for integer reg-xmm reg copy
   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
             "no 64 bit integer-float reg moves" );
-    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+    return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
   }
   // --------------------------------------
   // Check for float reg-reg copy
   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
-    if( cbuf ) {
+    if( masm ) {
 
       // Note the mucking with the register encode to compensate for the 0/1
       // indexing issue mentioned in a comment in the reg_def sections
       // for FPR registers many lines above here.
 
       if( src_first != FPR1L_num ) {
-        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
-        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
-        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
-        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
+        emit_opcode  (masm, 0xD9 );           // FLD    ST(i)
+        emit_d8      (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
+        emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
+        emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
-        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
-        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
+        emit_opcode  (masm, 0xDD );           // FST    ST(i)
+        emit_d8      (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
 #ifndef PRODUCT
     } else if( !do_size ) {
@@ -1146,7 +1142,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
 
   // Check for float store
   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
-    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
+    return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
   }
 
   // Check for float load
@@ -1162,11 +1158,13 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
       op = 0xD9;
       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
     }
-    if( cbuf ) {
-      emit_opcode  (*cbuf, op );
-      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
-      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
-      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
+    if( masm ) {
+      masm->set_inst_mark();
+      emit_opcode  (masm, op );
+      encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
+      emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
+      emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
+      masm->clear_inst_mark();
 #ifndef PRODUCT
     } else if( !do_size ) {
       if( size != 0 ) st->print("\n\t");
@@ -1182,35 +1180,35 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
             (src_first+1 == src_second && dst_first+1 == dst_second),
             "no non-adjacent float-moves" );
-    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+    return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
   }
 
   // Check for xmm reg-integer reg copy
   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
             "no 64 bit float-integer reg moves" );
-    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+    return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
   }
 
   // Check for xmm store
   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
-    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
+    return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
   }
 
   // Check for float xmm load
   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
-    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
+    return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
   }
 
   // Copy from float reg to xmm reg
   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
     // copy to the top of stack from floating point reg
     // and use LEA to preserve flags
-    if( cbuf ) {
-      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
-      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
-      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
-      emit_d8(*cbuf,0xF8);
+    if( masm ) {
+      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP-8]
+      emit_rm(masm, 0x1, ESP_enc, 0x04);
+      emit_rm(masm, 0x0, 0x04, ESP_enc);
+      emit_d8(masm,0xF8);
 #ifndef PRODUCT
     } else if( !do_size ) {
       if( size != 0 ) st->print("\n\t");
@@ -1219,16 +1217,16 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
     }
     size += 4;
 
-    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
+    size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 
     // Copy from the temp memory to the xmm reg.
-    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
+    size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);
 
-    if( cbuf ) {
-      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
-      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
-      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
-      emit_d8(*cbuf,0x08);
+    if( masm ) {
+      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP+8]
+      emit_rm(masm, 0x1, ESP_enc, 0x04);
+      emit_rm(masm, 0x0, 0x04, ESP_enc);
+      emit_d8(masm,0x08);
 #ifndef PRODUCT
     } else if( !do_size ) {
       if( size != 0 ) st->print("\n\t");
@@ -1244,8 +1242,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
     int offset = ra_->reg2offset(src_first);
-    if (cbuf != nullptr) {
-      MacroAssembler _masm(cbuf);
+    if (masm != nullptr) {
       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
     } else {
@@ -1259,8 +1256,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
     int offset = ra_->reg2offset(dst_first);
-    if (cbuf != nullptr) {
-      MacroAssembler _masm(cbuf);
+    if (masm != nullptr) {
       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
     } else {
@@ -1283,8 +1279,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
-    if (cbuf != nullptr) {
-      MacroAssembler _masm(cbuf);
+    if (masm != nullptr) {
       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
     } else {
@@ -1304,15 +1299,15 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
 
   // Check for second word int-int move
   if( src_second_rc == rc_int && dst_second_rc == rc_int )
-    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
+    return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);
 
   // Check for second word integer store
   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
-    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
+    return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 
   // Check for second word integer load
   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
-    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
+    return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 
   Unimplemented();
   return 0; // Mute compiler
@@ -1324,8 +1319,8 @@ void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 }
 #endif
 
-void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  implementation( &cbuf, ra_, false, nullptr );
+void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  implementation( masm, ra_, false, nullptr );
 }
 
 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
@@ -1342,20 +1337,20 @@ void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 }
 #endif
 
-void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
   int reg = ra_->get_encode(this);
   if( offset >= 128 ) {
-    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
-    emit_rm(cbuf, 0x2, reg, 0x04);
-    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
-    emit_d32(cbuf, offset);
+    emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
+    emit_rm(masm, 0x2, reg, 0x04);
+    emit_rm(masm, 0x0, 0x04, ESP_enc);
+    emit_d32(masm, offset);
   }
   else {
-    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
-    emit_rm(cbuf, 0x1, reg, 0x04);
-    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
-    emit_d8(cbuf, offset);
+    emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
+    emit_rm(masm, 0x1, reg, 0x04);
+    emit_rm(masm, 0x0, 0x04, ESP_enc);
+    emit_d8(masm, offset);
   }
 }
 
@@ -1381,9 +1376,8 @@ void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 }
 #endif
 
-void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  MacroAssembler masm(&cbuf);
-  masm.ic_check(CodeEntryAlignment);
+void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  __ ic_check(CodeEntryAlignment);
 }
 
 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
@@ -1528,37 +1522,49 @@ encode %{
   // adding a syntax that specifies the sizes of fields in an order,
   // so that the adlc can build the emit functions automagically
 
+  // Set instruction mark in MacroAssembler. This is used only in
+  // instructions that emit bytes directly to the CodeBuffer wrapped
+  // in the MacroAssembler. Should go away once all "instruct" are
+  // patched to emit bytes only using methods in MacroAssembler.
+  enc_class SetInstMark %{
+    __ set_inst_mark();
+  %}
+
+  enc_class ClearInstMark %{
+    __ clear_inst_mark();
+  %}
+
   // Emit primary opcode
   enc_class OpcP %{
-    emit_opcode(cbuf, $primary);
+    emit_opcode(masm, $primary);
   %}
 
   // Emit secondary opcode
   enc_class OpcS %{
-    emit_opcode(cbuf, $secondary);
+    emit_opcode(masm, $secondary);
   %}
 
   // Emit opcode directly
   enc_class Opcode(immI d8) %{
-    emit_opcode(cbuf, $d8$$constant);
+    emit_opcode(masm, $d8$$constant);
   %}
 
   enc_class SizePrefix %{
-    emit_opcode(cbuf,0x66);
+    emit_opcode(masm,0x66);
   %}
 
   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
+    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
   %}
 
   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
-    emit_opcode(cbuf,$opcode$$constant);
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
+    emit_opcode(masm,$opcode$$constant);
+    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
   %}
 
   enc_class mov_r32_imm0( rRegI dst ) %{
-    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
-    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
+    emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
+    emit_d32   ( masm, 0x0  );             //                         imm32==0x0
   %}
 
   enc_class cdq_enc %{
@@ -1585,26 +1591,26 @@ encode %{
     //  F7 F9                idiv        rax,ecx
     //                  done:
     //
-    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
-    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
-    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
-    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
-    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
-    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
-    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
-    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
-    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
-    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
-    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
+    emit_opcode(masm,0x81); emit_d8(masm,0xF8);
+    emit_opcode(masm,0x00); emit_d8(masm,0x00);
+    emit_opcode(masm,0x00); emit_d8(masm,0x80);                     // cmp rax,80000000h
+    emit_opcode(masm,0x0F); emit_d8(masm,0x85);
+    emit_opcode(masm,0x0B); emit_d8(masm,0x00);
+    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // jne normal_case
+    emit_opcode(masm,0x33); emit_d8(masm,0xD2);                     // xor rdx,edx
+    emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp rcx,0FFh
+    emit_opcode(masm,0x0F); emit_d8(masm,0x84);
+    emit_opcode(masm,0x03); emit_d8(masm,0x00);
+    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // je done
     // normal_case:
-    emit_opcode(cbuf,0x99);                                         // cdq
+    emit_opcode(masm,0x99);                                         // cdq
     // idiv (note: must be emitted by the user of this rule)
     // normal:
   %}
 
   // Dense encoding for older common ops
   enc_class Opc_plus(immI opcode, rRegI reg) %{
-    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
+    emit_opcode(masm, $opcode$$constant + $reg$$reg);
   %}
 
 
@@ -1612,10 +1618,10 @@ encode %{
   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
     // Check for 8-bit immediate, and set sign extend bit in opcode
     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
-      emit_opcode(cbuf, $primary | 0x02);
+      emit_opcode(masm, $primary | 0x02);
     }
     else {                          // If 32-bit immediate
-      emit_opcode(cbuf, $primary);
+      emit_opcode(masm, $primary);
     }
   %}
 
@@ -1623,12 +1629,12 @@ encode %{
     // Emit primary opcode and set sign-extend bit
     // Check for 8-bit immediate, and set sign extend bit in opcode
     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
-      emit_opcode(cbuf, $primary | 0x02);    }
+      emit_opcode(masm, $primary | 0x02);    }
     else {                          // If 32-bit immediate
-      emit_opcode(cbuf, $primary);
+      emit_opcode(masm, $primary);
     }
     // Emit r/m byte with secondary opcode, after primary opcode.
-    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
+    emit_rm(masm, 0x3, $secondary, $dst$$reg);
   %}
 
   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
@@ -1646,62 +1652,62 @@ encode %{
     // Emit primary opcode and set sign-extend bit
     // Check for 8-bit immediate, and set sign extend bit in opcode
     int con = (int)$imm$$constant; // Throw away top bits
-    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
+    emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
     // Emit r/m byte with secondary opcode, after primary opcode.
-    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
-    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
-    else                               emit_d32(cbuf,con);
+    emit_rm(masm, 0x3, $secondary, $dst$$reg);
+    if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
+    else                               emit_d32(masm,con);
   %}
 
   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
     // Emit primary opcode and set sign-extend bit
     // Check for 8-bit immediate, and set sign extend bit in opcode
     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
-    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
+    emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
     // Emit r/m byte with tertiary opcode, after primary opcode.
-    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
-    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
-    else                               emit_d32(cbuf,con);
+    emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
+    if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
+    else                               emit_d32(masm,con);
   %}
 
   enc_class OpcSReg (rRegI dst) %{    // BSWAP
-    emit_cc(cbuf, $secondary, $dst$$reg );
+    emit_cc(masm, $secondary, $dst$$reg );
   %}
 
   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
     int destlo = $dst$$reg;
     int desthi = HIGH_FROM_LOW_ENC(destlo);
     // bswap lo
-    emit_opcode(cbuf, 0x0F);
-    emit_cc(cbuf, 0xC8, destlo);
+    emit_opcode(masm, 0x0F);
+    emit_cc(masm, 0xC8, destlo);
     // bswap hi
-    emit_opcode(cbuf, 0x0F);
-    emit_cc(cbuf, 0xC8, desthi);
+    emit_opcode(masm, 0x0F);
+    emit_cc(masm, 0xC8, desthi);
     // xchg lo and hi
-    emit_opcode(cbuf, 0x87);
-    emit_rm(cbuf, 0x3, destlo, desthi);
+    emit_opcode(masm, 0x87);
+    emit_rm(masm, 0x3, destlo, desthi);
   %}
 
   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
-    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
+    emit_rm(masm, 0x3, $secondary, $div$$reg );
   %}
 
   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
     $$$emit8$primary;
-    emit_cc(cbuf, $secondary, $cop$$cmpcode);
+    emit_cc(masm, $secondary, $cop$$cmpcode);
   %}
 
   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
-    emit_d8(cbuf, op >> 8 );
-    emit_d8(cbuf, op & 255);
+    emit_d8(masm, op >> 8 );
+    emit_d8(masm, op & 255);
   %}
 
   // emulate a CMOV with a conditional branch around a MOV
   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
     // Invert sense of branch from sense of CMOV
-    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
-    emit_d8( cbuf, $brOffs$$constant );
+    emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
+    emit_d8( masm, $brOffs$$constant );
   %}
 
   enc_class enc_PartialSubtypeCheck( ) %{
@@ -1711,7 +1717,6 @@ encode %{
     Register Resi = as_Register(ESI_enc); // sub class
     Label miss;
 
-    MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                      nullptr, &miss,
                                      /*set_cond_codes:*/ true);
@@ -1722,43 +1727,40 @@ encode %{
   %}
 
   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
-    MacroAssembler masm(&cbuf);
-    int start = masm.offset();
+    int start = __ offset();
     if (UseSSE >= 2) {
       if (VerifyFPU) {
-        masm.verify_FPU(0, "must be empty in SSE2+ mode");
+        __ verify_FPU(0, "must be empty in SSE2+ mode");
       }
     } else {
       // External c_calling_convention expects the FPU stack to be 'clean'.
       // Compiled code leaves it dirty.  Do cleanup now.
-      masm.empty_FPU_stack();
+      __ empty_FPU_stack();
     }
     if (sizeof_FFree_Float_Stack_All == -1) {
-      sizeof_FFree_Float_Stack_All = masm.offset() - start;
+      sizeof_FFree_Float_Stack_All = __ offset() - start;
     } else {
-      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
+      assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
     }
   %}
 
   enc_class Verify_FPU_For_Leaf %{
     if( VerifyFPU ) {
-      MacroAssembler masm(&cbuf);
-      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
+      __ verify_FPU( -3, "Returning from Runtime Leaf call");
     }
   %}
 
   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
     // This is the instruction starting address for relocation info.
-    MacroAssembler _masm(&cbuf);
-    cbuf.set_insts_mark();
+    __ set_inst_mark();
     $$$emit8$primary;
     // CALL directly to the runtime
-    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
+    emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                 runtime_call_Relocation::spec(), RELOC_IMM32 );
+    __ clear_inst_mark();
     __ post_call_nop();
 
     if (UseSSE >= 2) {
-      MacroAssembler _masm(&cbuf);
       BasicType rt = tf()->return_type();
 
       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
@@ -1783,54 +1785,53 @@ encode %{
 
   enc_class pre_call_resets %{
     // If method sets FPU control word restore it here
-    debug_only(int off0 = cbuf.insts_size());
+    debug_only(int off0 = __ offset());
     if (ra_->C->in_24_bit_fp_mode()) {
-      MacroAssembler _masm(&cbuf);
       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
     }
     // Clear upper bits of YMM registers when current compiled code uses
     // wide vectors to avoid AVX <-> SSE transition penalty during call.
-    MacroAssembler _masm(&cbuf);
     __ vzeroupper();
-    debug_only(int off1 = cbuf.insts_size());
+    debug_only(int off1 = __ offset());
     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
   %}
 
   enc_class post_call_FPU %{
     // If method sets FPU control word do it here also
     if (Compile::current()->in_24_bit_fp_mode()) {
-      MacroAssembler masm(&cbuf);
-      masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
+      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
     }
   %}
 
   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
     // who we intended to call.
-    MacroAssembler _masm(&cbuf);
-    cbuf.set_insts_mark();
+    __ set_inst_mark();
     $$$emit8$primary;
 
     if (!_method) {
-      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
+      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                      runtime_call_Relocation::spec(),
                      RELOC_IMM32);
+      __ clear_inst_mark();
       __ post_call_nop();
     } else {
-      int method_index = resolved_method_index(cbuf);
+      int method_index = resolved_method_index(masm);
       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                   : static_call_Relocation::spec(method_index);
-      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
+      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                      rspec, RELOC_DISP32);
       __ post_call_nop();
-      address mark = cbuf.insts_mark();
+      address mark = __ inst_mark();
       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
         // Calls of the same statically bound method can share
         // a stub to the interpreter.
-        cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
+        __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
+        __ clear_inst_mark();
       } else {
         // Emit stubs for static call.
-        address stub = CompiledDirectCall::emit_to_interp_stub(cbuf, mark);
+        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
+        __ clear_inst_mark();
         if (stub == nullptr) {
           ciEnv::current()->record_failure("CodeCache is full");
           return;
@@ -1840,8 +1841,7 @@ encode %{
   %}
 
   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
-    MacroAssembler _masm(&cbuf);
-    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
+    __ ic_call((address)$meth$$method, resolved_method_index(masm));
     __ post_call_nop();
   %}
 
@@ -1850,57 +1850,31 @@ encode %{
     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 
     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
-    MacroAssembler _masm(&cbuf);
-    cbuf.set_insts_mark();
+    __ set_inst_mark();
     $$$emit8$primary;
-    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
-    emit_d8(cbuf, disp);             // Displacement
+    emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
+    emit_d8(masm, disp);             // Displacement
+    __ clear_inst_mark();
     __ post_call_nop();
   %}
 
-//   Following encoding is no longer used, but may be restored if calling
-//   convention changes significantly.
-//   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
-//
-//   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
-//     // int ic_reg     = Matcher::inline_cache_reg();
-//     // int ic_encode  = Matcher::_regEncode[ic_reg];
-//     // int imo_reg    = Matcher::interpreter_method_reg();
-//     // int imo_encode = Matcher::_regEncode[imo_reg];
-//
-//     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
-//     // // so we load it immediately before the call
-//     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
-//     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
-//
-//     // xor rbp,ebp
-//     emit_opcode(cbuf, 0x33);
-//     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
-//
-//     // CALL to interpreter.
-//     cbuf.set_insts_mark();
-//     $$$emit8$primary;
-//     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
-//                 runtime_call_Relocation::spec(), RELOC_IMM32 );
-//   %}
-
   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
     $$$emit8$primary;
-    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
+    emit_rm(masm, 0x3, $secondary, $dst$$reg);
     $$$emit8$shift$$constant;
   %}
 
   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
     // Load immediate does not have a zero or sign extended version
     // for 8-bit immediates
-    emit_opcode(cbuf, 0xB8 + $dst$$reg);
+    emit_opcode(masm, 0xB8 + $dst$$reg);
     $$$emit32$src$$constant;
   %}
 
   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
     // Load immediate does not have a zero or sign extended version
     // for 8-bit immediates
-    emit_opcode(cbuf, $primary + $dst$$reg);
+    emit_opcode(masm, $primary + $dst$$reg);
     $$$emit32$src$$constant;
   %}
 
@@ -1911,11 +1885,11 @@ encode %{
     int src_con = $src$$constant & 0x0FFFFFFFFL;
     if (src_con == 0) {
       // xor dst, dst
-      emit_opcode(cbuf, 0x33);
-      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
+      emit_opcode(masm, 0x33);
+      emit_rm(masm, 0x3, dst_enc, dst_enc);
     } else {
-      emit_opcode(cbuf, $primary + dst_enc);
-      emit_d32(cbuf, src_con);
+      emit_opcode(masm, $primary + dst_enc);
+      emit_d32(masm, src_con);
     }
   %}
 
@@ -1926,48 +1900,48 @@ encode %{
     int src_con = ((julong)($src$$constant)) >> 32;
     if (src_con == 0) {
       // xor dst, dst
-      emit_opcode(cbuf, 0x33);
-      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
+      emit_opcode(masm, 0x33);
+      emit_rm(masm, 0x3, dst_enc, dst_enc);
     } else {
-      emit_opcode(cbuf, $primary + dst_enc);
-      emit_d32(cbuf, src_con);
+      emit_opcode(masm, $primary + dst_enc);
+      emit_d32(masm, src_con);
     }
   %}
 
 
   // Encode a reg-reg copy.  If it is useless, then empty encoding.
   enc_class enc_Copy( rRegI dst, rRegI src ) %{
-    encode_Copy( cbuf, $dst$$reg, $src$$reg );
+    encode_Copy( masm, $dst$$reg, $src$$reg );
   %}
 
   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
-    encode_Copy( cbuf, $dst$$reg, $src$$reg );
+    encode_Copy( masm, $dst$$reg, $src$$reg );
   %}
 
   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
+    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
   %}
 
   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
     $$$emit8$primary;
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
+    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
   %}
 
   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
     $$$emit8$secondary;
-    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
+    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
   %}
 
   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
+    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
   %}
 
   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
-    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
+    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
   %}
 
   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
-    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
+    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
   %}
 
   enc_class Con32 (immI src) %{    // Con32(storeImmI)
@@ -1979,14 +1953,14 @@ encode %{
     // Output Float immediate bits
     jfloat jf = $src$$constant;
     int    jf_as_bits = jint_cast( jf );
-    emit_d32(cbuf, jf_as_bits);
+    emit_d32(masm, jf_as_bits);
   %}
 
   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
     // Output Float immediate bits
     jfloat jf = $src$$constant;
     int    jf_as_bits = jint_cast( jf );
-    emit_d32(cbuf, jf_as_bits);
+    emit_d32(masm, jf_as_bits);
   %}
 
   enc_class Con16 (immI src) %{    // Con16(storeImmI)
@@ -1995,17 +1969,17 @@ encode %{
   %}
 
   enc_class Con_d32(immI src) %{
-    emit_d32(cbuf,$src$$constant);
+    emit_d32(masm,$src$$constant);
   %}
 
   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
     // Output immediate memory reference
-    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
-    emit_d32(cbuf, 0x00);
+    emit_rm(masm, 0x00, $t1$$reg, 0x05 );
+    emit_d32(masm, 0x00);
   %}
 
   enc_class lock_prefix( ) %{
-    emit_opcode(cbuf,0xF0);         // [Lock]
+    emit_opcode(masm,0xF0);         // [Lock]
   %}
 
   // Cmp-xchg long value.
@@ -2016,71 +1990,67 @@ encode %{
   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 
     // XCHG  rbx,ecx
-    emit_opcode(cbuf,0x87);
-    emit_opcode(cbuf,0xD9);
+    emit_opcode(masm,0x87);
+    emit_opcode(masm,0xD9);
     // [Lock]
-    emit_opcode(cbuf,0xF0);
+    emit_opcode(masm,0xF0);
     // CMPXCHG8 [Eptr]
-    emit_opcode(cbuf,0x0F);
-    emit_opcode(cbuf,0xC7);
-    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
+    emit_opcode(masm,0x0F);
+    emit_opcode(masm,0xC7);
+    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
     // XCHG  rbx,ecx
-    emit_opcode(cbuf,0x87);
-    emit_opcode(cbuf,0xD9);
+    emit_opcode(masm,0x87);
+    emit_opcode(masm,0xD9);
   %}
 
   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
     // [Lock]
-    emit_opcode(cbuf,0xF0);
+    emit_opcode(masm,0xF0);
 
     // CMPXCHG [Eptr]
-    emit_opcode(cbuf,0x0F);
-    emit_opcode(cbuf,0xB1);
-    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
+    emit_opcode(masm,0x0F);
+    emit_opcode(masm,0xB1);
+    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
   %}
 
   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
     // [Lock]
-    emit_opcode(cbuf,0xF0);
+    emit_opcode(masm,0xF0);
 
     // CMPXCHGB [Eptr]
-    emit_opcode(cbuf,0x0F);
-    emit_opcode(cbuf,0xB0);
-    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
+    emit_opcode(masm,0x0F);
+    emit_opcode(masm,0xB0);
+    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
   %}
 
   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
     // [Lock]
-    emit_opcode(cbuf,0xF0);
+    emit_opcode(masm,0xF0);
 
     // 16-bit mode
-    emit_opcode(cbuf, 0x66);
+    emit_opcode(masm, 0x66);
 
     // CMPXCHGW [Eptr]
-    emit_opcode(cbuf,0x0F);
-    emit_opcode(cbuf,0xB1);
-    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
+    emit_opcode(masm,0x0F);
+    emit_opcode(masm,0xB1);
+    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
   %}
 
   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
     int res_encoding = $res$$reg;
 
     // MOV  res,0
-    emit_opcode( cbuf, 0xB8 + res_encoding);
-    emit_d32( cbuf, 0 );
+    emit_opcode( masm, 0xB8 + res_encoding);
+    emit_d32( masm, 0 );
     // JNE,s  fail
-    emit_opcode(cbuf,0x75);
-    emit_d8(cbuf, 5 );
+    emit_opcode(masm,0x75);
+    emit_d8(masm, 5 );
     // MOV  res,1
-    emit_opcode( cbuf, 0xB8 + res_encoding);
-    emit_d32( cbuf, 1 );
+    emit_opcode( masm, 0xB8 + res_encoding);
+    emit_d32( masm, 1 );
     // fail:
   %}
 
-  enc_class set_instruction_start( ) %{
-    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
-  %}
-
   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
     int reg_encoding = $ereg$$reg;
     int base  = $mem$$base;
@@ -2088,7 +2058,7 @@ encode %{
     int scale = $mem$$scale;
     int displace = $mem$$disp;
     relocInfo::relocType disp_reloc = $mem->disp_reloc();
-    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
+    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
   %}
 
   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
@@ -2098,33 +2068,33 @@ encode %{
     int scale = $mem$$scale;
     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
-    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
+    encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
   %}
 
   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
     int r1, r2;
     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
-    emit_opcode(cbuf,0x0F);
-    emit_opcode(cbuf,$tertiary);
-    emit_rm(cbuf, 0x3, r1, r2);
-    emit_d8(cbuf,$cnt$$constant);
-    emit_d8(cbuf,$primary);
-    emit_rm(cbuf, 0x3, $secondary, r1);
-    emit_d8(cbuf,$cnt$$constant);
+    emit_opcode(masm,0x0F);
+    emit_opcode(masm,$tertiary);
+    emit_rm(masm, 0x3, r1, r2);
+    emit_d8(masm,$cnt$$constant);
+    emit_d8(masm,$primary);
+    emit_rm(masm, 0x3, $secondary, r1);
+    emit_d8(masm,$cnt$$constant);
   %}
 
   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
-    emit_opcode( cbuf, 0x8B ); // Move
-    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
+    emit_opcode( masm, 0x8B ); // Move
+    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
     if( $cnt$$constant > 32 ) { // Shift, if not by zero
-      emit_d8(cbuf,$primary);
-      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
-      emit_d8(cbuf,$cnt$$constant-32);
+      emit_d8(masm,$primary);
+      emit_rm(masm, 0x3, $secondary, $dst$$reg);
+      emit_d8(masm,$cnt$$constant-32);
     }
-    emit_d8(cbuf,$primary);
-    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
-    emit_d8(cbuf,31);
+    emit_d8(masm,$primary);
+    emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
+    emit_d8(masm,31);
   %}
 
   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
@@ -2132,28 +2102,28 @@ encode %{
     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 
-    emit_opcode( cbuf, 0x8B ); // Move r1,r2
-    emit_rm(cbuf, 0x3, r1, r2);
+    emit_opcode( masm, 0x8B ); // Move r1,r2
+    emit_rm(masm, 0x3, r1, r2);
     if( $cnt$$constant > 32 ) { // Shift, if not by zero
-      emit_opcode(cbuf,$primary);
-      emit_rm(cbuf, 0x3, $secondary, r1);
-      emit_d8(cbuf,$cnt$$constant-32);
+      emit_opcode(masm,$primary);
+      emit_rm(masm, 0x3, $secondary, r1);
+      emit_d8(masm,$cnt$$constant-32);
     }
-    emit_opcode(cbuf,0x33);  // XOR r2,r2
-    emit_rm(cbuf, 0x3, r2, r2);
+    emit_opcode(masm,0x33);  // XOR r2,r2
+    emit_rm(masm, 0x3, r2, r2);
   %}
 
   // Clone of RegMem but accepts an extra parameter to access each
   // half of a double in memory; it never needs relocation info.
   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
-    emit_opcode(cbuf,$opcode$$constant);
+    emit_opcode(masm,$opcode$$constant);
     int reg_encoding = $rm_reg$$reg;
     int base     = $mem$$base;
     int index    = $mem$$index;
     int scale    = $mem$$scale;
     int displace = $mem$$disp + $disp_for_half$$constant;
     relocInfo::relocType disp_reloc = relocInfo::none;
-    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
+    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
   %}
 
   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
@@ -2168,7 +2138,7 @@ encode %{
     int scale    = $mem$$scale;
     int displace = $mem$$disp;
     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
-    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
+    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
   %}
 
   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
@@ -2178,7 +2148,7 @@ encode %{
     int scale    = $mem$$scale;
     int displace = $mem$$disp;
     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
-    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
+    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
   %}
 
   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
@@ -2188,31 +2158,31 @@ encode %{
     int scale        = 0x00;            // 0x00 indicates no scale
     int displace     = $src1$$constant; // 0x00 indicates no displacement
     relocInfo::relocType disp_reloc = relocInfo::none;
-    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
+    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
   %}
 
   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
     // Compare dst,src
-    emit_opcode(cbuf,0x3B);
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
+    emit_opcode(masm,0x3B);
+    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
     // jmp dst < src around move
-    emit_opcode(cbuf,0x7C);
-    emit_d8(cbuf,2);
+    emit_opcode(masm,0x7C);
+    emit_d8(masm,2);
     // move dst,src
-    emit_opcode(cbuf,0x8B);
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
+    emit_opcode(masm,0x8B);
+    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
   %}
 
   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
     // Compare dst,src
-    emit_opcode(cbuf,0x3B);
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
+    emit_opcode(masm,0x3B);
+    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
     // jmp dst > src around move
-    emit_opcode(cbuf,0x7F);
-    emit_d8(cbuf,2);
+    emit_opcode(masm,0x7F);
+    emit_d8(masm,2);
     // move dst,src
-    emit_opcode(cbuf,0x8B);
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
+    emit_opcode(masm,0x8B);
+    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
   %}
 
   enc_class enc_FPR_store(memory mem, regDPR src) %{
@@ -2226,115 +2196,116 @@ encode %{
     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
     if( $src$$reg != FPR1L_enc ) {
       reg_encoding = 0x3;  // Store & pop
-      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
-      emit_d8( cbuf, 0xC0-1+$src$$reg );
+      emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
+      emit_d8( masm, 0xC0-1+$src$$reg );
     }
-    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
-    emit_opcode(cbuf,$primary);
-    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
+    __ set_inst_mark();       // Mark start of opcode for reloc info in mem operand
+    emit_opcode(masm,$primary);
+    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
+    __ clear_inst_mark();
   %}
 
   enc_class neg_reg(rRegI dst) %{
     // NEG $dst
-    emit_opcode(cbuf,0xF7);
-    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
+    emit_opcode(masm,0xF7);
+    emit_rm(masm, 0x3, 0x03, $dst$$reg );
   %}
 
   enc_class setLT_reg(eCXRegI dst) %{
     // SETLT $dst
-    emit_opcode(cbuf,0x0F);
-    emit_opcode(cbuf,0x9C);
-    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
+    emit_opcode(masm,0x0F);
+    emit_opcode(masm,0x9C);
+    emit_rm( masm, 0x3, 0x4, $dst$$reg );
   %}
 
   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
     int tmpReg = $tmp$$reg;
 
     // SUB $p,$q
-    emit_opcode(cbuf,0x2B);
-    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
+    emit_opcode(masm,0x2B);
+    emit_rm(masm, 0x3, $p$$reg, $q$$reg);
     // SBB $tmp,$tmp
-    emit_opcode(cbuf,0x1B);
-    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
+    emit_opcode(masm,0x1B);
+    emit_rm(masm, 0x3, tmpReg, tmpReg);
     // AND $tmp,$y
-    emit_opcode(cbuf,0x23);
-    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
+    emit_opcode(masm,0x23);
+    emit_rm(masm, 0x3, tmpReg, $y$$reg);
     // ADD $p,$tmp
-    emit_opcode(cbuf,0x03);
-    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
+    emit_opcode(masm,0x03);
+    emit_rm(masm, 0x3, $p$$reg, tmpReg);
   %}
 
   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
     // TEST shift,32
-    emit_opcode(cbuf,0xF7);
-    emit_rm(cbuf, 0x3, 0, ECX_enc);
-    emit_d32(cbuf,0x20);
+    emit_opcode(masm,0xF7);
+    emit_rm(masm, 0x3, 0, ECX_enc);
+    emit_d32(masm,0x20);
     // JEQ,s small
-    emit_opcode(cbuf, 0x74);
-    emit_d8(cbuf, 0x04);
+    emit_opcode(masm, 0x74);
+    emit_d8(masm, 0x04);
     // MOV    $dst.hi,$dst.lo
-    emit_opcode( cbuf, 0x8B );
-    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
+    emit_opcode( masm, 0x8B );
+    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
     // CLR    $dst.lo
-    emit_opcode(cbuf, 0x33);
-    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
+    emit_opcode(masm, 0x33);
+    emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
 // small:
     // SHLD   $dst.hi,$dst.lo,$shift
-    emit_opcode(cbuf,0x0F);
-    emit_opcode(cbuf,0xA5);
-    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
+    emit_opcode(masm,0x0F);
+    emit_opcode(masm,0xA5);
+    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
     // SHL    $dst.lo,$shift"
-    emit_opcode(cbuf,0xD3);
-    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
+    emit_opcode(masm,0xD3);
+    emit_rm(masm, 0x3, 0x4, $dst$$reg );
   %}
 
   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
     // TEST shift,32
-    emit_opcode(cbuf,0xF7);
-    emit_rm(cbuf, 0x3, 0, ECX_enc);
-    emit_d32(cbuf,0x20);
+    emit_opcode(masm,0xF7);
+    emit_rm(masm, 0x3, 0, ECX_enc);
+    emit_d32(masm,0x20);
     // JEQ,s small
-    emit_opcode(cbuf, 0x74);
-    emit_d8(cbuf, 0x04);
+    emit_opcode(masm, 0x74);
+    emit_d8(masm, 0x04);
     // MOV    $dst.lo,$dst.hi
-    emit_opcode( cbuf, 0x8B );
-    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
+    emit_opcode( masm, 0x8B );
+    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
     // CLR    $dst.hi
-    emit_opcode(cbuf, 0x33);
-    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
+    emit_opcode(masm, 0x33);
+    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 // small:
     // SHRD   $dst.lo,$dst.hi,$shift
-    emit_opcode(cbuf,0x0F);
-    emit_opcode(cbuf,0xAD);
-    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
+    emit_opcode(masm,0x0F);
+    emit_opcode(masm,0xAD);
+    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
     // SHR    $dst.hi,$shift"
-    emit_opcode(cbuf,0xD3);
-    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
+    emit_opcode(masm,0xD3);
+    emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
   %}
 
   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
     // TEST shift,32
-    emit_opcode(cbuf,0xF7);
-    emit_rm(cbuf, 0x3, 0, ECX_enc);
-    emit_d32(cbuf,0x20);
+    emit_opcode(masm,0xF7);
+    emit_rm(masm, 0x3, 0, ECX_enc);
+    emit_d32(masm,0x20);
     // JEQ,s small
-    emit_opcode(cbuf, 0x74);
-    emit_d8(cbuf, 0x05);
+    emit_opcode(masm, 0x74);
+    emit_d8(masm, 0x05);
     // MOV    $dst.lo,$dst.hi
-    emit_opcode( cbuf, 0x8B );
-    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
+    emit_opcode( masm, 0x8B );
+    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
     // SAR    $dst.hi,31
-    emit_opcode(cbuf, 0xC1);
-    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
-    emit_d8(cbuf, 0x1F );
+    emit_opcode(masm, 0xC1);
+    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
+    emit_d8(masm, 0x1F );
 // small:
     // SHRD   $dst.lo,$dst.hi,$shift
-    emit_opcode(cbuf,0x0F);
-    emit_opcode(cbuf,0xAD);
-    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
+    emit_opcode(masm,0x0F);
+    emit_opcode(masm,0xAD);
+    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
     // SAR    $dst.hi,$shift"
-    emit_opcode(cbuf,0xD3);
-    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
+    emit_opcode(masm,0xD3);
+    emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
   %}
 
 
@@ -2342,136 +2313,135 @@ encode %{
   // May leave result in FPU-TOS or FPU reg depending on opcodes
   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
     $$$emit8$primary;
-    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
+    emit_rm(masm, 0x3, $secondary, $src$$reg );
   %}
 
   // Pop argument in FPR0 with FSTP ST(0)
   enc_class PopFPU() %{
-    emit_opcode( cbuf, 0xDD );
-    emit_d8( cbuf, 0xD8 );
+    emit_opcode( masm, 0xDD );
+    emit_d8( masm, 0xD8 );
   %}
 
   // !!!!! equivalent to Pop_Reg_F
   enc_class Pop_Reg_DPR( regDPR dst ) %{
-    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
-    emit_d8( cbuf, 0xD8+$dst$$reg );
+    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
+    emit_d8( masm, 0xD8+$dst$$reg );
   %}
 
   enc_class Push_Reg_DPR( regDPR dst ) %{
-    emit_opcode( cbuf, 0xD9 );
-    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
+    emit_opcode( masm, 0xD9 );
+    emit_d8( masm, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
   %}
 
   enc_class strictfp_bias1( regDPR dst ) %{
-    emit_opcode( cbuf, 0xDB );           // FLD m80real
-    emit_opcode( cbuf, 0x2D );
-    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
-    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
-    emit_opcode( cbuf, 0xC8+$dst$$reg );
+    emit_opcode( masm, 0xDB );           // FLD m80real
+    emit_opcode( masm, 0x2D );
+    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
+    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
+    emit_opcode( masm, 0xC8+$dst$$reg );
   %}
 
   enc_class strictfp_bias2( regDPR dst ) %{
-    emit_opcode( cbuf, 0xDB );           // FLD m80real
-    emit_opcode( cbuf, 0x2D );
-    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
-    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
-    emit_opcode( cbuf, 0xC8+$dst$$reg );
+    emit_opcode( masm, 0xDB );           // FLD m80real
+    emit_opcode( masm, 0x2D );
+    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
+    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
+    emit_opcode( masm, 0xC8+$dst$$reg );
   %}
 
   // Special case for moving an integer register to a stack slot.
   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
-    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
+    store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp );
   %}
 
   // Special case for moving a register to a stack slot.
   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
     // Opcode already emitted
-    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
-    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
-    emit_d32(cbuf, $dst$$disp);   // Displacement
+    emit_rm( masm, 0x02, $src$$reg, ESP_enc );   // R/M byte
+    emit_rm( masm, 0x00, ESP_enc, ESP_enc);          // SIB byte
+    emit_d32(masm, $dst$$disp);   // Displacement
   %}
 
   // Push the integer in stackSlot 'src' onto FP-stack
   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
-    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
+    store_to_stackslot( masm, $primary, $secondary, $src$$disp );
   %}
 
   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
-    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
+    store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp );
   %}
 
   // Same as Pop_Mem_F except for opcode
   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
-    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
+    store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp );
   %}
 
   enc_class Pop_Reg_FPR( regFPR dst ) %{
-    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
-    emit_d8( cbuf, 0xD8+$dst$$reg );
+    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
+    emit_d8( masm, 0xD8+$dst$$reg );
   %}
 
   enc_class Push_Reg_FPR( regFPR dst ) %{
-    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
-    emit_d8( cbuf, 0xC0-1+$dst$$reg );
+    emit_opcode( masm, 0xD9 );           // FLD    ST(i-1)
+    emit_d8( masm, 0xC0-1+$dst$$reg );
   %}
 
   // Push FPU's float to a stack-slot, and pop FPU-stack
   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
     int pop = 0x02;
     if ($src$$reg != FPR1L_enc) {
-      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
-      emit_d8( cbuf, 0xC0-1+$src$$reg );
+      emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
+      emit_d8( masm, 0xC0-1+$src$$reg );
       pop = 0x03;
     }
-    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
+    store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
   %}
 
   // Push FPU's double to a stack-slot, and pop FPU-stack
   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
     int pop = 0x02;
     if ($src$$reg != FPR1L_enc) {
-      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
-      emit_d8( cbuf, 0xC0-1+$src$$reg );
+      emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
+      emit_d8( masm, 0xC0-1+$src$$reg );
       pop = 0x03;
     }
-    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
+    store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
   %}
 
   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
     int pop = 0xD0 - 1; // -1 since we skip FLD
     if ($src$$reg != FPR1L_enc) {
-      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
-      emit_d8( cbuf, 0xC0-1+$src$$reg );
+      emit_opcode( masm, 0xD9 );         // FLD    ST(src-1)
+      emit_d8( masm, 0xC0-1+$src$$reg );
       pop = 0xD8;
     }
-    emit_opcode( cbuf, 0xDD );
-    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
+    emit_opcode( masm, 0xDD );
+    emit_d8( masm, pop+$dst$$reg );      // FST<P> ST(i)
   %}
 
 
   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
     // load dst in FPR0
-    emit_opcode( cbuf, 0xD9 );
-    emit_d8( cbuf, 0xC0-1+$dst$$reg );
+    emit_opcode( masm, 0xD9 );
+    emit_d8( masm, 0xC0-1+$dst$$reg );
     if ($src$$reg != FPR1L_enc) {
       // fincstp
-      emit_opcode (cbuf, 0xD9);
-      emit_opcode (cbuf, 0xF7);
+      emit_opcode (masm, 0xD9);
+      emit_opcode (masm, 0xF7);
       // swap src with FPR1:
       // FXCH FPR1 with src
-      emit_opcode(cbuf, 0xD9);
-      emit_d8(cbuf, 0xC8-1+$src$$reg );
+      emit_opcode(masm, 0xD9);
+      emit_d8(masm, 0xC8-1+$src$$reg );
       // fdecstp
-      emit_opcode (cbuf, 0xD9);
-      emit_opcode (cbuf, 0xF6);
+      emit_opcode (masm, 0xD9);
+      emit_opcode (masm, 0xF6);
     }
   %}
 
   enc_class Push_ModD_encoding(regD src0, regD src1) %{
-    MacroAssembler _masm(&cbuf);
     __ subptr(rsp, 8);
     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
     __ fld_d(Address(rsp, 0));
@@ -2480,7 +2450,6 @@ encode %{
   %}
 
   enc_class Push_ModF_encoding(regF src0, regF src1) %{
-    MacroAssembler _masm(&cbuf);
     __ subptr(rsp, 4);
     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
     __ fld_s(Address(rsp, 0));
@@ -2489,38 +2458,32 @@ encode %{
   %}
 
   enc_class Push_ResultD(regD dst) %{
-    MacroAssembler _masm(&cbuf);
     __ fstp_d(Address(rsp, 0));
     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
     __ addptr(rsp, 8);
   %}
 
   enc_class Push_ResultF(regF dst, immI d8) %{
-    MacroAssembler _masm(&cbuf);
     __ fstp_s(Address(rsp, 0));
     __ movflt($dst$$XMMRegister, Address(rsp, 0));
     __ addptr(rsp, $d8$$constant);
   %}
 
   enc_class Push_SrcD(regD src) %{
-    MacroAssembler _masm(&cbuf);
     __ subptr(rsp, 8);
     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
     __ fld_d(Address(rsp, 0));
   %}
 
   enc_class push_stack_temp_qword() %{
-    MacroAssembler _masm(&cbuf);
     __ subptr(rsp, 8);
   %}
 
   enc_class pop_stack_temp_qword() %{
-    MacroAssembler _masm(&cbuf);
     __ addptr(rsp, 8);
   %}
 
   enc_class push_xmm_to_fpr1(regD src) %{
-    MacroAssembler _masm(&cbuf);
     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
     __ fld_d(Address(rsp, 0));
   %}
@@ -2528,90 +2491,86 @@ encode %{
   enc_class Push_Result_Mod_DPR( regDPR src) %{
     if ($src$$reg != FPR1L_enc) {
       // fincstp
-      emit_opcode (cbuf, 0xD9);
-      emit_opcode (cbuf, 0xF7);
+      emit_opcode (masm, 0xD9);
+      emit_opcode (masm, 0xF7);
       // FXCH FPR1 with src
-      emit_opcode(cbuf, 0xD9);
-      emit_d8(cbuf, 0xC8-1+$src$$reg );
+      emit_opcode(masm, 0xD9);
+      emit_d8(masm, 0xC8-1+$src$$reg );
       // fdecstp
-      emit_opcode (cbuf, 0xD9);
-      emit_opcode (cbuf, 0xF6);
+      emit_opcode (masm, 0xD9);
+      emit_opcode (masm, 0xF6);
     }
-    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
-    // // FSTP   FPR$dst$$reg
-    // emit_opcode( cbuf, 0xDD );
-    // emit_d8( cbuf, 0xD8+$dst$$reg );
   %}
 
   enc_class fnstsw_sahf_skip_parity() %{
     // fnstsw ax
-    emit_opcode( cbuf, 0xDF );
-    emit_opcode( cbuf, 0xE0 );
+    emit_opcode( masm, 0xDF );
+    emit_opcode( masm, 0xE0 );
     // sahf
-    emit_opcode( cbuf, 0x9E );
+    emit_opcode( masm, 0x9E );
     // jnp  ::skip
-    emit_opcode( cbuf, 0x7B );
-    emit_opcode( cbuf, 0x05 );
+    emit_opcode( masm, 0x7B );
+    emit_opcode( masm, 0x05 );
   %}
 
   enc_class emitModDPR() %{
     // fprem must be iterative
     // :: loop
     // fprem
-    emit_opcode( cbuf, 0xD9 );
-    emit_opcode( cbuf, 0xF8 );
+    emit_opcode( masm, 0xD9 );
+    emit_opcode( masm, 0xF8 );
     // wait
-    emit_opcode( cbuf, 0x9b );
+    emit_opcode( masm, 0x9b );
     // fnstsw ax
-    emit_opcode( cbuf, 0xDF );
-    emit_opcode( cbuf, 0xE0 );
+    emit_opcode( masm, 0xDF );
+    emit_opcode( masm, 0xE0 );
     // sahf
-    emit_opcode( cbuf, 0x9E );
+    emit_opcode( masm, 0x9E );
     // jp  ::loop
-    emit_opcode( cbuf, 0x0F );
-    emit_opcode( cbuf, 0x8A );
-    emit_opcode( cbuf, 0xF4 );
-    emit_opcode( cbuf, 0xFF );
-    emit_opcode( cbuf, 0xFF );
-    emit_opcode( cbuf, 0xFF );
+    emit_opcode( masm, 0x0F );
+    emit_opcode( masm, 0x8A );
+    emit_opcode( masm, 0xF4 );
+    emit_opcode( masm, 0xFF );
+    emit_opcode( masm, 0xFF );
+    emit_opcode( masm, 0xFF );
   %}
 
   enc_class fpu_flags() %{
     // fnstsw_ax
-    emit_opcode( cbuf, 0xDF);
-    emit_opcode( cbuf, 0xE0);
+    emit_opcode( masm, 0xDF);
+    emit_opcode( masm, 0xE0);
     // test ax,0x0400
-    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
-    emit_opcode( cbuf, 0xA9 );
-    emit_d16   ( cbuf, 0x0400 );
+    emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
+    emit_opcode( masm, 0xA9 );
+    emit_d16   ( masm, 0x0400 );
     // // // This sequence works, but stalls for 12-16 cycles on PPro
     // // test rax,0x0400
-    // emit_opcode( cbuf, 0xA9 );
-    // emit_d32   ( cbuf, 0x00000400 );
+    // emit_opcode( masm, 0xA9 );
+    // emit_d32   ( masm, 0x00000400 );
     //
     // jz exit (no unordered comparison)
-    emit_opcode( cbuf, 0x74 );
-    emit_d8    ( cbuf, 0x02 );
+    emit_opcode( masm, 0x74 );
+    emit_d8    ( masm, 0x02 );
     // mov ah,1 - treat as LT case (set carry flag)
-    emit_opcode( cbuf, 0xB4 );
-    emit_d8    ( cbuf, 0x01 );
+    emit_opcode( masm, 0xB4 );
+    emit_d8    ( masm, 0x01 );
     // sahf
-    emit_opcode( cbuf, 0x9E);
+    emit_opcode( masm, 0x9E);
   %}
 
   enc_class cmpF_P6_fixup() %{
     // Fixup the integer flags in case comparison involved a NaN
     //
     // JNP exit (no unordered comparison, P-flag is set by NaN)
-    emit_opcode( cbuf, 0x7B );
-    emit_d8    ( cbuf, 0x03 );
+    emit_opcode( masm, 0x7B );
+    emit_d8    ( masm, 0x03 );
     // MOV AH,1 - treat as LT case (set carry flag)
-    emit_opcode( cbuf, 0xB4 );
-    emit_d8    ( cbuf, 0x01 );
+    emit_opcode( masm, 0xB4 );
+    emit_d8    ( masm, 0x01 );
     // SAHF
-    emit_opcode( cbuf, 0x9E);
+    emit_opcode( masm, 0x9E);
     // NOP     // target for branch to avoid branch to branch
-    emit_opcode( cbuf, 0x90);
+    emit_opcode( masm, 0x90);
   %}
 
 //     fnstsw_ax();
@@ -2631,31 +2590,31 @@ encode %{
 
   enc_class CmpF_Result(rRegI dst) %{
     // fnstsw_ax();
-    emit_opcode( cbuf, 0xDF);
-    emit_opcode( cbuf, 0xE0);
+    emit_opcode( masm, 0xDF);
+    emit_opcode( masm, 0xE0);
     // sahf
-    emit_opcode( cbuf, 0x9E);
+    emit_opcode( masm, 0x9E);
     // movl(dst, nan_result);
-    emit_opcode( cbuf, 0xB8 + $dst$$reg);
-    emit_d32( cbuf, -1 );
+    emit_opcode( masm, 0xB8 + $dst$$reg);   // MOV r32,imm32: 1 opcode byte + 4 immediate bytes
+    emit_d32( masm, -1 );                   // NaN yields -1, the same value as the "less" case below
     // jcc(Assembler::parity, exit);
-    emit_opcode( cbuf, 0x7A );
-    emit_d8    ( cbuf, 0x13 );
+    emit_opcode( masm, 0x7A );
+    emit_d8    ( masm, 0x13 );   // rel8 = 19: 3 MOVs (5 bytes each) + 2 Jcc (2 bytes each) remain
     // movl(dst, less_result);
-    emit_opcode( cbuf, 0xB8 + $dst$$reg);
-    emit_d32( cbuf, -1 );
+    emit_opcode( masm, 0xB8 + $dst$$reg);
+    emit_d32( masm, -1 );
     // jcc(Assembler::below, exit);
-    emit_opcode( cbuf, 0x72 );
-    emit_d8    ( cbuf, 0x0C );
+    emit_opcode( masm, 0x72 );
+    emit_d8    ( masm, 0x0C );   // rel8 = 12: 2 MOVs + 1 Jcc remain
     // movl(dst, equal_result);
-    emit_opcode( cbuf, 0xB8 + $dst$$reg);
-    emit_d32( cbuf, 0 );
+    emit_opcode( masm, 0xB8 + $dst$$reg);
+    emit_d32( masm, 0 );
     // jcc(Assembler::equal, exit);
-    emit_opcode( cbuf, 0x74 );
-    emit_d8    ( cbuf, 0x05 );
+    emit_opcode( masm, 0x74 );
+    emit_d8    ( masm, 0x05 );   // rel8 = 5: skips the final 5-byte MOV
     // movl(dst, greater_result);
-    emit_opcode( cbuf, 0xB8 + $dst$$reg);
-    emit_d32( cbuf, 1 );
+    emit_opcode( masm, 0xB8 + $dst$$reg);
+    emit_d32( masm, 1 );
   %}
 
 
@@ -2663,14 +2622,14 @@ encode %{
   // BROKEN!  Do Not use as-is
   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
     // CMP    $src1.hi,$src2.hi
-    emit_opcode( cbuf, 0x3B );
-    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
+    emit_opcode( masm, 0x3B );
+    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
     // JNE,s  done
-    emit_opcode(cbuf,0x75);
-    emit_d8(cbuf, 2 );
+    emit_opcode(masm,0x75);
+    emit_d8(masm, 2 );   // rel8 = 2: skips the 2-byte CMP lo (opcode + ModRM) below
     // CMP    $src1.lo,$src2.lo
-    emit_opcode( cbuf, 0x3B );
-    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
+    emit_opcode( masm, 0x3B );
+    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
 // done:
   %}
 
@@ -2678,223 +2637,223 @@ encode %{
     // mov $dst.lo,$src
     int dst_encoding = $dst$$reg;
     int src_encoding = $src$$reg;
-    encode_Copy( cbuf, dst_encoding  , src_encoding );
+    encode_Copy( masm, dst_encoding  , src_encoding );
     // mov $dst.hi,$src
-    encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
+    encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
     // sar $dst.hi,31
-    emit_opcode( cbuf, 0xC1 );
-    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
-    emit_d8(cbuf, 0x1F );
+    emit_opcode( masm, 0xC1 );
+    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
+    emit_d8(masm, 0x1F );
   %}
 
   enc_class convert_long_double( eRegL src ) %{
     // push $src.hi
-    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
+    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
     // push $src.lo
-    emit_opcode(cbuf, 0x50+$src$$reg  );
+    emit_opcode(masm, 0x50+$src$$reg  );
     // fild 64-bits at [SP]
-    emit_opcode(cbuf,0xdf);
-    emit_d8(cbuf, 0x6C);
-    emit_d8(cbuf, 0x24);
-    emit_d8(cbuf, 0x00);
+    emit_opcode(masm,0xdf);
+    emit_d8(masm, 0x6C);   // ModRM: mod=01 (disp8), /5, r/m=100 (SIB follows)
+    emit_d8(masm, 0x24);   // SIB: base=ESP, no index
+    emit_d8(masm, 0x00);   // disp8 = 0
     // pop stack
-    emit_opcode(cbuf, 0x83); // add  SP, #8
-    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
-    emit_d8(cbuf, 0x8);
+    emit_opcode(masm, 0x83); // add  SP, #8
+    emit_rm(masm, 0x3, 0x00, ESP_enc);
+    emit_d8(masm, 0x8);      // 8 bytes = the two words pushed above
   %}
 
   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
     // IMUL   EDX:EAX,$src1
-    emit_opcode( cbuf, 0xF7 );
-    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
+    emit_opcode( masm, 0xF7 );
+    emit_rm( masm, 0x3, 0x5, $src1$$reg );   // F7 /5: one-operand IMUL, register-direct
     // SAR    EDX,$cnt-32
     int shift_count = ((int)$cnt$$constant) - 32;
     if (shift_count > 0) {
-      emit_opcode(cbuf, 0xC1);
-      emit_rm(cbuf, 0x3, 7, $dst$$reg );
-      emit_d8(cbuf, shift_count);
+      emit_opcode(masm, 0xC1);
+      emit_rm(masm, 0x3, 7, $dst$$reg );     // C1 /7: SAR r/m32,imm8
+      emit_d8(masm, shift_count);            // no SAR is emitted when $cnt == 32 (shift_count == 0)
     }
   %}
 
   // this version doesn't have add sp, 8
   enc_class convert_long_double2( eRegL src ) %{
     // push $src.hi
-    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
+    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
     // push $src.lo
-    emit_opcode(cbuf, 0x50+$src$$reg  );
+    emit_opcode(masm, 0x50+$src$$reg  );
     // fild 64-bits at [SP]
-    emit_opcode(cbuf,0xdf);
-    emit_d8(cbuf, 0x6C);
-    emit_d8(cbuf, 0x24);
-    emit_d8(cbuf, 0x00);
+    emit_opcode(masm,0xdf);
+    emit_d8(masm, 0x6C);   // ModRM: mod=01 (disp8), /5, r/m=100 (SIB follows)
+    emit_d8(masm, 0x24);   // SIB: base=ESP, no index
+    emit_d8(masm, 0x00);   // disp8 = 0; the two pushed words are left on the stack
   %}
 
   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
     // Basic idea: long = (long)int * (long)int
     // IMUL EDX:EAX, src
-    emit_opcode( cbuf, 0xF7 );
-    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
+    emit_opcode( masm, 0xF7 );
+    emit_rm( masm, 0x3, 0x5, $src$$reg);   // F7 /5: one-operand (signed) IMUL, register-direct
   %}
 
   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
     // MUL EDX:EAX, src
-    emit_opcode( cbuf, 0xF7 );
-    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
+    emit_opcode( masm, 0xF7 );
+    emit_rm( masm, 0x3, 0x4, $src$$reg);   // F7 /4: one-operand (unsigned) MUL, register-direct
   %}
 
   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
     // Basic idea: lo(result) = lo(x_lo * y_lo)
     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
     // MOV    $tmp,$src.lo
-    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
+    encode_Copy( masm, $tmp$$reg, $src$$reg );
     // IMUL   $tmp,EDX
-    emit_opcode( cbuf, 0x0F );
-    emit_opcode( cbuf, 0xAF );
-    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
+    emit_opcode( masm, 0x0F );
+    emit_opcode( masm, 0xAF );
+    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );   // $tmp = $src.lo * $dst.hi (low 32 bits)
     // MOV    EDX,$src.hi
-    encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
+    encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
     // IMUL   EDX,EAX
-    emit_opcode( cbuf, 0x0F );
-    emit_opcode( cbuf, 0xAF );
-    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
+    emit_opcode( masm, 0x0F );
+    emit_opcode( masm, 0xAF );
+    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );   // $dst.hi = $src.hi * $dst.lo (low 32 bits)
     // ADD    $tmp,EDX
-    emit_opcode( cbuf, 0x03 );
-    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
+    emit_opcode( masm, 0x03 );
+    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );   // $tmp = sum of the two cross products
     // MUL   EDX:EAX,$src.lo
-    emit_opcode( cbuf, 0xF7 );
-    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
+    emit_opcode( masm, 0xF7 );
+    emit_rm( masm, 0x3, 0x4, $src$$reg );   // F7 /4: unsigned MUL
     // ADD    EDX,ESI
-    emit_opcode( cbuf, 0x03 );
-    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
+    emit_opcode( masm, 0x03 );
+    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );   // operands emitted are $dst.hi and $tmp (not a fixed ESI)
   %}
 
   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
     // Basic idea: lo(result) = lo(src * y_lo)
     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
     // IMUL   $tmp,EDX,$src
-    emit_opcode( cbuf, 0x6B );
-    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
-    emit_d8( cbuf, (int)$src$$constant );
+    emit_opcode( masm, 0x6B );
+    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
+    emit_d8( masm, (int)$src$$constant );   // imm8 operand of the 6B IMUL form
     // MOV    EDX,$src
-    emit_opcode(cbuf, 0xB8 + EDX_enc);
-    emit_d32( cbuf, (int)$src$$constant );
+    emit_opcode(masm, 0xB8 + EDX_enc);
+    emit_d32( masm, (int)$src$$constant );
     // MUL   EDX:EAX,EDX
-    emit_opcode( cbuf, 0xF7 );
-    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
+    emit_opcode( masm, 0xF7 );
+    emit_rm( masm, 0x3, 0x4, EDX_enc );     // F7 /4: unsigned MUL
     // ADD    EDX,ESI
-    emit_opcode( cbuf, 0x03 );
-    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
+    emit_opcode( masm, 0x03 );
+    emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );   // operands emitted are EDX and $tmp (not a fixed ESI)
   %}
 
   enc_class long_div( eRegL src1, eRegL src2 ) %{
     // PUSH src1.hi
-    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
+    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );   // NOTE(review): macro wraps opcode+reg here, reg alone in convert_long_double; confirm equivalent
     // PUSH src1.lo
-    emit_opcode(cbuf,               0x50+$src1$$reg  );
+    emit_opcode(masm,               0x50+$src1$$reg  );
     // PUSH src2.hi
-    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
+    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
     // PUSH src2.lo
-    emit_opcode(cbuf,               0x50+$src2$$reg  );
+    emit_opcode(masm,               0x50+$src2$$reg  );
     // CALL directly to the runtime
-    MacroAssembler _masm(&cbuf);
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf,0xE8);       // Call into runtime
-    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
+    __ set_inst_mark();
+    emit_opcode(masm,0xE8);       // Call into runtime
+    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );   // "- 4": rel32 is measured from the end of the 4-byte displacement
+    __ clear_inst_mark();         // pairs with set_inst_mark() above
     __ post_call_nop();
     // Restore stack
-    emit_opcode(cbuf, 0x83); // add  SP, #framesize
-    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
-    emit_d8(cbuf, 4*4);
+    emit_opcode(masm, 0x83); // add  SP, #framesize
+    emit_rm(masm, 0x3, 0x00, ESP_enc);
+    emit_d8(masm, 4*4);      // the 4 words pushed above, 4 bytes each
   %}
 
   enc_class long_mod( eRegL src1, eRegL src2 ) %{
     // PUSH src1.hi
-    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
+    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );   // NOTE(review): macro wraps opcode+reg here, reg alone in convert_long_double; confirm equivalent
     // PUSH src1.lo
-    emit_opcode(cbuf,               0x50+$src1$$reg  );
+    emit_opcode(masm,               0x50+$src1$$reg  );
     // PUSH src2.hi
-    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
+    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
     // PUSH src2.lo
-    emit_opcode(cbuf,               0x50+$src2$$reg  );
+    emit_opcode(masm,               0x50+$src2$$reg  );
     // CALL directly to the runtime
-    MacroAssembler _masm(&cbuf);
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf,0xE8);       // Call into runtime
-    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
+    __ set_inst_mark();
+    emit_opcode(masm,0xE8);       // Call into runtime
+    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );   // "- 4": rel32 is measured from the end of the 4-byte displacement
+    __ clear_inst_mark();         // pairs with set_inst_mark() above
     __ post_call_nop();
     // Restore stack
-    emit_opcode(cbuf, 0x83); // add  SP, #framesize
-    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
-    emit_d8(cbuf, 4*4);
+    emit_opcode(masm, 0x83); // add  SP, #framesize
+    emit_rm(masm, 0x3, 0x00, ESP_enc);
+    emit_d8(masm, 4*4);      // the 4 words pushed above, 4 bytes each
   %}
 
   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
     // MOV   $tmp,$src.lo
-    emit_opcode(cbuf, 0x8B);
-    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
+    emit_opcode(masm, 0x8B);
+    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
     // OR    $tmp,$src.hi
-    emit_opcode(cbuf, 0x0B);
-    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
+    emit_opcode(masm, 0x0B);
+    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));   // $tmp = lo | hi; the OR sets the flags
   %}
 
   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
     // CMP    $src1.lo,$src2.lo
-    emit_opcode( cbuf, 0x3B );
-    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
+    emit_opcode( masm, 0x3B );
+    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
     // JNE,s  skip
-    emit_cc(cbuf, 0x70, 0x5);
-    emit_d8(cbuf,2);
+    emit_cc(masm, 0x70, 0x5);   // presumably combines to 0x75 (JNE rel8), per the comment above; confirm in emit_cc
+    emit_d8(masm,2);            // rel8 = 2: skips the 2-byte CMP hi (opcode + ModRM) below
     // CMP    $src1.hi,$src2.hi
-    emit_opcode( cbuf, 0x3B );
-    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
+    emit_opcode( masm, 0x3B );
+    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
   %}
 
   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
-    emit_opcode( cbuf, 0x3B );
-    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
+    emit_opcode( masm, 0x3B );
+    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
     // MOV    $tmp,$src1.hi
-    emit_opcode( cbuf, 0x8B );
-    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
+    emit_opcode( masm, 0x8B );
+    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );   // MOV leaves the flags from CMP intact
     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
-    emit_opcode( cbuf, 0x1B );
-    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
+    emit_opcode( masm, 0x1B );
+    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );   // consumes the borrow from the CMP lo; $tmp is clobbered
   %}
 
   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
     // XOR    $tmp,$tmp
-    emit_opcode(cbuf,0x33);  // XOR
-    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
+    emit_opcode(masm,0x33);  // XOR
+    emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);   // $tmp = 0
     // CMP    $tmp,$src.lo
-    emit_opcode( cbuf, 0x3B );
-    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
+    emit_opcode( masm, 0x3B );
+    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
     // SBB    $tmp,$src.hi
-    emit_opcode( cbuf, 0x1B );
-    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
+    emit_opcode( masm, 0x1B );
+    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );   // net: flags of the 64-bit subtract 0 - $src; $tmp is clobbered
   %}
 
  // Sniff, sniff... smells like Gnu Superoptimizer
   enc_class neg_long( eRegL dst ) %{
-    emit_opcode(cbuf,0xF7);    // NEG hi
-    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
-    emit_opcode(cbuf,0xF7);    // NEG lo
-    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
-    emit_opcode(cbuf,0x83);    // SBB hi,0
-    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
-    emit_d8    (cbuf,0 );
+    emit_opcode(masm,0xF7);    // NEG hi (F7 /3)
+    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
+    emit_opcode(masm,0xF7);    // NEG lo (F7 /3)
+    emit_rm    (masm,0x3, 0x3,               $dst$$reg );
+    emit_opcode(masm,0x83);    // SBB hi,0 (83 /3 ib)
+    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
+    emit_d8    (masm,0 );      // imm8 = 0: subtracts only the borrow left by NEG lo
   %}
 
   enc_class enc_pop_rdx() %{
-    emit_opcode(cbuf,0x5A);
+    emit_opcode(masm,0x5A);   // POP EDX (0x58 + register number 2)
   %}
 
   enc_class enc_rethrow() %{
-    MacroAssembler _masm(&cbuf);
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE9);        // jmp    entry
-    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
+    __ set_inst_mark();
+    emit_opcode(masm, 0xE9);        // jmp    entry (E9 = JMP rel32)
+    emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,   // "-4": rel32 is measured from the end of the displacement
                    runtime_call_Relocation::spec(), RELOC_IMM32 );
+    __ clear_inst_mark();           // pairs with set_inst_mark() above
     __ post_call_nop();
   %}
 
@@ -2912,83 +2871,83 @@ encode %{
     // invalid-op exceptions hanging.  We would have to clear them before
     // enabling them and that is more expensive than just testing for the
     // invalid value Intel stores down in the corner cases.
-    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
-    emit_opcode(cbuf,0x2D);
-    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
+    emit_opcode(masm,0xD9);            // FLDCW  trunc
+    emit_opcode(masm,0x2D);
+    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
     // Allocate a word
-    emit_opcode(cbuf,0x83);            // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x04);
+    emit_opcode(masm,0x83);            // SUB ESP,4
+    emit_opcode(masm,0xEC);
+    emit_d8(masm,0x04);
     // Encoding assumes a double has been pushed into FPR0.
     // Store down the double as an int, popping the FPU stack
-    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
-    emit_opcode(cbuf,0x1C);
-    emit_d8(cbuf,0x24);
+    emit_opcode(masm,0xDB);            // FISTP [ESP]
+    emit_opcode(masm,0x1C);
+    emit_d8(masm,0x24);
     // Restore the rounding mode; mask the exception
-    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
-    emit_opcode(cbuf,0x2D);
-    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
+    emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
+    emit_opcode(masm,0x2D);
+    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 
     // Load the converted int; adjust CPU stack
-    emit_opcode(cbuf,0x58);       // POP EAX
-    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
-    emit_d32   (cbuf,0x80000000); //         0x80000000
-    emit_opcode(cbuf,0x75);       // JNE around_slow_call
-    emit_d8    (cbuf,0x07);       // Size of slow_call
+    emit_opcode(masm,0x58);       // POP EAX
+    emit_opcode(masm,0x3D);       // CMP EAX,imm
+    emit_d32   (masm,0x80000000); //         0x80000000
+    emit_opcode(masm,0x75);       // JNE around_slow_call
+    emit_d8    (masm,0x07);       // Size of slow_call
     // Push src onto stack slow-path
-    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
-    emit_d8    (cbuf,0xC0-1+$src$$reg );
+    emit_opcode(masm,0xD9 );      // FLD     ST(i)
+    emit_d8    (masm,0xC0-1+$src$$reg );
     // CALL directly to the runtime
-    MacroAssembler _masm(&cbuf);
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf,0xE8);       // Call into runtime
-    emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
+    __ set_inst_mark();
+    emit_opcode(masm,0xE8);       // Call into runtime
+    emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
+    __ clear_inst_mark();
     __ post_call_nop();
     // Carry on here...
   %}
 
   enc_class DPR2L_encoding( regDPR src ) %{
-    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
-    emit_opcode(cbuf,0x2D);
-    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
+    emit_opcode(masm,0xD9);            // FLDCW  trunc
+    emit_opcode(masm,0x2D);
+    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
     // Allocate a word
-    emit_opcode(cbuf,0x83);            // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
+    emit_opcode(masm,0x83);            // SUB ESP,8 (two words, room for the 64-bit result)
+    emit_opcode(masm,0xEC);
+    emit_d8(masm,0x08);
     // Encoding assumes a double has been pushed into FPR0.
     // Store down the double as a long, popping the FPU stack
-    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
-    emit_opcode(cbuf,0x3C);
-    emit_d8(cbuf,0x24);
+    emit_opcode(masm,0xDF);            // FISTP [ESP] (DF /7: 64-bit integer store)
+    emit_opcode(masm,0x3C);
+    emit_d8(masm,0x24);
     // Restore the rounding mode; mask the exception
-    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
-    emit_opcode(cbuf,0x2D);
-    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
+    emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
+    emit_opcode(masm,0x2D);
+    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 
     // Load the converted int; adjust CPU stack
-    emit_opcode(cbuf,0x58);       // POP EAX
-    emit_opcode(cbuf,0x5A);       // POP EDX
-    emit_opcode(cbuf,0x81);       // CMP EDX,imm
-    emit_d8    (cbuf,0xFA);       // rdx
-    emit_d32   (cbuf,0x80000000); //         0x80000000
-    emit_opcode(cbuf,0x75);       // JNE around_slow_call
-    emit_d8    (cbuf,0x07+4);     // Size of slow_call
-    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
-    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
-    emit_opcode(cbuf,0x75);       // JNE around_slow_call
-    emit_d8    (cbuf,0x07);       // Size of slow_call
+    emit_opcode(masm,0x58);       // POP EAX (low 32 bits of the stored long)
+    emit_opcode(masm,0x5A);       // POP EDX (high 32 bits of the stored long)
+    emit_opcode(masm,0x81);       // CMP EDX,imm
+    emit_d8    (masm,0xFA);       // ModRM: mod=3, /7 (CMP), r/m=EDX
+    emit_d32   (masm,0x80000000); //         0x80000000
+    emit_opcode(masm,0x75);       // JNE around_slow_call
+    emit_d8    (masm,0x07+4);     // Size of slow_call + 4 (the TEST and JNE below, 2 bytes each)
+    emit_opcode(masm,0x85);       // TEST EAX,EAX
+    emit_opcode(masm,0xC0);       // ModRM: mod=3, reg=EAX, r/m=EAX
+    emit_opcode(masm,0x75);       // JNE around_slow_call
+    emit_d8    (masm,0x07);       // Size of slow_call (2-byte FLD + 5-byte CALL)
     // Push src onto stack slow-path
-    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
-    emit_d8    (cbuf,0xC0-1+$src$$reg );
+    emit_opcode(masm,0xD9 );      // FLD     ST(i)
+    emit_d8    (masm,0xC0-1+$src$$reg );
     // CALL directly to the runtime
-    MacroAssembler _masm(&cbuf);
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf,0xE8);       // Call into runtime
-    emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
+    __ set_inst_mark();
+    emit_opcode(masm,0xE8);       // Call into runtime
+    emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
+    __ clear_inst_mark();         // pairs with set_inst_mark() above
     __ post_call_nop();
     // Carry on here...
   %}
@@ -2996,68 +2955,68 @@ encode %{
   enc_class FMul_ST_reg( eRegFPR src1 ) %{
     // Operand was loaded from memory into fp ST (stack top)
     // FMUL   ST,$src  /* D8 C8+i */
-    emit_opcode(cbuf, 0xD8);
-    emit_opcode(cbuf, 0xC8 + $src1$$reg);
+    emit_opcode(masm, 0xD8);
+    emit_opcode(masm, 0xC8 + $src1$$reg);   // second opcode byte: the "+i" is $src1's register encoding
   %}
 
   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
     // FADDP  ST,src2  /* D8 C0+i */
-    emit_opcode(cbuf, 0xD8);
-    emit_opcode(cbuf, 0xC0 + $src2$$reg);
+    emit_opcode(masm, 0xD8);                // NOTE(review): D8 C0+i is FADD ST,ST(i) (no pop); FADDP is DE C0+i
+    emit_opcode(masm, 0xC0 + $src2$$reg);
     //could use FADDP  src2,fpST  /* DE C0+i */
   %}
 
   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
     // FADDP  src2,ST  /* DE C0+i */
-    emit_opcode(cbuf, 0xDE);
-    emit_opcode(cbuf, 0xC0 + $src2$$reg);
+    emit_opcode(masm, 0xDE);                // DE (vs. D8) selects the popping form
+    emit_opcode(masm, 0xC0 + $src2$$reg);
   %}
 
   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
     // Operand has been loaded into fp ST (stack top)
       // FSUB   ST,$src1
-      emit_opcode(cbuf, 0xD8);
-      emit_opcode(cbuf, 0xE0 + $src1$$reg);
+      emit_opcode(masm, 0xD8);
+      emit_opcode(masm, 0xE0 + $src1$$reg);   // D8 E0+i: FSUB ST,ST(i)
 
       // FDIV
-      emit_opcode(cbuf, 0xD8);
-      emit_opcode(cbuf, 0xF0 + $src2$$reg);
+      emit_opcode(masm, 0xD8);
+      emit_opcode(masm, 0xF0 + $src2$$reg);   // D8 F0+i: FDIV ST,ST(i), with i taken from $src2
   %}
 
   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
     // Operand was loaded from memory into fp ST (stack top)
     // FADD   ST,$src  /* D8 C0+i */
-    emit_opcode(cbuf, 0xD8);
-    emit_opcode(cbuf, 0xC0 + $src1$$reg);
+    emit_opcode(masm, 0xD8);
+    emit_opcode(masm, 0xC0 + $src1$$reg);   // the add with $src1 is emitted first
 
     // FMUL  ST,src2  /* D8 C*+i */
-    emit_opcode(cbuf, 0xD8);
-    emit_opcode(cbuf, 0xC8 + $src2$$reg);
+    emit_opcode(masm, 0xD8);
+    emit_opcode(masm, 0xC8 + $src2$$reg);   // D8 C8+i (the "C*" in the comment above is C8)
   %}
 
 
   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
     // Operand was loaded from memory into fp ST (stack top)
     // FADD   ST,$src  /* D8 C0+i */
-    emit_opcode(cbuf, 0xD8);
-    emit_opcode(cbuf, 0xC0 + $src1$$reg);
+    emit_opcode(masm, 0xD8);
+    emit_opcode(masm, 0xC0 + $src1$$reg);   // the add with $src1 is emitted first
 
     // FMULP  src2,ST  /* DE C8+i */
-    emit_opcode(cbuf, 0xDE);
-    emit_opcode(cbuf, 0xC8 + $src2$$reg);
+    emit_opcode(masm, 0xDE);                // DE (vs. D8 in MulFAddF) selects the popping form
+    emit_opcode(masm, 0xC8 + $src2$$reg);
   %}
 
   // Atomically load the volatile long
   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
-    emit_opcode(cbuf,0xDF);
+    emit_opcode(masm,0xDF);   // with rm_byte_opcode 0x05 below: DF /5 = FILD m64int from $mem
     int rm_byte_opcode = 0x05;
     int base     = $mem$$base;
     int index    = $mem$$index;
     int scale    = $mem$$scale;
     int displace = $mem$$disp;
     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
-    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
-    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
+    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
+    store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp );   // presumably DF /7 (FISTP m64int) into the $dst slot; confirm in store_to_stackslot
   %}
 
   // Volatile Store Long.  Must be atomic, so move it into
@@ -3065,16 +3024,17 @@ encode %{
   // target address before the store (for null-ptr checks)
   // so the memory operand is used twice in the encoding.
   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
-    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
-    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
-    emit_opcode(cbuf,0xDF);
+    store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp );   // presumably DF /5 (FILD m64int) of the $src slot, despite the helper's name; confirm
+    __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
+    emit_opcode(masm,0xDF);        // with rm_byte_opcode 0x07 below: DF /7 = FISTP m64int to $mem
     int rm_byte_opcode = 0x07;
     int base     = $mem$$base;
     int index    = $mem$$index;
     int scale    = $mem$$scale;
     int displace = $mem$$disp;
     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
-    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
+    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
+    __ clear_inst_mark();          // pairs with set_inst_mark() above
   %}
 
 %}
@@ -5754,7 +5714,7 @@ instruct loadRange(rRegI dst, memory mem) %{
   ins_cost(125);
   format %{ "MOV    $dst,$mem" %}
   opcode(0x8B);
-  ins_encode( OpcP, RegMem(dst,mem));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
   ins_pipe( ialu_reg_mem );
 %}
 
@@ -5766,7 +5726,7 @@ instruct loadP(eRegP dst, memory mem) %{
   ins_cost(125);
   format %{ "MOV    $dst,$mem" %}
   opcode(0x8B);
-  ins_encode( OpcP, RegMem(dst,mem));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
   ins_pipe( ialu_reg_mem );
 %}
 
@@ -5777,7 +5737,7 @@ instruct loadKlass(eRegP dst, memory mem) %{
   ins_cost(125);
   format %{ "MOV    $dst,$mem" %}
   opcode(0x8B);
-  ins_encode( OpcP, RegMem(dst,mem));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
   ins_pipe( ialu_reg_mem );
 %}
 
@@ -5790,8 +5750,8 @@ instruct loadDPR(regDPR dst, memory mem) %{
   format %{ "FLD_D  ST,$mem\n\t"
             "FSTP   $dst" %}
   opcode(0xDD);               /* DD /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_DPR(dst) );
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
+              Pop_Reg_DPR(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -5840,8 +5800,8 @@ instruct loadFPR(regFPR dst, memory mem) %{
   format %{ "FLD_S  ST,$mem\n\t"
             "FSTP   $dst" %}
   opcode(0xD9);               /* D9 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_FPR(dst) );
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
+              Pop_Reg_FPR(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -5852,7 +5812,7 @@ instruct leaP8(eRegP dst, indOffset8 mem) %{
   ins_cost(110);
   format %{ "LEA    $dst,$mem" %}
   opcode(0x8D);
-  ins_encode( OpcP, RegMem(dst,mem));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
   ins_pipe( ialu_reg_reg_fat );
 %}
 
@@ -5862,7 +5822,7 @@ instruct leaP32(eRegP dst, indOffset32 mem) %{
   ins_cost(110);
   format %{ "LEA    $dst,$mem" %}
   opcode(0x8D);
-  ins_encode( OpcP, RegMem(dst,mem));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
   ins_pipe( ialu_reg_reg_fat );
 %}
 
@@ -5872,7 +5832,7 @@ instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
   ins_cost(110);
   format %{ "LEA    $dst,$mem" %}
   opcode(0x8D);
-  ins_encode( OpcP, RegMem(dst,mem));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
   ins_pipe( ialu_reg_reg_fat );
 %}
 
@@ -5882,7 +5842,7 @@ instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
   ins_cost(110);
   format %{ "LEA    $dst,$mem" %}
   opcode(0x8D);
-  ins_encode( OpcP, RegMem(dst,mem));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
   ins_pipe( ialu_reg_reg_fat );
 %}
 
@@ -5892,7 +5852,7 @@ instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
   ins_cost(110);
   format %{ "LEA    $dst,$mem" %}
   opcode(0x8D);
-  ins_encode( OpcP, RegMem(dst,mem));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
   ins_pipe( ialu_reg_reg_fat );
 %}
 
@@ -5901,7 +5861,7 @@ instruct loadConI(rRegI dst, immI src) %{
   match(Set dst src);
 
   format %{ "MOV    $dst,$src" %}
-  ins_encode( LdImmI(dst, src) );
+  ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
   ins_pipe( ialu_reg_fat );
 %}
 
@@ -5922,7 +5882,7 @@ instruct loadConP(eRegP dst, immP src) %{
 
   format %{ "MOV    $dst,$src" %}
   opcode(0xB8);  /* + rd */
-  ins_encode( LdImmP(dst, src) );
+  ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
   ins_pipe( ialu_reg_fat );
 %}
 
@@ -6080,7 +6040,7 @@ instruct loadSSI(rRegI dst, stackSlotI src) %{
 
   format %{ "MOV    $dst,$src" %}
   opcode(0x8B);
-  ins_encode( OpcP, RegMem(dst,src));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
   ins_pipe( ialu_reg_mem );
 %}
 
@@ -6091,7 +6051,7 @@ instruct loadSSL(eRegL dst, stackSlotL src) %{
   format %{ "MOV    $dst,$src.lo\n\t"
             "MOV    $dst+4,$src.hi" %}
   opcode(0x8B, 0x8B);
-  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
+  ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
   ins_pipe( ialu_mem_long_reg );
 %}
 
@@ -6102,7 +6062,7 @@ instruct loadSSP(eRegP dst, stackSlotP src) %{
 
   format %{ "MOV    $dst,$src" %}
   opcode(0x8B);
-  ins_encode( OpcP, RegMem(dst,src));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
   ins_pipe( ialu_reg_mem );
 %}
 
@@ -6114,8 +6074,8 @@ instruct loadSSF(regFPR dst, stackSlotF src) %{
   format %{ "FLD_S  $src\n\t"
             "FSTP   $dst" %}
   opcode(0xD9);               /* D9 /0, FLD m32real */
-  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_FPR(dst) );
+  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
+              Pop_Reg_FPR(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -6127,8 +6087,8 @@ instruct loadSSD(regDPR dst, stackSlotD src) %{
   format %{ "FLD_D  $src\n\t"
             "FSTP   $dst" %}
   opcode(0xDD);               /* DD /0, FLD m64real */
-  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_DPR(dst) );
+  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
+              Pop_Reg_DPR(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -6202,7 +6162,7 @@ instruct storeB(memory mem, xRegI src) %{
   ins_cost(125);
   format %{ "MOV8   $mem,$src" %}
   opcode(0x88);
-  ins_encode( OpcP, RegMem( src, mem ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -6213,7 +6173,7 @@ instruct storeC(memory mem, rRegI src) %{
   ins_cost(125);
   format %{ "MOV16  $mem,$src" %}
   opcode(0x89, 0x66);
-  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
+  ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -6224,7 +6184,7 @@ instruct storeI(memory mem, rRegI src) %{
   ins_cost(125);
   format %{ "MOV    $mem,$src" %}
   opcode(0x89);
-  ins_encode( OpcP, RegMem( src, mem ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -6237,7 +6197,7 @@ instruct storeL(long_memory mem, eRegL src) %{
   format %{ "MOV    $mem,$src.lo\n\t"
             "MOV    $mem+4,$src.hi" %}
   opcode(0x89, 0x89);
-  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
   ins_pipe( ialu_mem_long_reg );
 %}
 
@@ -6265,7 +6225,7 @@ instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
             "FILD   $src\n\t"
             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
   opcode(0x3B);
-  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
+  ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -6312,7 +6272,7 @@ instruct storeP(memory mem, anyRegP src) %{
   ins_cost(125);
   format %{ "MOV    $mem,$src" %}
   opcode(0x89);
-  ins_encode( OpcP, RegMem( src, mem ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -6323,7 +6283,7 @@ instruct storeImmI(memory mem, immI src) %{
   ins_cost(150);
   format %{ "MOV    $mem,$src" %}
   opcode(0xC7);               /* C7 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -6335,7 +6295,7 @@ instruct storeImmI16(memory mem, immI16 src) %{
   ins_cost(150);
   format %{ "MOV16  $mem,$src" %}
   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
-  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
+  ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -6347,7 +6307,7 @@ instruct storeImmP(memory mem, immP src) %{
   ins_cost(150);
   format %{ "MOV    $mem,$src" %}
   opcode(0xC7);               /* C7 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -6358,7 +6318,7 @@ instruct storeImmB(memory mem, immI8 src) %{
   ins_cost(150);
   format %{ "MOV8   $mem,$src" %}
   opcode(0xC6);               /* C6 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -6369,7 +6329,7 @@ instruct storeImmCM(memory mem, immI8 src) %{
   ins_cost(150);
   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
   opcode(0xC6);               /* C6 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -6468,7 +6428,7 @@ instruct storeFPR_imm( memory mem, immFPR src) %{
   ins_cost(50);
   format %{ "MOV    $mem,$src\t# store float" %}
   opcode(0xC7);               /* C7 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits(src), ClearInstMark);
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -6480,7 +6440,7 @@ instruct storeF_imm( memory mem, immF src) %{
   ins_cost(50);
   format %{ "MOV    $mem,$src\t# store float" %}
   opcode(0xC7);               /* C7 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits(src), ClearInstMark);
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -6514,7 +6474,7 @@ instruct storeSSL(stackSlotL dst, eRegL src) %{
   format %{ "MOV    $dst,$src.lo\n\t"
             "MOV    $dst+4,$src.hi" %}
   opcode(0x89, 0x89);
-  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
   ins_pipe( ialu_mem_long_reg );
 %}
 
@@ -6689,7 +6649,7 @@ instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
   ins_cost(250);
   format %{ "CMOV$cop $dst,$src" %}
   opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
+  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
   ins_pipe( pipe_cmov_mem );
 %}
 
@@ -6700,7 +6660,7 @@ instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
   ins_cost(250);
   format %{ "CMOV$cop $dst,$src" %}
   opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
+  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
   ins_pipe( pipe_cmov_mem );
 %}
 
@@ -6998,7 +6958,7 @@ instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 
   format %{ "LEA    $dst,[$src0 + $src1]" %}
   opcode(0x8D); /* 0x8D /r */
-  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
+  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
   ins_pipe( ialu_reg_reg );
 %}
 
@@ -7008,7 +6968,7 @@ instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 
   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
   opcode(0x8D); /* 0x8D /r */
-  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
+  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
   ins_pipe( ialu_reg_reg );
 %}
 
@@ -7053,7 +7013,7 @@ instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   ins_cost(150);
   format %{ "ADD    $dst,$src" %}
   opcode(0x03);
-  ins_encode( OpcP, RegMem( dst, src) );
+  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
   ins_pipe( ialu_reg_mem );
 %}
 
@@ -7064,7 +7024,7 @@ instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   ins_cost(150);
   format %{ "ADD    $dst,$src" %}
   opcode(0x01);  /* Opcode 01 /r */
-  ins_encode( OpcP, RegMem( src, dst ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -7076,7 +7036,7 @@ instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
   ins_cost(125);
   format %{ "ADD    $dst,$src" %}
   opcode(0x81);               /* Opcode 81 /0 id */
-  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
+  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark );
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -7087,7 +7047,7 @@ instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
   ins_cost(125);
   format %{ "INC    $dst" %}
   opcode(0xFF);               /* Opcode FF /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,dst));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark);
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -7098,7 +7058,7 @@ instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
   ins_cost(125);
   format %{ "DEC    $dst" %}
   opcode(0xFF);               /* Opcode FF /1 */
-  ins_encode( OpcP, RMopc_Mem(0x01,dst));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark);
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -7420,7 +7380,7 @@ instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   ins_cost(150);
   format %{ "SUB    $dst,$src" %}
   opcode(0x2B);
-  ins_encode( OpcP, RegMem( dst, src) );
+  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
   ins_pipe( ialu_reg_mem );
 %}
 
@@ -7431,7 +7391,7 @@ instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   ins_cost(150);
   format %{ "SUB    $dst,$src" %}
   opcode(0x29);  /* Opcode 29 /r */
-  ins_encode( OpcP, RegMem( src, dst ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -7537,7 +7497,7 @@ instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
   ins_cost(300);
   format %{ "IMUL   $dst,$src,$imm" %}
   opcode(0x69);  /* 69 /r id */
-  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
+  ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
   ins_pipe( ialu_reg_mem_alu0 );
 %}
 
@@ -7549,7 +7509,7 @@ instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
   ins_cost(350);
   format %{ "IMUL   $dst,$src" %}
   opcode(0xAF, 0x0F);
-  ins_encode( OpcS, OpcP, RegMem( dst, src) );
+  ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
   ins_pipe( ialu_reg_mem_alu0 );
 %}
 
@@ -7954,7 +7914,7 @@ instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
   effect(KILL cr);
   format %{ "SAR    $dst,$shift" %}
   opcode(0xD1, 0x7);  /* D1 /7 */
-  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -7977,7 +7937,7 @@ instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 
   format %{ "SAR    $dst,$shift" %}
   opcode(0xC1, 0x7);  /* C1 /7 ib */
-  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -8093,7 +8053,7 @@ instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   ins_cost(150);
   format %{ "AND    $dst,$src" %}
   opcode(0x23);
-  ins_encode( OpcP, RegMem( dst, src) );
+  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
   ins_pipe( ialu_reg_mem );
 %}
 
@@ -8105,7 +8065,7 @@ instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   ins_cost(150);
   format %{ "AND    $dst,$src" %}
   opcode(0x21);  /* Opcode 21 /r */
-  ins_encode( OpcP, RegMem( src, dst ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -8118,7 +8078,7 @@ instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
   format %{ "AND    $dst,$src" %}
   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
   // ins_encode( MemImm( dst, src) );
-  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
+  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -8284,7 +8244,7 @@ instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   ins_cost(150);
   format %{ "OR     $dst,$src" %}
   opcode(0x0B);
-  ins_encode( OpcP, RegMem( dst, src) );
+  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
   ins_pipe( ialu_reg_mem );
 %}
 
@@ -8296,7 +8256,7 @@ instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   ins_cost(150);
   format %{ "OR     $dst,$src" %}
   opcode(0x09);  /* Opcode 09 /r */
-  ins_encode( OpcP, RegMem( src, dst ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -8309,7 +8269,7 @@ instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
   format %{ "OR     $dst,$src" %}
   opcode(0x81,0x1);  /* Opcode 81 /1 id */
   // ins_encode( MemImm( dst, src) );
-  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
+  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -8491,7 +8451,7 @@ instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   ins_cost(150);
   format %{ "XOR    $dst,$src" %}
   opcode(0x33);
-  ins_encode( OpcP, RegMem(dst, src) );
+  ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
   ins_pipe( ialu_reg_mem );
 %}
 
@@ -8503,7 +8463,7 @@ instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   ins_cost(150);
   format %{ "XOR    $dst,$src" %}
   opcode(0x31);  /* Opcode 31 /r */
-  ins_encode( OpcP, RegMem( src, dst ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -8515,7 +8475,7 @@ instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
   ins_cost(125);
   format %{ "XOR    $dst,$src" %}
   opcode(0x81,0x6);  /* Opcode 81 /6 id */
-  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
+  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
   ins_pipe( ialu_mem_imm );
 %}
 
@@ -8801,7 +8761,7 @@ instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
   format %{ "ADD    $dst.lo,$mem\n\t"
             "ADC    $dst.hi,$mem+4" %}
   opcode(0x03, 0x13);
-  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
+  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
   ins_pipe( ialu_reg_long_mem );
 %}
 
@@ -8836,7 +8796,7 @@ instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
   format %{ "SUB    $dst.lo,$mem\n\t"
             "SBB    $dst.hi,$mem+4" %}
   opcode(0x2B, 0x1B);
-  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
+  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
   ins_pipe( ialu_reg_long_mem );
 %}
 
@@ -8879,7 +8839,7 @@ instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
   format %{ "AND    $dst.lo,$mem\n\t"
             "AND    $dst.hi,$mem+4" %}
   opcode(0x23, 0x23);
-  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
+  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
   ins_pipe( ialu_reg_long_mem );
 %}
 
@@ -9117,7 +9077,7 @@ instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
   format %{ "OR     $dst.lo,$mem\n\t"
             "OR     $dst.hi,$mem+4" %}
   opcode(0x0B,0x0B);
-  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
+  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
   ins_pipe( ialu_reg_long_mem );
 %}
 
@@ -9163,7 +9123,7 @@ instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
   format %{ "XOR    $dst.lo,$mem\n\t"
             "XOR    $dst.hi,$mem+4" %}
   opcode(0x33,0x33);
-  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
+  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
   ins_pipe( ialu_reg_long_mem );
 %}
 
@@ -9445,7 +9405,7 @@ instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
     "exit:" %}
   ins_encode %{
     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp_fixup(_masm);
+    emit_cmpfp_fixup(masm);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -9474,7 +9434,7 @@ instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
     "exit:" %}
   ins_encode %{
     __ ucomisd($src1$$XMMRegister, $src2$$Address);
-    emit_cmpfp_fixup(_masm);
+    emit_cmpfp_fixup(masm);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -9505,7 +9465,7 @@ instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
     "done:" %}
   ins_encode %{
     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp3(_masm, $dst$$Register);
+    emit_cmpfp3(masm, $dst$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -9525,7 +9485,7 @@ instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
     "done:" %}
   ins_encode %{
     __ ucomisd($src1$$XMMRegister, $src2$$Address);
-    emit_cmpfp3(_masm, $dst$$Register);
+    emit_cmpfp3(masm, $dst$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -9567,8 +9527,8 @@ instruct subDPR_reg_mem(regDPR dst, memory src) %{
   format %{ "FLD    $src\n\t"
             "DSUBp  $dst,ST" %}
   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
-              OpcP, RegOpc(dst) );
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
+              OpcP, RegOpc(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -9629,8 +9589,8 @@ instruct addDPR_reg_mem(regDPR dst, memory src) %{
   format %{ "FLD    $src\n\t"
             "DADDp  $dst,ST" %}
   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
-              OpcP, RegOpc(dst) );
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
+              OpcP, RegOpc(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -9644,10 +9604,11 @@ instruct addDPR_mem_reg(memory dst, regDPR src) %{
             "DADD   ST,$src\n\t"
             "FST_D  $dst" %}
   opcode(0xDD, 0x0);
-  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
-              Opcode(0xD8), RegOpc(src),
-              set_instruction_start,
-              Opcode(0xDD), RMopc_Mem(0x03,dst) );
+  ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
+              Opcode(0xD8), RegOpc(src), ClearInstMark,
+              SetInstMark,
+              Opcode(0xDD), RMopc_Mem(0x03,dst),
+              ClearInstMark);
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -9752,8 +9713,8 @@ instruct mulDPR_reg_mem(regDPR dst, memory src) %{
   format %{ "FLD_D  $src\n\t"
             "DMULp  $dst,ST" %}
   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
-              OpcP, RegOpc(dst) );
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
+              OpcP, RegOpc(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -9767,9 +9728,9 @@ instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
             "DMUL   ST,$src\n\t"
             "FSTP_D $dst" %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
               OpcReg_FPR(src),
-              Pop_Reg_DPR(dst) );
+              Pop_Reg_DPR(dst), ClearInstMark );
   ins_pipe( fpu_reg_reg_mem );
 %}
 
@@ -10028,7 +9989,7 @@ instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
     "exit:" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp_fixup(_masm);
+    emit_cmpfp_fixup(masm);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -10057,7 +10018,7 @@ instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
     "exit:" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$Address);
-    emit_cmpfp_fixup(_masm);
+    emit_cmpfp_fixup(masm);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -10088,7 +10049,7 @@ instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
     "done:" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp3(_masm, $dst$$Register);
+    emit_cmpfp3(masm, $dst$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -10108,7 +10069,7 @@ instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
     "done:" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$Address);
-    emit_cmpfp3(_masm, $dst$$Register);
+    emit_cmpfp3(masm, $dst$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -10194,9 +10155,9 @@ instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
             "FADD   ST,$src1\n\t"
             "FSTP_S $dst" %}
   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
               OpcReg_FPR(src1),
-              Pop_Mem_FPR(dst) );
+              Pop_Mem_FPR(dst), ClearInstMark );
   ins_pipe( fpu_mem_reg_mem );
 %}
 //
@@ -10208,8 +10169,8 @@ instruct addFPR_reg_mem(regFPR dst, memory src) %{
 
   format %{ "FADD   $dst,$src" %}
   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
-              OpcP, RegOpc(dst) );
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
+              OpcP, RegOpc(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -10221,9 +10182,9 @@ instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
 
   format %{ "FADD   $dst,$src1,$src2" %}
   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
               OpcReg_FPR(src2),
-              Pop_Mem_FPR(dst) );
+              Pop_Mem_FPR(dst), ClearInstMark );
   ins_pipe( fpu_mem_reg_mem );
 %}
 
@@ -10235,10 +10196,10 @@ instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
 
   format %{ "FADD   $dst,$src1,$src2 cisc" %}
   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
-              set_instruction_start,
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
               OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_FPR(dst) );
+              Pop_Mem_FPR(dst),
+              ClearInstMark);
   ins_pipe( fpu_mem_mem_mem );
 %}
 
@@ -10249,10 +10210,10 @@ instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
 
   format %{ "FADD   $dst,$src1,$src2" %}
   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
-              set_instruction_start,
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
               OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_FPR(dst) );
+              Pop_Mem_FPR(dst),
+              ClearInstMark);
   ins_pipe( fpu_mem_mem_mem );
 %}
 
@@ -10328,9 +10289,9 @@ instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
             "FMUL   $src1\n\t"
             "FSTP_S $dst"  %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
               OpcReg_FPR(src1),
-              Pop_Mem_FPR(dst) );
+              Pop_Mem_FPR(dst), ClearInstMark );
   ins_pipe( fpu_mem_reg_mem );
 %}
 //
@@ -10342,9 +10303,9 @@ instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
 
   format %{ "FMUL   $dst,$src1,$src2" %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
               OpcReg_FPR(src1),
-              Pop_Reg_FPR(dst) );
+              Pop_Reg_FPR(dst), ClearInstMark );
   ins_pipe( fpu_reg_reg_mem );
 %}
 
@@ -10355,10 +10316,10 @@ instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
 
   format %{ "FMUL   $dst,$src1,$src2" %}
   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
-              set_instruction_start,
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
               OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_FPR(dst) );
+              Pop_Mem_FPR(dst),
+              ClearInstMark );
   ins_pipe( fpu_mem_mem_mem );
 %}
 
@@ -10406,9 +10367,9 @@ instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
             "FMUL   ST,$src\n\t"
             "FSTP   $dst" %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
-  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
+  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
               OpcReg_FPR(src),
-              Pop_Reg_FPR(dst) );
+              Pop_Reg_FPR(dst), ClearInstMark );
   ins_pipe( fpu_reg_reg_mem );
 %}
 //
@@ -10424,10 +10385,10 @@ instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR sr
             "FADD   ST,$src2\n\t"
             "FSTP   $dst" %}
   opcode(0xD9); /* LoadF D9 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
               FMul_ST_reg(src1),
               FAdd_ST_reg(src2),
-              Pop_Reg_FPR(dst) );
+              Pop_Reg_FPR(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem_reg_reg );
 %}
 
@@ -10966,8 +10927,8 @@ instruct convI2DPR_mem(regDPR dst, memory mem) %{
   format %{ "FILD   $mem\n\t"
             "FSTP   $dst" %}
   opcode(0xDB);      /* DB /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_DPR(dst));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
+              Pop_Reg_DPR(dst), ClearInstMark);
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -11004,8 +10965,8 @@ instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
   format %{ "FILD   $mem\n\t"
             "FSTP_S $dst" %}
   opcode(0xDB);  /* DB /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Mem_FPR(dst));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
+              Pop_Mem_FPR(dst), ClearInstMark);
   ins_pipe( fpu_mem_mem );
 %}
 
@@ -11028,8 +10989,8 @@ instruct convI2FPR_mem(regFPR dst, memory mem) %{
   format %{ "FILD   $mem\n\t"
             "FSTP   $dst" %}
   opcode(0xDB);      /* DB /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_FPR(dst));
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
+              Pop_Reg_FPR(dst), ClearInstMark);
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -11227,8 +11188,8 @@ instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
   format %{ "FLD_S  $src\n\t"
             "FSTP   $dst\t# MoveI2F_stack_reg" %}
   opcode(0xD9);               /* D9 /0, FLD m32real */
-  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_FPR(dst) );
+  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
+              Pop_Reg_FPR(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -11266,7 +11227,7 @@ instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
   format %{ "MOV    $dst.lo,$src\n\t"
             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
   opcode(0x8B, 0x8B);
-  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
+  ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark);
   ins_pipe( ialu_mem_long_reg );
 %}
 
@@ -11317,7 +11278,7 @@ instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
   format %{ "MOV    $dst,$src.lo\n\t"
             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
   opcode(0x89, 0x89);
-  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
   ins_pipe( ialu_mem_long_reg );
 %}
 
@@ -11331,8 +11292,8 @@ instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
   format %{ "FLD_D  $src\n\t"
             "FSTP   $dst\t# MoveL2D_stack_reg" %}
   opcode(0xDD);               /* DD /0, FLD m64real */
-  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_DPR(dst) );
+  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
+              Pop_Reg_DPR(dst), ClearInstMark );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -12245,7 +12206,7 @@ instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
   format %{ "CMP    $op1,$op2" %}
   ins_cost(500);
   opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( OpcP, RegMem( op1, op2) );
+  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
   ins_pipe( ialu_cr_reg_mem );
 %}
 
@@ -12273,7 +12234,7 @@ instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
 
   format %{ "TEST   $src,$mem" %}
   opcode(0x85);
-  ins_encode( OpcP, RegMem( src, mem ) );
+  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
   ins_pipe( ialu_cr_reg_mem );
 %}
 
@@ -12304,7 +12265,7 @@ instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
   format %{ "CMPu   $op1,$op2" %}
   ins_cost(500);
   opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( OpcP, RegMem( op1, op2) );
+  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
   ins_pipe( ialu_cr_reg_mem );
 %}
 
@@ -12342,7 +12303,7 @@ instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
 
   format %{ "CMPu   $op1,$op2" %}
   opcode(0x81,0x07);  /* Opcode 81 /7 */
-  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
+  ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
   ins_pipe( ialu_cr_reg_imm );
 %}
 
@@ -12353,7 +12314,7 @@ instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
   format %{ "CMPu   $op1,$op2" %}
   ins_cost(500);
   opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( OpcP, RegMem( op1, op2) );
+  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
   ins_pipe( ialu_cr_reg_mem );
 %}
 
@@ -12376,7 +12337,7 @@ instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
 
   format %{ "CMPu   $op1,$op2" %}
   opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( OpcP, RegMem( op1, op2) );
+  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
   ins_pipe( ialu_cr_reg_mem );
 %}
 
@@ -12401,7 +12362,7 @@ instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
   format %{ "TEST   $op,0xFFFFFFFF" %}
   ins_cost(500);
   opcode(0xF7);               /* Opcode F7 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
+  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
   ins_pipe( ialu_cr_reg_imm );
 %}
 
@@ -12969,7 +12930,7 @@ instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_lo
   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
             "CMOV$cmp $dst.hi,$src.hi" %}
   opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
+  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
   ins_pipe( pipe_cmov_reg_long );
 %}
 
@@ -13008,7 +12969,7 @@ instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory
   ins_cost(250);
   format %{ "CMOV$cmp $dst,$src" %}
   opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
+  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
   ins_pipe( pipe_cmov_mem );
 %}
 
@@ -13178,7 +13139,7 @@ instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_lo
   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
             "CMOV$cmp $dst.hi,$src.hi" %}
   opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
+  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
   ins_pipe( pipe_cmov_reg_long );
 %}
 
@@ -13199,7 +13160,7 @@ instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory
   ins_cost(250);
   format %{ "CMOV$cmp $dst,$src" %}
   opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
+  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
   ins_pipe( pipe_cmov_mem );
 %}
 
@@ -13379,7 +13340,7 @@ instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst,
   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
             "CMOV$cmp $dst.hi,$src.hi+4" %}
   opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
+  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
   ins_pipe( pipe_cmov_reg_long );
 %}
 
@@ -13418,7 +13379,7 @@ instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst,
   ins_cost(250);
   format %{ "CMOV$cmp $dst,$src" %}
   opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
+  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
   ins_pipe( pipe_cmov_mem );
 %}
 
@@ -13765,7 +13726,9 @@ instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
   // EBP would need size(3)
   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
   ins_encode %{
+    __ set_inst_mark();
     __ relocate(relocInfo::poll_type);
+    __ clear_inst_mark();
     address pre_pc = __ pc();
     __ testl(rax, Address($poll$$Register, 0));
     address post_pc = __ pc();
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
index 4e57f3e1bbe3c..2e0cf770c2268 100644
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -358,7 +358,7 @@ source %{
 #define   RELOC_IMM64    Assembler::imm_operand
 #define   RELOC_DISP32   Assembler::disp32_operand
 
-#define __ _masm.
+#define __ masm->
 
 RegMask _ANY_REG_mask;
 RegMask _PTR_REG_mask;
@@ -519,7 +519,7 @@ int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 }
 
 // This could be in MacroAssembler but it's fairly C2 specific
-static void emit_cmpfp_fixup(MacroAssembler& _masm) {
+static void emit_cmpfp_fixup(MacroAssembler* masm) {
   Label exit;
   __ jccb(Assembler::noParity, exit);
   __ pushf();
@@ -539,7 +539,7 @@ static void emit_cmpfp_fixup(MacroAssembler& _masm) {
   __ bind(exit);
 }
 
-static void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
+static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
   Label done;
   __ movl(dst, -1);
   __ jcc(Assembler::parity, done);
@@ -558,7 +558,7 @@ static void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 // je            #
 // |-jz -> a | b # a & b
 // |    -> a     #
-static void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
+static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
                             XMMRegister a, XMMRegister b,
                             XMMRegister xmmt, Register rt,
                             bool min, bool single) {
@@ -643,7 +643,7 @@ void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, Phase
   ShouldNotReachHere();
 }
 
-void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
   // Empty encoding
 }
 
@@ -719,9 +719,8 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 }
 #endif
 
-void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
-  C2_MacroAssembler _masm(&cbuf);
 
   int framesize = C->output()->frame_size_in_bytes();
   int bangsize = C->output()->bang_size_in_bytes();
@@ -743,7 +742,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 
   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
 
-  C->output()->set_frame_complete(cbuf.insts_size());
+  C->output()->set_frame_complete(__ offset());
 
   if (C->has_mach_constant_base_node()) {
     // NOTE: We set the table base offset here because users might be
@@ -795,10 +794,9 @@ void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 }
 #endif
 
-void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 {
   Compile* C = ra_->C;
-  MacroAssembler _masm(&cbuf);
 
   if (generate_vzeroupper(C)) {
     // Clear upper bits of YMM registers when current compiled code uses
@@ -825,7 +823,6 @@ void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
   }
 
   if (do_polling() && C->is_method_compilation()) {
-    MacroAssembler _masm(&cbuf);
     Label dummy_label;
     Label* code_stub = &dummy_label;
     if (!C->output()->in_scratch_emit_size()) {
@@ -881,16 +878,15 @@ static enum RC rc_class(OptoReg::Name reg)
 }
 
 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
-static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
+static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 
-void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
+void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);
 
-static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
+static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                       int dst_offset, uint ireg, outputStream* st) {
-  if (cbuf) {
-    MacroAssembler _masm(cbuf);
+  if (masm) {
     switch (ireg) {
     case Op_VecS:
       __ movq(Address(rsp, -8), rax);
@@ -966,11 +962,11 @@ static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
   }
 }
 
-uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
+uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
                                        PhaseRegAlloc* ra_,
                                        bool do_size,
                                        outputStream* st) const {
-  assert(cbuf != nullptr || st  != nullptr, "sanity");
+  assert(masm != nullptr || st  != nullptr, "sanity");
   // Get registers to move
   OptoReg::Name src_second = ra_->get_reg_second(in(1));
   OptoReg::Name src_first = ra_->get_reg_first(in(1));
@@ -997,15 +993,15 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
       // mem -> mem
       int src_offset = ra_->reg2offset(src_first);
       int dst_offset = ra_->reg2offset(dst_first);
-      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
+      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
-      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
+      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
       int stack_offset = ra_->reg2offset(dst_first);
-      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
+      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
       int stack_offset = ra_->reg2offset(src_first);
-      vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
+      vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
     } else {
       ShouldNotReachHere();
     }
@@ -1021,8 +1017,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
         // 64-bit
         int src_offset = ra_->reg2offset(src_first);
         int dst_offset = ra_->reg2offset(dst_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ pushq(Address(rsp, src_offset));
           __ popq (Address(rsp, dst_offset));
 #ifndef PRODUCT
@@ -1039,8 +1034,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
         // No pushl/popl, so:
         int src_offset = ra_->reg2offset(src_first);
         int dst_offset = ra_->reg2offset(dst_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movq(Address(rsp, -8), rax);
           __ movl(rax, Address(rsp, src_offset));
           __ movl(Address(rsp, dst_offset), rax);
@@ -1062,8 +1056,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         int offset = ra_->reg2offset(src_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else {
@@ -1077,8 +1070,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(src_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else {
@@ -1095,8 +1087,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         int offset = ra_->reg2offset(src_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else {
@@ -1111,8 +1102,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(src_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else {
@@ -1129,8 +1119,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         int offset = ra_->reg2offset(src_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else {
@@ -1150,8 +1139,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         int offset = ra_->reg2offset(dst_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1165,8 +1153,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(dst_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1182,8 +1169,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movq(as_Register(Matcher::_regEncode[dst_first]),
                   as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
@@ -1198,8 +1184,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movl(as_Register(Matcher::_regEncode[dst_first]),
                   as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
@@ -1216,8 +1201,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1230,8 +1214,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1246,8 +1229,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
   #ifndef PRODUCT
         } else {
@@ -1268,8 +1250,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         int offset = ra_->reg2offset(dst_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1283,8 +1264,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(dst_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1300,8 +1280,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1314,8 +1293,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1331,8 +1309,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1346,8 +1323,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1370,8 +1346,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         int offset = ra_->reg2offset(dst_first);
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1386,8 +1361,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1403,8 +1377,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
+        if (masm) {
           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else {
@@ -1432,8 +1405,8 @@ void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 }
 #endif
 
-void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  implementation(&cbuf, ra_, false, nullptr);
+void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
+  implementation(masm, ra_, false, nullptr);
 }
 
 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
@@ -1451,13 +1424,12 @@ void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 }
 #endif
 
-void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 {
   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
   int reg = ra_->get_encode(this);
 
-  MacroAssembler masm(&cbuf);
-  masm.lea(as_Register(reg), Address(rsp, offset));
+  __ lea(as_Register(reg), Address(rsp, offset));
 }
 
 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
@@ -1481,10 +1453,9 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 }
 #endif
 
-void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 {
-  MacroAssembler masm(&cbuf);
-  masm.ic_check(InteriorEntryAlignment);
+  __ ic_check(InteriorEntryAlignment);
 }
 
 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
@@ -1663,7 +1634,6 @@ encode %{
     //  [REX_B]
     //    f:   f7 f9                   idiv   $div
     // 0000000000000011 <done>:
-    MacroAssembler _masm(&cbuf);
     Label normal;
     Label done;
 
@@ -1719,7 +1689,6 @@ encode %{
     //   17:   48 99                   cqto
     //   19:   48 f7 f9                idiv   $div
     // 000000000000001c <done>:
-    MacroAssembler _masm(&cbuf);
     Label normal;
     Label done;
 
@@ -1761,7 +1730,6 @@ encode %{
     Label miss;
     const bool set_cond_codes = true;
 
-    MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
                                      nullptr, &miss,
                                      /*set_cond_codes:*/ true);
@@ -1772,21 +1740,19 @@ encode %{
   %}
 
   enc_class clear_avx %{
-    debug_only(int off0 = cbuf.insts_size());
+    debug_only(int off0 = __ offset());
     if (generate_vzeroupper(Compile::current())) {
       // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
       // Clear upper bits of YMM registers when current compiled code uses
       // wide vectors to avoid AVX <-> SSE transition penalty during call.
-      MacroAssembler _masm(&cbuf);
       __ vzeroupper();
     }
-    debug_only(int off1 = cbuf.insts_size());
+    debug_only(int off1 = __ offset());
     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
   %}
 
   enc_class Java_To_Runtime(method meth) %{
     // No relocation needed
-    MacroAssembler _masm(&cbuf);
     __ mov64(r10, (int64_t) $meth$$method);
     __ call(r10);
     __ post_call_nop();
@@ -1797,8 +1763,6 @@ encode %{
     // JAVA STATIC CALL
     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
     // determine who we intended to call.
-    MacroAssembler _masm(&cbuf);
-
     if (!_method) {
       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
@@ -1807,7 +1771,7 @@ encode %{
       __ addr_nop_5();
       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
     } else {
-      int method_index = resolved_method_index(cbuf);
+      int method_index = resolved_method_index(masm);
       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                   : static_call_Relocation::spec(method_index);
       address mark = __ pc();
@@ -1816,10 +1780,11 @@ encode %{
       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
         // Calls of the same statically bound method can share
         // a stub to the interpreter.
-        cbuf.shared_stub_to_interp_for(_method, call_offset);
+        __ code()->shared_stub_to_interp_for(_method, call_offset);
       } else {
         // Emit stubs for static call.
-        address stub = CompiledDirectCall::emit_to_interp_stub(cbuf, mark);
+        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
+        __ clear_inst_mark();
         if (stub == nullptr) {
           ciEnv::current()->record_failure("CodeCache is full");
           return;
@@ -1830,8 +1795,7 @@ encode %{
   %}
 
   enc_class Java_Dynamic_Call(method meth) %{
-    MacroAssembler _masm(&cbuf);
-    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
+    __ ic_call((address)$meth$$method, resolved_method_index(masm));
     __ post_call_nop();
   %}
 
@@ -4351,7 +4315,7 @@ instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRe
 
   format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
   ins_encode %{
-    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
+    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
                     false /*min*/, true /*single*/);
   %}
   ins_pipe( pipe_slow );
@@ -4376,7 +4340,7 @@ instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRe
 
   format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
   ins_encode %{
-    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
+    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
                     false /*min*/, false /*single*/);
   %}
   ins_pipe( pipe_slow );
@@ -4401,7 +4365,7 @@ instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRe
 
   format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
   ins_encode %{
-    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
+    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
                     true /*min*/, true /*single*/);
   %}
   ins_pipe( pipe_slow );
@@ -4426,7 +4390,7 @@ instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRe
 
   format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
   ins_encode %{
-    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
+    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
                     true /*min*/, false /*single*/);
   %}
   ins_pipe( pipe_slow );
@@ -9732,7 +9696,7 @@ instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
     "exit:" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp_fixup(_masm);
+    emit_cmpfp_fixup(masm);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -9783,7 +9747,7 @@ instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
     "exit:" %}
   ins_encode %{
     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp_fixup(_masm);
+    emit_cmpfp_fixup(masm);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -9836,7 +9800,7 @@ instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
     "done:" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp3(_masm, $dst$$Register);
+    emit_cmpfp3(masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -9857,7 +9821,7 @@ instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
     "done:" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$Address);
-    emit_cmpfp3(_masm, $dst$$Register);
+    emit_cmpfp3(masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -9877,7 +9841,7 @@ instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
     "done:" %}
   ins_encode %{
     __ ucomiss($src$$XMMRegister, $constantaddress($con));
-    emit_cmpfp3(_masm, $dst$$Register);
+    emit_cmpfp3(masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -9898,7 +9862,7 @@ instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
     "done:" %}
   ins_encode %{
     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp3(_masm, $dst$$Register);
+    emit_cmpfp3(masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -9919,7 +9883,7 @@ instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
     "done:" %}
   ins_encode %{
     __ ucomisd($src1$$XMMRegister, $src2$$Address);
-    emit_cmpfp3(_masm, $dst$$Register);
+    emit_cmpfp3(masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -9939,7 +9903,7 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
     "done:" %}
   ins_encode %{
     __ ucomisd($src$$XMMRegister, $constantaddress($con));
-    emit_cmpfp3(_masm, $dst$$Register);
+    emit_cmpfp3(masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
diff --git a/src/hotspot/cpu/zero/c2_MacroAssembler_zero.hpp b/src/hotspot/cpu/zero/c2_MacroAssembler_zero.hpp
new file mode 100644
index 0000000000000..3efa561c8e65c
--- /dev/null
+++ b/src/hotspot/cpu/zero/c2_MacroAssembler_zero.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ZERO_C2_MACROASSEMBLER_ZERO_HPP
+#define CPU_ZERO_C2_MACROASSEMBLER_ZERO_HPP
+
+// C2_MacroAssembler contains high-level macros for C2
+
+#endif // CPU_ZERO_C2_MACROASSEMBLER_ZERO_HPP
diff --git a/src/hotspot/cpu/zero/compiledIC_zero.cpp b/src/hotspot/cpu/zero/compiledIC_zero.cpp
index 24153aeacc5e1..7db93a5f3cfa6 100644
--- a/src/hotspot/cpu/zero/compiledIC_zero.cpp
+++ b/src/hotspot/cpu/zero/compiledIC_zero.cpp
@@ -42,7 +42,7 @@
 
 // ----------------------------------------------------------------------------
 
-address CompiledDirectCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
+address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address mark) {
   ShouldNotReachHere(); // Only needed for COMPILER2.
   return nullptr;
 }
diff --git a/src/hotspot/share/adlc/adlparse.cpp b/src/hotspot/share/adlc/adlparse.cpp
index 57c6b0c5a99b1..48dea8804f3ff 100644
--- a/src/hotspot/share/adlc/adlparse.cpp
+++ b/src/hotspot/share/adlc/adlparse.cpp
@@ -2896,14 +2896,6 @@ void ADLParser::ins_encode_parse_block(InstructForm& inst) {
     encoding->add_parameter(opForm->_ident, param);
   }
 
-  if (!inst._is_postalloc_expand) {
-    // Define a MacroAssembler instance for use by the encoding.  The
-    // name is chosen to match the __ idiom used for assembly in other
-    // parts of hotspot and assumes the existence of the standard
-    // #define __ _masm.
-    encoding->add_code("    C2_MacroAssembler _masm(&cbuf);\n");
-  }
-
   // Parse the following %{ }% block
   ins_encode_parse_block_impl(inst, encoding, ec_name);
 
diff --git a/src/hotspot/share/adlc/output_c.cpp b/src/hotspot/share/adlc/output_c.cpp
index b54e62663e752..77332b21c0112 100644
--- a/src/hotspot/share/adlc/output_c.cpp
+++ b/src/hotspot/share/adlc/output_c.cpp
@@ -1902,7 +1902,7 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) {
 // target specific instruction object encodings.
 // Define the ___Node::emit() routine
 //
-// (1) void  ___Node::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+// (1) void  ___Node::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 // (2)   // ...  encoding defined by user
 // (3)
 // (4) }
@@ -2301,7 +2301,7 @@ class DefineEmitState {
       // Check results of prior scan
       if ( ! _may_reloc ) {
         // Definitely don't need relocation information
-        fprintf( _fp, "emit_%s(cbuf, ", d32_hi_lo );
+        fprintf( _fp, "emit_%s(masm, ", d32_hi_lo );
         emit_replacement(); fprintf(_fp, ")");
       }
       else {
@@ -2315,26 +2315,26 @@ class DefineEmitState {
         fprintf(_fp,"if ( opnd_array(%d)->%s_reloc() != relocInfo::none ) {\n",
                 _operand_idx, disp_constant);
         fprintf(_fp,"  ");
-        fprintf(_fp,"emit_%s_reloc(cbuf, ", d32_hi_lo );
+        fprintf(_fp,"emit_%s_reloc(masm, ", d32_hi_lo );
         emit_replacement();             fprintf(_fp,", ");
         fprintf(_fp,"opnd_array(%d)->%s_reloc(), ",
                 _operand_idx, disp_constant);
         fprintf(_fp, "%d", _reloc_form);fprintf(_fp, ");");
         fprintf(_fp,"\n");
         fprintf(_fp,"} else {\n");
-        fprintf(_fp,"  emit_%s(cbuf, ", d32_hi_lo);
+        fprintf(_fp,"  emit_%s(masm, ", d32_hi_lo);
         emit_replacement(); fprintf(_fp, ");\n"); fprintf(_fp,"}");
       }
     }
     else if ( _doing_emit_d16 ) {
       // Relocation of 16-bit values is not supported
-      fprintf(_fp,"emit_d16(cbuf, ");
+      fprintf(_fp,"emit_d16(masm, ");
       emit_replacement(); fprintf(_fp, ")");
       // No relocation done for 16-bit values
     }
     else if ( _doing_emit8 ) {
       // Relocation of 8-bit values is not supported
-      fprintf(_fp,"emit_d8(cbuf, ");
+      fprintf(_fp,"emit_d8(masm, ");
       emit_replacement(); fprintf(_fp, ")");
       // No relocation done for 8-bit values
     }
@@ -2675,7 +2675,7 @@ void ArchDesc::defineEmit(FILE* fp, InstructForm& inst) {
 
   // (1)
   // Output instruction's emit prototype
-  fprintf(fp, "void %sNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {\n", inst._ident);
+  fprintf(fp, "void %sNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {\n", inst._ident);
 
   // If user did not define an encode section,
   // provide stub that does not generate any machine code.
@@ -2685,12 +2685,9 @@ void ArchDesc::defineEmit(FILE* fp, InstructForm& inst) {
     return;
   }
 
-  // Save current instruction's starting address (helps with relocation).
-  fprintf(fp, "  cbuf.set_insts_mark();\n");
-
   // For MachConstantNodes which are ideal jump nodes, fill the jump table.
   if (inst.is_mach_constant() && inst.is_ideal_jump()) {
-    fprintf(fp, "  ra_->C->output()->constant_table().fill_jump_table(cbuf, (MachConstantNode*) this, _index2label);\n");
+    fprintf(fp, "  ra_->C->output()->constant_table().fill_jump_table(masm, (MachConstantNode*) this, _index2label);\n");
   }
 
   // Output each operand's offset into the array of registers.
diff --git a/src/hotspot/share/adlc/output_h.cpp b/src/hotspot/share/adlc/output_h.cpp
index 17a0fd0e01e68..9d54d8406887f 100644
--- a/src/hotspot/share/adlc/output_h.cpp
+++ b/src/hotspot/share/adlc/output_h.cpp
@@ -1629,7 +1629,7 @@ void ArchDesc::declareClasses(FILE *fp) {
         fprintf(fp,"  virtual bool           requires_postalloc_expand() const { return true; }\n");
         fprintf(fp,"  virtual void           postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_);\n");
       } else {
-        fprintf(fp,"  virtual void           emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;\n");
+        fprintf(fp,"  virtual void           emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;\n");
       }
     }
 
diff --git a/src/hotspot/share/asm/assembler.hpp b/src/hotspot/share/asm/assembler.hpp
index a533b96384468..d05278f7fe107 100644
--- a/src/hotspot/share/asm/assembler.hpp
+++ b/src/hotspot/share/asm/assembler.hpp
@@ -227,7 +227,8 @@ class AbstractAssembler : public ResourceObj  {
   bool isByte(int x) const             { return 0 <= x && x < 0x100; }
   bool isShiftCount(int x) const       { return 0 <= x && x < 32; }
 
-  // Instruction boundaries (required when emitting relocatable values).
+  // Mark instruction boundaries; this is required when emitting relocatable values,
+  // i.e. by all instructions that directly or indirectly use Assembler::emit_data* methods.
   class InstructionMark: public StackObj {
    private:
     AbstractAssembler* _assm;
@@ -366,6 +367,7 @@ class AbstractAssembler : public ResourceObj  {
   CodeBuffer*   code()         const   { return code_section()->outer(); }
   int           sect()         const   { return code_section()->index(); }
   address       pc()           const   { return code_section()->end();   }
+  address       begin()        const   { return code_section()->start(); }
   int           offset()       const   { return code_section()->size();  }
   int           locator()      const   { return CodeBuffer::locator(offset(), sect()); }
 
@@ -374,10 +376,11 @@ class AbstractAssembler : public ResourceObj  {
 
   void   register_skipped(int size) { code_section()->register_skipped(size); }
 
-  address       inst_mark() const { return code_section()->mark();       }
-  void      set_inst_mark()       {        code_section()->set_mark();   }
-  void    clear_inst_mark()       {        code_section()->clear_mark(); }
-
+  address       inst_mark() const         { return code_section()->mark();          }
+  void      set_inst_mark()               {        code_section()->set_mark();      }
+  void      set_inst_mark(address addr)   {        code_section()->set_mark(addr);  }
+  void    clear_inst_mark()               {        code_section()->clear_mark();    }
+  void set_inst_end(address addr)         {        code_section()->set_end(addr);   }
 
   // Constants in code
   void relocate(RelocationHolder const& rspec, int format = 0) {
@@ -389,6 +392,12 @@ class AbstractAssembler : public ResourceObj  {
   void relocate(   relocInfo::relocType rtype, int format = 0) {
     code_section()->relocate(code_section()->end(), rtype, format);
   }
+  void relocate(address addr, relocInfo::relocType rtype, int format = 0) {
+    code_section()->relocate(addr, rtype, format);
+  }
+  void relocate(address addr, RelocationHolder const& rspec, int format = 0) {
+    code_section()->relocate(addr, rspec, format);
+  }
 
   static int code_fill_byte();         // used to pad out odd-sized code buffers
 
diff --git a/src/hotspot/share/code/compiledIC.hpp b/src/hotspot/share/code/compiledIC.hpp
index 4439ff958f725..22b93c1760aa5 100644
--- a/src/hotspot/share/code/compiledIC.hpp
+++ b/src/hotspot/share/code/compiledIC.hpp
@@ -28,6 +28,7 @@
 #include "code/nativeInst.hpp"
 #include "interpreter/linkResolver.hpp"
 #include "runtime/safepointVerifiers.hpp"
+#include "opto/c2_MacroAssembler.hpp"
 
 //-----------------------------------------------------------------------------
 // The CompiledIC represents a compiled inline cache.
@@ -185,7 +186,7 @@ class CompiledDirectCall : public ResourceObj {
 
  public:
   // Returns null if CodeBuffer::expand fails
-  static address emit_to_interp_stub(CodeBuffer &cbuf, address mark = nullptr);
+  static address emit_to_interp_stub(MacroAssembler *masm, address mark = nullptr);
   static int to_interp_stub_size();
   static int to_trampoline_stub_size();
   static int reloc_to_interp_stub();
diff --git a/src/hotspot/share/jvmci/jvmciCodeInstaller.cpp b/src/hotspot/share/jvmci/jvmciCodeInstaller.cpp
index 9b1cb351a687e..f59d049462b39 100644
--- a/src/hotspot/share/jvmci/jvmciCodeInstaller.cpp
+++ b/src/hotspot/share/jvmci/jvmciCodeInstaller.cpp
@@ -1243,7 +1243,8 @@ void CodeInstaller::site_Call(CodeBuffer& buffer, u1 tag, jint pc_offset, HotSpo
     CodeInstaller::pd_relocate_JavaMethod(buffer, method, pc_offset, JVMCI_CHECK);
     if (_next_call_type == INVOKESTATIC || _next_call_type == INVOKESPECIAL) {
       // Need a static call stub for transitions from compiled to interpreted.
-      if (CompiledDirectCall::emit_to_interp_stub(buffer, _instructions->start() + pc_offset) == nullptr) {
+      MacroAssembler masm(&buffer);
+      if (CompiledDirectCall::emit_to_interp_stub(&masm, _instructions->start() + pc_offset) == nullptr) {
         JVMCI_ERROR("could not emit to_interp stub - code cache is full");
       }
     }
diff --git a/src/hotspot/share/opto/c2_CodeStubs.cpp b/src/hotspot/share/opto/c2_CodeStubs.cpp
index f96f086d7a142..793e915da7d66 100644
--- a/src/hotspot/share/opto/c2_CodeStubs.cpp
+++ b/src/hotspot/share/opto/c2_CodeStubs.cpp
@@ -33,22 +33,21 @@
 C2CodeStubList::C2CodeStubList() :
   _stubs(Compile::current()->comp_arena(), 2, 0, nullptr) {}
 
-void C2CodeStubList::emit(CodeBuffer& cb) {
-  C2_MacroAssembler masm(&cb);
+void C2CodeStubList::emit(C2_MacroAssembler& masm) {
   for (int i = _stubs.length() - 1; i >= 0; i--) {
     C2CodeStub* stub = _stubs.at(i);
     int max_size = stub->max_size();
     // Make sure there is enough space in the code buffer
-    if (cb.insts()->maybe_expand_to_ensure_remaining(max_size) && cb.blob() == nullptr) {
+    if (masm.code()->insts()->maybe_expand_to_ensure_remaining(max_size) && masm.code()->blob() == nullptr) {
       ciEnv::current()->record_failure("CodeCache is full");
       return;
     }
 
-    DEBUG_ONLY(int size_before = cb.insts_size();)
+    DEBUG_ONLY(int size_before = masm.offset();)
 
     stub->emit(masm);
 
-    DEBUG_ONLY(int actual_size = cb.insts_size() - size_before;)
+    DEBUG_ONLY(int actual_size = masm.offset() - size_before;)
     assert(max_size >= actual_size, "Expected stub size (%d) must be larger than or equal to actual stub size (%d)", max_size, actual_size);
   }
 }
diff --git a/src/hotspot/share/opto/c2_CodeStubs.hpp b/src/hotspot/share/opto/c2_CodeStubs.hpp
index 83d170810703d..1316fa68ed430 100644
--- a/src/hotspot/share/opto/c2_CodeStubs.hpp
+++ b/src/hotspot/share/opto/c2_CodeStubs.hpp
@@ -68,7 +68,7 @@ class C2CodeStubList {
   C2CodeStubList();
 
   void add_stub(C2CodeStub* stub) { _stubs.append(stub); }
-  void emit(CodeBuffer& cb);
+  void emit(C2_MacroAssembler& masm);
 };
 
 class C2SafepointPollStub : public C2CodeStub {
diff --git a/src/hotspot/share/opto/cfgnode.hpp b/src/hotspot/share/opto/cfgnode.hpp
index 4bc4962ccf000..869ac68bd9e56 100644
--- a/src/hotspot/share/opto/cfgnode.hpp
+++ b/src/hotspot/share/opto/cfgnode.hpp
@@ -687,7 +687,7 @@ class NeverBranchNode : public MultiBranchNode {
   virtual const Type* Value(PhaseGVN* phase) const;
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual int required_outcnt() const { return 2; }
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { }
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { }
   virtual uint size(PhaseRegAlloc *ra_) const { return 0; }
 #ifndef PRODUCT
   virtual void format( PhaseRegAlloc *, outputStream *st ) const;
diff --git a/src/hotspot/share/opto/constantTable.cpp b/src/hotspot/share/opto/constantTable.cpp
index 354fe27d30b84..66cf810547684 100644
--- a/src/hotspot/share/opto/constantTable.cpp
+++ b/src/hotspot/share/opto/constantTable.cpp
@@ -144,28 +144,27 @@ void ConstantTable::calculate_offsets_and_size() {
   _size = align_up(offset, (int)CodeEntryAlignment);
 }
 
-bool ConstantTable::emit(CodeBuffer& cb) const {
-  MacroAssembler _masm(&cb);
+bool ConstantTable::emit(C2_MacroAssembler* masm) const {
   for (int i = 0; i < _constants.length(); i++) {
     Constant con = _constants.at(i);
     address constant_addr = nullptr;
     if (con.is_array()) {
-      constant_addr = _masm.array_constant(con.type(), con.get_array(), con.alignment());
+      constant_addr = masm->array_constant(con.type(), con.get_array(), con.alignment());
     } else {
       switch (con.type()) {
-      case T_INT:    constant_addr = _masm.int_constant(   con.get_jint()   ); break;
-      case T_LONG:   constant_addr = _masm.long_constant(  con.get_jlong()  ); break;
-      case T_FLOAT:  constant_addr = _masm.float_constant( con.get_jfloat() ); break;
-      case T_DOUBLE: constant_addr = _masm.double_constant(con.get_jdouble()); break;
+      case T_INT:    constant_addr = masm->int_constant(   con.get_jint()   ); break;
+      case T_LONG:   constant_addr = masm->long_constant(  con.get_jlong()  ); break;
+      case T_FLOAT:  constant_addr = masm->float_constant( con.get_jfloat() ); break;
+      case T_DOUBLE: constant_addr = masm->double_constant(con.get_jdouble()); break;
       case T_OBJECT: {
         jobject obj = con.get_jobject();
-        int oop_index = _masm.oop_recorder()->find_index(obj);
-        constant_addr = _masm.address_constant((address) obj, oop_Relocation::spec(oop_index));
+        int oop_index = masm->oop_recorder()->find_index(obj);
+        constant_addr = masm->address_constant((address) obj, oop_Relocation::spec(oop_index));
         break;
       }
       case T_ADDRESS: {
         address addr = (address) con.get_jobject();
-        constant_addr = _masm.address_constant(addr);
+        constant_addr = masm->address_constant(addr);
         break;
       }
       // We use T_VOID as marker for jump-table entries (labels) which
@@ -175,23 +174,23 @@ bool ConstantTable::emit(CodeBuffer& cb) const {
         // Fill the jump-table with a dummy word.  The real value is
         // filled in later in fill_jump_table.
         address dummy = (address) n;
-        constant_addr = _masm.address_constant(dummy);
+        constant_addr = masm->address_constant(dummy);
         if (constant_addr == nullptr) {
           return false;
         }
-        assert((constant_addr - _masm.code()->consts()->start()) == con.offset(),
-              "must be: %d == %d", (int)(constant_addr - _masm.code()->consts()->start()), (int)(con.offset()));
+        assert((constant_addr - masm->code()->consts()->start()) == con.offset(),
+              "must be: %d == %d", (int)(constant_addr - masm->code()->consts()->start()), (int)(con.offset()));
 
         // Expand jump-table
         address last_addr = nullptr;
         for (uint j = 1; j < n->outcnt(); j++) {
-          last_addr = _masm.address_constant(dummy + j);
+          last_addr = masm->address_constant(dummy + j);
           if (last_addr == nullptr) {
             return false;
           }
         }
 #ifdef ASSERT
-        address start = _masm.code()->consts()->start();
+        address start = masm->code()->consts()->start();
         address new_constant_addr = last_addr - ((n->outcnt() - 1) * sizeof(address));
         // Expanding the jump-table could result in an expansion of the const code section.
         // In that case, we need to check if the new constant address matches the offset.
@@ -203,8 +202,8 @@ bool ConstantTable::emit(CodeBuffer& cb) const {
       }
       case T_METADATA: {
         Metadata* obj = con.get_metadata();
-        int metadata_index = _masm.oop_recorder()->find_index(obj);
-        constant_addr = _masm.address_constant((address) obj, metadata_Relocation::spec(metadata_index));
+        int metadata_index = masm->oop_recorder()->find_index(obj);
+        constant_addr = masm->address_constant((address) obj, metadata_Relocation::spec(metadata_index));
         break;
       }
       default: ShouldNotReachHere();
@@ -214,8 +213,8 @@ bool ConstantTable::emit(CodeBuffer& cb) const {
     if (constant_addr == nullptr) {
       return false;
     }
-    assert((constant_addr - _masm.code()->consts()->start()) == con.offset(),
-            "must be: %d == %d", (int)(constant_addr - _masm.code()->consts()->start()), (int)(con.offset()));
+    assert((constant_addr - masm->code()->consts()->start()) == con.offset(),
+            "must be: %d == %d", (int)(constant_addr - masm->code()->consts()->start()), (int)(con.offset()));
   }
   return true;
 }
@@ -292,7 +291,7 @@ ConstantTable::Constant ConstantTable::add_jump_table(MachConstantNode* n) {
   return con;
 }
 
-void ConstantTable::fill_jump_table(CodeBuffer& cb, MachConstantNode* n, GrowableArray<Label*> labels) const {
+void ConstantTable::fill_jump_table(C2_MacroAssembler* masm, MachConstantNode* n, GrowableArray<Label*> labels) const {
   // If called from Compile::scratch_emit_size do nothing.
   if (Compile::current()->output()->in_scratch_emit_size())  return;
 
@@ -304,13 +303,12 @@ void ConstantTable::fill_jump_table(CodeBuffer& cb, MachConstantNode* n, Growabl
   // to get the plain offset into the constant table.
   int offset = n->constant_offset() - table_base_offset();
 
-  MacroAssembler _masm(&cb);
-  address* jump_table_base = (address*) (_masm.code()->consts()->start() + offset);
+  address* jump_table_base = (address*) (masm->code()->consts()->start() + offset);
 
   for (uint i = 0; i < n->outcnt(); i++) {
     address* constant_addr = &jump_table_base[i];
     assert(*constant_addr == (((address) n) + i), "all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, p2i(*constant_addr), p2i(((address) n) + i));
-    *constant_addr = cb.consts()->target(*labels.at(i), (address) constant_addr);
-    cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
+    *constant_addr = masm->code()->consts()->target(*labels.at(i), (address) constant_addr);
+    masm->code()->consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
   }
 }
diff --git a/src/hotspot/share/opto/constantTable.hpp b/src/hotspot/share/opto/constantTable.hpp
index 001193f6d0de2..f7197783773e3 100644
--- a/src/hotspot/share/opto/constantTable.hpp
+++ b/src/hotspot/share/opto/constantTable.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,10 +27,10 @@
 
 #include "utilities/globalDefinitions.hpp"
 
-class CodeBuffer;
 class Metadata;
 class MachConstantNode;
 class MachOper;
+class C2_MacroAssembler;
 
 class ConstantTable {
 public:
@@ -139,7 +139,7 @@ class ConstantTable {
   void set_table_base_offset(int x)  { assert(_table_base_offset == -1 || x == _table_base_offset, "can't change"); _table_base_offset = x; }
   int      table_base_offset() const { assert(_table_base_offset != -1, "not set yet");                      return _table_base_offset; }
 
-  bool emit(CodeBuffer& cb) const;
+  bool emit(C2_MacroAssembler* masm) const;
 
   // Returns the offset of the last entry (the top) of the constant table.
   int  top_offset() const { assert(_constants.top().offset() != -1, "not bound yet"); return _constants.top().offset(); }
@@ -172,7 +172,7 @@ class ConstantTable {
 
   // Jump-table
   Constant  add_jump_table(MachConstantNode* n);
-  void     fill_jump_table(CodeBuffer& cb, MachConstantNode* n, GrowableArray<Label*> labels) const;
+  void     fill_jump_table(C2_MacroAssembler* masm, MachConstantNode* n, GrowableArray<Label*> labels) const;
 };
 
 
diff --git a/src/hotspot/share/opto/locknode.hpp b/src/hotspot/share/opto/locknode.hpp
index 3bc684c40a9da..fcc8da0eb343e 100644
--- a/src/hotspot/share/opto/locknode.hpp
+++ b/src/hotspot/share/opto/locknode.hpp
@@ -68,7 +68,7 @@ class BoxLockNode : public Node {
 public:
   BoxLockNode( int lock );
   virtual int Opcode() const;
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;
   virtual uint size(PhaseRegAlloc *ra_) const;
   virtual const RegMask &in_RegMask(uint) const;
   virtual const RegMask &out_RegMask() const;
diff --git a/src/hotspot/share/opto/machnode.cpp b/src/hotspot/share/opto/machnode.cpp
index 173a38fa9d854..b2c0028b1a3ab 100644
--- a/src/hotspot/share/opto/machnode.cpp
+++ b/src/hotspot/share/opto/machnode.cpp
@@ -132,7 +132,7 @@ bool methodOper::cmp( const MachOper &oper ) const {
 //------------------------------MachNode---------------------------------------
 
 //------------------------------emit-------------------------------------------
-void MachNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   #ifdef ASSERT
   tty->print("missing MachNode emit function: ");
   dump();
@@ -604,7 +604,7 @@ void MachNullCheckNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
 }
 #endif
 
-void MachNullCheckNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+void MachNullCheckNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
   // only emits entries in the null-pointer exception handler table
 }
 void MachNullCheckNode::label_set(Label* label, uint block_num) {
diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp
index b834f994df45a..677e1bcd1d1a5 100644
--- a/src/hotspot/share/opto/machnode.hpp
+++ b/src/hotspot/share/opto/machnode.hpp
@@ -25,6 +25,7 @@
 #ifndef SHARE_OPTO_MACHNODE_HPP
 #define SHARE_OPTO_MACHNODE_HPP
 
+#include "opto/c2_MacroAssembler.hpp"
 #include "opto/callnode.hpp"
 #include "opto/constantTable.hpp"
 #include "opto/matcher.hpp"
@@ -34,7 +35,6 @@
 #include "utilities/growableArray.hpp"
 
 class BufferBlob;
-class CodeBuffer;
 class JVMState;
 class MachCallDynamicJavaNode;
 class MachCallJavaNode;
@@ -284,8 +284,8 @@ class MachNode : public Node {
   MachOper **_opnds;
   uint16_t num_opnds() const { return _num_opnds; }
 
-  // Emit bytes into cbuf
-  virtual void  emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+  // Emit bytes using C2_MacroAssembler
+  virtual void  emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;
   // Expand node after register allocation.
   // Node is replaced by several nodes in the postalloc expand phase.
   // Corresponding methods are generated for nodes if they specify
@@ -421,7 +421,7 @@ class MachTypeNode : public MachNode {
 class MachBreakpointNode : public MachIdealNode {
 public:
   MachBreakpointNode( ) {}
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;
   virtual uint size(PhaseRegAlloc *ra_) const;
 
 #ifndef PRODUCT
@@ -447,7 +447,7 @@ class MachConstantBaseNode : public MachIdealNode {
   virtual bool requires_postalloc_expand() const;
   virtual void postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_);
 
-  virtual void emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const;
+  virtual void emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const;
   virtual uint size(PhaseRegAlloc* ra_) const;
 
   static const RegMask& static_out_RegMask() { return _out_RegMask; }
@@ -498,7 +498,7 @@ class MachConstantNode : public MachTypeNode {
 class MachUEPNode : public MachIdealNode {
 public:
   MachUEPNode( ) {}
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;
   virtual uint size(PhaseRegAlloc *ra_) const;
 
 #ifndef PRODUCT
@@ -512,7 +512,7 @@ class MachUEPNode : public MachIdealNode {
 class MachPrologNode : public MachIdealNode {
 public:
   MachPrologNode( ) {}
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;
   virtual uint size(PhaseRegAlloc *ra_) const;
   virtual int reloc() const;
 
@@ -527,7 +527,7 @@ class MachPrologNode : public MachIdealNode {
 class MachEpilogNode : public MachIdealNode {
 public:
   MachEpilogNode(bool do_poll = false) : _do_polling(do_poll) {}
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;
   virtual uint size(PhaseRegAlloc *ra_) const;
   virtual int reloc() const;
   virtual const Pipeline *pipeline() const;
@@ -552,7 +552,7 @@ class MachNopNode : public MachIdealNode {
 public:
   MachNopNode( ) : _count(1) {}
   MachNopNode( int count ) : _count(count) {}
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;
   virtual uint size(PhaseRegAlloc *ra_) const;
 
   virtual const class Type *bottom_type() const { return Type::CONTROL; }
@@ -610,9 +610,9 @@ class MachSpillCopyNode : public MachIdealNode {
   virtual const class Type *bottom_type() const { return _type; }
   virtual uint ideal_reg() const { return _type->ideal_reg(); }
   virtual uint oper_input_base() const { return 1; }
-  uint implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const;
+  uint implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const;
 
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;
   virtual uint size(PhaseRegAlloc *ra_) const;
 
 
@@ -675,7 +675,7 @@ class MachMergeNode : public MachIdealNode {
   virtual const class Type *bottom_type() const { return in(1)->bottom_type(); }
   virtual uint ideal_reg() const { return bottom_type()->ideal_reg(); }
   virtual uint oper_input_base() const { return 1; }
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { }
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { }
   virtual uint size(PhaseRegAlloc *ra_) const { return 0; }
 #ifndef PRODUCT
   virtual const char *Name() const { return "MachMerge"; }
@@ -715,7 +715,7 @@ class MachNullCheckNode : public MachBranchNode {
   virtual int Opcode() const;
   virtual uint size_of() const { return sizeof(*this); }
 
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;
   virtual void label_set(Label* label, uint block_num);
   virtual void save_label(Label** label, uint* block_num);
   virtual void negate() { }
@@ -946,13 +946,13 @@ class MachCallJavaNode : public MachCallNode {
 
   virtual const RegMask &in_RegMask(uint) const;
 
-  int resolved_method_index(CodeBuffer &cbuf) const {
+  int resolved_method_index(C2_MacroAssembler *masm) const {
     if (_override_symbolic_info) {
       // Attach corresponding Method* to the call site, so VM can use it during resolution
       // instead of querying symbolic info from bytecode.
       assert(_method != nullptr, "method should be set");
       assert(_method->constant_encoding()->is_method(), "should point to a Method");
-      return cbuf.oop_recorder()->find_index(_method->constant_encoding());
+      return masm->code()->oop_recorder()->find_index(_method->constant_encoding());
     }
     return 0; // Use symbolic info from bytecode (resolved_method is null).
   }
@@ -1057,7 +1057,7 @@ class MachTempNode : public MachNode {
 public:
   virtual const RegMask &out_RegMask() const { return *_opnds[0]->in_RegMask(0); }
   virtual uint rule() const { return 9999999; }
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {}
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {}
 
   MachTempNode(MachOper* oper) {
     init_class_id(Class_MachTemp);
diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp
index dee4ce80d35d2..cb3b519ba5d8c 100644
--- a/src/hotspot/share/opto/node.cpp
+++ b/src/hotspot/share/opto/node.cpp
@@ -1092,8 +1092,8 @@ juint Node::max_flags() {
 // Print as assembly
 void Node::format( PhaseRegAlloc *, outputStream *st ) const {}
 //------------------------------emit-------------------------------------------
-// Emit bytes starting at parameter 'ptr'.
-void Node::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {}
+// Emit bytes using C2_MacroAssembler
+void Node::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {}
 //------------------------------size-------------------------------------------
 // Size of instruction in bytes
 uint Node::size(PhaseRegAlloc *ra_) const { return 0; }
diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp
index 14cf14366719d..19b9a11d9ce4e 100644
--- a/src/hotspot/share/opto/node.hpp
+++ b/src/hotspot/share/opto/node.hpp
@@ -191,6 +191,7 @@ class ShiftVNode;
 class ExpandVNode;
 class CompressVNode;
 class CompressMNode;
+class C2_MacroAssembler;
 
 
 #ifndef OPTO_DU_ITERATOR_ASSERT
@@ -1181,9 +1182,8 @@ class Node {
 
   // Print as assembly
   virtual void format( PhaseRegAlloc *, outputStream* st = tty ) const;
-  // Emit bytes starting at parameter 'ptr'
-  // Bump 'ptr' by the number of output bytes
-  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+  // Emit bytes using C2_MacroAssembler
+  virtual void emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const;
   // Size of instruction in bytes
   virtual uint size(PhaseRegAlloc *ra_) const;
 
diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp
index 640a24693dedd..a3b9926346942 100644
--- a/src/hotspot/share/opto/output.cpp
+++ b/src/hotspot/share/opto/output.cpp
@@ -363,7 +363,8 @@ void PhaseOutput::Output() {
     return;
   }
 
-  fill_buffer(cb, blk_starts);
+  C2_MacroAssembler masm(cb);
+  fill_buffer(&masm, blk_starts);
 }
 
 bool PhaseOutput::need_stack_bang(int frame_size_in_bytes) const {
@@ -1368,7 +1369,7 @@ CodeBuffer* PhaseOutput::init_buffer() {
 }
 
 //------------------------------fill_buffer------------------------------------
-void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
+void PhaseOutput::fill_buffer(C2_MacroAssembler* masm, uint* blk_starts) {
   // blk_starts[] contains offsets calculated during short branches processing,
   // offsets should not be increased during following steps.
 
@@ -1424,7 +1425,7 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
 
   // Emit the constant table.
   if (C->has_mach_constant_base_node()) {
-    if (!constant_table().emit(*cb)) {
+    if (!constant_table().emit(masm)) {
       C->record_failure("consts section overflow");
       return;
     }
@@ -1447,14 +1448,14 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
     // than by falling-thru from the previous block), then force the
     // start of a new bundle.
     if (Pipeline::requires_bundling() && starts_bundle(head)) {
-      cb->flush_bundle(true);
+      masm->code()->flush_bundle(true);
     }
 
 #ifdef ASSERT
     if (!block->is_connector()) {
       stringStream st;
       block->dump_head(C->cfg(), &st);
-      MacroAssembler(cb).block_comment(st.freeze());
+      masm->block_comment(st.freeze());
     }
     jmp_target[i] = 0;
     jmp_offset[i] = 0;
@@ -1464,7 +1465,7 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
     int blk_offset = current_offset;
 
     // Define the label at the beginning of the basic block
-    MacroAssembler(cb).bind(blk_labels[block->_pre_order]);
+    masm->bind(blk_labels[block->_pre_order]);
 
     uint last_inst = block->number_of_nodes();
 
@@ -1488,7 +1489,7 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
       // If this starts a new instruction group, then flush the current one
       // (but allow split bundles)
       if (Pipeline::requires_bundling() && starts_bundle(n))
-        cb->flush_bundle(false);
+        masm->code()->flush_bundle(false);
 
       // Special handling for SafePoint/Call Nodes
       bool is_mcall = false;
@@ -1499,8 +1500,8 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
 
         // If this requires all previous instructions be flushed, then do so
         if (is_sfn || is_mcall || mach->alignment_required() != 1) {
-          cb->flush_bundle(true);
-          current_offset = cb->insts_size();
+          masm->code()->flush_bundle(true);
+          current_offset = masm->offset();
         }
 
         // A padding may be needed again since a previous instruction
@@ -1527,14 +1528,14 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
           last_inst++;
           C->cfg()->map_node_to_block(nop, block);
           // Ensure enough space.
-          cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
-          if ((cb->blob() == nullptr) || (!CompileBroker::should_compile_new_jobs())) {
+          masm->code()->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
+          if ((masm->code()->blob() == nullptr) || (!CompileBroker::should_compile_new_jobs())) {
             C->record_failure("CodeCache is full");
             return;
           }
-          nop->emit(*cb, C->regalloc());
-          cb->flush_bundle(true);
-          current_offset = cb->insts_size();
+          nop->emit(masm, C->regalloc());
+          masm->code()->flush_bundle(true);
+          current_offset = masm->offset();
         }
 
         bool observe_safepoint = is_sfn;
@@ -1612,9 +1613,9 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
                 block->insert_node(nop, j++);
                 C->cfg()->map_node_to_block(nop, block);
                 last_inst++;
-                nop->emit(*cb, C->regalloc());
-                cb->flush_bundle(true);
-                current_offset = cb->insts_size();
+                nop->emit(masm, C->regalloc());
+                masm->code()->flush_bundle(true);
+                current_offset = masm->offset();
               }
 #ifdef ASSERT
               jmp_target[i] = block_num;
@@ -1679,8 +1680,8 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
       }
 
       // Verify that there is sufficient space remaining
-      cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
-      if ((cb->blob() == nullptr) || (!CompileBroker::should_compile_new_jobs())) {
+      masm->code()->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
+      if ((masm->code()->blob() == nullptr) || (!CompileBroker::should_compile_new_jobs())) {
         C->record_failure("CodeCache is full");
         return;
       }
@@ -1688,15 +1689,15 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
       // Save the offset for the listing
 #if defined(SUPPORT_OPTO_ASSEMBLY)
       if ((node_offsets != nullptr) && (n->_idx < node_offset_limit)) {
-        node_offsets[n->_idx] = cb->insts_size();
+        node_offsets[n->_idx] = masm->offset();
       }
 #endif
       assert(!C->failing(), "Should not reach here if failing.");
 
       // "Normal" instruction case
-      DEBUG_ONLY(uint instr_offset = cb->insts_size());
-      n->emit(*cb, C->regalloc());
-      current_offset = cb->insts_size();
+      DEBUG_ONLY(uint instr_offset = masm->offset());
+      n->emit(masm, C->regalloc());
+      current_offset = masm->offset();
 
       // Above we only verified that there is enough space in the instruction section.
       // However, the instruction may emit stubs that cause code buffer expansion.
@@ -1715,7 +1716,7 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
         n->dump();
         mach->dump_format(C->regalloc(), tty);
         tty->print_cr(" n_size (%d), current_offset (%d), instr_offset (%d)", n_size, current_offset, instr_offset);
-        Disassembler::decode(cb->insts_begin() + instr_offset, cb->insts_begin() + current_offset + 1, tty);
+        Disassembler::decode(masm->code()->insts_begin() + instr_offset, masm->code()->insts_begin() + current_offset + 1, tty);
         tty->print_cr(" ------------------- ");
         BufferBlob* blob = this->scratch_buffer_blob();
         address blob_begin = blob->content_begin();
@@ -1746,12 +1747,12 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
         guarantee(delay_slot != nullptr, "expecting delay slot node");
 
         // Back up 1 instruction
-        cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size());
+        masm->code()->set_insts_end(masm->code()->insts_end() - Pipeline::instr_unit_size());
 
         // Save the offset for the listing
 #if defined(SUPPORT_OPTO_ASSEMBLY)
         if ((node_offsets != nullptr) && (delay_slot->_idx < node_offset_limit)) {
-          node_offsets[delay_slot->_idx] = cb->insts_size();
+          node_offsets[delay_slot->_idx] = masm->offset();
         }
 #endif
 
@@ -1773,7 +1774,7 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
         }
 
         // Insert the delay slot instruction
-        delay_slot->emit(*cb, C->regalloc());
+        delay_slot->emit(masm, C->regalloc());
 
         // Don't reuse it
         delay_slot = nullptr;
@@ -1790,8 +1791,8 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
         MachNode *nop = new MachNopNode(padding / nop_size);
         block->insert_node(nop, block->number_of_nodes());
         C->cfg()->map_node_to_block(nop, block);
-        nop->emit(*cb, C->regalloc());
-        current_offset = cb->insts_size();
+        nop->emit(masm, C->regalloc());
+        current_offset = masm->offset();
       }
     }
     // Verify that the distance for generated before forward
@@ -1809,7 +1810,7 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
   if (C->failing())  return;
 
   // Define a pseudo-label at the end of the code
-  MacroAssembler(cb).bind( blk_labels[nblocks] );
+  masm->bind( blk_labels[nblocks] );
 
   // Compute the size of the first block
   _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();
@@ -1827,22 +1828,23 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
   }
 #endif
 
-  if (!cb->finalize_stubs()) {
+  if (!masm->code()->finalize_stubs()) {
     C->record_failure("CodeCache is full");
     return;
   }
 
   BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
-  bs->emit_stubs(*cb);
+  bs->emit_stubs(*masm->code());
   if (C->failing())  return;
 
   // Fill in stubs.
-  _stub_list.emit(*cb);
+  assert(masm->inst_mark() == nullptr, "should be.");
+  _stub_list.emit(*masm);
   if (C->failing())  return;
 
 #ifndef PRODUCT
   // Information on the size of the method, without the extraneous code
-  Scheduling::increment_method_size(cb->insts_size());
+  Scheduling::increment_method_size(masm->offset());
 #endif
 
   // ------------------
@@ -1853,23 +1855,23 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
   // class HandlerImpl is platform-specific and defined in the *.ad files.
   if (C->method()) {
     // Emit the exception handler code.
-    _code_offsets.set_value(CodeOffsets::Exceptions, HandlerImpl::emit_exception_handler(*cb));
+    _code_offsets.set_value(CodeOffsets::Exceptions, HandlerImpl::emit_exception_handler(masm));
     if (C->failing()) {
       return; // CodeBuffer::expand failed
     }
     // Emit the deopt handler code.
-    _code_offsets.set_value(CodeOffsets::Deopt, HandlerImpl::emit_deopt_handler(*cb));
+    _code_offsets.set_value(CodeOffsets::Deopt, HandlerImpl::emit_deopt_handler(masm));
 
     // Emit the MethodHandle deopt handler code (if required).
     if (C->has_method_handle_invokes() && !C->failing()) {
       // We can use the same code as for the normal deopt handler, we
       // just need a different entry point address.
-      _code_offsets.set_value(CodeOffsets::DeoptMH, HandlerImpl::emit_deopt_handler(*cb));
+      _code_offsets.set_value(CodeOffsets::DeoptMH, HandlerImpl::emit_deopt_handler(masm));
     }
   }
 
   // One last check for failed CodeBuffer::expand:
-  if ((cb->blob() == nullptr) || (!CompileBroker::should_compile_new_jobs())) {
+  if ((masm->code()->blob() == nullptr) || (!CompileBroker::should_compile_new_jobs())) {
     C->record_failure("CodeCache is full");
     return;
   }
@@ -3357,13 +3359,13 @@ uint PhaseOutput::scratch_emit_size(const Node* n) {
   Label*   saveL = nullptr;
   uint save_bnum = 0;
   bool is_branch = n->is_MachBranch();
+  C2_MacroAssembler masm(&buf);
+  masm.bind(fakeL);
   if (is_branch) {
-    MacroAssembler masm(&buf);
-    masm.bind(fakeL);
     n->as_MachBranch()->save_label(&saveL, &save_bnum);
     n->as_MachBranch()->label_set(&fakeL, 0);
   }
-  n->emit(buf, C->regalloc());
+  n->emit(&masm, C->regalloc());
 
   // Emitting into the scratch buffer should not fail
   assert (!C->failing(), "Must not have pending failure. Reason is: %s", C->failure_reason());
diff --git a/src/hotspot/share/opto/output.hpp b/src/hotspot/share/opto/output.hpp
index 363520834192a..b58b97e77455e 100644
--- a/src/hotspot/share/opto/output.hpp
+++ b/src/hotspot/share/opto/output.hpp
@@ -154,7 +154,7 @@ class PhaseOutput : public Phase {
   CodeBuffer* init_buffer();
 
   // Write out basic block data to code buffer
-  void fill_buffer(CodeBuffer* cb, uint* blk_starts);
+  void fill_buffer(C2_MacroAssembler* masm, uint* blk_starts);
 
   // Compute the information for the exception tables
   void FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels);

From 5808f30b89382af22027c43ebf14e36b0c16f041 Mon Sep 17 00:00:00 2001
From: Albert Mingkun Yang <ayang@openjdk.org>
Date: Thu, 11 Apr 2024 19:22:11 +0000
Subject: [PATCH 03/32] 8330026: Serial: Move some includes to
 vmStructs_serial.hpp

Reviewed-by: kbarrett
---
 src/hotspot/share/gc/serial/vmStructs_serial.hpp | 2 ++
 src/hotspot/share/gc/shared/vmStructs_gc.hpp     | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/hotspot/share/gc/serial/vmStructs_serial.hpp b/src/hotspot/share/gc/serial/vmStructs_serial.hpp
index f7bb570b60163..d1e29a426325c 100644
--- a/src/hotspot/share/gc/serial/vmStructs_serial.hpp
+++ b/src/hotspot/share/gc/serial/vmStructs_serial.hpp
@@ -26,6 +26,8 @@
 #define SHARE_GC_SERIAL_VMSTRUCTS_SERIAL_HPP
 
 #include "gc/serial/cardTableRS.hpp"
+#include "gc/serial/defNewGeneration.hpp"
+#include "gc/serial/generation.hpp"
 #include "gc/serial/serialHeap.hpp"
 #include "gc/serial/tenuredGeneration.hpp"
 
diff --git a/src/hotspot/share/gc/shared/vmStructs_gc.hpp b/src/hotspot/share/gc/shared/vmStructs_gc.hpp
index 4c1d7b6b2aa00..786f7b635e77e 100644
--- a/src/hotspot/share/gc/shared/vmStructs_gc.hpp
+++ b/src/hotspot/share/gc/shared/vmStructs_gc.hpp
@@ -40,8 +40,6 @@
 #include "gc/parallel/vmStructs_parallelgc.hpp"
 #endif
 #if INCLUDE_SERIALGC
-#include "gc/serial/defNewGeneration.hpp"
-#include "gc/serial/generation.hpp"
 #include "gc/serial/vmStructs_serial.hpp"
 #endif
 #if INCLUDE_SHENANDOAHGC

From 0db42906e390a98b3a6be078f1b8c3f2a03a838f Mon Sep 17 00:00:00 2001
From: Jorn Vernee <jvernee@openjdk.org>
Date: Thu, 11 Apr 2024 20:54:09 +0000
Subject: [PATCH 04/32] 8330049: Remove unused AbstractLinker::linkerByteOrder

Reviewed-by: mcimadamore
---
 .../classes/jdk/internal/foreign/abi/AbstractLinker.java | 3 ---
 .../foreign/abi/aarch64/linux/LinuxAArch64Linker.java    | 7 +------
 .../foreign/abi/aarch64/macos/MacOsAArch64Linker.java    | 7 +------
 .../abi/aarch64/windows/WindowsAArch64Linker.java        | 7 +------
 .../internal/foreign/abi/fallback/FallbackLinker.java    | 5 -----
 .../internal/foreign/abi/ppc64/aix/AixPPC64Linker.java   | 9 ++-------
 .../foreign/abi/ppc64/linux/LinuxPPC64Linker.java        | 7 +------
 .../foreign/abi/ppc64/linux/LinuxPPC64leLinker.java      | 7 +------
 .../foreign/abi/riscv64/linux/LinuxRISCV64Linker.java    | 7 +------
 .../internal/foreign/abi/s390/linux/LinuxS390Linker.java | 7 +------
 .../jdk/internal/foreign/abi/x64/sysv/SysVx64Linker.java | 7 +------
 .../foreign/abi/x64/windows/Windowsx64Linker.java        | 7 +------
 12 files changed, 11 insertions(+), 69 deletions(-)

diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/AbstractLinker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/AbstractLinker.java
index 12c9a2dfaa241..4f3baaa0e7118 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/AbstractLinker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/AbstractLinker.java
@@ -141,9 +141,6 @@ public SystemLookup defaultLookup() {
         return SystemLookup.getInstance();
     }
 
-    /** {@return byte order used by this linker} */
-    protected abstract ByteOrder linkerByteOrder();
-
     // C spec mandates that variadic arguments smaller than int are promoted to int,
     // and float is promoted to double
     // See: https://en.cppreference.com/w/c/language/conversion#Default_argument_promotions
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/linux/LinuxAArch64Linker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/linux/LinuxAArch64Linker.java
index 54307e1ec212a..4ffd15aefa0ad 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/linux/LinuxAArch64Linker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/linux/LinuxAArch64Linker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2019, 2021, Arm Limited. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -69,11 +69,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         return CallArranger.LINUX.arrangeUpcall(targetType, function, options);
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.LITTLE_ENDIAN;
-    }
-
     @Override
     public Map<String, MemoryLayout> canonicalLayouts() {
         return CANONICAL_LAYOUTS;
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/macos/MacOsAArch64Linker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/macos/MacOsAArch64Linker.java
index 0772c09fdb539..c60f01524df42 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/macos/MacOsAArch64Linker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/macos/MacOsAArch64Linker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2021, Arm Limited. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -69,11 +69,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         return CallArranger.MACOS.arrangeUpcall(targetType, function, options);
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.LITTLE_ENDIAN;
-    }
-
     @Override
     public Map<String, MemoryLayout> canonicalLayouts() {
         return CANONICAL_LAYOUTS;
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/windows/WindowsAArch64Linker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/windows/WindowsAArch64Linker.java
index eac1d49c1ddee..23fb046aeb1da 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/windows/WindowsAArch64Linker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/aarch64/windows/WindowsAArch64Linker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2021, Arm Limited. All rights reserved.
  * Copyright (c) 2021, 2022, Microsoft. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -65,11 +65,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         return CallArranger.WINDOWS.arrangeUpcall(targetType, function, options);
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.LITTLE_ENDIAN;
-    }
-
     @Override
     public Map<String, MemoryLayout> canonicalLayouts() {
         return CANONICAL_LAYOUTS;
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/fallback/FallbackLinker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/fallback/FallbackLinker.java
index d617e535dd673..59d3810ecc07e 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/fallback/FallbackLinker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/fallback/FallbackLinker.java
@@ -123,11 +123,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         };
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.nativeOrder();
-    }
-
     private static MemorySegment makeCif(MethodType methodType, FunctionDescriptor function, LinkerOptions options, Arena scope) {
         FFIABI abi = FFIABI.DEFAULT;
 
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/aix/AixPPC64Linker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/aix/AixPPC64Linker.java
index 2ec8d31f3f286..503dac3e4fb85 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/aix/AixPPC64Linker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/aix/AixPPC64Linker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2023 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -65,7 +65,7 @@ protected void checkStructMember(MemoryLayout member, long offset) {
             if (vl.byteAlignment() != 4) {
                 throw new IllegalArgumentException("double struct member " + vl + " at offset " + offset + " should be 4-byte aligned");
             }
-            if (vl.order() != linkerByteOrder()) {
+            if (vl.order() != ByteOrder.BIG_ENDIAN) {
                 throw new IllegalArgumentException("double struct member " + vl + " at offset " + offset + " has an unexpected byte order");
             }
         } else {
@@ -83,11 +83,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         return CallArranger.AIX.arrangeUpcall(targetType, function, options);
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.BIG_ENDIAN;
-    }
-
     @Override
     public Map<String, MemoryLayout> canonicalLayouts() {
         return CANONICAL_LAYOUTS;
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/linux/LinuxPPC64Linker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/linux/LinuxPPC64Linker.java
index 7cf2d524bffc1..a9f5bb4d60045 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/linux/LinuxPPC64Linker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/linux/LinuxPPC64Linker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2023 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -65,11 +65,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         return CallArranger.ABIv1.arrangeUpcall(targetType, function, options);
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.BIG_ENDIAN;
-    }
-
     @Override
     public Map<String, MemoryLayout> canonicalLayouts() {
         return CANONICAL_LAYOUTS;
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/linux/LinuxPPC64leLinker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/linux/LinuxPPC64leLinker.java
index be4d217d54743..e8305669ba035 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/linux/LinuxPPC64leLinker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/ppc64/linux/LinuxPPC64leLinker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2023 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -65,11 +65,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         return CallArranger.ABIv2.arrangeUpcall(targetType, function, options);
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.LITTLE_ENDIAN;
-    }
-
     @Override
     public Map<String, MemoryLayout> canonicalLayouts() {
         return CANONICAL_LAYOUTS;
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/riscv64/linux/LinuxRISCV64Linker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/riscv64/linux/LinuxRISCV64Linker.java
index 1c3558b474201..706fdacf60875 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/riscv64/linux/LinuxRISCV64Linker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/riscv64/linux/LinuxRISCV64Linker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2023, Institute of Software, Chinese Academy of Sciences.
  * All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -67,11 +67,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         return LinuxRISCV64CallArranger.arrangeUpcall(targetType, function, options);
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.LITTLE_ENDIAN;
-    }
-
     @Override
     public Map<String, MemoryLayout> canonicalLayouts() {
         return CANONICAL_LAYOUTS;
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/s390/linux/LinuxS390Linker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/s390/linux/LinuxS390Linker.java
index 80aa950026ed1..9810fbf169579 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/s390/linux/LinuxS390Linker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/s390/linux/LinuxS390Linker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2023 IBM Corp. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -64,11 +64,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         return LinuxS390CallArranger.arrangeUpcall(targetType, function, options);
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.BIG_ENDIAN;
-    }
-
     @Override
     public Map<String, MemoryLayout> canonicalLayouts() {
         return CANONICAL_LAYOUTS;
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/x64/sysv/SysVx64Linker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/x64/sysv/SysVx64Linker.java
index 91a0dc7683174..b1fce35af8375 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/x64/sysv/SysVx64Linker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/x64/sysv/SysVx64Linker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,11 +67,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         return CallArranger.arrangeUpcall(targetType, function, options);
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.LITTLE_ENDIAN;
-    }
-
     @Override
     public Map<String, MemoryLayout> canonicalLayouts() {
         return CANONICAL_LAYOUTS;
diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/x64/windows/Windowsx64Linker.java b/src/java.base/share/classes/jdk/internal/foreign/abi/x64/windows/Windowsx64Linker.java
index f262bd6872be2..30e4a5bdaf495 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/abi/x64/windows/Windowsx64Linker.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/abi/x64/windows/Windowsx64Linker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -66,11 +66,6 @@ protected UpcallStubFactory arrangeUpcall(MethodType targetType, FunctionDescrip
         return CallArranger.arrangeUpcall(targetType, function, options);
     }
 
-    @Override
-    protected ByteOrder linkerByteOrder() {
-        return ByteOrder.LITTLE_ENDIAN;
-    }
-
     @Override
     public Map<String, MemoryLayout> canonicalLayouts() {
         return CANONICAL_LAYOUTS;

From ece7d4349a13f75c654e2ca0f4d1b66d3af5cf10 Mon Sep 17 00:00:00 2001
From: Matias Saavedra Silva <matsaave@openjdk.org>
Date: Thu, 11 Apr 2024 22:05:55 +0000
Subject: [PATCH 05/32] 8329416: Split relocation pointer map into read-write
 and read-only maps

Reviewed-by: iklam, ccheung
---
 src/hotspot/share/cds/archiveBuilder.cpp    |  7 +-
 src/hotspot/share/cds/archiveBuilder.hpp    | 11 ++-
 src/hotspot/share/cds/archiveHeapLoader.cpp |  2 +-
 src/hotspot/share/cds/archiveUtils.cpp      | 37 +++++++++-
 src/hotspot/share/cds/archiveUtils.hpp      | 15 +++-
 src/hotspot/share/cds/filemap.cpp           | 82 +++++++++++++--------
 src/hotspot/share/cds/filemap.hpp           | 14 ++--
 7 files changed, 123 insertions(+), 45 deletions(-)

diff --git a/src/hotspot/share/cds/archiveBuilder.cpp b/src/hotspot/share/cds/archiveBuilder.cpp
index 841b68a9fb2b7..0fc8c29a5b769 100644
--- a/src/hotspot/share/cds/archiveBuilder.cpp
+++ b/src/hotspot/share/cds/archiveBuilder.cpp
@@ -158,6 +158,8 @@ ArchiveBuilder::ArchiveBuilder() :
   _rw_region("rw", MAX_SHARED_DELTA),
   _ro_region("ro", MAX_SHARED_DELTA),
   _ptrmap(mtClassShared),
+  _rw_ptrmap(mtClassShared),
+  _ro_ptrmap(mtClassShared),
   _rw_src_objs(),
   _ro_src_objs(),
   _src_obj_table(INITIAL_TABLE_SIZE, MAX_TABLE_SIZE),
@@ -1275,8 +1277,11 @@ void ArchiveBuilder::write_archive(FileMapInfo* mapinfo, ArchiveHeapInfo* heap_i
   write_region(mapinfo, MetaspaceShared::rw, &_rw_region, /*read_only=*/false,/*allow_exec=*/false);
   write_region(mapinfo, MetaspaceShared::ro, &_ro_region, /*read_only=*/true, /*allow_exec=*/false);
 
+  // Split pointer map into read-write and read-only bitmaps
+  ArchivePtrMarker::initialize_rw_ro_maps(&_rw_ptrmap, &_ro_ptrmap);
+
   size_t bitmap_size_in_bytes;
-  char* bitmap = mapinfo->write_bitmap_region(ArchivePtrMarker::ptrmap(), heap_info,
+  char* bitmap = mapinfo->write_bitmap_region(ArchivePtrMarker::rw_ptrmap(), ArchivePtrMarker::ro_ptrmap(), heap_info,
                                               bitmap_size_in_bytes);
 
   if (heap_info->is_used()) {
diff --git a/src/hotspot/share/cds/archiveBuilder.hpp b/src/hotspot/share/cds/archiveBuilder.hpp
index 4f811a0c51265..a80370d39761e 100644
--- a/src/hotspot/share/cds/archiveBuilder.hpp
+++ b/src/hotspot/share/cds/archiveBuilder.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -204,7 +204,14 @@ class ArchiveBuilder : public StackObj {
 
   DumpRegion _rw_region;
   DumpRegion _ro_region;
-  CHeapBitMap _ptrmap;    // bitmap used by ArchivePtrMarker
+
+  // Combined bitmap to track pointers in both RW and RO regions. This is updated
+  // as objects are copied into RW and RO.
+  CHeapBitMap _ptrmap;
+
+  // _ptrmap is split into these two bitmaps which are written into the archive.
+  CHeapBitMap _rw_ptrmap;   // marks pointers in the RW region
+  CHeapBitMap _ro_ptrmap;   // marks pointers in the RO region
 
   SourceObjList _rw_src_objs;                 // objs to put in rw region
   SourceObjList _ro_src_objs;                 // objs to put in ro region
diff --git a/src/hotspot/share/cds/archiveHeapLoader.cpp b/src/hotspot/share/cds/archiveHeapLoader.cpp
index fe30be1642796..2ef502a3643d3 100644
--- a/src/hotspot/share/cds/archiveHeapLoader.cpp
+++ b/src/hotspot/share/cds/archiveHeapLoader.cpp
@@ -442,7 +442,7 @@ void ArchiveHeapLoader::patch_native_pointers() {
   FileMapRegion* r = FileMapInfo::current_info()->region_at(MetaspaceShared::hp);
   if (r->mapped_base() != nullptr && r->has_ptrmap()) {
     log_info(cds, heap)("Patching native pointers in heap region");
-    BitMapView bm = r->ptrmap_view();
+    BitMapView bm = FileMapInfo::current_info()->ptrmap_view(MetaspaceShared::hp);
     PatchNativePointers patcher((Metadata**)r->mapped_base() + FileMapInfo::current_info()->heap_ptrmap_start_pos());
     bm.iterate(&patcher);
   }
diff --git a/src/hotspot/share/cds/archiveUtils.cpp b/src/hotspot/share/cds/archiveUtils.cpp
index b14dfc8c33e6a..5ba36960c55b1 100644
--- a/src/hotspot/share/cds/archiveUtils.cpp
+++ b/src/hotspot/share/cds/archiveUtils.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,12 +46,16 @@
 #include "utilities/globalDefinitions.hpp"
 
 CHeapBitMap* ArchivePtrMarker::_ptrmap = nullptr;
+CHeapBitMap* ArchivePtrMarker::_rw_ptrmap = nullptr;
+CHeapBitMap* ArchivePtrMarker::_ro_ptrmap = nullptr;
 VirtualSpace* ArchivePtrMarker::_vs;
 
 bool ArchivePtrMarker::_compacted;
 
 void ArchivePtrMarker::initialize(CHeapBitMap* ptrmap, VirtualSpace* vs) {
   assert(_ptrmap == nullptr, "initialize only once");
+  assert(_rw_ptrmap == nullptr, "initialize only once");
+  assert(_ro_ptrmap == nullptr, "initialize only once");
   _vs = vs;
   _compacted = false;
   _ptrmap = ptrmap;
@@ -67,6 +71,37 @@ void ArchivePtrMarker::initialize(CHeapBitMap* ptrmap, VirtualSpace* vs) {
   _ptrmap->initialize(estimated_archive_size / sizeof(intptr_t));
 }
 
+// Split _ptrmap in two: rw_ptrmap bit i mirrors _ptrmap bit i; ro_ptrmap bit i mirrors _ptrmap bit (ro_start + i).
+void ArchivePtrMarker::initialize_rw_ro_maps(CHeapBitMap* rw_ptrmap, CHeapBitMap* ro_ptrmap) {
+  address* rw_bottom = (address*)ArchiveBuilder::current()->rw_region()->base();
+  address* ro_bottom = (address*)ArchiveBuilder::current()->ro_region()->base();
+
+  _rw_ptrmap = rw_ptrmap;
+  _ro_ptrmap = ro_ptrmap;
+
+  size_t rw_size = ArchiveBuilder::current()->rw_region()->used() / sizeof(address);
+  // ro_start is the first bit in _ptrmap that covers the pointer that would sit at ro_bottom.
+  // E.g., if rw_bottom = (address*)100
+  //          ro_bottom = (address*)116
+  //       then for 64-bit platform:
+  //          ro_start = ro_bottom - rw_bottom = (116 - 100) / sizeof(address) = 2;
+  size_t ro_start = ro_bottom - rw_bottom;
+
+  // Note: ptrmap is big enough only to cover the last pointer in ro_region.
+  // See ArchivePtrMarker::compact()
+  _rw_ptrmap->initialize(rw_size);
+  _ro_ptrmap->initialize(_ptrmap->size() - ro_start);
+
+  for (size_t rw_bit = 0; rw_bit < _rw_ptrmap->size(); rw_bit++) {
+    _rw_ptrmap->at_put(rw_bit, _ptrmap->at(rw_bit));
+  }
+
+  for (size_t ro_bit = ro_start; ro_bit < _ptrmap->size(); ro_bit++) {
+    _ro_ptrmap->at_put(ro_bit - ro_start, _ptrmap->at(ro_bit));
+  }
+  assert(_ptrmap->size() - ro_start == _ro_ptrmap->size(), "must be");
+}
+
 void ArchivePtrMarker::mark_pointer(address* ptr_loc) {
   assert(_ptrmap != nullptr, "not initialized");
   assert(!_compacted, "cannot mark anymore");
diff --git a/src/hotspot/share/cds/archiveUtils.hpp b/src/hotspot/share/cds/archiveUtils.hpp
index 2c965f8fe9cf1..efe5a468b93b4 100644
--- a/src/hotspot/share/cds/archiveUtils.hpp
+++ b/src/hotspot/share/cds/archiveUtils.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,6 +42,8 @@ class VirtualSpace;
 // fixed, but _ptr_end can be expanded as more objects are dumped.
 class ArchivePtrMarker : AllStatic {
   static CHeapBitMap*  _ptrmap;
+  static CHeapBitMap*  _rw_ptrmap;
+  static CHeapBitMap*  _ro_ptrmap;
   static VirtualSpace* _vs;
 
   // Once _ptrmap is compacted, we don't allow bit marking anymore. This is to
@@ -53,6 +55,7 @@ class ArchivePtrMarker : AllStatic {
 
 public:
   static void initialize(CHeapBitMap* ptrmap, VirtualSpace* vs);
+  static void initialize_rw_ro_maps(CHeapBitMap* rw_ptrmap, CHeapBitMap* ro_ptrmap);
   static void mark_pointer(address* ptr_loc);
   static void clear_pointer(address* ptr_loc);
   static void compact(address relocatable_base, address relocatable_end);
@@ -73,8 +76,18 @@ class ArchivePtrMarker : AllStatic {
     return _ptrmap;
   }
 
+  static CHeapBitMap* rw_ptrmap() {
+    return _rw_ptrmap;
+  }
+
+  static CHeapBitMap* ro_ptrmap() {
+    return _ro_ptrmap;
+  }
+
   static void reset_map_and_vs() {
     _ptrmap = nullptr;
+    _rw_ptrmap = nullptr;
+    _ro_ptrmap = nullptr;
     _vs = nullptr;
   }
 };
diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp
index 2b35e64c26480..c81838ed2eff8 100644
--- a/src/hotspot/share/cds/filemap.cpp
+++ b/src/hotspot/share/cds/filemap.cpp
@@ -294,7 +294,6 @@ void FileMapHeader::print(outputStream* st) {
   st->print_cr("- allow_archiving_with_java_agent:%d", _allow_archiving_with_java_agent);
   st->print_cr("- use_optimized_module_handling:  %d", _use_optimized_module_handling);
   st->print_cr("- has_full_module_graph           %d", _has_full_module_graph);
-  st->print_cr("- ptrmap_size_in_bits:            " SIZE_FORMAT, _ptrmap_size_in_bits);
 }
 
 void SharedClassPathEntry::init_as_non_existent(const char* path, TRAPS) {
@@ -1453,22 +1452,6 @@ void FileMapRegion::init_ptrmap(size_t offset, size_t size_in_bits) {
   _ptrmap_size_in_bits = size_in_bits;
 }
 
-BitMapView FileMapRegion::bitmap_view(bool is_oopmap) {
-  char* bitmap_base = FileMapInfo::current_info()->map_bitmap_region();
-  bitmap_base += is_oopmap ? _oopmap_offset : _ptrmap_offset;
-  size_t size_in_bits = is_oopmap ? _oopmap_size_in_bits : _ptrmap_size_in_bits;
-  return BitMapView((BitMap::bm_word_t*)(bitmap_base), size_in_bits);
-}
-
-BitMapView FileMapRegion::oopmap_view() {
-  return bitmap_view(true);
-}
-
-BitMapView FileMapRegion::ptrmap_view() {
-  assert(has_ptrmap(), "must be");
-  return bitmap_view(false);
-}
-
 bool FileMapRegion::check_region_crc(char* base) const {
   // This function should be called after the region has been properly
   // loaded into memory via FileMapInfo::map_region() or FileMapInfo::read_region().
@@ -1497,6 +1480,27 @@ static const char* region_name(int region_index) {
   return names[region_index];
 }
 
+BitMapView FileMapInfo::bitmap_view(int region_index, bool is_oopmap) {
+  FileMapRegion* r = region_at(region_index);
+  char* bitmap_base = is_static() ? FileMapInfo::current_info()->map_bitmap_region() : FileMapInfo::dynamic_info()->map_bitmap_region();
+  bitmap_base += is_oopmap ? r->oopmap_offset() : r->ptrmap_offset();
+  size_t size_in_bits = is_oopmap ? r->oopmap_size_in_bits() : r->ptrmap_size_in_bits();
+
+  log_debug(cds, reloc)("mapped %s relocation %smap @ " INTPTR_FORMAT " (" SIZE_FORMAT " bits)",
+                        region_name(region_index), is_oopmap ? "oop" : "ptr",
+                        p2i(bitmap_base), size_in_bits);
+
+  return BitMapView((BitMap::bm_word_t*)(bitmap_base), size_in_bits);
+}
+
+BitMapView FileMapInfo::oopmap_view(int region_index) {
+  return bitmap_view(region_index, /*is_oopmap*/true);
+}
+
+BitMapView FileMapInfo::ptrmap_view(int region_index) {
+  return bitmap_view(region_index, /*is_oopmap*/false);
+}
+
 void FileMapRegion::print(outputStream* st, int region_index) {
   st->print_cr("============ region ============= %d \"%s\"", region_index, region_name(region_index));
   st->print_cr("- crc:                            0x%08x", _crc);
@@ -1510,6 +1514,8 @@ void FileMapRegion::print(outputStream* st, int region_index) {
   st->print_cr("- used:                           " SIZE_FORMAT, _used);
   st->print_cr("- oopmap_offset:                  " SIZE_FORMAT_X, _oopmap_offset);
   st->print_cr("- oopmap_size_in_bits:            " SIZE_FORMAT, _oopmap_size_in_bits);
+  st->print_cr("- ptrmap_offset:                  " SIZE_FORMAT_X, _ptrmap_offset);
+  st->print_cr("- ptrmap_size_in_bits:            " SIZE_FORMAT, _ptrmap_size_in_bits);
   st->print_cr("- mapped_base:                    " INTPTR_FORMAT, p2i(_mapped_base));
 }
 
@@ -1586,9 +1592,9 @@ size_t FileMapInfo::remove_bitmap_leading_zeros(CHeapBitMap* map) {
   return old_zeros;
 }
 
-char* FileMapInfo::write_bitmap_region(const CHeapBitMap* ptrmap, ArchiveHeapInfo* heap_info,
+char* FileMapInfo::write_bitmap_region(const CHeapBitMap* rw_ptrmap, const CHeapBitMap* ro_ptrmap, ArchiveHeapInfo* heap_info,
                                        size_t &size_in_bytes) {
-  size_in_bytes = ptrmap->size_in_bytes();
+  size_in_bytes = rw_ptrmap->size_in_bytes() + ro_ptrmap->size_in_bytes();
 
   if (heap_info->is_used()) {
     // Remove leading zeros
@@ -1602,14 +1608,19 @@ char* FileMapInfo::write_bitmap_region(const CHeapBitMap* ptrmap, ArchiveHeapInf
     size_in_bytes += heap_info->ptrmap()->size_in_bytes();
   }
 
-  // The bitmap region contains up to 3 parts:
-  // ptrmap:              metaspace pointers inside the ro/rw regions
+  // The bitmap region contains up to 4 parts:
+  // rw_ptrmap:           metaspace pointers inside the read-write region
+  // ro_ptrmap:           metaspace pointers inside the read-only region
   // heap_info->oopmap(): Java oop pointers in the heap region
   // heap_info->ptrmap(): metaspace pointers in the heap region
   char* buffer = NEW_C_HEAP_ARRAY(char, size_in_bytes, mtClassShared);
   size_t written = 0;
-  written = write_bitmap(ptrmap, buffer, written);
-  header()->set_ptrmap_size_in_bits(ptrmap->size());
+
+  region_at(MetaspaceShared::rw)->init_ptrmap(0, rw_ptrmap->size());
+  written = write_bitmap(rw_ptrmap, buffer, written);
+
+  region_at(MetaspaceShared::ro)->init_ptrmap(written, ro_ptrmap->size());
+  written = write_bitmap(ro_ptrmap, buffer, written);
 
   if (heap_info->is_used()) {
     FileMapRegion* r = region_at(MetaspaceShared::hp);
@@ -1904,15 +1915,19 @@ bool FileMapInfo::relocate_pointers_in_core_regions(intx addr_delta) {
   if (bitmap_base == nullptr) {
     return false; // OOM, or CRC check failure
   } else {
-    size_t ptrmap_size_in_bits = header()->ptrmap_size_in_bits();
-    log_debug(cds, reloc)("mapped relocation bitmap @ " INTPTR_FORMAT " (" SIZE_FORMAT " bits)",
-                          p2i(bitmap_base), ptrmap_size_in_bits);
+    BitMapView rw_ptrmap = ptrmap_view(MetaspaceShared::rw);
+    BitMapView ro_ptrmap = ptrmap_view(MetaspaceShared::ro);
 
-    BitMapView ptrmap((BitMap::bm_word_t*)bitmap_base, ptrmap_size_in_bits);
+    FileMapRegion* rw_region = first_core_region();
+    FileMapRegion* ro_region = last_core_region();
 
-    // Patch all pointers in the mapped region that are marked by ptrmap.
-    address patch_base = (address)mapped_base();
-    address patch_end  = (address)mapped_end();
+    // Patch all pointers inside the RW region
+    address rw_patch_base = (address)rw_region->mapped_base();
+    address rw_patch_end  = (address)rw_region->mapped_end();
+
+    // Patch all pointers inside the RO region
+    address ro_patch_base = (address)ro_region->mapped_base();
+    address ro_patch_end  = (address)ro_region->mapped_end();
 
     // the current value of the pointers to be patched must be within this
     // range (i.e., must be between the requested base address and the address of the current archive).
@@ -1925,9 +1940,12 @@ bool FileMapInfo::relocate_pointers_in_core_regions(intx addr_delta) {
     address valid_new_base = (address)header()->mapped_base_address();
     address valid_new_end  = (address)mapped_end();
 
-    SharedDataRelocator patcher((address*)patch_base, (address*)patch_end, valid_old_base, valid_old_end,
+    SharedDataRelocator rw_patcher((address*)rw_patch_base, (address*)rw_patch_end, valid_old_base, valid_old_end,
+                                valid_new_base, valid_new_end, addr_delta);
+    SharedDataRelocator ro_patcher((address*)ro_patch_base, (address*)ro_patch_end, valid_old_base, valid_old_end,
                                 valid_new_base, valid_new_end, addr_delta);
-    ptrmap.iterate(&patcher);
+    rw_ptrmap.iterate(&rw_patcher);
+    ro_ptrmap.iterate(&ro_patcher);
 
     // The MetaspaceShared::bm region will be unmapped in MetaspaceShared::initialize_shared_spaces().
 
diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp
index e7c131ee5bb64..6d106ef099d7e 100644
--- a/src/hotspot/share/cds/filemap.hpp
+++ b/src/hotspot/share/cds/filemap.hpp
@@ -131,7 +131,6 @@ class SharedPathTable {
 
 
 class FileMapRegion: private CDSFileMapRegion {
-  BitMapView bitmap_view(bool is_oopmap);
 public:
   void assert_is_heap_region() const {
     assert(_is_heap_region, "must be heap region");
@@ -158,6 +157,8 @@ class FileMapRegion: private CDSFileMapRegion {
   bool   mapped_from_file()         const { return _mapped_from_file != 0; }
   size_t oopmap_offset()            const { assert_is_heap_region();     return _oopmap_offset; }
   size_t oopmap_size_in_bits()      const { assert_is_heap_region();     return _oopmap_size_in_bits; }
+  size_t ptrmap_offset()            const { return _ptrmap_offset; }
+  size_t ptrmap_size_in_bits()      const { return _ptrmap_size_in_bits; }
 
   void set_file_offset(size_t s)     { _file_offset = s; }
   void set_read_only(bool v)         { _read_only = v; }
@@ -167,8 +168,6 @@ class FileMapRegion: private CDSFileMapRegion {
             bool allow_exec, int crc);
   void init_oopmap(size_t offset, size_t size_in_bits);
   void init_ptrmap(size_t offset, size_t size_in_bits);
-  BitMapView oopmap_view();
-  BitMapView ptrmap_view();
   bool has_ptrmap()                  { return _ptrmap_size_in_bits != 0; }
 
   bool check_region_crc(char* base) const;
@@ -225,7 +224,6 @@ class FileMapHeader: private CDSFileMapHeaderBase {
   bool   _use_optimized_module_handling;// No module-relation VM options were specified, so we can skip
                                         // some expensive operations.
   bool   _has_full_module_graph;        // Does this CDS archive contain the full archived module graph?
-  size_t _ptrmap_size_in_bits;          // Size of pointer relocation bitmap
   size_t _heap_roots_offset;            // Offset of the HeapShared::roots() object, from the bottom
                                         // of the archived heap objects, in bytes.
   size_t _heap_oopmap_start_pos;        // The first bit in the oopmap corresponds to this position in the heap.
@@ -267,7 +265,6 @@ class FileMapHeader: private CDSFileMapHeaderBase {
   char* mapped_base_address()              const { return _mapped_base_address; }
   bool has_platform_or_app_classes()       const { return _has_platform_or_app_classes; }
   bool has_non_jar_in_classpath()          const { return _has_non_jar_in_classpath; }
-  size_t ptrmap_size_in_bits()             const { return _ptrmap_size_in_bits; }
   bool compressed_oops()                   const { return _compressed_oops; }
   bool compressed_class_pointers()         const { return _compressed_class_ptrs; }
   size_t heap_roots_offset()               const { return _heap_roots_offset; }
@@ -282,7 +279,6 @@ class FileMapHeader: private CDSFileMapHeaderBase {
   void set_has_platform_or_app_classes(bool v)   { _has_platform_or_app_classes = v; }
   void set_cloned_vtables(char* p)               { set_as_offset(p, &_cloned_vtables_offset); }
   void set_serialized_data(char* p)              { set_as_offset(p, &_serialized_data_offset); }
-  void set_ptrmap_size_in_bits(size_t s)         { _ptrmap_size_in_bits = s; }
   void set_mapped_base_address(char* p)          { _mapped_base_address = p; }
   void set_heap_roots_offset(size_t n)           { _heap_roots_offset = n; }
   void set_heap_oopmap_start_pos(size_t n)       { _heap_oopmap_start_pos = n; }
@@ -443,7 +439,7 @@ class FileMapInfo : public CHeapObj<mtInternal> {
   void  write_region(int region, char* base, size_t size,
                      bool read_only, bool allow_exec);
   size_t remove_bitmap_leading_zeros(CHeapBitMap* map);
-  char* write_bitmap_region(const CHeapBitMap* ptrmap, ArchiveHeapInfo* heap_info,
+  char* write_bitmap_region(const CHeapBitMap* rw_ptrmap, const CHeapBitMap* ro_ptrmap, ArchiveHeapInfo* heap_info,
                             size_t &size_in_bytes);
   size_t write_heap_region(ArchiveHeapInfo* heap_info);
   void  write_bytes(const void* buffer, size_t count);
@@ -526,6 +522,10 @@ class FileMapInfo : public CHeapObj<mtInternal> {
     return header()->region_at(i);
   }
 
+  BitMapView bitmap_view(int region_index, bool is_oopmap);
+  BitMapView oopmap_view(int region_index);
+  BitMapView ptrmap_view(int region_index);
+
   void print(outputStream* st) const;
 
   const char* vm_version() {

From e45fea5a801ac09c3d572ac07d6179e80c422942 Mon Sep 17 00:00:00 2001
From: Axel Boldt-Christmas <aboldtch@openjdk.org>
Date: Fri, 12 Apr 2024 06:04:20 +0000
Subject: [PATCH 06/32] 8329757: Crash with fatal error: DEBUG MESSAGE: Fast
 Unlock lock on stack

Reviewed-by: pchilanomate, kvn
---
 src/hotspot/share/runtime/deoptimization.cpp  | 15 +++-
 src/hotspot/share/runtime/lockStack.cpp       | 58 +++++++++++++++
 src/hotspot/share/runtime/lockStack.hpp       |  5 ++
 .../compiler/escapeAnalysis/Test8329757.java  | 72 +++++++++++++++++++
 4 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100644 test/hotspot/jtreg/compiler/escapeAnalysis/Test8329757.java

diff --git a/src/hotspot/share/runtime/deoptimization.cpp b/src/hotspot/share/runtime/deoptimization.cpp
index 1bdc597e4ce97..cf2f7e9c216d1 100644
--- a/src/hotspot/share/runtime/deoptimization.cpp
+++ b/src/hotspot/share/runtime/deoptimization.cpp
@@ -391,6 +391,7 @@ static void restore_eliminated_locks(JavaThread* thread, GrowableArray<compiledV
 #ifndef PRODUCT
   bool first = true;
 #endif // !PRODUCT
+  DEBUG_ONLY(GrowableArray<oop> lock_order{0};)
   // Start locking from outermost/oldest frame
   for (int i = (chunk->length() - 1); i >= 0; i--) {
     compiledVFrame* cvf = chunk->at(i);
@@ -400,6 +401,13 @@ static void restore_eliminated_locks(JavaThread* thread, GrowableArray<compiledV
       bool relocked = Deoptimization::relock_objects(thread, monitors, deoptee_thread, deoptee,
                                                      exec_mode, realloc_failures);
       deoptimized_objects = deoptimized_objects || relocked;
+#ifdef ASSERT
+      if (LockingMode == LM_LIGHTWEIGHT && !realloc_failures) {
+        for (MonitorInfo* mi : *monitors) {
+          lock_order.push(mi->owner());
+        }
+      }
+#endif // ASSERT
 #ifndef PRODUCT
       if (PrintDeoptimizationDetails) {
         ResourceMark rm;
@@ -431,6 +439,11 @@ static void restore_eliminated_locks(JavaThread* thread, GrowableArray<compiledV
 #endif // !PRODUCT
     }
   }
+#ifdef ASSERT
+  if (LockingMode == LM_LIGHTWEIGHT && !realloc_failures) {
+    deoptee_thread->lock_stack().verify_consistent_lock_order(lock_order, exec_mode != Deoptimization::Unpack_none);
+  }
+#endif // ASSERT
 }
 
 // Deoptimize objects, that is reallocate and relock them, just before they escape through JVMTI.
@@ -1642,7 +1655,7 @@ bool Deoptimization::relock_objects(JavaThread* thread, GrowableArray<MonitorInf
             }
           }
         }
-        if (LockingMode == LM_LIGHTWEIGHT && exec_mode == Unpack_none) {
+        if (LockingMode == LM_LIGHTWEIGHT) {
           // We have lost information about the correct state of the lock stack.
           // Inflate the locks instead. Enter then inflate to avoid races with
           // deflation.
diff --git a/src/hotspot/share/runtime/lockStack.cpp b/src/hotspot/share/runtime/lockStack.cpp
index d7dcbdda7e968..c7889da1a76a2 100644
--- a/src/hotspot/share/runtime/lockStack.cpp
+++ b/src/hotspot/share/runtime/lockStack.cpp
@@ -26,8 +26,11 @@
 
 #include "precompiled.hpp"
 #include "memory/allocation.hpp"
+#include "oops/markWord.hpp"
+#include "oops/oop.inline.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/lockStack.inline.hpp"
+#include "runtime/objectMonitor.inline.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/stackWatermark.hpp"
 #include "runtime/stackWatermarkSet.inline.hpp"
@@ -35,6 +38,7 @@
 #include "utilities/copy.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/growableArray.hpp"
 #include "utilities/ostream.hpp"
 
 #include <type_traits>
@@ -99,6 +103,60 @@ void LockStack::verify(const char* msg) const {
 }
 #endif
 
+#ifdef ASSERT
+void LockStack::verify_consistent_lock_order(GrowableArray<oop>& lock_order, bool leaf_frame) const {
+  int top_index = to_index(_top);
+  int lock_index = lock_order.length();
+
+  if (!leaf_frame) {
+    // If the lock_order is not from the leaf frame we must search
+    // for the top_index which fits with the most recent fast_locked
+    // objects in the lock stack.
+    while (lock_index-- > 0) {
+      const oop obj = lock_order.at(lock_index);
+      if (contains(obj)) {
+        for (int index = 0; index < top_index; index++) {
+          if (_base[index] == obj) {
+            // Found top index
+            top_index = index + 1;
+            break;
+          }
+        }
+
+        if (VM_Version::supports_recursive_lightweight_locking()) {
+          // With recursive locks there may be more of the same object
+          while (lock_index-- > 0 && lock_order.at(lock_index) == obj) {
+            top_index++;
+          }
+          assert(top_index <= to_index(_top), "too many obj in lock_order");
+        }
+
+        break;
+      }
+    }
+
+    lock_index = lock_order.length();
+  }
+
+  while (lock_index-- > 0) {
+    const oop obj = lock_order.at(lock_index);
+    const markWord mark = obj->mark_acquire();
+    assert(obj->is_locked(), "must be locked");
+    if (top_index > 0 && obj == _base[top_index - 1]) {
+      assert(mark.is_fast_locked() || mark.monitor()->is_owner_anonymous(),
+             "must be fast_locked or inflated by other thread");
+      top_index--;
+    } else {
+      assert(!mark.is_fast_locked(), "must be inflated");
+      assert(mark.monitor()->owner_raw() == get_thread() ||
+             (!leaf_frame && get_thread()->current_waiting_monitor() == mark.monitor()),
+             "must be owned by (or waited on by) thread");
+      assert(!contains(obj), "must not be on lock_stack");
+    }
+  }
+}
+#endif
+
 void LockStack::print_on(outputStream* st) {
   for (int i = to_index(_top); (--i) >= 0;) {
     st->print("LockStack[%d]: ", i);
diff --git a/src/hotspot/share/runtime/lockStack.hpp b/src/hotspot/share/runtime/lockStack.hpp
index 17b0a1ca836ee..2cf02f00a8d47 100644
--- a/src/hotspot/share/runtime/lockStack.hpp
+++ b/src/hotspot/share/runtime/lockStack.hpp
@@ -34,6 +34,8 @@
 class JavaThread;
 class OopClosure;
 class outputStream;
+template<typename>
+class GrowableArray;
 
 class LockStack {
   friend class LockStackTest;
@@ -119,6 +121,9 @@ class LockStack {
 
   // Printing
   void print_on(outputStream* st);
+
+  // Verify that the lock stack is consistent with the given lock order
+  void verify_consistent_lock_order(GrowableArray<oop>& lock_order, bool leaf_frame) const NOT_DEBUG_RETURN;
 };
 
 #endif // SHARE_RUNTIME_LOCKSTACK_HPP
diff --git a/test/hotspot/jtreg/compiler/escapeAnalysis/Test8329757.java b/test/hotspot/jtreg/compiler/escapeAnalysis/Test8329757.java
new file mode 100644
index 0000000000000..483c2c49962fb
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/escapeAnalysis/Test8329757.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8329757
+ * @summary Deoptimization with nested eliminated and not eliminated locks
+ *          caused reordered lock stacks. This can be handled by the interpreter
+ *          but when a frame is migrated back to compiled code via OSR the C2
+ *          assumption about balanced monitorenter-monitorexit is broken.
+ *
+ * @requires vm.compMode != "Xint"
+ *
+ * @run main/othervm compiler.escapeAnalysis.Test8329757
+ */
+
+package compiler.escapeAnalysis;
+
+public class Test8329757 {
+
+    int a = 400;
+    Double ddd;
+
+    void q() {
+        int e;
+        synchronized (new Double(1.1f)) {
+        int[] f = new int[a];
+        synchronized (Test8329757.class) {
+            for (int d = 4; d < 127; d++) {
+            e = 13;
+            do switch (d * 5) {
+                case 0:
+                case 42:
+                case 29:
+                e = d;
+                default:
+                f[1] = e;
+            } while (--e > 0);
+            }
+        }
+        }
+    }
+
+    void n() {
+        for (int j = 6; j < 274; ++j) q();
+    }
+
+    public static void main(String[] args) {
+        Test8329757 r = new Test8329757();
+        for (int i = 0; i < 1000; i++) r.n();
+    }
+}

From bde3fc0c03c87d1f2605ae6bb84c33fadb7aa865 Mon Sep 17 00:00:00 2001
From: Roland Westrelin <roland@openjdk.org>
Date: Fri, 12 Apr 2024 07:17:27 +0000
Subject: [PATCH 07/32] 8330106: C2: VectorInsertNode::make() shouldn't call
 ConINode::make() directly

Reviewed-by: kvn, thartmann
---
 src/hotspot/share/opto/vectorIntrinsics.cpp | 2 +-
 src/hotspot/share/opto/vectornode.cpp       | 4 ++--
 src/hotspot/share/opto/vectornode.hpp       | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp
index 41d8fd713e079..5249d8d67afef 100644
--- a/src/hotspot/share/opto/vectorIntrinsics.cpp
+++ b/src/hotspot/share/opto/vectorIntrinsics.cpp
@@ -2696,7 +2696,7 @@ bool LibraryCallKit::inline_vector_insert() {
     default: fatal("%s", type2name(elem_bt)); break;
   }
 
-  Node* operation = gvn().transform(VectorInsertNode::make(opd, insert_val, idx->get_con()));
+  Node* operation = gvn().transform(VectorInsertNode::make(opd, insert_val, idx->get_con(), gvn()));
 
   Node* vbox = box_vector(operation, vbox_type, elem_bt, num_elem);
   set_result(vbox);
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp
index f81c29649bbac..bef4a2a109570 100644
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@@ -1676,9 +1676,9 @@ Node* VectorReinterpretNode::Identity(PhaseGVN *phase) {
   return this;
 }
 
-Node* VectorInsertNode::make(Node* vec, Node* new_val, int position) {
+Node* VectorInsertNode::make(Node* vec, Node* new_val, int position, PhaseGVN& gvn) {
   assert(position < (int)vec->bottom_type()->is_vect()->length(), "pos in range");
-  ConINode* pos = ConINode::make(position);
+  ConINode* pos = gvn.intcon(position);
   return new VectorInsertNode(vec, new_val, pos, vec->bottom_type()->is_vect());
 }
 
diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp
index 740c07d64ff58..ae37202cd257d 100644
--- a/src/hotspot/share/opto/vectornode.hpp
+++ b/src/hotspot/share/opto/vectornode.hpp
@@ -1688,7 +1688,7 @@ class VectorInsertNode : public VectorNode {
   virtual int Opcode() const;
   uint pos() const { return in(3)->get_int(); }
 
-  static Node* make(Node* vec, Node* new_val, int position);
+  static Node* make(Node* vec, Node* new_val, int position, PhaseGVN& gvn);
 };
 
 class VectorBoxNode : public Node {

From 2c45eca15943826cb6bfbdf6e6fd88abc196e8f7 Mon Sep 17 00:00:00 2001
From: Thomas Schatzl <tschatzl@openjdk.org>
Date: Fri, 12 Apr 2024 07:22:06 +0000
Subject: [PATCH 08/32] 8328879: G1: Some gtests modify global state crashing
 the JVM during GC after JDK-8289822

Reviewed-by: iwalulya, kbarrett
---
 test/hotspot/gtest/gc/g1/test_freeRegionList.cpp | 2 +-
 test/hotspot/gtest/gc/g1/test_heapRegion.cpp     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/hotspot/gtest/gc/g1/test_freeRegionList.cpp b/test/hotspot/gtest/gc/g1/test_freeRegionList.cpp
index 91540f7b78d64..4639f1c969430 100644
--- a/test/hotspot/gtest/gc/g1/test_freeRegionList.cpp
+++ b/test/hotspot/gtest/gc/g1/test_freeRegionList.cpp
@@ -34,7 +34,7 @@
 #include "unittest.hpp"
 
 // @requires UseG1GC
-TEST_VM(FreeRegionList, length) {
+TEST_OTHER_VM(FreeRegionList, length) {
   if (!UseG1GC) {
     return;
   }
diff --git a/test/hotspot/gtest/gc/g1/test_heapRegion.cpp b/test/hotspot/gtest/gc/g1/test_heapRegion.cpp
index e329a2b80ae50..c274e1c8494df 100644
--- a/test/hotspot/gtest/gc/g1/test_heapRegion.cpp
+++ b/test/hotspot/gtest/gc/g1/test_heapRegion.cpp
@@ -122,7 +122,7 @@ void VM_HeapRegionApplyToMarkedObjectsTest::doit() {
   region->set_top(old_top);
 }
 
-TEST_VM(HeapRegion, apply_to_marked_object) {
+TEST_OTHER_VM(HeapRegion, apply_to_marked_object) {
   if (!UseG1GC) {
     return;
   }

From 2c8b432b8911bc1a52b02def89e4820c76ea67ba Mon Sep 17 00:00:00 2001
From: Guoxiong Li <gli@openjdk.org>
Date: Fri, 12 Apr 2024 07:26:01 +0000
Subject: [PATCH 09/32] 8330003: Serial: Move the logic of
 FastEvacuateFollowersClosure to SerialHeap

Reviewed-by: ayang, tschatzl
---
 .../share/gc/serial/defNewGeneration.cpp      | 19 +----------------
 src/hotspot/share/gc/serial/serialHeap.cpp    | 11 +++++++++-
 src/hotspot/share/gc/serial/serialHeap.hpp    |  9 ++------
 .../share/gc/serial/serialHeap.inline.hpp     | 21 ++++++++++++-------
 4 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/src/hotspot/share/gc/serial/defNewGeneration.cpp b/src/hotspot/share/gc/serial/defNewGeneration.cpp
index ee7ef765fa358..7ac0d9b554ad2 100644
--- a/src/hotspot/share/gc/serial/defNewGeneration.cpp
+++ b/src/hotspot/share/gc/serial/defNewGeneration.cpp
@@ -76,20 +76,6 @@ class PromoteFailureClosure : public InHeapScanClosure {
   void do_oop(narrowOop* p) { do_oop_work(p); }
 };
 
-class YoungGenScanClosure : public InHeapScanClosure {
-  template <typename T>
-  void do_oop_work(T* p) {
-    assert(SerialHeap::heap()->young_gen()->to()->is_in_reserved(p), "precondition");
-
-    try_scavenge(p, [] (auto) {});
-  }
-public:
-  YoungGenScanClosure(DefNewGeneration* g) : InHeapScanClosure(g) {}
-
-  void do_oop(oop* p)       { do_oop_work(p); }
-  void do_oop(narrowOop* p) { do_oop_work(p); }
-};
-
 class RootScanClosure : public OffHeapScanClosure {
   template <typename T>
   void do_oop_work(T* p) {
@@ -231,10 +217,7 @@ class FastEvacuateFollowersClosure: public VoidClosure {
   {}
 
   void do_void() {
-    do {
-      _heap->oop_since_save_marks_iterate(_young_cl, _old_cl);
-    } while (!_heap->no_allocs_since_save_marks());
-    guarantee(_heap->young_gen()->promo_failure_scan_is_complete(), "Failed to finish scan");
+    _heap->scan_evacuated_objs(_young_cl, _old_cl);
   }
 };
 
diff --git a/src/hotspot/share/gc/serial/serialHeap.cpp b/src/hotspot/share/gc/serial/serialHeap.cpp
index 8c43239b201a4..84d941fbdb764 100644
--- a/src/hotspot/share/gc/serial/serialHeap.cpp
+++ b/src/hotspot/share/gc/serial/serialHeap.cpp
@@ -32,7 +32,7 @@
 #include "gc/serial/cardTableRS.hpp"
 #include "gc/serial/defNewGeneration.inline.hpp"
 #include "gc/serial/serialFullGC.hpp"
-#include "gc/serial/serialHeap.hpp"
+#include "gc/serial/serialHeap.inline.hpp"
 #include "gc/serial/serialMemoryPools.hpp"
 #include "gc/serial/serialVMOperations.hpp"
 #include "gc/serial/tenuredGeneration.inline.hpp"
@@ -762,6 +762,15 @@ bool SerialHeap::no_allocs_since_save_marks() {
          _old_gen->no_allocs_since_save_marks();
 }
 
+void SerialHeap::scan_evacuated_objs(YoungGenScanClosure* young_cl,
+                                     OldGenScanClosure* old_cl) {
+  do {
+    young_gen()->oop_since_save_marks_iterate(young_cl);
+    old_gen()->oop_since_save_marks_iterate(old_cl);
+  } while (!no_allocs_since_save_marks());
+  guarantee(young_gen()->promo_failure_scan_is_complete(), "Failed to finish scan");
+}
+
 // public collection interfaces
 void SerialHeap::collect(GCCause::Cause cause) {
   // The caller doesn't have the Heap_lock
diff --git a/src/hotspot/share/gc/serial/serialHeap.hpp b/src/hotspot/share/gc/serial/serialHeap.hpp
index 54a776406fad2..d13b8706c22e2 100644
--- a/src/hotspot/share/gc/serial/serialHeap.hpp
+++ b/src/hotspot/share/gc/serial/serialHeap.hpp
@@ -355,13 +355,8 @@ class SerialHeap : public CollectedHeap {
     return _old_gen;
   }
 
-  // Apply "cur->do_oop" or "older->do_oop" to all the oops in objects
-  // allocated since the last call to save_marks in the young generation.
-  // The "cur" closure is applied to references in the younger generation
-  // at "level", and the "older" closure to older generations.
-  template <typename OopClosureType1, typename OopClosureType2>
-  void oop_since_save_marks_iterate(OopClosureType1* cur,
-                                    OopClosureType2* older);
+  void scan_evacuated_objs(YoungGenScanClosure* young_cl,
+                           OldGenScanClosure* old_cl);
 
   void safepoint_synchronize_begin() override;
   void safepoint_synchronize_end() override;
diff --git a/src/hotspot/share/gc/serial/serialHeap.inline.hpp b/src/hotspot/share/gc/serial/serialHeap.inline.hpp
index 27819f0d1c933..750c0e9c31134 100644
--- a/src/hotspot/share/gc/serial/serialHeap.inline.hpp
+++ b/src/hotspot/share/gc/serial/serialHeap.inline.hpp
@@ -30,13 +30,6 @@
 #include "gc/serial/defNewGeneration.inline.hpp"
 #include "gc/serial/tenuredGeneration.inline.hpp"
 
-template <typename OopClosureType1, typename OopClosureType2>
-void SerialHeap::oop_since_save_marks_iterate(OopClosureType1* cur,
-                                              OopClosureType2* older) {
-  young_gen()->oop_since_save_marks_iterate(cur);
-  old_gen()->oop_since_save_marks_iterate(older);
-}
-
 class ScavengeHelper {
   DefNewGeneration* _young_gen;
   HeapWord*         _young_gen_end;
@@ -100,6 +93,20 @@ class OffHeapScanClosure : public OopClosure {
   OffHeapScanClosure(DefNewGeneration* young_gen) :  _helper(young_gen) {}
 };
 
+class YoungGenScanClosure : public InHeapScanClosure {
+  template <typename T>
+  void do_oop_work(T* p) {
+    assert(SerialHeap::heap()->young_gen()->to()->is_in_reserved(p), "precondition");
+
+    try_scavenge(p, [] (auto) {});
+  }
+public:
+  YoungGenScanClosure(DefNewGeneration* g) : InHeapScanClosure(g) {}
+
+  void do_oop(oop* p)       { do_oop_work(p); }
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+};
+
 class OldGenScanClosure : public InHeapScanClosure {
   CardTableRS* _rs;
 

From c7fcd62302a4b70214e4aea7052e661a2aa9b03b Mon Sep 17 00:00:00 2001
From: Guoxiong Li <gli@openjdk.org>
Date: Fri, 12 Apr 2024 07:29:41 +0000
Subject: [PATCH 10/32] 8330006: Serial: Extract out
 ContiguousSpace::block_start_const

Reviewed-by: ayang, tschatzl
---
 src/hotspot/share/gc/serial/cardTableRS.cpp   | 12 +++----
 src/hotspot/share/gc/serial/cardTableRS.hpp   |  4 +--
 .../share/gc/serial/defNewGeneration.cpp      | 29 +++++++++++++--
 .../share/gc/serial/tenuredGeneration.cpp     | 18 ++++++++--
 .../share/gc/serial/tenuredGeneration.hpp     |  2 +-
 src/hotspot/share/gc/shared/space.cpp         | 35 -------------------
 src/hotspot/share/gc/shared/space.hpp         |  8 -----
 7 files changed, 50 insertions(+), 58 deletions(-)

diff --git a/src/hotspot/share/gc/serial/cardTableRS.cpp b/src/hotspot/share/gc/serial/cardTableRS.cpp
index 789b5f21cffa1..71492a8468dee 100644
--- a/src/hotspot/share/gc/serial/cardTableRS.cpp
+++ b/src/hotspot/share/gc/serial/cardTableRS.cpp
@@ -31,9 +31,9 @@
 #include "memory/iterator.inline.hpp"
 #include "utilities/align.hpp"
 
-void CardTableRS::scan_old_to_young_refs(TenuredSpace* sp, HeapWord* saved_mark_word) {
-  const MemRegion ur    = sp->used_region();
-  const MemRegion urasm = MemRegion(sp->bottom(), saved_mark_word);
+void CardTableRS::scan_old_to_young_refs(TenuredGeneration* tg, HeapWord* saved_mark_word) {
+  const MemRegion ur    = tg->used_region();
+  const MemRegion urasm = MemRegion(tg->space()->bottom(), saved_mark_word);
 
   assert(ur.contains(urasm),
          "Did you forget to call save_marks()? "
@@ -43,7 +43,7 @@ void CardTableRS::scan_old_to_young_refs(TenuredSpace* sp, HeapWord* saved_mark_
 
   if (!urasm.is_empty()) {
     OldGenScanClosure cl(SerialHeap::heap()->young_gen());
-    non_clean_card_iterate(sp, urasm, &cl);
+    non_clean_card_iterate(tg, urasm, &cl);
   }
 }
 
@@ -225,7 +225,7 @@ static void scan_obj_with_limit(oop obj,
   }
 }
 
-void CardTableRS::non_clean_card_iterate(TenuredSpace* sp,
+void CardTableRS::non_clean_card_iterate(TenuredGeneration* tg,
                                          MemRegion mr,
                                          OldGenScanClosure* cl) {
   struct {
@@ -238,7 +238,7 @@ void CardTableRS::non_clean_card_iterate(TenuredSpace* sp,
       assert(cached_obj.start_addr != nullptr, "inv");
       return cached_obj.start_addr;
     }
-    HeapWord* result = sp->block_start_const(addr);
+    HeapWord* result = tg->block_start(addr);
 
     cached_obj.start_addr = result;
     cached_obj.end_addr = result + cast_to_oop(result)->size();
diff --git a/src/hotspot/share/gc/serial/cardTableRS.hpp b/src/hotspot/share/gc/serial/cardTableRS.hpp
index 9be2c720dcb6f..c12cd906482bd 100644
--- a/src/hotspot/share/gc/serial/cardTableRS.hpp
+++ b/src/hotspot/share/gc/serial/cardTableRS.hpp
@@ -60,7 +60,7 @@ class CardTableRS : public CardTable {
 public:
   CardTableRS(MemRegion whole_heap);
 
-  void scan_old_to_young_refs(TenuredSpace* sp, HeapWord* saved_mark_word);
+  void scan_old_to_young_refs(TenuredGeneration* tg, HeapWord* saved_mark_word);
 
   void inline_write_ref_field_gc(void* field) {
     CardValue* byte = byte_for(field);
@@ -83,7 +83,7 @@ class CardTableRS : public CardTable {
   // Iterate over the portion of the card-table which covers the given
   // region mr in the given space and apply cl to any dirty sub-regions
   // of mr. Clears the dirty cards as they are processed.
-  void non_clean_card_iterate(TenuredSpace* sp,
+  void non_clean_card_iterate(TenuredGeneration* tg,
                               MemRegion mr,
                               OldGenScanClosure* cl);
 
diff --git a/src/hotspot/share/gc/serial/defNewGeneration.cpp b/src/hotspot/share/gc/serial/defNewGeneration.cpp
index 7ac0d9b554ad2..9817bb7620c05 100644
--- a/src/hotspot/share/gc/serial/defNewGeneration.cpp
+++ b/src/hotspot/share/gc/serial/defNewGeneration.cpp
@@ -571,15 +571,38 @@ void DefNewGeneration::object_iterate(ObjectClosure* blk) {
   from()->object_iterate(blk);
 }
 
+// Returns the address of the start of the "block" that contains "p".
+// We say "block" instead of "object" since some heaps may not pack
+// objects densely; a chunk may either be an object or a non-object.
+// "p" must lie in [bottom, end) of "cs" (asserted); top() is returned
+// for "p" at or above top().  Very general, slow implementation.
+static HeapWord* block_start_const(const ContiguousSpace* cs, const void* p) {
+  assert(MemRegion(cs->bottom(), cs->end()).contains(p),
+         "p (" PTR_FORMAT ") not in space [" PTR_FORMAT ", " PTR_FORMAT ")",
+         p2i(p), p2i(cs->bottom()), p2i(cs->end()));
+  if (p >= cs->top()) {
+    return cs->top();
+  } else {
+    HeapWord* last = cs->bottom();
+    HeapWord* cur = last;
+    while (cur <= p) {
+      last = cur;
+      cur += cast_to_oop(cur)->size();
+    }
+    assert(oopDesc::is_oop(cast_to_oop(last)), PTR_FORMAT " should be an object start", p2i(last));
+    return last;
+  }
+}
+
 HeapWord* DefNewGeneration::block_start(const void* p) const {
   if (eden()->is_in_reserved(p)) {
-    return eden()->block_start_const(p);
+    return block_start_const(eden(), p);
   }
   if (from()->is_in_reserved(p)) {
-    return from()->block_start_const(p);
+    return block_start_const(from(), p);
   }
   assert(to()->is_in_reserved(p), "inv");
-  return to()->block_start_const(p);
+  return block_start_const(to(), p);
 }
 
 // The last collection bailed out, we are running out of heap space,
diff --git a/src/hotspot/share/gc/serial/tenuredGeneration.cpp b/src/hotspot/share/gc/serial/tenuredGeneration.cpp
index d6a1a4a69105a..ddbb7b8403be8 100644
--- a/src/hotspot/share/gc/serial/tenuredGeneration.cpp
+++ b/src/hotspot/share/gc/serial/tenuredGeneration.cpp
@@ -264,12 +264,24 @@ void TenuredGeneration::compute_new_size_inner() {
   }
 }
 
-HeapWord* TenuredGeneration::block_start(const void* p) const {
-  return space()->block_start_const(p);
+HeapWord* TenuredGeneration::block_start(const void* addr) const {
+  HeapWord* cur_block = _bts->block_start_reaching_into_card(addr);
+
+  while (true) {
+    HeapWord* next_block = cur_block + cast_to_oop(cur_block)->size();
+    if (next_block > addr) {
+      assert(cur_block <= addr, "postcondition");
+      return cur_block;
+    }
+    cur_block = next_block;
+    // Because the BOT is precise, we should never step into the next card
+    // (i.e. crossing the card boundary).
+    assert(!SerialBlockOffsetTable::is_crossing_card_boundary(cur_block, (HeapWord*)addr), "must be");
+  }
 }
 
 void TenuredGeneration::scan_old_to_young_refs() {
-  _rs->scan_old_to_young_refs(space(), saved_mark_word());
+  _rs->scan_old_to_young_refs(this, saved_mark_word());
 }
 
 TenuredGeneration::TenuredGeneration(ReservedSpace rs,
diff --git a/src/hotspot/share/gc/serial/tenuredGeneration.hpp b/src/hotspot/share/gc/serial/tenuredGeneration.hpp
index 9983790b82eb2..04d5d61207432 100644
--- a/src/hotspot/share/gc/serial/tenuredGeneration.hpp
+++ b/src/hotspot/share/gc/serial/tenuredGeneration.hpp
@@ -112,7 +112,7 @@ class TenuredGeneration: public Generation {
     return _virtual_space.uncommitted_size() == 0;
   }
 
-  HeapWord* block_start(const void* p) const;
+  HeapWord* block_start(const void* addr) const;
 
   void scan_old_to_young_refs();
 
diff --git a/src/hotspot/share/gc/shared/space.cpp b/src/hotspot/share/gc/shared/space.cpp
index f5622cd64dea0..a7e22856a569d 100644
--- a/src/hotspot/share/gc/shared/space.cpp
+++ b/src/hotspot/share/gc/shared/space.cpp
@@ -125,25 +125,6 @@ void ContiguousSpace::object_iterate(ObjectClosure* blk) {
   }
 }
 
-// Very general, slow implementation.
-HeapWord* ContiguousSpace::block_start_const(const void* p) const {
-  assert(MemRegion(bottom(), end()).contains(p),
-         "p (" PTR_FORMAT ") not in space [" PTR_FORMAT ", " PTR_FORMAT ")",
-         p2i(p), p2i(bottom()), p2i(end()));
-  if (p >= top()) {
-    return top();
-  } else {
-    HeapWord* last = bottom();
-    HeapWord* cur = last;
-    while (cur <= p) {
-      last = cur;
-      cur += cast_to_oop(cur)->size();
-    }
-    assert(oopDesc::is_oop(cast_to_oop(last)), PTR_FORMAT " should be an object start", p2i(last));
-    return last;
-  }
-}
-
 // This version requires locking.
 inline HeapWord* ContiguousSpace::allocate_impl(size_t size) {
   assert(Heap_lock->owned_by_self() ||
@@ -191,22 +172,6 @@ HeapWord* ContiguousSpace::par_allocate(size_t size) {
 }
 
 #if INCLUDE_SERIALGC
-HeapWord* TenuredSpace::block_start_const(const void* addr) const {
-  HeapWord* cur_block = _offsets->block_start_reaching_into_card(addr);
-
-  while (true) {
-    HeapWord* next_block = cur_block + cast_to_oop(cur_block)->size();
-    if (next_block > addr) {
-      assert(cur_block <= addr, "postcondition");
-      return cur_block;
-    }
-    cur_block = next_block;
-    // Because the BOT is precise, we should never step into the next card
-    // (i.e. crossing the card boundary).
-    assert(!SerialBlockOffsetTable::is_crossing_card_boundary(cur_block, (HeapWord*)addr), "must be");
-  }
-}
-
 TenuredSpace::TenuredSpace(SerialBlockOffsetTable* offsets,
                            MemRegion mr) :
   _offsets(offsets)
diff --git a/src/hotspot/share/gc/shared/space.hpp b/src/hotspot/share/gc/shared/space.hpp
index a4679b3adad2b..44f10cbd1ceb6 100644
--- a/src/hotspot/share/gc/shared/space.hpp
+++ b/src/hotspot/share/gc/shared/space.hpp
@@ -169,12 +169,6 @@ class ContiguousSpace: public CHeapObj<mtGC> {
   // Iteration
   void object_iterate(ObjectClosure* blk);
 
-  // If "p" is in the space, returns the address of the start of the
-  // "block" that contains "p".  We say "block" instead of "object" since
-  // some heaps may not pack objects densely; a chunk may either be an
-  // object or a non-object.  If "p" is not in the space, return null.
-  virtual HeapWord* block_start_const(const void* p) const;
-
   // Addresses for inlined allocation
   HeapWord** top_addr() { return &_top; }
 
@@ -197,8 +191,6 @@ class TenuredSpace: public ContiguousSpace {
   TenuredSpace(SerialBlockOffsetTable* offsets,
                MemRegion mr);
 
-  HeapWord* block_start_const(const void* addr) const override;
-
   // Add offset table update.
   inline HeapWord* allocate(size_t word_size) override;
   inline HeapWord* par_allocate(size_t word_size) override;

From 006a516aa0e10d74ffafca2e2da2ae89faf47457 Mon Sep 17 00:00:00 2001
From: Albert Mingkun Yang <ayang@openjdk.org>
Date: Fri, 12 Apr 2024 07:37:48 +0000
Subject: [PATCH 11/32] 8329962: Remove CardTable::invalidate

Reviewed-by: tschatzl, gli
---
 src/hotspot/share/gc/shared/cardTable.cpp           | 10 +---------
 src/hotspot/share/gc/shared/cardTable.hpp           |  2 --
 src/hotspot/share/gc/shared/cardTableBarrierSet.cpp |  2 +-
 3 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/src/hotspot/share/gc/shared/cardTable.cpp b/src/hotspot/share/gc/shared/cardTable.cpp
index 16edfada77d49..1095defaf9fc5 100644
--- a/src/hotspot/share/gc/shared/cardTable.cpp
+++ b/src/hotspot/share/gc/shared/cardTable.cpp
@@ -202,6 +202,7 @@ void CardTable::resize_covered_region(MemRegion new_region) {
 void CardTable::dirty_MemRegion(MemRegion mr) {
   assert(align_down(mr.start(), HeapWordSize) == mr.start(), "Unaligned start");
   assert(align_up  (mr.end(),   HeapWordSize) == mr.end(),   "Unaligned end"  );
+  assert(_covered[0].contains(mr) || _covered[1].contains(mr), "precondition");
   CardValue* cur  = byte_for(mr.start());
   CardValue* last = byte_after(mr.last());
   memset(cur, dirty_card, pointer_delta(last, cur, sizeof(CardValue)));
@@ -226,15 +227,6 @@ uintx CardTable::ct_max_alignment_constraint() {
   return GCCardSizeInBytes * os::vm_page_size();
 }
 
-void CardTable::invalidate(MemRegion mr) {
-  assert(align_down(mr.start(), HeapWordSize) == mr.start(), "Unaligned start");
-  assert(align_up  (mr.end(),   HeapWordSize) == mr.end(),   "Unaligned end"  );
-  for (int i = 0; i < max_covered_regions; i++) {
-    MemRegion mri = mr.intersection(_covered[i]);
-    if (!mri.is_empty()) dirty_MemRegion(mri);
-  }
-}
-
 #ifndef PRODUCT
 void CardTable::verify_region(MemRegion mr, CardValue val, bool val_equals) {
   CardValue* start    = byte_for(mr.start());
diff --git a/src/hotspot/share/gc/shared/cardTable.hpp b/src/hotspot/share/gc/shared/cardTable.hpp
index 13285be4fc5df..ee41be06be0df 100644
--- a/src/hotspot/share/gc/shared/cardTable.hpp
+++ b/src/hotspot/share/gc/shared/cardTable.hpp
@@ -131,8 +131,6 @@ class CardTable: public CHeapObj<mtGC> {
     return byte_for(p) + 1;
   }
 
-  void invalidate(MemRegion mr);
-
   // Provide read-only access to the card table array.
   const CardValue* byte_for_const(const void* p) const {
     return byte_for(p);
diff --git a/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp b/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp
index c4f318561d64b..1aab76d5ab55b 100644
--- a/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp
+++ b/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp
@@ -85,7 +85,7 @@ void CardTableBarrierSet::write_ref_array_work(MemRegion mr) {
 }
 
 void CardTableBarrierSet::invalidate(MemRegion mr) {
-  _card_table->invalidate(mr);
+  _card_table->dirty_MemRegion(mr);
 }
 
 void CardTableBarrierSet::print_on(outputStream* st) const {

From aebfd53e9d19f5939c81fa1a2fc75716c3355900 Mon Sep 17 00:00:00 2001
From: Ivan Walulya <iwalulya@openjdk.org>
Date: Fri, 12 Apr 2024 07:46:43 +0000
Subject: [PATCH 12/32] 8329660: G1: Improve TestGCLogMessages to be more
 precise

Reviewed-by: tschatzl, ayang
---
 src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp    |   2 +-
 .../jtreg/gc/g1/TestGCLogMessages.java        | 142 +++++++++---------
 2 files changed, 72 insertions(+), 72 deletions(-)

diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
index 29b7c41477778..32a56d7120569 100644
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
@@ -534,7 +534,7 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
   trace_time("Serial Free Collection Set", _recorded_serial_free_cset_time_ms);
 
   debug_time("Rebuild Free List", _recorded_total_rebuild_freelist_time_ms);
-  trace_time("Serial Rebuild Free List ", _recorded_serial_rebuild_freelist_time_ms);
+  trace_time("Serial Rebuild Free List", _recorded_serial_rebuild_freelist_time_ms);
   trace_phase(_gc_par_phases[RebuildFreeList]);
 
   debug_time("Prepare For Mutator", _recorded_prepare_for_mutator_time_ms);
diff --git a/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java b/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java
index e5db6cb0daeda..d37bf56738143 100644
--- a/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java
+++ b/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java
@@ -102,64 +102,64 @@ public boolean isAvailable() {
     }
 
     private LogMessageWithLevel allLogMessages[] = new LogMessageWithLevel[] {
-        new LogMessageWithLevel("Pre Evacuate Collection Set", Level.INFO),
-        new LogMessageWithLevel("Evacuate Collection Set", Level.INFO),
-        new LogMessageWithLevel("Post Evacuate Collection Set", Level.INFO),
-        new LogMessageWithLevel("Other", Level.INFO),
+        new LogMessageWithLevel("Pre Evacuate Collection Set:", Level.INFO),
+        new LogMessageWithLevel("Evacuate Collection Set:", Level.INFO),
+        new LogMessageWithLevel("Post Evacuate Collection Set:", Level.INFO),
+        new LogMessageWithLevel("Other:", Level.INFO),
 
         // Pre Evacuate Collection Set
-        new LogMessageWithLevel("JT Retire TLABs And Flush Logs", Level.DEBUG),
-        new LogMessageWithLevel("Non-JT Flush Logs", Level.DEBUG),
-        new LogMessageWithLevel("Choose Collection Set", Level.DEBUG),
-        new LogMessageWithLevel("Region Register", Level.DEBUG),
-        new LogMessageWithLevel("Prepare Heap Roots", Level.DEBUG),
+        new LogMessageWithLevel("JT Retire TLABs And Flush Logs \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Non-JT Flush Logs \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Choose Collection Set:", Level.DEBUG),
+        new LogMessageWithLevel("Region Register:", Level.DEBUG),
+        new LogMessageWithLevel("Prepare Heap Roots:", Level.DEBUG),
         // Merge Heap Roots
-        new LogMessageWithLevel("Merge Heap Roots", Level.INFO),
-        new LogMessageWithLevel("Prepare Merge Heap Roots", Level.DEBUG),
-        new LogMessageWithLevel("Eager Reclaim", Level.DEBUG),
-        new LogMessageWithLevel("Remembered Sets", Level.DEBUG),
-        new LogMessageWithLevel("Merged Inline", Level.DEBUG),
-        new LogMessageWithLevel("Merged ArrayOfCards", Level.DEBUG),
-        new LogMessageWithLevel("Merged Howl", Level.DEBUG),
-        new LogMessageWithLevel("Merged Full", Level.DEBUG),
-        new LogMessageWithLevel("Merged Howl Inline", Level.DEBUG),
-        new LogMessageWithLevel("Merged Howl ArrayOfCards", Level.DEBUG),
-        new LogMessageWithLevel("Merged Howl BitMap", Level.DEBUG),
-        new LogMessageWithLevel("Merged Howl Full", Level.DEBUG),
-        new LogMessageWithLevel("Log Buffers", Level.DEBUG),
-        new LogMessageWithLevel("Dirty Cards", Level.DEBUG),
-        new LogMessageWithLevel("Merged Cards", Level.DEBUG),
-        new LogMessageWithLevel("Skipped Cards", Level.DEBUG),
+        new LogMessageWithLevel("Merge Heap Roots:", Level.INFO),
+        new LogMessageWithLevel("Prepare Merge Heap Roots:", Level.DEBUG),
+        new LogMessageWithLevel("Eager Reclaim \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Remembered Sets \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Merged Inline:", Level.DEBUG),
+        new LogMessageWithLevel("Merged ArrayOfCards:", Level.DEBUG),
+        new LogMessageWithLevel("Merged Howl:", Level.DEBUG),
+        new LogMessageWithLevel("Merged Full:", Level.DEBUG),
+        new LogMessageWithLevel("Merged Howl Inline:", Level.DEBUG),
+        new LogMessageWithLevel("Merged Howl ArrayOfCards:", Level.DEBUG),
+        new LogMessageWithLevel("Merged Howl BitMap:", Level.DEBUG),
+        new LogMessageWithLevel("Merged Howl Full:", Level.DEBUG),
+        new LogMessageWithLevel("Log Buffers \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Dirty Cards:", Level.DEBUG),
+        new LogMessageWithLevel("Merged Cards:", Level.DEBUG),
+        new LogMessageWithLevel("Skipped Cards:", Level.DEBUG),
         // Evacuate Collection Set
-        new LogMessageWithLevel("Ext Root Scanning", Level.DEBUG),
-        new LogMessageWithLevel("Thread Roots", Level.TRACE),
-        new LogMessageWithLevel("CLDG Roots", Level.TRACE),
-        new LogMessageWithLevel("CM RefProcessor Roots", Level.TRACE),
+        new LogMessageWithLevel("Ext Root Scanning \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Thread Roots \\(ms\\):", Level.TRACE),
+        new LogMessageWithLevel("CLDG Roots \\(ms\\):", Level.TRACE),
+        new LogMessageWithLevel("CM RefProcessor Roots \\(ms\\):", Level.TRACE),
         new LogMessageWithLevel("JNI Global Roots", Level.TRACE),
         new LogMessageWithLevel("VM Global Roots", Level.TRACE),
         // Scan Heap Roots
-        new LogMessageWithLevel("Scan Heap Roots", Level.DEBUG),
-        new LogMessageWithLevel("Scanned Cards", Level.DEBUG),
-        new LogMessageWithLevel("Scanned Blocks", Level.DEBUG),
-        new LogMessageWithLevel("Claimed Chunks", Level.DEBUG),
-        new LogMessageWithLevel("Found Roots", Level.DEBUG),
+        new LogMessageWithLevel("Scan Heap Roots \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Scanned Cards:", Level.DEBUG),
+        new LogMessageWithLevel("Scanned Blocks:", Level.DEBUG),
+        new LogMessageWithLevel("Claimed Chunks:", Level.DEBUG),
+        new LogMessageWithLevel("Found Roots:", Level.DEBUG),
         // Code Roots Scan
-        new LogMessageWithLevel("Code Root Scan", Level.DEBUG),
+        new LogMessageWithLevel("Code Root Scan \\(ms\\):", Level.DEBUG),
         // Object Copy
-        new LogMessageWithLevel("Object Copy", Level.DEBUG),
-        new LogMessageWithLevel("Copied Bytes", Level.DEBUG),
-        new LogMessageWithLevel("LAB Waste", Level.DEBUG),
-        new LogMessageWithLevel("LAB Undo Waste", Level.DEBUG),
+        new LogMessageWithLevel("Object Copy \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Copied Bytes:", Level.DEBUG),
+        new LogMessageWithLevel("LAB Waste:", Level.DEBUG),
+        new LogMessageWithLevel("LAB Undo Waste:", Level.DEBUG),
         // Termination
-        new LogMessageWithLevel("Termination", Level.DEBUG),
-        new LogMessageWithLevel("Termination Attempts", Level.DEBUG),
+        new LogMessageWithLevel("Termination \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Termination Attempts:", Level.DEBUG),
         // Post Evacuate Collection Set
         // NMethod List Cleanup
-        new LogMessageWithLevel("NMethod List Cleanup", Level.DEBUG),
+        new LogMessageWithLevel("NMethod List Cleanup:", Level.DEBUG),
         // Reference Processing
-        new LogMessageWithLevel("Reference Processing", Level.DEBUG),
+        new LogMessageWithLevel("Reference Processing:", Level.DEBUG),
         // VM internal reference processing
-        new LogMessageWithLevel("Weak Processing", Level.DEBUG),
+        new LogMessageWithLevel("Weak Processing:", Level.DEBUG),
         new LogMessageWithLevel("VM Weak", Level.DEBUG),
         new LogMessageWithLevel("ObjectSynchronizer Weak", Level.DEBUG),
         new LogMessageWithLevel("JVMTI Tag Weak OopStorage", Level.DEBUG),
@@ -169,31 +169,31 @@ public boolean isAvailable() {
         new LogMessageWithLevel("JNI Weak", Level.DEBUG),
 
         // Post Evacuate Cleanup 1
-        new LogMessageWithLevel("Post Evacuate Cleanup 1", Level.DEBUG),
-        new LogMessageWithLevel("Merge Per-Thread State", Level.DEBUG),
-        new LogMessageWithLevel("LAB Waste", Level.DEBUG),
-        new LogMessageWithLevel("LAB Undo Waste", Level.DEBUG),
-        new LogMessageWithLevel("Evac Fail Extra Cards", Level.DEBUG),
-        new LogMessageWithLevel("Clear Logged Cards", Level.DEBUG),
-        new LogMessageWithLevel("Recalculate Used Memory", Level.DEBUG),
+        new LogMessageWithLevel("Post Evacuate Cleanup 1:", Level.DEBUG),
+        new LogMessageWithLevel("Merge Per-Thread State \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("LAB Waste:", Level.DEBUG),
+        new LogMessageWithLevel("LAB Undo Waste:", Level.DEBUG),
+        new LogMessageWithLevel("Evac Fail Extra Cards:", Level.DEBUG),
+        new LogMessageWithLevel("Clear Logged Cards \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Recalculate Used Memory \\(ms\\):", Level.DEBUG),
 
         // Post Evacuate Cleanup 2
-        new LogMessageWithLevel("Post Evacuate Cleanup 2", Level.DEBUG),
+        new LogMessageWithLevel("Post Evacuate Cleanup 2:", Level.DEBUG),
         new LogMessageWithLevelC2OrJVMCIOnly("Update Derived Pointers", Level.DEBUG),
-        new LogMessageWithLevel("Redirty Logged Cards", Level.DEBUG),
-        new LogMessageWithLevel("Redirtied Cards", Level.DEBUG),
-        new LogMessageWithLevel("Resize TLABs", Level.DEBUG),
-        new LogMessageWithLevel("Free Collection Set", Level.DEBUG),
-        new LogMessageWithLevel("Serial Free Collection Set", Level.TRACE),
-        new LogMessageWithLevel("Young Free Collection Set", Level.TRACE),
-        new LogMessageWithLevel("Non-Young Free Collection Set", Level.TRACE),
+        new LogMessageWithLevel("Redirty Logged Cards \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Redirtied Cards:", Level.DEBUG),
+        new LogMessageWithLevel("Resize TLABs \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Free Collection Set \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Serial Free Collection Set:", Level.TRACE),
+        new LogMessageWithLevel("Young Free Collection Set \\(ms\\):", Level.TRACE),
+        new LogMessageWithLevel("Non-Young Free Collection Set \\(ms\\):", Level.TRACE),
 
         // Misc Top-level
-        new LogMessageWithLevel("Rebuild Free List", Level.DEBUG),
-        new LogMessageWithLevel("Serial Rebuild Free List", Level.TRACE),
-        new LogMessageWithLevel("Parallel Rebuild Free List", Level.TRACE),
-        new LogMessageWithLevel("Prepare For Mutator", Level.DEBUG),
-        new LogMessageWithLevel("Expand Heap After Collection", Level.DEBUG),
+        new LogMessageWithLevel("Rebuild Free List:", Level.DEBUG),
+        new LogMessageWithLevel("Serial Rebuild Free List:", Level.TRACE),
+        new LogMessageWithLevel("Parallel Rebuild Free List \\(ms\\):", Level.TRACE),
+        new LogMessageWithLevel("Prepare For Mutator:", Level.DEBUG),
+        new LogMessageWithLevel("Expand Heap After Collection:", Level.DEBUG),
     };
 
     void checkMessagesAtLevel(OutputAnalyzer output, LogMessageWithLevel messages[], Level level) throws Exception {
@@ -258,13 +258,13 @@ private void testConcurrentRefinementLogs() throws Exception {
     }
 
     LogMessageWithLevel exhFailureMessages[] = new LogMessageWithLevel[] {
-        new LogMessageWithLevel("Recalculate Used Memory", Level.DEBUG),
-        new LogMessageWithLevel("Restore Preserved Marks", Level.DEBUG),
-        new LogMessageWithLevel("Restore Evacuation Failed Regions", Level.DEBUG),
-        new LogMessageWithLevel("Process Evacuation Failed Regions", Level.DEBUG),
-        new LogMessageWithLevel("Evacuation Failed Regions", Level.DEBUG),
-        new LogMessageWithLevel("Pinned Regions", Level.DEBUG),
-        new LogMessageWithLevel("Allocation Failed Regions", Level.DEBUG),
+        new LogMessageWithLevel("Recalculate Used Memory \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Restore Preserved Marks \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Restore Evacuation Failed Regions \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Process Evacuation Failed Regions \\(ms\\):", Level.DEBUG),
+        new LogMessageWithLevel("Evacuation Failed Regions:", Level.DEBUG),
+        new LogMessageWithLevel("Pinned Regions:", Level.DEBUG),
+        new LogMessageWithLevel("Allocation Failed Regions:", Level.DEBUG),
     };
 
     private void testWithEvacuationFailureLogs() throws Exception {

From 717a07b932e3dcabbad130d299b15cb963d50a67 Mon Sep 17 00:00:00 2001
From: Tejesh R <tr@openjdk.org>
Date: Fri, 12 Apr 2024 10:21:31 +0000
Subject: [PATCH 13/32] 8322140: javax/swing/JTable/JTableScrollPrintTest.java
 does not print the rows and columns of the table in Nimbus and Aqua
 LookAndFeel

Reviewed-by: psadhukhan, abhiscxk
---
 .../share/classes/javax/swing/JViewport.java  | 16 +++-----
 .../share/classes/sun/print/PathGraphics.java | 18 +++++++-
 .../classes/sun/swing/SwingUtilities2.java    |  4 +-
 .../swing/JTable/JTableScrollPrintTest.java   | 41 ++++++++-----------
 4 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/src/java.desktop/share/classes/javax/swing/JViewport.java b/src/java.desktop/share/classes/javax/swing/JViewport.java
index 7ceab7f83dde3..f7c27314750c4 100644
--- a/src/java.desktop/share/classes/javax/swing/JViewport.java
+++ b/src/java.desktop/share/classes/javax/swing/JViewport.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -603,15 +603,11 @@ public final Insets getInsets(Insets insets) {
 
 
     private Graphics getBackingStoreGraphics(Graphics g) {
-        if (!SwingUtilities2.isPrinting(g)) {
-            Graphics bsg = backingStoreImage.getGraphics();
-            bsg.setColor(g.getColor());
-            bsg.setFont(g.getFont());
-            bsg.setClip(g.getClipBounds());
-            return bsg;
-        } else {
-            return g;
-        }
+        Graphics bsg = backingStoreImage.getGraphics();
+        bsg.setColor(g.getColor());
+        bsg.setFont(g.getFont());
+        bsg.setClip(g.getClipBounds());
+        return bsg;
     }
 
 
diff --git a/src/java.desktop/share/classes/sun/print/PathGraphics.java b/src/java.desktop/share/classes/sun/print/PathGraphics.java
index e7fc04a97c400..7a82ffb4a4826 100644
--- a/src/java.desktop/share/classes/sun/print/PathGraphics.java
+++ b/src/java.desktop/share/classes/sun/print/PathGraphics.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -64,6 +64,7 @@
 import java.awt.image.DataBufferInt;
 import java.awt.image.ImageObserver;
 import java.awt.image.IndexColorModel;
+import java.awt.image.MultiResolutionImage;
 import java.awt.image.Raster;
 import java.awt.image.RenderedImage;
 import java.awt.image.SampleModel;
@@ -1132,6 +1133,9 @@ protected BufferedImage getBufferedImage(Image img) {
             // VI needs to make a new BI: this is unavoidable but
             // I don't expect VI's to be "huge" in any case.
             return ((VolatileImage)img).getSnapshot();
+        } else if (img instanceof MultiResolutionImage) {
+            return convertToBufferedImage((MultiResolutionImage) img,
+                                           img.getWidth(null), img.getHeight(null));
         } else {
             // may be null or may be some non-standard Image which
             // shouldn't happen as Image is implemented by the platform
@@ -1142,6 +1146,18 @@ protected BufferedImage getBufferedImage(Image img) {
         }
     }
 
+    protected BufferedImage convertToBufferedImage(MultiResolutionImage multiResolutionImage,
+                                                       double width, double height ) {
+        Image resolutionImage = multiResolutionImage.getResolutionVariant(width, height);
+        BufferedImage bufferedImage = new BufferedImage(resolutionImage.getWidth(null),
+                                                        resolutionImage.getHeight(null),
+                                                        BufferedImage.TYPE_INT_ARGB);
+        Graphics2D g2d = bufferedImage.createGraphics();
+        g2d.drawImage(resolutionImage, 0, 0, (int) width, (int) height, null);
+        g2d.dispose();
+        return bufferedImage;
+    }
+
     /**
      * Return true if the BufferedImage argument has non-opaque
      * bits in it and therefore can not be directly rendered by
diff --git a/src/java.desktop/share/classes/sun/swing/SwingUtilities2.java b/src/java.desktop/share/classes/sun/swing/SwingUtilities2.java
index d85d10963ed03..d763a72d528be 100644
--- a/src/java.desktop/share/classes/sun/swing/SwingUtilities2.java
+++ b/src/java.desktop/share/classes/sun/swing/SwingUtilities2.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1317,7 +1317,7 @@ public int hashCode() {
      * returns true if the Graphics is print Graphics
      * false otherwise
      */
-    public static boolean isPrinting(Graphics g) {
+    static boolean isPrinting(Graphics g) {
         return (g instanceof PrinterGraphics || g instanceof PrintGraphics);
     }
 
diff --git a/test/jdk/javax/swing/JTable/JTableScrollPrintTest.java b/test/jdk/javax/swing/JTable/JTableScrollPrintTest.java
index 6a12a361345e5..5621b35934606 100644
--- a/test/jdk/javax/swing/JTable/JTableScrollPrintTest.java
+++ b/test/jdk/javax/swing/JTable/JTableScrollPrintTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,13 +36,12 @@
 import javax.swing.JScrollPane;
 import javax.swing.JTable;
 import javax.swing.JViewport;
-import javax.swing.SwingUtilities;
 import javax.swing.table.DefaultTableModel;
 
 /*
  * @test
  * @key headful
- * @bug 8210807
+ * @bug 8210807 8322140
  * @library /java/awt/regtesthelpers
  * @build PassFailJFrame
  * @summary Test to check if JTable can be printed when JScrollPane added to it.
@@ -50,32 +49,27 @@
  */
 
 public class JTableScrollPrintTest {
-    public static JFrame frame;
-    public static PassFailJFrame passFailJFrame;
-
     public static void main(String[] args) throws Exception {
-        SwingUtilities.invokeAndWait(() -> {
-            try {
-                initialize();
-            } catch (Exception e) {
-                throw new RuntimeException(e);
-            }
-        });
-        passFailJFrame.awaitAndCheck();
-    }
-
-    public static void initialize() throws Exception {
-        final String INSTRUCTIONS = """
+        String INSTRUCTIONS = """
                 Instructions to Test:
                 1. Print table onto Paper/PDF, using the Print Dialog.
                 2. If entire table is printed, then the Test is PASS.
                 3. If table is partially printed without table cells,
                 then the Test is FAIL.
                 """;
-        TestTable testTable = new TestTable(true);
-        frame = new JFrame("JTable Print Test");
-        passFailJFrame = new PassFailJFrame("Test Instructions", INSTRUCTIONS, 5L, 6, 35);
+        PassFailJFrame.builder()
+                .title("Test Instructions")
+                .instructions(INSTRUCTIONS)
+                .rows(6)
+                .columns(35)
+                .testUI(JTableScrollPrintTest::initialize)
+                .build()
+                .awaitAndCheck();
+    }
 
+    public static JFrame initialize() {
+        TestTable testTable = new TestTable(true);
+        JFrame frame = new JFrame("JTable Print Test");
         PassFailJFrame.addTestWindow(frame);
         PassFailJFrame.positionTestWindow(frame, PassFailJFrame.Position.VERTICAL);
         frame.add(testTable);
@@ -83,6 +77,7 @@ public static void initialize() throws Exception {
         frame.setVisible(true);
         PrintUtilities printerJob = new PrintUtilities(testTable);
         printerJob.print("Test BackingStore Image Print");
+        return frame;
     }
 
     public static class TestTable extends JPanel {
@@ -103,7 +98,7 @@ public TestTable(Boolean useScrollPane) {
 
             JTable table = new JTable(model);
 
-            if (useScrollPane == true) {
+            if (useScrollPane) {
                 JScrollPane sp = new JScrollPane(table,
                         JScrollPane.VERTICAL_SCROLLBAR_ALWAYS,
                         JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
@@ -117,7 +112,7 @@ public TestTable(Boolean useScrollPane) {
     }
 
     static class PrintUtilities implements Printable {
-        private Component componentToBePrinted;
+        private final Component componentToBePrinted;
 
         public void printComponent(Component c, String jobname) {
             new PrintUtilities(c).print(jobname);

From 77a217df6000190cf73a1ca42a42cbcec42fb60f Mon Sep 17 00:00:00 2001
From: Gui Cao <gcao@openjdk.org>
Date: Fri, 12 Apr 2024 11:42:05 +0000
Subject: [PATCH 14/32] 8330095: RISC-V: Remove obsolete vandn_vi instruction

Reviewed-by: fyang, luhenry
---
 src/hotspot/cpu/riscv/assembler_riscv.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
index bc8c9f1cd7f2e..388e7f9eb941f 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -1870,7 +1870,6 @@ enum Nf {
   // Vector Bit-manipulation used in Cryptography (Zvkb) Extension
   INSN(vandn_vv,   0b1010111, 0b000, 0b000001);
   INSN(vandn_vx,   0b1010111, 0b100, 0b000001);
-  INSN(vandn_vi,   0b1010111, 0b011, 0b000001);
   INSN(vclmul_vv,  0b1010111, 0b010, 0b001100);
   INSN(vclmul_vx,  0b1010111, 0b110, 0b001100);
   INSN(vclmulh_vv, 0b1010111, 0b010, 0b001101);

From 3e9c3811669196945d7227affc28728670a256c5 Mon Sep 17 00:00:00 2001
From: Coleen Phillimore <coleenp@openjdk.org>
Date: Fri, 12 Apr 2024 12:16:49 +0000
Subject: [PATCH 15/32] 8329488: Move OopStorage code from safepoint cleanup
 and remove safepoint cleanup code

Reviewed-by: kbarrett, eosterlund
---
 src/hotspot/share/gc/shared/oopStorage.cpp    |  82 +++++--------
 src/hotspot/share/jfr/metadata/metadata.xml   |  13 +--
 src/hotspot/share/runtime/globals.hpp         |   5 +
 src/hotspot/share/runtime/safepoint.cpp       | 109 +-----------------
 src/hotspot/share/runtime/safepoint.hpp       |  12 --
 src/hotspot/share/runtime/serviceThread.cpp   |   7 +-
 .../classes/jdk/jfr/internal/query/view.ini   |   8 +-
 src/jdk.jfr/share/conf/jfr/default.jfc        |  10 --
 src/jdk.jfr/share/conf/jfr/profile.jfc        |  10 --
 .../stress/TestReclaimStringsLeaksMemory.java |   4 +-
 .../runtime/logging/SafepointCleanupTest.java |  67 -----------
 .../metadata/TestLookForUntestedEvents.java   |   3 +-
 .../event/runtime/TestSafepointEvents.java    |   1 -
 test/lib/jdk/test/lib/jfr/EventNames.java     |   4 +-
 14 files changed, 54 insertions(+), 281 deletions(-)
 delete mode 100644 test/hotspot/jtreg/runtime/logging/SafepointCleanupTest.java

diff --git a/src/hotspot/share/gc/shared/oopStorage.cpp b/src/hotspot/share/gc/shared/oopStorage.cpp
index 4d1a720bb3430..7117b86b26403 100644
--- a/src/hotspot/share/gc/shared/oopStorage.cpp
+++ b/src/hotspot/share/gc/shared/oopStorage.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -410,7 +410,7 @@ OopStorage::Block::block_for_ptr(const OopStorage* owner, const oop* ptr) {
 // allocations until some entries in it are released.
 //
 // release() is performed lock-free. (Note: This means it can't notify the
-// service thread of pending cleanup work.  It must be lock-free because
+// ServiceThread of pending cleanup work.  It must be lock-free because
 // it is called in all kinds of contexts where even quite low ranked locks
 // may be held.)  release() first looks up the block for
 // the entry, using address alignment to find the enclosing block (thereby
@@ -705,7 +705,7 @@ void OopStorage::Block::release_entries(uintx releasing, OopStorage* owner) {
       // Only request cleanup for to-empty transitions, not for from-full.
       // There isn't any rush to process from-full transitions.  Allocation
       // will reduce deferrals before allocating new blocks, so may process
-      // some.  And the service thread will drain the entire deferred list
+      // some.  And the ServiceThread will drain the entire deferred list
       // if there are any pending to-empty transitions.
       if (releasing == old_allocated) {
         owner->record_needs_cleanup();
@@ -880,67 +880,51 @@ bool OopStorage::should_report_num_dead() const {
 }
 
 // Managing service thread notifications.
-//
-// We don't want cleanup work to linger indefinitely, but we also don't want
-// to run the service thread too often.  We're also very limited in what we
-// can do in a release operation, where cleanup work is created.
-//
+
 // When a release operation changes a block's state to empty, it records the
 // need for cleanup in both the associated storage object and in the global
-// request state.  A safepoint cleanup task notifies the service thread when
+// request state. The ServiceThread checks at timed intervals if
 // there may be cleanup work for any storage object, based on the global
-// request state.  But that notification is deferred if the service thread
-// has run recently, and we also avoid duplicate notifications.  The service
-// thread updates the timestamp and resets the state flags on every iteration.
+// request state.  We don't want to run empty block cleanup too often in the
+// face of frequent explicit ServiceThread wakeups, hence the defer period.
 
 // Global cleanup request state.
 static volatile bool needs_cleanup_requested = false;
 
-// Flag for avoiding duplicate notifications.
-static bool needs_cleanup_triggered = false;
+// Time after which a cleanup is permitted.
+static jlong cleanup_permit_time = 0;
 
-// Time after which a notification can be made.
-static jlong cleanup_trigger_permit_time = 0;
-
-// Minimum time since last service thread check before notification is
-// permitted.  The value of 500ms was an arbitrary choice; frequent, but not
-// too frequent.
-const jlong cleanup_trigger_defer_period = 500 * NANOSECS_PER_MILLISEC;
-
-void OopStorage::trigger_cleanup_if_needed() {
-  MonitorLocker ml(Service_lock, Monitor::_no_safepoint_check_flag);
-  if (Atomic::load(&needs_cleanup_requested) &&
-      !needs_cleanup_triggered &&
-      (os::javaTimeNanos() > cleanup_trigger_permit_time)) {
-    needs_cleanup_triggered = true;
-    ml.notify_all();
-  }
-}
+// Minimum time between ServiceThread cleanups.
+// The value of 500ms was an arbitrary choice; frequent, but not too frequent.
+const jlong cleanup_defer_period = 500 * NANOSECS_PER_MILLISEC;
 
 bool OopStorage::has_cleanup_work_and_reset() {
   assert_lock_strong(Service_lock);
-  cleanup_trigger_permit_time =
-    os::javaTimeNanos() + cleanup_trigger_defer_period;
-  needs_cleanup_triggered = false;
-  // Set the request flag false and return its old value.
-  // Needs to be atomic to avoid dropping a concurrent request.
-  // Can't use Atomic::xchg, which may not support bool.
-  return Atomic::cmpxchg(&needs_cleanup_requested, true, false);
+
+  if (Atomic::load_acquire(&needs_cleanup_requested) &&
+      os::javaTimeNanos() > cleanup_permit_time) {
+    cleanup_permit_time =
+      os::javaTimeNanos() + cleanup_defer_period;
+    // Clear the request flag; the pending request is reported by returning true.
+    Atomic::release_store(&needs_cleanup_requested, false);
+    return true;
+  } else {
+    return false;
+  }
 }
 
-// Record that cleanup is needed, without notifying the Service thread.
-// Used by release(), where we can't lock even Service_lock.
+// Record that cleanup is needed, without notifying the ServiceThread, because
+// we can't lock the Service_lock.  Used by release().
 void OopStorage::record_needs_cleanup() {
-  // Set local flag first, else service thread could wake up and miss
-  // the request.  This order may instead (rarely) unnecessarily notify.
+  // Set local flag first, else ServiceThread could wake up and miss
+  // the request.
   Atomic::release_store(&_needs_cleanup, true);
   Atomic::release_store_fence(&needs_cleanup_requested, true);
 }
 
 bool OopStorage::delete_empty_blocks() {
-  // Service thread might have oopstorage work, but not for this object.
-  // Check for deferred updates even though that's not a service thread
-  // trigger; since we're here, we might as well process them.
+  // ServiceThread might have oopstorage work, but not for this object.
+  // But check for deferred updates, which might provide cleanup work.
   if (!Atomic::load_acquire(&_needs_cleanup) &&
       (Atomic::load_acquire(&_deferred_updates) == nullptr)) {
     return false;
@@ -986,7 +970,7 @@ bool OopStorage::delete_empty_blocks() {
         // Don't interfere with an active concurrent iteration.
         // Instead, give up immediately.  There is more work to do,
         // but don't re-notify, to avoid useless spinning of the
-        // service thread.  Instead, iteration completion notifies.
+        // ServiceThread.  Instead, iteration completion notifies.
         if (_concurrent_iteration_count > 0) return true;
         _active_array->remove(block);
       }
@@ -998,10 +982,8 @@ bool OopStorage::delete_empty_blocks() {
       ThreadBlockInVM tbiv(JavaThread::current());
     }
   }
-  // Exceeded work limit or can't delete last block.  This will
-  // cause the service thread to loop, giving other subtasks an
-  // opportunity to run too.  There's no need for a notification,
-  // because we are part of the service thread (unless gtesting).
+  // Exceeded work limit or can't delete last block so still needs cleanup
+  // for the next time.
   record_needs_cleanup();
   return true;
 }
diff --git a/src/hotspot/share/jfr/metadata/metadata.xml b/src/hotspot/share/jfr/metadata/metadata.xml
index 78cd95840b22d..42facf5621733 100644
--- a/src/hotspot/share/jfr/metadata/metadata.xml
+++ b/src/hotspot/share/jfr/metadata/metadata.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="utf-8"?>
 
 <!--
- Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 
  This code is free software; you can redistribute it and/or modify it
@@ -684,17 +684,6 @@
     <Field type="int" name="iterations" label="Iterations" description="Number of state check iterations" />
   </Event>
 
-  <Event name="SafepointCleanup" category="Java Virtual Machine, Runtime, Safepoint" label="Safepoint Cleanup" description="Safepointing begin running cleanup tasks"
-    thread="true">
-    <Field type="ulong" name="safepointId" label="Safepoint Identifier" relation="SafepointId" />
-  </Event>
-
-  <Event name="SafepointCleanupTask" category="Java Virtual Machine, Runtime, Safepoint" label="Safepoint Cleanup Task" description="Safepointing begin running cleanup tasks"
-    thread="true">
-    <Field type="ulong" name="safepointId" label="Safepoint Identifier" relation="SafepointId" />
-    <Field type="string" name="name" label="Task Name" description="The task name" />
-  </Event>
-
   <Event name="SafepointEnd" category="Java Virtual Machine, Runtime, Safepoint" label="Safepoint End" description="Safepointing end" thread="true">
     <Field type="ulong" name="safepointId" label="Safepoint Identifier" relation="SafepointId" />
   </Event>
diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp
index 575d9a3de36fc..f74930b62e166 100644
--- a/src/hotspot/share/runtime/globals.hpp
+++ b/src/hotspot/share/runtime/globals.hpp
@@ -1282,6 +1282,11 @@ const int ObjectAlignmentInBytes = 8;
           "(0 means none)")                                                 \
           range(0, max_jint)                                                \
                                                                             \
+  product(intx, ServiceThreadCleanupInterval, 1000, DIAGNOSTIC,             \
+          "Wake the ServiceThread to do periodic cleanup checks every so "  \
+          "many milliseconds (0 means none)")                               \
+          range(0, max_jint)                                                \
+                                                                            \
   product(double, SafepointTimeoutDelay, 10000,                             \
           "Delay in milliseconds for option SafepointTimeout; "             \
           "supports sub-millisecond resolution with fractional values.")    \
diff --git a/src/hotspot/share/runtime/safepoint.cpp b/src/hotspot/share/runtime/safepoint.cpp
index 935c1beee40e0..05f59de4a17ef 100644
--- a/src/hotspot/share/runtime/safepoint.cpp
+++ b/src/hotspot/share/runtime/safepoint.cpp
@@ -80,12 +80,6 @@ static void post_safepoint_begin_event(EventSafepointBegin& event,
   }
 }
 
-static void post_safepoint_cleanup_event(EventSafepointCleanup& event, uint64_t safepoint_id) {
-  if (event.should_commit()) {
-    event.set_safepointId(safepoint_id);
-    event.commit();
-  }
-}
 
 static void post_safepoint_synchronize_event(EventSafepointStateSynchronization& event,
                                              uint64_t safepoint_id,
@@ -101,16 +95,6 @@ static void post_safepoint_synchronize_event(EventSafepointStateSynchronization&
   }
 }
 
-static void post_safepoint_cleanup_task_event(EventSafepointCleanupTask& event,
-                                              uint64_t safepoint_id,
-                                              const char* name) {
-  if (event.should_commit()) {
-    event.set_safepointId(safepoint_id);
-    event.set_name(name);
-    event.commit();
-  }
-}
-
 static void post_safepoint_end_event(EventSafepointEnd& event, uint64_t safepoint_id) {
   if (event.should_commit()) {
     event.set_safepointId(safepoint_id);
@@ -435,14 +419,7 @@ void SafepointSynchronize::begin() {
 
   SafepointTracing::synchronized(nof_threads, initial_running, _nof_threads_hit_polling_page);
 
-  // We do the safepoint cleanup first since a GC related safepoint
-  // needs cleanup to be completed before running the GC op.
-  EventSafepointCleanup cleanup_event;
-  do_cleanup_tasks();
-  post_safepoint_cleanup_event(cleanup_event, _safepoint_id);
-
   post_safepoint_begin_event(begin_event, _safepoint_id, nof_threads, _current_jni_active_count);
-  SafepointTracing::cleanup();
 }
 
 void SafepointSynchronize::disarm_safepoint() {
@@ -507,68 +484,6 @@ void SafepointSynchronize::end() {
   post_safepoint_end_event(event, safepoint_id());
 }
 
-class ParallelCleanupTask : public WorkerTask {
-private:
-  SubTasksDone _subtasks;
-
-  class Tracer {
-  private:
-    const char*               _name;
-    EventSafepointCleanupTask _event;
-    TraceTime                 _timer;
-
-  public:
-    Tracer(const char* name) :
-        _name(name),
-        _event(),
-        _timer(name, TRACETIME_LOG(Info, safepoint, cleanup)) {}
-    ~Tracer() {
-      post_safepoint_cleanup_task_event(_event, SafepointSynchronize::safepoint_id(), _name);
-    }
-  };
-
-public:
-  ParallelCleanupTask() :
-    WorkerTask("Parallel Safepoint Cleanup"),
-    _subtasks(SafepointSynchronize::SAFEPOINT_CLEANUP_NUM_TASKS) {}
-
-  uint expected_num_workers() const {
-    uint workers = 0;
-
-    return MAX2<uint>(1, workers);
-  }
-
-  void work(uint worker_id) {
-    if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_REQUEST_OOPSTORAGE_CLEANUP)) {
-      // Don't bother reporting event or time for this very short operation.
-      // To have any utility we'd also want to report whether needed.
-      OopStorage::trigger_cleanup_if_needed();
-    }
-
-    _subtasks.all_tasks_claimed();
-  }
-};
-
-// Various cleaning tasks that should be done periodically at safepoints.
-void SafepointSynchronize::do_cleanup_tasks() {
-
-  TraceTime timer("safepoint cleanup tasks", TRACETIME_LOG(Info, safepoint, cleanup));
-
-  CollectedHeap* heap = Universe::heap();
-  assert(heap != nullptr, "heap not initialized yet?");
-  ParallelCleanupTask cleanup;
-  WorkerThreads* cleanup_workers = heap->safepoint_workers();
-  const uint expected_num_workers = cleanup.expected_num_workers();
-  if (cleanup_workers != nullptr && expected_num_workers > 1) {
-    // Parallel cleanup using GC provided thread pool.
-    const uint num_workers = MIN2(expected_num_workers, cleanup_workers->active_workers());
-    cleanup_workers->run_task(&cleanup, num_workers);
-  } else {
-    // Serial cleanup using VMThread.
-    cleanup.work(0);
-  }
-}
-
 // Methods for determining if a JavaThread is safepoint safe.
 
 // False means unsafe with undetermined state.
@@ -946,7 +861,6 @@ void ThreadSafepointState::handle_polling_page_exception() {
 
 jlong SafepointTracing::_last_safepoint_begin_time_ns = 0;
 jlong SafepointTracing::_last_safepoint_sync_time_ns = 0;
-jlong SafepointTracing::_last_safepoint_cleanup_time_ns = 0;
 jlong SafepointTracing::_last_safepoint_end_time_ns = 0;
 jlong SafepointTracing::_last_app_time_ns = 0;
 int SafepointTracing::_nof_threads = 0;
@@ -954,7 +868,6 @@ int SafepointTracing::_nof_running = 0;
 int SafepointTracing::_page_trap = 0;
 VM_Operation::VMOp_Type SafepointTracing::_current_type;
 jlong     SafepointTracing::_max_sync_time = 0;
-jlong     SafepointTracing::_max_cleanup_time = 0;
 jlong     SafepointTracing::_max_vmop_time = 0;
 uint64_t  SafepointTracing::_op_count[VM_Operation::VMOp_Terminating] = {0};
 
@@ -970,7 +883,7 @@ static void print_header(outputStream* st) {
 
   st->print("VM Operation                 "
             "[ threads: total initial_running ]"
-            "[ time:       sync    cleanup       vmop      total ]");
+            "[ time:       sync    vmop      total ]");
 
   st->print_cr(" page_trap_count");
 }
@@ -999,11 +912,9 @@ void SafepointTracing::statistics_log() {
            _nof_threads,
            _nof_running);
   ls.print("[       "
-           INT64_FORMAT_W(10) " " INT64_FORMAT_W(10) " "
-           INT64_FORMAT_W(10) " " INT64_FORMAT_W(10) " ]",
+           INT64_FORMAT_W(10) " " INT64_FORMAT_W(10) " " INT64_FORMAT_W(10) " ]",
            (int64_t)(_last_safepoint_sync_time_ns - _last_safepoint_begin_time_ns),
-           (int64_t)(_last_safepoint_cleanup_time_ns - _last_safepoint_sync_time_ns),
-           (int64_t)(_last_safepoint_end_time_ns - _last_safepoint_cleanup_time_ns),
+           (int64_t)(_last_safepoint_end_time_ns - _last_safepoint_sync_time_ns),
            (int64_t)(_last_safepoint_end_time_ns - _last_safepoint_begin_time_ns));
 
   ls.print_cr(INT32_FORMAT_W(16), _page_trap);
@@ -1024,8 +935,6 @@ void SafepointTracing::statistics_exit_log() {
 
   log_info(safepoint, stats)("Maximum sync time  " INT64_FORMAT" ns",
                               (int64_t)(_max_sync_time));
-  log_info(safepoint, stats)("Maximum cleanup time  " INT64_FORMAT" ns",
-                              (int64_t)(_max_cleanup_time));
   log_info(safepoint, stats)("Maximum vm operation time (except for Exit VM operation)  "
                               INT64_FORMAT " ns",
                               (int64_t)(_max_vmop_time));
@@ -1038,7 +947,6 @@ void SafepointTracing::begin(VM_Operation::VMOp_Type type) {
   // update the time stamp to begin recording safepoint time
   _last_safepoint_begin_time_ns = os::javaTimeNanos();
   _last_safepoint_sync_time_ns = 0;
-  _last_safepoint_cleanup_time_ns = 0;
 
   _last_app_time_ns = _last_safepoint_begin_time_ns - _last_safepoint_end_time_ns;
   _last_safepoint_end_time_ns = 0;
@@ -1054,19 +962,12 @@ void SafepointTracing::synchronized(int nof_threads, int nof_running, int traps)
   RuntimeService::record_safepoint_synchronized(_last_safepoint_sync_time_ns - _last_safepoint_begin_time_ns);
 }
 
-void SafepointTracing::cleanup() {
-  _last_safepoint_cleanup_time_ns = os::javaTimeNanos();
-}
-
 void SafepointTracing::end() {
   _last_safepoint_end_time_ns = os::javaTimeNanos();
 
   if (_max_sync_time < (_last_safepoint_sync_time_ns - _last_safepoint_begin_time_ns)) {
     _max_sync_time = _last_safepoint_sync_time_ns - _last_safepoint_begin_time_ns;
   }
-  if (_max_cleanup_time < (_last_safepoint_cleanup_time_ns - _last_safepoint_sync_time_ns)) {
-    _max_cleanup_time = _last_safepoint_cleanup_time_ns - _last_safepoint_sync_time_ns;
-  }
   if (_max_vmop_time < (_last_safepoint_end_time_ns - _last_safepoint_sync_time_ns)) {
     _max_vmop_time = _last_safepoint_end_time_ns - _last_safepoint_sync_time_ns;
   }
@@ -1078,14 +979,12 @@ void SafepointTracing::end() {
      "Safepoint \"%s\", "
      "Time since last: " JLONG_FORMAT " ns, "
      "Reaching safepoint: " JLONG_FORMAT " ns, "
-     "Cleanup: " JLONG_FORMAT " ns, "
      "At safepoint: " JLONG_FORMAT " ns, "
      "Total: " JLONG_FORMAT " ns",
       VM_Operation::name(_current_type),
       _last_app_time_ns,
       _last_safepoint_sync_time_ns    - _last_safepoint_begin_time_ns,
-      _last_safepoint_cleanup_time_ns - _last_safepoint_sync_time_ns,
-      _last_safepoint_end_time_ns     - _last_safepoint_cleanup_time_ns,
+      _last_safepoint_end_time_ns     - _last_safepoint_sync_time_ns,
       _last_safepoint_end_time_ns     - _last_safepoint_begin_time_ns
      );
 
diff --git a/src/hotspot/share/runtime/safepoint.hpp b/src/hotspot/share/runtime/safepoint.hpp
index c8b80b3421777..93ede70c6acd4 100644
--- a/src/hotspot/share/runtime/safepoint.hpp
+++ b/src/hotspot/share/runtime/safepoint.hpp
@@ -68,13 +68,6 @@ class SafepointSynchronize : AllStatic {
                                                // VM thread and any NonJavaThread may be running.
   };
 
-  // The enums are listed in the order of the tasks when done serially.
-  enum SafepointCleanupTasks {
-    SAFEPOINT_CLEANUP_REQUEST_OOPSTORAGE_CLEANUP,
-    // Leave this one last.
-    SAFEPOINT_CLEANUP_NUM_TASKS
-  };
-
  private:
   friend class SafepointMechanism;
   friend class ThreadSafepointState;
@@ -155,8 +148,6 @@ class SafepointSynchronize : AllStatic {
   // Exception handling for page polling
   static void handle_polling_page_exception(JavaThread *thread);
 
-  static void do_cleanup_tasks();
-
   static void set_is_at_safepoint()             { _state = _synchronized; }
   static void set_is_not_at_safepoint()         { _state = _not_synchronized; }
 
@@ -239,7 +230,6 @@ class SafepointTracing : public AllStatic {
   // Absolute
   static jlong _last_safepoint_begin_time_ns;
   static jlong _last_safepoint_sync_time_ns;
-  static jlong _last_safepoint_cleanup_time_ns;
   static jlong _last_safepoint_end_time_ns;
 
   // Relative
@@ -251,7 +241,6 @@ class SafepointTracing : public AllStatic {
 
   static VM_Operation::VMOp_Type _current_type;
   static jlong     _max_sync_time;
-  static jlong     _max_cleanup_time;
   static jlong     _max_vmop_time;
   static uint64_t  _op_count[VM_Operation::VMOp_Terminating];
 
@@ -262,7 +251,6 @@ class SafepointTracing : public AllStatic {
 
   static void begin(VM_Operation::VMOp_Type type);
   static void synchronized(int nof_threads, int nof_running, int traps);
-  static void cleanup();
   static void end();
 
   static void statistics_exit_log();
diff --git a/src/hotspot/share/runtime/serviceThread.cpp b/src/hotspot/share/runtime/serviceThread.cpp
index 0118c5005abf9..a5082fad47937 100644
--- a/src/hotspot/share/runtime/serviceThread.cpp
+++ b/src/hotspot/share/runtime/serviceThread.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -126,8 +126,9 @@ void ServiceThread::service_thread_entry(JavaThread* jt, TRAPS) {
               (cldg_cleanup_work = ClassLoaderDataGraph::should_clean_metaspaces_and_reset()) |
               (jvmti_tagmap_work = JvmtiTagMap::has_object_free_events_and_reset())
              ) == 0) {
-        // Wait until notified that there is some work to do.
-        ml.wait();
+        // Wait until notified that there is some work to do, or until the timer expires.
+        // Some cleanup requests don't notify the ServiceThread, so it must check for work periodically.
+        ml.wait(ServiceThreadCleanupInterval);
       }
 
       if (has_jvmti_events) {
diff --git a/src/jdk.jfr/share/classes/jdk/jfr/internal/query/view.ini b/src/jdk.jfr/share/classes/jdk/jfr/internal/query/view.ini
index c28d13c0e9122..8db19fdc239fe 100644
--- a/src/jdk.jfr/share/classes/jdk/jfr/internal/query/view.ini
+++ b/src/jdk.jfr/share/classes/jdk/jfr/internal/query/view.ini
@@ -1,5 +1,5 @@
 ;
-; Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+; Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
 ; DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 ;
 ; This code is free software; you can redistribute it and/or modify it
@@ -525,13 +525,13 @@ form = "COLUMN 'Event Count', 'First Recorded Event', 'Last Recorded Event',
 [jvm.safepoints]
 label = "Safepoints"
 table = "COLUMN  'Start Time', 'Duration',
-                   'State Syncronization', 'Cleanup',
+                   'State Syncronization',
                    'JNI Critical Threads', 'Total Threads'
          SELECT    B.startTime,  DIFF([B|E].startTime),
-                   S.duration, C.duration,
+                   S.duration,
                    jniCriticalThreadCount, totalThreadCount
          FROM SafepointBegin AS B, SafepointEnd AS E,
-              SafepointCleanup AS C, SafepointStateSynchronization AS S
+              SafepointStateSynchronization AS S
          GROUP BY safepointId ORDER BY B.startTime"
 
 [jvm.longest-compilations]
diff --git a/src/jdk.jfr/share/conf/jfr/default.jfc b/src/jdk.jfr/share/conf/jfr/default.jfc
index 1ea936ae20d5d..79aee3f334925 100644
--- a/src/jdk.jfr/share/conf/jfr/default.jfc
+++ b/src/jdk.jfr/share/conf/jfr/default.jfc
@@ -200,16 +200,6 @@
       <setting name="threshold">10 ms</setting>
     </event>
 
-    <event name="jdk.SafepointCleanup">
-      <setting name="enabled">false</setting>
-      <setting name="threshold">10 ms</setting>
-    </event>
-
-    <event name="jdk.SafepointCleanupTask">
-      <setting name="enabled">false</setting>
-      <setting name="threshold">10 ms</setting>
-    </event>
-
     <event name="jdk.SafepointEnd">
       <setting name="enabled">false</setting>
       <setting name="threshold">10 ms</setting>
diff --git a/src/jdk.jfr/share/conf/jfr/profile.jfc b/src/jdk.jfr/share/conf/jfr/profile.jfc
index 0b018d33058ce..6691638e5d41d 100644
--- a/src/jdk.jfr/share/conf/jfr/profile.jfc
+++ b/src/jdk.jfr/share/conf/jfr/profile.jfc
@@ -200,16 +200,6 @@
       <setting name="threshold">0 ms</setting>
     </event>
 
-    <event name="jdk.SafepointCleanup">
-      <setting name="enabled">false</setting>
-      <setting name="threshold">0 ms</setting>
-    </event>
-
-    <event name="jdk.SafepointCleanupTask">
-      <setting name="enabled">false</setting>
-      <setting name="threshold">0 ms</setting>
-    </event>
-
     <event name="jdk.SafepointEnd">
       <setting name="enabled">false</setting>
       <setting name="threshold">0 ms</setting>
diff --git a/test/hotspot/jtreg/gc/stress/TestReclaimStringsLeaksMemory.java b/test/hotspot/jtreg/gc/stress/TestReclaimStringsLeaksMemory.java
index bfe19e2c9c5af..645583ec450c3 100644
--- a/test/hotspot/jtreg/gc/stress/TestReclaimStringsLeaksMemory.java
+++ b/test/hotspot/jtreg/gc/stress/TestReclaimStringsLeaksMemory.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,7 +55,7 @@ public class TestReclaimStringsLeaksMemory {
     public static void main(String[] args) throws Exception {
         ArrayList<String> baseargs = new ArrayList<>(Arrays.asList("-Xms256M",
                                                                    "-Xmx256M",
-                                                                   "-Xlog:gc*,stringtable*=debug:gc.log",
+                                                                   "-Xlog:gc*,stringtable*=debug,oopstorage+blocks=debug:gc.log",
                                                                    "-XX:NativeMemoryTracking=summary",
                                                                    "-XX:+UnlockDiagnosticVMOptions",
                                                                    "-XX:+PrintNMTStatistics" ));
diff --git a/test/hotspot/jtreg/runtime/logging/SafepointCleanupTest.java b/test/hotspot/jtreg/runtime/logging/SafepointCleanupTest.java
deleted file mode 100644
index 0586bf29989d9..0000000000000
--- a/test/hotspot/jtreg/runtime/logging/SafepointCleanupTest.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * @test
- * @bug 8149991
- * @summary safepoint+cleanup=info should have output from the code
- * @requires vm.flagless
- * @library /test/lib
- * @modules java.base/jdk.internal.misc
- *          java.management
- * @run driver SafepointCleanupTest
- */
-
-import jdk.test.lib.process.OutputAnalyzer;
-import jdk.test.lib.process.ProcessTools;
-
-public class SafepointCleanupTest {
-    static void analyzeOutputOn(ProcessBuilder pb) throws Exception {
-        OutputAnalyzer output = new OutputAnalyzer(pb.start());
-        output.shouldContain("[safepoint,cleanup]");
-        output.shouldContain("safepoint cleanup tasks");
-        output.shouldHaveExitValue(0);
-    }
-
-    static void analyzeOutputOff(ProcessBuilder pb) throws Exception {
-        OutputAnalyzer output = new OutputAnalyzer(pb.start());
-        output.shouldNotContain("[safepoint,cleanup]");
-        output.shouldHaveExitValue(0);
-    }
-
-    public static void main(String[] args) throws Exception {
-        ProcessBuilder pb = ProcessTools.createLimitedTestJavaProcessBuilder("-Xlog:safepoint+cleanup=info",
-                                                                             InnerClass.class.getName());
-        analyzeOutputOn(pb);
-
-        pb = ProcessTools.createLimitedTestJavaProcessBuilder("-Xlog:safepoint+cleanup=off",
-                                                              InnerClass.class.getName());
-        analyzeOutputOff(pb);
-    }
-
-    public static class InnerClass {
-        public static void main(String[] args) throws Exception {
-            System.out.println("Safepoint Cleanup test");
-        }
-    }
-}
diff --git a/test/jdk/jdk/jfr/event/metadata/TestLookForUntestedEvents.java b/test/jdk/jdk/jfr/event/metadata/TestLookForUntestedEvents.java
index c701a846780e8..981579ba341e6 100644
--- a/test/jdk/jdk/jfr/event/metadata/TestLookForUntestedEvents.java
+++ b/test/jdk/jdk/jfr/event/metadata/TestLookForUntestedEvents.java
@@ -56,8 +56,7 @@ public class TestLookForUntestedEvents {
         Arrays.asList(
             "DataLoss", "IntFlag", "ReservedStackActivation", "NativeLibraryUnload",
             "DoubleFlag", "UnsignedLongFlagChanged", "IntFlagChanged",
-            "UnsignedIntFlag", "UnsignedIntFlagChanged", "DoubleFlagChanged",
-            "SafepointCleanupTask")
+            "UnsignedIntFlag", "UnsignedIntFlagChanged", "DoubleFlagChanged")
     );
 
     // GC uses specific framework to test the events, instead of using event names literally.
diff --git a/test/jdk/jdk/jfr/event/runtime/TestSafepointEvents.java b/test/jdk/jdk/jfr/event/runtime/TestSafepointEvents.java
index 31ac371859256..8ea0181b0dd40 100644
--- a/test/jdk/jdk/jfr/event/runtime/TestSafepointEvents.java
+++ b/test/jdk/jdk/jfr/event/runtime/TestSafepointEvents.java
@@ -52,7 +52,6 @@ public class TestSafepointEvents {
     static final String[] EVENT_NAMES = new String[] {
         EventNames.SafepointBegin,
         EventNames.SafepointStateSynchronization,
-        EventNames.SafepointCleanup,
         EventNames.SafepointEnd
     };
 
diff --git a/test/lib/jdk/test/lib/jfr/EventNames.java b/test/lib/jdk/test/lib/jfr/EventNames.java
index 3799af54804a1..5ea58ab84641a 100644
--- a/test/lib/jdk/test/lib/jfr/EventNames.java
+++ b/test/lib/jdk/test/lib/jfr/EventNames.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -65,8 +65,6 @@ public class EventNames {
     public static final String ClassUnload = PREFIX + "ClassUnload";
     public static final String SafepointBegin = PREFIX + "SafepointBegin";
     public static final String SafepointStateSynchronization = PREFIX + "SafepointStateSynchronization";
-    public static final String SafepointCleanup = PREFIX + "SafepointCleanup";
-    public static final String SafepointCleanupTask = PREFIX + "SafepointCleanupTask";
     public static final String SafepointEnd = PREFIX + "SafepointEnd";
     public static final String ExecuteVMOperation = PREFIX + "ExecuteVMOperation";
     public static final String Shutdown = PREFIX + "Shutdown";

From b8f675f45b890a9e969c250d9bf3117e6d61c2ff Mon Sep 17 00:00:00 2001
From: Guoxiong Li <gli@openjdk.org>
Date: Fri, 12 Apr 2024 12:54:36 +0000
Subject: [PATCH 16/32] 8329771: G1: Refactor G1BlockOffsetTable::verify

Reviewed-by: ayang, tschatzl
---
 .../share/gc/g1/g1BlockOffsetTable.cpp        | 75 ++++++++-----------
 .../share/gc/g1/g1BlockOffsetTable.hpp        |  3 +-
 src/hotspot/share/gc/g1/g1HeapRegion.cpp      |  5 --
 3 files changed, 32 insertions(+), 51 deletions(-)

diff --git a/src/hotspot/share/gc/g1/g1BlockOffsetTable.cpp b/src/hotspot/share/gc/g1/g1BlockOffsetTable.cpp
index 18ab40174d6f2..ac546faafd919 100644
--- a/src/hotspot/share/gc/g1/g1BlockOffsetTable.cpp
+++ b/src/hotspot/share/gc/g1/g1BlockOffsetTable.cpp
@@ -211,55 +211,40 @@ void G1BlockOffsetTable::update_for_block_work(HeapWord* blk_start, HeapWord* bl
          "boundary: " PTR_FORMAT,
          (uint)offset_array(offset_card),
          p2i(blk_start), p2i(boundary));
-  for (uint8_t* j = offset_card + 1; j <= end_card; j++) {
-    assert(offset_array(j) > 0 &&
-           offset_array(j) <= (uint8_t) (CardTable::card_size_in_words() + BOTConstants::N_powers - 1),
-           "offset array should have been set - "
-           "%u not > 0 OR %u not <= %u",
-           (uint) offset_array(j),
-           (uint) offset_array(j),
-           (uint) (CardTable::card_size_in_words() + BOTConstants::N_powers - 1));
-  }
-#endif
-}
 
-void G1BlockOffsetTable::verify(const HeapRegion* hr) const {
-  assert(hr->bottom() < hr->top(), "Only non-empty regions should be verified.");
-  uint8_t* start_card = entry_for_addr(hr->bottom());
-  uint8_t* end_card = entry_for_addr(hr->top() - 1);
+  G1BlockOffsetTable::verify_for_block(blk_start, blk_end);
+#endif // ASSERT
+}
 
-  for (uint8_t* current_card = start_card; current_card < end_card; current_card++) {
-    uint8_t entry = offset_array(current_card);
-    if (entry < CardTable::card_size_in_words()) {
-      // The entry should point to an object before the current card. Verify that
-      // it is possible to walk from that object in to the current card by just
-      // iterating over the objects following it.
-      HeapWord* card_address = addr_for_entry(current_card);
-      HeapWord* obj_end = card_address - entry;
-      while (obj_end < card_address) {
-        HeapWord* obj = obj_end;
-        size_t obj_size = hr->block_size(obj);
-        obj_end = obj + obj_size;
-        guarantee(obj_end > obj && obj_end <= hr->top(),
-                  "Invalid object end. obj: " PTR_FORMAT " obj_size: " SIZE_FORMAT " obj_end: " PTR_FORMAT " top: " PTR_FORMAT,
-                  p2i(obj), obj_size, p2i(obj_end), p2i(hr->top()));
-      }
-    } else {
-      // Because we refine the BOT based on which cards are dirty there is not much we can verify here.
-      // We need to make sure that we are going backwards and that we don't pass the start of the
-      // corresponding heap region. But that is about all we can verify.
-      size_t backskip = BOTConstants::entry_to_cards_back(entry);
-      guarantee(backskip >= 1, "Must be going back at least one card.");
+#ifdef ASSERT
+void G1BlockOffsetTable::verify_offset(uint8_t* card_index, uint8_t upper_boundary) const {
+  assert(offset_array(card_index) <= upper_boundary,
+         "Offset %u should not be larger than upper boundary %u.",
+         (uint) offset_array(card_index),
+         (uint) upper_boundary);
+}
 
-      size_t max_backskip = current_card - start_card;
-      guarantee(backskip <= max_backskip,
-                "Going backwards beyond the start_card. start_card: " SIZE_FORMAT " current_card: " SIZE_FORMAT " backskip: " SIZE_FORMAT,
-                p2i(start_card), p2i(current_card), backskip);
+void G1BlockOffsetTable::verify_for_block(HeapWord* blk_start, HeapWord* blk_end) const {
+  assert(is_crossing_card_boundary(blk_start, blk_end), "precondition");
 
-      HeapWord* backskip_address = addr_for_entry(current_card - backskip);
-      guarantee(backskip_address >= hr->bottom(),
-                "Going backwards beyond bottom of the region: bottom: " PTR_FORMAT ", backskip_address: " PTR_FORMAT,
-                p2i(hr->bottom()), p2i(backskip_address));
+  uint8_t* start_card = entry_for_addr(align_up_by_card_size(blk_start));
+  uint8_t* end_card = entry_for_addr(blk_end - 1);
+  // Check cards in [start_card, end_card]
+  verify_offset(start_card, CardTable::card_size_in_words());
+
+  for (uint8_t* current_card = start_card + 1; current_card <= end_card; ++current_card) {
+    assert(offset_array(current_card) > 0,
+           "Offset %u is not larger than 0.",
+           (uint) offset_array(current_card));
+    verify_offset(current_card, (uint8_t) (CardTable::card_size_in_words() + BOTConstants::N_powers - 1));
+
+    uint8_t* prev  = current_card - 1;
+    uint8_t* value = current_card;
+    if (offset_array(prev) != offset_array(value)) {
+      assert(offset_array(value) >= offset_array(prev), "monotonic");
+      size_t n_cards_back = BOTConstants::entry_to_cards_back(offset_array(value));
+      assert(start_card == (current_card - n_cards_back), "inv");
     }
   }
 }
+#endif // ASSERT
diff --git a/src/hotspot/share/gc/g1/g1BlockOffsetTable.hpp b/src/hotspot/share/gc/g1/g1BlockOffsetTable.hpp
index 4eff09f243e38..518c595b22520 100644
--- a/src/hotspot/share/gc/g1/g1BlockOffsetTable.hpp
+++ b/src/hotspot/share/gc/g1/g1BlockOffsetTable.hpp
@@ -112,7 +112,8 @@ class G1BlockOffsetTable: public CHeapObj<mtGC> {
     return obj_end > cur_card_boundary;
   }
 
-  void verify(const HeapRegion* hr) const;
+  void verify_offset(uint8_t* card_index, uint8_t upper) const NOT_DEBUG_RETURN;
+  void verify_for_block(HeapWord* blk_start, HeapWord* blk_end) const NOT_DEBUG_RETURN;
 
   // Returns the address of the start of the block reaching into the card containing
   // "addr".
diff --git a/src/hotspot/share/gc/g1/g1HeapRegion.cpp b/src/hotspot/share/gc/g1/g1HeapRegion.cpp
index 4583cae2046a3..698a4e0e39fbe 100644
--- a/src/hotspot/share/gc/g1/g1HeapRegion.cpp
+++ b/src/hotspot/share/gc/g1/g1HeapRegion.cpp
@@ -695,11 +695,6 @@ bool HeapRegion::verify(VerifyOption vo) const {
     return true;
   }
 
-  // Only regions in old generation contain valid BOT.
-  if (!is_empty() && !is_young()) {
-    _bot->verify(this);
-  }
-
   if (is_humongous()) {
     oop obj = cast_to_oop(this->humongous_start_region()->bottom());
     if (cast_from_oop<HeapWord*>(obj) > bottom() || cast_from_oop<HeapWord*>(obj) + obj->size() < bottom()) {

From 0f78d017afb786345fca635862a4a70f43fff251 Mon Sep 17 00:00:00 2001
From: Guoxiong Li <gli@openjdk.org>
Date: Fri, 12 Apr 2024 12:59:49 +0000
Subject: [PATCH 17/32] 8329658: Serial: Refactor
 ContiguousSpace::_next_compaction_space

Reviewed-by: ayang, tschatzl
---
 src/hotspot/share/gc/serial/defNewGeneration.cpp | 14 --------------
 src/hotspot/share/gc/serial/serialFullGC.cpp     |  2 +-
 src/hotspot/share/gc/shared/space.cpp            |  2 --
 src/hotspot/share/gc/shared/space.hpp            | 15 ---------------
 4 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/src/hotspot/share/gc/serial/defNewGeneration.cpp b/src/hotspot/share/gc/serial/defNewGeneration.cpp
index 9817bb7620c05..008ea957b9e79 100644
--- a/src/hotspot/share/gc/serial/defNewGeneration.cpp
+++ b/src/hotspot/share/gc/serial/defNewGeneration.cpp
@@ -359,24 +359,12 @@ void DefNewGeneration::compute_space_boundaries(uintx minimum_eden_size,
   }
   from()->initialize(fromMR, clear_space, mangle_space);
   to()->initialize(toMR, clear_space, mangle_space);
-
-  // Set next compaction spaces.
-  eden()->set_next_compaction_space(from());
-  // The to-space is normally empty before a compaction so need
-  // not be considered.  The exception is during promotion
-  // failure handling when to-space can contain live objects.
-  from()->set_next_compaction_space(nullptr);
 }
 
 void DefNewGeneration::swap_spaces() {
   ContiguousSpace* s = from();
   _from_space        = to();
   _to_space          = s;
-  eden()->set_next_compaction_space(from());
-  // The to-space is normally empty before a compaction so need
-  // not be considered.  The exception is during promotion
-  // failure handling when to-space can contain live objects.
-  from()->set_next_compaction_space(nullptr);
 
   if (UsePerfData) {
     CSpaceCounters* c = _from_counters;
@@ -780,7 +768,6 @@ void DefNewGeneration::collect(bool   full,
     // as a result of a partial evacuation of eden
     // and from-space.
     swap_spaces();   // For uniformity wrt ParNewGeneration.
-    from()->set_next_compaction_space(to());
     heap->set_incremental_collection_failed();
 
     _gc_tracer->report_promotion_failed(_promotion_failed_info);
@@ -801,7 +788,6 @@ void DefNewGeneration::collect(bool   full,
 void DefNewGeneration::init_assuming_no_promotion_failure() {
   _promotion_failed = false;
   _promotion_failed_info.reset();
-  from()->set_next_compaction_space(nullptr);
 }
 
 void DefNewGeneration::remove_forwarding_pointers() {
diff --git a/src/hotspot/share/gc/serial/serialFullGC.cpp b/src/hotspot/share/gc/serial/serialFullGC.cpp
index 4bcdf702fe282..df5e43b3e575f 100644
--- a/src/hotspot/share/gc/serial/serialFullGC.cpp
+++ b/src/hotspot/share/gc/serial/serialFullGC.cpp
@@ -267,7 +267,7 @@ class Compacter {
     _spaces[1].init(heap->young_gen()->eden());
     _spaces[2].init(heap->young_gen()->from());
 
-    bool is_promotion_failed = (heap->young_gen()->from()->next_compaction_space() != nullptr);
+    bool is_promotion_failed = !heap->young_gen()->to()->is_empty();
     if (is_promotion_failed) {
       _spaces[3].init(heap->young_gen()->to());
       _num_spaces = 4;
diff --git a/src/hotspot/share/gc/shared/space.cpp b/src/hotspot/share/gc/shared/space.cpp
index a7e22856a569d..e1ef2e984e2f7 100644
--- a/src/hotspot/share/gc/shared/space.cpp
+++ b/src/hotspot/share/gc/shared/space.cpp
@@ -43,7 +43,6 @@
 ContiguousSpace::ContiguousSpace():
   _bottom(nullptr),
   _end(nullptr),
-  _next_compaction_space(nullptr),
   _top(nullptr) {
   _mangler = new GenSpaceMangler(this);
 }
@@ -64,7 +63,6 @@ void ContiguousSpace::initialize(MemRegion mr,
   if (clear_space) {
     clear(mangle_space);
   }
-  _next_compaction_space = nullptr;
 }
 
 void ContiguousSpace::clear(bool mangle_space) {
diff --git a/src/hotspot/share/gc/shared/space.hpp b/src/hotspot/share/gc/shared/space.hpp
index 44f10cbd1ceb6..36b96a9632509 100644
--- a/src/hotspot/share/gc/shared/space.hpp
+++ b/src/hotspot/share/gc/shared/space.hpp
@@ -64,9 +64,6 @@ class ContiguousSpace: public CHeapObj<mtGC> {
 private:
   HeapWord* _bottom;
   HeapWord* _end;
-
-  ContiguousSpace* _next_compaction_space;
-
   HeapWord* _top;
   // A helper for mangling the unused area of the space in debug builds.
   GenSpaceMangler* _mangler;
@@ -123,18 +120,6 @@ class ContiguousSpace: public CHeapObj<mtGC> {
   // had allocation performed in it, but is now to be considered empty.
   void clear(bool mangle_space);
 
-  // Returns the next space (in the current generation) to be compacted in
-  // the global compaction order.  Also is used to select the next
-  // space into which to compact.
-
-  ContiguousSpace* next_compaction_space() const {
-    return _next_compaction_space;
-  }
-
-  void set_next_compaction_space(ContiguousSpace* csp) {
-    _next_compaction_space = csp;
-  }
-
   // Accessors
   HeapWord* top() const            { return _top;    }
   void set_top(HeapWord* value)    { _top = value; }

From f7069494f4d292060834d0b7b7e92e5516a3001d Mon Sep 17 00:00:00 2001
From: Guoxiong Li <gli@openjdk.org>
Date: Fri, 12 Apr 2024 14:06:07 +0000
Subject: [PATCH 18/32] 8329781: Serial: Remove serialFullGC.inline.hpp

Reviewed-by: ayang, stefank, tschatzl
---
 src/hotspot/share/gc/serial/serialFullGC.cpp  | 30 ++++++++-
 src/hotspot/share/gc/serial/serialFullGC.hpp  |  4 +-
 .../share/gc/serial/serialFullGC.inline.hpp   | 65 -------------------
 3 files changed, 28 insertions(+), 71 deletions(-)
 delete mode 100644 src/hotspot/share/gc/serial/serialFullGC.inline.hpp

diff --git a/src/hotspot/share/gc/serial/serialFullGC.cpp b/src/hotspot/share/gc/serial/serialFullGC.cpp
index df5e43b3e575f..e9e4ad4046f8a 100644
--- a/src/hotspot/share/gc/serial/serialFullGC.cpp
+++ b/src/hotspot/share/gc/serial/serialFullGC.cpp
@@ -24,7 +24,8 @@
 
 #include "precompiled.hpp"
 #include "classfile/classLoaderDataGraph.hpp"
-#include "classfile/javaClasses.hpp"
+#include "classfile/classLoaderData.inline.hpp"
+#include "classfile/javaClasses.inline.hpp"
 #include "classfile/stringTable.hpp"
 #include "classfile/symbolTable.hpp"
 #include "classfile/systemDictionary.hpp"
@@ -34,11 +35,13 @@
 #include "compiler/oopMap.hpp"
 #include "gc/serial/cardTableRS.hpp"
 #include "gc/serial/defNewGeneration.hpp"
-#include "gc/serial/serialFullGC.inline.hpp"
+#include "gc/serial/serialFullGC.hpp"
 #include "gc/serial/serialGcRefProcProxyTask.hpp"
 #include "gc/serial/serialHeap.hpp"
+#include "gc/serial/serialStringDedup.hpp"
 #include "gc/shared/classUnloadingContext.hpp"
 #include "gc/shared/collectedHeap.inline.hpp"
+#include "gc/shared/continuationGCSupport.inline.hpp"
 #include "gc/shared/gcHeapSummary.hpp"
 #include "gc/shared/gcTimer.hpp"
 #include "gc/shared/gcTrace.hpp"
@@ -56,11 +59,13 @@
 #include "oops/access.inline.hpp"
 #include "oops/compressedOops.inline.hpp"
 #include "oops/instanceRefKlass.hpp"
+#include "oops/markWord.hpp"
 #include "oops/methodData.hpp"
 #include "oops/objArrayKlass.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/typeArrayOop.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
+#include "utilities/align.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/events.hpp"
 #include "utilities/stack.inline.hpp"
@@ -327,7 +332,7 @@ class Compacter {
       while (cur_addr < top) {
         prefetch_write_scan(cur_addr);
         if (cur_addr < first_dead || cast_to_oop(cur_addr)->is_gc_marked()) {
-          size_t size = SerialFullGC::adjust_pointers(cast_to_oop(cur_addr));
+          size_t size = cast_to_oop(cur_addr)->oop_iterate_size(&SerialFullGC::adjust_pointer_closure);
           cur_addr += size;
         } else {
           assert(*(HeapWord**)cur_addr > cur_addr, "forward progress");
@@ -609,6 +614,25 @@ void MarkAndPushClosure::do_oop_work(T* p)            { SerialFullGC::mark_and_p
 void MarkAndPushClosure::do_oop(      oop* p)         { do_oop_work(p); }
 void MarkAndPushClosure::do_oop(narrowOop* p)         { do_oop_work(p); }
 
+template <class T> void SerialFullGC::adjust_pointer(T* p) {
+  T heap_oop = RawAccess<>::oop_load(p);
+  if (!CompressedOops::is_null(heap_oop)) {
+    oop obj = CompressedOops::decode_not_null(heap_oop);
+    assert(Universe::heap()->is_in(obj), "should be in heap");
+
+    if (obj->is_forwarded()) {
+      oop new_obj = obj->forwardee();
+      assert(is_object_aligned(new_obj), "oop must be aligned");
+      RawAccess<IS_NOT_NULL>::oop_store(p, new_obj);
+    }
+  }
+}
+
+template <typename T>
+void AdjustPointerClosure::do_oop_work(T* p)           { SerialFullGC::adjust_pointer(p); }
+inline void AdjustPointerClosure::do_oop(oop* p)       { do_oop_work(p); }
+inline void AdjustPointerClosure::do_oop(narrowOop* p) { do_oop_work(p); }
+
 AdjustPointerClosure SerialFullGC::adjust_pointer_closure;
 
 void SerialFullGC::adjust_marks() {
diff --git a/src/hotspot/share/gc/serial/serialFullGC.hpp b/src/hotspot/share/gc/serial/serialFullGC.hpp
index fba41560e677c..fff287b1f5062 100644
--- a/src/hotspot/share/gc/serial/serialFullGC.hpp
+++ b/src/hotspot/share/gc/serial/serialFullGC.hpp
@@ -133,11 +133,9 @@ class SerialFullGC : AllStatic {
   static void adjust_marks();   // Adjust the pointers in the preserved marks table
   static void restore_marks();  // Restore the marks that we saved in preserve_mark
 
-  static size_t adjust_pointers(oop obj);
-
   static void follow_stack();   // Empty marking stack.
 
-  template <class T> static inline void adjust_pointer(T* p);
+  template <class T> static void adjust_pointer(T* p);
 
   // Check mark and maybe push on marking stack
   template <class T> static void mark_and_push(T* p);
diff --git a/src/hotspot/share/gc/serial/serialFullGC.inline.hpp b/src/hotspot/share/gc/serial/serialFullGC.inline.hpp
deleted file mode 100644
index 2984c49b3d5ba..0000000000000
--- a/src/hotspot/share/gc/serial/serialFullGC.inline.hpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_GC_SERIAL_SERIALFULLGC_INLINE_HPP
-#define SHARE_GC_SERIAL_SERIALFULLGC_INLINE_HPP
-
-#include "gc/serial/serialFullGC.hpp"
-
-#include "classfile/classLoaderData.inline.hpp"
-#include "classfile/javaClasses.inline.hpp"
-#include "gc/shared/continuationGCSupport.inline.hpp"
-#include "gc/serial/serialStringDedup.hpp"
-#include "memory/universe.hpp"
-#include "oops/markWord.hpp"
-#include "oops/access.inline.hpp"
-#include "oops/compressedOops.inline.hpp"
-#include "oops/oop.inline.hpp"
-#include "utilities/align.hpp"
-#include "utilities/stack.inline.hpp"
-
-template <class T> inline void SerialFullGC::adjust_pointer(T* p) {
-  T heap_oop = RawAccess<>::oop_load(p);
-  if (!CompressedOops::is_null(heap_oop)) {
-    oop obj = CompressedOops::decode_not_null(heap_oop);
-    assert(Universe::heap()->is_in(obj), "should be in heap");
-
-    if (obj->is_forwarded()) {
-      oop new_obj = obj->forwardee();
-      assert(is_object_aligned(new_obj), "oop must be aligned");
-      RawAccess<IS_NOT_NULL>::oop_store(p, new_obj);
-    }
-  }
-}
-
-template <typename T>
-void AdjustPointerClosure::do_oop_work(T* p)           { SerialFullGC::adjust_pointer(p); }
-inline void AdjustPointerClosure::do_oop(oop* p)       { do_oop_work(p); }
-inline void AdjustPointerClosure::do_oop(narrowOop* p) { do_oop_work(p); }
-
-inline size_t SerialFullGC::adjust_pointers(oop obj) {
-  return obj->oop_iterate_size(&SerialFullGC::adjust_pointer_closure);
-}
-
-#endif // SHARE_GC_SERIAL_SERIALFULLGC_INLINE_HPP

From 397d94831033e91c7a849774bf4e80d8f1c8ec66 Mon Sep 17 00:00:00 2001
From: Matthias Baesken <mbaesken@openjdk.org>
Date: Fri, 12 Apr 2024 14:09:23 +0000
Subject: [PATCH 19/32] 8329605: hs errfile generic events - move memory
 protections and nmethod flushes to separate sections

Reviewed-by: lucy, stefank, stuefe
---
 src/hotspot/os/aix/os_aix.cpp          |  2 +-
 src/hotspot/os/bsd/os_bsd.cpp          |  6 +++---
 src/hotspot/os/linux/os_linux.cpp      |  2 +-
 src/hotspot/share/code/nmethod.cpp     |  2 +-
 src/hotspot/share/utilities/events.cpp |  6 +++++-
 src/hotspot/share/utilities/events.hpp | 30 +++++++++++++++++++++++++-
 6 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/src/hotspot/os/aix/os_aix.cpp b/src/hotspot/os/aix/os_aix.cpp
index 9e9e2575b0f50..39400db75e191 100644
--- a/src/hotspot/os/aix/os_aix.cpp
+++ b/src/hotspot/os/aix/os_aix.cpp
@@ -1934,7 +1934,7 @@ static bool checked_mprotect(char* addr, size_t size, int prot) {
   //
   // See http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/mprotect.htm
 
-  Events::log(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(addr), p2i(addr+size), prot);
+  Events::log_memprotect(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(addr), p2i(addr+size), prot);
   bool rc = ::mprotect(addr, size, prot) == 0 ? true : false;
 
   if (!rc) {
diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp
index bf615d1b37ac6..4b849c5bad416 100644
--- a/src/hotspot/os/bsd/os_bsd.cpp
+++ b/src/hotspot/os/bsd/os_bsd.cpp
@@ -1590,7 +1590,7 @@ bool os::pd_commit_memory(char* addr, size_t size, bool exec) {
   int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
 #if defined(__OpenBSD__)
   // XXX: Work-around mmap/MAP_FIXED bug temporarily on OpenBSD
-  Events::log(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(addr), p2i(addr+size), prot);
+  Events::log_memprotect(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(addr), p2i(addr+size), prot);
   if (::mprotect(addr, size, prot) == 0) {
     return true;
   } else {
@@ -1711,7 +1711,7 @@ bool os::numa_get_group_ids_for_range(const void** addresses, int* lgrp_ids, siz
 bool os::pd_uncommit_memory(char* addr, size_t size, bool exec) {
 #if defined(__OpenBSD__)
   // XXX: Work-around mmap/MAP_FIXED bug temporarily on OpenBSD
-  Events::log(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with PROT_NONE", p2i(addr), p2i(addr+size));
+  Events::log_memprotect(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with PROT_NONE", p2i(addr), p2i(addr+size));
   if (::mprotect(addr, size, PROT_NONE) == 0) {
     return true;
   } else {
@@ -1829,7 +1829,7 @@ static bool bsd_mprotect(char* addr, size_t size, int prot) {
   assert(addr == bottom, "sanity check");
 
   size = align_up(pointer_delta(addr, bottom, 1) + size, os::vm_page_size());
-  Events::log(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(bottom), p2i(bottom+size), prot);
+  Events::log_memprotect(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(bottom), p2i(bottom+size), prot);
   return ::mprotect(bottom, size, prot) == 0;
 }
 
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
index 18a9a38db7f62..974ca7c8553b1 100644
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -3786,7 +3786,7 @@ static bool linux_mprotect(char* addr, size_t size, int prot) {
 #ifdef CAN_SHOW_REGISTERS_ON_ASSERT
   if (addr != g_assert_poison)
 #endif
-  Events::log(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(bottom), p2i(bottom+size), prot);
+  Events::log_memprotect(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(bottom), p2i(bottom+size), prot);
   return ::mprotect(bottom, size, prot) == 0;
 }
 
diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp
index 51ba872b3ac66..4f0f6c15af14d 100644
--- a/src/hotspot/share/code/nmethod.cpp
+++ b/src/hotspot/share/code/nmethod.cpp
@@ -2037,7 +2037,7 @@ void nmethod::purge(bool free_code_cache_data, bool unregister_nmethod) {
   MutexLocker ml(CodeCache_lock, Mutex::_no_safepoint_check_flag);
 
   // completely deallocate this method
-  Events::log(Thread::current(), "flushing nmethod " INTPTR_FORMAT, p2i(this));
+  Events::log_nmethod_flush(Thread::current(), "flushing %s nmethod " INTPTR_FORMAT, is_osr_method() ? "osr" : "", p2i(this));
   log_debug(codecache)("*flushing %s nmethod %3d/" INTPTR_FORMAT ". Live blobs:" UINT32_FORMAT
                        "/Free CodeCache:" SIZE_FORMAT "Kb",
                        is_osr_method() ? "osr" : "",_compile_id, p2i(this), CodeCache::blob_count(),
diff --git a/src/hotspot/share/utilities/events.cpp b/src/hotspot/share/utilities/events.cpp
index f89821800d14d..b4d46d79ffa69 100644
--- a/src/hotspot/share/utilities/events.cpp
+++ b/src/hotspot/share/utilities/events.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,6 +37,8 @@
 
 EventLog* Events::_logs = nullptr;
 StringEventLog* Events::_messages = nullptr;
+StringEventLog* Events::_memprotect_messages = nullptr;
+StringEventLog* Events::_nmethod_flush_messages = nullptr;
 StringEventLog* Events::_vm_operations = nullptr;
 StringEventLog* Events::_zgc_phase_switch = nullptr;
 ExceptionsEventLog* Events::_exceptions = nullptr;
@@ -97,6 +99,8 @@ void Events::print() {
 void Events::init() {
   if (LogEvents) {
     _messages = new StringEventLog("Events", "events");
+    _nmethod_flush_messages = new StringEventLog("Nmethod flushes", "nmethodflushes");
+    _memprotect_messages = new StringEventLog("Memory protections", "memprotects");
     _vm_operations = new StringEventLog("VM Operations", "vmops");
     if (UseZGC) {
       _zgc_phase_switch = new StringEventLog("ZGC Phase Switch", "zgcps");
diff --git a/src/hotspot/share/utilities/events.hpp b/src/hotspot/share/utilities/events.hpp
index b400fd707faf8..0aefbbefd2bb1 100644
--- a/src/hotspot/share/utilities/events.hpp
+++ b/src/hotspot/share/utilities/events.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -220,6 +220,12 @@ class Events : AllStatic {
   // A log for generic messages that aren't well categorized.
   static StringEventLog* _messages;
 
+  // A log for memory protection related messages
+  static StringEventLog* _memprotect_messages;
+
+  // A log for nmethod flush operations
+  static StringEventLog* _nmethod_flush_messages;
+
   // A log for VM Operations
   static StringEventLog* _vm_operations;
 
@@ -259,6 +265,10 @@ class Events : AllStatic {
   // Logs a generic message with timestamp and format as printf.
   static void log(Thread* thread, const char* format, ...) ATTRIBUTE_PRINTF(2, 3);
 
+  static void log_memprotect(Thread* thread, const char* format, ...) ATTRIBUTE_PRINTF(2, 3);
+
+  static void log_nmethod_flush(Thread* thread, const char* format, ...) ATTRIBUTE_PRINTF(2, 3);
+
   static void log_vm_operation(Thread* thread, const char* format, ...) ATTRIBUTE_PRINTF(2, 3);
 
   static void log_zgc_phase_switch(const char* format, ...) ATTRIBUTE_PRINTF(1, 2);
@@ -290,6 +300,24 @@ inline void Events::log(Thread* thread, const char* format, ...) {
   }
 }
 
+inline void Events::log_memprotect(Thread* thread, const char* format, ...) {
+  if (LogEvents && _memprotect_messages != nullptr) {
+    va_list ap;
+    va_start(ap, format);
+    _memprotect_messages->logv(thread, format, ap);
+    va_end(ap);
+  }
+}
+
+inline void Events::log_nmethod_flush(Thread* thread, const char* format, ...) {
+  if (LogEvents && _nmethod_flush_messages != nullptr) {
+    va_list ap;
+    va_start(ap, format);
+    _nmethod_flush_messages->logv(thread, format, ap);
+    va_end(ap);
+  }
+}
+
 inline void Events::log_vm_operation(Thread* thread, const char* format, ...) {
   if (LogEvents && _vm_operations != nullptr) {
     va_list ap;

From 68f86dccce601ec10111dc3e535d28ce9fc80928 Mon Sep 17 00:00:00 2001
From: Magnus Ihse Bursie <ihse@openjdk.org>
Date: Fri, 12 Apr 2024 20:57:56 +0000
Subject: [PATCH 20/32] 8330110: AIX build fails after JDK-8329704 - issue with
 libjli.a

Reviewed-by: mbaesken, mdoerr
---
 make/common/JdkNativeCompilation.gmk | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/make/common/JdkNativeCompilation.gmk b/make/common/JdkNativeCompilation.gmk
index b425bdbae2bbc..4385fb959882d 100644
--- a/make/common/JdkNativeCompilation.gmk
+++ b/make/common/JdkNativeCompilation.gmk
@@ -118,6 +118,8 @@ define ResolveLibPath
       else
         ifeq ($(STATIC_LIBS), true)
           $1_$2_LIBPATH := $$(SUPPORT_OUTPUTDIR)/native/$$($1_$2_MODULE)/lib$$($1_$2_NAME)/static
+        else ifeq ($$($1_$2_STATIC_LIBRARY), true)
+          $1_$2_LIBPATH := $$(SUPPORT_OUTPUTDIR)/native/$$($1_$2_MODULE)
         else
           $1_$2_LIBPATH := $$(SUPPORT_OUTPUTDIR)/modules_libs/$$($1_$2_MODULE)
         endif

From 28b201955907e145f208d756b607ab545a83b2d3 Mon Sep 17 00:00:00 2001
From: Alex Menkov <amenkov@openjdk.org>
Date: Fri, 12 Apr 2024 22:29:34 +0000
Subject: [PATCH 21/32] 8240343: JDI stopListening/stoplis001 "FAILED:
 listening is successfully stopped without starting listening"

Reviewed-by: cjplummer, sspitsyn
---
 .../stopListening/stoplis001.java             |  7 +++++-
 .../stopListening/stoplis001/TEST.properties  | 24 -------------------
 .../stoplis001/TestDescription.java           |  3 ---
 3 files changed, 6 insertions(+), 28 deletions(-)
 delete mode 100644 test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001/TEST.properties

diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001.java b/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001.java
index 9fa40e17d5a9a..1fd2d0f8a0a5a 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001.java
@@ -86,7 +86,6 @@ private int runIt(String argv[], PrintStream out) {
         this.out = out;
         log = new Log(out, argHandler);
 
-        Map<String,? extends com.sun.jdi.connect.Connector.Argument> cArgs1 = initConnector(argHandler.getTransportPort());
         Map<String,? extends com.sun.jdi.connect.Connector.Argument> cArgs2 = initConnector(null);
         if ((addr = startListen(cArgs2)) == null) {
             log.complain("FAILURE: unable to start listening the address " +
@@ -96,6 +95,12 @@ private int runIt(String argv[], PrintStream out) {
         else
             log.display("TEST: start listening the address " + addr);
 
+        // argHandler.getTransportPort() returns a free port (different from the port allocated by startListen(cArgs2))
+        Map<String,? extends com.sun.jdi.connect.Connector.Argument> cArgs1 = initConnector(argHandler.getTransportPort());
+
+        log.display("cArgs1: " + cArgs1);
+        log.display("cArgs2: " + cArgs2);
+
 /* Check that an Exception is thrown if ListeningConnector.stopListening
  has been invoked with argument map different from the one given for
  a previous ListeningConnector.startListening() invocation */
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001/TEST.properties b/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001/TEST.properties
deleted file mode 100644
index 8b51b2a911560..0000000000000
--- a/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001/TEST.properties
+++ /dev/null
@@ -1,24 +0,0 @@
-#
-# Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-
-exclusiveAccess.dirs=.
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001/TestDescription.java b/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001/TestDescription.java
index c3298a3111fd1..121f67d498cd5 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001/TestDescription.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/jdi/ListeningConnector/stopListening/stoplis001/TestDescription.java
@@ -40,9 +40,6 @@
  *       argument map is the same with the one given for the previous
  *       ListeningConnector.startListening() invocation.
  *
- *     NOTE: this test is tagged "nonconcurrent" because it uses the default
- *     "javadebug" shmem file, as do some other tests.
- *
  * @library /vmTestbase
  *          /test/lib
  * @build nsk.jdi.ListeningConnector.stopListening.stoplis001

From b9ef9f667ef9d4052c9d6dfec763b94d331dc04d Mon Sep 17 00:00:00 2001
From: Scott Gibbons <sgibbons@openjdk.org>
Date: Sat, 13 Apr 2024 00:48:52 +0000
Subject: [PATCH 22/32] 8330185: Potential uncaught unsafe memory copy
 exception

Reviewed-by: kvn, sviswanathan
---
 src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
index c802f953c9054..8e77b9cc6c7cd 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
@@ -728,7 +728,8 @@ address StubGenerator::generate_disjoint_copy_avx3_masked(address* entry, const
 
   if (MaxVectorSize == 64) {
     __ BIND(L_copy_large);
-    arraycopy_avx3_large(to, from, temp1, temp2, temp3, temp4, count, xmm1, xmm2, xmm3, xmm4, shift);
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, false, ucme_exit_pc);
+      arraycopy_avx3_large(to, from, temp1, temp2, temp3, temp4, count, xmm1, xmm2, xmm3, xmm4, shift);
     __ jmp(L_finish);
   }
   return start;

From c1c99a669bb7f9928086db6a4ecfc90c410ffbb0 Mon Sep 17 00:00:00 2001
From: Serguei Spitsyn <sspitsyn@openjdk.org>
Date: Sat, 13 Apr 2024 09:22:00 +0000
Subject: [PATCH 23/32] 8329674: JvmtiEnvThreadState::reset_current_location
 function should use JvmtiHandshake

Reviewed-by: lmesnik, pchilanomate
---
 src/hotspot/share/prims/jvmtiEnvBase.cpp      |   5 +-
 .../share/prims/jvmtiEnvThreadState.cpp       | 101 ++++++------------
 src/hotspot/share/runtime/vmOperation.hpp     |   1 -
 3 files changed, 34 insertions(+), 73 deletions(-)

diff --git a/src/hotspot/share/prims/jvmtiEnvBase.cpp b/src/hotspot/share/prims/jvmtiEnvBase.cpp
index 9b9197aa88871..778cc17ffe06b 100644
--- a/src/hotspot/share/prims/jvmtiEnvBase.cpp
+++ b/src/hotspot/share/prims/jvmtiEnvBase.cpp
@@ -1997,8 +1997,11 @@ JvmtiHandshake::execute(JvmtiUnitedHandshakeClosure* hs_cl, jthread target) {
 void
 JvmtiHandshake::execute(JvmtiUnitedHandshakeClosure* hs_cl, ThreadsListHandle* tlh,
                         JavaThread* target_jt, Handle target_h) {
+  JavaThread* current = JavaThread::current();
   bool is_virtual = java_lang_VirtualThread::is_instance(target_h());
-  bool self = target_jt == JavaThread::current();
+  bool self = target_jt == current;
+
+  assert(!Continuations::enabled() || self || !is_virtual || current->is_VTMS_transition_disabler(), "sanity check");
 
   hs_cl->set_target_jt(target_jt);   // can be needed in the virtual thread case
   hs_cl->set_is_virtual(is_virtual); // can be needed in the virtual thread case
diff --git a/src/hotspot/share/prims/jvmtiEnvThreadState.cpp b/src/hotspot/share/prims/jvmtiEnvThreadState.cpp
index 6f2891fdc18cd..f61f9415bbc0e 100644
--- a/src/hotspot/share/prims/jvmtiEnvThreadState.cpp
+++ b/src/hotspot/share/prims/jvmtiEnvThreadState.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -264,57 +264,18 @@ bool JvmtiEnvThreadState::is_frame_pop(int cur_frame_number) {
   return get_frame_pops()->contains(fp);
 }
 
-class VM_VirtualThreadGetCurrentLocation : public VM_Operation {
- private:
-   Handle _vthread_h;
-   jmethodID _method_id;
-   int _bci;
-   bool _completed;
-
- public:
-  VM_VirtualThreadGetCurrentLocation(Handle vthread_h)
-    : _vthread_h(vthread_h),
-      _method_id(nullptr),
-      _bci(0),
-      _completed(false)
-  {}
-
-  VMOp_Type type() const { return VMOp_VirtualThreadGetCurrentLocation; }
-  void doit() {
-    if (!JvmtiEnvBase::is_vthread_alive(_vthread_h())) {
-      return; // _completed remains false.
-    }
-    ResourceMark rm;
-    javaVFrame* jvf = JvmtiEnvBase::get_vthread_jvf(_vthread_h());
-
-    if (jvf != nullptr) {
-      // jvf can be null, when the native enterSpecial frame is on the top.
-      Method* method = jvf->method();
-      _method_id = method->jmethod_id();
-      _bci = jvf->bci();
-    }
-    _completed = true;
-  }
-  void get_current_location(jmethodID *method_id, int *bci) {
-    *method_id = _method_id;
-    *bci = _bci;
-  }
-  bool completed() {
-    return _completed;
-  }
-};
-
-class GetCurrentLocationClosure : public HandshakeClosure {
+class GetCurrentLocationClosure : public JvmtiUnitedHandshakeClosure {
  private:
    jmethodID _method_id;
    int _bci;
    bool _completed;
  public:
   GetCurrentLocationClosure()
-    : HandshakeClosure("GetCurrentLocation"),
+    : JvmtiUnitedHandshakeClosure("GetCurrentLocation"),
       _method_id(nullptr),
       _bci(0),
       _completed(false) {}
+
   void do_thread(Thread *target) {
     JavaThread *jt = JavaThread::cast(target);
     ResourceMark rmark; // jt != Thread::current()
@@ -335,6 +296,20 @@ class GetCurrentLocationClosure : public HandshakeClosure {
     }
     _completed = true;
   }
+  void do_vthread(Handle target_h) {
+    assert(_target_jt == nullptr || !_target_jt->is_exiting(), "sanity check");
+    // use jvmti_vthread() as vthread() can be outdated
+    assert(_target_jt == nullptr || _target_jt->jvmti_vthread() == target_h(), "sanity check");
+    ResourceMark rm;
+    javaVFrame *jvf = JvmtiEnvBase::get_vthread_jvf(target_h());
+
+    if (jvf != nullptr) {
+      Method* method = jvf->method();
+      _method_id = method->jmethod_id();
+      _bci = jvf->bci();
+    }
+    _completed = true;
+  }
   void get_current_location(jmethodID *method_id, int *bci) {
     *method_id = _method_id;
     *bci = _bci;
@@ -372,41 +347,25 @@ void JvmtiEnvThreadState::reset_current_location(jvmtiEvent event_type, bool ena
   if (enabled) {
     // If enabling breakpoint, no need to reset.
     // Can't do anything if empty stack.
-
     JavaThread* thread = get_thread_or_saved();
 
-    oop thread_oop = jvmti_thread_state()->get_thread_oop();
+    if (event_type == JVMTI_EVENT_SINGLE_STEP &&
+        ((thread == nullptr && is_virtual()) || thread->has_last_Java_frame())) {
+      JavaThread* current = JavaThread::current();
+      HandleMark hm(current);
+      oop thread_oop = jvmti_thread_state()->get_thread_oop();
+      Handle thread_h(current, thread_oop);
+      ThreadsListHandle tlh(current);
+
+      GetCurrentLocationClosure op;
+      JvmtiHandshake::execute(&op, &tlh, thread, thread_h);
 
-    if (thread == nullptr && event_type == JVMTI_EVENT_SINGLE_STEP && is_virtual()) {
-      // Handle the unmounted virtual thread case.
-      jmethodID method_id;
-      int bci;
-      JavaThread* cur_thread = JavaThread::current();
-      HandleMark hm(cur_thread);
-      VM_VirtualThreadGetCurrentLocation op(Handle(cur_thread, thread_oop));
-      VMThread::execute(&op);
       if (op.completed()) {
-        // Do nothing if virtual thread has been already terminated.
+        jmethodID method_id;
+        int bci;
         op.get_current_location(&method_id, &bci);
         set_current_location(method_id, bci);
       }
-      return;
-    }
-    if (event_type == JVMTI_EVENT_SINGLE_STEP && thread->has_last_Java_frame()) {
-      jmethodID method_id;
-      int bci;
-      // The java thread stack may not be walkable for a running thread
-      // so get current location with direct handshake.
-      GetCurrentLocationClosure op;
-      Thread *current = Thread::current();
-      if (thread->is_handshake_safe_for(current)) {
-        op.do_thread(thread);
-      } else {
-        Handshake::execute(&op, thread);
-        guarantee(op.completed(), "Handshake failed. Target thread is not alive?");
-      }
-      op.get_current_location(&method_id, &bci);
-      set_current_location(method_id, bci);
     }
   } else if (event_type == JVMTI_EVENT_SINGLE_STEP || !is_enabled(JVMTI_EVENT_SINGLE_STEP)) {
     // If this is to disable breakpoint, also check if single-step is not enabled
diff --git a/src/hotspot/share/runtime/vmOperation.hpp b/src/hotspot/share/runtime/vmOperation.hpp
index ed136e90206cc..f6991d78f8e8d 100644
--- a/src/hotspot/share/runtime/vmOperation.hpp
+++ b/src/hotspot/share/runtime/vmOperation.hpp
@@ -82,7 +82,6 @@
   template(ChangeBreakpoints)                     \
   template(GetOrSetLocal)                         \
   template(VirtualThreadGetOrSetLocal)            \
-  template(VirtualThreadGetCurrentLocation)       \
   template(ChangeSingleStep)                      \
   template(SetNotifyJvmtiEventsMode)              \
   template(HeapWalkOperation)                     \

From 1abb826210eda76a3dc8cf1c10e6df6f2413a87a Mon Sep 17 00:00:00 2001
From: Leonid Mesnik <lmesnik@openjdk.org>
Date: Sun, 14 Apr 2024 16:02:07 +0000
Subject: [PATCH 24/32] 8330131: Problemlist
 serviceability/jvmti/vthread/GetThreadStateMountedTest/GetThreadStateMountedTest.java

Reviewed-by: dcubed
---
 test/hotspot/jtreg/ProblemList.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/hotspot/jtreg/ProblemList.txt b/test/hotspot/jtreg/ProblemList.txt
index 6221bd4bb2f0e..8e144b692f29b 100644
--- a/test/hotspot/jtreg/ProblemList.txt
+++ b/test/hotspot/jtreg/ProblemList.txt
@@ -133,6 +133,7 @@ serviceability/sa/sadebugd/DebugdConnectTest.java 8239062,8270326 macosx-x64,mac
 serviceability/sa/TestRevPtrsForInvokeDynamic.java 8241235 generic-all
 
 serviceability/jvmti/ModuleAwareAgents/ThreadStart/MAAThreadStart.java 8225354 windows-all
+serviceability/jvmti/vthread/GetThreadStateMountedTest/GetThreadStateMountedTest.java 8318090,8318729 generic-all
 serviceability/jvmti/vthread/GetSetLocalTest/GetSetLocalTest.java 8286836 generic-all
 serviceability/dcmd/gc/RunFinalizationTest.java 8227120 linux-all,windows-x64,aix-ppc64
 

From b486709b0627cfb4cf428a6508ef7c5b14e6da57 Mon Sep 17 00:00:00 2001
From: Christian Hagedorn <chagedorn@openjdk.org>
Date: Mon, 15 Apr 2024 06:25:24 +0000
Subject: [PATCH 25/32] 8328480: C2: SubTypeCheckNode in checkcast should use
 the klass constant of a unique concrete sub class

Reviewed-by: roland, kvn
---
 src/hotspot/share/opto/graphKit.cpp           | 19 +++--
 .../types/TestSubTypeCheckUniqueSubclass.java | 73 +++++++++++++++++++
 2 files changed, 85 insertions(+), 7 deletions(-)
 create mode 100644 test/hotspot/jtreg/compiler/types/TestSubTypeCheckUniqueSubclass.java

diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp
index 1ba6896170e70..6bfdae1cf5ca9 100644
--- a/src/hotspot/share/opto/graphKit.cpp
+++ b/src/hotspot/share/opto/graphKit.cpp
@@ -3268,8 +3268,9 @@ Node* GraphKit::gen_instanceof(Node* obj, Node* superklass, bool safe_for_replac
 Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
                               Node* *failure_control) {
   kill_dead_locals();           // Benefit all the uncommon traps
-  const TypeKlassPtr *tk = _gvn.type(superklass)->is_klassptr()->try_improve();
-  const TypeOopPtr *toop = tk->cast_to_exactness(false)->as_instance_type();
+  const TypeKlassPtr* klass_ptr_type = _gvn.type(superklass)->is_klassptr();
+  const TypeKlassPtr* improved_klass_ptr_type = klass_ptr_type->try_improve();
+  const TypeOopPtr* toop = improved_klass_ptr_type->cast_to_exactness(false)->as_instance_type();
 
   // Fast cutout:  Check the case that the cast is vacuously true.
   // This detects the common cases where the test will short-circuit
@@ -3277,10 +3278,10 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
   // because if the test is going to turn into zero code, we don't
   // want a residual null check left around.  (Causes a slowdown,
   // for example, in some objArray manipulations, such as a[i]=a[j].)
-  if (tk->singleton()) {
+  if (improved_klass_ptr_type->singleton()) {
     const TypeOopPtr* objtp = _gvn.type(obj)->isa_oopptr();
     if (objtp != nullptr) {
-      switch (C->static_subtype_check(tk, objtp->as_klass_type())) {
+      switch (C->static_subtype_check(improved_klass_ptr_type, objtp->as_klass_type())) {
       case Compile::SSC_always_true:
         // If we know the type check always succeed then we don't use
         // the profiling data at this bytecode. Don't lose it, feed it
@@ -3346,7 +3347,7 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
   }
 
   Node* cast_obj = nullptr;
-  if (tk->klass_is_exact()) {
+  if (improved_klass_ptr_type->klass_is_exact()) {
     // The following optimization tries to statically cast the speculative type of the object
     // (for example obtained during profiling) to the type of the superklass and then do a
     // dynamic check that the type of the object is what we expect. To work correctly
@@ -3356,7 +3357,7 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
     // a speculative type use it to perform an exact cast.
     ciKlass* spec_obj_type = obj_type->speculative_type();
     if (spec_obj_type != nullptr || data != nullptr) {
-      cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk, spec_obj_type, safe_for_replace);
+      cast_obj = maybe_cast_profiled_receiver(not_null_obj, improved_klass_ptr_type, spec_obj_type, safe_for_replace);
       if (cast_obj != nullptr) {
         if (failure_control != nullptr) // failure is now impossible
           (*failure_control) = top();
@@ -3368,7 +3369,11 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
 
   if (cast_obj == nullptr) {
     // Generate the subtype check
-    Node* not_subtype_ctrl = gen_subtype_check(not_null_obj, superklass );
+    Node* improved_superklass = superklass;
+    if (improved_klass_ptr_type != klass_ptr_type && improved_klass_ptr_type->singleton()) {
+      improved_superklass = makecon(improved_klass_ptr_type);
+    }
+    Node* not_subtype_ctrl = gen_subtype_check(not_null_obj, improved_superklass);
 
     // Plug in success path into the merge
     cast_obj = _gvn.transform(new CheckCastPPNode(control(), not_null_obj, toop));
diff --git a/test/hotspot/jtreg/compiler/types/TestSubTypeCheckUniqueSubclass.java b/test/hotspot/jtreg/compiler/types/TestSubTypeCheckUniqueSubclass.java
new file mode 100644
index 0000000000000..ceee83f3d6a34
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/types/TestSubTypeCheckUniqueSubclass.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8328480
+ * @summary Test that SubTypeCheckNode takes improved unique concrete klass constant in order to fold consecutive sub
+ *          type checks.
+ * @library /test/lib /
+ * @run driver compiler.types.TestSubTypeCheckUniqueSubclass
+ */
+
+package compiler.types;
+
+import compiler.lib.ir_framework.*;
+
+public class TestSubTypeCheckUniqueSubclass {
+    static Object o = new C(); // Make sure C is loaded.
+    static Object o2 = new C2(); // Make sure C2 is loaded while NeverLoaded is not.
+
+    public static void main(String[] args) {
+        TestFramework.run();
+    }
+
+    @Test
+    @Warmup(0)
+    @IR(counts = {IRNode.SUBTYPE_CHECK, "1"},
+        phase = CompilePhase.ITER_GVN1)
+    static void testAbstractAbstract() {
+         A a = (A)o;
+         A a2 = (B)o;
+    }
+
+    @Test
+    @Warmup(0)
+    @IR(counts = {IRNode.SUBTYPE_CHECK, "1"},
+            phase = CompilePhase.ITER_GVN1)
+    static void testAbstractAbstractWithUnloaded() {
+        A2 a = (A2)o2;
+        A2 a2 = (B2)o2;
+    }
+}
+
+abstract class A {}
+abstract class B extends A {}
+class C extends B {}
+
+abstract class A2 {}
+abstract class B2 extends A2 {}
+class C2 extends B2 {}
+
+// Class never loaded -> C2 looks like unique sub class.
+class NeverLoaded extends B2 {}

From bc1a1a5861741ce5002e28eed2ea84ddafd68c70 Mon Sep 17 00:00:00 2001
From: David Holmes <dholmes@openjdk.org>
Date: Mon, 15 Apr 2024 07:31:58 +0000
Subject: [PATCH 26/32] 8330064: JFR: Incorrect function declarations for
 register/unregister_stack_filter

Reviewed-by: mgronlun, dcubed, egahlin, jwaters
---
 src/hotspot/share/jfr/jni/jfrJniMethod.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/hotspot/share/jfr/jni/jfrJniMethod.hpp b/src/hotspot/share/jfr/jni/jfrJniMethod.hpp
index 72351d3e1c1db..6a2d622d7e9a7 100644
--- a/src/hotspot/share/jfr/jni/jfrJniMethod.hpp
+++ b/src/hotspot/share/jfr/jni/jfrJniMethod.hpp
@@ -161,9 +161,9 @@ jlong JNICALL jfr_host_total_swap_memory(JNIEnv* env, jclass jvm);
 
 void JNICALL jfr_emit_data_loss(JNIEnv* env, jclass jvm, jlong bytes);
 
-jlong JNICALL jfr_register_stack_filter(JNIEnv* env, jobject classes, jobject methods);
+jlong JNICALL jfr_register_stack_filter(JNIEnv* env, jclass jvm, jobjectArray classes, jobjectArray methods);
 
-jlong JNICALL jfr_unregister_stack_filter(JNIEnv* env, jlong start_filter_id);
+jlong JNICALL jfr_unregister_stack_filter(JNIEnv* env, jclass jvm, jlong id);
 
 #ifdef __cplusplus
 }

From d22d56087142d2fdf8ffc5fcde83dd08f07f6933 Mon Sep 17 00:00:00 2001
From: David Holmes <dholmes@openjdk.org>
Date: Mon, 15 Apr 2024 07:33:10 +0000
Subject: [PATCH 27/32] 8329864: TestLibGraal.java still crashes with
 assert(_stack_base != nullptr)

Reviewed-by: dnsimon, jsjolen, dcubed
---
 src/hotspot/share/runtime/javaThread.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp
index 8fe8e368f09b2..f6d514055b9a3 100644
--- a/src/hotspot/share/runtime/javaThread.cpp
+++ b/src/hotspot/share/runtime/javaThread.cpp
@@ -1580,9 +1580,11 @@ void JavaThread::print_on_error(outputStream* st, char *buf, int buflen) const {
   if (osthread()) {
     st->print(", id=%d", osthread()->thread_id());
   }
+  // Use raw field members for stack base/size as this could be
+  // called before a thread has run enough to initialize them.
   st->print(", stack(" PTR_FORMAT "," PTR_FORMAT ") (" PROPERFMT ")",
-            p2i(stack_end()), p2i(stack_base()),
-            PROPERFMTARGS(stack_size()));
+            p2i(_stack_base - _stack_size), p2i(_stack_base),
+            PROPERFMTARGS(_stack_size));
   st->print("]");
 
   ThreadsSMRSupport::print_info_on(this, st);

From 5404b4eafc2eb3291cecf99f98728946388f5d16 Mon Sep 17 00:00:00 2001
From: Yudi Zheng <yzheng@openjdk.org>
Date: Mon, 15 Apr 2024 08:07:49 +0000
Subject: [PATCH 28/32] 8330105: SharedRuntime::resolve* should respect
 interpreter-only mode

Reviewed-by: never, dlong, dnsimon
---
 src/hotspot/share/runtime/sharedRuntime.cpp | 51 +++++++--------------
 src/hotspot/share/runtime/sharedRuntime.hpp |  1 +
 2 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp
index a4ee1a3173461..2b06859c96d8d 100644
--- a/src/hotspot/share/runtime/sharedRuntime.cpp
+++ b/src/hotspot/share/runtime/sharedRuntime.cpp
@@ -1396,8 +1396,7 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::handle_wrong_method_ic_miss(JavaThread*
     current->set_vm_result_2(callee_method());
   JRT_BLOCK_END
   // return compiled code entry point after potential safepoints
-  assert(callee_method->verified_code_entry() != nullptr, " Jump to zero!");
-  return callee_method->verified_code_entry();
+  return get_resolved_entry(current, callee_method);
 JRT_END
 
 
@@ -1450,8 +1449,7 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::handle_wrong_method(JavaThread* current)
     current->set_vm_result_2(callee_method());
   JRT_BLOCK_END
   // return compiled code entry point after potential safepoints
-  assert(callee_method->verified_code_entry() != nullptr, " Jump to zero!");
-  return callee_method->verified_code_entry();
+  return get_resolved_entry(current, callee_method);
 JRT_END
 
 // Handle abstract method call
@@ -1488,6 +1486,17 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::handle_wrong_method_abstract(JavaThread*
   return res;
 JRT_END
 
+// return verified_code_entry if interp_only_mode is not set for the current thread;
+// otherwise return c2i entry.
+address SharedRuntime::get_resolved_entry(JavaThread* current, methodHandle callee_method) {
+  if (current->is_interp_only_mode()) {
+    // In interp_only_mode we need to go to the interpreted entry
+    // The c2i won't patch in this mode -- see fixup_callers_callsite
+    return callee_method->get_c2i_entry();
+  }
+  assert(callee_method->verified_code_entry() != nullptr, " Jump to zero!");
+  return callee_method->verified_code_entry();
+}
 
 // resolve a static call and patch code
 JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_static_call_C(JavaThread* current ))
@@ -1496,37 +1505,11 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_static_call_C(JavaThread* curren
   JRT_BLOCK
     callee_method = SharedRuntime::resolve_helper(false, false, CHECK_NULL);
     current->set_vm_result_2(callee_method());
-
-    if (current->is_interp_only_mode()) {
-      RegisterMap reg_map(current,
-                          RegisterMap::UpdateMap::skip,
-                          RegisterMap::ProcessFrames::include,
-                          RegisterMap::WalkContinuation::skip);
-      frame stub_frame = current->last_frame();
-      assert(stub_frame.is_runtime_frame(), "must be a runtimeStub");
-      frame caller = stub_frame.sender(&reg_map);
-      enter_special = caller.cb() != nullptr && caller.cb()->is_nmethod()
-        && caller.cb()->as_nmethod()->method()->is_continuation_enter_intrinsic();
-    }
   JRT_BLOCK_END
-
-  if (current->is_interp_only_mode() && enter_special) {
-    // enterSpecial is compiled and calls this method to resolve the call to Continuation::enter
-    // but in interp_only_mode we need to go to the interpreted entry
-    // The c2i won't patch in this mode -- see fixup_callers_callsite
-    //
-    // This should probably be done in all cases, not just enterSpecial (see JDK-8218403),
-    // but that's part of a larger fix, and the situation is worse for enterSpecial, as it has no
-    // interpreted version.
-    return callee_method->get_c2i_entry();
-  }
-
   // return compiled code entry point after potential safepoints
-  assert(callee_method->verified_code_entry() != nullptr, " Jump to zero!");
-  return callee_method->verified_code_entry();
+  return get_resolved_entry(current, callee_method);
 JRT_END
 
-
 // resolve virtual call and update inline cache to monomorphic
 JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_virtual_call_C(JavaThread* current))
   methodHandle callee_method;
@@ -1535,8 +1518,7 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_virtual_call_C(JavaThread* curre
     current->set_vm_result_2(callee_method());
   JRT_BLOCK_END
   // return compiled code entry point after potential safepoints
-  assert(callee_method->verified_code_entry() != nullptr, " Jump to zero!");
-  return callee_method->verified_code_entry();
+  return get_resolved_entry(current, callee_method);
 JRT_END
 
 
@@ -1549,8 +1531,7 @@ JRT_BLOCK_ENTRY(address, SharedRuntime::resolve_opt_virtual_call_C(JavaThread* c
     current->set_vm_result_2(callee_method());
   JRT_BLOCK_END
   // return compiled code entry point after potential safepoints
-  assert(callee_method->verified_code_entry() != nullptr, " Jump to zero!");
-  return callee_method->verified_code_entry();
+  return get_resolved_entry(current, callee_method);
 JRT_END
 
 methodHandle SharedRuntime::handle_ic_miss_helper(TRAPS) {
diff --git a/src/hotspot/share/runtime/sharedRuntime.hpp b/src/hotspot/share/runtime/sharedRuntime.hpp
index c8a1000676450..d477929768f2f 100644
--- a/src/hotspot/share/runtime/sharedRuntime.hpp
+++ b/src/hotspot/share/runtime/sharedRuntime.hpp
@@ -496,6 +496,7 @@ class SharedRuntime: AllStatic {
   static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock, JavaThread* current);
 
   // Resolving of calls
+  static address get_resolved_entry        (JavaThread* current, methodHandle callee_method);
   static address resolve_static_call_C     (JavaThread* current);
   static address resolve_virtual_call_C    (JavaThread* current);
   static address resolve_opt_virtual_call_C(JavaThread* current);

From 3f1d9c441ea98910d9483e133bccfac784db393d Mon Sep 17 00:00:00 2001
From: Joachim Kern <jkern@openjdk.org>
Date: Mon, 15 Apr 2024 08:46:36 +0000
Subject: [PATCH 29/32] 8329257: AIX: Switch HOTSPOT_TOOLCHAIN_TYPE from xlc to
 gcc

Reviewed-by: jwaters, stuefe, kbarrett, mdoerr
---
 make/autoconf/flags-cflags.m4                 |   2 +-
 make/autoconf/toolchain.m4                    |   6 +-
 src/hotspot/os/aix/loadlib_aix.cpp            |   4 +-
 src/hotspot/os/aix/os_aix.cpp                 |  16 +--
 src/hotspot/os/aix/os_aix.hpp                 |  10 +-
 src/hotspot/os/aix/porting_aix.cpp            |   6 +-
 src/hotspot/os/posix/os_posix.cpp             |   2 +-
 src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp     |  20 +--
 src/hotspot/share/utilities/byteswap.hpp      |   4 +-
 .../share/utilities/compilerWarnings_xlc.hpp  |  30 ----
 .../share/utilities/count_leading_zeros.hpp   |  33 +----
 .../share/utilities/count_trailing_zeros.hpp  |  17 +--
 src/hotspot/share/utilities/debug.hpp         |   2 +-
 .../share/utilities/globalDefinitions_gcc.hpp |  12 +-
 .../share/utilities/globalDefinitions_xlc.hpp | 128 ------------------
 15 files changed, 44 insertions(+), 248 deletions(-)
 delete mode 100644 src/hotspot/share/utilities/compilerWarnings_xlc.hpp
 delete mode 100644 src/hotspot/share/utilities/globalDefinitions_xlc.hpp

diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4
index 97a3ec14ecd5e..c96b289084cca 100644
--- a/make/autoconf/flags-cflags.m4
+++ b/make/autoconf/flags-cflags.m4
@@ -459,7 +459,7 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_HELPER],
     CFLAGS_OS_DEF_JVM="-D_ALLBSD_SOURCE -D_DARWIN_C_SOURCE -D_XOPEN_SOURCE"
     CFLAGS_OS_DEF_JDK="-D_ALLBSD_SOURCE -D_DARWIN_UNLIMITED_SELECT"
   elif test "x$OPENJDK_TARGET_OS" = xaix; then
-    CFLAGS_OS_DEF_JVM="-DAIX -D_LARGE_FILES"
+    CFLAGS_OS_DEF_JVM="-DAIX -Dalloca'(size)'=__builtin_alloca'(size)' -D_LARGE_FILES"
     CFLAGS_OS_DEF_JDK="-D_LARGE_FILES"
   elif test "x$OPENJDK_TARGET_OS" = xbsd; then
     CFLAGS_OS_DEF_JDK="-D_ALLBSD_SOURCE"
diff --git a/make/autoconf/toolchain.m4 b/make/autoconf/toolchain.m4
index acccac3e320c6..aceb6edb79d87 100644
--- a/make/autoconf/toolchain.m4
+++ b/make/autoconf/toolchain.m4
@@ -953,11 +953,7 @@ AC_DEFUN_ONCE([TOOLCHAIN_MISC_CHECKS],
   # Setup hotspot lecagy names for toolchains
   HOTSPOT_TOOLCHAIN_TYPE=$TOOLCHAIN_TYPE
   if test "x$TOOLCHAIN_TYPE" = xclang; then
-    if test "x$OPENJDK_TARGET_OS" = xaix; then
-      HOTSPOT_TOOLCHAIN_TYPE=xlc
-    else
-      HOTSPOT_TOOLCHAIN_TYPE=gcc
-    fi
+    HOTSPOT_TOOLCHAIN_TYPE=gcc
   elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
     HOTSPOT_TOOLCHAIN_TYPE=visCPP
   fi
diff --git a/src/hotspot/os/aix/loadlib_aix.cpp b/src/hotspot/os/aix/loadlib_aix.cpp
index 107f2783ff516..bc21aef383698 100644
--- a/src/hotspot/os/aix/loadlib_aix.cpp
+++ b/src/hotspot/os/aix/loadlib_aix.cpp
@@ -117,8 +117,8 @@ static void print_entry(const loaded_module_t* lm, outputStream* os) {
             ", data: " INTPTR_FORMAT " - " INTPTR_FORMAT " "
             "%s",
       (lm->is_in_vm ? '*' : ' '),
-      lm->text, (uintptr_t)lm->text + lm->text_len,
-      lm->data, (uintptr_t)lm->data + lm->data_len,
+      p2i(lm->text), (uintptr_t)lm->text + lm->text_len,
+      p2i(lm->data), (uintptr_t)lm->data + lm->data_len,
       lm->path);
   if (lm->member) {
     os->print("(%s)", lm->member);
diff --git a/src/hotspot/os/aix/os_aix.cpp b/src/hotspot/os/aix/os_aix.cpp
index 39400db75e191..71a9aa576ea1d 100644
--- a/src/hotspot/os/aix/os_aix.cpp
+++ b/src/hotspot/os/aix/os_aix.cpp
@@ -23,10 +23,6 @@
  *
  */
 
-// According to the AIX OS doc #pragma alloca must be used
-// with C++ compiler before referencing the function alloca()
-#pragma alloca
-
 // no precompiled headers
 #include "classfile/vmSymbols.hpp"
 #include "code/vtableStubs.hpp"
@@ -606,8 +602,8 @@ static void *thread_native_entry(Thread *thread) {
     address low_address = thread->stack_end();
     address high_address = thread->stack_base();
     lt.print("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT
-             ", stack [" PTR_FORMAT " - " PTR_FORMAT " (" SIZE_FORMAT "k using %uk pages)).",
-             os::current_thread_id(), (uintx) kernel_thread_id, low_address, high_address,
+             ", stack [" PTR_FORMAT " - " PTR_FORMAT " (" SIZE_FORMAT "k using %luk pages)).",
+             os::current_thread_id(), (uintx) kernel_thread_id, p2i(low_address), p2i(high_address),
              (high_address - low_address) / K, os::Aix::query_pagesize(low_address) / K);
   }
 
@@ -1354,8 +1350,8 @@ struct vmembk_t {
 
   void print_on(outputStream* os) const {
     os->print("[" PTR_FORMAT " - " PTR_FORMAT "] (" UINTX_FORMAT
-      " bytes, %d %s pages), %s",
-      addr, addr + size - 1, size, size / pagesize, describe_pagesize(pagesize),
+      " bytes, %ld %s pages), %s",
+      p2i(addr), p2i(addr) + size - 1, size, size / pagesize, describe_pagesize(pagesize),
       (type == VMEM_SHMATED ? "shmat" : "mmap")
     );
   }
@@ -1939,7 +1935,7 @@ static bool checked_mprotect(char* addr, size_t size, int prot) {
 
   if (!rc) {
     const char* const s_errno = os::errno_name(errno);
-    warning("mprotect(" PTR_FORMAT "-" PTR_FORMAT ", 0x%X) failed (%s).", addr, addr + size, prot, s_errno);
+    warning("mprotect(" PTR_FORMAT "-" PTR_FORMAT ", 0x%X) failed (%s).", p2i(addr), p2i(addr) + size, prot, s_errno);
     return false;
   }
 
@@ -2356,7 +2352,7 @@ void os::set_native_thread_name(const char *name) {
 
 bool os::find(address addr, outputStream* st) {
 
-  st->print(PTR_FORMAT ": ", addr);
+  st->print(PTR_FORMAT ": ", p2i(addr));
 
   loaded_module_t lm;
   if (LoadedLibraries::find_for_text_address(addr, &lm) ||
diff --git a/src/hotspot/os/aix/os_aix.hpp b/src/hotspot/os/aix/os_aix.hpp
index 7f4f3c7e8ccc3..759bc552bb7c0 100644
--- a/src/hotspot/os/aix/os_aix.hpp
+++ b/src/hotspot/os/aix/os_aix.hpp
@@ -120,19 +120,19 @@ class os::Aix {
   struct meminfo_t {
 
     // Amount of virtual memory (in units of 4 KB pages)
-    unsigned long long virt_total;
+    size_t virt_total;
 
     // Amount of real memory, in bytes
-    unsigned long long real_total;
+    size_t real_total;
 
     // Amount of free real memory, in bytes
-    unsigned long long real_free;
+    size_t real_free;
 
     // Total amount of paging space, in bytes
-    unsigned long long pgsp_total;
+    size_t pgsp_total;
 
     // Amount of free paging space, in bytes
-    unsigned long long pgsp_free;
+    size_t pgsp_free;
 
   };
 
diff --git a/src/hotspot/os/aix/porting_aix.cpp b/src/hotspot/os/aix/porting_aix.cpp
index 68233097b4957..c06d4ad9a7fc0 100644
--- a/src/hotspot/os/aix/porting_aix.cpp
+++ b/src/hotspot/os/aix/porting_aix.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2023 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2024 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -741,7 +741,7 @@ void AixNativeCallstack::print_callstack_for_context(outputStream* st, const uco
     st->print("(invalid) ");
     goto cleanup;
   } else {
-    st->print("(base - 0x%X) ", PTRDIFF_BYTES(stack_base, cur_sp));
+    st->print("(base - 0x%lX) ", PTRDIFF_BYTES(stack_base, cur_sp));
   }
   st->cr();
 
@@ -797,7 +797,7 @@ void AixNativeCallstack::print_callstack_for_context(outputStream* st, const uco
       st->print_cr("trying to recover and find backchain...");
       sp = try_find_backchain(sp_last, stack_base, stack_size);
       if (sp) {
-        st->print_cr("found something which looks like a backchain at " PTR_FORMAT ", after 0x%x bytes... ",
+        st->print_cr("found something which looks like a backchain at " PTR_FORMAT ", after 0x%lx bytes... ",
             p2i(sp), PTRDIFF_BYTES(sp, sp_last));
       } else {
         st->print_cr("did not find a backchain, giving up.");
diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp
index d2de5b3048492..76a6080305b35 100644
--- a/src/hotspot/os/posix/os_posix.cpp
+++ b/src/hotspot/os/posix/os_posix.cpp
@@ -508,7 +508,7 @@ void os::Posix::print_rlimit_info(outputStream* st) {
 
 #if defined(AIX)
   st->print(", NPROC ");
-  st->print("%d", sysconf(_SC_CHILD_MAX));
+  st->print("%ld", sysconf(_SC_CHILD_MAX));
 
   print_rlimit(st, ", THREADS", RLIMIT_THREADS);
 #else
diff --git a/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp b/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp
index e1e81d673a704..8711c9a89b352 100644
--- a/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp
+++ b/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2023 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2024 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -261,7 +261,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
       else if (sig == SIGTRAP && TrapBasedICMissChecks &&
                nativeInstruction_at(pc)->is_sigtrap_ic_miss_check()) {
         if (TraceTraps) {
-          tty->print_cr("trap: ic_miss_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
+          tty->print_cr("trap: ic_miss_check at " INTPTR_FORMAT " (SIGTRAP)", p2i(pc));
         }
         stub = SharedRuntime::get_ic_miss_stub();
         goto run_stub;
@@ -271,7 +271,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
       else if (sig == SIGTRAP && TrapBasedNullChecks &&
                nativeInstruction_at(pc)->is_sigtrap_null_check()) {
         if (TraceTraps) {
-          tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
+          tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGTRAP)", p2i(pc));
         }
         stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
         goto run_stub;
@@ -282,7 +282,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
                CodeCache::contains((void*) pc) &&
                MacroAssembler::uses_implicit_null_check(info->si_addr)) {
         if (TraceTraps) {
-          tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", pc);
+          tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", p2i(pc));
         }
         stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
       }
@@ -292,7 +292,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
       else if (sig == SIGTRAP && TrapBasedRangeChecks &&
                nativeInstruction_at(pc)->is_sigtrap_range_check()) {
         if (TraceTraps) {
-          tty->print_cr("trap: range_check at " INTPTR_FORMAT " (SIGTRAP)", pc);
+          tty->print_cr("trap: range_check at " INTPTR_FORMAT " (SIGTRAP)", p2i(pc));
         }
         stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
         goto run_stub;
@@ -435,12 +435,12 @@ void os::print_context(outputStream *st, const void *context) {
   const ucontext_t* uc = (const ucontext_t*)context;
 
   st->print_cr("Registers:");
-  st->print("pc =" INTPTR_FORMAT "  ", uc->uc_mcontext.jmp_context.iar);
-  st->print("lr =" INTPTR_FORMAT "  ", uc->uc_mcontext.jmp_context.lr);
-  st->print("ctr=" INTPTR_FORMAT "  ", uc->uc_mcontext.jmp_context.ctr);
+  st->print("pc =" INTPTR_FORMAT "  ", (unsigned long)uc->uc_mcontext.jmp_context.iar);
+  st->print("lr =" INTPTR_FORMAT "  ", (unsigned long)uc->uc_mcontext.jmp_context.lr);
+  st->print("ctr=" INTPTR_FORMAT "  ", (unsigned long)uc->uc_mcontext.jmp_context.ctr);
   st->cr();
   for (int i = 0; i < 32; i++) {
-    st->print("r%-2d=" INTPTR_FORMAT "  ", i, uc->uc_mcontext.jmp_context.gpr[i]);
+    st->print("r%-2d=" INTPTR_FORMAT "  ", i, (unsigned long)uc->uc_mcontext.jmp_context.gpr[i]);
     if (i % 3 == 2) st->cr();
   }
   st->cr();
@@ -464,7 +464,7 @@ void os::print_tos_pc(outputStream *st, const void *context) {
   st->cr();
 
   // Try to decode the instructions.
-  st->print_cr("Decoded instructions: (pc=" PTR_FORMAT ")", pc);
+  st->print_cr("Decoded instructions: (pc=" PTR_FORMAT ")", p2i(pc));
   st->print("<TODO: PPC port - print_context>");
   // TODO: PPC port Disassembler::decode(pc, 16, 16, st);
   st->cr();
diff --git a/src/hotspot/share/utilities/byteswap.hpp b/src/hotspot/share/utilities/byteswap.hpp
index d5ece7e00cd42..fba0775cf4992 100644
--- a/src/hotspot/share/utilities/byteswap.hpp
+++ b/src/hotspot/share/utilities/byteswap.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, Google and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, 2024, Google and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -88,7 +88,7 @@ struct ByteswapFallbackImpl<T, 8> {
 /*****************************************************************************
  * GCC and compatible (including Clang)
  *****************************************************************************/
-#if defined(TARGET_COMPILER_gcc) || defined(TARGET_COMPILER_xlc)
+#if defined(TARGET_COMPILER_gcc)
 
 #if defined(__clang__) || defined(ASSERT)
 
diff --git a/src/hotspot/share/utilities/compilerWarnings_xlc.hpp b/src/hotspot/share/utilities/compilerWarnings_xlc.hpp
deleted file mode 100644
index 76782c105af98..0000000000000
--- a/src/hotspot/share/utilities/compilerWarnings_xlc.hpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_UTILITIES_COMPILERWARNINGS_XLC_HPP
-#define SHARE_UTILITIES_COMPILERWARNINGS_XLC_HPP
-
-// Nothing here yet.
-
-#endif // SHARE_UTILITIES_COMPILERWARNINGS_XLC_HPP
diff --git a/src/hotspot/share/utilities/count_leading_zeros.hpp b/src/hotspot/share/utilities/count_leading_zeros.hpp
index 612a6efbc3a82..d6cbed9a355e9 100644
--- a/src/hotspot/share/utilities/count_leading_zeros.hpp
+++ b/src/hotspot/share/utilities/count_leading_zeros.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -123,37 +123,6 @@ template <typename T> struct CountLeadingZerosImpl<T, 8> {
   }
 };
 
-/*****************************************************************************
- * IBM XL C/C++
- *****************************************************************************/
-#elif defined(TARGET_COMPILER_xlc)
-
-#include <builtins.h>
-
-template <typename T> struct CountLeadingZerosImpl<T, 1> {
-  static unsigned doit(T v) {
-    return __cntlz4((uint32_t)v & 0xFF) - 24u;
-  }
-};
-
-template <typename T> struct CountLeadingZerosImpl<T, 2> {
-  static unsigned doit(T v) {
-    return __cntlz4((uint32_t)v & 0xFFFF) - 16u;
-  }
-};
-
-template <typename T> struct CountLeadingZerosImpl<T, 4> {
-  static unsigned doit(T v) {
-    return __cntlz4(v);
-  }
-};
-
-template <typename T> struct CountLeadingZerosImpl<T, 8> {
-  static unsigned doit(T v) {
-    return __cntlz8(v);
-  }
-};
-
 /*****************************************************************************
  * Fallback
  *****************************************************************************/
diff --git a/src/hotspot/share/utilities/count_trailing_zeros.hpp b/src/hotspot/share/utilities/count_trailing_zeros.hpp
index b9e55d0fa2453..59cda41daff8e 100644
--- a/src/hotspot/share/utilities/count_trailing_zeros.hpp
+++ b/src/hotspot/share/utilities/count_trailing_zeros.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -84,21 +84,6 @@ inline unsigned count_trailing_zeros_64(uint64_t x) {
   return index;
 }
 
-/*****************************************************************************
- * IBM XL C/C++
- *****************************************************************************/
-#elif defined(TARGET_COMPILER_xlc)
-
-#include <builtins.h>
-
-inline unsigned count_trailing_zeros_32(uint32_t x) {
-  return __cnttz4(x);
-}
-
-inline unsigned count_trailing_zeros_64(uint64_t x) {
-  return __cnttz8(x);
-}
-
 /*****************************************************************************
  * Unknown toolchain
  *****************************************************************************/
diff --git a/src/hotspot/share/utilities/debug.hpp b/src/hotspot/share/utilities/debug.hpp
index 3a9adda905498..d21439c35ca62 100644
--- a/src/hotspot/share/utilities/debug.hpp
+++ b/src/hotspot/share/utilities/debug.hpp
@@ -106,7 +106,7 @@ class DebuggingContext {
 // constant evaluation in the compiler. We don't do something like that now,
 // because we need a fallback when we don't have any mechanism for detecting
 // constant evaluation.
-#if defined(TARGET_COMPILER_gcc) || defined(TARGET_COMPILER_xlc)
+#if defined(TARGET_COMPILER_gcc)
 
 // Both __has_builtin and __builtin_is_constant_evaluated are available in our
 // minimum required versions of gcc and clang.
diff --git a/src/hotspot/share/utilities/globalDefinitions_gcc.hpp b/src/hotspot/share/utilities/globalDefinitions_gcc.hpp
index 06bb22687d83f..4267cdb2746cd 100644
--- a/src/hotspot/share/utilities/globalDefinitions_gcc.hpp
+++ b/src/hotspot/share/utilities/globalDefinitions_gcc.hpp
@@ -39,6 +39,14 @@
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
+// In stdlib.h on AIX malloc is defined as a macro causing
+// compiler errors when resolving them in different depths as it
+// happens in the log tags. This avoids the macro.
+#if (defined(__VEC__) || defined(__AIXVEC)) && defined(AIX) \
+    && defined(__open_xl_version__) && __open_xl_version__ >= 17
+  #undef malloc
+  extern void *malloc(size_t) asm("vec_malloc");
+#endif
 #include <wchar.h>
 
 #include <math.h>
@@ -50,7 +58,7 @@
 #include <limits.h>
 #include <errno.h>
 
-#if defined(LINUX) || defined(_ALLBSD_SOURCE)
+#if defined(LINUX) || defined(_ALLBSD_SOURCE) || defined(_AIX)
 #include <signal.h>
 #ifndef __OpenBSD__
 #include <ucontext.h>
@@ -83,7 +91,7 @@
 // checking for nanness
 #if defined(__APPLE__)
 inline int g_isnan(double f) { return isnan(f); }
-#elif defined(LINUX) || defined(_ALLBSD_SOURCE)
+#elif defined(LINUX) || defined(_ALLBSD_SOURCE) || defined(_AIX)
 inline int g_isnan(float  f) { return isnan(f); }
 inline int g_isnan(double f) { return isnan(f); }
 #else
diff --git a/src/hotspot/share/utilities/globalDefinitions_xlc.hpp b/src/hotspot/share/utilities/globalDefinitions_xlc.hpp
deleted file mode 100644
index 9595452d39942..0000000000000
--- a/src/hotspot/share/utilities/globalDefinitions_xlc.hpp
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2023 SAP SE. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_UTILITIES_GLOBALDEFINITIONS_XLC_HPP
-#define SHARE_UTILITIES_GLOBALDEFINITIONS_XLC_HPP
-
-#include "jni.h"
-
-// This file holds compiler-dependent includes,
-// globally used constants & types, class (forward)
-// declarations and a few frequently used utility functions.
-
-#include <alloca.h>
-#include <ctype.h>
-#include <string.h>
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-// In stdlib.h on AIX malloc is defined as a macro causing
-// compiler errors when resolving them in different depths as it
-// happens in the log tags. This avoids the macro.
-#if (defined(__VEC__) || defined(__AIXVEC)) && defined(AIX) \
-    && defined(__open_xl_version__) && __open_xl_version__ >= 17
-  #undef malloc
-  extern void *malloc(size_t) asm("vec_malloc");
-#endif
-
-#include <wchar.h>
-
-#include <math.h>
-#include <time.h>
-#include <fcntl.h>
-#include <dlfcn.h>
-#include <pthread.h>
-
-#include <limits.h>
-#include <errno.h>
-
-#include <stdint.h>
-
-#if defined(__open_xl_version__)
-  #if __open_xl_version__ < 17
-  #error "open xlc < 17 not supported"
-  #endif
-#else
-  #error "xlc version not supported, macro __open_xl_version__ not found"
-#endif
-
-#ifndef _AIX
-#error "missing AIX-specific definition _AIX"
-#endif
-
-// Use XLC compiler builtins instead of inline assembler
-#define USE_XLC_BUILTINS
-
-#ifdef USE_XLC_BUILTINS
-#include <builtins.h>
-// XLC V10 and higher provide the prototype for __dcbtst (void *);
-#endif // USE_XLC_BUILTINS
-
-// NULL vs NULL_WORD:
-// Some platform/tool-chain combinations can't assign NULL to an integer
-// type so we define NULL_WORD to use in those contexts.
-#define NULL_WORD  0L
-
-// checking for nanness
-inline int g_isnan(float  f) { return isnan(f); }
-inline int g_isnan(double f) { return isnan(f); }
-
-// Checking for finiteness
-inline int g_isfinite(jfloat  f)                 { return finite(f); }
-inline int g_isfinite(jdouble f)                 { return finite(f); }
-
-// Formatting.
-#ifdef _LP64
-#define FORMAT64_MODIFIER "l"
-#else // !_LP64
-#define FORMAT64_MODIFIER "ll"
-#endif // _LP64
-
-// Cannot use xlc's offsetof as implementation of hotspot's
-// offset_of(), because xlc warns about applying offsetof() to non-POD
-// object and xlc cannot compile the expression offsetof(DataLayout,
-// _cells[index]) in DataLayout::cell_offset() .  Therefore we define
-// offset_of as it is defined for gcc.
-#define offset_of(klass,field) (size_t)((intx)&(((klass*)16)->field) - 16)
-
-#define THREAD_LOCAL __thread
-
-// Inlining support
-//
-// Be aware that for function/method declarations, xlC only supports the following
-// syntax (i.e. the attribute must be placed AFTER the function/method declarator):
-//
-//   void* operator new(size_t size) throw() NOINLINE;
-//
-// For function/method definitions, the more common placement BEFORE the
-// function/method declarator seems to be supported as well:
-//
-//   NOINLINE void* CHeapObj<F>::operator new(size_t size) throw() {...}
-
-#define NOINLINE     __attribute__((__noinline__))
-#define ALWAYSINLINE inline __attribute__((__always_inline__))
-
-#endif // SHARE_UTILITIES_GLOBALDEFINITIONS_XLC_HPP

From 60d88b7ae2945724ab4db44207e3390bcff172c0 Mon Sep 17 00:00:00 2001
From: Per Minborg <pminborg@openjdk.org>
Date: Mon, 15 Apr 2024 12:42:31 +0000
Subject: [PATCH 30/32] 8330176: Typo in Linker javadoc

Reviewed-by: mcimadamore
---
 src/java.base/share/classes/java/lang/foreign/Linker.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/foreign/Linker.java b/src/java.base/share/classes/java/lang/foreign/Linker.java
index ea407c296f7f0..fb325ef1d44f4 100644
--- a/src/java.base/share/classes/java/lang/foreign/Linker.java
+++ b/src/java.base/share/classes/java/lang/foreign/Linker.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -598,7 +598,7 @@ static Linker nativeLinker() {
      * <p>
      * Calling this method is equivalent to the following code:
      * {@snippet lang=java :
-     * linker.downcallHandle(function).bindTo(symbol);
+     * linker.downcallHandle(function, options).bindTo(address);
      * }
      *
      * @param address  the native memory segment whose

From a3fecdb2f417bd32e528f907bc293cc494746955 Mon Sep 17 00:00:00 2001
From: Thomas Schatzl <tschatzl@openjdk.org>
Date: Mon, 15 Apr 2024 13:21:18 +0000
Subject: [PATCH 31/32] 8329764: G1: Handle null references during verification
 first

Reviewed-by: stefank, iwalulya
---
 src/hotspot/share/gc/g1/g1HeapRegion.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/hotspot/share/gc/g1/g1HeapRegion.cpp b/src/hotspot/share/gc/g1/g1HeapRegion.cpp
index 698a4e0e39fbe..ff5ac4ab3a12e 100644
--- a/src/hotspot/share/gc/g1/g1HeapRegion.cpp
+++ b/src/hotspot/share/gc/g1/g1HeapRegion.cpp
@@ -621,14 +621,18 @@ class G1VerifyLiveAndRemSetClosure : public BasicOopIterateClosure {
 
   template <class T>
   void do_oop_work(T* p) {
-    if (_failures->count() >= G1MaxVerifyFailures) {
+    // Check for null references first - they are fairly common and since there is
+    // nothing to do for them anyway (they can't fail verification), it makes sense
+    // to handle them first.
+    T heap_oop = RawAccess<>::oop_load(p);
+    if (CompressedOops::is_null(heap_oop)) {
       return;
     }
 
-    T heap_oop = RawAccess<>::oop_load(p);
-    if (CompressedOops::is_null(heap_oop)) {
+    if (_failures->count() >= G1MaxVerifyFailures) {
       return;
     }
+
     oop obj = CompressedOops::decode_raw_not_null(heap_oop);
 
     LiveChecker<T> live_check(_failures, _containing_obj, p, obj, _vo);

From 273df6286a9cc4dcef4cd339234206b2ad922386 Mon Sep 17 00:00:00 2001
From: Albert Mingkun Yang <ayang@openjdk.org>
Date: Mon, 15 Apr 2024 13:32:26 +0000
Subject: [PATCH 32/32] 8328792: Parallel: Refactor
 PSParallelCompact::summary_phase

Reviewed-by: iwalulya, tschatzl
---
 .../share/gc/parallel/psParallelCompact.cpp   | 271 +++++++-----------
 .../share/gc/parallel/psParallelCompact.hpp   |  24 +-
 2 files changed, 119 insertions(+), 176 deletions(-)

diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
index 8480e8520c459..30b73ad3e56ef 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
@@ -642,6 +642,38 @@ ParallelCompactData::summarize_split_space(size_t src_region,
   return source_next;
 }
 
+size_t ParallelCompactData::live_words_in_space(const MutableSpace* space,
+                                                HeapWord** full_region_prefix_end) { // optional out-param; pass nullptr to get only the sum
+  size_t cur_region = addr_to_region_idx(space->bottom());
+  const size_t end_region = addr_to_region_idx(region_align_up(space->top())); // exclusive upper bound of the region walk
+  size_t live_words = 0; // running sum of data_size() over the visited regions; this is the return value
+  if (full_region_prefix_end == nullptr) {
+    for (/* empty */; cur_region < end_region; ++cur_region) { // caller did not ask for the prefix end: just accumulate
+      live_words += _region_data[cur_region].data_size();
+    }
+  } else {
+    bool first_set = false; // becomes true once *full_region_prefix_end has been written inside the loop
+    for (/* empty */; cur_region < end_region; ++cur_region) {
+      size_t live_words_in_region = _region_data[cur_region].data_size();
+      if (!first_set && live_words_in_region < RegionSize) {
+        *full_region_prefix_end = region_to_addr(cur_region); // start address of the first region with data_size() below RegionSize
+        first_set = true;
+      }
+      live_words += live_words_in_region;
+    }
+    if (!first_set) {
+      // No visited region had data_size() below RegionSize, so the prefix of full regions extends all the way to top.
+      assert(is_region_aligned(space->top()), "inv");
+      *full_region_prefix_end = space->top();
+    }
+    assert(*full_region_prefix_end != nullptr, "postcondition");
+    assert(is_region_aligned(*full_region_prefix_end), "inv");
+    assert(*full_region_prefix_end >= space->bottom(), "in-range");
+    assert(*full_region_prefix_end <= space->top(), "in-range");
+  }
+  return live_words;
+}
+
 bool ParallelCompactData::summarize(SplitInfo& split_info,
                                     HeapWord* source_beg, HeapWord* source_end,
                                     HeapWord** source_next,
@@ -982,93 +1014,19 @@ void PSParallelCompact::post_compact()
   Universe::heap()->record_whole_heap_examined_timestamp();
 }
 
-ParallelCompactData::RegionData*
-PSParallelCompact::first_dead_space_region(const RegionData* beg,
-                                           const RegionData* end)
-{
-  const size_t region_size = ParallelCompactData::RegionSize;
-  ParallelCompactData& sd = summary_data();
-  size_t left = sd.region(beg);
-  size_t right = end > beg ? sd.region(end) - 1 : left;
-
-  // Binary search.
-  while (left < right) {
-    // Equivalent to (left + right) / 2, but does not overflow.
-    const size_t middle = left + (right - left) / 2;
-    RegionData* const middle_ptr = sd.region(middle);
-    HeapWord* const dest = middle_ptr->destination();
-    HeapWord* const addr = sd.region_to_addr(middle);
-    assert(dest != nullptr, "sanity");
-    assert(dest <= addr, "must move left");
-
-    if (middle > left && dest < addr) {
-      right = middle - 1;
-    } else if (middle < right && middle_ptr->data_size() == region_size) {
-      left = middle + 1;
-    } else {
-      return middle_ptr;
-    }
-  }
-  return sd.region(left);
-}
-
-// Return the address of the end of the dense prefix, a.k.a. the start of the
-// compacted region.  The address is always on a region boundary.
-//
-// Completely full regions at the left are skipped, since no compaction can
-// occur in those regions.  Then the maximum amount of dead wood to allow is
-// computed, based on the density (amount live / capacity) of the generation;
-// the region with approximately that amount of dead space to the left is
-// identified as the limit region.  Regions between the last completely full
-// region and the limit region are scanned and the one that has the best
-// (maximum) reclaimed_ratio() is selected.
-HeapWord*
-PSParallelCompact::compute_dense_prefix(const SpaceId id,
-                                        bool maximum_compaction)
-{
+HeapWord* PSParallelCompact::compute_dense_prefix_for_old_space(MutableSpace* old_space,
+                                                                HeapWord* full_region_prefix_end) {
   const size_t region_size = ParallelCompactData::RegionSize;
   const ParallelCompactData& sd = summary_data();
 
-  const MutableSpace* const space = _space_info[id].space();
-  HeapWord* const top = space->top();
-  HeapWord* const top_aligned_up = sd.region_align_up(top);
-  HeapWord* const new_top = _space_info[id].new_top();
-  HeapWord* const new_top_aligned_up = sd.region_align_up(new_top);
-  HeapWord* const bottom = space->bottom();
-  const RegionData* const beg_cp = sd.addr_to_region_ptr(bottom);
-  const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up);
-  const RegionData* const new_top_cp =
-    sd.addr_to_region_ptr(new_top_aligned_up);
-
-  // Skip full regions at the beginning of the space--they are necessarily part
-  // of the dense prefix.
-  const RegionData* const full_cp = first_dead_space_region(beg_cp, new_top_cp);
-  assert(full_cp->destination() == sd.region_to_addr(full_cp) ||
-         space->is_empty(), "no dead space allowed to the left");
-  assert(full_cp->data_size() < region_size || full_cp == new_top_cp - 1,
-         "region must have dead space");
-
-  // The gc number is saved whenever a maximum compaction is done, and used to
-  // determine when the maximum compaction interval has expired.  This avoids
-  // successive max compactions for different reasons.
-  const uint total_invocations = ParallelScavengeHeap::heap()->total_full_collections();
-  assert(total_invocations >= _maximum_compaction_gc_num, "sanity");
-  const size_t gcs_since_max = total_invocations - _maximum_compaction_gc_num;
-  const bool interval_ended = gcs_since_max > HeapMaximumCompactionInterval ||
-    total_invocations == HeapFirstMaximumCompactionCount;
-  if (maximum_compaction || full_cp == top_cp || interval_ended) {
-    _maximum_compaction_gc_num = total_invocations;
-    return sd.region_to_addr(full_cp);
-  }
-
   // Iteration starts with the region *after* the full-region-prefix-end.
-  const RegionData* const start_region = full_cp;
+  const RegionData* const start_region = sd.addr_to_region_ptr(full_region_prefix_end);
   // If final region is not full, iteration stops before that region,
   // because fill_dense_prefix_end assumes that prefix_end <= top.
-  const RegionData* const end_region = sd.addr_to_region_ptr(space->top());
+  const RegionData* const end_region = sd.addr_to_region_ptr(old_space->top());
   assert(start_region <= end_region, "inv");
 
-  size_t max_waste = space->capacity_in_words() * (MarkSweepDeadRatio / 100.0);
+  size_t max_waste = old_space->capacity_in_words() * (MarkSweepDeadRatio / 100.0);
   const RegionData* cur_region = start_region;
   for (/* empty */; cur_region < end_region; ++cur_region) {
     assert(region_size >= cur_region->data_size(), "inv");
@@ -1081,24 +1039,11 @@ PSParallelCompact::compute_dense_prefix(const SpaceId id,
 
   HeapWord* const prefix_end = sd.region_to_addr(cur_region);
   assert(sd.is_region_aligned(prefix_end), "postcondition");
-  assert(prefix_end >= sd.region_to_addr(full_cp), "in-range");
-  assert(prefix_end <= space->top(), "in-range");
+  assert(prefix_end >= full_region_prefix_end, "in-range");
+  assert(prefix_end <= old_space->top(), "in-range");
   return prefix_end;
 }
 
-void PSParallelCompact::summarize_spaces_quick()
-{
-  for (unsigned int i = 0; i < last_space_id; ++i) {
-    const MutableSpace* space = _space_info[i].space();
-    HeapWord** nta = _space_info[i].new_top_addr();
-    bool result = _summary_data.summarize(_space_info[i].split_info(),
-                                          space->bottom(), space->top(), nullptr,
-                                          space->bottom(), space->end(), nta);
-    assert(result, "space must fit into itself");
-    _space_info[i].set_dense_prefix(space->bottom());
-  }
-}
-
 void PSParallelCompact::fill_dense_prefix_end(SpaceId id) {
   // Comparing two sizes to decide if filling is required:
   //
@@ -1123,6 +1068,12 @@ void PSParallelCompact::fill_dense_prefix_end(SpaceId id) {
   }
   assert(CollectedHeap::min_fill_size() == 2, "inv");
   HeapWord* const dense_prefix_end = dense_prefix(id);
+  assert(_summary_data.is_region_aligned(dense_prefix_end), "precondition");
+  assert(dense_prefix_end <= space(id)->top(), "precondition");
+  if (dense_prefix_end == space(id)->top()) {
+    // Must not have single-word gap right before prefix-end/top.
+    return;
+  }
   RegionData* const region_after_dense_prefix = _summary_data.addr_to_region_ptr(dense_prefix_end);
   idx_t const dense_prefix_bit = _mark_bitmap.addr_to_bit(dense_prefix_end);
 
@@ -1147,56 +1098,6 @@ void PSParallelCompact::fill_dense_prefix_end(SpaceId id) {
   }
 }
 
-void
-PSParallelCompact::summarize_space(SpaceId id, bool maximum_compaction)
-{
-  assert(id < last_space_id, "id out of range");
-  assert(_space_info[id].dense_prefix() == _space_info[id].space()->bottom(),
-         "should have been reset in summarize_spaces_quick()");
-
-  const MutableSpace* space = _space_info[id].space();
-  if (_space_info[id].new_top() != space->bottom()) {
-    HeapWord* dense_prefix_end = compute_dense_prefix(id, maximum_compaction);
-    _space_info[id].set_dense_prefix(dense_prefix_end);
-
-    // Recompute the summary data, taking into account the dense prefix.  If
-    // every last byte will be reclaimed, then the existing summary data which
-    // compacts everything can be left in place.
-    if (!maximum_compaction && dense_prefix_end != space->bottom()) {
-      // If dead space crosses the dense prefix boundary, it is (at least
-      // partially) filled with a dummy object, marked live and added to the
-      // summary data.  This simplifies the copy/update phase and must be done
-      // before the final locations of objects are determined, to prevent
-      // leaving a fragment of dead space that is too small to fill.
-      fill_dense_prefix_end(id);
-
-      // Compute the destination of each Region, and thus each object.
-      _summary_data.summarize_dense_prefix(space->bottom(), dense_prefix_end);
-      _summary_data.summarize(_space_info[id].split_info(),
-                              dense_prefix_end, space->top(), nullptr,
-                              dense_prefix_end, space->end(),
-                              _space_info[id].new_top_addr());
-    }
-  }
-
-  if (log_develop_is_enabled(Trace, gc, compaction)) {
-    const size_t region_size = ParallelCompactData::RegionSize;
-    HeapWord* const dense_prefix_end = _space_info[id].dense_prefix();
-    const size_t dp_region = _summary_data.addr_to_region_idx(dense_prefix_end);
-    const size_t dp_words = pointer_delta(dense_prefix_end, space->bottom());
-    HeapWord* const new_top = _space_info[id].new_top();
-    const HeapWord* nt_aligned_up = _summary_data.region_align_up(new_top);
-    const size_t cr_words = pointer_delta(nt_aligned_up, dense_prefix_end);
-    log_develop_trace(gc, compaction)(
-        "id=%d cap=" SIZE_FORMAT " dp=" PTR_FORMAT " "
-        "dp_region=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " "
-        "cr_count=" SIZE_FORMAT " " "nt=" PTR_FORMAT,
-        id, space->capacity_in_words(), p2i(dense_prefix_end),
-        dp_region, dp_words / region_size,
-        cr_words / region_size, p2i(new_top));
-  }
-}
-
 #ifndef PRODUCT
 void PSParallelCompact::summary_phase_msg(SpaceId dst_space_id,
                                           HeapWord* dst_beg, HeapWord* dst_end,
@@ -1220,33 +1121,75 @@ void PSParallelCompact::summary_phase_msg(SpaceId dst_space_id,
 }
 #endif  // #ifndef PRODUCT
 
-void PSParallelCompact::summary_phase(bool maximum_compaction)
-{
-  GCTraceTime(Info, gc, phases) tm("Summary Phase", &_gc_timer);
+bool PSParallelCompact::reassess_maximum_compaction(bool maximum_compaction,
+                                                    size_t total_live_words,
+                                                    MutableSpace* const old_space,
+                                                    HeapWord* full_region_prefix_end) {
+  // Check if the total size of all live objs exceeds the old-gen capacity.
+  const bool is_old_gen_overflowing = (total_live_words > old_space->capacity_in_words());
 
-  // Quick summarization of each space into itself, to see how much is live.
-  summarize_spaces_quick();
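+  // Check if a maximum compaction is due per the JVM flags HeapMaximumCompactionInterval/HeapFirstMaximumCompactionCount.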
+  const uint total_invocations = ParallelScavengeHeap::heap()->total_full_collections();
+  assert(total_invocations >= _maximum_compaction_gc_num, "sanity");
+  const size_t gcs_since_max = total_invocations - _maximum_compaction_gc_num;
+  const bool is_interval_ended = gcs_since_max > HeapMaximumCompactionInterval
+                              || total_invocations == HeapFirstMaximumCompactionCount;
 
-  log_develop_trace(gc, compaction)("summary phase:  after summarizing each space to self");
-  NOT_PRODUCT(print_region_ranges());
-  NOT_PRODUCT(print_initial_summary_data(_summary_data, _space_info));
+  // Check if all regions in old-gen are full.
+  const bool is_region_full =
+    full_region_prefix_end >= _summary_data.region_align_down(old_space->top());
 
-  // The amount of live data that will end up in old space (assuming it fits).
-  size_t old_space_total_live = 0;
-  for (unsigned int id = old_space_id; id < last_space_id; ++id) {
-    old_space_total_live += pointer_delta(_space_info[id].new_top(),
-                                          _space_info[id].space()->bottom());
+  if (maximum_compaction || is_old_gen_overflowing || is_interval_ended || is_region_full) {
+    _maximum_compaction_gc_num = total_invocations;
+    return true;
   }
 
+  return false;
+}
+
+void PSParallelCompact::summary_phase(bool maximum_compaction)
+{
+  GCTraceTime(Info, gc, phases) tm("Summary Phase", &_gc_timer);
+
   MutableSpace* const old_space = _space_info[old_space_id].space();
-  const size_t old_capacity = old_space->capacity_in_words();
-  if (old_space_total_live > old_capacity) {
-    // XXX - should also try to expand
-    maximum_compaction = true;
-  }
+  {
+    size_t total_live_words = 0;
+    HeapWord* full_region_prefix_end = nullptr;
+    {
+      // old-gen
+      size_t live_words = _summary_data.live_words_in_space(old_space,
+                                                            &full_region_prefix_end);
+      total_live_words += live_words;
+    }
+    // young-gen
+    for (uint i = eden_space_id; i < last_space_id; ++i) {
+      const MutableSpace* space = _space_info[i].space();
+      size_t live_words = _summary_data.live_words_in_space(space);
+      total_live_words += live_words;
+      _space_info[i].set_new_top(space->bottom() + live_words);
+      _space_info[i].set_dense_prefix(space->bottom());
+    }
 
-  // Old generations.
-  summarize_space(old_space_id, maximum_compaction);
+    maximum_compaction = reassess_maximum_compaction(maximum_compaction,
+                                                     total_live_words,
+                                                     old_space,
+                                                     full_region_prefix_end);
+    HeapWord* dense_prefix_end =
+      maximum_compaction ? full_region_prefix_end
+                         : compute_dense_prefix_for_old_space(old_space,
+                                                              full_region_prefix_end);
+    SpaceId id = old_space_id;
+    _space_info[id].set_dense_prefix(dense_prefix_end);
+
+    if (dense_prefix_end != old_space->bottom()) {
+      fill_dense_prefix_end(id);
+      _summary_data.summarize_dense_prefix(old_space->bottom(), dense_prefix_end);
+    }
+    _summary_data.summarize(_space_info[id].split_info(),
+                            dense_prefix_end, old_space->top(), nullptr,
+                            dense_prefix_end, old_space->end(),
+                            _space_info[id].new_top_addr());
+  }
 
   // Summarize the remaining spaces in the young gen.  The initial target space
   // is the old gen.  If a space does not fit entirely into the target, then the
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.hpp b/src/hotspot/share/gc/parallel/psParallelCompact.hpp
index cd4965bbb4516..6ca23947ca413 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp
@@ -406,6 +406,10 @@ class ParallelCompactData
   HeapWord* summarize_split_space(size_t src_region, SplitInfo& split_info,
                                   HeapWord* destination, HeapWord* target_end,
                                   HeapWord** target_next);
+
+  size_t live_words_in_space(const MutableSpace* space,
+                             HeapWord** full_region_prefix_end = nullptr);
+
   bool summarize(SplitInfo& split_info,
                  HeapWord* source_beg, HeapWord* source_end,
                  HeapWord** source_next,
@@ -935,26 +939,22 @@ class PSParallelCompact : AllStatic {
   static void pre_compact();
   static void post_compact();
 
+  static bool reassess_maximum_compaction(bool maximum_compaction,
+                                          size_t total_live_words,
+                                          MutableSpace* const old_space,
+                                          HeapWord* full_region_prefix_end);
+
   // Mark live objects
   static void marking_phase(ParallelOldTracer *gc_tracer);
 
-  // Methods used to compute the dense prefix.
-
-  // Return a pointer to the first region in the range [beg, end) that is not
-  // completely full.
-  static RegionData* first_dead_space_region(const RegionData* beg,
-                                             const RegionData* end);
-
-  // Compute the dense prefix for the designated space.
-  static HeapWord* compute_dense_prefix(const SpaceId id,
-                                        bool maximum_compaction);
+  // Identify the dense-prefix in the old-space to avoid moving much memory with little reclaimed.
+  static HeapWord* compute_dense_prefix_for_old_space(MutableSpace* old_space,
+                                                      HeapWord* full_region_prefix_end);
 
   // Create a filler obj (if needed) right before the dense-prefix-boundary to
   // make the heap parsable.
   static void fill_dense_prefix_end(SpaceId id);
 
-  static void summarize_spaces_quick();
-  static void summarize_space(SpaceId id, bool maximum_compaction);
   static void summary_phase(bool maximum_compaction);
 
   // Adjust addresses in roots.  Does not adjust addresses in heap.