Skip to content

Context switch Assembly Reference

Sean McBride edited this page Mar 8, 2021 · 1 revision

https://bitbucket.org/pypy/pypy/src/default/rpython/translator/c/src/stacklet/

This PyPy package contains reference context-switch code for several architectures. The code is pasted below in case the link above becomes invalid; a clone of the repository is also available at https://bitbucket.org/pkgadepalli/pypy/src/default/rpython/translator/c/src/stacklet/

ARM AARCH64

static void *slp_switch(void *(*save_state)(void*, void*),
                        void *(*restore_state)(void*, void*),
                        void *extra) __attribute__((noinline));

/*
 * AArch64 stack switch.
 *
 * Pushes x18-x29 and d8-d15 on the current stack, then calls
 * save_state(sp, extra).  If that call returns NULL, the registers are
 * popped again and NULL is returned.  Otherwise the returned value becomes
 * the new stack pointer, restore_state(new_sp, extra) is called, and the
 * registers are popped from the new stack.  In that case the function
 * returns whatever restore_state() left in x0.
 */
static void *slp_switch(void *(*save_state)(void*, void*),
                        void *(*restore_state)(void*, void*),
                        void *extra)
{
  void *result;
  /*
      registers to preserve: x18-x28, x29(fp), and d8-d15 (the low
      64 bits of v8-v15)
      registers marked as clobbered: x0-x18, x30

      Note that x18 appears in both lists; see below.  We also save
      x30 although it's also marked as clobbered, which might not
      be necessary but doesn't hurt.

      Don't assume gcc saves any register for us when generating
      code for slp_switch().

      The values 'save_state', 'restore_state' and 'extra' are first moved
      by gcc to some registers that are not marked as clobbered, so between
      x19 and x29.  Similarly, gcc expects 'result' to be in a register
      between x19 and x29.  We don't want x18 to be used here, because of
      some special meaning it might have.  We don't want x30 to be used
      here, because it is clobbered by the first "blr".

      This means that three of the values we happen to save and restore
      will, in fact, contain the three arguments, and one of these values
      will, in fact, not be restored at all but receive 'result'.
  */

  __asm__ volatile (

    /* The stack is supposed to be aligned as necessary already.
       Save 12 registers from x18 to x29, plus d8 to d15:
       12*8 + 8*8 = 160 bytes, which keeps sp 16-byte aligned. */

    "stp x18, x19, [sp, -160]!\n"
    "stp x20, x21, [sp, 16]\n"   /* x21, not x11: x21 must be preserved */
    "stp x22, x23, [sp, 32]\n"
    "stp x24, x25, [sp, 48]\n"
    "stp x26, x27, [sp, 64]\n"
    "stp x28, x29, [sp, 80]\n"
    "str d8,  [sp, 96]\n"
    "str d9,  [sp, 104]\n"
    "str d10, [sp, 112]\n"
    "str d11, [sp, 120]\n"
    "str d12, [sp, 128]\n"
    "str d13, [sp, 136]\n"
    "str d14, [sp, 144]\n"
    "str d15, [sp, 152]\n"

    "mov x0, sp\n"              /* arg 1: current (old) stack pointer */
    "mov x1, %[extra]\n"        /* arg 2: extra, from x19-x29         */
    "blr %[save_state]\n"       /* call save_state(), from x19-x29    */

    /* skip the rest if the return value is null; "1f" is a numeric
       local label, so the asm defines no named symbol that could
       collide if the compiler ever duplicates this statement */
    "cbz x0, 1f\n"

    "mov sp, x0\n"              /* change the stack pointer */

    /* From now on, the stack pointer is modified, but the content of the
       stack is not restored yet.  It contains only garbage here. */
    "mov x1, %[extra]\n"        /* arg 2: extra, still from x19-x29   */
                /* arg 1: current (new) stack pointer is already in x0*/
    "blr %[restore_state]\n"    /* call restore_state()               */

    /* The stack's content is now restored. */
    "1:\n"

    /* Restore all saved registers (mirror image of the stores above) */
    "ldp x20, x21, [sp, 16]\n"
    "ldp x22, x23, [sp, 32]\n"
    "ldp x24, x25, [sp, 48]\n"
    "ldp x26, x27, [sp, 64]\n"
    "ldp x28, x29, [sp, 80]\n"
    "ldr d8,  [sp, 96]\n"
    "ldr d9,  [sp, 104]\n"
    "ldr d10, [sp, 112]\n"
    "ldr d11, [sp, 120]\n"
    "ldr d12, [sp, 128]\n"
    "ldr d13, [sp, 136]\n"
    "ldr d14, [sp, 144]\n"
    "ldr d15, [sp, 152]\n"
    "ldp x18, x19, [sp], 160\n" /* also releases the 160-byte save area */

    /* Move x0 into the final location of 'result' */
    "mov %[result], x0\n"

    : [result]"=r"(result)      /* output variables */
    /* input variables  */
    : [restore_state]"r"(restore_state),
      [save_state]"r"(save_state),
      [extra]"r"(extra)
    : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
      "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18",
      "memory", "cc", "x30"  // x30==lr
  );
  return result;
}

ARM

/* call_reg(reg) expands to the asm text for an indirect call through
   register 'reg'.  ARMv4/ARMv4T get the two-instruction "mov lr, pc ; bx"
   sequence; every other target gets a single "blx". */
#if !defined(__ARM_ARCH_4__) && !defined(__ARM_ARCH_4T__)
/* ARM >= 5 */
# define call_reg(x) "blx " #x "\n"
#else
# define call_reg(x) "mov lr, pc ; bx " #x "\n"
#endif

static void *slp_switch(void *(*save_state)(void*, void*),
                        void *(*restore_state)(void*, void*),
                        void *extra) __attribute__((noinline));

/*
 * 32-bit ARM stack switch.
 *
 * Aligns sp down to a multiple of 16, pushes the original sp together with
 * r2, r3, r7-r11 (and d8-d15 unless __SOFTFP__ is defined), then calls
 * save_state(sp, extra).  If that returns NULL the saved registers are
 * popped again and NULL is returned.  Otherwise the returned value becomes
 * the new stack pointer, restore_state(new_sp, extra) is called, and the
 * registers (including the pre-alignment sp) are popped from the new stack.
 * The function then returns the value restore_state() left in r0.
 */
static void *slp_switch(void *(*save_state)(void*, void*),
                        void *(*restore_state)(void*, void*),
                        void *extra)
{
  void *result;
  /*
      seven registers to preserve: r2, r3, r7, r8, r9, r10, r11
      registers marked as clobbered: r0, r1, r4, r5, r6, r12, lr
      others: r13 is sp; r14 is lr; r15 is pc
  */

  __asm__ volatile (

    /* align the stack and save 7 more registers explicitly;
       r0 holds the unaligned sp so that it can be restored at the end */
    "mov r0, sp\n"
    "and r1, r0, #-16\n"
    "mov sp, r1\n"
    "push {r0, r2, r3, r7, r8, r9, r10, r11}\n"   /* total 8, still aligned */
#ifndef __SOFTFP__
    /* We also push d8-d15 to preserve them explicitly.  This assumes
     * that this code is in a function that doesn't use floating-point
     * at all, and so doesn't touch the "d" registers (that's why we mark
     * it as non-inlinable).  So here by pushing/popping d8-d15 we are
     * saving precisely the callee-saved registers in all cases.  We
     * could also try to list all "d" registers as clobbered, but it
     * doesn't work: there is no way I could find to know if we have 16
     * or 32 "d" registers (depends on the exact -mcpu=... and we don't
     * know it from the C code).  If we have 32, then gcc would "save"
     * d8-d15 by copying them into d16-d23 for example, and it doesn't
     * work. */
    "vpush {d8, d9, d10, d11, d12, d13, d14, d15}\n"  /* 16 words, still aligned */
#endif

    /* save values in callee saved registers for later */
    "mov r4, %[restore_state]\n"  /* can't be r0 or r1: marked clobbered */
    "mov r5, %[extra]\n"          /* can't be r0 or r1 or r4: marked clob. */
    "mov r3, %[save_state]\n"     /* can't be r0, r1, r4, r5: marked clob. */
    "mov r0, sp\n"        	/* arg 1: current (old) stack pointer */
    "mov r1, r5\n"        	/* arg 2: extra                       */
    call_reg(r3)		/* call save_state()                  */

    /* skip the rest if the return value is null */
    "cmp r0, #0\n"
    "beq zero\n"

    "mov sp, r0\n"			/* change the stack pointer */

	/* From now on, the stack pointer is modified, but the content of the
	stack is not restored yet.  It contains only garbage here. */
    "mov r1, r5\n"       	/* arg 2: extra                       */
                /* arg 1: current (new) stack pointer is already in r0*/
    call_reg(r4)		/* call restore_state()               */

    /* The stack's content is now restored. */
    /* NOTE(review): "zero" is a named (non-local) assembler label; it would
       be defined twice if the compiler ever emitted this asm more than
       once -- confirm that noinline is sufficient for the toolchain used. */
    "zero:\n"

#ifndef __SOFTFP__
    "vpop {d8, d9, d10, d11, d12, d13, d14, d15}\n"
#endif
    /* the slot that was pushed from r0 (the unaligned sp) is popped into r1 */
    "pop {r1, r2, r3, r7, r8, r9, r10, r11}\n"
    "mov sp, r1\n"                /* undo the 16-byte alignment */
    "mov %[result], r0\n"         /* r0: return value of the last call */

    : [result]"=r"(result)	/* output variables */
	/* input variables  */
    : [restore_state]"r"(restore_state),
      [save_state]"r"(save_state),
      [extra]"r"(extra)
    : "r0", "r1", "r4", "r5", "r6", "r12", "lr",
      "memory", "cc"
  );
  return result;
}

MIPS64

/*
 * MIPS64 stack switch.
 *
 * Reserves 0x50 bytes on the current stack, stores $s0-$s7, $fp and $ra
 * there, then calls save_state(sp, extra).  If that returns NULL the
 * registers are reloaded and NULL is returned.  Otherwise the returned
 * value becomes the new stack pointer, restore_state(new_sp, extra) is
 * called, and the registers are reloaded from the new stack.  The function
 * then returns the value restore_state() left in $v0.
 *
 * NOTE(review): $s0 and $s1 are written by the asm but are not listed as
 * clobbers, so the compiler is presumably free to place an input operand
 * in one of them; if 'extra' or 'save_state' landed in $s0/$s1 it would be
 * overwritten before being read -- confirm against the generated code.
 * NOTE(review): no floating-point registers are saved here; verify that
 * this matches the target ABI's callee-saved set.
 */
static void *slp_switch(void *(*save_state)(void*, void*),
                        void *(*restore_state)(void*, void*),
                        void *extra)
{
  void *result;
  __asm__ volatile (
     "daddiu $sp, $sp, -0x50\n" /* reserve 10 slots of 8 bytes */
     "sd $s0, 0x0($sp)\n" /* push the registers that must be preserved
                             across the switch ($s0-$s7, $fp, $ra) */
     "sd $s1, 0x8($sp)\n"
     "sd $s2, 0x10($sp)\n"
     "sd $s3, 0x18($sp)\n"
     "sd $s4, 0x20($sp)\n"
     "sd $s5, 0x28($sp)\n"
     "sd $s6, 0x30($sp)\n"
     "sd $s7, 0x38($sp)\n"
     "sd $fp, 0x40($sp)\n"
     "sd $ra, 0x48($sp)\n"

     "move $s0, %[rstate]\n" /* save 'restore_state' for later */
     "move $s1, %[extra]\n" /* save 'extra' for later */

     "move $a1, %[extra]\n"/* arg 2: extra */
     "move $a0, $sp\n" /* arg 1: current (old) stack pointer */

     "move $t9, %[sstate]\n" /* the call goes through $t9 */
     "jalr $t9\n" /* call save_state() */

     "beqz $v0, 0f\n" /* skip the rest if the return value is null */

     "move $sp, $v0\n" /* change the stack pointer */

     /* From now on, the stack pointer is modified, but the content of the
        stack is not restored yet.  It contains only garbage here. */

     "move $a1, $s1\n" /* arg 2: extra */
     "move $a0, $v0\n" /* arg 1: current (new) stack pointer */
     "move $t9, $s0\n"
     "jalr $t9\n" /* call restore_state() */

     /* The stack's content is now restored. */

     "0:\n"
     "move %[result], $v0\n" /* $v0: return value of the last call */
     "ld $s0, 0x0($sp)\n"    /* reload everything stored above */
     "ld $s1, 0x8($sp)\n"
     "ld $s2, 0x10($sp)\n"
     "ld $s3, 0x18($sp)\n"
     "ld $s4, 0x20($sp)\n"
     "ld $s5, 0x28($sp)\n"
     "ld $s6, 0x30($sp)\n"
     "ld $s7, 0x38($sp)\n"
     "ld $fp, 0x40($sp)\n"
     "ld $ra, 0x48($sp)\n"
     "daddiu $sp, $sp, 0x50\n" /* release the save area */

     : [result]"=&r"(result) /* early-clobber: never shares a register
                                with an input operand */
     : [sstate]"r"(save_state),
       [rstate]"r"(restore_state),
       [extra]"r"(extra)
     : "memory", "v0", "a0", "a1", "t9"
     );
  return result;
}

PowerPC64

/* Exactly one of __LITTLE_ENDIAN__ / __BIG_ENDIAN__ must be defined;
   the choice selects between the ppc64 and ppc64le variants below. */
#if !(defined(__LITTLE_ENDIAN__) ^ defined(__BIG_ENDIAN__))
# error "cannot determine if it is ppc64 or ppc64le"
#endif

/* Offset from r1, written as a string so it can be pasted into the asm
   template, of the slot where slp_switch() saves and reloads r2 (TOC). */
#ifdef __BIG_ENDIAN__
# define TOC_AREA   "40"
#else
# define TOC_AREA   "24"
#endif


/*
 * PowerPC64 (big- and little-endian) stack switch.
 *
 * Saves LR, r14-r31, f14-f31 and v20-v31 below the incoming stack pointer,
 * creates a 528-byte frame, saves r2 (TOC) and CR in it, then calls
 * save_state(sp, extra).  If that returns NULL the epilogue runs on the
 * same stack.  Otherwise the returned value becomes the new stack pointer,
 * restore_state(new_sp, extra) is called, and the epilogue reloads
 * everything from the new stack.
 *
 * This depends on these attributes so that gcc generates a function
 * with no code before the asm, and only "blr" after.
 *
 * NOTE(review): the asm declares no clobbers and never references its
 * output operand; it relies on the compiler placing 'result' in r3, as the
 * comment on the output operand says -- confirm with the toolchain in use.
 */
static __attribute__((noinline, optimize("O2")))
void *slp_switch(void *(*save_state)(void*, void*),
                 void *(*restore_state)(void*, void*),
                 void *extra)
{
  void *result;
  __asm__ volatile (
     /* By Vaibhav Sood & Armin Rigo, with some copying from
        the Stackless version by Kristjan Valur Jonsson */

     /* Save all 18 non-volatile GP registers (r14-r31), 18 non-volatile
        FP regs (f14-f31), and 12 non-volatile vector regs (v20-v31).
        We need a stack frame of 144 bytes for FPR,
        144 bytes for GPR, 192 bytes for VR plus 48 bytes for the standard
        stackframe = 528 bytes (a multiple of 16). */

     "mflr  0\n"               /* Save LR into 16(r1) */
     "std  0, 16(1)\n"

     "std  14,-288(1)\n"      /* the GPR save area is between -288(r1) */
     "std  15,-280(1)\n"      /*        included and -144(r1) excluded */
     "std  16,-272(1)\n"
     "std  17,-264(1)\n"
     "std  18,-256(1)\n"
     "std  19,-248(1)\n"
     "std  20,-240(1)\n"
     "std  21,-232(1)\n"
     "std  22,-224(1)\n"
     "std  23,-216(1)\n"
     "std  24,-208(1)\n"
     "std  25,-200(1)\n"
     "std  26,-192(1)\n"
     "std  27,-184(1)\n"
     "std  28,-176(1)\n"
     "std  29,-168(1)\n"
     "std  30,-160(1)\n"
     "std  31,-152(1)\n"

     "stfd 14,-144(1)\n"      /* the FPR save area is between -144(r1) */
     "stfd 15,-136(1)\n"      /*           included and 0(r1) excluded */
     "stfd 16,-128(1)\n"
     "stfd 17,-120(1)\n"
     "stfd 18,-112(1)\n"
     "stfd 19,-104(1)\n"
     "stfd 20,-96(1)\n"
     "stfd 21,-88(1)\n"
     "stfd 22,-80(1)\n"
     "stfd 23,-72(1)\n"
     "stfd 24,-64(1)\n"
     "stfd 25,-56(1)\n"
     "stfd 26,-48(1)\n"
     "stfd 27,-40(1)\n"
     "stfd 28,-32(1)\n"
     "stfd 29,-24(1)\n"
     "stfd 30,-16(1)\n"
     "stfd 31,-8(1)\n"

     /* stvx is an indexed store, so each offset is first loaded into r12 */
     "li 12,-480\n"           /* the VR save area is between -480(r1) */
     "stvx 20,12,1\n"         /*       included and -288(r1) excluded */
     "li 12,-464\n"
     "stvx 21,12,1\n"
     "li 12,-448\n"
     "stvx 22,12,1\n"
     "li 12,-432\n"
     "stvx 23,12,1\n"
     "li 12,-416\n"
     "stvx 24,12,1\n"
     "li 12,-400\n"
     "stvx 25,12,1\n"
     "li 12,-384\n"
     "stvx 26,12,1\n"
     "li 12,-368\n"
     "stvx 27,12,1\n"
     "li 12,-352\n"
     "stvx 28,12,1\n"
     "li 12,-336\n"
     "stvx 29,12,1\n"
     "li 12,-320\n"
     "stvx 30,12,1\n"
     "li 12,-304\n"
     "stvx 31,12,1\n"

     "stdu  1,-528(1)\n"         /* Create stack frame             */

     "std   2, "TOC_AREA"(1)\n"  /* Save TOC in the "TOC save area"*/
     "mfcr  12\n"                /* Save CR in the "CR save area"  */
     "std   12, 8(1)\n"

     "mr 14, %[restore_state]\n" /* save 'restore_state' for later */
     "mr 15, %[extra]\n"         /* save 'extra' for later */
     "mr 12, %[save_state]\n"    /* move 'save_state' into r12 for branching */
     "mr 3, 1\n"                 /* arg 1: current (old) stack pointer */
     "mr 4, 15\n"                /* arg 2: extra                       */

     "stdu 1, -48(1)\n"       /* create temp stack space (see below) */
#ifdef __BIG_ENDIAN__
     /* big-endian: r12 points at three doublewords, loaded here into
        CTR (entry address), r2 and r11 */
     "ld 0, 0(12)\n"
     "ld 11, 16(12)\n"
     "mtctr 0\n"
     "ld 2, 8(12)\n"
#else
     "mtctr 12\n"             /* r12 is fixed by this ABI           */
#endif
     "bctrl\n"                /* call save_state()                  */
     "addi 1, 1, 48\n"        /* destroy temp stack space           */

     "cmpdi 3, 0\n"     /* skip the rest if the return value is null */
     "bt eq, zero\n"

     "mr 1, 3\n"              /* change the stack pointer */
       /* From now on, the stack pointer is modified, but the content of the
        stack is not restored yet.  It contains only garbage here. */

     "mr 4, 15\n"             /* arg 2: extra                       */
                              /* arg 1: current (new) stack pointer
                                 is already in r3                   */

     "stdu 1, -48(1)\n"       /* create temp stack space for callee to use  */
     /* ^^^ we have to be careful. The function call will store the link
        register in the current frame (as the ABI dictates). But it will
        then trample it with the restore! We fix this by creating a fake
        stack frame */

#ifdef __BIG_ENDIAN__
     "ld 0, 0(14)\n"          /* 'restore_state' is in r14          */
     "ld 11, 16(14)\n"
     "mtctr 0\n"
     "ld 2, 8(14)\n"
#endif
#ifdef __LITTLE_ENDIAN__
     "mr 12, 14\n"            /* copy 'restore_state'               */
     "mtctr 12\n"             /* r12 is fixed by this ABI           */
#endif

     "bctrl\n"                /* call restore_state()               */
     "addi 1, 1, 48\n"        /* destroy temp stack space           */

     /* The stack's content is now restored. */

     /* NOTE(review): "zero" is a named (non-local) assembler label; it would
        be defined twice if this asm were ever emitted more than once. */
     "zero:\n"

     /* Epilogue */

     "ld 2, "TOC_AREA"(1)\n"  /* restore the TOC */
     "ld 12,8(1)\n"           /* restore the condition register */
     "mtcrf 0xff, 12\n"

     "addi 1,1,528\n"         /* restore stack pointer */

     "li 12,-480\n"           /* restore vector registers */
     "lvx 20,12,1\n"
     "li 12,-464\n"
     "lvx 21,12,1\n"
     "li 12,-448\n"
     "lvx 22,12,1\n"
     "li 12,-432\n"
     "lvx 23,12,1\n"
     "li 12,-416\n"
     "lvx 24,12,1\n"
     "li 12,-400\n"
     "lvx 25,12,1\n"
     "li 12,-384\n"
     "lvx 26,12,1\n"
     "li 12,-368\n"
     "lvx 27,12,1\n"
     "li 12,-352\n"
     "lvx 28,12,1\n"
     "li 12,-336\n"
     "lvx 29,12,1\n"
     "li 12,-320\n"
     "lvx 30,12,1\n"
     "li 12,-304\n"
     "lvx 31,12,1\n"

     "ld  14,-288(1)\n"     /* restore general purpose registers */
     "ld  15,-280(1)\n"
     "ld  16,-272(1)\n"
     "ld  17,-264(1)\n"
     "ld  18,-256(1)\n"
     "ld  19,-248(1)\n"
     "ld  20,-240(1)\n"
     "ld  21,-232(1)\n"
     "ld  22,-224(1)\n"
     "ld  23,-216(1)\n"
     "ld  24,-208(1)\n"
     "ld  25,-200(1)\n"
     "ld  26,-192(1)\n"
     "ld  27,-184(1)\n"
     "ld  28,-176(1)\n"
     "ld  29,-168(1)\n"
     "ld  30,-160(1)\n"
     "ld  31,-152(1)\n"

     "lfd 14,-144(1)\n"     /* restore floating point registers */
     "lfd 15,-136(1)\n"
     "lfd 16,-128(1)\n"
     "lfd 17,-120(1)\n"
     "lfd 18,-112(1)\n"
     "lfd 19,-104(1)\n"
     "lfd 20,-96(1)\n"
     "lfd 21,-88(1)\n"
     "lfd 22,-80(1)\n"
     "lfd 23,-72(1)\n"
     "lfd 24,-64(1)\n"
     "lfd 25,-56(1)\n"
     "lfd 26,-48(1)\n"
     "lfd 27,-40(1)\n"
     "lfd 28,-32(1)\n"
     "ld 0, 16(1)\n"        /* reload the saved LR, interleaved with */
     "lfd 29,-24(1)\n"      /* the last FPR loads                    */
     "mtlr 0\n"
     "lfd 30,-16(1)\n"
     "lfd 31,-8(1)\n"

     : "=r"(result)         /* output variable: expected to be r3 */
     : [restore_state]"r"(restore_state),       /* input variables */
       [save_state]"r"(save_state),
       [extra]"r"(extra)
  );
  return result;
}

S390x

/*
 * s390x stack switch.
 *
 * Stores r6-r15 and f8-f15 into the frame addressed by r15, then calls
 * save_state(sp, extra).  If that returns NULL the epilogue runs on the
 * same stack.  Otherwise the returned value becomes the new stack pointer
 * (r15), restore_state(new_sp, extra) is called, and the epilogue reloads
 * the registers from the new stack.
 *
 * This depends on these attributes so that gcc generates a function
 * with no code before the asm, and only the return branch after.  (The
 * original wording said "blr", which is the PowerPC mnemonic.)
 *
 * NOTE(review): the asm declares no clobbers and never references its
 * output operand; it relies on the compiler placing 'result' in r2, as the
 * comment on the output operand says -- confirm with the toolchain in use.
 */
static __attribute__((noinline, optimize("O2")))
void *slp_switch(void *(*save_state)(void*, void*),
                 void *(*restore_state)(void*, void*),
                 void *extra)
{
  void *result;
  __asm__ volatile (
     /* The Stackless version by Kristjan Valur Jonsson,
        ported to s390x by Richard Plangger */

     "stmg 6,15,48(15)\n"         /* save r6-r15 starting at 48(r15)    */

     // store f8 - f15 into the stack frame that is not used!
     "std 8,128(15)\n"            /* f8-f11 go to 128..152(r15)         */
     "std 9,136(15)\n"
     "std 10,144(15)\n"
     "std 11,152(15)\n"

     "std 12,16(15)\n"            /* f12-f15 go to 16..40(r15)          */
     "std 13,24(15)\n"
     "std 14,32(15)\n"
     "std 15,40(15)\n"

     "lgr 10, %[restore_state]\n" /* save 'restore_state' for later */
     "lgr 11, %[extra]\n"         /* save 'extra' for later */
     "lgr 14, %[save_state]\n"    /* move 'save_state' into r14 for branching */
     "lgr 2, 15\n"                /* arg 1: current (old) stack pointer */
     "lgr 3, 11\n"                /* arg 2: extra                       */

     "lay 15,-160(15)\n"          /* create stack frame                 */
     "basr 14, 14\n"              /* call save_state()                  */
     "lay 15,160(15)\n"           /* drop the frame again               */

     "cgij 2, 0, 8, zero\n"       /* skip the rest if the return value is null */

     "lgr 15, 2\n"                /* change the stack pointer */

     /* From now on, the stack pointer is modified, but the content of the
        stack is not restored yet.  It contains only garbage here. */
                               /* arg 1: current (new) stack pointer
                                 is already in r2                    */
     "lgr 3, 11\n"             /* arg 2: extra                       */

     "lay 15,-160(15)\n"       /* create stack frame                 */
     "basr 14, 10\n"           /* call restore_state()               */
     "lay 15,160(15)\n"        /* drop the frame again               */

     /* The stack's content is now restored. */

     /* NOTE(review): "zero" is a named (non-local) assembler label; it would
        be defined twice if this asm were ever emitted more than once. */
     "zero:\n"

     /* Epilogue: reload f8-f15, then r6-r15 (which includes the stack
        pointer r15 and the return address r14) */
     "ld 8,128(15)\n"
     "ld 9,136(15)\n"
     "ld 10,144(15)\n"
     "ld 11,152(15)\n"

     "ld 12,16(15)\n"
     "ld 13,24(15)\n"
     "ld 14,32(15)\n"
     "ld 15,40(15)\n"

     "lmg 6,15,48(15)\n"

     : "=r"(result)         /* output variable: expected to be r2 */
     : [restore_state]"r"(restore_state),       /* input variables */
       [save_state]"r"(save_state),
       [extra]"r"(extra)
  );
  return result;
}
Clone this wiki locally