|
| 1 | +/* |
| 2 | + libco.ppc (2016-09-14) |
| 3 | + author: blargg |
| 4 | + license: public domain |
| 5 | +*/ |
| 6 | + |
| 7 | +#define LIBCO_C |
| 8 | +#include "libco.h" |
| 9 | +#include "settings.h" |
| 10 | + |
| 11 | +#include <stdlib.h> |
| 12 | +#include <stdint.h> |
| 13 | +#include <string.h> |
| 14 | + |
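/* state format (byte offsets into a cothread's state block)
     +8  r1 (stack pointer)
    +16  r2 (TOC pointer)
    +96  r12..r31, 8 bytes each (through +248)
   +256  LR
   +264  CR
   +384  f14..f31, 8 bytes each (through +520); skipped with LIBCO_PPC_NOFP
   +528  v20..v31, 16 bytes each (through +704); only with __ALTIVEC__
         VRSAVE word after the vector registers; only with __ALTIVEC__
   the stack of a created cothread lives in the same allocation, after
   the state area
*/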
| 26 | + |
/* Sizing constants, all in bytes. */
enum {
   /* bytes allocated for the main thread's state block, and reserved on top
      of the requested stack size for every created cothread */
   state_size  = 1024,
   /* gap kept between the initial stack pointer and the end of the block */
   above_stack = 2048,
   /* the initial stack pointer is rounded down to a multiple of this */
   stack_align = 256
};
| 30 | + |
/* Handle of the cothread currently running on this thread. Stays 0 until
   co_active() allocates a state block for the initial context or
   co_switch() installs another handle.
   NOTE(review): thread_local is presumably supplied by settings.h - confirm. */
static thread_local cothread_t co_active_handle = 0;

/* determine environment */

/* Expression over the compiler-predefined 64-bit PowerPC macros.
   NOTE(review): not referenced anywhere in the visible part of this file. */
#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__)
| 36 | + |
| 37 | +text_section |
| 38 | +static const uint32_t libco_ppc_code[1024] = { |
| 39 | + 0xf8240008, /* std r1, 8(4) */ |
| 40 | + 0xf8440010, /* std r2, 16(4) */ |
| 41 | + 0xf9840060, /* std r12, 96(4) */ |
| 42 | + 0xf9a40068, /* std r13, 104(4) */ |
| 43 | + 0xf9c40070, /* std r14, 112(4) */ |
| 44 | + 0xf9e40078, /* std r15, 120(4) */ |
| 45 | + 0xfa040080, /* std r16, 128(4) */ |
| 46 | + 0xfa240088, /* std r17, 136(4) */ |
| 47 | + 0xfa440090, /* std r18, 144(4) */ |
| 48 | + 0xfa640098, /* std r19, 152(4) */ |
| 49 | + 0xfa8400a0, /* std r20, 160(4) */ |
| 50 | + 0xfaa400a8, /* std r21, 168(4) */ |
| 51 | + 0xfac400b0, /* std r22, 176(4) */ |
| 52 | + 0xfae400b8, /* std r23, 184(4) */ |
| 53 | + 0xfb0400c0, /* std r24, 192(4) */ |
| 54 | + 0xfb2400c8, /* std r25, 200(4) */ |
| 55 | + 0xfb4400d0, /* std r26, 208(4) */ |
| 56 | + 0xfb6400d8, /* std r27, 216(4) */ |
| 57 | + 0xfb8400e0, /* std r28, 224(4) */ |
| 58 | + 0xfba400e8, /* std r29, 232(4) */ |
| 59 | + 0xfbc400f0, /* std r30, 240(4) */ |
| 60 | + 0xfbe400f8, /* std r31, 248(4) */ |
| 61 | + 0x7ca802a6, /* mflr r5 */ |
| 62 | + 0xf8a40100, /* std r5, 256(4) */ |
| 63 | + 0x7ca00026, /* mfcr r5 */ |
| 64 | + 0xf8a40108, /* std r5, 264(4) */ |
| 65 | + 0xe8230008, /* ld r1, 8(3) */ |
| 66 | + 0xe8430010, /* ld r2, 16(3) */ |
| 67 | + 0xe9830060, /* ld r12, 96(3) */ |
| 68 | + 0xe9a30068, /* ld r13, 104(3) */ |
| 69 | + 0xe9c30070, /* ld r14, 112(3) */ |
| 70 | + 0xe9e30078, /* ld r15, 120(3) */ |
| 71 | + 0xea030080, /* ld r16, 128(3) */ |
| 72 | + 0xea230088, /* ld r17, 136(3) */ |
| 73 | + 0xea430090, /* ld r18, 144(3) */ |
| 74 | + 0xea630098, /* ld r19, 152(3) */ |
| 75 | + 0xea8300a0, /* ld r20, 160(3) */ |
| 76 | + 0xeaa300a8, /* ld r21, 168(3) */ |
| 77 | + 0xeac300b0, /* ld r22, 176(3) */ |
| 78 | + 0xeae300b8, /* ld r23, 184(3) */ |
| 79 | + 0xeb0300c0, /* ld r24, 192(3) */ |
| 80 | + 0xeb2300c8, /* ld r25, 200(3) */ |
| 81 | + 0xeb4300d0, /* ld r26, 208(3) */ |
| 82 | + 0xeb6300d8, /* ld r27, 216(3) */ |
| 83 | + 0xeb8300e0, /* ld r28, 224(3) */ |
| 84 | + 0xeba300e8, /* ld r29, 232(3) */ |
| 85 | + 0xebc300f0, /* ld r30, 240(3) */ |
| 86 | + 0xebe300f8, /* ld r31, 248(3) */ |
| 87 | + 0xe8a30100, /* ld r5, 256(3) */ |
| 88 | + 0x7ca803a6, /* mtlr r5 */ |
| 89 | + 0xe8a30108, /* ld r5, 264(3) */ |
| 90 | + 0x7caff120, /* mtcr r5 */ |
| 91 | + |
| 92 | + #ifndef LIBCO_PPC_NOFP |
| 93 | + 0xd9c40180, /* stfd r14, 384(4) */ |
| 94 | + 0xd9e40188, /* stfd r15, 392(4) */ |
| 95 | + 0xda040190, /* stfd r16, 400(4) */ |
| 96 | + 0xda240198, /* stfd r17, 408(4) */ |
| 97 | + 0xda4401a0, /* stfd r18, 416(4) */ |
| 98 | + 0xda6401a8, /* stfd r19, 424(4) */ |
| 99 | + 0xda8401b0, /* stfd r20, 432(4) */ |
| 100 | + 0xdaa401b8, /* stfd r21, 440(4) */ |
| 101 | + 0xdac401c0, /* stfd r22, 448(4) */ |
| 102 | + 0xdae401c8, /* stfd r23, 456(4) */ |
| 103 | + 0xdb0401d0, /* stfd r24, 464(4) */ |
| 104 | + 0xdb2401d8, /* stfd r25, 472(4) */ |
| 105 | + 0xdb4401e0, /* stfd r26, 480(4) */ |
| 106 | + 0xdb6401e8, /* stfd r27, 488(4) */ |
| 107 | + 0xdb8401f0, /* stfd r28, 496(4) */ |
| 108 | + 0xdba401f8, /* stfd r29, 504(4) */ |
| 109 | + 0xdbc40200, /* stfd r30, 512(4) */ |
| 110 | + 0xdbe40208, /* stfd r31, 520(4) */ |
| 111 | + 0xc9c30180, /* lfd r14, 384(3) */ |
| 112 | + 0xc9e30188, /* lfd r15, 392(3) */ |
| 113 | + 0xca030190, /* lfd r16, 400(3) */ |
| 114 | + 0xca230198, /* lfd r17, 408(3) */ |
| 115 | + 0xca4301a0, /* lfd r18, 416(3) */ |
| 116 | + 0xca6301a8, /* lfd r19, 424(3) */ |
| 117 | + 0xca8301b0, /* lfd r20, 432(3) */ |
| 118 | + 0xcaa301b8, /* lfd r21, 440(3) */ |
| 119 | + 0xcac301c0, /* lfd r22, 448(3) */ |
| 120 | + 0xcae301c8, /* lfd r23, 456(3) */ |
| 121 | + 0xcb0301d0, /* lfd r24, 464(3) */ |
| 122 | + 0xcb2301d8, /* lfd r25, 472(3) */ |
| 123 | + 0xcb4301e0, /* lfd r26, 480(3) */ |
| 124 | + 0xcb6301e8, /* lfd r27, 488(3) */ |
| 125 | + 0xcb8301f0, /* lfd r28, 496(3) */ |
| 126 | + 0xcba301f8, /* lfd r29, 504(3) */ |
| 127 | + 0xcbc30200, /* lfd r30, 512(3) */ |
| 128 | + 0xcbe30208, /* lfd r31, 520(3) */ |
| 129 | + #endif |
| 130 | + |
| 131 | + #ifdef __ALTIVEC__ |
| 132 | + 0x38a00210, /* li r5, 528 */ |
| 133 | + 0x7e842bce, /* stvxl v20, r4, r5 */ |
| 134 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 135 | + 0x7ea42bce, /* stvxl v21, r4, r5 */ |
| 136 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 137 | + 0x7ec42bce, /* stvxl v22, r4, r5 */ |
| 138 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 139 | + 0x7ee42bce, /* stvxl v23, r4, r5 */ |
| 140 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 141 | + 0x7f042bce, /* stvxl v24, r4, r5 */ |
| 142 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 143 | + 0x7f242bce, /* stvxl v25, r4, r5 */ |
| 144 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 145 | + 0x7f442bce, /* stvxl v26, r4, r5 */ |
| 146 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 147 | + 0x7f642bce, /* stvxl v27, r4, r5 */ |
| 148 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 149 | + 0x7f842bce, /* stvxl v28, r4, r5 */ |
| 150 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 151 | + 0x7fa42bce, /* stvxl v29, r4, r5 */ |
| 152 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 153 | + 0x7fc42bce, /* stvxl v30, r4, r5 */ |
| 154 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 155 | + 0x7fe42bce, /* stvxl v31, r4, r5 */ |
| 156 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 157 | + 0x7ca042a6, /* mfvrsave r5 */ |
| 158 | + 0x90a402e0, /* stw r5, 736(4) */ |
| 159 | + 0x38a00210, /* li r5, 528 */ |
| 160 | + 0x7e832ace, /* lvxl v20, r3, r5 */ |
| 161 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 162 | + 0x7ea32ace, /* lvxl v21, r3, r5 */ |
| 163 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 164 | + 0x7ec32ace, /* lvxl v22, r3, r5 */ |
| 165 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 166 | + 0x7ee32ace, /* lvxl v23, r3, r5 */ |
| 167 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 168 | + 0x7f032ace, /* lvxl v24, r3, r5 */ |
| 169 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 170 | + 0x7f232ace, /* lvxl v25, r3, r5 */ |
| 171 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 172 | + 0x7f432ace, /* lvxl v26, r3, r5 */ |
| 173 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 174 | + 0x7f632ace, /* lvxl v27, r3, r5 */ |
| 175 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 176 | + 0x7f832ace, /* lvxl v28, r3, r5 */ |
| 177 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 178 | + 0x7fa32ace, /* lvxl v29, r3, r5 */ |
| 179 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 180 | + 0x7fc32ace, /* lvxl v30, r3, r5 */ |
| 181 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 182 | + 0x7fe32ace, /* lvxl v31, r3, 5 */ |
| 183 | + 0x38a50010, /* addi r5, r5, 16 */ |
| 184 | + 0x80a302d0, /* lwz r5, 720(3) */ |
| 185 | + 0x7ca043a6, /* mtvrsave r5 */ |
| 186 | + #endif |
| 187 | + |
| 188 | + 0x4e800020 /* blr */ |
| 189 | +}; |
| 190 | + |
/* function call goes directly to code: the address of the opcode table is
   cast to a two-argument function pointer and called. The routine reloads
   registers from state block x (first argument, addressed through r3 in the
   table) and stores the current ones into state block y (second argument,
   addressed through r4). */
#define CO_SWAP_ASM(x, y) ((void (*)(cothread_t, cothread_t))(uintptr_t)libco_ppc_code)(x, y)
| 193 | + |
| 194 | +cothread_t co_active() { |
| 195 | + if(!co_active_handle) co_active_handle = (uint64_t*)malloc(state_size); |
| 196 | + return co_active_handle; |
| 197 | +} |
| 198 | + |
| 199 | +cothread_t co_create(unsigned int size, void (*entry_)(void), |
| 200 | + size_t *out_size) { |
| 201 | + |
| 202 | + uintptr_t entry = (uintptr_t)entry_; |
| 203 | + uint64_t* t = 0; |
| 204 | + uintptr_t sp; |
| 205 | + int shift; |
| 206 | + |
| 207 | + /* be sure main thread was successfully allocated */ |
| 208 | + if(co_active()) { |
| 209 | + size += state_size + above_stack + stack_align; |
| 210 | + t = (uint64_t*)malloc(size); |
| 211 | + } |
| 212 | + |
| 213 | + if(t) { |
| 214 | + /* save current registers into new thread, so that any special ones will have proper values when thread is begun */ |
| 215 | + CO_SWAP_ASM(t, t); |
| 216 | + |
| 217 | + /* put stack near end of block, and align */ |
| 218 | + sp = (uintptr_t)t + size - above_stack; |
| 219 | + sp -= sp % stack_align; |
| 220 | + |
| 221 | + /* set up sp and entry will be called on next swap */ |
| 222 | + t[1] = (uint64_t)sp; |
| 223 | + t[12] = (uint64_t)entry; |
| 224 | + t[32] = (uint64_t)entry; |
| 225 | + } |
| 226 | + *out_size = size; |
| 227 | + return t; |
| 228 | +} |
| 229 | + |
| 230 | +void co_delete(cothread_t t) { |
| 231 | + free(t); |
| 232 | +} |
| 233 | + |
| 234 | +void co_switch(cothread_t t) { |
| 235 | + cothread_t old = co_active_handle; |
| 236 | + co_active_handle = t; |
| 237 | + |
| 238 | + CO_SWAP_ASM(t, old); |
| 239 | +} |
0 commit comments