|
| 1 | +/* Copyright 2017 Facebook. |
| 2 | + * |
| 3 | + * Use and distribution licensed under the BSD license. See |
| 4 | + * the LICENSE file for full text. |
| 5 | + */ |
| 6 | + |
| 7 | +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 8 | +#include "memcached.h" |
| 9 | +#include "slab_automove_extstore.h" |
| 10 | +#include <stdlib.h> |
| 11 | +#include <string.h> |
| 12 | + |
/* A slab class must own more than this many pages before it may be chosen
 * as a source of pages (large-slab reclaim and the "oldest" candidate). */
#define MIN_PAGES_FOR_SOURCE 2
/* A class is only considered for reclaim once its free chunks exceed this
 * many pages' worth of chunks (chunks_per_page * MIN_PAGES_FOR_RECLAIM). */
#define MIN_PAGES_FOR_RECLAIM 2.5
/* Floor for the per-class free chunk target computed by memcheck(),
 * expressed in pages' worth of chunks. */
#define MIN_PAGES_FREE 1.5
/* Minimum distance between two memcheck() recalculations, in current_time
 * units (presumably seconds -- confirm against rel_time_t). */
#define MEMCHECK_PERIOD 60
| 17 | + |
/* One sample of per-slab-class state, recorded once per automove run.
 * Samples are kept in a ring of window_size entries per class and summed
 * by window_sum() to make decisions over the whole window. */
struct window_data {
    uint64_t age;              /* item age reported for the class on this run */
    uint64_t dirty;            /* 1 if evictions/OOM occurred or pages were added */
    uint64_t evicted;          /* 1 if the evicted or outofmemory counters grew */
    unsigned int excess_free;  /* 1 if free chunks exceed the target and are not shrinking */
};
| 24 | + |
/* One sample of global page pool state, recorded once per automove run
 * by global_pool_check() and summed by window_global_sum(). */
struct window_global {
    uint32_t pool_low;   /* 1 if the pool held fewer than half the target pages */
    uint32_t pool_high;  /* 1 if the pool held more than the target pages */
};
| 29 | + |
/* State of the extstore-aware slab automover. Allocated by
 * slab_automove_extstore_init() and handed back to the run/free functions
 * as an opaque pointer. */
typedef struct {
    /* window_size samples per slab class; class n starts at n * window_size. */
    struct window_data *window_data;
    /* window_size samples of global page pool state. */
    struct window_global *window_global;
    /* Settings passed to init; ext_free_memchunks[] is written through it. */
    struct settings *settings;
    /* Number of samples in each window (settings->slab_automove_window). */
    uint32_t window_size;
    /* Run counter; window_cur % window_size selects the current slot. */
    uint32_t window_cur;
    /* settings->ext_item_size; classes with smaller chunks are "small slabs". */
    uint32_t item_size;
    /* current_time at which memcheck() last recomputed the targets. */
    rel_time_t last_memcheck_run;
    /* settings->slab_automove_ratio; youngest/oldest age ratio threshold. */
    double max_age_ratio;
    /* settings->slab_automove_freeratio; fraction of memory to hold free. */
    double free_ratio;
    /* Per-class free chunk target; slot 0 holds the global page target. */
    unsigned int free_mem[MAX_NUMBER_OF_SLAB_CLASSES];
    /* Item and slab stat snapshots from the previous run ("before") and the
     * current run ("after"); "after" is copied to "before" at the end of
     * each run. */
    item_stats_automove iam_before[MAX_NUMBER_OF_SLAB_CLASSES];
    item_stats_automove iam_after[MAX_NUMBER_OF_SLAB_CLASSES];
    slab_stats_automove sam_before[MAX_NUMBER_OF_SLAB_CLASSES];
    slab_stats_automove sam_after[MAX_NUMBER_OF_SLAB_CLASSES];
} slab_automove;
| 46 | + |
| 47 | +void *slab_automove_extstore_init(struct settings *settings) { |
| 48 | + uint32_t window_size = settings->slab_automove_window; |
| 49 | + double max_age_ratio = settings->slab_automove_ratio; |
| 50 | + slab_automove *a = calloc(1, sizeof(slab_automove)); |
| 51 | + if (a == NULL) |
| 52 | + return NULL; |
| 53 | + a->window_data = calloc(window_size * MAX_NUMBER_OF_SLAB_CLASSES, sizeof(struct window_data)); |
| 54 | + a->window_global = calloc(window_size, sizeof(struct window_global)); |
| 55 | + a->window_size = window_size; |
| 56 | + a->max_age_ratio = max_age_ratio; |
| 57 | + a->free_ratio = settings->slab_automove_freeratio; |
| 58 | + a->item_size = settings->ext_item_size; |
| 59 | + a->last_memcheck_run = 0; |
| 60 | + a->settings = settings; |
| 61 | + if (a->window_data == NULL || a->window_global == NULL) { |
| 62 | + if (a->window_data) |
| 63 | + free(a->window_data); |
| 64 | + if (a->window_global) |
| 65 | + free(a->window_global); |
| 66 | + free(a); |
| 67 | + return NULL; |
| 68 | + } |
| 69 | + |
| 70 | + // do a dry run to fill the before structs |
| 71 | + fill_item_stats_automove(a->iam_before); |
| 72 | + fill_slab_stats_automove(a->sam_before); |
| 73 | + |
| 74 | + return (void *)a; |
| 75 | +} |
| 76 | + |
| 77 | +void slab_automove_extstore_free(void *arg) { |
| 78 | + slab_automove *a = (slab_automove *)arg; |
| 79 | + free(a->window_data); |
| 80 | + free(a); |
| 81 | +} |
| 82 | + |
| 83 | +static void window_sum(struct window_data *wd, struct window_data *w, |
| 84 | + uint32_t size) { |
| 85 | + for (int x = 0; x < size; x++) { |
| 86 | + struct window_data *d = &wd[x]; |
| 87 | + w->age += d->age; |
| 88 | + w->dirty += d->dirty; |
| 89 | + w->evicted += d->evicted; |
| 90 | + w->excess_free += d->excess_free; |
| 91 | + } |
| 92 | +} |
| 93 | + |
| 94 | +/* This could potentially merge with above */ |
| 95 | +static void window_global_sum(struct window_global *wg, |
| 96 | + struct window_global *w, uint32_t size) { |
| 97 | + for (int x = 0; x < size; x++) { |
| 98 | + struct window_global *d = &wg[x]; |
| 99 | + w->pool_high += d->pool_high; |
| 100 | + w->pool_low += d->pool_low; |
| 101 | + } |
| 102 | +} |
| 103 | + |
| 104 | +static void global_pool_check(slab_automove *a) { |
| 105 | + bool mem_limit_reached; |
| 106 | + uint32_t free = a->free_mem[0]; |
| 107 | + struct window_global *wg = &a->window_global[a->window_cur % a->window_size]; |
| 108 | + unsigned int count = global_page_pool_size(&mem_limit_reached); |
| 109 | + memset(wg, 0, sizeof(struct window_global)); |
| 110 | + if (!mem_limit_reached) |
| 111 | + return; |
| 112 | + if (count < free / 2) { |
| 113 | + wg->pool_low = 1; |
| 114 | + } else if (count > free) { |
| 115 | + wg->pool_high = 1; |
| 116 | + } |
| 117 | +} |
| 118 | + |
| 119 | +static void memcheck(slab_automove *a) { |
| 120 | + unsigned int total_pages = 0; |
| 121 | + if (current_time < a->last_memcheck_run + MEMCHECK_PERIOD) |
| 122 | + return; |
| 123 | + a->last_memcheck_run = current_time; |
| 124 | + for (int n = 1; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) { |
| 125 | + slab_stats_automove *sam = &a->sam_after[n]; |
| 126 | + total_pages += sam->total_pages; |
| 127 | + unsigned int hold_free = (sam->total_pages * sam->chunks_per_page) |
| 128 | + * a->free_ratio; |
| 129 | + if (sam->chunks_per_page * MIN_PAGES_FREE > hold_free) |
| 130 | + hold_free = sam->chunks_per_page * MIN_PAGES_FREE; |
| 131 | + a->free_mem[n] = hold_free; |
| 132 | + if (a->settings->ext_free_memchunks[n] != hold_free) { |
| 133 | + a->settings->ext_free_memchunks[n] = hold_free; |
| 134 | + } |
| 135 | + } |
| 136 | + a->free_mem[0] = total_pages * a->free_ratio; |
| 137 | +} |
| 138 | + |
/* Run one pass of the extstore slab automover and suggest a page move.
 *
 * arg: state returned by slab_automove_extstore_init().
 * src: out; slab class to take a page from, or -1 for no move.
 * dst: out; slab class to give the page to (0 is used for reclaiming to
 *      the global pool), or -1 for no move.
 *
 * Each pass records one sample per slab class (and one global pool sample)
 * into a ring of window_size entries, then decides based on the sums over
 * that window.
 */
void slab_automove_extstore_run(void *arg, int *src, int *dst) {
    slab_automove *a = (slab_automove *)arg;
    int n;
    struct window_data w_sum;
    int oldest = -1;
    uint64_t oldest_age = 0;
    int youngest = -1;
    // ~0 converts to the maximum uint64_t, so any real age compares lower.
    uint64_t youngest_age = ~0;
    *src = -1;
    *dst = -1;

    // Sample the global page pool into the current slot (indexed with the
    // pre-increment window_cur), then sum the whole global window.
    global_pool_check(a);
    struct window_global wg_sum;
    memset(&wg_sum, 0, sizeof(struct window_global));
    window_global_sum(a->window_global, &wg_sum, a->window_size);
    // fill after structs
    fill_item_stats_automove(a->iam_after);
    fill_slab_stats_automove(a->sam_after);
    // Per-class slots below are indexed with the incremented counter.
    a->window_cur++;

    // Refresh the free memory targets (rate limited inside memcheck).
    memcheck(a);

    // iterate slabs
    for (n = POWER_SMALLEST; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
        // "Small" classes have chunks smaller than the extstore item size.
        bool small_slab = a->sam_before[n].chunk_size < a->item_size
            ? true : false;
        int w_offset = n * a->window_size;
        struct window_data *wd = &a->window_data[w_offset + (a->window_cur % a->window_size)];
        // summarize the window-up-to-now.
        // The sum is taken before the current slot is cleared, so it still
        // includes the sample that is about to be overwritten.
        memset(&w_sum, 0, sizeof(struct window_data));
        window_sum(&a->window_data[w_offset], &w_sum, a->window_size);
        memset(wd, 0, sizeof(struct window_data));

        // if page delta, or evicted delta, mark window dirty
        // (or outofmemory)
        if (a->iam_after[n].evicted - a->iam_before[n].evicted > 0 ||
            a->iam_after[n].outofmemory - a->iam_before[n].outofmemory > 0) {
            wd->evicted = 1;
            wd->dirty = 1;
        }
        if (a->sam_after[n].total_pages - a->sam_before[n].total_pages > 0) {
            wd->dirty = 1;
        }
        // Mark excess free if we're over the free mem limit and the number of
        // chunks aren't decreasing at all.
        // A zero free_mem[n] (target not computed yet) never marks excess.
        if (a->sam_after[n].free_chunks > a->free_mem[n]
                && a->sam_after[n].free_chunks - a->sam_before[n].free_chunks >= 0) {
            if (a->free_mem[n] > 0) {
                wd->excess_free = 1;
            }
        }

        // set age into window
        wd->age = a->iam_after[n].age;

        // grab age as average of window total
        uint64_t age = w_sum.age / a->window_size;

        // if > N free chunks and not dirty, make decision.
        if (a->sam_after[n].free_chunks > a->sam_after[n].chunks_per_page * MIN_PAGES_FOR_RECLAIM) {
            if (small_slab && w_sum.dirty == 0) {
                // Clean small slab with spare chunks: reclaim from it and
                // stop scanning the remaining classes.
                *src = n;
                *dst = 0;
                break;
            } else if (!small_slab && w_sum.excess_free >= a->window_size / 2
                    && a->sam_after[n].total_pages > MIN_PAGES_FOR_SOURCE) {
                // If large slab and excess free was flagged for at least
                // half of the window, reclaim pages. No break here, so a
                // later class may overwrite this choice.
                *src = n;
                *dst = 0;
            }
        }

        // if oldest and have enough pages, is oldest
        if (!small_slab
                && age > oldest_age
                && a->sam_after[n].total_pages > MIN_PAGES_FOR_SOURCE) {
            oldest = n;
            oldest_age = age;
        }

        // don't count as youngest if it hasn't been using new chunks.
        if (!small_slab && age < youngest_age && a->sam_after[n].total_pages != 0
                && w_sum.excess_free < a->window_size) {
            youngest = n;
            youngest_age = age;
        }
    }

    // The current snapshots become the baseline for the next run.
    memcpy(a->iam_before, a->iam_after,
            sizeof(item_stats_automove) * MAX_NUMBER_OF_SLAB_CLASSES);
    memcpy(a->sam_before, a->sam_after,
            sizeof(slab_stats_automove) * MAX_NUMBER_OF_SLAB_CLASSES);
    // only make decisions if window has filled once.
    // NOTE(review): any *src/*dst chosen inside the loop above is still
    // returned on this early exit -- confirm that is intended.
    if (a->window_cur < a->window_size)
        return;

    if (wg_sum.pool_high && youngest != -1) {
        /* Intended decision, currently disabled:
         *   *src = 0;
         *   *dst = youngest;
         */
        /* TODO: No current way to directly assign page from 0 to elsewhere.
         * Do a current hack by setting the youngest's free mem limiter to
         * zero and re-running memcheck in the next second.
         * If set rates are very high and the pool is too low, this can bottom
         * out...
         */
        a->last_memcheck_run = 0;
        a->settings->ext_free_memchunks[youngest] = 0;
    } else if (wg_sum.pool_low && oldest != -1) {
        // Global pool ran low during the window: refill it from the oldest
        // class.
        *src = oldest;
        *dst = 0;
    } else if (youngest != -1 && oldest != -1 && youngest != oldest) {
        // if we have a youngest and oldest, and oldest is outside the ratio.
        // The youngest must also be at or below its free chunk target.
        if (a->sam_after[youngest].free_chunks <= a->free_mem[youngest]
                && youngest_age < ((double)oldest_age * a->max_age_ratio)) {
            *src = oldest;
            *dst = youngest;
        }
    }
    return;
}
0 commit comments