Skip to content

Commit 48b07bd

Browse files
committed
extstore: C version of automove algorithm
A couple of TODO items are left for a new issue I thought of. The memory buffer size is also hardcoded, which should be fixed. The "free and re-init" logic also needs to change to use a boolean in case any related option changes.
1 parent bb1080b commit 48b07bd

14 files changed

+327
-14
lines changed

Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ endif
4949

5050
if ENABLE_EXTSTORE
5151
memcached_SOURCES += extstore.c extstore.h \
52-
storage.c storage.h
52+
storage.c storage.h \
53+
slab_automove_extstore.c slab_automove_extstore.h
5354
endif
5455

5556
memcached_debug_SOURCES = $(memcached_SOURCES)

items.c

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "slab_automove.h"
55
#ifdef EXTSTORE
66
#include "storage.h"
7+
#include "slab_automove_extstore.h"
78
#endif
89
#include <sys/stat.h>
910
#include <sys/socket.h>
@@ -1488,14 +1489,29 @@ static void lru_maintainer_crawler_check(struct crawler_expired_data *cdata, log
14881489
}
14891490
}
14901491

1492+
slab_automove_reg_t slab_automove_default = {
1493+
.init = slab_automove_init,
1494+
.free = slab_automove_free,
1495+
.run = slab_automove_run
1496+
};
1497+
#ifdef EXTSTORE
1498+
slab_automove_reg_t slab_automove_extstore = {
1499+
.init = slab_automove_extstore_init,
1500+
.free = slab_automove_extstore_free,
1501+
.run = slab_automove_extstore_run
1502+
};
1503+
#endif
14911504
static pthread_t lru_maintainer_tid;
14921505

14931506
#define MAX_LRU_MAINTAINER_SLEEP 1000000
14941507
#define MIN_LRU_MAINTAINER_SLEEP 1000
14951508

14961509
static void *lru_maintainer_thread(void *arg) {
1510+
slab_automove_reg_t *sam = &slab_automove_default;
14971511
#ifdef EXTSTORE
14981512
void *storage = arg;
1513+
if (storage != NULL)
1514+
sam = &slab_automove_extstore;
14991515
int x;
15001516
#endif
15011517
int i;
@@ -1520,8 +1536,7 @@ static void *lru_maintainer_thread(void *arg) {
15201536
}
15211537

15221538
double last_ratio = settings.slab_automove_ratio;
1523-
void *am = slab_automove_init(settings.slab_automove_window,
1524-
settings.slab_automove_ratio);
1539+
void *am = sam->init(&settings);
15251540

15261541
pthread_mutex_lock(&lru_maintainer_lock);
15271542
if (settings.verbose > 2)
@@ -1597,13 +1612,12 @@ static void *lru_maintainer_thread(void *arg) {
15971612

15981613
if (settings.slab_automove == 1 && last_automove_check != current_time) {
15991614
if (last_ratio != settings.slab_automove_ratio) {
1600-
slab_automove_free(am);
1601-
am = slab_automove_init(settings.slab_automove_window,
1602-
settings.slab_automove_ratio);
1615+
sam->free(am);
1616+
am = sam->init(&settings);
16031617
last_ratio = settings.slab_automove_ratio;
16041618
}
16051619
int src, dst;
1606-
slab_automove_run(am, &src, &dst);
1620+
sam->run(am, &src, &dst);
16071621
if (src != -1 && dst != -1) {
16081622
slabs_reassign(src, dst);
16091623
LOGGER_LOG(l, LOG_SYSEVENTS, LOGGER_SLAB_MOVE, NULL,
@@ -1619,7 +1633,7 @@ static void *lru_maintainer_thread(void *arg) {
16191633
}
16201634
}
16211635
pthread_mutex_unlock(&lru_maintainer_lock);
1622-
slab_automove_free(am);
1636+
sam->free(am);
16231637
// LRU crawler *must* be stopped.
16241638
free(cdata);
16251639
if (settings.verbose > 2)

memcached.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6634,6 +6634,7 @@ int main (int argc, char **argv) {
66346634
settings.ext_drop_unread = false;
66356635
settings.ext_wbuf_size = 1024 * 1024 * 4;
66366636
settings.ext_compact_under = 0;
6637+
settings.slab_automove_freeratio = 0.005;
66376638
ext_cf.page_size = 1024 * 1024 * 64;
66386639
ext_cf.page_count = 64;
66396640
ext_cf.wbuf_size = settings.ext_wbuf_size;

memcached.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,7 @@ struct settings {
413413
unsigned int ext_wbuf_size; /* read only note for the engine */
414414
unsigned int ext_compact_under; /* when fewer than this many pages, compact */
415415
double ext_max_frag; /* ideal maximum page fragmentation */
416+
double slab_automove_freeratio; /* % of memory to hold free as buffer */
416417
bool ext_drop_unread; /* skip unread items during compaction */
417418
/* per-slab-class free chunk limit */
418419
unsigned int ext_free_memchunks[MAX_NUMBER_OF_SLAB_CLASSES];

slab_automove.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ typedef struct {
3030
slab_stats_automove sam_after[MAX_NUMBER_OF_SLAB_CLASSES];
3131
} slab_automove;
3232

33-
void *slab_automove_init(uint32_t window_size, double max_age_ratio) {
33+
void *slab_automove_init(struct settings *settings) {
34+
uint32_t window_size = settings->slab_automove_window;
35+
double max_age_ratio = settings->slab_automove_ratio;
3436
slab_automove *a = calloc(1, sizeof(slab_automove));
3537
if (a == NULL)
3638
return NULL;

slab_automove.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,19 @@
11
#ifndef SLAB_AUTOMOVE_H
22
#define SLAB_AUTOMOVE_H
33

4-
void *slab_automove_init(uint32_t window_size, double max_age_ratio);
4+
/*
 * Only pointers to struct settings appear in this header, so a forward
 * declaration is enough and keeps the header independent of include order.
 */
struct settings;

/* default automove functions */
void *slab_automove_init(struct settings *settings);
void slab_automove_free(void *arg);
void slab_automove_run(void *arg, int *src, int *dst);

/*
 * Signatures every automove implementation provides:
 *  - init: builds the implementation's private state from the settings and
 *          returns it as an opaque pointer (NULL when allocation fails).
 *  - free: releases the state returned by init.
 *  - run:  performs one evaluation pass and reports the slab class to take a
 *          page from (*src) and the class to give it to (*dst). The caller
 *          only moves a page when neither value is -1.
 */
typedef void *(*slab_automove_init_func)(struct settings *settings);
typedef void (*slab_automove_free_func)(void *arg);
typedef void (*slab_automove_run_func)(void *arg, int *src, int *dst);

/* Bundle of the three hooks, so the caller can pick an implementation at run time. */
typedef struct {
    slab_automove_init_func init;
    slab_automove_free_func free;
    slab_automove_run_func run;
} slab_automove_reg_t;
18+
819
#endif

slab_automove_extstore.c

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
/* Copyright 2017 Facebook.
2+
*
3+
* Use and distribution licensed under the BSD license. See
4+
* the LICENSE file for full text.
5+
*/
6+
7+
/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
8+
#include "memcached.h"
9+
#include "slab_automove_extstore.h"
10+
#include <stdlib.h>
11+
#include <string.h>
12+
13+
/* A slab class must own more than this many pages before it can be picked
 * as the oldest class or as a large-class reclaim source. */
#define MIN_PAGES_FOR_SOURCE 2
14+
/* A class is only considered for reclaim once its free chunks exceed this
 * many pages' worth of chunks. */
#define MIN_PAGES_FOR_RECLAIM 2.5
15+
/* Lower bound, in pages' worth of chunks, on the per-class free-chunk
 * target computed by memcheck(). */
#define MIN_PAGES_FREE 1.5
16+
/* Minimum spacing between memcheck() recalculations, in current_time units
 * (presumably seconds -- confirm against rel_time_t). */
#define MEMCHECK_PERIOD 60
17+
18+
/* One sample in a slab class's sliding window; one slot is rewritten per run. */
struct window_data {
    uint64_t age;             /* item age reported for the class on that run */
    uint64_t dirty;           /* 1 if the class evicted, hit OOM or gained pages */
    uint64_t evicted;         /* 1 if evictions or out-of-memory were observed */
    unsigned int excess_free; /* 1 if free chunks sat above the limit without shrinking */
};
24+
25+
struct window_global {
26+
uint32_t pool_low;
27+
uint32_t pool_high;
28+
};
29+
30+
typedef struct {
31+
struct window_data *window_data;
32+
struct window_global *window_global;
33+
struct settings *settings;
34+
uint32_t window_size;
35+
uint32_t window_cur;
36+
uint32_t item_size;
37+
rel_time_t last_memcheck_run;
38+
double max_age_ratio;
39+
double free_ratio;
40+
unsigned int free_mem[MAX_NUMBER_OF_SLAB_CLASSES];
41+
item_stats_automove iam_before[MAX_NUMBER_OF_SLAB_CLASSES];
42+
item_stats_automove iam_after[MAX_NUMBER_OF_SLAB_CLASSES];
43+
slab_stats_automove sam_before[MAX_NUMBER_OF_SLAB_CLASSES];
44+
slab_stats_automove sam_after[MAX_NUMBER_OF_SLAB_CLASSES];
45+
} slab_automove;
46+
47+
void *slab_automove_extstore_init(struct settings *settings) {
48+
uint32_t window_size = settings->slab_automove_window;
49+
double max_age_ratio = settings->slab_automove_ratio;
50+
slab_automove *a = calloc(1, sizeof(slab_automove));
51+
if (a == NULL)
52+
return NULL;
53+
a->window_data = calloc(window_size * MAX_NUMBER_OF_SLAB_CLASSES, sizeof(struct window_data));
54+
a->window_global = calloc(window_size, sizeof(struct window_global));
55+
a->window_size = window_size;
56+
a->max_age_ratio = max_age_ratio;
57+
a->free_ratio = settings->slab_automove_freeratio;
58+
a->item_size = settings->ext_item_size;
59+
a->last_memcheck_run = 0;
60+
a->settings = settings;
61+
if (a->window_data == NULL || a->window_global == NULL) {
62+
if (a->window_data)
63+
free(a->window_data);
64+
if (a->window_global)
65+
free(a->window_global);
66+
free(a);
67+
return NULL;
68+
}
69+
70+
// do a dry run to fill the before structs
71+
fill_item_stats_automove(a->iam_before);
72+
fill_slab_stats_automove(a->sam_before);
73+
74+
return (void *)a;
75+
}
76+
77+
void slab_automove_extstore_free(void *arg) {
78+
slab_automove *a = (slab_automove *)arg;
79+
free(a->window_data);
80+
free(a);
81+
}
82+
83+
static void window_sum(struct window_data *wd, struct window_data *w,
84+
uint32_t size) {
85+
for (int x = 0; x < size; x++) {
86+
struct window_data *d = &wd[x];
87+
w->age += d->age;
88+
w->dirty += d->dirty;
89+
w->evicted += d->evicted;
90+
w->excess_free += d->excess_free;
91+
}
92+
}
93+
94+
/* This could potentially merge with above */
95+
static void window_global_sum(struct window_global *wg,
96+
struct window_global *w, uint32_t size) {
97+
for (int x = 0; x < size; x++) {
98+
struct window_global *d = &wg[x];
99+
w->pool_high += d->pool_high;
100+
w->pool_low += d->pool_low;
101+
}
102+
}
103+
104+
/*
 * Record in the current global-window slot whether the global page pool is
 * below half of, or above, the target kept in free_mem[0].
 *
 * The slot is always cleared first; it stays clear while
 * global_page_pool_size() reports that the memory limit has not been
 * reached, or when the pool size lies between target/2 and target.
 */
static void global_pool_check(slab_automove *a) {
    bool mem_limit_reached;
    uint32_t target = a->free_mem[0];
    struct window_global *slot = &a->window_global[a->window_cur % a->window_size];
    unsigned int pool_pages = global_page_pool_size(&mem_limit_reached);

    memset(slot, 0, sizeof(*slot));
    if (mem_limit_reached) {
        if (pool_pages < target / 2) {
            slot->pool_low = 1;
        } else if (pool_pages > target) {
            slot->pool_high = 1;
        }
    }
}
118+
119+
/*
 * Recompute the free-memory targets, at most once per MEMCHECK_PERIOD.
 *
 * For each slab class from 1 upwards the target is free_ratio of the class's
 * total chunks, but never less than MIN_PAGES_FREE pages' worth of chunks.
 * The value is stored in free_mem[n] and mirrored into
 * settings->ext_free_memchunks[n]. free_mem[0] receives free_ratio of the
 * page total across those classes.
 */
static void memcheck(slab_automove *a) {
    unsigned int page_total = 0;

    if (current_time < a->last_memcheck_run + MEMCHECK_PERIOD)
        return;
    a->last_memcheck_run = current_time;

    for (int cls = 1; cls < MAX_NUMBER_OF_SLAB_CLASSES; cls++) {
        slab_stats_automove *stats = &a->sam_after[cls];
        page_total += stats->total_pages;

        unsigned int hold_free = (stats->total_pages * stats->chunks_per_page)
                * a->free_ratio;
        double floor_chunks = stats->chunks_per_page * MIN_PAGES_FREE;
        if (floor_chunks > hold_free)
            hold_free = floor_chunks;

        a->free_mem[cls] = hold_free;
        /* only store into the shared settings when the value changed */
        if (a->settings->ext_free_memchunks[cls] != hold_free) {
            a->settings->ext_free_memchunks[cls] = hold_free;
        }
    }
    a->free_mem[0] = page_total * a->free_ratio;
}
138+
139+
void slab_automove_extstore_run(void *arg, int *src, int *dst) {
140+
slab_automove *a = (slab_automove *)arg;
141+
int n;
142+
struct window_data w_sum;
143+
int oldest = -1;
144+
uint64_t oldest_age = 0;
145+
int youngest = -1;
146+
uint64_t youngest_age = ~0;
147+
*src = -1;
148+
*dst = -1;
149+
150+
global_pool_check(a);
151+
struct window_global wg_sum;
152+
memset(&wg_sum, 0, sizeof(struct window_global));
153+
window_global_sum(a->window_global, &wg_sum, a->window_size);
154+
// fill after structs
155+
fill_item_stats_automove(a->iam_after);
156+
fill_slab_stats_automove(a->sam_after);
157+
a->window_cur++;
158+
159+
memcheck(a);
160+
161+
// iterate slabs
162+
for (n = POWER_SMALLEST; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
163+
bool small_slab = a->sam_before[n].chunk_size < a->item_size
164+
? true : false;
165+
int w_offset = n * a->window_size;
166+
struct window_data *wd = &a->window_data[w_offset + (a->window_cur % a->window_size)];
167+
// summarize the window-up-to-now.
168+
memset(&w_sum, 0, sizeof(struct window_data));
169+
window_sum(&a->window_data[w_offset], &w_sum, a->window_size);
170+
memset(wd, 0, sizeof(struct window_data));
171+
172+
// if page delta, or evicted delta, mark window dirty
173+
// (or outofmemory)
174+
if (a->iam_after[n].evicted - a->iam_before[n].evicted > 0 ||
175+
a->iam_after[n].outofmemory - a->iam_before[n].outofmemory > 0) {
176+
wd->evicted = 1;
177+
wd->dirty = 1;
178+
}
179+
if (a->sam_after[n].total_pages - a->sam_before[n].total_pages > 0) {
180+
wd->dirty = 1;
181+
}
182+
// Mark excess free if we're over the free mem limit and the number of
183+
// chunks aren't decreasing at all.
184+
if (a->sam_after[n].free_chunks > a->free_mem[n]
185+
&& a->sam_after[n].free_chunks - a->sam_before[n].free_chunks >= 0) {
186+
if (a->free_mem[n] > 0) {
187+
wd->excess_free = 1;
188+
}
189+
}
190+
191+
// set age into window
192+
wd->age = a->iam_after[n].age;
193+
194+
// grab age as average of window total
195+
uint64_t age = w_sum.age / a->window_size;
196+
197+
// if > N free chunks and not dirty, make decision.
198+
if (a->sam_after[n].free_chunks > a->sam_after[n].chunks_per_page * MIN_PAGES_FOR_RECLAIM) {
199+
if (small_slab && w_sum.dirty == 0) {
200+
*src = n;
201+
*dst = 0;
202+
break;
203+
} else if (!small_slab && w_sum.excess_free >= a->window_size / 2
204+
&& a->sam_after[n].total_pages > MIN_PAGES_FOR_SOURCE) {
205+
// If large slab and free chunks haven't decreased for a full
206+
// window, reclaim pages.
207+
*src = n;
208+
*dst = 0;
209+
}
210+
}
211+
212+
// if oldest and have enough pages, is oldest
213+
if (!small_slab
214+
&& age > oldest_age
215+
&& a->sam_after[n].total_pages > MIN_PAGES_FOR_SOURCE) {
216+
oldest = n;
217+
oldest_age = age;
218+
}
219+
220+
// don't count as youngest if it hasn't been using new chunks.
221+
if (!small_slab && age < youngest_age && a->sam_after[n].total_pages != 0
222+
&& w_sum.excess_free < a->window_size) {
223+
youngest = n;
224+
youngest_age = age;
225+
}
226+
}
227+
228+
memcpy(a->iam_before, a->iam_after,
229+
sizeof(item_stats_automove) * MAX_NUMBER_OF_SLAB_CLASSES);
230+
memcpy(a->sam_before, a->sam_after,
231+
sizeof(slab_stats_automove) * MAX_NUMBER_OF_SLAB_CLASSES);
232+
// only make decisions if window has filled once.
233+
if (a->window_cur < a->window_size)
234+
return;
235+
236+
if (wg_sum.pool_high && youngest != -1) {
237+
/**src = 0;
238+
*dst = youngest;*/
239+
/* TODO: No current way to directly assign page from 0 to elsewhere.
240+
* Do a current hack by setting the youngest's free mem limiter to
241+
* zero and re-running memcheck in the next second.
242+
* If set rates are very high and the pool is too low, this can bottom
243+
* out...
244+
*/
245+
a->last_memcheck_run = 0;
246+
a->settings->ext_free_memchunks[youngest] = 0;
247+
} else if (wg_sum.pool_low && oldest != -1) {
248+
*src = oldest;
249+
*dst = 0;
250+
} else if (youngest != -1 && oldest != -1 && youngest != oldest) {
251+
// if we have a youngest and oldest, and oldest is outside the ratio.
252+
if (a->sam_after[youngest].free_chunks <= a->free_mem[youngest]
253+
&& youngest_age < ((double)oldest_age * a->max_age_ratio)) {
254+
*src = oldest;
255+
*dst = youngest;
256+
}
257+
}
258+
return;
259+
}

0 commit comments

Comments
 (0)