From 3e4c4474ed614cf91352eb606df3aa5e659e26f6 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 8 Feb 2017 14:42:23 +0000 Subject: [PATCH 01/62] dasm_x86.lua: Add support for RDTSCP instruction This is a very useful instruction for self-benchmarking programs that want to read the CPU timestamp counter efficiently. See Intel whitepaper for details: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf --- src/dasm_x86.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dasm_x86.lua b/src/dasm_x86.lua index 0c11f020ec..ef4442c89b 100644 --- a/src/dasm_x86.lua +++ b/src/dasm_x86.lua @@ -1230,6 +1230,7 @@ local map_op = { shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:", rdtsc_0 = "0F31", -- P1+ + rdtscp_0 = "0F01F9",-- P6+ rdpmc_0 = "0F33", -- P6+ cpuid_0 = "0FA2", -- P1+ From 95872b1653c2e58a01389486585edd5662c2e112 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 8 Feb 2017 14:45:10 +0000 Subject: [PATCH 02/62] core.timeline: New module for "timeline" logs --- src/core/main.lua | 3 +- src/core/timeline.dasl | 310 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 312 insertions(+), 1 deletion(-) create mode 100644 src/core/timeline.dasl diff --git a/src/core/main.lua b/src/core/main.lua index 611f167152..b1e46060c1 100644 --- a/src/core/main.lua +++ b/src/core/main.lua @@ -28,7 +28,7 @@ end -- Reserve names that we want to use for global module. -- (This way we avoid errors from the 'strict' module.) -_G.config, _G.engine, _G.memory, _G.link, _G.packet, _G.timer, +_G.config, _G.engine, _G.memory, _G.link, _G.packet, _G.timer, _G.timeline, _G.main = nil ffi.cdef[[ @@ -129,6 +129,7 @@ function initialize () _G.link = require("core.link") _G.packet = require("core.packet") _G.timer = require("core.timer") + _G.timeline = require("core.timeline") _G.main = getfenv() end diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl new file mode 100644 index 0000000000..b3a29fdbba --- /dev/null +++ b/src/core/timeline.dasl @@ -0,0 +1,310 @@ +-- timeline: high-resolution event log using in-memory ring buffer +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +module(..., package.seeall) + +local dasm = require("dasm") +local ffi = require("ffi") +local C = ffi.C +local S = require("syscall") +local shm = require("core.shm") +local lib = require("core.lib") + +-- Load a set of events for logging onto a timeline. +-- Returns a set of logging functions. +-- +-- For example: +-- e = load_events(engine.timeline, "core.app", {name="myapp", class="intel10g"}) +-- Loads the events defined in src/core/app.events and tags each event +-- with the name of the app and class. Events can then be logged: +-- e:app_pulled(inpackets, inbytes, outpackets, outbytes) +function load_events (tl, eventmodule, extra) + local category = eventmodule:match("[^.]+$") -- "core.engine" -> "engine" + -- Convert extra into " key1=value1 key2=value2 ..." attributes string. + local spec = require(eventmodule.."_events") + return load_events_from_string(tl, spec, category, extra) +end + +-- (Helper function) +function load_events_from_string (tl, spec, category, extra) + local events = {} + -- Insert a delimiter character (\a "alarm") between log messages. + spec = spec:gsub("\n(%d|)", "\n\a%1") + for message in spec:gmatch("[^\a]+") do + message = message:gsub("(.-)%s*$", "%1") -- trim trailing spaces + local event = message:match("([%w_]+):") + events[event] = mkevent(tl, category, message, extra) + end + -- Return the set of functions in an efficient-to-call FFI object. + local mt = {__index = events} + return ffi.new(ffi.metatype(ffi.typeof("struct{}"), mt)) +end + +------------------------------------------------------------ +-- Binary data structures + +ffi.cdef[[ + // 64B file header + struct timeline_header { + uint64_t magic; + uint16_t major_version; + uint16_t minor_version; + uint32_t log_bytes; + uint32_t strings_bytes; + uint16_t priority_mask; + uint8_t reserved[42]; + }; + + // 64B log entry + struct timeline_entry { + uint64_t tsc; // CPU timestamp (note: assumed to be first elem below) + uint16_t msgid; // msgid*16 is index into string table + uint16_t core_numa; // TSC_AUX: core (bits 0-7) + numa (12-15) + uint32_t reserved; // (available for future use) + uint64_t arg0, arg1, arg2, arg3, arg4, arg5; // message arguments + }; + + // Private local state for updating the log + struct timeline_state { + // state for the entries ring buffer + struct timeline_entry *entries; + uint32_t level; + uint32_t next_entry; + uint32_t num_entries; + // state for the string table + char *stringtable; + int stringtable_size; + int next_string; + }; +]] + +-- Header of the log file +local magic = 0xa3ff7223441d0001ULL +local major, minor = 2, 1 + +------------------------------------------------------------ +-- API + +-- Create a new timeline under the given shared memory path. +function new (shmpath, num_entries, size_stringtable) + num_entries = num_entries or 1e6 + size_stringtable = size_stringtable or 1e6 + -- Calculate size based on number of log entries + local size_header = ffi.sizeof("struct timeline_header") + local size_entries = num_entries * ffi.sizeof("struct timeline_entry") + local size = size_header + size_entries + size_stringtable + -- Allocate one shm object with memory for all data structures + local memory = shm.create(shmpath, ffi.typeof("char["..size.."]")) + local header = ffi.cast("struct timeline_header *", memory) + local ring = ffi.cast("struct timeline_entry *", memory + size_header) + local stringtable = ffi.cast("char*", memory + size_header + size_entries) + -- Fill in header values + header.magic = 0xa3ff7223441d0001ULL + header.major_version = 2 + header.minor_version = 0 + header.log_bytes = size_entries + header.strings_bytes = size_stringtable + -- Private state + local state = ffi.new("struct timeline_state") + state.entries = ring + state.level = 0 + state.next_entry = 0 + state.num_entries = num_entries + state.stringtable = stringtable + state.stringtable_size = size_stringtable + state.next_string = 0 + -- Return an object + return state +end + +function mkevent (timeline, category, message, attrs) + if not message:match("^%d|([^:]+):") then + error(("event syntax error: %q"):format(message)) + end + -- Extract the log level for the message + local level = tonumber(message:match("^(%d)|")) + -- Insert the category ("0|event:" -> "0|category.event:") + message = message:gsub("^(%d|)", "%1"..category..".") + -- Insert the additional attributes. + -- e.g. "1|foo: arg" with {a1="x",a2="y"} becomes "1|foo a1=x a2=y: arg" + for k,v in pairs(attrs or {}) do + message = message:gsub(":", (" %s=%s:"):format(k, v), 1) + end + -- Count the number of arguments. + -- (See http://stackoverflow.com/a/11158158/1523491) + local _, n = (message:match(":([^\n]*)")):gsub("[^%s]+","") + local id = intern(timeline, message) + local event = event -- move asm function into local scope + local log = timeline + if n==0 then return function () event(log,level,id,0,0,0,0,0,0) end end + if n==1 then return function (a) event(log,level,id,a,0,0,0,0,0) end end + if n==2 then return function (a,b) event(log,level,id,a,b,0,0,0,0) end end + if n==3 then return function (a,b,c) event(log,level,id,a,b,c,0,0,0) end end + if n==4 then return function (a,b,c,d) event(log,level,id,a,b,c,d,0,0) end end + if n==5 then return function (a,b,c,d,e) event(log,level,id,a,b,c,d,e,0) end end + if n==6 then return function (a,b,c,d,e,f) event(log,level,id,a,b,c,d,e,f) end end + error("illegal number of arguments: "..n) +end + +-- Get or set the current timeline log level. +function level (timeline, level) + if level then timeline.level = level end + return timeline.level +end + +------------------------------------------------------------ +-- Defining log message formats + +-- Intern a string in the timeline stringtable. +-- Return a unique ID (16-bit offset in 16-byte words) or 0xFFFF if +-- the table is full. + +-- Cache known strings in a weak table keyed on timeline object. +-- (Timeline object is an FFI struct that can't contain a Lua tables.) +local known = setmetatable({}, {__mode='k'}) + +function intern (timeline, str) + known[timeline] = known[timeline] or {} + if known[timeline][str] then + return known[timeline][str] + end + local len = #str+1 -- count null terminator + if timeline.next_string + len >= timeline.stringtable_size then + return 0xFFFF -- overflow + else + local position = timeline.next_string + ffi.copy(timeline.stringtable + position, str) + timeline.next_string = lib.align(position + len, 16) + local id = position/16 + assert(id == math.floor(id), "timeline string alignment error") + known[timeline][str] = id + return id + end +end + +------------------------------------------------------------ +-- Logging messages + +|.arch x64 +|.actionlist actions +|.globalnames globalnames + + +-- Registers holding function parameters for x86-64 calling convention. +|.define p0, rdi +|.define p1, rsi +|.define p2, rdx +|.define p3, rcx +|.define p4, r8 +|.define p5, r9 + +|.type log, struct timeline_state +|.type msg, struct timeline_entry +-- void log(timeline, level, msg, arg0, ..., arg5) +local function asmlog (Dst) + |->log: + -- Check that the enabled log level is >= the event log level + | mov eax, log:p0->level + | cmp p1, rax + | jge >1 + | ret + |1: + -- Load index to write into r11 + | mov r11d, log:p0->next_entry + -- Increment next index and check for wrap-around + | mov eax, r11d + | add eax, 1 + | xor ecx, ecx + | cmp eax, log:p0->num_entries + | cmove eax, ecx + | mov log:p0->next_entry, eax + -- Convert log entry number to pointer + | shl r11, 6 -- 64B element number -> byte index + | mov r10, log:p0->entries + | add r10, r11 + -- Log the arguments from register parameters + | mov msg:r10->msgid, dx -- 16-bit value from p2 + | mov msg:r10->arg0, p3 + | mov msg:r10->arg1, p4 + | mov msg:r10->arg2, p5 + -- Log the arguments from stack parameters + | mov rax, [rsp+16] + | mov msg:r10->arg3, rax + | mov rax, [rsp+24] + | mov msg:r10->arg4, rax + | mov rax, [rsp+32] + | mov msg:r10->arg5, rax + -- Log the timestamp and core/numa aux info + | rdtscp + | mov msg:r10->tsc, eax + | mov [r10+4], edx -- assumes tsc is first field of struct timeline_entry + | mov msg:r10->core_numa, cx + + | ret +end + +local Dst, globals = dasm.new(actions, nil, nil, 1 + #globalnames) +asmlog(Dst) +local mcode, size = Dst:build() +local entry = dasm.globals(globals, globalnames) + +event = ffi.cast("uint64_t(*)(struct timeline_state *, int, int, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t)", entry.log) + +_anchor = mcode + +--dasm.dump(mcode, size) + +local test_events = [[ +6|six: +event with level 6 (0 args) + +5|five: a b c +event with level 5 (3 args) + +4|four: a b c d e f +event with level 4 (6 args) + +3|three: +event with level 3 +]] + +-- selftest is designed mostly to check that timeline logging does not +-- crash the snabb process e.g. including overflow of the log entries +-- and the string table. it does not verify the contents of the log +-- messages. +function selftest () + print("selftest: timeline") + local tl = new("selftest/timeline") + local e = load_events_from_string(tl, test_events, "selftest", + {module="timeline", func="selftest"}) + level(tl, 4) -- won't log event three + + print("check logging individual messages") + -- First check that log entries are created + assert(tl.next_entry == 0) + e.six() assert(tl.next_entry == 1) + e.five(1, 2, 3) assert(tl.next_entry == 2) + e.four(1, 2, 3, 4, 5, 6) assert(tl.next_entry == 3) + e.three() assert(tl.next_entry == 3) -- skipped + + local n = tl.num_entries*10 + print("check wrap-around on "..lib.comma_value(n).." events") + for i = 1, n do + e.six() + e.five(1, 2, 3) + e.four(1, 2, 3, 4, 5, 6) + e.three() + end + -- overflow the string table + print("overflowing string table") + for i = 1, 1e5 do + mkevent(tl, "selftest", "9|dummy_event_definition:", {i=i}) + end + -- report median logging time + local sample = {} + for i = 1, 1000 do sample[i] = tl.entries[i].tsc - tl.entries[i-1].tsc end + table.sort(sample) + print("median time delta for sample:", tonumber(sample[1]).." cycles") + print("selftest: ok") +end + From 930f294177615fc0a7a012c66bcbd409d17a96fa Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 8 Feb 2017 14:46:15 +0000 Subject: [PATCH 03/62] Makefile: Embed "*.events" timeline specs --- src/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 467b88e9eb..e89c3f15f0 100644 --- a/src/Makefile +++ b/src/Makefile @@ -22,6 +22,7 @@ RMSRC = $(shell find . -name '*.md' -not -regex './obj.*' -printf '%P ') PROGRAM = $(shell find program -regex '^[^/]+/[^/]+' -type d -printf '%P ') # sort to eliminate potential duplicate of programs.inc INCSRC = $(sort $(shell find . -regex '[^\#]*\.inc' -printf '%P ') programs.inc) +EVTSRC = $(sort $(shell find . -regex '[^\#]*\.events' -printf '%P ')) LUAOBJ := $(patsubst %.lua,obj/%_lua.o,$(LUASRC)) PFLUAOBJ := $(patsubst %.lua,obj/%_lua.o,$(PFLUASRC)) @@ -33,6 +34,7 @@ JITOBJS:= $(patsubst %,obj/jit_%.o,$(JITSRC)) EXTRAOBJS := obj/jit_tprof.o obj/jit_vmprof.o obj/strict.o RMOBJS := $(patsubst %,obj/%,$(RMSRC)) INCOBJ := $(patsubst %.inc,obj/%_inc.o, $(INCSRC)) +EVTOBJ := $(patsubst %.events,obj/%_events.o, $(EVTSRC)) EXE := bin/snabb $(patsubst %,bin/%,$(PROGRAM)) # TESTMODS expands to: @@ -49,7 +51,7 @@ TESTSCRIPTS = $(shell find . -name "selftest.sh" -executable | xargs) PATH := ../lib/luajit/usr/local/bin:$(PATH) -snabb: $(LUAOBJ) $(PFLUAOBJ) $(HOBJ) $(COBJ) $(ARCHOBJ) $(ASMOBJ) $(INCOBJ) $(LUAJIT_A) +snabb: $(LUAOBJ) $(PFLUAOBJ) $(HOBJ) $(COBJ) $(ARCHOBJ) $(ASMOBJ) $(INCOBJ) $(EVTOBJ) $(LUAJIT_A) $(E) "LINK $@" $(Q) $(CC) $(DEBUG) -Wl,--no-as-needed -Wl,-E -Werror -Wall -o $@ $^ \ ../lib/luajit/src/libluajit.a \ @@ -162,6 +164,13 @@ $(INCOBJ): obj/%_inc.o: %.inc Makefile | $(OBJDIR) echo "]=============]") > $(basename $@).luainc $(Q) luajit -bg -n $(subst /,.,$*)_inc $(basename $@).luainc $@ +$(EVTOBJ): obj/%_events.o: %.events Makefile | $(OBJDIR) + $(E) "EVENTS $@" + @(echo -n "return [=============["; \ + cat $<; \ + echo "]=============]") > $(basename $@).luainc + $(Q) luajit -bg -n $(subst /,.,$*)_events $(basename $@).luainc $@ + # Create list of programs that exist programs.inc: program @(for d in program/*/; do basename $$d; done) > $@ From aa9980a5b58383b9ff2e63fe848d9f95612c7bae Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 8 Feb 2017 14:47:14 +0000 Subject: [PATCH 04/62] engine: Add engine.events, app.events, link.events --- src/core/app.events | 11 ++++++++ src/core/engine.events | 60 ++++++++++++++++++++++++++++++++++++++++++ src/core/link.events | 10 +++++++ 3 files changed, 81 insertions(+) create mode 100644 src/core/app.events create mode 100644 src/core/engine.events create mode 100644 src/core/link.events diff --git a/src/core/app.events b/src/core/app.events new file mode 100644 index 0000000000..1d77fbe828 --- /dev/null +++ b/src/core/app.events @@ -0,0 +1,11 @@ +3|pull: inpackets inbytes outpackets outbytes droppackets dropbytes +Entering app pull() callback. + +3|pulled: inpackets inbytes outpackets outbytes droppackets dropbytes +Returned from app pull() callback. + +3|push: inpackets inbytes outpackets outbytes droppackets dropbytes +Entering app push() callback. + +3|pushed: inpackets inbytes outpackets outbytes droppackets dropbytes +Returned from app push() callback. diff --git a/src/core/engine.events b/src/core/engine.events new file mode 100644 index 0000000000..4c974a79a9 --- /dev/null +++ b/src/core/engine.events @@ -0,0 +1,60 @@ +4|sleep_Hz: usec Hz +The engine requests that the kernel suspend this process for a period of +microseconds in order to reduce CPU utilization and achieve a fixed +frequency of breaths per second (Hz). + +4|sleep_on_idle: usec +The engine requests that the kernel suspend this process for a period +of microseconds in order to reduce CPU utilization because idleness +has been detected (a breath in which no packets were processed.) + +4|wakeup_from_sleep: +The engine resumes operation after sleeping voluntarily. + + +6|engine_started: +The engine starts the traffic processing loop. + +6|engine_stopped: +The engine stops the traffic processing loop. + + +5|breath_start: breath totalpackets totalbytes totaletherbits +The engine starts an iteration of the packet-processing event loop (a +"breath".) + +The total count of packets, bytes, and bits (including layer-1 +ethernet overhead) that the engine has processed are included. These +can be used to track the rate of traffic. + +3|got_monotonic_time: unixnanos +The engine has completed initialization for the breath: synchronized +the current time and handled any pending error recovery. + +'unixnanos' is the current wall-clock time in nanoseconds since the epoc. +This can be used to synchronize the cycle timestamps with wall-clock time. + +4|breath_pulled: +The engine has "pulled" new packets into the event loop for processing. + +4|breath_pushed: +The engine has "pushed" packets one step through the processing network. + +5|breath_end: breath npackets bpp +The engine completes an iteration of the event loop (a "breath.") + +'packets' is the total number of packets processed during the breath. +'bpp' is the average number bytes per packet. + +Note: 'packets' is an internal measure of how many packets were +deallocated (freed) during processing. This does not necessarily +correspond directly to ingress or egress packets on a given interface. + + +4|polled_timers: + The engine polled its timers and executed any that were expired. + +4|commited_counters: + The engine commits the latest counter values to externally visible + shared memory. + diff --git a/src/core/link.events b/src/core/link.events new file mode 100644 index 0000000000..d6d097c483 --- /dev/null +++ b/src/core/link.events @@ -0,0 +1,10 @@ +2|packet_start: wirelength + Started capturing a packet from a link. + This event is followed by one or more app_pcap_data messages + containing (partial) payload and then app_pcap_end. +2|packet_data: a b c d e f + Payload excerpt from packet being captured. + Each argument is 8 bytes of sequential payload (48 bytes total.) +2|packet_end: wirelength + Finished capturing a packet from a link. The (partial) payload is + logged in the immediately preceeding packet_data events. From 8530942a5753cb13905c2a5c815982b60534a723 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 8 Feb 2017 14:47:45 +0000 Subject: [PATCH 05/62] core.link: Add dropped bytes counter --- src/core/link.h | 2 +- src/core/link.lua | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/link.h b/src/core/link.h index add1d73bd1..255f6c4549 100644 --- a/src/core/link.h +++ b/src/core/link.h @@ -9,7 +9,7 @@ struct link { // http://en.wikipedia.org/wiki/Circular_buffer struct packet *packets[LINK_RING_SIZE]; struct { - struct counter *dtime, *txbytes, *rxbytes, *txpackets, *rxpackets, *txdrop; + struct counter *dtime, *txbytes, *rxbytes, *txpackets, *rxpackets, *txdrop, *txdropbytes; } stats; // Two cursors: // read: the next element to be read diff --git a/src/core/link.lua b/src/core/link.lua index 6fcc9fb58c..dd2dbdfe00 100644 --- a/src/core/link.lua +++ b/src/core/link.lua @@ -23,7 +23,7 @@ local size = C.LINK_RING_SIZE -- NB: Huge slow-down if this is not local max = C.LINK_MAX_PACKETS local provided_counters = { - "dtime", "rxpackets", "rxbytes", "txpackets", "txbytes", "txdrop" + "dtime", "rxpackets", "rxbytes", "txpackets", "txbytes", "txdrop", "txdropbytes" } function new (name) @@ -60,6 +60,7 @@ function transmit (r, p) -- assert(p) if full(r) then counter.add(r.stats.txdrop) + counter.add(r.stats.txdropbytes, p.length) packet.free(p) else r.packets[r.write] = p From 9b252f0e6f990c963dde1f275840153cb75110b3 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 8 Feb 2017 14:49:00 +0000 Subject: [PATCH 06/62] engine: Instrument with timeline events --- src/core/app.lua | 108 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 98 insertions(+), 10 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 50f05d991a..d4fa8945e4 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -14,6 +14,8 @@ local zone = require("jit.zone") local jit = require("jit") local ffi = require("ffi") local C = ffi.C +local timeline_mod = require("core.timeline") -- avoid collision with timeline() + require("core.packet_h") -- Packet per pull @@ -29,6 +31,9 @@ test_skipped_code = 43 -- Indexed both by name (in a table) and by number (in an array). app_table, app_array = {}, {} link_table, link_array = {}, {} +-- Timeline events specific to app and link instances +app_events = setmetatable({}, { __mode = 'k' }) +link_events = setmetatable({}, { __mode = 'k' }) configuration = config.new() @@ -39,6 +44,17 @@ freebits = counter.create("engine/freebits.counter") -- Total packet bits free freebytes = counter.create("engine/freebytes.counter") -- Total packet bytes freed configs = counter.create("engine/configs.counter") -- Total configurations loaded +-- Timeline event log +local timeline_log, events -- initialized on demand + +function timeline () + if timeline_log == nil then + timeline_log = timeline_mod.new("engine/timeline") + events = timeline_mod.load_events(timeline_log, "core.engine") + end + return timeline_log +end + -- Breathing regluation to reduce CPU usage when idle by calling usleep(3). -- -- There are two modes available: @@ -185,6 +201,7 @@ function apply_config_actions (actions, conf) name, tostring(app))) end local zone = app.zone or getfenv(class.new)._NAME or name + app_events[app] = timeline_mod.load_events(timeline(), "core.app", {app=name}) app.appname = name app.output = {} app.input = {} @@ -229,6 +246,8 @@ function apply_config_actions (actions, conf) if not new_app_table[ta] then error("no such app: " .. ta) end -- Create or reuse a link and assign/update receiving app index local link = link_table[linkspec] or link.new(linkspec) + link_events[link] = + timeline_mod.load_events(timeline(), "core.link", {linkspec=linkspec}) link.receiving_app = app_name_to_index[ta] -- Add link to apps new_app_table[fa].output[fl] = link @@ -254,6 +273,8 @@ end -- Call this to "run snabb switch". function main (options) + timeline() -- ensure timeline is created and initialized + events.engine_started() options = options or {} local done = options.done local no_timers = options.no_timers @@ -271,11 +292,12 @@ function main (options) monotonic_now = C.get_monotonic_time() repeat breathe() - if not no_timers then timer.run() end + if not no_timers then timer.run() events.polled_timers() end if not busywait then pace_breathing() end until done and done() counter.commit() if not options.no_report then report(options.report) end + events.engine_stopped() end local nextbreath @@ -288,14 +310,18 @@ function pace_breathing () nextbreath = nextbreath or monotonic_now local sleep = tonumber(nextbreath - monotonic_now) if sleep > 1e-6 then + events.sleep_Hz(Hz, math.round(sleep*1e6)) C.usleep(sleep * 1e6) monotonic_now = C.get_monotonic_time() + events.wakeup_from_sleep() end nextbreath = math.max(nextbreath + 1/Hz, monotonic_now) else if lastfrees == counter.read(frees) then sleep = math.min(sleep + 1, maxsleep) + events.sleep_on_idle(sleep) C.usleep(sleep) + events.wakeup_from_sleep() else sleep = math.floor(sleep/2) end @@ -306,33 +332,62 @@ function pace_breathing () end function breathe () + local freed_packets0 = counter.read(frees) + local freed_bytes0 = counter.read(freebytes) + events.breath_start(counter.read(breaths), freed_packets0, freed_bytes0, + counter.read(freebits)) monotonic_now = C.get_monotonic_time() -- Restart: restart dead apps restart_dead_apps() -- Inhale: pull work into the app network + events.got_monotonic_time(C.get_time_ns()) for i = 1, #app_array do local app = app_array[i] --- if app.pull then --- zone(app.zone) app:pull() zone() if app.pull and not app.dead then zone(app.zone) - with_restart(app, app.pull) + if timeline_mod.level(timeline_log) <= 3 then + app_events[app].pull(linkstats(app)) + with_restart(app, app.pull) + app_events[app].pulled(linkstats(app)) + else + with_restart(app, app.pull) + end zone() end end + events.breath_pulled() -- Exhale: push work out through the app network local firstloop = true repeat local progress = false -- For each link that has new data, run the receiving app for i = 1, #link_array do - local link = link_array[i] - if firstloop or link.has_new_data then - link.has_new_data = false - local receiver = app_array[link.receiving_app] + local l = link_array[i] + if firstloop or l.has_new_data then + -- Consider logging a packet + if l.has_new_data and timeline_mod.level(timeline_log) <= 2 then + local p = link.front(l) + if p ~= nil then + link_events[l].packet_start(p.length) + local u64 = ffi.cast("uint64_t*", p.data) + for n = 0, p.length/8, 6 do + link_events[l].packet_data(u64[n+0],u64[n+1],u64[n+2], + u64[n+3],u64[n+4],u64[n+5]) + end + link_events[l].packet_end(p.length) + end + end + l.has_new_data = false + local receiver = app_array[l.receiving_app] if receiver.push and not receiver.dead then zone(receiver.zone) - with_restart(receiver, receiver.push) + if timeline_mod.level(timeline_log) <= 3 then + app_events[receiver].push(linkstats(receiver)) + with_restart(receiver, receiver.push) + app_events[receiver].pushed(linkstats(receiver)) + else + with_restart(receiver, receiver.push) + end zone() progress = true end @@ -340,9 +395,42 @@ function breathe () end firstloop = false until not progress -- Stop after no link had new data + events.breath_pushed() + local freed + local freed_packets = counter.read(frees) - freed_packets0 + local freed_bytes = (counter.read(freebytes) - freed_bytes0) + local freed_bytes_per_packet = freed_bytes / math.max(tonumber(freed_packets), 1) + events.breath_end(counter.read(breaths), freed_packets, freed_bytes_per_packet) counter.add(breaths) -- Commit counters at a reasonable frequency - if counter.read(breaths) % 100 == 0 then counter.commit() end + if counter.read(breaths) % 100 == 0 then + counter.commit() + events.commited_counters() + end + -- Sample events with dynamic priorities. + -- Lower priorities are enabled 1/10th as often as the one above. + local r = math.random() + if r < 0.00001 then timeline_mod.level(timeline_log, 2) + elseif r < 0.00010 then timeline_mod.level(timeline_log, 3) + elseif r < 0.01000 then timeline_mod.level(timeline_log, 3) + elseif r < 0.10000 then timeline_mod.level(timeline_log, 5) + else timeline_mod.level(timeline_log, 6) + end +end + +function linkstats (app) + local inp, inb, outp, outb, dropp, dropb = 0, 0, 0, 0, 0, 0 + for i = 1, #app.input do + inp = inp + tonumber(counter.read(app.input[i].stats.rxpackets)) + inb = inb + tonumber(counter.read(app.input[i].stats.rxbytes)) + end + for i = 1, #app.output do + outp = outp + tonumber(counter.read(app.output[i].stats.txpackets)) + outb = outb + tonumber(counter.read(app.output[i].stats.txbytes)) + dropp = dropp + tonumber(counter.read(app.output[i].stats.txdrop)) + dropb = dropb + tonumber(counter.read(app.output[i].stats.txdropbytes)) + end + return inp, inb, outp, outb, dropp, dropb end function report (options) From 40ceef6032efdcb52d59bba71353b3f9f93ff882 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Mon, 20 Feb 2017 09:34:40 +0000 Subject: [PATCH 07/62] core.timeline: Switch to double-float on disk Use 'double' instead of 'uint64_t' for values in the timeline file. This change is motivated by making timeline files easier to process by R. In the future we may switch back to uint64_t for the TSC counter and/or argument values for improved precision. The major_version file header field can be used to avoid confusion. The obvious downside to using doubles is that the TSC value will lose precision as the server uptime increases (the TSC starts at zero and increases at the base frequency of the CPU e.g. 2GHz.) The impact seems to be modest though. For example a 2GHz CPU would start rounding TSC values to the nearest 128 (likely quite acceptable in practice) after approximately 2 years of operation (2^53 * 128 cycles.) So - storing the TSC as a double-float is definitely a kludge - but unlikely to cause any real harm and expedient for the short-term goal of putting this code to use without getting blocked due to e.g. my lack of sophisticated as an R hacker. --- src/core/timeline.dasl | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index b3a29fdbba..47208453ea 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -51,17 +51,16 @@ ffi.cdef[[ uint16_t minor_version; uint32_t log_bytes; uint32_t strings_bytes; - uint16_t priority_mask; - uint8_t reserved[42]; + uint8_t reserved[44]; }; // 64B log entry struct timeline_entry { - uint64_t tsc; // CPU timestamp (note: assumed to be first elem below) + double tsc; // CPU timestamp (note: assumed to be first elem below) uint16_t msgid; // msgid*16 is index into string table uint16_t core_numa; // TSC_AUX: core (bits 0-7) + numa (12-15) uint32_t reserved; // (available for future use) - uint64_t arg0, arg1, arg2, arg3, arg4, arg5; // message arguments + double arg0, arg1, arg2, arg3, arg4, arg5; // message arguments }; // Private local state for updating the log @@ -100,7 +99,7 @@ function new (shmpath, num_entries, size_stringtable) local stringtable = ffi.cast("char*", memory + size_header + size_entries) -- Fill in header values header.magic = 0xa3ff7223441d0001ULL - header.major_version = 2 + header.major_version = 3 header.minor_version = 0 header.log_bytes = size_entries header.strings_bytes = size_stringtable @@ -223,22 +222,21 @@ local function asmlog (Dst) | mov r10, log:p0->entries | add r10, r11 -- Log the arguments from register parameters - | mov msg:r10->msgid, dx -- 16-bit value from p2 - | mov msg:r10->arg0, p3 - | mov msg:r10->arg1, p4 - | mov msg:r10->arg2, p5 - -- Log the arguments from stack parameters - | mov rax, [rsp+16] - | mov msg:r10->arg3, rax - | mov rax, [rsp+24] - | mov msg:r10->arg4, rax - | mov rax, [rsp+32] - | mov msg:r10->arg5, rax + | mov msg:r10->msgid, dx + | movsd qword msg:r10->arg0, xmm0 + | movsd qword msg:r10->arg1, xmm1 + | movsd qword msg:r10->arg2, xmm2 + | movsd qword msg:r10->arg3, xmm3 + | movsd qword msg:r10->arg4, xmm4 + | movsd qword msg:r10->arg5, xmm5 -- Log the timestamp and core/numa aux info | rdtscp - | mov msg:r10->tsc, eax - | mov [r10+4], edx -- assumes tsc is first field of struct timeline_entry | mov msg:r10->core_numa, cx + -- Convert TSC in EAX:EDX to double + | shl rdx, 32 + | or rax, rdx + | cvtsi2sd xmm0, rax + | movsd qword msg:r10->tsc, xmm0 | ret end @@ -248,7 +246,7 @@ asmlog(Dst) local mcode, size = Dst:build() local entry = dasm.globals(globals, globalnames) -event = ffi.cast("uint64_t(*)(struct timeline_state *, int, int, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t)", entry.log) +event = ffi.cast("void(*)(struct timeline_state *, int, int, double, double, double, double, double, double)", entry.log) _anchor = mcode From 563ae7ae32ac59bc1999a305fa0ee884527243b3 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Mon, 20 Feb 2017 17:20:58 +0000 Subject: [PATCH 08/62] engine: update timeline level probabilities --- src/core/app.lua | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index ddf83620bb..15a573db50 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -433,11 +433,13 @@ function breathe () -- Sample events with dynamic priorities. -- Lower priorities are enabled 1/10th as often as the one above. local r = math.random() - if r < 0.00001 then timeline_mod.level(timeline_log, 2) - elseif r < 0.00010 then timeline_mod.level(timeline_log, 3) - elseif r < 0.01000 then timeline_mod.level(timeline_log, 4) - elseif r < 0.10000 then timeline_mod.level(timeline_log, 5) - else timeline_mod.level(timeline_log, 6) + if r < 0.000001 then timeline_mod.level(timeline_log, 1) + elseif r < 0.000010 then timeline_mod.level(timeline_log, 2) + elseif r < 0.000100 then timeline_mod.level(timeline_log, 3) + elseif r < 0.001000 then timeline_mod.level(timeline_log, 4) + elseif r < 0.010000 then timeline_mod.level(timeline_log, 5) + elseif r < 0.100000 then timeline_mod.level(timeline_log, 6) + else timeline_mod.level(timeline_log, 7) end running = false end From ffb7379c7e5463084da817d3a1e4976e51c0088b Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Tue, 21 Feb 2017 08:50:08 +0000 Subject: [PATCH 09/62] engine: Randomize timeline log level with math Simplify the code and eliminate unwanted branches from the engine loop by drawing a random timeline level from a log-uniform distribution that mathematically favors higher log levels over lower ones. Plucked log5() out of the air i.e. each log level should be enabled for 5x more breaths than the one below. Here is how the distribution of log level choice looks in practice using this algorithm: > t = {0,0,0,0,0,0,0,0,0} > for i = 1, 1e8 do local n = math.max(1,math.ceil(math.log(math.random(5^9))/math.log(5))) t[n] = t[n]+1 end > for i,n in ipairs(t) do print(i,n) end 1 560 2 2151 3 10886 4 55149 5 273376 6 1367410 7 6844261 8 34228143 9 171120244 Note: Lua provides only natural logarithm functions but it is easy to derive other bases from this (google "log change of base formula"). --- src/core/app.lua | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 11a8e5fea3..e07f6e98bc 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -444,17 +444,14 @@ function breathe () counter.commit() events.commited_counters() end - -- Sample events with dynamic priorities. - -- Lower priorities are enabled 1/10th as often as the one above. - local r = math.random() - if r < 0.000001 then timeline_mod.level(timeline_log, 1) - elseif r < 0.000010 then timeline_mod.level(timeline_log, 2) - elseif r < 0.000100 then timeline_mod.level(timeline_log, 3) - elseif r < 0.001000 then timeline_mod.level(timeline_log, 4) - elseif r < 0.010000 then timeline_mod.level(timeline_log, 5) - elseif r < 0.100000 then timeline_mod.level(timeline_log, 6) - else timeline_mod.level(timeline_log, 7) - end + -- Randomize the log level. Enable each level in 5x more breaths + -- than the level below by randomly picking from log5() distribution. + -- Goal is ballpark 1000 messages per second (~15min for 1M entries.) + -- + -- Could be better to reduce the log level over time to "stretch" + -- logs for long running processes? Improvements possible :-). + local level = math.max(1, math.ceil(math.log(math.random(5^9))/math.log(5))) + timeline_mod.level(timeline_log, level) running = false end From 16a6bb21eb5bb303f63fc57bf97b3dbe55382268 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 12 Apr 2017 11:27:35 +0000 Subject: [PATCH 10/62] engine: Remove timeline packet payload sampling I suspect that it is a misfeature for the timeline to sample the contents of packets. Do we really want user data potentially appearing in debug logs? Removed for now. --- src/core/app.lua | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 57c1fc0435..9a7327d1e2 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -402,7 +402,6 @@ function breathe () for i = 1, #breathe_pull_order do local app = breathe_pull_order[i] if app.pull and not app.dead then - if timeline_mod.level(timeline_log) <= 2 then log_links(app.input) end zone(app.zone) if timeline_mod.level(timeline_log) <= 3 then app_events[app].pull(linkstats(app)) @@ -420,7 +419,6 @@ function breathe () local app = breathe_push_order[i] if app.push and not app.dead then zone(app.zone) - if timeline_mod.level(timeline_log) <= 2 then log_links(app.output) end if timeline_mod.level(timeline_log) <= 3 then app_events[app].push(linkstats(app)) with_restart(app, app.push) @@ -428,7 +426,6 @@ function breathe () else with_restart(app, app.push) end - if timeline_mod.level(timeline_log) <= 2 then log_links(app.output) end zone() end end @@ -470,22 +467,6 @@ function linkstats (app) return inp, inb, outp, outb, dropp, dropb end --- Log packet payload from links. -function log_links (links) - for _, l in ipairs(links) do - local p = link.front(l) - if p ~= nil then - link_events[l].packet_start(p.length) - local u64 = ffi.cast("uint64_t*", p.data) - for n = 0, p.length/8, 6 do - link_events[l].packet_data(u64[n+0],u64[n+1],u64[n+2], - u64[n+3],u64[n+4],u64[n+5]) - end - link_events[l].packet_end(p.length) - end - end -end - function report (options) if not options or options.showload then report_load() From 1bb4108a3e2a0763412138e58e41ca8be337e522 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 30 Oct 2018 17:26:39 +0100 Subject: [PATCH 11/62] core.app: remove remains from 16a6bb2 (timeline packet sampling) --- src/core/app.lua | 5 +---- src/core/link.events | 10 ---------- 2 files changed, 1 insertion(+), 14 deletions(-) delete mode 100644 src/core/link.events diff --git a/src/core/app.lua b/src/core/app.lua index 980e5918b5..ad2d34ce24 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -52,9 +52,8 @@ end -- The set of all active apps and links in the system, indexed by name. app_table, link_table = {}, {} --- Timeline events specific to app and link instances +-- Timeline events specific to app instances app_events = setmetatable({}, { __mode = 'k' }) -link_events = setmetatable({}, { __mode = 'k' }) configuration = config.new() @@ -368,8 +367,6 @@ function apply_config_actions (actions) local link = link.new(linkspec) link_table[linkspec] = link configuration.links[linkspec] = true - link_events[link] = - timeline_mod.load_events(timeline(), "core.link", {linkspec=linkspec}) end function ops.link_output (appname, linkname, linkspec) local app = app_table[appname] diff --git a/src/core/link.events b/src/core/link.events deleted file mode 100644 index d6d097c483..0000000000 --- a/src/core/link.events +++ /dev/null @@ -1,10 +0,0 @@ -2|packet_start: wirelength - Started capturing a packet from a link. - This event is followed by one or more app_pcap_data messages - containing (partial) payload and then app_pcap_end. -2|packet_data: a b c d e f - Payload excerpt from packet being captured. - Each argument is 8 bytes of sequential payload (48 bytes total.) -2|packet_end: wirelength - Finished capturing a packet from a link. The (partial) payload is - logged in the immediately preceeding packet_data events. From 0e52b2406ae3c4ee55786e5f6432d8870d7d4fb7 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 31 Oct 2018 17:28:30 +0100 Subject: [PATCH 12/62] core.timeline: make timeline log available to other core modules --- src/core/app.lua | 4 ++-- src/core/timeline.dasl | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index ad2d34ce24..52df1cbced 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -43,8 +43,8 @@ end -- Timeline event log local timeline_log, events -- initialized on demand function timeline () - if timeline_log == nil then - timeline_log = timeline_mod.new("engine/timeline") + if not timeline_log then + timeline_log = timeline_mod.log() events = timeline_mod.load_events(timeline_log, "core.engine") end return timeline_log diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 47208453ea..64e4c40eae 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -10,6 +10,15 @@ local S = require("syscall") local shm = require("core.shm") local lib = require("core.lib") +-- Initialize and return the timeline log. +local timeline_log +function log () + if timeline_log == nil then + timeline_log = new("engine/timeline") + end + return timeline_log +end + -- Load a set of events for logging onto a timeline. -- Returns a set of logging functions. -- From e8348451ece53bc4a451227d02b2d6b567eeb5fa Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 31 Oct 2018 17:29:02 +0100 Subject: [PATCH 13/62] core.packet: record packet allocation events to timeline --- src/core/app.lua | 2 +- src/core/packet.events | 31 +++++++++++++++++++++++++++++++ src/core/packet.lua | 17 ++++++++++++++++- 3 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 src/core/packet.events diff --git a/src/core/app.lua b/src/core/app.lua index 52df1cbced..266cc0b957 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -632,8 +632,8 @@ function breathe () -- Commit counters and rebalance freelists at a reasonable frequency if counter.read(breaths) % 100 == 0 then counter.commit() - packet.rebalance_freelists() events.commited_counters() + packet.rebalance_freelists() end -- Randomize the log level. Enable each level in 5x more breaths -- than the level below by randomly picking from log5() distribution. diff --git a/src/core/packet.events b/src/core/packet.events new file mode 100644 index 0000000000..682eda4355 --- /dev/null +++ b/src/core/packet.events @@ -0,0 +1,31 @@ +2|packet_allocated: + A packet has been allocated from the packet freelist. + +2|packet_freed: length + A packet has been freed to the packet freelist. + + 'length' is the byte size of the packet. + +6|packets_preallocated: packets + DMA memory for packets had been preallocated from the operating system. + + 'packets' is the number of packets for which space has been reserved. + +4|group_freelist_wait: + The process is waiting to acquire the group freelist’s lock. + +4|group_freelist_locked: + The process has acquired the group freelist’s lock. + +4|group_freelist_unlocked: + The process has released the group freelist’s lock. + +4|group_freelist_released: packets + The packet freelist was rebalanced with the group freelist. + + 'packets' is the number of packets released to the group freelist. + +4|group_freelist_reclaimed: packets + The packet freelist was refilled from the group freelist. + + 'packets' is the number of packets reclaimed from the group freelist. diff --git a/src/core/packet.lua b/src/core/packet.lua index 130a585e08..2e01fb1036 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -13,6 +13,9 @@ local memory = require("core.memory") local shm = require("core.shm") local counter = require("core.counter") local sync = require("core.sync") +local timeline = require("core.timeline") + +local events = timeline.load_events(timeline.log(), "core.packet") require("core.packet_h") @@ -107,13 +110,18 @@ end -- Return borrowed packets to group freelist. function rebalance_freelists () - if group_fl and freelist_nfree(packets_fl) > packets_allocated then + local free_packets = freelist_nfree(packets_fl) + if group_fl and free_packets > packets_allocated then + events.group_freelist_wait() freelist_lock(group_fl) + events.group_freelist_locked() while freelist_nfree(packets_fl) > packets_allocated and not freelist_full(group_fl) do freelist_add(group_fl, freelist_remove(packets_fl)) end freelist_unlock(group_fl) + events.group_freelist_unlocked() + events.group_freelist_released(free_packets - freelist_nfree(packets_fl)) end end @@ -121,17 +129,22 @@ end function allocate () if freelist_nfree(packets_fl) == 0 then if group_fl then + events.group_freelist_wait() freelist_lock(group_fl) + events.group_freelist_locked() while freelist_nfree(group_fl) > 0 and freelist_nfree(packets_fl) < packets_allocated do freelist_add(packets_fl, freelist_remove(group_fl)) end freelist_unlock(group_fl) + events.group_freelist_unlocked() + events.group_freelist_reclaimed(freelist_nfree(packets_fl)) end if freelist_nfree(packets_fl) == 0 then preallocate_step() end end + events.packet_allocated() return freelist_remove(packets_fl) end @@ -229,6 +242,7 @@ function account_free (p) end function free (p) + events.packet_freed(p.length) account_free(p) free_internal(p) end @@ -250,6 +264,7 @@ function preallocate_step() end packets_allocated = packets_allocated + packet_allocation_step packet_allocation_step = 2 * packet_allocation_step + events.packets_preallocated(packet_allocation_step) end function selftest () From a6def7b56681c75be0efad492dcad0613de8df7d Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 2 Nov 2018 15:27:29 +0100 Subject: [PATCH 14/62] Revert "core.timeline: make timeline log available to other core modules" This reverts commit 0e52b2406ae3c4ee55786e5f6432d8870d7d4fb7. --- src/core/app.lua | 4 ++-- src/core/packet.lua | 5 ++--- src/core/timeline.dasl | 9 --------- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 266cc0b957..4cbcb6291a 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -43,8 +43,8 @@ end -- Timeline event log local timeline_log, events -- initialized on demand function timeline () - if not timeline_log then - timeline_log = timeline_mod.log() + if timeline_log == nil then + timeline_log = timeline_mod.new("engine/timeline") events = timeline_mod.load_events(timeline_log, "core.engine") end return timeline_log diff --git a/src/core/packet.lua b/src/core/packet.lua index aaab27e122..d9a18b3393 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -15,8 +15,6 @@ local counter = require("core.counter") local sync = require("core.sync") local timeline = require("core.timeline") -local events = timeline.load_events(timeline.log(), "core.packet") - require("core.packet_h") local packet_t = ffi.typeof("struct packet") @@ -108,11 +106,12 @@ end local packet_allocation_step = 1000 local packets_allocated = 0 -- Initialized on demand. -local packets_fl, group_fl +local packets_fl, group_fl, events -- Call to ensure packet freelist is enabled. function initialize () packets_fl = freelist_create("engine/packets.freelist") + events = timeline.load_events(engine.timeline(), "core.packet") end -- Call to ensure group freelist is enabled. diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 64e4c40eae..47208453ea 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -10,15 +10,6 @@ local S = require("syscall") local shm = require("core.shm") local lib = require("core.lib") --- Initialize and return the timeline log. -local timeline_log -function log () - if timeline_log == nil then - timeline_log = new("engine/timeline") - end - return timeline_log -end - -- Load a set of events for logging onto a timeline. -- Returns a set of logging functions. -- From 8201d1d7990cf5eb7af756accc12a8faa0f43c8d Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 2 Nov 2018 15:35:10 +0100 Subject: [PATCH 15/62] lib.ptree.worker: reformulate to use engine.main with done func --- src/lib/ptree/worker.lua | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/lib/ptree/worker.lua b/src/lib/ptree/worker.lua index 24db0eaa5a..64b78e1047 100644 --- a/src/lib/ptree/worker.lua +++ b/src/lib/ptree/worker.lua @@ -92,27 +92,22 @@ function Worker:handle_actions_from_manager() end function Worker:main () - local vmprofile = require("jit.vmprofile") local stop = engine.now() + self.duration local next_time = engine.now() - -- Setup vmprofile. - engine.setvmprofile("engine") - vmprofile.start() - - if not engine.auditlog_enabled then engine.enable_auditlog() end - - repeat - self.breathe() + local function control () if next_time < engine.now() then next_time = engine.now() + self.period + engine.setvmprofile("worker") self:handle_actions_from_manager() - timer.run() + engine.setvmprofile("engine") + end + if stop < engine.now() then + return true -- done end - if not engine.busywait then engine.pace_breathing() end - until stop < engine.now() - counter.commit() - if not self.no_report then engine.report(self.report) end + end + + engine.main{done=control} end function main (opts) From debd26cfea3a77690f66144ee9ff62e4f5166cb8 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 8 Oct 2018 11:08:25 +0200 Subject: [PATCH 16/62] engine.main: fix memory leak due to latency histogram creation --- src/core/app.lua | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 4cbcb6291a..5b547fd789 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -50,6 +50,14 @@ function timeline () return timeline_log end +-- Breath latency histogram +local latency -- initialized on demand +function enable_latency_histogram () + if latency == nil then + latency = histogram.create('engine/latency.histogram', 1e-6, 1e0) + end +end + -- The set of all active apps and links in the system, indexed by name. app_table, link_table = {}, {} -- Timeline events specific to app instances @@ -529,10 +537,10 @@ function main (options) -- Ensure timeline is created and initialized timeline() - -- Enable latency histogram if requested + -- Enable latency histogram unless explicitly disabled local breathe = breathe if options.measure_latency or options.measure_latency == nil then - local latency = histogram.create('engine/latency.histogram', 1e-6, 1e0) + enable_latency_histogram() breathe = latency:wrap_thunk(breathe, now) end From edaad6c82177089626cbc6334047a5d2771bd1de Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 7 Nov 2018 15:23:39 +0100 Subject: [PATCH 17/62] core.app: set timeline log level at the very end of breathe loop This fixes a bug where timeline log level was rerolled between end of breaths but before before post-breath events, causing sampling to affect the event lag of the polled_timers event. --- src/core/app.lua | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 5b547fd789..2c95c5c6a6 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -553,6 +553,7 @@ function main (options) breathe() if not no_timers then timer.run() events.polled_timers() end if not busywait then pace_breathing() end + set_log_level() -- roll random log level until done and done() counter.commit() if not options.no_report then report(options.report) end @@ -562,6 +563,19 @@ function main (options) setvmprofile("program") end +function set_log_level (level) + if not level then + -- Randomize the log level. Enable each level in 5x more breaths + -- than the level below by randomly picking from log5() distribution. + -- Goal is ballpark 1000 messages per second (~15min for 1M entries.) + -- + -- Could be better to reduce the log level over time to "stretch" + -- logs for long running processes? Improvements possible :-). + level = math.max(1, math.ceil(math.log(math.random(5^9))/math.log(5))) + end + timeline_mod.level(timeline_log, level) +end + local nextbreath local lastfrees = 0 local lastfreebits = 0 @@ -643,14 +657,6 @@ function breathe () events.commited_counters() packet.rebalance_freelists() end - -- Randomize the log level. Enable each level in 5x more breaths - -- than the level below by randomly picking from log5() distribution. - -- Goal is ballpark 1000 messages per second (~15min for 1M entries.) - -- - -- Could be better to reduce the log level over time to "stretch" - -- logs for long running processes? Improvements possible :-). - local level = math.max(1, math.ceil(math.log(math.random(5^9))/math.log(5))) - timeline_mod.level(timeline_log, level) running = false end From 9c4c804cbafc45c9abe2a06c1352314dec5ee4e3 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 8 Nov 2018 11:19:35 +0100 Subject: [PATCH 18/62] core.packet: reorder group_fl released/reclaimed/unlocked events --- src/core/packet.lua | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/packet.lua b/src/core/packet.lua index d9a18b3393..a592b6aa1d 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -123,18 +123,18 @@ end -- Return borrowed packets to group freelist. function rebalance_freelists () - local free_packets = freelist_nfree(packets_fl) - if group_fl and free_packets > packets_allocated then + if group_fl and freelist_nfree(packets_fl) > packets_allocated then events.group_freelist_wait() freelist_lock(group_fl) events.group_freelist_locked() + local nfree0 = freelist_nfree(packets_fl) while freelist_nfree(packets_fl) > packets_allocated and not freelist_full(group_fl) do freelist_add(group_fl, freelist_remove(packets_fl)) end + events.group_freelist_released(nfree0 - freelist_nfree(packets_fl)) freelist_unlock(group_fl) events.group_freelist_unlocked() - events.group_freelist_released(free_packets - freelist_nfree(packets_fl)) end end From 004f5bb71848cb2d530deea113d411168cf2a754 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 8 Nov 2018 11:21:05 +0100 Subject: [PATCH 19/62] core.timeline: decouple log level from event rate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes the syntax of event specs to ,|: ... The previous level digit becomes the event’s "rate" and retains its semantics with regard to the logging frequency of the specified event. The "stack depth" of the event is now decoupled as the new, leading level digit and specified independently. The new level semantics are as follows: - level ranges from 0-9 (10 levels in total) - 0 is the top most level while 9 in the lowest - levels 0-4 are reserved for use by the engine - user applications can use levels 5-9 to create hierarchy in their events Caveat: users should avoid defining events with a higher level and a lower event rate than an enclosed event if the higher level event is supposed to serve as a latency anchor for the lower level event. RIGHT WRONG 5,3|op_start: 5,2|op_start: 6,2|op_iter: 6,3|op_iter: 5,3|op_end: 5,2|op_end: In the left most WRONG example, the anchor of the op_inter event depends on the log rate at the time of sampling. --- src/core/app.events | 9 +++--- src/core/app.lua | 26 ++++++++--------- src/core/engine.events | 59 ++++++++++++++++++++------------------ src/core/packet.events | 41 ++++++++++++++------------- src/core/timeline.dasl | 64 +++++++++++++++++++++--------------------- 5 files changed, 101 insertions(+), 98 deletions(-) diff --git a/src/core/app.events b/src/core/app.events index 1d77fbe828..dd7ac9daad 100644 --- a/src/core/app.events +++ b/src/core/app.events @@ -1,11 +1,12 @@ -3|pull: inpackets inbytes outpackets outbytes droppackets dropbytes +3,3|pull: inpackets inbytes outpackets outbytes droppackets dropbytes Entering app pull() callback. -3|pulled: inpackets inbytes outpackets outbytes droppackets dropbytes +3,3|pulled: inpackets inbytes outpackets outbytes droppackets dropbytes Returned from app pull() callback. -3|push: inpackets inbytes outpackets outbytes droppackets dropbytes + +3,3|push: inpackets inbytes outpackets outbytes droppackets dropbytes Entering app push() callback. -3|pushed: inpackets inbytes outpackets outbytes droppackets dropbytes +3,3|pushed: inpackets inbytes outpackets outbytes droppackets dropbytes Returned from app push() callback. diff --git a/src/core/app.lua b/src/core/app.lua index 2c95c5c6a6..f131ce6818 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -553,7 +553,7 @@ function main (options) breathe() if not no_timers then timer.run() events.polled_timers() end if not busywait then pace_breathing() end - set_log_level() -- roll random log level + randomize_log_rate() -- roll random log rate until done and done() counter.commit() if not options.no_report then report(options.report) end @@ -563,17 +563,15 @@ function main (options) setvmprofile("program") end -function set_log_level (level) - if not level then - -- Randomize the log level. Enable each level in 5x more breaths - -- than the level below by randomly picking from log5() distribution. - -- Goal is ballpark 1000 messages per second (~15min for 1M entries.) - -- - -- Could be better to reduce the log level over time to "stretch" - -- logs for long running processes? Improvements possible :-). - level = math.max(1, math.ceil(math.log(math.random(5^9))/math.log(5))) - end - timeline_mod.level(timeline_log, level) +function randomize_log_rate () + -- Randomize the log rate. Enable each rate in 5x more breaths + -- than the rate below by randomly picking from log5() distribution. + -- Goal is ballpark 1000 messages per second (~15min for 1M entries.) + -- + -- Could be better to reduce the log rate over time to "stretch" + -- logs for long running processes? Improvements possible :-). + local rate = math.max(1, math.ceil(math.log(math.random(5^9))/math.log(5))) + timeline_mod.rate(timeline_log, rate) end local nextbreath @@ -621,7 +619,7 @@ function breathe () for i = 1, #breathe_pull_order do local app = breathe_pull_order[i] if app.pull and not app.dead then - if timeline_mod.level(timeline_log) <= 3 then + if timeline_mod.rate(timeline_log) <= 3 then app_events[app].pull(linkstats(app)) with_restart(app, app.pull) app_events[app].pulled(linkstats(app)) @@ -635,7 +633,7 @@ function breathe () for i = 1, #breathe_push_order do local app = breathe_push_order[i] if app.push and not app.dead then - if timeline_mod.level(timeline_log) <= 3 then + if timeline_mod.rate(timeline_log) <= 3 then app_events[app].push(linkstats(app)) with_restart(app, app.push) app_events[app].pushed(linkstats(app)) diff --git a/src/core/engine.events b/src/core/engine.events index 4c974a79a9..79e1978e21 100644 --- a/src/core/engine.events +++ b/src/core/engine.events @@ -1,25 +1,8 @@ -4|sleep_Hz: usec Hz -The engine requests that the kernel suspend this process for a period of -microseconds in order to reduce CPU utilization and achieve a fixed -frequency of breaths per second (Hz). - -4|sleep_on_idle: usec -The engine requests that the kernel suspend this process for a period -of microseconds in order to reduce CPU utilization because idleness -has been detected (a breath in which no packets were processed.) - -4|wakeup_from_sleep: -The engine resumes operation after sleeping voluntarily. - - -6|engine_started: +0,6|engine_started: The engine starts the traffic processing loop. -6|engine_stopped: -The engine stops the traffic processing loop. - -5|breath_start: breath totalpackets totalbytes totaletherbits +1,5|breath_start: breath totalpackets totalbytes totaletherbits The engine starts an iteration of the packet-processing event loop (a "breath".) @@ -27,20 +10,23 @@ The total count of packets, bytes, and bits (including layer-1 ethernet overhead) that the engine has processed are included. These can be used to track the rate of traffic. -3|got_monotonic_time: unixnanos + +2,3|got_monotonic_time: unixnanos The engine has completed initialization for the breath: synchronized the current time and handled any pending error recovery. 'unixnanos' is the current wall-clock time in nanoseconds since the epoc. This can be used to synchronize the cycle timestamps with wall-clock time. -4|breath_pulled: + +2,4|breath_pulled: The engine has "pulled" new packets into the event loop for processing. -4|breath_pushed: +2,4|breath_pushed: The engine has "pushed" packets one step through the processing network. -5|breath_end: breath npackets bpp + +1,5|breath_end: breath npackets bpp The engine completes an iteration of the event loop (a "breath.") 'packets' is the total number of packets processed during the breath. @@ -51,10 +37,27 @@ deallocated (freed) during processing. This does not necessarily correspond directly to ingress or egress packets on a given interface. -4|polled_timers: - The engine polled its timers and executed any that were expired. +1,4|commited_counters: +The engine commits the latest counter values to externally visible shared +memory. + +1,4|polled_timers: +The engine polled its timers and executed any that were expired. + + +1,4|sleep_Hz: usec Hz +The engine requests that the kernel suspend this process for a period of +microseconds in order to reduce CPU utilization and achieve a fixed +frequency of breaths per second (Hz). + +1,4|sleep_on_idle: usec +The engine requests that the kernel suspend this process for a period +of microseconds in order to reduce CPU utilization because idleness +has been detected (a breath in which no packets were processed.) + +1,4|wakeup_from_sleep: +The engine resumes operation after sleeping voluntarily. -4|commited_counters: - The engine commits the latest counter values to externally visible - shared memory. +0,6|engine_stopped: +The engine stops the traffic processing loop. diff --git a/src/core/packet.events b/src/core/packet.events index 682eda4355..e004a6da52 100644 --- a/src/core/packet.events +++ b/src/core/packet.events @@ -1,31 +1,32 @@ -2|packet_allocated: - A packet has been allocated from the packet freelist. +9,6|packets_preallocated: packets +DMA memory for packets had been preallocated from the operating system. -2|packet_freed: length - A packet has been freed to the packet freelist. +'packets' is the number of packets for which space has been reserved. - 'length' is the byte size of the packet. +9,1|packet_allocated: +A packet has been allocated from the packet freelist. -6|packets_preallocated: packets - DMA memory for packets had been preallocated from the operating system. +9,1|packet_freed: length +A packet has been freed to the packet freelist. - 'packets' is the number of packets for which space has been reserved. +'length' is the byte size of the packet. -4|group_freelist_wait: - The process is waiting to acquire the group freelist’s lock. -4|group_freelist_locked: - The process has acquired the group freelist’s lock. +9,4|group_freelist_wait: +The process is waiting to acquire the group freelist’s lock. -4|group_freelist_unlocked: - The process has released the group freelist’s lock. +9,4|group_freelist_locked: +The process has acquired the group freelist’s lock. -4|group_freelist_released: packets - The packet freelist was rebalanced with the group freelist. +9,4|group_freelist_released: packets +The packet freelist was rebalanced with the group freelist. - 'packets' is the number of packets released to the group freelist. +'packets' is the number of packets released to the group freelist. -4|group_freelist_reclaimed: packets - The packet freelist was refilled from the group freelist. +9,4|group_freelist_reclaimed: packets +The packet freelist was refilled from the group freelist. - 'packets' is the number of packets reclaimed from the group freelist. +'packets' is the number of packets reclaimed from the group freelist. + +9,4|group_freelist_unlocked: +The process has released the group freelist’s lock. diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 47208453ea..f676db9b8b 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -29,7 +29,7 @@ end function load_events_from_string (tl, spec, category, extra) local events = {} -- Insert a delimiter character (\a "alarm") between log messages. - spec = spec:gsub("\n(%d|)", "\n\a%1") + spec = spec:gsub("\n(%d,%d|)", "\n\a%1") for message in spec:gmatch("[^\a]+") do message = message:gsub("(.-)%s*$", "%1") -- trim trailing spaces local event = message:match("([%w_]+):") @@ -67,7 +67,7 @@ ffi.cdef[[ struct timeline_state { // state for the entries ring buffer struct timeline_entry *entries; - uint32_t level; + uint32_t rate; uint32_t next_entry; uint32_t num_entries; // state for the string table @@ -106,7 +106,7 @@ function new (shmpath, num_entries, size_stringtable) -- Private state local state = ffi.new("struct timeline_state") state.entries = ring - state.level = 0 + state.rate = 0 state.next_entry = 0 state.num_entries = num_entries state.stringtable = stringtable @@ -117,13 +117,13 @@ function new (shmpath, num_entries, size_stringtable) end function mkevent (timeline, category, message, attrs) - if not message:match("^%d|([^:]+):") then + if not message:match("^%d,%d|([^:]+):") then error(("event syntax error: %q"):format(message)) end - -- Extract the log level for the message - local level = tonumber(message:match("^(%d)|")) - -- Insert the category ("0|event:" -> "0|category.event:") - message = message:gsub("^(%d|)", "%1"..category..".") + -- Extract the sampling rate for the message + local rate = tonumber(message:match("^%d,(%d)|")) + -- Insert the category ("0,3|event:" -> "0,3|category.event:") + message = message:gsub("|", "|"..category..".", 1) -- Insert the additional attributes. -- e.g. "1|foo: arg" with {a1="x",a2="y"} becomes "1|foo a1=x a2=y: arg" for k,v in pairs(attrs or {}) do @@ -135,20 +135,20 @@ function mkevent (timeline, category, message, attrs) local id = intern(timeline, message) local event = event -- move asm function into local scope local log = timeline - if n==0 then return function () event(log,level,id,0,0,0,0,0,0) end end - if n==1 then return function (a) event(log,level,id,a,0,0,0,0,0) end end - if n==2 then return function (a,b) event(log,level,id,a,b,0,0,0,0) end end - if n==3 then return function (a,b,c) event(log,level,id,a,b,c,0,0,0) end end - if n==4 then return function (a,b,c,d) event(log,level,id,a,b,c,d,0,0) end end - if n==5 then return function (a,b,c,d,e) event(log,level,id,a,b,c,d,e,0) end end - if n==6 then return function (a,b,c,d,e,f) event(log,level,id,a,b,c,d,e,f) end end + if n==0 then return function () event(log,rate,id,0,0,0,0,0,0) end end + if n==1 then return function (a) event(log,rate,id,a,0,0,0,0,0) end end + if n==2 then return function (a,b) event(log,rate,id,a,b,0,0,0,0) end end + if n==3 then return function (a,b,c) event(log,rate,id,a,b,c,0,0,0) end end + if n==4 then return function (a,b,c,d) event(log,rate,id,a,b,c,d,0,0) end end + if n==5 then return function (a,b,c,d,e) event(log,rate,id,a,b,c,d,e,0) end end + if n==6 then return function (a,b,c,d,e,f) event(log,rate,id,a,b,c,d,e,f) end end error("illegal number of arguments: "..n) end --- Get or set the current timeline log level. -function level (timeline, level) - if level then timeline.level = level end - return timeline.level +-- Get or set the current timeline log rate. +function rate (timeline, rate) + if rate then timeline.rate = rate end + return timeline.rate end ------------------------------------------------------------ @@ -199,11 +199,11 @@ end |.type log, struct timeline_state |.type msg, struct timeline_entry --- void log(timeline, level, msg, arg0, ..., arg5) +-- void log(timeline, rate, msg, arg0, ..., arg5) local function asmlog (Dst) |->log: - -- Check that the enabled log level is >= the event log level - | mov eax, log:p0->level + -- Check that the enabled log rate is >= the event log rate + | mov eax, log:p0->rate | cmp p1, rax | jge >1 | ret @@ -253,17 +253,17 @@ _anchor = mcode --dasm.dump(mcode, size) local test_events = [[ -6|six: -event with level 6 (0 args) +0,6|six: +event with rate 6 (0 args) -5|five: a b c -event with level 5 (3 args) +0,5|five: a b c +event with rate 5 (3 args) -4|four: a b c d e f -event with level 4 (6 args) +0,4|four: a b c d e f +event with rate 4 (6 args) -3|three: -event with level 3 +0,3|three: +event with rate 3 (0 args) ]] -- selftest is designed mostly to check that timeline logging does not @@ -275,7 +275,7 @@ function selftest () local tl = new("selftest/timeline") local e = load_events_from_string(tl, test_events, "selftest", {module="timeline", func="selftest"}) - level(tl, 4) -- won't log event three + rate(tl, 4) -- won't log event three print("check logging individual messages") -- First check that log entries are created @@ -296,7 +296,7 @@ function selftest () -- overflow the string table print("overflowing string table") for i = 1, 1e5 do - mkevent(tl, "selftest", "9|dummy_event_definition:", {i=i}) + mkevent(tl, "selftest", "0,9|dummy_event_definition:", {i=i}) end -- report median logging time local sample = {} From 8e681091aef7233d15d9638ad68daa48d52e48e2 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 16 Nov 2018 14:43:27 +0100 Subject: [PATCH 20/62] core.app: initialize timeline log rate to 7 (no logging) randomize_log_rate will then set the rate to values between 1 and 6, enabling the respective events. --- src/core/app.lua | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index f131ce6818..357bffcec8 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -45,11 +45,25 @@ local timeline_log, events -- initialized on demand function timeline () if timeline_log == nil then timeline_log = timeline_mod.new("engine/timeline") + timeline_mod.rate(timeline_log, 7) -- initialize rate to "no logging" events = timeline_mod.load_events(timeline_log, "core.engine") end return timeline_log end +function randomize_log_rate () + -- Randomize the log rate. Enable each rate in 5x more breaths + -- than the rate below by randomly picking from log5() distribution. + -- Goal is ballpark 1000 messages per second (~15min for 1M entries.) + -- + -- Could be better to reduce the log rate over time to "stretch" + -- logs for long running processes? Improvements possible :-). + -- + -- We use rates 0-6 where 6 means "log always", and 0 means "log never." + local rate = math.max(1, math.ceil(math.log(math.random(5^9))/math.log(5))) + timeline_mod.rate(timeline_log, rate) +end + -- Breath latency histogram local latency -- initialized on demand function enable_latency_histogram () @@ -563,17 +577,6 @@ function main (options) setvmprofile("program") end -function randomize_log_rate () - -- Randomize the log rate. Enable each rate in 5x more breaths - -- than the rate below by randomly picking from log5() distribution. - -- Goal is ballpark 1000 messages per second (~15min for 1M entries.) - -- - -- Could be better to reduce the log rate over time to "stretch" - -- logs for long running processes? Improvements possible :-). - local rate = math.max(1, math.ceil(math.log(math.random(5^9))/math.log(5))) - timeline_mod.rate(timeline_log, rate) -end - local nextbreath local lastfrees = 0 local lastfreebits = 0 From 8f0c6bf928f5efb88b33a6da2f76395a4177ff0f Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 16 Nov 2018 17:16:59 +0100 Subject: [PATCH 21/62] core.timeline: add switch to disable timeline logging --- src/core/timeline.dasl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index f676db9b8b..76e4a9b97b 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -10,6 +10,9 @@ local S = require("syscall") local shm = require("core.shm") local lib = require("core.lib") +-- Set to false to disable timeline logging +enabled = true + -- Load a set of events for logging onto a timeline. -- Returns a set of logging functions. -- @@ -86,6 +89,7 @@ local major, minor = 2, 1 -- Create a new timeline under the given shared memory path. function new (shmpath, num_entries, size_stringtable) + if not enabled then return false end num_entries = num_entries or 1e6 size_stringtable = size_stringtable or 1e6 -- Calculate size based on number of log entries @@ -132,6 +136,8 @@ function mkevent (timeline, category, message, attrs) -- Count the number of arguments. -- (See http://stackoverflow.com/a/11158158/1523491) local _, n = (message:match(":([^\n]*)")):gsub("[^%s]+","") + assert(n >= 0 and n <= 6, "illegal number of arguments: "..n) + if not enabled then return function () end end local id = intern(timeline, message) local event = event -- move asm function into local scope local log = timeline @@ -142,11 +148,11 @@ function mkevent (timeline, category, message, attrs) if n==4 then return function (a,b,c,d) event(log,rate,id,a,b,c,d,0,0) end end if n==5 then return function (a,b,c,d,e) event(log,rate,id,a,b,c,d,e,0) end end if n==6 then return function (a,b,c,d,e,f) event(log,rate,id,a,b,c,d,e,f) end end - error("illegal number of arguments: "..n) end -- Get or set the current timeline log rate. function rate (timeline, rate) + if not enabled then return 1/0 end if rate then timeline.rate = rate end return timeline.rate end From 30cafdc7ce2b2ca98cbdece4ca893d46fe8c7bd3 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 16 Nov 2018 18:08:34 +0100 Subject: [PATCH 22/62] core.app: add timeline events for app network configuration --- src/core/app.events | 13 +++++++++++++ src/core/app.lua | 18 ++++++++++++------ src/core/engine.events | 15 +++++++++++++++ 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/src/core/app.events b/src/core/app.events index dd7ac9daad..0fb3611263 100644 --- a/src/core/app.events +++ b/src/core/app.events @@ -1,3 +1,16 @@ +1,6|started: +The app has been started. (Returned from new() callback.) + +1,6|linked: +The app has been linked. (Returned from link() callback.) + +1,6|reconfigured: +The app has been reconfigured. (Returned from reconfig() callback.) + +1,6|stopped: +The app has been stopped. (Returned from stop() callback.) + + 3,3|pull: inpackets inbytes outpackets outbytes droppackets dropbytes Entering app pull() callback. diff --git a/src/core/app.lua b/src/core/app.lua index 357bffcec8..5e39db5c80 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -346,6 +346,7 @@ function compute_config_actions (old, new) end end + events.config_actions_computed() return actions end @@ -371,14 +372,14 @@ function apply_config_actions (actions) local link = app.output[linkname] app.output[linkname] = nil remove_link_from_array(app.output, link) - if app.link then app:link() end + if app.link then app:link() app_events[app].linked() end end function ops.unlink_input (appname, linkname) local app = app_table[appname] local link = app.input[linkname] app.input[linkname] = nil remove_link_from_array(app.input, link) - if app.link then app:link() end + if app.link then app:link() app_events[app].linked() end end function ops.free_link (linkspec) link.free(link_table[linkspec], linkspec) @@ -395,19 +396,20 @@ function apply_config_actions (actions) local link = assert(link_table[linkspec]) app.output[linkname] = link table.insert(app.output, link) - if app.link then app:link() end + if app.link then app:link() app_events[app].linked() end end function ops.link_input (appname, linkname, linkspec) local app = app_table[appname] local link = assert(link_table[linkspec]) app.input[linkname] = link table.insert(app.input, link) - if app.link then app:link() end + if app.link then app:link() app_events[app].linked() end end function ops.stop_app (name) local app = app_table[name] - if app.stop then app:stop() end + if app.stop then app:stop() app_events[app].stopped() end if app.shm then shm.delete_frame(app.shm) end + app_events[app] = nil app_table[name] = nil configuration.apps[name] = nil end @@ -430,21 +432,25 @@ function apply_config_actions (actions) app.shm = shm.create_frame("apps/"..name, app.shm) end configuration.apps[name] = { class = class, arg = arg } + app_events[app].started() end function ops.reconfig_app (name, class, arg) local app = app_table[name] - app:reconfig(arg) + app:reconfig(arg) app_events[app].reconfigured() configuration.apps[name].arg = arg end + events.configure(counter.read(configs) + 1) -- Dispatch actions. for _, action in ipairs(actions) do local name, args = unpack(action) if log then io.write("engine: ", name, " ", args[1], "\n") end assert(ops[name], name)(unpack(args)) end + events.config_applied() compute_breathe_order () + events.breath_order_computed() end -- Sort the NODES topologically according to SUCCESSORS via diff --git a/src/core/engine.events b/src/core/engine.events index 79e1978e21..25b275d851 100644 --- a/src/core/engine.events +++ b/src/core/engine.events @@ -61,3 +61,18 @@ The engine resumes operation after sleeping voluntarily. 0,6|engine_stopped: The engine stops the traffic processing loop. + + +0,6|config_actions_computed: +The engine has computed the actions required for applying a new configuration. + +0,6|configure: config +The engine begins to apply a new configuration. + +'config' is the number of this configuration. + +0,6|config_applied: +The engine has applied a new configuration. + +0,6|breath_order_computed: +The engine has computed the breath order of a new configuration. \ No newline at end of file From e5faba0f19d11021f6416ba0884712241835dee0 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 7 Dec 2018 15:47:10 +0100 Subject: [PATCH 23/62] Fix some wrong assumptions about the event rate Realized: 9 is the rate that means "log always". Elevate some rare but important events to rate 9. --- src/core/app.events | 8 ++++---- src/core/app.lua | 6 +++--- src/core/engine.events | 10 +++++----- src/core/packet.events | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/core/app.events b/src/core/app.events index 0fb3611263..e72e76047a 100644 --- a/src/core/app.events +++ b/src/core/app.events @@ -1,13 +1,13 @@ -1,6|started: +1,9|started: The app has been started. (Returned from new() callback.) -1,6|linked: +1,9|linked: The app has been linked. (Returned from link() callback.) -1,6|reconfigured: +1,9|reconfigured: The app has been reconfigured. (Returned from reconfig() callback.) -1,6|stopped: +1,9|stopped: The app has been stopped. (Returned from stop() callback.) diff --git a/src/core/app.lua b/src/core/app.lua index 5e39db5c80..00034e8119 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -45,7 +45,7 @@ local timeline_log, events -- initialized on demand function timeline () if timeline_log == nil then timeline_log = timeline_mod.new("engine/timeline") - timeline_mod.rate(timeline_log, 7) -- initialize rate to "no logging" + timeline_mod.rate(timeline_log, 9) -- initially log events with rate >= 9 events = timeline_mod.load_events(timeline_log, "core.engine") end return timeline_log @@ -59,7 +59,7 @@ function randomize_log_rate () -- Could be better to reduce the log rate over time to "stretch" -- logs for long running processes? Improvements possible :-). -- - -- We use rates 0-6 where 6 means "log always", and 0 means "log never." + -- We use rates 0-9 where 9 means "log always", and 0 means "log never." local rate = math.max(1, math.ceil(math.log(math.random(5^9))/math.log(5))) timeline_mod.rate(timeline_log, rate) end @@ -450,7 +450,7 @@ function apply_config_actions (actions) events.config_applied() compute_breathe_order () - events.breath_order_computed() + events.breathe_order_computed() end -- Sort the NODES topologically according to SUCCESSORS via diff --git a/src/core/engine.events b/src/core/engine.events index 25b275d851..10e9e32687 100644 --- a/src/core/engine.events +++ b/src/core/engine.events @@ -63,16 +63,16 @@ The engine resumes operation after sleeping voluntarily. The engine stops the traffic processing loop. -0,6|config_actions_computed: +0,9|config_actions_computed: The engine has computed the actions required for applying a new configuration. -0,6|configure: config +0,9|configure: config The engine begins to apply a new configuration. 'config' is the number of this configuration. -0,6|config_applied: +0,9|config_applied: The engine has applied a new configuration. -0,6|breath_order_computed: -The engine has computed the breath order of a new configuration. \ No newline at end of file +0,9|breathe_order_computed: +The engine has computed the breath order of a new configuration. diff --git a/src/core/packet.events b/src/core/packet.events index e004a6da52..752a3f25ee 100644 --- a/src/core/packet.events +++ b/src/core/packet.events @@ -1,5 +1,5 @@ -9,6|packets_preallocated: packets -DMA memory for packets had been preallocated from the operating system. +9,9|packets_preallocated: packets +DMA memory for packets has been preallocated from the operating system. 'packets' is the number of packets for which space has been reserved. From 872eaab60543a84be2307131a435e2ad290645de Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 29 Sep 2022 16:52:07 +0200 Subject: [PATCH 24/62] app.events: reduce tick/ticked event rate This ensures there is always a preceding breath_start event to anchor breath lag for these events. --- src/core/app.events | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/app.events b/src/core/app.events index 78c28d8dda..35be99956c 100644 --- a/src/core/app.events +++ b/src/core/app.events @@ -28,8 +28,8 @@ Entering app push() callback. Returned from app push() callback. -3,9|tick: +3,5|tick: Entering app tick() callback. -3,9|ticked: +3,5|ticked: Returned from app tick() callback. From 025225416573bb12b81a1f24513c5490b9126129 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 29 Sep 2022 16:53:07 +0200 Subject: [PATCH 25/62] engine.events: fix breath_ticked description --- src/core/engine.events | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/engine.events b/src/core/engine.events index 86cab6538b..48dd6f6b9c 100644 --- a/src/core/engine.events +++ b/src/core/engine.events @@ -26,7 +26,7 @@ The engine has "pulled" new packets into the event loop for processing. The engine has "pushed" packets one step through the processing network. 2,4|breath_ticked: -The engine has "pushed" packets one step through the processing network. +The engine has executed "tick" methods. 1,5|breath_end: breath npackets bpp From 8080451d17ff192ccffa12fe4f7e9f54632c04d7 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 29 Sep 2022 16:54:19 +0200 Subject: [PATCH 26/62] core.app: log breath_ticked only when tick()=true --- src/core/app.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/app.lua b/src/core/app.lua index 365e36fd0d..8268e98080 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -696,8 +696,8 @@ function breathe () app:tick() app_events[app].ticked(linkstats(app)) end + events.breath_ticked() end - events.breath_ticked() setvmprofile("engine") local freed local freed_packets = counter.read(frees) - freed_packets0 From cd43d69f1168d6292c0a221040e9b4ec1acbef79 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 29 Sep 2022 16:55:59 +0200 Subject: [PATCH 27/62] engine.events: increase rate of commited_counters This is to ensure suffient event logs since counter.commit() is only executed every 100th breath. --- src/core/engine.events | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/engine.events b/src/core/engine.events index 48dd6f6b9c..ad8eb683e3 100644 --- a/src/core/engine.events +++ b/src/core/engine.events @@ -40,7 +40,7 @@ deallocated (freed) during processing. This does not necessarily correspond directly to ingress or egress packets on a given interface. -1,4|commited_counters: +1,5|commited_counters: The engine commits the latest counter values to externally visible shared memory. From 80915314610816ab34c1a5803100dd5a7923b661 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 30 Sep 2022 15:27:29 +0200 Subject: [PATCH 28/62] Revert "app.events: reduce tick/ticked event rate" This reverts commit 872eaab60543a84be2307131a435e2ad290645de. --- src/core/app.events | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/app.events b/src/core/app.events index 35be99956c..78c28d8dda 100644 --- a/src/core/app.events +++ b/src/core/app.events @@ -28,8 +28,8 @@ Entering app push() callback. Returned from app push() callback. -3,5|tick: +3,9|tick: Entering app tick() callback. -3,5|ticked: +3,9|ticked: Returned from app tick() callback. From 3e0fee74c630ab64858f551ef85cfdce3434cf1e Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 30 Sep 2022 15:43:55 +0200 Subject: [PATCH 29/62] core.app: fix bug introduced in merge with master --- src/core/app.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/app.lua b/src/core/app.lua index 8268e98080..c22597ddde 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -33,7 +33,7 @@ local named_program_root = shm.root .. "/" .. "by-name" program_name = false -- Auditlog state -local auditlog_enabled = false +auditlog_enabled = false function enable_auditlog () jit.auditlog(shm.path("audit.log")) auditlog_enabled = true From cb11a267e1b238a1551db0dc0220534f741c0edc Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 29 Sep 2022 16:52:07 +0200 Subject: [PATCH 30/62] app.events: reduce tick/ticked event rate This ensures there is always a preceding breath_start event to anchor breath lag for these events. --- src/core/app.events | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/app.events b/src/core/app.events index 78c28d8dda..35be99956c 100644 --- a/src/core/app.events +++ b/src/core/app.events @@ -28,8 +28,8 @@ Entering app push() callback. Returned from app push() callback. -3,9|tick: +3,5|tick: Entering app tick() callback. -3,9|ticked: +3,5|ticked: Returned from app tick() callback. From bfa3aa67bcd2cd2c42f260e930ddf03fbee4f6b5 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 30 Sep 2022 16:10:17 +0200 Subject: [PATCH 31/62] lib.ptree: fix bugs introduced by merge with master --- src/lib/ptree/worker.lua | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib/ptree/worker.lua b/src/lib/ptree/worker.lua index 586d1f54c8..04d04dffcc 100644 --- a/src/lib/ptree/worker.lua +++ b/src/lib/ptree/worker.lua @@ -120,8 +120,10 @@ function Worker:main () self:handle_actions_from_manager() events.engine_started() timer.run() + events.polled_timers() end if not engine.busywait then engine.pace_breathing() end + engine.randomize_log_rate() until stop < engine.now() counter.commit() if not self.no_report then engine.report(self.report) end From 8c34646874b9dfd35f32dcab4250724e81e3674b Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 19 Dec 2022 17:37:54 +0100 Subject: [PATCH 32/62] timeline: cleaup merge --- src/core/app.lua | 7 ++++--- src/core/packet.lua | 12 ++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index c22597ddde..9c491504fd 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -374,8 +374,7 @@ function apply_config_actions (actions) configuration.links[linkspec] = nil end function ops.new_link (linkspec) - local link = link.new(linkspec) - link_table[linkspec] = link + link_table[linkspec] = link.new(linkspec) configuration.links[linkspec] = true end function ops.link_output (appname, linkname, linkspec) @@ -589,6 +588,9 @@ function main (options) breathe = latency:wrap_thunk(breathe, now) end + -- Setup vmprofile + setvmprofile("engine") + -- Enable tick enable_tick() @@ -808,7 +810,6 @@ function report_apps () app:report() end end - setvmprofile("engine") end function selftest () diff --git a/src/core/packet.lua b/src/core/packet.lua index 2c89105e60..db280aa013 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -290,7 +290,10 @@ end function account_free (p) counter.add(engine.frees) counter.add(engine.freebytes, p.length) - counter.add(engine.freebits, physical_bits(p)) + -- Calculate bits of physical capacity required for packet on 10GbE + -- Account for minimum data size and overhead of CRC and inter-packet gap + -- https://en.wikipedia.org/wiki/Ethernet_frame + counter.add(engine.freebits, (12 + 8 + math.max(p.length, 60) + 4) * 8) end local free_internal, account_free = @@ -304,13 +307,6 @@ function free (p) end end -function physical_bits (p) - -- Calculate bits of physical capacity required for packet on 10GbE - -- Account for minimum data size and overhead of CRC and inter-packet gap - -- https://en.wikipedia.org/wiki/Ethernet_frame - return (12 + 8 + math.max(p.length, 60) + 4) * 8 -end - -- Set packet data length. function resize (p, len) assert(len <= max_payload, "packet payload overflow") From 840e1e53245e891063b25c221f1845927930c159 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 19 Dec 2022 18:09:10 +0100 Subject: [PATCH 33/62] timeline: cleanup - remove link dropbytes - simplify breath stats - make stat collection helpers local --- src/core/app.events | 8 +++---- src/core/app.lua | 48 +++++++++++++++++++++--------------------- src/core/engine.events | 11 ++++------ src/core/link.h | 2 +- src/core/link.lua | 3 +-- 5 files changed, 34 insertions(+), 38 deletions(-) diff --git a/src/core/app.events b/src/core/app.events index 35be99956c..b8e8e0399c 100644 --- a/src/core/app.events +++ b/src/core/app.events @@ -14,17 +14,17 @@ The app has been reconfigured. (Returned from reconfig() callback.) The app has been stopped. (Returned from stop() callback.) -3,3|pull: inpackets inbytes outpackets outbytes droppackets dropbytes +3,3|pull: inpackets inbytes outpackets outbytes droppackets Entering app pull() callback. -3,3|pulled: inpackets inbytes outpackets outbytes droppackets dropbytes +3,3|pulled: inpackets inbytes outpackets outbytes droppackets Returned from app pull() callback. -3,3|push: inpackets inbytes outpackets outbytes droppackets dropbytes +3,3|push: inpackets inbytes outpackets outbytes droppackets Entering app push() callback. -3,3|pushed: inpackets inbytes outpackets outbytes droppackets dropbytes +3,3|pushed: inpackets inbytes outpackets outbytes droppackets Returned from app push() callback. diff --git a/src/core/app.lua b/src/core/app.lua index 9b3ffa9c75..276cdc63b8 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -642,11 +642,30 @@ function pace_breathing () end end +local function enginestats () + local breaths = counter.read(breaths) + local frees = counter.read(frees) + local freebytes = counter.read(freebytes) + local freebits = counter.read(freebits) + return breaths, frees, freebytes, freebits +end + +local function linkstats (app) + local inp, inb, outp, outb, dropp, dropb = 0, 0, 0, 0, 0, 0 + for i = 1, #app.input do + inp = inp + tonumber(counter.read(app.input[i].stats.rxpackets)) + inb = inb + tonumber(counter.read(app.input[i].stats.rxbytes)) + end + for i = 1, #app.output do + outp = outp + tonumber(counter.read(app.output[i].stats.txpackets)) + outb = outb + tonumber(counter.read(app.output[i].stats.txbytes)) + dropp = dropp + tonumber(counter.read(app.output[i].stats.txdrop)) + end + return inp, inb, outp, outb, dropp +end + function breathe () - local freed_packets0 = counter.read(frees) - local freed_bytes0 = counter.read(freebytes) - events.breath_start(counter.read(breaths), freed_packets0, freed_bytes0, - counter.read(freebits)) + events.breath_start(enginestats()) running = true monotonic_now = C.get_monotonic_time() events.got_monotonic_time(C.get_time_ns()) @@ -702,11 +721,7 @@ function breathe () events.breath_ticked() end setvmprofile("engine") - local freed - local freed_packets = counter.read(frees) - freed_packets0 - local freed_bytes = (counter.read(freebytes) - freed_bytes0) - local freed_bytes_per_packet = freed_bytes / math.max(tonumber(freed_packets), 1) - events.breath_end(counter.read(breaths), freed_packets, freed_bytes_per_packet) + events.breath_end(enginestats()) counter.add(breaths) -- Commit counters at a reasonable frequency if counter.read(breaths) % 100 == 0 then @@ -716,21 +731,6 @@ function breathe () running = false end -function linkstats (app) - local inp, inb, outp, outb, dropp, dropb = 0, 0, 0, 0, 0, 0 - for i = 1, #app.input do - inp = inp + tonumber(counter.read(app.input[i].stats.rxpackets)) - inb = inb + tonumber(counter.read(app.input[i].stats.rxbytes)) - end - for i = 1, #app.output do - outp = outp + tonumber(counter.read(app.output[i].stats.txpackets)) - outb = outb + tonumber(counter.read(app.output[i].stats.txbytes)) - dropp = dropp + tonumber(counter.read(app.output[i].stats.txdrop)) - dropb = dropb + tonumber(counter.read(app.output[i].stats.txdropbytes)) - end - return inp, inb, outp, outb, dropp, dropb -end - function report (options) if not options or options.showload then report_load() diff --git a/src/core/engine.events b/src/core/engine.events index ad8eb683e3..c9c59e2cd6 100644 --- a/src/core/engine.events +++ b/src/core/engine.events @@ -29,15 +29,12 @@ The engine has "pushed" packets one step through the processing network. The engine has executed "tick" methods. -1,5|breath_end: breath npackets bpp +1,5|breath_end: breath totalpackets totalbytes totaletherbits The engine completes an iteration of the event loop (a "breath.") -'packets' is the total number of packets processed during the breath. -'bpp' is the average number bytes per packet. - -Note: 'packets' is an internal measure of how many packets were -deallocated (freed) during processing. This does not necessarily -correspond directly to ingress or egress packets on a given interface. +The total count of packets, bytes, and bits (including layer-1 +ethernet overhead) that the engine has processed are included. These +can be used to track the rate of traffic. 1,5|commited_counters: diff --git a/src/core/link.h b/src/core/link.h index 415c768b2c..44f9e7eddb 100644 --- a/src/core/link.h +++ b/src/core/link.h @@ -9,7 +9,7 @@ struct link { // http://en.wikipedia.org/wiki/Circular_buffer struct packet *packets[LINK_RING_SIZE]; struct { - struct counter *dtime, *txbytes, *rxbytes, *txpackets, *rxpackets, *txdrop, *txdropbytes; + struct counter *dtime, *txbytes, *rxbytes, *txpackets, *rxpackets, *txdrop; } stats; // Two cursors: // read: the next element to be read diff --git a/src/core/link.lua b/src/core/link.lua index 4dddf6ed7c..34cab06507 100644 --- a/src/core/link.lua +++ b/src/core/link.lua @@ -23,7 +23,7 @@ local size = C.LINK_RING_SIZE -- NB: Huge slow-down if this is not local max = C.LINK_MAX_PACKETS local provided_counters = { - "dtime", "rxpackets", "rxbytes", "txpackets", "txbytes", "txdrop", "txdropbytes" + "dtime", "rxpackets", "rxbytes", "txpackets", "txbytes", "txdrop" } function new (name) @@ -63,7 +63,6 @@ function transmit (r, p) -- assert(p) if full(r) then counter.add(r.stats.txdrop) - counter.add(r.stats.txdropbytes, p.length) packet.free(p) else r.packets[r.write] = p From db3c5cbcb7f1595be7873fd0ccb86de6865ef6a4 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 19 Dec 2022 18:18:22 +0100 Subject: [PATCH 34/62] engine: add timeline_enabled toggle (on by default) --- src/core/app.lua | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/core/app.lua b/src/core/app.lua index 276cdc63b8..f200da3bb9 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -40,6 +40,7 @@ function enable_auditlog () end -- Timeline event log +local timeline_enabled = true local timeline_log, events -- initialized on demand function timeline () if timeline_log == nil then @@ -51,6 +52,11 @@ function timeline () end function randomize_log_rate () + -- Bail if timeline logging is disabled. + -- Effectively this disables all events with rate < 9. + if not timeline_enabled then + return + end -- Randomize the log rate. Enable each rate in 5x more breaths -- than the rate below by randomly picking from log5() distribution. -- Goal is ballpark 1000 messages per second (~15min for 1M entries.) From 7b00bcfe2be24bf2d5f5b6f5b384d9ab2a8395f3 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 27 Dec 2022 18:25:05 +0100 Subject: [PATCH 35/62] Revert "engine: add timeline_enabled toggle (on by default)" This reverts commit db3c5cbcb7f1595be7873fd0ccb86de6865ef6a4. --- src/core/app.lua | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index f200da3bb9..276cdc63b8 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -40,7 +40,6 @@ function enable_auditlog () end -- Timeline event log -local timeline_enabled = true local timeline_log, events -- initialized on demand function timeline () if timeline_log == nil then @@ -52,11 +51,6 @@ function timeline () end function randomize_log_rate () - -- Bail if timeline logging is disabled. - -- Effectively this disables all events with rate < 9. - if not timeline_enabled then - return - end -- Randomize the log rate. Enable each rate in 5x more breaths -- than the rate below by randomly picking from log5() distribution. -- Goal is ballpark 1000 messages per second (~15min for 1M entries.) From 9896b4b5b20d8e98c3512471b28a80c002bd81a9 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 27 Dec 2022 18:29:04 +0100 Subject: [PATCH 36/62] timeline: disable by default, add lib.scheduling toggle --- src/core/app.lua | 2 ++ src/core/timeline.dasl | 4 ++-- src/lib/scheduling.lua | 6 ++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 276cdc63b8..4182d8de4a 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -51,6 +51,8 @@ function timeline () end function randomize_log_rate () + -- Bail if timeline logging is not enabled. + if not timeline_log then return end -- Randomize the log rate. Enable each rate in 5x more breaths -- than the rate below by randomly picking from log5() distribution. -- Goal is ballpark 1000 messages per second (~15min for 1M entries.) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 76e4a9b97b..c32473a861 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -10,8 +10,8 @@ local S = require("syscall") local shm = require("core.shm") local lib = require("core.lib") --- Set to false to disable timeline logging -enabled = true +-- Set to true to enable timeline logging +enabled = false -- Load a set of events for logging onto a timeline. -- Returns a set of logging functions. diff --git a/src/lib/scheduling.lua b/src/lib/scheduling.lua index e3a4769e01..8c6453d25e 100644 --- a/src/lib/scheduling.lua +++ b/src/lib/scheduling.lua @@ -6,6 +6,7 @@ local S = require("syscall") local lib = require("core.lib") local numa = require("lib.numa") local ingress_drop_monitor = require("lib.timers.ingress_drop_monitor") +local timeline = require("core.timeline") local function fatal (msg) print(msg) @@ -25,6 +26,7 @@ local scheduling_opts = { ingress_drop_monitor = {}, -- Action string: one of 'flush' or 'warn'. profile = {default=true}, -- Boolean. busywait = {default=true}, -- Boolean. + timeline = {}, -- Boolean. Enable timeline logging? enable_xdp = {}, -- Enable Snabb XDP mode (see apps.xdp.xdp). eval = {} -- String. } @@ -63,6 +65,10 @@ function sched_apply.busywait (busywait) engine.busywait = busywait end +function sched_apply.timeline (enabled) + timeline.enabled = enabled +end + function sched_apply.enable_xdp (opt) if opt then require('apps.xdp.xdp').snabb_enable_xdp(opt) end end From 21e2017a7a8b782bed7841a4e3f4a1bbf5e34a38 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 30 Dec 2022 15:13:01 +0100 Subject: [PATCH 37/62] timeline: rework group freelist events remove obsolete apps/interlink/freelist_instrument.lua --- src/apps/interlink/freelist_instrument.lua | 40 ---------------------- src/apps/interlink/test_sink.lua | 20 ++--------- src/apps/interlink/test_source.lua | 29 ++++------------ src/apps/interlink/wait_test.snabb | 7 ++-- src/core/packet.lua | 18 ++++------ 5 files changed, 18 insertions(+), 96 deletions(-) delete mode 100644 src/apps/interlink/freelist_instrument.lua diff --git a/src/apps/interlink/freelist_instrument.lua b/src/apps/interlink/freelist_instrument.lua deleted file mode 100644 index 4070a1063d..0000000000 --- a/src/apps/interlink/freelist_instrument.lua +++ /dev/null @@ -1,40 +0,0 @@ --- Use of this source code is governed by the Apache 2.0 license; see COPYING. - -module(...,package.seeall) - -local histogram = require("core.histogram") -local tsc = require("lib.tsc") - -function instrument_freelist () - local ts = tsc.new() - local rebalance_latency = histogram.create('engine/rebalance_latency.histogram', 1, 100e6) - local reclaim_latency = histogram.create('engine/reclaim_latency.histogram', 1, 100e6) - - local rebalance_step, reclaim_step = packet.rebalance_step, packet.reclaim_step - packet.rebalance_step = function () - local start = ts:stamp() - rebalance_step() - rebalance_latency:add(tonumber(ts:to_ns(ts:stamp()-start))) - end - packet.reclaim_step = function () - local start = ts:stamp() - reclaim_step() - reclaim_latency:add(tonumber(ts:to_ns(ts:stamp()-start))) - end - - return rebalance_latency, reclaim_latency -end - -function histogram_csv_header (out) - out = out or io.stdout - out:write("histogram,lo,hi,count\n") -end - -function histogram_csv (histogram, name, out) - out = out or io.stdout - name = name or 'untitled' - for count, lo, hi in histogram:iterate() do - out:write(("%s,%f,%f,%d\n"):format(name, lo, hi, tonumber(count))) - out:flush() - end -end \ No newline at end of file diff --git a/src/apps/interlink/test_sink.lua b/src/apps/interlink/test_sink.lua index d72f072f5a..c6911599ca 100644 --- a/src/apps/interlink/test_sink.lua +++ b/src/apps/interlink/test_sink.lua @@ -13,26 +13,10 @@ function configure (c, name) config.link(c, name..".output -> sink.input") end -function start (name, duration) +function start (name, duration, core) + numa.bind_to_cpu(core, 'skip') local c = config.new() configure(c, name) engine.configure(c) engine.main{duration=duration} end - -local instr = require("apps.interlink.freelist_instrument") - -function start_instrument (name, duration, core) - numa.bind_to_cpu(core, 'skip') - local rebalance_latency = instr.instrument_freelist() - start(name, duration) - instr.histogram_csv(rebalance_latency, "rebalance") - local min, avg, max = rebalance_latency:summarize() - io.stderr:write(("(%d) rebalance latency (ns) min:%16s avg:%16s max:%16s\n") - :format(core, - lib.comma_value(math.floor(min)), - lib.comma_value(math.floor(avg)), - lib.comma_value(math.floor(max)))) - io.stderr:flush() -end - diff --git a/src/apps/interlink/test_source.lua b/src/apps/interlink/test_source.lua index ecfdd664fc..3ae31f3d86 100644 --- a/src/apps/interlink/test_source.lua +++ b/src/apps/interlink/test_source.lua @@ -20,13 +20,19 @@ function start (name, duration) engine.main{duration=duration} end -function startn (name, duration, n) +function startn (name, duration, n, core) + numa.bind_to_cpu(core, 'skip') local c = config.new() for i=1,n do configure(c, name..i) end engine.configure(c) engine.main{duration=duration} + local txpackets = txpackets() + engine.main{duration=1, no_report=true} + io.stderr:write(("%.3f Mpps\n"):format(txpackets / 1e6 / duration)) + io.stderr:flush() + --engine.report_links() end function txpackets () @@ -36,24 +42,3 @@ function txpackets () end return txpackets end - -local instr = require("apps.interlink.freelist_instrument") - -function startn_instrument (name, duration, n, core) - numa.bind_to_cpu(core, 'skip') - local _, reclaim_latency = instr.instrument_freelist() - startn(name, duration, n) - local txpackets = txpackets() - instr.histogram_csv(reclaim_latency, "reclaim") - local min, avg, max = reclaim_latency:summarize() - engine.main{duration=1, no_report=true} - io.stderr:write(("(%d) reclaim latency (ns) min:%16s avg:%16s max:%16s\n") - :format(core, - lib.comma_value(math.floor(min)), - lib.comma_value(math.floor(avg)), - lib.comma_value(math.floor(max)))) - io.stderr:write(("%.3f Mpps\n"):format(txpackets / 1e6 / duration)) - io.stderr:flush() - - --engine.report_links() -end \ No newline at end of file diff --git a/src/apps/interlink/wait_test.snabb b/src/apps/interlink/wait_test.snabb index ebe0cc0d94..1aebea3bf2 100755 --- a/src/apps/interlink/wait_test.snabb +++ b/src/apps/interlink/wait_test.snabb @@ -17,15 +17,12 @@ for core in pairs(CPUS) do table.sort(cores) end -require("apps.interlink.freelist_instrument").histogram_csv_header() -io.stdout:flush() - for i=1,NCONSUMERS do - worker.start("sink"..i, ([[require("apps.interlink.test_sink").start_instrument(%q, %d, %s)]]) + worker.start("sink"..i, ([[require("apps.interlink.test_sink").start(%q, %d, %s)]]) :format("test"..i, DURATION, cores[1+i])) end -worker.start("source", ([[require("apps.interlink.test_source").startn_instrument(%q, %d, %d, %s)]]) +worker.start("source", ([[require("apps.interlink.test_source").startn(%q, %d, %d, %s)]]) :format("test", DURATION, NCONSUMERS, assert(cores[1]))) engine.main{done = function () diff --git a/src/core/packet.lua b/src/core/packet.lua index 634a3c3564..523200c311 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -135,40 +135,34 @@ end local group_fl_chunksize = group_freelist.chunksize -- Return borrowed packets to group freelist. -function rebalance_step () - events.group_freelist_wait() +local function rebalance_step () local chunk, seq = group_freelist.start_add(group_fl) if chunk then - events.group_freelist_locked() chunk.nfree = group_fl_chunksize for i=0, chunk.nfree-1 do chunk.list[i] = freelist_remove(packets_fl) end - events.group_freelist_released(chunk.nfree) group_freelist.finish(chunk, seq) - events.group_freelist_unlocked() else error("group freelist overflow") end + events.group_freelist_released(group_fl_chunksize) end -function need_rebalance () +local function need_rebalance () return freelist_nfree(packets_fl) >= (packets_allocated + group_fl_chunksize) end -- Reclaim packets from group freelist. -function reclaim_step () - events.group_freelist_wait() +local function reclaim_step () local chunk, seq = group_freelist.start_remove(group_fl) if chunk then - events.group_freelist_locked() for i=0, chunk.nfree-1 do freelist_add(packets_fl, chunk.list[i]) end - events.group_freelist_reclaimed(chunk.nfree) group_freelist.finish(chunk, seq) - events.group_freelist_unlocked() end + events.group_freelist_reclaimed(group_fl_chunksize) end -- Register struct freelist as an abstract SHM object type so that the @@ -181,6 +175,7 @@ end}) -- Return an empty packet. function allocate () if freelist_nfree(packets_fl) == 0 then + events.freelist_empty() if group_fl then reclaim_step() end @@ -317,6 +312,7 @@ function free (p) account_free(p) free_internal(p) if group_fl and need_rebalance() then + events.freelist_need_rebalance() rebalance_step() end end From b7a8af9fea150eda7834edf307fb3633a59e1203 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 30 Dec 2022 15:13:48 +0100 Subject: [PATCH 38/62] timeline: remove unused arguments to tick events --- src/core/app.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 4182d8de4a..1ec8c0c8cd 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -715,10 +715,10 @@ function breathe () -- Tick: call tick() methods at tick_Hz frequency if tick() then for _, app in ipairs(breathe_ticks) do - app_events[app].tick(linkstats(app)) setvmprofile(app.zone) + app_events[app].tick() app:tick() - app_events[app].ticked(linkstats(app)) + app_events[app].ticked() end events.breath_ticked() end From 068b4d58d49aedf9390ddb348a327ee5d4d72c02 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 30 Dec 2022 15:14:48 +0100 Subject: [PATCH 39/62] timeline: remove linkstats for push events --- src/core/app.events | 8 ++++---- src/core/app.lua | 34 ++++++---------------------------- 2 files changed, 10 insertions(+), 32 deletions(-) diff --git a/src/core/app.events b/src/core/app.events index b8e8e0399c..be3e6d4986 100644 --- a/src/core/app.events +++ b/src/core/app.events @@ -14,17 +14,17 @@ The app has been reconfigured. (Returned from reconfig() callback.) The app has been stopped. (Returned from stop() callback.) -3,3|pull: inpackets inbytes outpackets outbytes droppackets +3,3|pull: Entering app pull() callback. -3,3|pulled: inpackets inbytes outpackets outbytes droppackets +3,3|pulled: Returned from app pull() callback. -3,3|push: inpackets inbytes outpackets outbytes droppackets +3,3|push: Entering app push() callback. -3,3|pushed: inpackets inbytes outpackets outbytes droppackets +3,3|pushed: Returned from app push() callback. diff --git a/src/core/app.lua b/src/core/app.lua index 1ec8c0c8cd..19dbe39a30 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -652,20 +652,6 @@ local function enginestats () return breaths, frees, freebytes, freebits end -local function linkstats (app) - local inp, inb, outp, outb, dropp, dropb = 0, 0, 0, 0, 0, 0 - for i = 1, #app.input do - inp = inp + tonumber(counter.read(app.input[i].stats.rxpackets)) - inb = inb + tonumber(counter.read(app.input[i].stats.rxbytes)) - end - for i = 1, #app.output do - outp = outp + tonumber(counter.read(app.output[i].stats.txpackets)) - outb = outb + tonumber(counter.read(app.output[i].stats.txbytes)) - dropp = dropp + tonumber(counter.read(app.output[i].stats.txdrop)) - end - return inp, inb, outp, outb, dropp -end - function breathe () events.breath_start(enginestats()) running = true @@ -678,13 +664,9 @@ function breathe () if i > #breathe_pull_order then goto PULL_EXIT else local app = breathe_pull_order[i] setvmprofile(app.zone) - if timeline_mod.rate(timeline_log) <= 3 then - app_events[app].pull(linkstats(app)) - app:pull() - app_events[app].pulled(linkstats(app)) - else - app:pull() - end + app_events[app].pull() + app:pull() + app_events[app].pulled() end i = i+1 goto PULL_LOOP @@ -699,13 +681,9 @@ function breathe () local spec = breathe_push_order[i] local app, push, link = spec.app, spec.push, spec.link setvmprofile(app.zone) - if timeline_mod.rate(timeline_log) <= 3 then - app_events[app].push(linkstats(app)) - push(app, link) - app_events[app].pushed(linkstats(app)) - else - push(app, link) - end + app_events[app].push() + push(app, link) + app_events[app].pushed() end i = i+1 goto PUSH_LOOP From f41f7a1de69bd13e9399ad76d1b1065ceebecbb2 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 30 Dec 2022 15:15:15 +0100 Subject: [PATCH 40/62] timeline: fixup rework group freelist events --- src/core/packet.events | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/core/packet.events b/src/core/packet.events index 752a3f25ee..8f502e61cb 100644 --- a/src/core/packet.events +++ b/src/core/packet.events @@ -12,11 +12,12 @@ A packet has been freed to the packet freelist. 'length' is the byte size of the packet. -9,4|group_freelist_wait: -The process is waiting to acquire the group freelist’s lock. +9,4|freelist_empty: +The process is trying to allocate a packet but the freelist is empty. -9,4|group_freelist_locked: -The process has acquired the group freelist’s lock. +9,4|freelist_need_rebalance: +The process freed a packet. There are now too many packets on the +local freelist which need to be expelled to the group freelist. 9,4|group_freelist_released: packets The packet freelist was rebalanced with the group freelist. @@ -27,6 +28,3 @@ The packet freelist was rebalanced with the group freelist. The packet freelist was refilled from the group freelist. 'packets' is the number of packets reclaimed from the group freelist. - -9,4|group_freelist_unlocked: -The process has released the group freelist’s lock. From bcca06e6214f002a52d93464459a684a5d1e73f3 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Feb 2023 13:57:26 +0100 Subject: [PATCH 41/62] core.sync: add load primitive --- src/core/sync.dasl | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/core/sync.dasl b/src/core/sync.dasl index ac3823c5d6..9e3a08c52e 100644 --- a/src/core/sync.dasl +++ b/src/core/sync.dasl @@ -14,6 +14,14 @@ local ffi = require("ffi") |.define arg2, esi |.define arg3, edx +-- load(src) -> uint32_t +-- Load integer from src. +local load_t = "uint32_t (*) (uint32_t *)" +local function load (Dst) + | mov eax, [arg1] + | ret +end + -- cas(dst, old, new) -> true|false -- Atomic compare-and-swap; compare old with value pointed to by dst. If -- equal, stores new at dst and returns true. Else, returns false. @@ -54,6 +62,9 @@ end local function generate (Dst) Dst:growpc(16) | .align 16 + |->load: + || load(Dst) + | .align 16 |->cas: || cas(Dst) | .align 16 @@ -75,12 +86,24 @@ end local entry = dasm.globals(globals, globalnames) local sync = { + load = ffi.cast(load_t, entry.load), cas = ffi.cast(cas_t, entry.cas), lock = ffi.cast(lock_t, entry.lock), unlock = ffi.cast(unlock_t, entry.unlock) } sync.selftest = function () + -- load + local box = ffi.new( + "struct { uint32_t pad1, state[1], pad2; } __attribute__((packed))" + ) + local state = sync.load(box.state) + assert(state == 0) + for i=1,100 do + box.state[0] = state + 1 + state = sync.load(box.state) + assert(state == i) + end -- cas local box = ffi.new( "struct { unsigned int pad1, state[1], pad2; } __attribute__((packed))" From 45d3ac768bcb3d394b986062a3a474715f2eb817 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Feb 2023 14:03:19 +0100 Subject: [PATCH 42/62] core.group_freelist: fix a deadlock chunk.sequence was not reloaded on further iterations of the loop. Use sync.load() to explicitly reload lock-free states. Panic if we fail to progress after 1000 iterations. --- src/core/group_freelist.lua | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/core/group_freelist.lua b/src/core/group_freelist.lua index 2d233a99b3..6963b3cb4f 100644 --- a/src/core/group_freelist.lua +++ b/src/core/group_freelist.lua @@ -70,18 +70,18 @@ end function freelist_open (name, readonly) local fl = shm.open(name, "struct group_freelist", 'read-only', 1) - waitfor(function () return fl.state[0] == READY end) + waitfor(function () return sync.load(fl.state) == READY end) local size = fl.size shm.unmap(fl) return shm.open(name, "struct group_freelist", readonly, size) end function start_add (fl) - local pos = fl.enqueue_pos[0] + local pos = sync.load(fl.enqueue_pos) local mask = fl.enqueue_mask - while true do + for _=1,1000 do local chunk = fl.chunk[band(pos, mask)] - local seq = chunk.sequence[0] + local seq = sync.load(chunk.sequence) local dif = seq - pos if dif == 0 then if sync.cas(fl.enqueue_pos, pos, pos+1) then @@ -90,18 +90,18 @@ function start_add (fl) elseif dif < 0 then return else - compiler_barrier() -- ensure fresh load of enqueue_pos - pos = fl.enqueue_pos[0] + pos = sync.load(fl.enqueue_pos) end end + error("BUG") end function start_remove (fl) - local pos = fl.dequeue_pos[0] + local pos = sync.load(fl.dequeue_pos) local mask = fl.dequeue_mask - while true do + for _=1,1000 do local chunk = fl.chunk[band(pos, mask)] - local seq = chunk.sequence[0] + local seq = sync.load(chunk.sequence) local dif = seq - (pos+1) if dif == 0 then if sync.cas(fl.dequeue_pos, pos, pos+1) then @@ -110,10 +110,10 @@ function start_remove (fl) elseif dif < 0 then return else - compiler_barrier() -- ensure fresh load of dequeue_pos - pos = fl.dequeue_pos[0] + pos = sync.load(fl.dequeue_pos) end end + error("bug") end function finish (chunk, seq) From b916131a7e2e67dfa5267c370f11df235ed5dc32 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Feb 2023 14:50:56 +0100 Subject: [PATCH 43/62] group_freelist: revert panic --- src/core/group_freelist.lua | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/core/group_freelist.lua b/src/core/group_freelist.lua index 6963b3cb4f..a52e67a48c 100644 --- a/src/core/group_freelist.lua +++ b/src/core/group_freelist.lua @@ -79,7 +79,7 @@ end function start_add (fl) local pos = sync.load(fl.enqueue_pos) local mask = fl.enqueue_mask - for _=1,1000 do + while true do local chunk = fl.chunk[band(pos, mask)] local seq = sync.load(chunk.sequence) local dif = seq - pos @@ -93,13 +93,12 @@ function start_add (fl) pos = sync.load(fl.enqueue_pos) end end - error("BUG") end function start_remove (fl) local pos = sync.load(fl.dequeue_pos) local mask = fl.dequeue_mask - for _=1,1000 do + while true do local chunk = fl.chunk[band(pos, mask)] local seq = sync.load(chunk.sequence) local dif = seq - (pos+1) @@ -113,7 +112,6 @@ function start_remove (fl) pos = sync.load(fl.dequeue_pos) end end - error("bug") end function finish (chunk, seq) From f8c977c81f29fb6ce2afffb3c342e1faa5722c5c Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 9 Feb 2023 15:06:56 +0100 Subject: [PATCH 44/62] group_freelist: add deadlock detection --- src/core/group_freelist.lua | 57 ++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/src/core/group_freelist.lua b/src/core/group_freelist.lua index a52e67a48c..11327c8fdc 100644 --- a/src/core/group_freelist.lua +++ b/src/core/group_freelist.lua @@ -76,41 +76,58 @@ function freelist_open (name, readonly) return shm.open(name, "struct group_freelist", readonly, size) end +local function deadlock_timeout(deadline, timeout, message) + local now = ffi.C.get_monotonic_time() + deadline = deadline or (now + timeout) + assert(now < deadline, message) + return deadline +end + function start_add (fl) + local deadline local pos = sync.load(fl.enqueue_pos) local mask = fl.enqueue_mask while true do - local chunk = fl.chunk[band(pos, mask)] - local seq = sync.load(chunk.sequence) - local dif = seq - pos - if dif == 0 then - if sync.cas(fl.enqueue_pos, pos, pos+1) then - return chunk, pos+1 + for _=1,100000 do + local chunk = fl.chunk[band(pos, mask)] + local seq = sync.load(chunk.sequence) + local dif = seq - pos + if dif == 0 then + if sync.cas(fl.enqueue_pos, pos, pos+1) then + return chunk, pos+1 + end + elseif dif < 0 then + return + else + pos = sync.load(fl.enqueue_pos) end - elseif dif < 0 then - return - else - pos = sync.load(fl.enqueue_pos) end + deadline = deadlock_timeout(deadline, 5, + "deadlock in group_freelist.start_add") end end function start_remove (fl) + local deadline local pos = sync.load(fl.dequeue_pos) local mask = fl.dequeue_mask while true do - local chunk = fl.chunk[band(pos, mask)] - local seq = sync.load(chunk.sequence) - local dif = seq - (pos+1) - if dif == 0 then - if sync.cas(fl.dequeue_pos, pos, pos+1) then - return chunk, pos+mask+1 + for _=1,100000 do + local chunk = fl.chunk[band(pos, mask)] + local seq = sync.load(chunk.sequence) + local dif = seq - (pos+1) + if dif == 0 then + if sync.cas(fl.dequeue_pos, pos, pos+1) then + return chunk, pos+mask+1 + end + elseif dif < 0 then + return + else + pos = sync.load(fl.dequeue_pos) end - elseif dif < 0 then - return - else - pos = sync.load(fl.dequeue_pos) end + deadline = deadlock_timeout(deadline, 5, + "deadlock in group_freelist.start_remove") end end From 2fbc94b33e969491707ad808f54ca605354eaaba Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Fri, 24 Feb 2023 17:21:54 +0100 Subject: [PATCH 45/62] lib.poptrie: fix bug where address bits where interpreted in reverse order --- src/lib/poptrie.lua | 349 +++++++++++++++++++++++++++--------- src/lib/poptrie_lookup.dasl | 30 ++-- 2 files changed, 283 insertions(+), 96 deletions(-) diff --git a/src/lib/poptrie.lua b/src/lib/poptrie.lua index 2367eca46e..109a89072c 100644 --- a/src/lib/poptrie.lua +++ b/src/lib/poptrie.lua @@ -103,17 +103,23 @@ end -- Extract bits at offset -- key=uint8_t[?] function extract (key, offset, length) - local bits, read = 0, 0 - local byte = math.floor(offset/8) - while read < length do - offset = math.max(offset - byte*8, 0) - local nbits = math.min(length - read, 8 - offset) - local x = band(rshift(key[byte], offset), lshift(1, nbits) - 1) - bits = bor(bits, lshift(x, read)) - read = read + nbits - byte = math.min(byte + 1, ffi.sizeof(key) - 1) - end - return bits + local bits = 0 + local skip = math.floor(offset/8) + offset = offset - skip*8 + for i = skip, 15 do + for j = 7, 0, -1 do + if offset == 0 then + local bit = rshift(band(key[i], lshift(1, j)), j) + length = length - 1 + bits = bor(bits, lshift(bit, length)) + if length == 0 then + return bits + end + else + offset = offset - 1 + end + end + end end -- Add key/value pair to RIB (intermediary binary trie) @@ -153,6 +159,7 @@ end -- Map f over keys of length in RIB function Poptrie:rib_map (f, length, root) + local bit = lshift(1, length-1) local function map (node, offset, key, value) value = (node and node.value) or value local left, right = node and node.left, node and node.right @@ -160,7 +167,7 @@ function Poptrie:rib_map (f, length, root) f(key, value, (left or right) and node) else map(left, offset + 1, key, value) - map(right, offset + 1, bor(key, lshift(1, offset)), value) + map(right, offset + 1, bor(key, rshift(bit, offset)), value) end end return map(root or self.rib, 0, 0) @@ -384,80 +391,250 @@ function selftest () for i = 1, 16 do bs[i] = math.random(256) - 1 end return s(unpack(bs)) end - -- To test direct pointing: direct_pointing = true - local t = new{} - -- Tets building empty RIB - t:build() - -- Test RIB - t:add(s(0x00), 8, 1) -- 00000000 - t:add(s(0x0F), 8, 2) -- 00001111 - t:add(s(0x07), 4, 3) -- 0111 - t:add(s(0xFF), 8, 4) -- 11111111 - t:add(s(0xFF), 5, 5) -- 11111 - -- 111111111111111111111111111111111111111111111111111111111111111111110000 - t:add(s(0xF0,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x0F), 72, 6) - local v, n = t:rib_lookup(s(0x0), 1) - assert(not v and n.left and not n.right) - local v, n = t:rib_lookup(s(0x00), 8) - assert(v == 1 and not n) - local v, n = t:rib_lookup(s(0x07), 3) - assert(not v and (n.left and n.right)) - local v, n = t:rib_lookup(s(0x0), 1, n) - assert(v == 3 and not n) - local v, n = t:rib_lookup(s(0xFF), 5) - assert(v == 5 and (not n.left) and n.right) - local v, n = t:rib_lookup(s(0x0F), 3, n) - assert(v == 4 and not n) - local v, n = t:rib_lookup(s(0x3F), 8) - assert(v == 5 and not n) - local v, n = t:rib_lookup(s(0xF0,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x0F), 72) - assert(v == 6 and not n) - -- Test FIB - t:build() - if debug then t:fib_info() end - assert(t:lookup(s(0x00)) == 1) -- 00000000 - assert(t:lookup(s(0x03)) == 0) -- 00000011 - assert(t:lookup(s(0x07)) == 3) -- 00000111 - assert(t:lookup(s(0x0F)) == 2) -- 00001111 - assert(t:lookup(s(0x1F)) == 5) -- 00011111 - assert(t:lookup(s(0x3F)) == 5) -- 00111111 - assert(t:lookup(s(0xFF)) == 4) -- 11111111 - assert(t:lookup(s(0xF0,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x0F)) == 6) - assert(t:lookup32(s(0x00)) == 1) - assert(t:lookup32(s(0x03)) == 0) - assert(t:lookup32(s(0x07)) == 3) - assert(t:lookup32(s(0x0F)) == 2) - assert(t:lookup32(s(0x1F)) == 5) - assert(t:lookup32(s(0x3F)) == 5) - assert(t:lookup32(s(0xFF)) == 4) - assert(t:lookup64(s(0x00)) == 1) - assert(t:lookup64(s(0x03)) == 0) - assert(t:lookup64(s(0x07)) == 3) - assert(t:lookup64(s(0x0F)) == 2) - assert(t:lookup64(s(0x1F)) == 5) - assert(t:lookup64(s(0x3F)) == 5) - assert(t:lookup64(s(0xFF)) == 4) - assert(t:lookup128(s(0x00)) == 1) - assert(t:lookup128(s(0x03)) == 0) - assert(t:lookup128(s(0x07)) == 3) - assert(t:lookup128(s(0x0F)) == 2) - assert(t:lookup128(s(0x1F)) == 5) - assert(t:lookup128(s(0x3F)) == 5) - assert(t:lookup128(s(0xFF)) == 4) - assert(t:lookup128(s(0xF0,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x0F)) == 6) - -- Test 32-bit leaves - local t = new{direct_pointing=true, s=8, leaf_t=ffi.typeof("uint32_t")} - t:add(s(0xff,0x00), 9, 0xffffffff) - t:add(s(0xff,0x01), 9, 0xfffffffe) - t:build() - assert(t:lookup(s(0xff,0x00)) == 0xffffffff) - assert(t:lookup(s(0xff,0x01)) == 0xfffffffe) - assert(t:lookup32(s(0xff,0x00)) == 0xffffffff) - assert(t:lookup32(s(0xff,0x01)) == 0xfffffffe) - assert(t:lookup64(s(0xff,0x00)) == 0xffffffff) - assert(t:lookup64(s(0xff,0x01)) == 0xfffffffe) - assert(t:lookup128(s(0xff,0x00)) == 0xffffffff) - assert(t:lookup128(s(0xff,0x01)) == 0xfffffffe) + + for _, leaf_t in ipairs{"uint16_t", "uint32_t"} do + for _, direct_pointing in ipairs{false, true} do + print("unit tests with:") + print("", "direct_pointing", direct_pointing) + print("", "leaf_t", leaf_t) + + local t = new{direct_pointing=direct_pointing, leaf_t=ffi.typeof(leaf_t)} + t:add(s(128,178,0,0), 15, 559) + local v, n = t:rib_lookup(s(128,178,0,1), 32) + assert(t:rib_lookup(s(128,178,0,1), 32) == 559) + assert(t:rib_lookup(s(128,179,0,1), 32) == 559) + t:build() + assert(t:lookup(s(128,178,0,1)) == 559) + assert(t:lookup(s(128,179,0,1)) == 559) + assert(t:lookup32(s(128,178,0,1)) == 559) + assert(t:lookup32(s(128,179,0,1)) == 559) + assert(t:lookup(s(128,178,0,1)) == 559) + assert(t:lookup(s(128,179,0,1)) == 559) + assert(t:lookup64(s(128,178,0,1)) == 559) + assert(t:lookup64(s(128,179,0,1)) == 559) + assert(t:lookup(s(128,178,0,1)) == 559) + assert(t:lookup(s(128,179,0,1)) == 559) + assert(t:lookup128(s(128,178,0,1)) == 559) + assert(t:lookup128(s(128,179,0,1)) == 559) + + local t = new{direct_pointing=direct_pointing, leaf_t=ffi.typeof(leaf_t)} + t:add(s(128,0,0,0), 7, 7) + t:add(s(128,178,0,0), 15, 15) + t:add(s(128,178,154,160), 27, 27) + t:add(s(128,178,154,196), 31, 31) + t:build() + -- /7 + assert(t:rib_lookup(s(128,0,0,0), 32) == 7) + assert(t:rib_lookup(s(128,0,0,1), 32) == 7) + assert(t:rib_lookup(s(129,0,0,1), 32) == 7) + assert(t:rib_lookup(s(127,0,0,1), 32) == nil) + assert(t:rib_lookup(s(130,0,0,1), 32) == nil) + assert(t:lookup(s(128,0,0,0)) == 7) + assert(t:lookup(s(128,0,0,1)) == 7) + assert(t:lookup(s(129,0,0,1)) == 7) + assert(t:lookup(s(127,0,0,1)) == 0) + assert(t:lookup(s(130,0,0,1)) == 0) + assert(t:lookup32(s(128,0,0,0)) == 7) + assert(t:lookup32(s(128,0,0,1)) == 7) + assert(t:lookup32(s(129,0,0,1)) == 7) + assert(t:lookup32(s(127,0,0,1)) == 0) + assert(t:lookup32(s(130,0,0,1)) == 0) + assert(t:lookup64(s(128,0,0,0)) == 7) + assert(t:lookup64(s(128,0,0,1)) == 7) + assert(t:lookup64(s(129,0,0,1)) == 7) + assert(t:lookup64(s(127,0,0,1)) == 0) + assert(t:lookup64(s(130,0,0,1)) == 0) + assert(t:lookup128(s(128,0,0,0)) == 7) + assert(t:lookup128(s(128,0,0,1)) == 7) + assert(t:lookup128(s(129,0,0,1)) == 7) + assert(t:lookup128(s(127,0,0,1)) == 0) + assert(t:lookup128(s(130,0,0,1)) == 0) + -- /15 + assert(t:rib_lookup(s(128,178,0,0), 32) == 15) + assert(t:rib_lookup(s(128,178,0,1), 32) == 15) + assert(t:rib_lookup(s(128,179,0,1), 32) == 15) + assert(t:rib_lookup(s(128,177,0,1), 32) == 7) + assert(t:rib_lookup(s(128,180,0,1), 32) == 7) + assert(t:lookup(s(128,178,0,0)) == 15) + assert(t:lookup(s(128,178,0,1)) == 15) + assert(t:lookup(s(128,179,0,1)) == 15) + assert(t:lookup(s(128,177,0,1)) == 7) + assert(t:lookup(s(128,180,0,1)) == 7) + assert(t:lookup32(s(128,178,0,0)) == 15) + assert(t:lookup32(s(128,178,0,1)) == 15) + assert(t:lookup32(s(128,179,0,1)) == 15) + assert(t:lookup32(s(128,177,0,1)) == 7) + assert(t:lookup32(s(128,180,0,1)) == 7) + assert(t:lookup64(s(128,178,0,0)) == 15) + assert(t:lookup64(s(128,178,0,1)) == 15) + assert(t:lookup64(s(128,179,0,1)) == 15) + assert(t:lookup64(s(128,177,0,1)) == 7) + assert(t:lookup64(s(128,180,0,1)) == 7) + assert(t:lookup128(s(128,178,0,0)) == 15) + assert(t:lookup128(s(128,178,0,1)) == 15) + assert(t:lookup128(s(128,179,0,1)) == 15) + assert(t:lookup128(s(128,177,0,1)) == 7) + assert(t:lookup128(s(128,180,0,1)) == 7) + -- /27 + assert(t:rib_lookup(s(128,178,154,160), 32) == 27) + assert(t:rib_lookup(s(128,178,154,161), 32) == 27) + assert(t:rib_lookup(s(128,178,154,191), 32) == 27) + assert(t:rib_lookup(s(128,179,154,160), 32) == 15) + assert(t:rib_lookup(s(128,178,154,159), 32) == 15) + assert(t:lookup(s(128,178,154,160)) == 27) + assert(t:lookup(s(128,178,154,161)) == 27) + assert(t:lookup(s(128,178,154,191)) == 27) + assert(t:lookup(s(128,179,154,160)) == 15) + assert(t:lookup(s(128,178,154,159)) == 15) + assert(t:lookup32(s(128,178,154,160)) == 27) + assert(t:lookup32(s(128,178,154,161)) == 27) + assert(t:lookup32(s(128,178,154,191)) == 27) + assert(t:lookup32(s(128,179,154,160)) == 15) + assert(t:lookup32(s(128,178,154,159)) == 15) + assert(t:lookup64(s(128,178,154,160)) == 27) + assert(t:lookup64(s(128,178,154,161)) == 27) + assert(t:lookup64(s(128,178,154,191)) == 27) + assert(t:lookup64(s(128,179,154,160)) == 15) + assert(t:lookup64(s(128,178,154,159)) == 15) + assert(t:lookup128(s(128,178,154,160)) == 27) + assert(t:lookup128(s(128,178,154,161)) == 27) + assert(t:lookup128(s(128,178,154,191)) == 27) + assert(t:lookup128(s(128,179,154,160)) == 15) + assert(t:lookup128(s(128,178,154,159)) == 15) + -- /31 + assert(t:rib_lookup(s(128,178,154,196), 32) == 31) + assert(t:rib_lookup(s(128,178,154,197), 32) == 31) + assert(t:rib_lookup(s(128,178,154,180), 32) == 27) + assert(t:rib_lookup(s(128,178,154,198), 32) == 15) + assert(t:lookup(s(128,178,154,196)) == 31) + assert(t:lookup(s(128,178,154,197)) == 31) + assert(t:lookup(s(128,178,154,180)) == 27) + assert(t:lookup(s(128,178,154,198)) == 15) + assert(t:lookup32(s(128,178,154,196)) == 31) + assert(t:lookup32(s(128,178,154,197)) == 31) + assert(t:lookup32(s(128,178,154,180)) == 27) + assert(t:lookup32(s(128,178,154,198)) == 15) + assert(t:lookup64(s(128,178,154,196)) == 31) + assert(t:lookup64(s(128,178,154,197)) == 31) + assert(t:lookup64(s(128,178,154,180)) == 27) + assert(t:lookup64(s(128,178,154,198)) == 15) + assert(t:lookup128(s(128,178,154,196)) == 31) + assert(t:lookup128(s(128,178,154,197)) == 31) + assert(t:lookup128(s(128,178,154,180)) == 27) + assert(t:lookup128(s(128,178,154,198)) == 15) + + local t = new{direct_pointing=direct_pointing, leaf_t=ffi.typeof(leaf_t)} + t:add(s(128,0,0,0,1,224,0,0), 43, 43) + t:add(s(128,0,0,0,1,224,178,196), 63, 63) + t:build() + -- /43 + assert(t:rib_lookup(s(128,0,0,0,1,224,0,0), 64) == 43) + assert(t:rib_lookup(s(128,0,0,0,1,224,0,1), 64) == 43) + assert(t:rib_lookup(s(128,0,0,0,1,225,0,0), 64) == 43) + assert(t:rib_lookup(s(128,0,0,0,1,254,100,12), 64) == 43) + assert(t:rib_lookup(s(128,0,0,0,1,200,100,12), 64) == nil) + assert(t:rib_lookup(s(128,0,0,0,2,224,0,0), 64) == nil) + assert(t:lookup(s(128,0,0,0,1,224,0,0)) == 43) + assert(t:lookup(s(128,0,0,0,1,224,0,1)) == 43) + assert(t:lookup(s(128,0,0,0,1,225,0,0)) == 43) + assert(t:lookup(s(128,0,0,0,1,254,100,12)) == 43) + assert(t:lookup(s(128,0,0,0,1,200,100,12)) == 0) + assert(t:lookup(s(128,0,0,0,2,224,0,0)) == 0) + assert(t:lookup64(s(128,0,0,0,1,224,0,0)) == 43) + assert(t:lookup64(s(128,0,0,0,1,224,0,1)) == 43) + assert(t:lookup64(s(128,0,0,0,1,225,0,0)) == 43) + assert(t:lookup64(s(128,0,0,0,1,254,100,12)) == 43) + assert(t:lookup64(s(128,0,0,0,1,200,100,12)) == 0) + assert(t:lookup64(s(128,0,0,0,2,224,0,0)) == 0) + assert(t:lookup128(s(128,0,0,0,1,224,0,0)) == 43) + assert(t:lookup128(s(128,0,0,0,1,224,0,1)) == 43) + assert(t:lookup128(s(128,0,0,0,1,225,0,0)) == 43) + assert(t:lookup128(s(128,0,0,0,1,254,100,12)) == 43) + assert(t:lookup128(s(128,0,0,0,1,200,100,12)) == 0) + assert(t:lookup128(s(128,0,0,0,2,224,0,0)) == 0) + -- /63 + assert(t:rib_lookup(s(128,0,0,0,1,224,178,196), 64) == 63) + assert(t:rib_lookup(s(128,0,0,0,1,224,178,197), 64) == 63) + assert(t:rib_lookup(s(128,0,0,0,1,225,178,197), 64) == 43) + assert(t:rib_lookup(s(128,0,0,0,1,224,179,196), 64) == 43) + assert(t:rib_lookup(s(128,0,0,0,2,225,178,197), 64) == nil) + assert(t:lookup(s(128,0,0,0,1,224,178,196)) == 63) + assert(t:lookup(s(128,0,0,0,1,224,178,197)) == 63) + assert(t:lookup(s(128,0,0,0,1,225,178,197)) == 43) + assert(t:lookup(s(128,0,0,0,1,224,179,196)) == 43) + assert(t:lookup(s(128,0,0,0,2,225,178,197)) == 0) + assert(t:lookup64(s(128,0,0,0,1,224,178,196)) == 63) + assert(t:lookup64(s(128,0,0,0,1,224,178,197)) == 63) + assert(t:lookup64(s(128,0,0,0,1,225,178,197)) == 43) + assert(t:lookup64(s(128,0,0,0,1,224,179,196)) == 43) + assert(t:lookup64(s(128,0,0,0,2,225,178,197)) == 0) + assert(t:lookup128(s(128,0,0,0,1,224,178,196)) == 63) + assert(t:lookup128(s(128,0,0,0,1,224,178,197)) == 63) + assert(t:lookup128(s(128,0,0,0,1,225,178,197)) == 43) + assert(t:lookup128(s(128,0,0,0,1,224,179,196)) == 43) + assert(t:lookup128(s(128,0,0,0,2,225,178,197)) == 0) + + local t = new{direct_pointing=direct_pointing, leaf_t=ffi.typeof(leaf_t)} + -- Tets building empty RIB + t:build() + -- Test RIB + t:add(s(0x00), 8, 1) -- 00000000 + t:add(s(0xF0), 8, 2) -- 11110000 + t:add(s(0xE0), 4, 3) -- 11100000 + t:add(s(0xFF), 8, 4) -- 11111111 + t:add(s(0xFF), 5, 5) -- 11111000 + t:add(s(0xF0,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x0F), 72, 6) + local v, n = t:rib_lookup(s(0x0), 1) + assert(not v and n.left and not n.right) + local v, n = t:rib_lookup(s(0x00), 8) + assert(v == 1 and not n) + local v, n = t:rib_lookup(s(0xE0), 3) + assert(not v and (n.left and n.right)) + local v, n = t:rib_lookup(s(0x0), 1, n) + assert(v == 3 and not n) + local v, n = t:rib_lookup(s(0xFF), 5) + assert(v == 5 and (not n.left) and n.right) + local v, n = t:rib_lookup(s(0xF0), 3, n) + assert(v == 4 and not n) + local v, n = t:rib_lookup(s(0xF8), 8) + assert(v == 5 and not n) + local v, n = t:rib_lookup(s(0xF0,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x0F), 72) + assert(v == 6 and not n) + -- Test FIB + t:build() + if debug then t:fib_info() end + assert(t:lookup(s(0x00)) == 1) -- 00000000 + assert(t:lookup(s(0xC0)) == 0) -- 11000000 + assert(t:lookup(s(0xE0)) == 3) -- 11100000 + assert(t:lookup(s(0xF0)) == 2) -- 11110000 + assert(t:lookup(s(0xF8)) == 5) -- 11111000 + assert(t:lookup(s(0xFC)) == 5) -- 11111100 + assert(t:lookup(s(0xFF)) == 4) -- 11111111 + assert(t:lookup(s(0xF0,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x0F)) == 6) + assert(t:lookup32(s(0x00)) == 1) + assert(t:lookup32(s(0xC0)) == 0) + assert(t:lookup32(s(0xE0)) == 3) + assert(t:lookup32(s(0xF0)) == 2) + assert(t:lookup32(s(0xF8)) == 5) + assert(t:lookup32(s(0xFC)) == 5) + assert(t:lookup32(s(0xFF)) == 4) + assert(t:lookup64(s(0x00)) == 1) + assert(t:lookup64(s(0xC0)) == 0) + assert(t:lookup64(s(0xE0)) == 3) + assert(t:lookup64(s(0xF0)) == 2) + assert(t:lookup64(s(0xF8)) == 5) + assert(t:lookup64(s(0xFC)) == 5) + assert(t:lookup64(s(0xFF)) == 4) + assert(t:lookup128(s(0x00)) == 1) + assert(t:lookup128(s(0xC0)) == 0) + assert(t:lookup128(s(0xE0)) == 3) + assert(t:lookup128(s(0xF0)) == 2) + assert(t:lookup128(s(0xF8)) == 5) + assert(t:lookup128(s(0xFC)) == 5) + assert(t:lookup128(s(0xFF)) == 4) + assert(t:lookup128(s(0xF0,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x0F)) == 6) + end + end -- Random testing local function reproduce (cases, config) diff --git a/src/lib/poptrie_lookup.dasl b/src/lib/poptrie_lookup.dasl index 6cc1d529ec..dab200440f 100644 --- a/src/lib/poptrie_lookup.dasl +++ b/src/lib/poptrie_lookup.dasl @@ -73,10 +73,10 @@ local BMI2 = (assert(lib.readfile("/proc/cpuinfo", "*a"), function lookup (Dst, Poptrie, keysize) if Poptrie.direct_pointing then -- v = extract(key, 0, Poptrie.s) - local direct_mask = bit.lshift(1ULL, Poptrie.s) - 1 - -- v = band(key, direct_mask) + local direct_shift = 32 - Poptrie.s | mov v_dw, dword [key] - | and v, direct_mask + | bswap v_dw + | shr v_dw, direct_shift -- index = dmap[v] | mov index, dword [dmap+v*4] -- eax = band(index, leaf_tag - 1) (tag inverted) @@ -101,28 +101,38 @@ function lookup (Dst, Poptrie, keysize) |2: -- v = extract(key, offset, k=6) if keysize == 32 then + | mov v_dw, dword [key] + | bswap v_dw if BMI2 then - | shrx v_dw, dword [key], offset + | shlx v_dw, v_dw, offset else | mov ecx, offset - | mov v_dw, dword [key] - | shr v, cl + | shl v_dw, cl end + local shift = 32 - 6 + | shr v_dw, shift elseif keysize == 64 then + | mov v, [key] + | bswap v if BMI2 then - | shrx v, [key], offsetx + | shlx v, v, offsetx else | mov ecx, offset - | mov v, [key] - | shr v, cl + | shl v, cl end + local shift = 64 - 6 + | shr v, shift elseif keysize == 128 then | mov ecx, offset | mov v, [key] + | bswap v | mov vec, [key+8] + | bswap vec | test cl, 64 | cmovnz v, vec - | shrd v, vec, cl + | shld v, vec, cl + local shift = 64 - 6 + | shr v, shift else error("NYI") end -- v = band(v, lshift(1, k=6) - 1) | and v_dw, 0x3F From 67b3d9c66ede5a726033f4619734ac1f39d3596d Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Mon, 27 Feb 2023 12:41:43 +0100 Subject: [PATCH 46/62] lib.poptrie: optimize x86 lookup Do not maintain offset counter and repeatedly load key and then shift it by offset. Instead load key at beginning and shift it further for each iteration. This way we only have to do load/bswap once. --- src/lib/poptrie_lookup.dasl | 79 +++++++++++++++---------------------- 1 file changed, 31 insertions(+), 48 deletions(-) diff --git a/src/lib/poptrie_lookup.dasl b/src/lib/poptrie_lookup.dasl index dab200440f..1aea871025 100644 --- a/src/lib/poptrie_lookup.dasl +++ b/src/lib/poptrie_lookup.dasl @@ -60,23 +60,39 @@ local BMI2 = (assert(lib.readfile("/proc/cpuinfo", "*a"), |.define leaves, rdi -- pointer to leaves array |.define nodes, rsi -- pointer to nodes array |.define key, rdx -- key to look up +|.define key_dw, edx -- (key as dword) |.define dmap, rcx -- pointer to directmap |.define index, r8d -- index into node array |.define node, r8 -- pointer into node array -|.define offset, r9d -- offset into key -|.define offsetx, r9 -- (offset as qword) +|.define key_x, r9 -- key extension (for 128 bit keys) |.define v, r10 -- k or s bits extracted from key |.define v_dw, r10d -- (v as dword) |.define vec, r11 -- 64-bit vector or leafvec -- lookup(leaf_t *leaves, node_t *nodes, key) -> leaf_t function lookup (Dst, Poptrie, keysize) + if keysize == 32 then + | mov key_dw, dword [key] + | bswap key + elseif keysize == 64 then + | mov key, [key] + | bswap key + elseif keysize == 128 then + | mov key_x, [key+8] + | bswap key_x + | mov key, [key] + | bswap key + else error("NYI") end if Poptrie.direct_pointing then -- v = extract(key, 0, Poptrie.s) - local direct_shift = 32 - Poptrie.s - | mov v_dw, dword [key] - | bswap v_dw - | shr v_dw, direct_shift + | mov v, key + | shr v, (64 - Poptrie.s) + if keysize <= 64 then + | shl key, Poptrie.s + else + | shld key, key_x, Poptrie.s + | shl key_x, Poptrie.s + end -- index = dmap[v] | mov index, dword [dmap+v*4] -- eax = band(index, leaf_tag - 1) (tag inverted) @@ -89,53 +105,22 @@ function lookup (Dst, Poptrie, keysize) |1: | imul index, 24 -- multiply by node size | lea node, [nodes+index] - -- offset = s - | mov offset, Poptrie.s else - -- index, node, offset = 0, nodes[index], 0 + -- index, node = 0, nodes[index] | xor index, index | lea node, [nodes+0] -- nodes[0] - | xor offset, offset end -- while band(vec, lshift(1ULL, v)) ~= 0 |2: -- v = extract(key, offset, k=6) - if keysize == 32 then - | mov v_dw, dword [key] - | bswap v_dw - if BMI2 then - | shlx v_dw, v_dw, offset - else - | mov ecx, offset - | shl v_dw, cl - end - local shift = 32 - 6 - | shr v_dw, shift - elseif keysize == 64 then - | mov v, [key] - | bswap v - if BMI2 then - | shlx v, v, offsetx - else - | mov ecx, offset - | shl v, cl - end - local shift = 64 - 6 - | shr v, shift - elseif keysize == 128 then - | mov ecx, offset - | mov v, [key] - | bswap v - | mov vec, [key+8] - | bswap vec - | test cl, 64 - | cmovnz v, vec - | shld v, vec, cl - local shift = 64 - 6 - | shr v, shift - else error("NYI") end - -- v = band(v, lshift(1, k=6) - 1) - | and v_dw, 0x3F + | mov v, key + | shr v, (64 - Poptrie.k) + if keysize <= 64 then + | shl key, Poptrie.k + else + | shld key, key_x, Poptrie.k + | shl key_x, Poptrie.k + end -- vec = nodes[index].vector | mov vec, qword [node+8] -- is bit v set in vec? @@ -161,8 +146,6 @@ function lookup (Dst, Poptrie, keysize) -- node = nodes[index] | imul index, 24 -- multiply by node size | lea node, [nodes+index] - -- offset = offset + k - | add offset, 6 | jmp <2 -- loop -- end while |4: From 8e7be8efac627f511d36cacbe6e1e072e86299dd Mon Sep 17 00:00:00 2001 From: Alexander Gall Date: Tue, 28 Feb 2023 11:36:07 +0100 Subject: [PATCH 47/62] apps.rss.metadata: remove 59 (no-next-header) from list of IPv6 extension headers The original code had the effect of setting the ULP to 255 in this case, which is wrong. We simply need to treat 59 like any other ULP. --- src/apps/rss/metadata.lua | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/apps/rss/metadata.lua b/src/apps/rss/metadata.lua index 96ab01fc49..22ecec54cd 100644 --- a/src/apps/rss/metadata.lua +++ b/src/apps/rss/metadata.lua @@ -117,11 +117,6 @@ local ipv6_ext_hdr_fns = { local payload_len = ext_hdr.length return payload_len * 4 - 2, next_header end, - [59] = - -- No next header - function(ptr) - return 0, 255 - end, [60] = -- Destination ipv6_generic_ext_hdr, From a382b1d5dbd01fcf2209269515317f420824fb4c Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 15 Mar 2023 12:07:45 +0100 Subject: [PATCH 48/62] core.timeline: fix selftest --- src/core/timeline.dasl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index c32473a861..58e0533125 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -278,6 +278,9 @@ event with rate 3 (0 args) -- messages. function selftest () print("selftest: timeline") + + enabled = true -- enable timeline + local tl = new("selftest/timeline") local e = load_events_from_string(tl, test_events, "selftest", {module="timeline", func="selftest"}) @@ -308,7 +311,7 @@ function selftest () local sample = {} for i = 1, 1000 do sample[i] = tl.entries[i].tsc - tl.entries[i-1].tsc end table.sort(sample) - print("median time delta for sample:", tonumber(sample[1]).." cycles") + print("median time delta for sample:", tonumber(sample[500]).." cycles") print("selftest: ok") end From 00287bbbed4603f061eee3d99a34855582f2242d Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 15 Mar 2023 13:41:13 +0100 Subject: [PATCH 49/62] engine: allow timely timeline initialization This fixes a bug where the the timeline was initialized before one had the opportunity to set timeline.enabled --- src/core/app.lua | 5 +++++ src/core/packet.lua | 4 +++- src/lib/ptree/worker.lua | 6 ++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 19dbe39a30..b5e4b15d2d 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -46,6 +46,7 @@ function timeline () timeline_log = timeline_mod.new("events.timeline") timeline_mod.rate(timeline_log, 9) -- initially log events with rate >= 9 events = timeline_mod.load_events(timeline_log, "core.engine") + packet.initialize_events() end return timeline_log end @@ -267,6 +268,8 @@ end -- Return the configuration actions needed to migrate from old config to new. function compute_config_actions (old, new) + timeline() -- Ensure timeline is created and initialized + local actions = {} -- First determine the links that are going away and remove them. @@ -333,6 +336,8 @@ end -- Update the active app network by applying the necessary actions. function apply_config_actions (actions) + timeline() -- Ensure timeline is created and initialized + -- Table of functions that execute config actions local ops = {} -- As an efficiency hack, some apps rely on the fact that we add diff --git a/src/core/packet.lua b/src/core/packet.lua index 523200c311..81e76bc0c3 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -116,7 +116,9 @@ function initialize (max_packets) shm.unlink("engine/packets.freelist") end packets_fl = freelist_create("engine/packets.freelist", max_packets) - +end + +function initialize_events () if not events then events = timeline.load_events(engine.timeline(), "core.packet") end diff --git a/src/lib/ptree/worker.lua b/src/lib/ptree/worker.lua index 4e280121bd..db88765302 100644 --- a/src/lib/ptree/worker.lua +++ b/src/lib/ptree/worker.lua @@ -15,7 +15,7 @@ local channel = require("lib.ptree.channel") local action_codec = require("lib.ptree.action_codec") local ptree_alarms = require("lib.ptree.alarms") local timeline = require("core.timeline") -local events = timeline.load_events(engine.timeline(), "core.engine") +local events local Worker = {} @@ -105,7 +105,9 @@ function Worker:main () if not engine.auditlog_enabled then engine.enable_auditlog() end - engine.timeline() + if not events then + events = timeline.load_events(engine.timeline(), "core.engine") + end engine.enable_tick() From a6fe6dc8b5862cc43a9d6ad34d8a570303dab333 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 15 Mar 2023 13:41:42 +0100 Subject: [PATCH 50/62] snabb ipfix probe: add --timeline option to CLI --- src/program/ipfix/probe/probe.lua | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/program/ipfix/probe/probe.lua b/src/program/ipfix/probe/probe.lua index b09e43e118..6c7bd7f628 100644 --- a/src/program/ipfix/probe/probe.lua +++ b/src/program/ipfix/probe/probe.lua @@ -23,9 +23,10 @@ local long_opts = { busywait ="b", ["real-time"] = "r", ["no-profile"] = "p", + ["timeline"] = "t", ["test-pcap"] = "T" } -local opt = "hn:brpT:" +local opt = "hn:brptT:" local opt_handler = {} local name local busywait, real_time, profile = false, false, true @@ -34,6 +35,8 @@ function opt_handler.n (arg) name = arg end function opt_handler.b () busywait = true end function opt_handler.r () real_time = true end function opt_handler.p () profile = false end +local timeline = false +function opt_handler.t () timeline = true end local pcap_input function opt_handler.T (arg) pcap_input = arg end @@ -86,6 +89,7 @@ function start (name, confpath) busywait = busywait, real_time = real_time, profile = profile, + timeline = timeline, jit_opt = { sizemcode=256, maxmcode=8192, From 66f0eb3ad9238921ec382e04183af99ee50c5ab1 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 15 Mar 2023 15:25:06 +0100 Subject: [PATCH 51/62] lib.scheduling: add group_freelist_size setting --- src/lib/scheduling.lua | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/lib/scheduling.lua b/src/lib/scheduling.lua index 8c6453d25e..27d18997c5 100644 --- a/src/lib/scheduling.lua +++ b/src/lib/scheduling.lua @@ -23,6 +23,7 @@ local scheduling_opts = { cpu = {}, -- CPU index (integer). real_time = {}, -- Boolean. max_packets = {}, -- Positive integer. + group_freelist_size = {}, ingress_drop_monitor = {}, -- Action string: one of 'flush' or 'warn'. profile = {default=true}, -- Boolean. busywait = {default=true}, -- Boolean. @@ -61,6 +62,10 @@ function sched_apply.max_packets (max_packets) packet.initialize(max_packets) end +function sched_apply.group_freelist_size (nchunks) + packet.enable_group_freelist(nchunks) +end + function sched_apply.busywait (busywait) engine.busywait = busywait end From f906776e1d00a3fde2febcec6c5cc382ebbccaf3 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 15 Mar 2023 16:17:25 +0100 Subject: [PATCH 52/62] lib.interlink: fix shm printing bug --- src/lib/interlink.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/interlink.lua b/src/lib/interlink.lua index 1dc26fb0db..62ace97390 100644 --- a/src/lib/interlink.lua +++ b/src/lib/interlink.lua @@ -298,7 +298,7 @@ local function describe (r) [DOWN] = "deallocating" })[r.state[0]] end - return ("%d/%d (%s)"):format(queue_fill(r), size - 1, status(r)) + return ("%d/%d (%s)"):format(queue_fill(r), r.size, status(r)) end ffi.metatype("struct interlink", {__tostring=describe}) From 50cad00ffa194148f0b883ac8a6148df8a9bc35b Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 15 Mar 2023 16:18:33 +0100 Subject: [PATCH 53/62] ipfix probe: make group freelist and interlink sizes configurable via schema --- src/lib/yang/snabb-snabbflow-v1.yang | 21 +++++++++++++++++++++ src/program/ipfix/lib.lua | 18 ++++++++++-------- src/program/ipfix/probe/probe.lua | 24 ++++++++++++++++++++++-- 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/lib/yang/snabb-snabbflow-v1.yang b/src/lib/yang/snabb-snabbflow-v1.yang index 2465475409..87e75d0949 100644 --- a/src/lib/yang/snabb-snabbflow-v1.yang +++ b/src/lib/yang/snabb-snabbflow-v1.yang @@ -11,6 +11,11 @@ module snabb-snabbflow-v1 { description "Configuration for the Snabbflow IPFIX exporter."; + revision 2023-03-15 { + description + "Added interlink and group freelist configuration options."; + } + revision 2022-04-27 { description "Initial draft."; @@ -161,6 +166,22 @@ module snabb-snabbflow-v1 { } } } + + leaf group-freelist-size { + type uint32 { range 1024|2048|4096|8192; } + default 2048; + description + "Number of chunks allocated for the group freelist. + Each chunk holds up to 2048 packets. Must be a power of two."; + } + + leaf interlink-size { + type uint32 { range 1024|2048|4096|8192|16384|32768|65536|131072|262144; } + default 65536; + description + "Capacity of inter-process packet queues in number of packets. + Must be a power of two."; + } } } diff --git a/src/program/ipfix/lib.lua b/src/program/ipfix/lib.lua index 4a19eefef3..95a2e4831e 100644 --- a/src/program/ipfix/lib.lua +++ b/src/program/ipfix/lib.lua @@ -65,26 +65,28 @@ end local function configure_interlink_input (config, in_graph) config = lib.parse(config, { - name={required=true} + name={required=true}, + size={required=true} }) local graph = in_graph or app_graph.new() local in_name = config.name - app_graph.app(graph, in_name, Receiver) + app_graph.app(graph, in_name, Receiver, { size = config.size }) return graph, {name=in_name, output='output'} end local function configure_interlink_output (config, in_graph) config = lib.parse(config, { - name={required=true} + name={required=true}, + size={required=true} }) local graph = in_graph or app_graph.new() local out_name = config.name - app_graph.app(graph, out_name, Transmitter) + app_graph.app(graph, out_name, Transmitter, { size = config.size }) return graph, {name=out_name, input='input'} end @@ -175,9 +177,9 @@ local function configure_ipfix_tap_instance (config, in_graph) return graph, ipfix end -function configure_interlink_ipfix_tap_instance (in_name, config) +function configure_interlink_ipfix_tap_instance (in_name, link_size, config) local graph = app_graph.new() - local _, receiver = configure_interlink_input({name=in_name}, graph) + local _, receiver = configure_interlink_input({name=in_name, size=link_size}, graph) local _, ipfix = configure_ipfix_tap_instance(config, graph) link(graph, receiver, ipfix) @@ -230,7 +232,7 @@ local function configure_rss_tap_instances (config, outputs, rss_group, in_graph -- Keys -- link_name name of the link local _, transmitter = configure_interlink_output( - {name=output.link_name}, graph + {name=output.link_name, size=output.link_size}, graph ) link(graph, rss, transmitter) else @@ -303,4 +305,4 @@ function configure_mlx_controller (devices) need_ctrl = true end return ctrl_graph, need_ctrl -end \ No newline at end of file +end diff --git a/src/program/ipfix/probe/probe.lua b/src/program/ipfix/probe/probe.lua index 6c7bd7f628..d0ec0c6239 100644 --- a/src/program/ipfix/probe/probe.lua +++ b/src/program/ipfix/probe/probe.lua @@ -70,6 +70,17 @@ local function update_cpuset (cpu_pool) end end +local probe_group_freelist_size + +local function update_group_freelist_size (nchunks) + if not probe_group_freelist_size then + probe_group_freelist_size = nchunks + elseif probe_group_freelist_size ~= nchunks then + error("Can not change group-freelist-size after probe has started.") + end + return probe_group_freelist_size +end + local function warn (msg, ...) io.stderr:write("Warning: "..msg:format(...).."\n") io.stderr:flush() @@ -90,6 +101,9 @@ function start (name, confpath) real_time = real_time, profile = profile, timeline = timeline, + group_freelist_size = update_group_freelist_size( + conf.snabbflow_config.rss.software_scaling.group_freelist_size + ), jit_opt = { sizemcode=256, maxmcode=8192, @@ -126,6 +140,8 @@ function setup_workers (config) local flow_director = config.snabbflow_config.flow_director local ipfix = config.snabbflow_config.ipfix + update_group_freelist_size(rss.software_scaling.group_freelist_size) + local collector_pools = {} for name, p in pairs(ipfix.collector_pool) do local collectors = {} @@ -302,10 +318,14 @@ function setup_workers (config) args = iconfig } else - output = { type = "interlink", link_name = rss_link } + output = { + type = "interlink", + link_name = rss_link, + link_size = rss.software_scaling.interlink_size + } workers[rss_link] = probe.configure_interlink_ipfix_tap_instance( - rss_link, iconfig + rss_link, output.link_size, iconfig ) -- Dedicated exporter processes are restartable worker_opts[rss_link] = { From 81a9e19293c96d7dbc1341915624dd2dec6b3cf1 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 15 Mar 2023 20:28:58 +0100 Subject: [PATCH 54/62] timeline: enable via environment variable SNABB_TIMELINE_ENABLE --- src/core/timeline.dasl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 58e0533125..b384131a93 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -11,7 +11,7 @@ local shm = require("core.shm") local lib = require("core.lib") -- Set to true to enable timeline logging -enabled = false +enabled = (lib.getenv("SNABB_TIMELINE_ENABLE") and true) or false -- Load a set of events for logging onto a timeline. -- Returns a set of logging functions. From f805e81f4872b06fbad428349929786d76ff4207 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 15 Mar 2023 18:03:46 +0100 Subject: [PATCH 55/62] Revert "snabb ipfix probe: add --timeline option to CLI" This reverts commit a6fe6dc8b5862cc43a9d6ad34d8a570303dab333. --- src/program/ipfix/probe/probe.lua | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/program/ipfix/probe/probe.lua b/src/program/ipfix/probe/probe.lua index d0ec0c6239..e29911a8e2 100644 --- a/src/program/ipfix/probe/probe.lua +++ b/src/program/ipfix/probe/probe.lua @@ -23,10 +23,9 @@ local long_opts = { busywait ="b", ["real-time"] = "r", ["no-profile"] = "p", - ["timeline"] = "t", ["test-pcap"] = "T" } -local opt = "hn:brptT:" +local opt = "hn:brpT:" local opt_handler = {} local name local busywait, real_time, profile = false, false, true @@ -35,8 +34,6 @@ function opt_handler.n (arg) name = arg end function opt_handler.b () busywait = true end function opt_handler.r () real_time = true end function opt_handler.p () profile = false end -local timeline = false -function opt_handler.t () timeline = true end local pcap_input function opt_handler.T (arg) pcap_input = arg end @@ -100,7 +97,6 @@ function start (name, confpath) busywait = busywait, real_time = real_time, profile = profile, - timeline = timeline, group_freelist_size = update_group_freelist_size( conf.snabbflow_config.rss.software_scaling.group_freelist_size ), From 0916c248f0823976fabe848664341811050bbc35 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Wed, 15 Mar 2023 18:04:39 +0100 Subject: [PATCH 56/62] Revert "engine: allow timely timeline initialization" This reverts commit 00287bbbed4603f061eee3d99a34855582f2242d. --- src/core/app.lua | 5 ----- src/core/packet.lua | 4 +--- src/lib/ptree/worker.lua | 6 ++---- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index b5e4b15d2d..19dbe39a30 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -46,7 +46,6 @@ function timeline () timeline_log = timeline_mod.new("events.timeline") timeline_mod.rate(timeline_log, 9) -- initially log events with rate >= 9 events = timeline_mod.load_events(timeline_log, "core.engine") - packet.initialize_events() end return timeline_log end @@ -268,8 +267,6 @@ end -- Return the configuration actions needed to migrate from old config to new. function compute_config_actions (old, new) - timeline() -- Ensure timeline is created and initialized - local actions = {} -- First determine the links that are going away and remove them. @@ -336,8 +333,6 @@ end -- Update the active app network by applying the necessary actions. function apply_config_actions (actions) - timeline() -- Ensure timeline is created and initialized - -- Table of functions that execute config actions local ops = {} -- As an efficiency hack, some apps rely on the fact that we add diff --git a/src/core/packet.lua b/src/core/packet.lua index 81e76bc0c3..523200c311 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -116,9 +116,7 @@ function initialize (max_packets) shm.unlink("engine/packets.freelist") end packets_fl = freelist_create("engine/packets.freelist", max_packets) -end - -function initialize_events () + if not events then events = timeline.load_events(engine.timeline(), "core.packet") end diff --git a/src/lib/ptree/worker.lua b/src/lib/ptree/worker.lua index db88765302..4e280121bd 100644 --- a/src/lib/ptree/worker.lua +++ b/src/lib/ptree/worker.lua @@ -15,7 +15,7 @@ local channel = require("lib.ptree.channel") local action_codec = require("lib.ptree.action_codec") local ptree_alarms = require("lib.ptree.alarms") local timeline = require("core.timeline") -local events +local events = timeline.load_events(engine.timeline(), "core.engine") local Worker = {} @@ -105,9 +105,7 @@ function Worker:main () if not engine.auditlog_enabled then engine.enable_auditlog() end - if not events then - events = timeline.load_events(engine.timeline(), "core.engine") - end + engine.timeline() engine.enable_tick() From 559c80dcd8336b358faeb72c31dbc5d70c113064 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 28 Mar 2023 15:18:24 +0200 Subject: [PATCH 57/62] core.sync: load64, cas64 --- src/core/sync.dasl | 72 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 12 deletions(-) diff --git a/src/core/sync.dasl b/src/core/sync.dasl index 9e3a08c52e..8f839c926e 100644 --- a/src/core/sync.dasl +++ b/src/core/sync.dasl @@ -9,16 +9,25 @@ local ffi = require("ffi") | .actionlist actions | .globalnames globalnames --- This module happens to use 32-bit arguments only. -|.define arg1, edi -|.define arg2, esi -|.define arg3, edx +|.define darg1, edi +|.define darg2, esi +|.define darg3, edx +|.define qarg1, rdi +|.define qarg2, rsi +|.define qarg3, rdx -- load(src) -> uint32_t -- Load integer from src. local load_t = "uint32_t (*) (uint32_t *)" local function load (Dst) - | mov eax, [arg1] + | mov eax, [qarg1] + | ret +end + +-- load64(src) -> uint64_t +local load64_t = "uint64_t (*) (uint64_t *)" +local function load64 (Dst) + | mov rax, [qarg1] | ret end @@ -27,10 +36,19 @@ end -- equal, stores new at dst and returns true. Else, returns false. local cas_t = "bool (*) (int *, int, int)" local function cas (Dst) - | mov eax, arg2 - | lock; cmpxchg [arg1], arg3 -- compare-and-swap; sets ZF flag on success - | mov eax, 0 -- clear eax for return value - | setz al -- set eax to 1 (true) if ZF is set + | mov eax, darg2 + | lock; cmpxchg [qarg1], darg3 -- compare-and-swap; sets ZF flag on success + | mov eax, 0 -- clear eax for return value + | setz al -- set eax to 1 (true) if ZF is set + | ret +end + +local cas64_t = "bool (*) (uint64_t *, uint64_t, uint64_t)" +local function cas64 (Dst) + | mov rax, qarg2 + | lock; cmpxchg [qarg1], qarg3 -- compare-and-swap; sets ZF flag on success + | mov eax, 0 -- clear eax for return value + | setz al -- set eax to 1 (true) if ZF is set | ret end @@ -42,20 +60,20 @@ local lock_t = "void (*) (int *)" local function lock (Dst) -- attempt to acquire | mov eax, 1 - | xchg eax, [arg1] + | xchg eax, [qarg1] | test eax, eax -- was it 0 (unlocked)? | jnz >1 -- no, go spin | ret -- spin |1: | pause - | cmp dword [arg1], 1 -- does it look locked? + | cmp dword [qarg1], 1 -- does it look locked? | je <1 -- spin if it does | jmp ->lock -- otherwise try to acquire end local unlock_t = "void (*) (int *)" local function unlock (Dst) - | mov dword [arg1], 0 + | mov dword [qarg1], 0 | ret end @@ -65,9 +83,15 @@ local function generate (Dst) |->load: || load(Dst) | .align 16 + |->load64: + || load64(Dst) + | .align 16 |->cas: || cas(Dst) | .align 16 + |->cas64: + || cas64(Dst) + | .align 16 |->lock: || lock(Dst) | .align 16 @@ -87,7 +111,9 @@ local entry = dasm.globals(globals, globalnames) local sync = { load = ffi.cast(load_t, entry.load), + load64 = ffi.cast(load64_t, entry.load64), cas = ffi.cast(cas_t, entry.cas), + cas64 = ffi.cast(cas64_t, entry.cas64), lock = ffi.cast(lock_t, entry.lock), unlock = ffi.cast(unlock_t, entry.unlock) } @@ -104,6 +130,18 @@ sync.selftest = function () state = sync.load(box.state) assert(state == i) end + local box = ffi.new( + "struct { uint64_t pad1, state[1], pad2; } __attribute__((packed))" + ) + local state = sync.load64(box.state) + assert(state == 0) + for i=1,100 do + box.state[0] = state + 1 + state = sync.load64(box.state) + assert(state == i) + end + box.state[0] = 0x5555555555555555 + assert(sync.load64(box.state) == 0x5555555555555555) -- cas local box = ffi.new( "struct { unsigned int pad1, state[1], pad2; } __attribute__((packed))" @@ -115,6 +153,16 @@ sync.selftest = function () and box.state[0] == 2147483648 and box.pad1 == 0 and box.pad2 == 0) + local box = ffi.new( + "struct { uint64_t pad1, state[1], pad2; } __attribute__((packed))" + ) + assert(sync.cas64(box.state, 0, 1) and box.state[0] == 1) + assert(not sync.cas64(box.state, 0, 2) and box.state[0] == 1) + assert(sync.cas64(box.state, 1, 2) and box.state[0] == 2) + assert(sync.cas64(box.state, 2, 0x5555555555555555) + and box.state[0] == 0x5555555555555555 + and box.pad1 == 0 + and box.pad2 == 0) -- lock / unlock local spinlock = ffi.new("int[1]") sync.lock(spinlock) From f47228ba805c2838acbfbe2b6b144ad2a50023f5 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Tue, 28 Mar 2023 15:53:02 +0200 Subject: [PATCH 58/62] core.group_freelist: use 64 bit sequence --- src/core/group_freelist.lua | 39 +++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/core/group_freelist.lua b/src/core/group_freelist.lua index 11327c8fdc..343c9f6a54 100644 --- a/src/core/group_freelist.lua +++ b/src/core/group_freelist.lua @@ -24,21 +24,22 @@ chunksize = 2048 local default_size = 1024 -- must be a power of two local CACHELINE = 64 -- XXX - make dynamic -local INT = ffi.sizeof("uint32_t") +local QWORD = ffi.sizeof("uint64_t") ffi.cdef([[ struct group_freelist_chunk { - uint32_t sequence[1], nfree; + uint64_t sequence[1]; + uint32_t nfree, pad; struct packet *list[]]..chunksize..[[]; } __attribute__((packed))]]) ffi.cdef([[ struct group_freelist { - uint32_t enqueue_pos[1], enqueue_mask; - uint8_t pad_enqueue_pos[]]..CACHELINE-2*INT..[[]; + uint64_t enqueue_pos[1], enqueue_mask; + uint8_t pad_enqueue_pos[]]..CACHELINE-2*QWORD..[[]; - uint32_t dequeue_pos[1], dequeue_mask; - uint8_t pad_dequeue_pos[]]..CACHELINE-2*INT..[[]; + uint64_t dequeue_pos[1], dequeue_mask; + uint8_t pad_dequeue_pos[]]..CACHELINE-2*QWORD..[[]; uint32_t size, state[1]; @@ -85,21 +86,21 @@ end function start_add (fl) local deadline - local pos = sync.load(fl.enqueue_pos) + local pos = sync.load64(fl.enqueue_pos) local mask = fl.enqueue_mask while true do for _=1,100000 do local chunk = fl.chunk[band(pos, mask)] - local seq = sync.load(chunk.sequence) - local dif = seq - pos + local seq = sync.load64(chunk.sequence) + local dif = ffi.cast("int64_t", seq) - ffi.cast("int64_t", pos) if dif == 0 then - if sync.cas(fl.enqueue_pos, pos, pos+1) then + if sync.cas64(fl.enqueue_pos, pos, pos+1) then return chunk, pos+1 end elseif dif < 0 then return else - pos = sync.load(fl.enqueue_pos) + pos = sync.load64(fl.enqueue_pos) end end deadline = deadlock_timeout(deadline, 5, @@ -109,21 +110,21 @@ end function start_remove (fl) local deadline - local pos = sync.load(fl.dequeue_pos) + local pos = sync.load64(fl.dequeue_pos) local mask = fl.dequeue_mask while true do for _=1,100000 do local chunk = fl.chunk[band(pos, mask)] - local seq = sync.load(chunk.sequence) - local dif = seq - (pos+1) + local seq = sync.load64(chunk.sequence) + local dif = ffi.cast("int64_t", seq) - ffi.cast("int64_t", pos+1) if dif == 0 then - if sync.cas(fl.dequeue_pos, pos, pos+1) then + if sync.cas64(fl.dequeue_pos, pos, pos+1) then return chunk, pos+mask+1 end elseif dif < 0 then return else - pos = sync.load(fl.dequeue_pos) + pos = sync.load64(fl.dequeue_pos) end end deadline = deadlock_timeout(deadline, 5, @@ -137,11 +138,7 @@ end local function occupied_chunks (fl) local enqueue, dequeue = fl.enqueue_pos[0], fl.dequeue_pos[0] - if dequeue > enqueue then - return enqueue + fl.size - dequeue - else - return enqueue - dequeue - end + return tonumber(enqueue - dequeue) end -- Register struct group_freelist as an abstract SHM object type so that From b6c130d82a435e4d2a8979e96e4d0a6d9b4aca90 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 6 Apr 2023 14:54:50 +0200 Subject: [PATCH 59/62] core.group_freelist: ensure finish is atomic --- src/core/group_freelist.lua | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/group_freelist.lua b/src/core/group_freelist.lua index 343c9f6a54..b638c2fee9 100644 --- a/src/core/group_freelist.lua +++ b/src/core/group_freelist.lua @@ -15,7 +15,6 @@ local band = bit.band -- -- https://www.1024cores.net/home/lock-free-algorithms/queues/bounded-mpmc-queue -- --- NB: assumes 32-bit wide loads/stores are atomic (as is the fact on x86_64)! -- Group freelist holds up to n chunks of chunksize packets each chunksize = 2048 @@ -133,7 +132,7 @@ function start_remove (fl) end function finish (chunk, seq) - chunk.sequence[0] = seq + assert(sync.cas64(chunk.sequence, chunk.sequence[0], seq)) end local function occupied_chunks (fl) From e8eb4fdcb5211a6db153b43daf8942e7ff0c9afd Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 6 Apr 2023 14:43:00 +0200 Subject: [PATCH 60/62] Revert "group_freelist: add deadlock detection" This reverts commit f8c977c81f29fb6ce2afffb3c342e1faa5722c5c. --- src/core/group_freelist.lua | 57 +++++++++++++------------------------ 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/src/core/group_freelist.lua b/src/core/group_freelist.lua index b638c2fee9..07e021b27d 100644 --- a/src/core/group_freelist.lua +++ b/src/core/group_freelist.lua @@ -76,58 +76,41 @@ function freelist_open (name, readonly) return shm.open(name, "struct group_freelist", readonly, size) end -local function deadlock_timeout(deadline, timeout, message) - local now = ffi.C.get_monotonic_time() - deadline = deadline or (now + timeout) - assert(now < deadline, message) - return deadline -end - function start_add (fl) - local deadline local pos = sync.load64(fl.enqueue_pos) local mask = fl.enqueue_mask while true do - for _=1,100000 do - local chunk = fl.chunk[band(pos, mask)] - local seq = sync.load64(chunk.sequence) - local dif = ffi.cast("int64_t", seq) - ffi.cast("int64_t", pos) - if dif == 0 then - if sync.cas64(fl.enqueue_pos, pos, pos+1) then - return chunk, pos+1 - end - elseif dif < 0 then - return - else - pos = sync.load64(fl.enqueue_pos) + local chunk = fl.chunk[band(pos, mask)] + local seq = sync.load64(chunk.sequence) + local dif = ffi.cast("int64_t", seq) - ffi.cast("int64_t", pos) + if dif == 0 then + if sync.cas64(fl.enqueue_pos, pos, pos+1) then + return chunk, pos+1 end + elseif dif < 0 then + return + else + pos = sync.load64(fl.enqueue_pos) end - deadline = deadlock_timeout(deadline, 5, - "deadlock in group_freelist.start_add") end end function start_remove (fl) - local deadline local pos = sync.load64(fl.dequeue_pos) local mask = fl.dequeue_mask while true do - for _=1,100000 do - local chunk = fl.chunk[band(pos, mask)] - local seq = sync.load64(chunk.sequence) - local dif = ffi.cast("int64_t", seq) - ffi.cast("int64_t", pos+1) - if dif == 0 then - if sync.cas64(fl.dequeue_pos, pos, pos+1) then - return chunk, pos+mask+1 - end - elseif dif < 0 then - return - else - pos = sync.load64(fl.dequeue_pos) + local chunk = fl.chunk[band(pos, mask)] + local seq = sync.load64(chunk.sequence) + local dif = ffi.cast("int64_t", seq) - ffi.cast("int64_t", pos+1) + if dif == 0 then + if sync.cas64(fl.dequeue_pos, pos, pos+1) then + return chunk, pos+mask+1 end + elseif dif < 0 then + return + else + pos = sync.load64(fl.dequeue_pos) end - deadline = deadlock_timeout(deadline, 5, - "deadlock in group_freelist.start_remove") end end From f3905b3504d3759fb7554a0205a6a01fa1756cec Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 6 Apr 2023 14:55:12 +0200 Subject: [PATCH 61/62] core.group_freelist: cleanup --- src/core/group_freelist.lua | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/group_freelist.lua b/src/core/group_freelist.lua index 07e021b27d..bec68c7201 100644 --- a/src/core/group_freelist.lua +++ b/src/core/group_freelist.lua @@ -7,7 +7,6 @@ local shm = require("core.shm") local lib = require("core.lib") local ffi = require("ffi") -local waitfor, compiler_barrier = lib.waitfor, lib.compiler_barrier local band = bit.band -- Group freelist: lock-free multi-producer multi-consumer ring buffer @@ -70,7 +69,7 @@ end function freelist_open (name, readonly) local fl = shm.open(name, "struct group_freelist", 'read-only', 1) - waitfor(function () return sync.load(fl.state) == READY end) + lib.waitfor(function () return sync.load(fl.state) == READY end) local size = fl.size shm.unmap(fl) return shm.open(name, "struct group_freelist", readonly, size) From 7472b839bc95eb0b9bd0966912f684d0ef9884e5 Mon Sep 17 00:00:00 2001 From: Max Rottenkolber Date: Thu, 6 Apr 2023 14:59:43 +0200 Subject: [PATCH 62/62] lib.yang.list: fix bug in remove_obsolete_nodes Prior attempt to move up child node to parent was broken. (hash parameters no longer match) Not even sure if what was attempted is possible as we do not know the children's keys at this point? Fix by only moving leaf children. --- src/lib/yang/list.lua | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/lib/yang/list.lua b/src/lib/yang/list.lua index b949ad4846..03aa02274f 100644 --- a/src/lib/yang/list.lua +++ b/src/lib/yang/list.lua @@ -829,16 +829,14 @@ function List:remove_obsolete_nodes (k, r, d, s, h) self:free_node(r) return self:remove_obsolete_nodes(k, node.parent, d, s, h) elseif band(node.occupied, node.occupied-1) == 0 then - -- Node has only one child, move it to parent. + -- Node has only one child, move to parent if it is a leaf. local index = msb_set(node.occupied) - parent.children[parent_index] = node.children[index] if self:node_leaf(node, index) then + parent.children[parent_index] = node.children[index] self:node_leaf(parent, parent_index, true) - else - self:node(node.children[index]).parent = node.parent + self:free_node(r) + return self:remove_obsolete_nodes(k, node.parent, d, s, h) end - self:free_node(r) - return self:remove_obsolete_nodes(k, node.parent, d, s, h) end end