From caa65c352a2a76a3931a4d56b151a0f700b42dc5 Mon Sep 17 00:00:00 2001 From: Greg Mefford <greg@gregmefford.com> Date: Sat, 11 Jan 2025 20:58:09 -0500 Subject: [PATCH] SynchronizeMappings like v8 does, or using JITDump --- interpreter/beam/beam.go | 128 +++++++++++++++++++++++++++++- interpreter/beam/jitdumpreader.go | 113 ++++++++++++++++++++++++++ 2 files changed, 239 insertions(+), 2 deletions(-) create mode 100644 interpreter/beam/jitdumpreader.go diff --git a/interpreter/beam/beam.go b/interpreter/beam/beam.go index 38ff3bc8..63993294 100644 --- a/interpreter/beam/beam.go +++ b/interpreter/beam/beam.go @@ -10,6 +10,7 @@ package beam // import "go.opentelemetry.io/ebpf-profiler/interpreter/beam" import ( "fmt" + "os" "regexp" "strconv" @@ -19,6 +20,8 @@ import ( "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" + "go.opentelemetry.io/ebpf-profiler/lpm" + "go.opentelemetry.io/ebpf-profiler/process" "go.opentelemetry.io/ebpf-profiler/remotememory" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/support" @@ -40,6 +43,12 @@ type beamInstance struct { data *beamData rm remotememory.RemoteMemory + // mappings is indexed by the Mapping to its generation + mappings map[process.Mapping]*uint32 + // prefixes is indexed by the prefix added to ebpf maps (to be cleaned up) to its generation + prefixes map[lpm.Prefix]*uint32 + // mappingGeneration is the current generation (so old entries can be pruned) + mappingGeneration uint32 } func readSymbolValue(ef *pfelf.File, name libpf.SymbolName) ([]byte, error) { @@ -116,11 +125,126 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr func (d *beamData) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, bias libpf.Address, rm remotememory.RemoteMemory) (interpreter.Instance, error) { log.Infof("BEAM interpreter attaching") return &beamInstance{ - data: d, - rm: rm, + data: d, + 
rm: rm, + mappings: make(map[process.Mapping]*uint32), + prefixes: make(map[lpm.Prefix]*uint32), }, nil } +func (i *beamInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler, + _ reporter.SymbolReporter, pr process.Process, mappings []process.Mapping) error { + pid := pr.PID() + i.mappingGeneration++ + for idx := range mappings { + m := &mappings[idx] + if !m.IsExecutable() || !m.IsAnonymous() { + continue + } + + if _, exists := i.mappings[*m]; exists { + *i.mappings[*m] = i.mappingGeneration + continue + } + + // Generate a new uint32 pointer which is shared for mapping and the prefixes it owns + // so updating the mapping above will reflect to prefixes also. + mappingGeneration := i.mappingGeneration + i.mappings[*m] = &mappingGeneration + + // Just assume all anonymous and executable mappings are BEAM for now + log.Infof("Enabling BEAM for %#x/%#x", m.Vaddr, m.Length) + + prefixes, err := lpm.CalculatePrefixList(m.Vaddr, m.Vaddr+m.Length) + if err != nil { + return fmt.Errorf("new anonymous mapping lpm failure %#x/%#x", m.Vaddr, m.Length) + } + + for _, prefix := range prefixes { + _, exists := i.prefixes[prefix] + if !exists { + err := ebpf.UpdatePidInterpreterMapping(pid, prefix, support.ProgUnwindBEAM, 0, 0) + if err != nil { + return err + } + } + i.prefixes[prefix] = &mappingGeneration + } + } + + // Remove prefixes not seen + for prefix, generationPtr := range i.prefixes { + if *generationPtr == i.mappingGeneration { + continue + } + log.Infof("Delete BEAM prefix %#v", prefix) + _ = ebpf.DeletePidInterpreterMapping(pid, prefix) + delete(i.prefixes, prefix) + } + for m, generationPtr := range i.mappings { + if *generationPtr == i.mappingGeneration { + continue + } + log.Infof("Disabling BEAM for %#x/%#x", m.Vaddr, m.Length) + delete(i.mappings, m) + } + + return nil +} + +func (i *beamInstance) SynchronizeMappingsFromJITDump(ebpf interpreter.EbpfHandler, + _ reporter.SymbolReporter, pr process.Process, mappings []process.Mapping) error { + pid := 
pr.PID() + file, err := os.Open(fmt.Sprintf("/tmp/jit-%d.dump", uint32(pid))) + if err != nil { + return err + } + defer file.Close() + + header, err := ReadJITDumpHeader(file) + if err != nil { + return err + } + log.Infof("Parsed header: %v", *header) + + for recordHeader, err := ReadJITDumpRecordHeader(file); err == nil; recordHeader, err = ReadJITDumpRecordHeader(file) { + switch recordHeader.ID { + case JITCodeLoad: + record, name, err := ReadJITDumpRecordCodeLoad(file, recordHeader) + if err != nil { + return err + } + + log.Infof("JITDump Code Load %s @ 0x%x (%d bytes)", name, record.CodeAddr, record.CodeSize) + + prefixes, err := lpm.CalculatePrefixList(record.CodeAddr, record.CodeAddr+record.CodeSize) + if err != nil { + return fmt.Errorf("lpm failure %#x/%#x", record.CodeAddr, record.CodeSize) + } + + for _, prefix := range prefixes { + // TODO: Include FileID + err := ebpf.UpdatePidInterpreterMapping(pid, prefix, support.ProgUnwindBEAM, 0, 0) + if err != nil { + return err + } + } + + // TODO: remove mappings that have been moved/unloaded + + default: + log.Warnf("Ignoring JITDump record type %d", recordHeader.ID) + SkipJITDumpRecord(file, recordHeader) + } + } + + if err != nil { + return err + } + + return nil +} + func (i *beamInstance) Detach(interpreter.EbpfHandler, libpf.PID) error { log.Infof("BEAM interpreter detaching") return nil diff --git a/interpreter/beam/jitdumpreader.go b/interpreter/beam/jitdumpreader.go new file mode 100644 index 00000000..b4b53be8 --- /dev/null +++ b/interpreter/beam/jitdumpreader.go @@ -0,0 +1,113 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package beam // import "go.opentelemetry.io/ebpf-profiler/interpreter/beam" + +// Minimal JITDUMP file reader for BEAM + +// This has the minimal code we need to read the JITDUMP files that the BEAM +// writes to `/tmp/jit-<pid>.dump`. It isn't BEAM-specific, so it could probably +// be used more generally. 
// The spec for this file format is at:
// https://raw.githubusercontent.com/torvalds/linux/refs/heads/master/tools/perf/Documentation/jitdump-specification.txt

import (
	"encoding/binary"
	"fmt"
	"io"
)

// Magic number that opens a JITDUMP file, and the encoded sizes in bytes of
// the fixed-layout structures declared below.
const (
	jitDumpMagic             = 0x4A695444 // the ASCII string "JiTD"
	jitDumpFileHeaderSize    = 40         // JITDumpHeader
	jitDumpRecordHeaderSize  = 16         // JITDumpRecordHeader
	jitDumpCodeLoadFixedSize = 40         // JITDumpRecordCodeLoad
)

// JITDumpHeader is the fixed-layout header found at the start of a JITDUMP
// file.
type JITDumpHeader struct {
	// Magic is the ASCII string "JiTD", written as 0x4A695444. The reader
	// will detect an endian mismatch when it reads 0x4454694a.
	Magic uint32
	// Version is a 4-byte value representing the format version. It is
	// currently set to 1.
	Version uint32
	// TotalSize is the size in bytes of the file header.
	TotalSize uint32
	// ELFMach is the ELF architecture encoding (ELF e_machine value as
	// specified in /usr/include/elf.h).
	ELFMach uint32
	// Pad1 is padding. Reserved for future use.
	Pad1 uint32
	// Pid is the JIT runtime process identification (OS specific).
	Pid uint32
	// Timestamp is the timestamp of when the file was created.
	Timestamp uint64
	// Flags is a bitmask of flags.
	Flags uint64
}

// JITDumpRecordHeader is the fixed-layout prefix shared by every record that
// follows the file header.
type JITDumpRecordHeader struct {
	// ID is a value identifying the record type (e.g. beam.JITCodeLoad).
	ID uint32
	// TotalSize is the size in bytes of the record including this header.
	TotalSize uint32
	// Timestamp is a timestamp of when the record was created.
	Timestamp uint64
}

// Record types that can appear in JITDumpRecordHeader.ID.
const (
	JITCodeLoad          = 0 // record describing a jitted function
	JITCodeMove          = 1 // record describing an already jitted function which is moved
	JITCodeDebugInfo     = 2 // record describing the debug information for a jitted function
	JITCodeClose         = 3 // record marking the end of the jit runtime (optional)
	JITCodeUnwindingInfo = 4 // record describing a function unwinding information
)

// JITDumpRecordCodeLoad is the fixed-layout part of a JITCodeLoad record. In
// the file it is followed by the NUL-terminated function name and then by
// CodeSize bytes of native code.
type JITDumpRecordCodeLoad struct {
	// PID is the OS process id of the runtime generating the jitted code.
	PID uint32
	// TID is the OS thread identification of the runtime thread generating
	// the jitted code.
	TID uint32
	// VMA is the virtual address of jitted code start.
	VMA uint64
	// CodeAddr is the code start address for the jitted code. By default
	// vma = code_addr.
	CodeAddr uint64
	// CodeSize is the size in bytes of the generated jitted code.
	CodeSize uint64
	// CodeIndex is a unique identifier for the jitted code.
	CodeIndex uint64
}

// unexpectedEOF turns a bare io.EOF into io.ErrUnexpectedEOF. It is used for
// reads inside a record body, where running out of data means the record is
// truncated rather than that the file ended between records.
func unexpectedEOF(err error) error {
	if err == io.EOF {
		return io.ErrUnexpectedEOF
	}
	return err
}

// ReadJITDumpHeader decodes the little-endian file header at the current
// position of file and leaves file positioned at the first record.
//
// It returns an error when the header cannot be read, when the magic number
// is not 0x4A695444 (malformed file or byte-swapped writer), or when the
// header declares a TotalSize smaller than the fixed header layout. When
// TotalSize is larger than the fixed layout, the extra bytes are skipped so
// that the next read starts on a record boundary.
func ReadJITDumpHeader(file io.ReadSeeker) (*JITDumpHeader, error) {
	var header JITDumpHeader
	if err := binary.Read(file, binary.LittleEndian, &header); err != nil {
		return nil, err
	}

	if header.Magic != jitDumpMagic {
		return nil, fmt.Errorf("File malformed, or maybe wrong endianness. Found magic number: %x", header.Magic)
	}

	if header.TotalSize < jitDumpFileHeaderSize {
		return nil, fmt.Errorf("jitdump file header size %d is smaller than the %d-byte fixed header",
			header.TotalSize, jitDumpFileHeaderSize)
	}

	// The header announces its own length; honour it so that a writer using
	// a longer header does not desynchronise every record that follows.
	if extra := int64(header.TotalSize) - jitDumpFileHeaderSize; extra > 0 {
		if _, err := file.Seek(extra, io.SeekCurrent); err != nil {
			return nil, err
		}
	}

	return &header, nil
}

// ReadJITDumpRecordHeader decodes the little-endian record header at the
// current position of file. The error from the underlying read is returned
// unchanged, so io.EOF means that no further record starts here.
func ReadJITDumpRecordHeader(file io.ReadSeeker) (*JITDumpRecordHeader, error) {
	var header JITDumpRecordHeader
	if err := binary.Read(file, binary.LittleEndian, &header); err != nil {
		return nil, err
	}
	return &header, nil
}

// ReadJITDumpRecordCodeLoad decodes the body of a JITCodeLoad record whose
// header has already been read, and leaves file positioned just past the
// record's native code.
//
// It returns the fixed-layout fields and the function name without its
// terminating NUL byte. An error is returned when the sizes in header and in
// the record contradict each other, when the record is truncated
// (io.ErrUnexpectedEOF), or when the name is not NUL-terminated.
func ReadJITDumpRecordCodeLoad(file io.ReadSeeker, header *JITDumpRecordHeader) (*JITDumpRecordCodeLoad, string, error) {
	const fixedSize = jitDumpRecordHeaderSize + jitDumpCodeLoadFixedSize

	// Check the declared size before consuming the body: a record that is
	// too short to hold the fixed fields must not be read past its end.
	if header.TotalSize < fixedSize {
		return nil, "", fmt.Errorf("jitdump code load record size %d is smaller than its %d-byte fixed part",
			header.TotalSize, fixedSize)
	}

	var record JITDumpRecordCodeLoad
	if err := binary.Read(file, binary.LittleEndian, &record); err != nil {
		return nil, "", unexpectedEOF(err)
	}

	// What remains of the record is the name followed by the code. The
	// arithmetic is done in uint64 so that neither a large CodeSize nor a
	// small TotalSize can wrap around, and the name must keep at least the
	// one byte needed for its NUL terminator.
	payload := uint64(header.TotalSize) - fixedSize
	if record.CodeSize >= payload {
		return nil, "", fmt.Errorf("jitdump code load record size %d leaves no room for a name before %d bytes of code",
			header.TotalSize, record.CodeSize)
	}

	name := make([]byte, payload-record.CodeSize)
	if _, err := io.ReadFull(file, name); err != nil {
		return nil, "", unexpectedEOF(err)
	}

	last := len(name) - 1
	if name[last] != '\x00' {
		return nil, "", fmt.Errorf("Expected null terminated string, found %c", name[last])
	}

	// Skip over the actual native code because we don't need it but we
	// probably do want to read the next record.
	if _, err := file.Seek(int64(record.CodeSize), io.SeekCurrent); err != nil {
		return nil, "", err
	}

	return &record, string(name[:last]), nil
}

// SkipJITDumpRecord advances file past the body of a record whose header has
// already been read.
//
// A header that declares fewer bytes than the record header itself is
// rejected: seeking by the resulting negative offset would move the file
// backwards and make a read loop process the same bytes again.
func SkipJITDumpRecord(file io.ReadSeeker, header *JITDumpRecordHeader) error {
	if header.TotalSize < jitDumpRecordHeaderSize {
		return fmt.Errorf("jitdump record size %d is smaller than the %d-byte record header",
			header.TotalSize, jitDumpRecordHeaderSize)
	}
	_, err := file.Seek(int64(header.TotalSize)-jitDumpRecordHeaderSize, io.SeekCurrent)
	return err
}