Skip to content
111 changes: 109 additions & 2 deletions examples/cpp/pyperf/PyOffsets.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ extern const struct struct_offsets kPy27OffsetConfig = {
.f_code = 32,
.f_lineno = 124,
.f_localsplus = 376,
.owner = -1,
},
.PyCodeObject = {
.co_filename = 80,
Expand Down Expand Up @@ -97,6 +98,7 @@ extern const struct struct_offsets kPy36OffsetConfig = {
.f_code = 32,
.f_lineno = 124,
.f_localsplus = 376,
.owner = -1,
},
.PyCodeObject = {
.co_filename = 96,
Expand Down Expand Up @@ -137,6 +139,7 @@ extern const struct struct_offsets kPy37OffsetConfig = {
.f_code = 32,
.f_lineno = 108,
.f_localsplus = 360,
.owner = -1,
},
.PyCodeObject = {
.co_filename = 96,
Expand Down Expand Up @@ -177,6 +180,7 @@ extern const struct struct_offsets kPy38OffsetConfig = {
.f_code = 32,
.f_lineno = 108,
.f_localsplus = 360,
.owner = -1,
},
.PyCodeObject = {
.co_filename = 104,
Expand All @@ -189,6 +193,8 @@ extern const struct struct_offsets kPy38OffsetConfig = {
}
};

static const struct struct_offsets kPy39OffsetConfig = kPy38OffsetConfig;

extern const struct struct_offsets kPy310OffsetConfig = {
.PyObject = {
.ob_type = 8
Expand Down Expand Up @@ -218,6 +224,7 @@ extern const struct struct_offsets kPy310OffsetConfig = {
.f_code = 32,
.f_lineno = 100,
.f_localsplus = 352,
.owner = -1,
},
.PyCodeObject = {
.co_filename = 104,
Expand All @@ -237,7 +244,7 @@ extern const struct struct_offsets kPy311OffsetConfig = {
.String = {
// see https://github.com/python/cpython/blob/3.11/Include/cpython/unicodeobject.h#L69-L71
.data = 48, // sizeof(PyASCIIObject), which is an offset to string data
.size = -1,
.size = 16,
},
.PyTypeObject = {
.tp_name = 24
Expand All @@ -263,6 +270,7 @@ extern const struct struct_offsets kPy311OffsetConfig = {
.f_code = 32, // offsetof(_PyInterpreterFrame, f_code),
.f_lineno = -1, // N/A
.f_localsplus = 72, // offsetof(_PyInterpreterFrame, localsplus),
.owner = 69,
},
.PyCodeObject = {
.co_filename = 112,
Expand All @@ -275,15 +283,114 @@ extern const struct struct_offsets kPy311OffsetConfig = {
},
};

// Structure offsets for CPython 3.12. A value of -1 marks a field that is not
// available (or not used) for this interpreter version.
extern const struct struct_offsets kPy312OffsetConfig = {
    .PyObject = {
        .ob_type = 8,
    },
    .String = {
        // sizeof(PyASCIIObject), which is the offset of the string data.
        // NOTE(review): the reference below targets the 3.11 header; confirm it
        // still describes the 3.12 layout.
        // https://github.com/python/cpython/blob/3.11/Include/cpython/unicodeobject.h#L69-L71
        .data = 40,
        .size = 16,
    },
    .PyTypeObject = {
        .tp_name = 24,
    },
    .PyThreadState = {
        .next = 8,
        .interp = 16,
        .frame = -1,    // no direct pointer to PyFrameObject since Python 3.11
        .thread = 136,  // offsetof(PyThreadState, thread_id)
        .cframe = 56,   // pointer to the intermediate PyCFrame structure
    },
    .PyCFrame = {
        .current_frame = 0,
    },
    .PyInterpreterState = {
        .tstate_head = 72,  // offsetof(PyInterpreterState, threads.head), i.e. 64 + 8
    },
    .PyRuntimeState = {
        .interp_main = 40,  // offsetof(_PyRuntimeState, interpreters.main), i.e. 32 + 8
    },
    // Since Python 3.11 the frame fields of interest live in _PyInterpreterFrame,
    // so the offsets below are relative to that structure.
    .PyFrameObject = {
        .f_back = 8,         // offsetof(_PyInterpreterFrame, previous)
        .f_code = 0,         // offsetof(_PyInterpreterFrame, f_code)
        .f_lineno = -1,      // N/A
        .f_localsplus = 72,  // offsetof(_PyInterpreterFrame, localsplus)
        .owner = 70,         // offset of the one-byte frame-owner tag
    },
    .PyCodeObject = {
        .co_filename = 112,
        .co_name = 120,
        .co_varnames = 96,  // offsetof(PyCodeObject, co_localsplusnames)
        .co_firstlineno = 68,
    },
    .PyTupleObject = {
        .ob_item = 24,
    },
};

/*
 * Structure offsets for CPython 3.13.
 *
 * _PyCFrame was removed in 3.13, so the PyCFrame indirection is disabled (-1)
 * and the top frame pointer is read straight from PyThreadState via `.frame`.
 */
static const struct struct_offsets kPy313OffsetConfig = {
    .PyObject = {
        .ob_type = 8,
    },
    .String = {
        .data = 40,
        .size = 16,
    },
    .PyTypeObject = {
        .tp_name = 24,
    },
    // Thread state: direct frame pointer, no cframe.
    .PyThreadState = {
        .next = 8,
        .interp = 16,
        .frame = 72,
        .thread = 152,
        .cframe = -1,
    },
    // _PyCFrame no longer exists; keep the slot but mark it unused.
    .PyCFrame = {
        .current_frame = -1,
    },
    // Interpreter and runtime state.
    .PyInterpreterState = {
        .tstate_head = 7344,
    },
    .PyRuntimeState = {
        .interp_main = 640,
    },
    // Offsets into _PyInterpreterFrame (the "virtual" frame), not PyFrameObject.
    .PyFrameObject = {
        .f_back = 8,
        .f_code = 0,
        .f_lineno = -1,  // not available
        .f_localsplus = 72,
        .owner = 70,     // offset of the one-byte frame-owner tag
    },
    // Same PyCodeObject offsets as the 3.11 and 3.12 tables.
    .PyCodeObject = {
        .co_filename = 112,
        .co_name = 120,
        .co_varnames = 96,
        .co_firstlineno = 68,
    },
    .PyTupleObject = {
        .ob_item = 24,
    },
};

// List of mappings from Python 3 minor versions to offsets. `get_offsets` depends on this list
// being sorted in ascending order when it searches through it.
const std::vector<std::pair<version, struct_offsets>> python3Versions = {
{{3,6,0}, kPy36OffsetConfig},
{{3,7,0}, kPy37OffsetConfig},
{{3,8,0}, kPy38OffsetConfig},
// 3.9 is same as 3.8
{{3,9,0}, kPy39OffsetConfig},
{{3,10,0}, kPy310OffsetConfig},
{{3,11,0}, kPy311OffsetConfig},
{{3,12,0}, kPy312OffsetConfig},
{{3,13,0}, kPy313OffsetConfig},
};

const struct_offsets& get_offsets(version& version) {
Expand Down
60 changes: 42 additions & 18 deletions examples/cpp/pyperf/PyPerfBPFProgram.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ struct struct_offsets {
int64_t f_code;
int64_t f_lineno;
int64_t f_localsplus;
int64_t owner;
} PyFrameObject;
struct {
int64_t co_filename;
Expand Down Expand Up @@ -167,6 +168,7 @@ struct event {
int32_t stack[STACK_MAX_LEN];
uintptr_t user_ip;
uintptr_t user_sp;
uintptr_t user_bp;
uint32_t user_stack_len;
uint8_t raw_user_stack[__USER_STACKS_PAGES__ * PAGE_SIZE];
#define FRAME_CODE_IS_NULL 0x80000001
Expand Down Expand Up @@ -198,6 +200,10 @@ struct sample_state {
// Bit-width constants: COUNTER_BITS is 31 - CPU_BITS = 21, so MAX_SYMBOLS is 2^21.
#define CPU_BITS 10
#define COUNTER_BITS (31 - CPU_BITS)
#define MAX_SYMBOLS (1 << COUNTER_BITS)
// Values of the one-byte frame "owner" tag that read_python_stack reads from a
// frame at offsets.PyFrameObject.owner. Frames tagged THREAD, GENERATOR or
// FRAME_OBJECT are symbolized, CSTACK frames are skipped, and any other value
// is recorded negated in the stack as an "unknown owner" marker.
// NOTE(review): presumably mirrors CPython's frame-owner enum - confirm against
// the interpreter headers for each supported version.
#define FRAME_OWNED_BY_THREAD 0
#define FRAME_OWNED_BY_GENERATOR 1
#define FRAME_OWNED_BY_FRAME_OBJECT 2
#define FRAME_OWNED_BY_CSTACK 3
BPF_HASH(symbols, struct symbol, int32_t, __SYMBOLS_SIZE__);

// Table of processes currently being profiled.
Expand Down Expand Up @@ -340,6 +346,7 @@ on_event(struct pt_regs* ctx) {

event->user_sp = user_regs.sp;
event->user_ip = user_regs.ip;
event->user_bp = user_regs.bp;
event->user_stack_len = 0;

// Subtract 128 from sp for x86-ABI red zone
Expand Down Expand Up @@ -470,12 +477,12 @@ get_thread_state(struct pt_regs *ctx) {
found:
// Get pointer to top frame from PyThreadState
if (state->offsets.PyThreadState.frame > -1) {
// For Python <= 3.10 get frame pointer directly from PyThreadState
// For Python <= 3.10, >=3.13 get frame pointer directly from PyThreadState
bpf_probe_read_user(
&state->frame_ptr, sizeof(state->frame_ptr),
(void *)(state->thread_state + state->offsets.PyThreadState.frame));
} else {
// In Python 3.11+ PyFrameObject fields of interest were mostly moved to PyInterpreterFrame (but we refer it here
// In Python 3.11, 3.12 PyFrameObject fields of interest were mostly moved to PyInterpreterFrame (but we refer it here
// as "frame"); also, we need to get pointer indirectly through PyCFrame structure
uintptr_t cframe;
bpf_probe_read_user(
Expand Down Expand Up @@ -542,14 +549,15 @@ get_first_arg_name(
char *argname,
size_t maxlen) {
int result = 0;
ssize_t ob_size; // Py_ssize_t;
ssize_t ob_size = 0; // Py_ssize_t;
// Roughly equivalent to the following in GDB:
//
// ((PyTupleObject*)$frame->f_code->co_varnames)->ob_item[0]
//
void* args_ptr;
result |= bpf_probe_read_user(&args_ptr, sizeof(void*), code_ptr + offsets->PyCodeObject.co_varnames);
result |= bpf_probe_read_user(&ob_size, sizeof(ob_size), args_ptr + offsets->String.size); // String.size is PyVarObject.ob_size
result |= bpf_probe_read_user(&ob_size, sizeof(ob_size), args_ptr + 16); // PyVarObject.ob_size has always been 16

if (result == 0 && ob_size > 0) {
result |= bpf_probe_read_user(&args_ptr, sizeof(void*), args_ptr + offsets->PyTupleObject.ob_item);
result |= bpf_probe_read_user_str(argname, maxlen, args_ptr + offsets->String.data);
Expand Down Expand Up @@ -688,24 +696,40 @@ int read_python_stack(struct pt_regs* ctx) {
void *cur_frame;
void *cur_code_ptr;

#pragma unroll
#pragma unroll
for (int i = 0; i < PYTHON_STACK_FRAMES_PER_PROG; i++) {
cur_frame = state->frame_ptr;

// read PyCodeObject first, if that fails, then no point reading next frame
bpf_probe_read_user(
&cur_code_ptr, sizeof(cur_code_ptr),
cur_frame + state->offsets.PyFrameObject.f_code);

// read current PyFrameObject filename/name
// The compiler substitutes a constant for `i` because the loop is unrolled. This guarantees we
// are always within the array bounds. On the other hand, `stack_len` is a variable, so the
// verifier can't guarantee it's within bounds without an explicit check.
const int32_t symbol_id = read_symbol(state, cur_frame, cur_code_ptr);
// to please the verifier...
if (event->stack_len < STACK_MAX_LEN) {
event->stack[event->stack_len++] = symbol_id;
char owner = FRAME_OWNED_BY_THREAD; // If owner not relevant for distro, assume frame good.
if (state->offsets.PyFrameObject.owner != -1) {
bpf_probe_read_user(
&owner, sizeof(owner),
cur_frame + state->offsets.PyFrameObject.owner);
}
if (owner == FRAME_OWNED_BY_THREAD ||
owner == FRAME_OWNED_BY_GENERATOR ||
owner == FRAME_OWNED_BY_FRAME_OBJECT) {
// read PyCodeObject first, if that fails, then no point reading next frame
bpf_probe_read_user(
&cur_code_ptr, sizeof(cur_code_ptr),
cur_frame + state->offsets.PyFrameObject.f_code);

// read current PyFrameObject filename/name
// The compiler substitutes a constant for `i` because the loop is unrolled. This guarantees we
// are always within the array bounds. On the other hand, `stack_len` is a variable, so the
// verifier can't guarantee it's within bounds without an explicit check.
const int32_t symbol_id = read_symbol(state, cur_frame, cur_code_ptr);
// to please the verifier...
if (event->stack_len < STACK_MAX_LEN) {
event->stack[event->stack_len++] = symbol_id;
}
} else if (owner != FRAME_OWNED_BY_CSTACK) {
// This means frame ownership is unknown. Something is off.
if (event->stack_len < STACK_MAX_LEN) {
event->stack[event->stack_len++] = -1 * owner;
}
} // If it's CSTACK we just skip.


// read next PyFrameObject pointer, update in place
bpf_probe_read_user(
Expand Down
15 changes: 13 additions & 2 deletions examples/cpp/pyperf/PyPerfNativeStackTrace.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const uint8_t *NativeStackTrace::stack = NULL;
size_t NativeStackTrace::stack_len = 0;
uintptr_t NativeStackTrace::sp = 0;
uintptr_t NativeStackTrace::ip = 0;
uintptr_t NativeStackTrace::bp = 0;
ProcSymbolsCache NativeStackTrace::procSymbolsCache;
bool NativeStackTrace::insert_dso_name = false;
const static double ProcSymbolsCacheTTL_S = 60;
Expand All @@ -43,11 +44,12 @@ static double steady_time_since_epoch() {
}

NativeStackTrace::NativeStackTrace(uint32_t pid, const unsigned char *raw_stack,
size_t stack_len, uintptr_t ip, uintptr_t sp) : error_occurred(false) {
size_t stack_len, uintptr_t ip, uintptr_t sp, uintptr_t bp) : error_occurred(false) {
NativeStackTrace::stack = raw_stack;
NativeStackTrace::stack_len = stack_len;
NativeStackTrace::ip = ip;
NativeStackTrace::sp = sp;
NativeStackTrace::bp = bp;

if (stack_len == 0) {
return;
Expand Down Expand Up @@ -157,6 +159,15 @@ NativeStackTrace::NativeStackTrace(uint32_t pid, const unsigned char *raw_stack,

int NativeStackTrace::UPT_access_reg(unw_addr_space_t as, unw_regnum_t regnum,
unw_word_t *valp, int write, void *arg) {
if (regnum == UNW_X86_64_RBP) {
if (write) {
logInfo(2, "Libunwind attempts to write to BP\n");
return -UNW_EINVAL;
}

*valp = NativeStackTrace::bp;
return 0;
}
if (regnum == UNW_REG_SP) {
if (write) {
logInfo(2, "Libunwind attempts to write to SP\n");
Expand Down Expand Up @@ -234,7 +245,7 @@ int NativeStackTrace::UPT_access_mem(unw_addr_space_t as, unw_word_t addr,
// libunwind accessor callback for floating-point registers.
//
// Floating-point register access is not supported by this unwinder: every
// call, read or write, is logged at level 3 and rejected. None of the
// arguments are inspected and `val` is never written.
//
// Returns -UNW_EINVAL unconditionally.
int NativeStackTrace::UPT_access_fpreg(unw_addr_space_t as, unw_regnum_t reg, unw_fpreg_t *val,
                                       int write, void *arg) {
  logInfo(3, "Libunwind unexpected UPT_access_fpreg() attempt\n");
  // Single exit point; the duplicated, unreachable second return was removed.
  return -UNW_EINVAL;
}

int NativeStackTrace::UPT_resume(unw_addr_space_t as, unw_cursor_t *c, void *arg) {
Expand Down
4 changes: 3 additions & 1 deletion examples/cpp/pyperf/PyPerfNativeStackTrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ typedef std::map<uint32_t, ProcSymbolsCacheEntry> ProcSymbolsCache;
class NativeStackTrace {
public:
explicit NativeStackTrace(uint32_t pid, const uint8_t *raw_stack,
size_t stack_len, uintptr_t ip, uintptr_t sp);
size_t stack_len, uintptr_t ip, uintptr_t sp,
uintptr_t bp);

std::vector<std::string> get_stack_symbol() const;
bool error_occured() const;
Expand All @@ -41,6 +42,7 @@ class NativeStackTrace {
static size_t stack_len;
static uintptr_t ip;
static uintptr_t sp;
static uintptr_t bp;
static ProcSymbolsCache procSymbolsCache;

static int UPT_access_reg(unw_addr_space_t as, unw_regnum_t regnum,
Expand Down
4 changes: 3 additions & 1 deletion examples/cpp/pyperf/PyPerfType.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ struct struct_offsets {
int64_t f_code;
int64_t f_lineno;
int64_t f_localsplus;
int64_t owner;
} PyFrameObject;
struct {
int64_t co_filename;
Expand Down Expand Up @@ -224,6 +225,7 @@ typedef struct event {
#define FRAME_CODE_IS_NULL ((int32_t)0x80000001)
uintptr_t user_ip;
uintptr_t user_sp;
uintptr_t user_bp;
uint32_t user_stack_len;
uint8_t raw_user_stack[]; // NOTICE: Field with variadic length - must be last!
} Event;
Expand All @@ -247,7 +249,7 @@ struct PyPerfSample {
kernelStackId(raw->kernel_stack_id),
pyStackIds(raw->stack, raw->stack + raw->stack_len),
nativeStack(raw->pid, raw->raw_user_stack, raw->user_stack_len,
raw->user_ip, raw->user_sp) {}
raw->user_ip, raw->user_sp, raw->user_bp) {}
};

} // namespace pyperf
Expand Down