From b05de17ab1caac0c68af506c3369319d6f928195 Mon Sep 17 00:00:00 2001 From: Zhang Hua Date: Thu, 14 May 2026 22:10:10 +0800 Subject: [PATCH] Report pending radostrace ops on interrupt or timeout Report unfinished radostrace ops from the BPF ops map before exit when tracing stops via Ctrl+C or when the session timeout expires. - keep SIGINT on a graceful shutdown path - dump pending ops with duration, client, tid, osd, object, and rw mode - report pending ops before exiting on -t timeout - prevent false positives when ring buffer reservation cannot reserve space Signed-off-by: Zhang Hua --- src/radostrace.bpf.c | 22 ++++++++-- src/radostrace.cc | 102 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 111 insertions(+), 13 deletions(-) diff --git a/src/radostrace.bpf.c b/src/radostrace.bpf.c index f6bde46..7e479ce 100644 --- a/src/radostrace.bpf.c +++ b/src/radostrace.bpf.c @@ -82,7 +82,6 @@ int uprobe_send_op(struct pt_regs *ctx) { return 0; } memset(val, 0, sizeof(struct client_op_v)); - val->sent_stamp = bpf_ktime_get_boot_ns(); val->tid = key.tid; val->cid = key.cid; val->rw = 0; @@ -127,8 +126,14 @@ int uprobe_send_op(struct pt_regs *ctx) { bpf_printk("uprobe_send_op got NULL vf at varid %d\n", varid); } - name_len &= 127; - bpf_probe_read_user(val->object_name, name_len, (void *)name_base); + if (name_len < 0) { + name_len = 0; + } else if (name_len > (int)(sizeof(val->object_name) - 1)) { + name_len = sizeof(val->object_name) - 1; + } + if (name_len > 0) { + bpf_probe_read_user(val->object_name, name_len, (void *)name_base); + } // read op flags ++varid; vf = bpf_map_lookup_elem(&hprobes, &varid); @@ -180,6 +185,7 @@ int uprobe_send_op(struct pt_regs *ctx) { bpf_printk("uprobe_send_op got M_start %d\n", M_start); } else { bpf_printk("uprobe_send_op got NULL vf at varid %d\n", varid); + bpf_map_delete_elem(&ops, &key); return 0; } @@ -195,6 +201,7 @@ int uprobe_send_op(struct pt_regs *ctx) { bpf_printk("uprobe_send_op got m_finish %d\n", m_finish); } else { bpf_printk("uprobe_send_op got NULL vf at varid %d\n", varid); + bpf_map_delete_elem(&ops, &key); return 0; } @@ -220,6 +227,7 @@ int uprobe_send_op(struct pt_regs *ctx) { bpf_printk("uprobe_send_op got m_start %lld\n", m_start); } else { bpf_printk("uprobe_send_op got NULL vf at varid %d\n", varid); + bpf_map_delete_elem(&ops, &key); return 0; } @@ -234,10 +242,13 @@ int uprobe_send_op(struct pt_regs *ctx) { bpf_printk("uprobe_send_op got m_start %d\n", val->ops_size); } else { bpf_printk("uprobe_send_op got NULL vf at varid %d\n", varid); + bpf_map_delete_elem(&ops, &key); return 0; } - val->ops_size &= 3; + if (val->ops_size > 3) { + val->ops_size = 3; + } val->offset = 0; val->length = 0; for (__u32 i = 0; i < 3; ++i) { @@ -276,6 +287,8 @@ int uprobe_send_op(struct pt_regs *ctx) { } } + val->sent_stamp = bpf_ktime_get_boot_ns(); + //bpf_map_update_elem(&ops, &key, val, 0);// no need to update again here return 0; } @@ -322,6 +335,7 @@ int uprobe_finish_op(struct pt_regs *ctx) { // submit to ringbuf struct client_op_v *e = bpf_ringbuf_reserve(&rb, sizeof(struct client_op_v), 0); if (NULL == e) { + bpf_map_delete_elem(&ops, &key); return 0; } *e = *opv; diff --git a/src/radostrace.cc b/src/radostrace.cc index 3495722..2eb024b 100644 --- a/src/radostrace.cc +++ b/src/radostrace.cc @@ -2,6 +2,7 @@ //observe the client <-> osds latency for each request. #include +#include #include #include #include @@ -85,11 +86,20 @@ DwarfParser::probes_t rados_probes = { }; volatile sig_atomic_t timeout_occurred = 0; +volatile sig_atomic_t got_sigint = 0; // CSV Output bool export_csv = false; std::string csv_output_file = "radostrace_events.csv"; +static __u64 get_boottime_ns() { + struct timespec ts; + if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) { + return 0; + } + return (__u64)ts.tv_sec * 1000000000ULL + (__u64)ts.tv_nsec; +} + const char * ceph_osd_op_str(int opc) { const char *op_str = NULL; #define GENERATE_CASE_ENTRY(op, opcode, str) case CEPH_OSD_OP_##op: op_str=str; break; @@ -124,11 +134,11 @@ void fill_map_hprobes(std::string mod_path, DwarfParser &dwarfparser, struct bpf } void signal_handler(int signum){ - clog << "Caught signal " << signum << endl; if (signum == SIGINT) { - clog << "process killed" << endl; + got_sigint = 1; + } else { + _exit(signum); } - exit(signum); } void timeout_handler(int signum) { @@ -223,6 +233,65 @@ int attach_retuprobe(struct radostrace_bpf *skel, return 0; } +static int report_hung_ops(struct radostrace_bpf *skel, + __u64 min_duration_ns = 0, + const char *title = "\n--- Pending ops (never completed) ---\n", + const char *empty_msg = "\nNo pending ops detected.\n") { + __u64 now_ns = get_boottime_ns(); + if (now_ns == 0) { + fprintf(stderr, "\nFailed to read CLOCK_BOOTTIME.\n"); + return -1; + } + + int map_fd = bpf_map__fd(skel->maps.ops); + struct client_op_k current_key, next_key; + struct client_op_v val; + struct client_op_k *key_ptr = NULL; + int found = 0; + + while (bpf_map_get_next_key(map_fd, key_ptr, &next_key) == 0) { + if (bpf_map_lookup_elem(map_fd, &next_key, &val) == 0) { + if (val.sent_stamp == 0) { + current_key = next_key; + key_ptr = ¤t_key; + continue; + } + __u64 duration_ns = now_ns - val.sent_stamp; + if (duration_ns < min_duration_ns) { + current_key = next_key; + key_ptr = ¤t_key; + continue; + } + if (found == 0) { + fprintf(stderr, "%s", title); + fprintf(stderr, "%8s %8s %8s %8s %-20s WR\n", + "client", "tid", "osd", "dur(s)", "object"); + } + long long dur_sec = (long long)(duration_ns / 1000000000ULL); + const char *wr_str = (val.rw & CEPH_OSD_FLAG_WRITE) ? "W" : "R"; + char safe_name[sizeof(val.object_name) + 1]; + memcpy(safe_name, val.object_name, sizeof(val.object_name)); + safe_name[sizeof(val.object_name)] = '\0'; + fprintf(stderr, "%8lld %8lld %8d %8lld %-20s %s\n", + (long long)val.cid, + (long long)val.tid, + (int)val.target_osd, + dur_sec, + safe_name, + wr_str); + found++; + } + current_key = next_key; + key_ptr = ¤t_key; + } + if (found == 0) { + fprintf(stderr, "%s", empty_msg); + } else { + fprintf(stderr, "Total reported ops: %d\n", found); + } + return found; +} + int digitnum(int x) { int cnt = 1; while(x / 10) { @@ -477,7 +546,7 @@ int main(int argc, char **argv) { return 0; } else if (arg == "-h" || arg == "--help") { std::cout << "Usage: " << argv[0] << " [-t ] [-j [filename]] [-i ] [-o [filename]] [-p ] [--skip-version-check]\n"; - std::cout << " -t, --timeout Set execution timeout in seconds\n"; + std::cout << " -t, --timeout Set execution timeout in seconds and report pending ops before exit\n"; std::cout << " -j, --export-json Export DWARF info to JSON (default: radostrace_dwarf.json)\n"; std::cout << " -i, --import-json Import DWARF info from JSON file\n"; std::cout << " -o, --output Export events data info to CSV (default: radostrace_events.csv)\n"; @@ -486,6 +555,9 @@ int main(int argc, char **argv) { std::cout << " -V, --version Print version information and exit\n"; std::cout << " -h, --help Show this help message\n"; return 0; + } else { + std::cerr << "Unknown argument: " << arg << "\n"; + return 1; } } @@ -501,7 +573,8 @@ int main(int argc, char **argv) { struct radostrace_bpf *skel; // long uprobe_offset; int ret = 0; - struct ring_buffer *rb; + int exit_code = 0; + struct ring_buffer *rb = nullptr; DwarfParser dwarfparser(rados_probes, probe_units); @@ -664,6 +737,7 @@ int main(int argc, char **argv) { rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), handle_event, NULL, NULL); if (!rb) { cerr << "failed to setup ring_buffer" << endl; + exit_code = 1; goto cleanup; } @@ -678,18 +752,28 @@ int main(int argc, char **argv) { clog << "Started to poll from ring buffer" << endl; - while ((!timeout_occurred || timeout == -1) && (ret = ring_buffer__poll(rb, 1000)) >= 0) { - // Continue polling while timeout hasn't occurred or if unlimited execution time + while (!got_sigint && (!timeout_occurred || timeout == -1) && (ret = ring_buffer__poll(rb, 1000)) >= 0) { + } + + if (ret == -EINTR && (got_sigint || timeout_occurred)) { + ret = 0; + } + + if (ret < 0 && ret != -EINTR) { + exit_code = 1; + cerr << "Error polling ring buffer: " << -ret << endl; } if (timeout_occurred) { - cerr << "Timeout occurred. Exiting." << endl; + cerr << "Timeout occurred. Reporting pending ops before exit." << endl; } + report_hung_ops(skel); + cleanup: clog << "Clean up the eBPF program" << endl; ring_buffer__free(rb); radostrace_bpf__destroy(skel); - return timeout_occurred ? -1 : -errno; + return exit_code; }