From 158b8cb52ec58c8ef8f5241a9db1c6dea4285253 Mon Sep 17 00:00:00 2001 From: Kevin Menard Date: Thu, 17 Oct 2024 17:36:43 -0400 Subject: [PATCH] YJIT: Add compilation log (#11818) * YJIT: Add `--yjit-compilation-log` flag to print out the compilation log at exit. * YJIT: Add an option to enable the compilation log at runtime. * YJIT: Fix a typo in the `IseqPayload` docs. * YJIT: Add stubs for getting the YJIT compilation log in memory. * YJIT: Add a compilation log based on a circular buffer to cap the log size. * YJIT: Allow specifying either a file or directory name for the YJIT compilation log. The compilation log will be populated as compilation events occur. If a directory is supplied, then a filename based on the PID will be used as the write target. If a file name is supplied instead, the log will be written to that file. * YJIT: Add JIT compilation of C function substitutions to the compilation log. * YJIT: Add compilation events to the circular buffer even if output is sent to a file. Previously, the two modes were treated as being exclusive of one another. However, it could be beneficial to log all events to a file while also allowing for direct access of the last N events via `RubyVM::YJIT.compilation_log`. * YJIT: Make timestamps the first element in the YJIT compilation log tuple. * YJIT: Stream log to stderr if `--yjit-compilation-log` is supplied without an argument. * YJIT: Eagerly compute compilation log messages to avoid hanging on to references that may GC. * YJIT: Log all compiled blocks, not just the method entry points. * YJIT: Remove all compilation events other than block compilation to slim down the log. * YJIT: Replace circular buffer iterator with a consuming loop. * YJIT: Support `--yjit-compilation-log=quiet` as a way to activate the in-memory log without printing it. Co-authored-by: Randy Stauner * YJIT: Promote the compilation log to being the one YJIT log. Co-authored-by: Randy Stauner * Update doc/yjit/yjit.md * Update doc/yjit/yjit.md --------- Co-authored-by: Randy Stauner Co-authored-by: Maxime Chevalier-Boisvert --- doc/yjit/yjit.md | 8 ++ yjit.c | 5 +- yjit.rb | 28 ++++++- yjit/src/codegen.rs | 3 + yjit/src/core.rs | 2 +- yjit/src/cruby.rs | 20 ++++- yjit/src/lib.rs | 1 + yjit/src/log.rs | 179 ++++++++++++++++++++++++++++++++++++++++++++ yjit/src/options.rs | 44 +++++++++++ yjit/src/yjit.rs | 16 +++- 10 files changed, 299 insertions(+), 7 deletions(-) create mode 100644 yjit/src/log.rs diff --git a/doc/yjit/yjit.md b/doc/yjit/yjit.md index d57d5dceda..49e395285f 100644 --- a/doc/yjit/yjit.md +++ b/doc/yjit/yjit.md @@ -166,6 +166,12 @@ You can dump statistics about compilation and execution by running YJIT with the ./miniruby --yjit-stats myscript.rb ``` +You can see what YJIT has compiled by running YJIT with the `--yjit-log` command-line option: + +```sh +./miniruby --yjit-log myscript.rb +``` + The machine code generated for a given method can be printed by adding `puts RubyVM::YJIT.disasm(method(:method_name))` to a Ruby script. Note that no code will be generated if the method is not compiled.

Command-Line Options

@@ -181,6 +187,8 @@ YJIT supports all command-line options supported by upstream CRuby, but also add compiled, lower values mean less code is compiled (default 200K) - `--yjit-stats`: print statistics after the execution of a program (incurs a run-time cost) - `--yjit-stats=quiet`: gather statistics while running a program but don't print them. Stats are accessible through `RubyVM::YJIT.runtime_stats`. (incurs a run-time cost) +- `--yjit-log[=file|dir]`: log all compilation events to the specified file or directory. If no name is supplied, the last 1024 log entries will be printed to stderr when the application exits. +- `--yjit-log=quiet`: gather a circular buffer of recent YJIT compilations. The compilation log entries are accessible through `RubyVM::YJIT.log` and old entries will be discarded if the buffer is not drained quickly. (incurs a run-time cost) - `--yjit-disable`: disable YJIT despite other `--yjit*` flags for lazily enabling it with `RubyVM::YJIT.enable` - `--yjit-code-gc`: enable code GC (disabled by default as of Ruby 3.3). It will cause all machine code to be discarded when the executable memory size limit is hit, meaning JIT compilation will then start over. diff --git a/yjit.c b/yjit.c index 93d8fe5c9f..737372daa1 100644 --- a/yjit.c +++ b/yjit.c @@ -1226,15 +1226,18 @@ rb_yjit_set_exception_return(rb_control_frame_t *cfp, void *leave_exit, void *le // Primitives used by yjit.rb VALUE rb_yjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self); VALUE rb_yjit_print_stats_p(rb_execution_context_t *ec, VALUE self); +VALUE rb_yjit_log_enabled_p(rb_execution_context_t *c, VALUE self); +VALUE rb_yjit_print_log_p(rb_execution_context_t *c, VALUE self); VALUE rb_yjit_trace_exit_locations_enabled_p(rb_execution_context_t *ec, VALUE self); VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self, VALUE key); VALUE rb_yjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self); +VALUE rb_yjit_get_log(rb_execution_context_t *ec, VALUE self); VALUE rb_yjit_disasm_iseq(rb_execution_context_t *ec, VALUE self, VALUE iseq); VALUE rb_yjit_insns_compiled(rb_execution_context_t *ec, VALUE self, VALUE iseq); VALUE rb_yjit_code_gc(rb_execution_context_t *ec, VALUE self); VALUE rb_yjit_simulate_oom_bang(rb_execution_context_t *ec, VALUE self); VALUE rb_yjit_get_exit_locations(rb_execution_context_t *ec, VALUE self); -VALUE rb_yjit_enable(rb_execution_context_t *ec, VALUE self, VALUE gen_stats, VALUE print_stats); +VALUE rb_yjit_enable(rb_execution_context_t *ec, VALUE self, VALUE gen_stats, VALUE print_stats, VALUE gen_compilation_log, VALUE print_compilation_log); // Preprocessed yjit.rb generated during build #include "yjit.rbinc" diff --git a/yjit.rb b/yjit.rb index 5483e39bf3..bcd34cde9b 100644 --- a/yjit.rb +++ b/yjit.rb @@ -18,6 +18,11 @@ module RubyVM::YJIT Primitive.rb_yjit_stats_enabled_p end + # Check if `--yjit-log` is used. + def self.log_enabled? + Primitive.rb_yjit_log_enabled_p + end + # Check if rb_yjit_trace_exit_locations_enabled_p is enabled. def self.trace_exit_locations_enabled? # :nodoc: Primitive.rb_yjit_trace_exit_locations_enabled_p @@ -28,15 +33,22 @@ module RubyVM::YJIT Primitive.rb_yjit_reset_stats_bang end - # Enable \YJIT compilation. `stats` option decides whether to enable \YJIT stats or not. + # Enable \YJIT compilation. `stats` option decides whether to enable \YJIT stats or not. `compilation_log` decides + # whether to enable \YJIT compilation logging or not. # + # `stats`: # * `false`: Disable stats. # * `true`: Enable stats. Print stats at exit. # * `:quiet`: Enable stats. Do not print stats at exit. - def self.enable(stats: false) + # + # `log`: + # * `false`: Don't enable the log. + # * `true`: Enable the log. Print log at exit. + # * `:quiet`: Enable the log. Do not print log at exit. + def self.enable(stats: false, log: false) return false if enabled? at_exit { print_and_dump_stats } if stats - Primitive.rb_yjit_enable(stats, stats != :quiet) + Primitive.rb_yjit_enable(stats, stats != :quiet, log, log != :quiet) end # If --yjit-trace-exits is enabled parse the hashes from @@ -173,6 +185,16 @@ module RubyVM::YJIT strio.string end + # Return an array of log entries. + # Return `nil` when option is not passed or unavailable. + def self.log + return nil unless log_enabled? + + Primitive.rb_yjit_get_log.map do |timestamp, path| + [Time.at(timestamp), path] + end + end + # Produce disassembly for an iseq. This requires a `--enable-yjit=dev` build. def self.disasm(iseq) # :nodoc: # If a method or proc is passed in, get its iseq diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 7e69ba32fa..82ec10b074 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -458,6 +458,7 @@ macro_rules! perf_call { } use crate::codegen::JCCKinds::*; +use crate::log::Log; #[allow(non_camel_case_types, unused)] pub enum JCCKinds { @@ -1223,6 +1224,8 @@ pub fn gen_single_block( asm_comment!(asm, "reg_mapping: {:?}", asm.ctx.get_reg_mapping()); } + Log::add_block_with_chain_depth(blockid, asm.ctx.get_chain_depth()); + // Mark the start of an ISEQ for --yjit-perf jit_perf_symbol_push!(jit, &mut asm, &get_iseq_name(iseq), PerfMap::ISEQ); diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 32bce76a01..ef91638ca6 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1763,7 +1763,7 @@ pub struct IseqPayload { // Basic block versions pub version_map: VersionMap, - // Indexes of code pages used by this this ISEQ + // Indexes of code pages used by this ISEQ pub pages: HashSet, // List of ISEQ entry codes diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 702aaf51bc..25fabec1d0 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -84,6 +84,7 @@ use std::convert::From; use std::ffi::{CString, CStr}; +use std::fmt::{Debug, Formatter}; use std::os::raw::{c_char, c_int, c_uint}; use std::panic::{catch_unwind, UnwindSafe}; @@ -563,6 +564,18 @@ impl From<*const rb_callable_method_entry_t> for VALUE { } } +impl From<&str> for VALUE { + fn from(value: &str) -> Self { + rust_str_to_ruby(value) + } +} + +impl From for VALUE { + fn from(value: String) -> Self { + rust_str_to_ruby(&value) + } +} + impl From for u64 { fn from(value: VALUE) -> Self { let VALUE(uimm) = value; @@ -594,7 +607,6 @@ impl From for u16 { } /// Produce a Ruby string from a Rust string slice -#[cfg(feature = "disasm")] pub fn rust_str_to_ruby(str: &str) -> VALUE { unsafe { rb_utf8_str_new(str.as_ptr() as *const _, str.len() as i64) } } @@ -625,6 +637,12 @@ pub struct SourceLocation { pub line: c_int, } +impl Debug for SourceLocation { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}:{}", self.file.to_string_lossy(), self.line)) + } +} + /// Make a [SourceLocation] at the current spot. macro_rules! src_loc { () => { diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs index 3f3d24be4b..1e3f31b88b 100644 --- a/yjit/src/lib.rs +++ b/yjit/src/lib.rs @@ -15,3 +15,4 @@ mod stats; mod utils; mod yjit; mod virtualmem; +mod log; diff --git a/yjit/src/log.rs b/yjit/src/log.rs new file mode 100644 index 0000000000..f2dcf519e0 --- /dev/null +++ b/yjit/src/log.rs @@ -0,0 +1,179 @@ +use crate::core::BlockId; +use crate::cruby::*; +use crate::options::*; +use crate::yjit::yjit_enabled_p; + +use std::fmt::{Display, Formatter}; +use std::os::raw::c_long; +use crate::utils::iseq_get_location; + +type Timestamp = f64; + +#[derive(Clone, Debug)] +pub struct LogEntry { + /// The time when the block was compiled. + pub timestamp: Timestamp, + + /// The log message. + pub message: String, +} + +impl Display for LogEntry { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{:15.6}: {}", self.timestamp, self.message) + } +} + +pub type Log = CircularBuffer; +static mut LOG: Option = None; + +impl Log { + pub fn init() { + unsafe { + LOG = Some(Log::new()); + } + } + + pub fn get_instance() -> &'static mut Log { + unsafe { + LOG.as_mut().unwrap() + } + } + + pub fn has_instance() -> bool { + unsafe { + LOG.as_mut().is_some() + } + } + + pub fn add_block_with_chain_depth(block_id: BlockId, chain_depth: u8) { + if !Self::has_instance() { + return; + } + + let print_log = get_option!(log); + let timestamp = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64(); + + let location = iseq_get_location(block_id.iseq, block_id.idx); + let index = block_id.idx; + let message = if chain_depth > 0 { + format!("{} (index: {}, chain_depth: {})", location, index, chain_depth) + } else { + format!("{} (index: {})", location, index) + }; + + let entry = LogEntry { + timestamp, + message + }; + + if let Some(output) = print_log { + match output { + LogOutput::Stderr => { + eprintln!("{}", entry); + } + + LogOutput::File(fd) => { + use std::os::unix::io::{FromRawFd, IntoRawFd}; + use std::io::Write; + + // Write with the fd opened during boot + let mut file = unsafe { std::fs::File::from_raw_fd(fd) }; + writeln!(file, "{}", entry).unwrap(); + file.flush().unwrap(); + file.into_raw_fd(); // keep the fd open + } + + LogOutput::MemoryOnly => () // Don't print or write anything + } + } + + Self::get_instance().push(entry); + } +} + +pub struct CircularBuffer { + buffer: Vec>, + head: usize, + tail: usize, + size: usize +} + +impl CircularBuffer { + pub fn new() -> Self { + Self { + buffer: vec![None; N], + head: 0, + tail: 0, + size: 0 + } + } + + pub fn push(&mut self, value: T) { + self.buffer[self.head] = Some(value); + self.head = (self.head + 1) % N; + if self.size == N { + self.tail = (self.tail + 1) % N; + } else { + self.size += 1; + } + } + + pub fn pop(&mut self) -> Option { + if self.size == 0 { + return None; + } + + let value = self.buffer[self.tail].take(); + self.tail = (self.tail + 1) % N; + self.size -= 1; + value + } + + pub fn len(&self) -> usize { + self.size + } +} + + +//=========================================================================== + +/// Primitive called in yjit.rb +/// Check if log generation is enabled +#[no_mangle] +pub extern "C" fn rb_yjit_log_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + if get_option!(log).is_some() { + return Qtrue; + } else { + return Qfalse; + } +} + +/// Primitive called in yjit.rb. +/// Export all YJIT log entries as a Ruby array. +#[no_mangle] +pub extern "C" fn rb_yjit_get_log(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + with_vm_lock(src_loc!(), || rb_yjit_get_log_array()) +} + +fn rb_yjit_get_log_array() -> VALUE { + if !yjit_enabled_p() || get_option!(log).is_none() { + return Qnil; + } + + let log = Log::get_instance(); + let array = unsafe { rb_ary_new_capa(log.len() as c_long) }; + + while log.len() > 0 { + let entry = log.pop().unwrap(); + + unsafe { + let entry_array = rb_ary_new_capa(2); + rb_ary_push(entry_array, rb_float_new(entry.timestamp)); + rb_ary_push(entry_array, entry.message.into()); + rb_ary_push(array, entry_array); + } + } + + return array; +} diff --git a/yjit/src/options.rs b/yjit/src/options.rs index c91c365738..b993b5685b 100644 --- a/yjit/src/options.rs +++ b/yjit/src/options.rs @@ -82,6 +82,9 @@ pub struct Options { /// Enable writing /tmp/perf-{pid}.map for Linux perf pub perf_map: Option, + + // Where to store the log. `None` disables the log. + pub log: Option, } // Initialize the options to default values @@ -103,6 +106,7 @@ pub static mut OPTIONS: Options = Options { frame_pointer: false, code_gc: false, perf_map: None, + log: None, }; /// YJIT option descriptions for `ruby --help`. @@ -113,6 +117,7 @@ pub const YJIT_OPTIONS: &'static [(&str, &str)] = &[ ("--yjit-call-threshold=num", "Number of calls to trigger JIT."), ("--yjit-cold-threshold=num", "Global calls after which ISEQs not compiled (default: 200K)."), ("--yjit-stats", "Enable collecting YJIT statistics."), + ("--yjit--log[=file|dir]", "Enable logging of YJIT's compilation activity."), ("--yjit-disable", "Disable YJIT for lazily enabling it with RubyVM::YJIT.enable."), ("--yjit-code-gc", "Run code GC when the code size reaches the limit."), ("--yjit-perf", "Enable frame pointers and perf profiling."), @@ -128,6 +133,16 @@ pub enum TraceExits { Counter(Counter), } +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum LogOutput { + // Dump to the log file as events occur. + File(std::os::unix::io::RawFd), + // Keep the log in memory only + MemoryOnly, + // Dump to stderr when the process exits + Stderr +} + #[derive(Debug)] pub enum DumpDisasm { // Dump to stdout @@ -170,6 +185,7 @@ macro_rules! get_option_ref { }; } pub(crate) use get_option_ref; +use crate::log::Log; /// Expected to receive what comes after the third dash in "--yjit-*". /// Empty string means user passed only "--yjit". C code rejects when @@ -310,6 +326,34 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { return None; } }, + ("log", _) => match opt_val { + "" => unsafe { + OPTIONS.log = Some(LogOutput::Stderr); + Log::init(); + }, + "quiet" => unsafe { + OPTIONS.log = Some(LogOutput::MemoryOnly); + Log::init(); + }, + arg_value => { + let log_file_path = if std::path::Path::new(arg_value).is_dir() { + format!("{arg_value}/yjit_{}.log", std::process::id()) + } else { + arg_value.to_string() + }; + + match File::options().create(true).write(true).truncate(true).open(&log_file_path) { + Ok(file) => { + use std::os::unix::io::IntoRawFd; + eprintln!("YJIT log: {log_file_path}"); + + unsafe { OPTIONS.log = Some(LogOutput::File(file.into_raw_fd())) } + Log::init() + } + Err(err) => panic!("Failed to create {log_file_path}: {err}"), + } + } + }, ("trace-exits", _) => unsafe { OPTIONS.gen_stats = true; OPTIONS.trace_exits = match opt_val { diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs index c2647d55f7..a9ecc24a80 100644 --- a/yjit/src/yjit.rs +++ b/yjit/src/yjit.rs @@ -8,6 +8,7 @@ use crate::stats::incr_counter; use crate::stats::with_compile_time; use std::os::raw; +use crate::log::Log; /// Is YJIT on? The interpreter uses this variable to decide whether to trigger /// compilation. See jit_exec() and jit_compile(). @@ -167,7 +168,7 @@ pub extern "C" fn rb_yjit_code_gc(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { /// Enable YJIT compilation, returning true if YJIT was previously disabled #[no_mangle] -pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE, print_stats: VALUE) -> VALUE { +pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE, print_stats: VALUE, gen_log: VALUE, print_log: VALUE) -> VALUE { with_vm_lock(src_loc!(), || { // Initialize and enable YJIT if gen_stats.test() { @@ -176,6 +177,19 @@ pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE OPTIONS.print_stats = print_stats.test(); } } + + if gen_log.test() { + unsafe { + if print_log.test() { + OPTIONS.log = Some(LogOutput::Stderr); + } else { + OPTIONS.log = Some(LogOutput::MemoryOnly); + } + + Log::init(); + } + } + yjit_init(); // Add "+YJIT" to RUBY_DESCRIPTION