YJIT: Let yjit_perf.py support perf with Python disabled (#10246)

* YJIT: Let yjit_perf.py support perf with Python disabled

* Update yjit.md about perf

* Recommend the extra interface by default
This commit is contained in:
Takashi Kokubun 2024-03-14 10:08:23 -07:00 committed by GitHub
parent 09d8c99cdc
commit b0be2961f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 47 additions and 13 deletions

View File

@ -480,13 +480,8 @@ perf script --fields +pid > /tmp/test.perf
You can also profile the number of cycles consumed by code generated by each YJIT function. You can also profile the number of cycles consumed by code generated by each YJIT function.
```bash ```bash
# Build perf from source for Python support # Install perf
# [Optional] libelf-dev libunwind-dev libaudit-dev libslang2-dev libdw-dev apt-get install linux-tools-common linux-tools-generic linux-tools-`uname -r`
sudo apt-get install libpython3-dev python3-pip flex libtraceevent-dev
git clone https://github.com/torvalds/linux
cd linux/tools/perf
make
make install
# [Optional] Allow running perf without sudo # [Optional] Allow running perf without sudo
echo 0 | sudo tee /proc/sys/kernel/kptr_restrict echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
@ -496,6 +491,25 @@ echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
cd ../yjit-bench cd ../yjit-bench
PERF=record ruby --yjit-perf=codegen -Iharness-perf benchmarks/lobsters/benchmark.rb PERF=record ruby --yjit-perf=codegen -Iharness-perf benchmarks/lobsters/benchmark.rb
# Aggregate results
perf script > /tmp/perf.txt
../ruby/misc/yjit_perf.py /tmp/perf.txt
```
#### Building perf with Python support
The above instructions work fine for most people, but you can also use
the handy `perf script -s` interface if you build perf from source.
```bash
# Build perf from source for Python support
sudo apt-get install libpython3-dev python3-pip flex libtraceevent-dev \
libelf-dev libunwind-dev libaudit-dev libslang2-dev libdw-dev
git clone --depth=1 https://github.com/torvalds/linux
cd linux/tools/perf
make
make install
# Aggregate results # Aggregate results
perf script -s ../ruby/misc/yjit_perf.py perf script -s ../ruby/misc/yjit_perf.py
``` ```

32
misc/yjit_perf.py Normal file → Executable file
View File

@ -1,12 +1,9 @@
#!/usr/bin/env python3
import os import os
import sys import sys
from collections import Counter, defaultdict from collections import Counter, defaultdict
import os.path import os.path
sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from perf_trace_context import *
from EventClass import *
# Aggregating cycles per symbol and dso # Aggregating cycles per symbol and dso
total_cycles = 0 total_cycles = 0
category_cycles = Counter() category_cycles = Counter()
@ -57,11 +54,10 @@ def categorize_symbol(dso, symbol):
def process_event(event): def process_event(event):
global total_cycles, category_cycles, detailed_category_cycles, categories global total_cycles, category_cycles, detailed_category_cycles, categories
sample = event["sample"]
full_dso = event.get("dso", "Unknown_dso") full_dso = event.get("dso", "Unknown_dso")
dso = os.path.basename(full_dso) dso = os.path.basename(full_dso)
symbol = event.get("symbol", "[unknown]") symbol = event.get("symbol", "[unknown]")
cycles = sample["period"] cycles = event["sample"]["period"]
total_cycles += cycles total_cycles += cycles
category = categorize_symbol(dso, symbol) category = categorize_symbol(dso, symbol)
@ -94,3 +90,27 @@ def trace_end():
for (dso, symbol), cycles in symbols.most_common(): for (dso, symbol), cycles in symbols.most_common():
symbol_ratio = (cycles / category_total) * 100 symbol_ratio = (cycles / category_total) * 100
print("{:<20} {:<50} {:>20.2f}% {:>15}".format(dso, truncate_symbol(symbol), symbol_ratio, cycles)) print("{:<20} {:<50} {:>20.2f}% {:>15}".format(dso, truncate_symbol(symbol), symbol_ratio, cycles))
# There are two ways to use this script:
# 1) perf script -s misc/yjit_perf.py -- native interface
# 2) perf script > perf.txt && misc/yjit_perf.py perf.txt -- hack, which doesn't require perf with Python support
#
# In both cases, __name__ is "__main__". The following code implements (2),
# which is selected when len(sys.argv) is 2, i.e. when exactly one argument
# (the path to the `perf script` text output) is given.
if __name__ == "__main__" and len(sys.argv) == 2:
    with open(sys.argv[1], "r") as file:
        for line in file:
            # [Example]
            # ruby 78207 3482.848465: 1212775 cpu_core/cycles:P/: 5c0333f682e1 [JIT] getlocal_WC_0+0x0 (/tmp/perf-78207.map)
            row = line.split(maxsplit=6)
            if len(row) < 7:
                continue  # blank line or a line without the expected seven fields
            period = row[3]  # "1212775"

            # Split on the LAST " (" so a symbol that itself contains " (" stays intact.
            symbol, sep, dso = row[6].rpartition(" (")  # "[JIT] getlocal_WC_0+0x0", "/tmp/perf-78207.map)\n"
            if not sep:
                continue  # no "(dso)" suffix; not a sample line in the expected format

            # Drop only a trailing "+0x..." offset, so a symbol whose own name
            # contains "+" (e.g. "Integer#+") is not truncated.
            name, plus, offset = symbol.rpartition("+")
            if plus and offset.startswith("0x"):
                symbol = name  # "[JIT] getlocal_WC_0"
            dso = dso.split(")")[0]  # "/tmp/perf-78207.map"

            process_event({"dso": dso, "symbol": symbol, "sample": {"period": int(period)}})
    trace_end()