1327 lines
52 KiB
C++
1327 lines
52 KiB
C++
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#include "trace.h"
|
|
|
|
#include <sys/uio.h>
|
|
#include <unistd.h>
|
|
|
|
#include "android-base/macros.h"
|
|
#include "android-base/stringprintf.h"
|
|
|
|
#include "art_method-inl.h"
|
|
#include "base/casts.h"
|
|
#include "base/enums.h"
|
|
#include "base/os.h"
|
|
#include "base/stl_util.h"
|
|
#include "base/systrace.h"
|
|
#include "base/time_utils.h"
|
|
#include "base/unix_file/fd_file.h"
|
|
#include "base/utils.h"
|
|
#include "class_linker.h"
|
|
#include "common_throws.h"
|
|
#include "debugger.h"
|
|
#include "dex/descriptors_names.h"
|
|
#include "dex/dex_file-inl.h"
|
|
#include "entrypoints/quick/quick_entrypoints.h"
|
|
#include "gc/scoped_gc_critical_section.h"
|
|
#include "instrumentation.h"
|
|
#include "jit/jit.h"
|
|
#include "jit/jit_code_cache.h"
|
|
#include "mirror/class-inl.h"
|
|
#include "mirror/dex_cache-inl.h"
|
|
#include "mirror/object-inl.h"
|
|
#include "mirror/object_array-inl.h"
|
|
#include "nativehelper/scoped_local_ref.h"
|
|
#include "scoped_thread_state_change-inl.h"
|
|
#include "stack.h"
|
|
#include "thread.h"
|
|
#include "thread_list.h"
|
|
|
|
namespace art {
|
|
|
|
using android::base::StringPrintf;
|
|
|
|
static constexpr size_t TraceActionBits = MinimumBitsToStore(
|
|
static_cast<size_t>(kTraceMethodActionMask));
|
|
static constexpr uint8_t kOpNewMethod = 1U;
|
|
static constexpr uint8_t kOpNewThread = 2U;
|
|
static constexpr uint8_t kOpTraceSummary = 3U;
|
|
|
|
static const char kTraceTokenChar = '*';
|
|
static const uint16_t kTraceHeaderLength = 32;
|
|
static const uint32_t kTraceMagicValue = 0x574f4c53;
|
|
static const uint16_t kTraceVersionSingleClock = 2;
|
|
static const uint16_t kTraceVersionDualClock = 3;
|
|
static const uint16_t kTraceRecordSizeSingleClock = 10; // using v2
|
|
static const uint16_t kTraceRecordSizeDualClock = 14; // using v3 with two timestamps
|
|
|
|
TraceClockSource Trace::default_clock_source_ = kDefaultTraceClockSource;
|
|
|
|
Trace* volatile Trace::the_trace_ = nullptr;
|
|
pthread_t Trace::sampling_pthread_ = 0U;
|
|
std::unique_ptr<std::vector<ArtMethod*>> Trace::temp_stack_trace_;
|
|
|
|
// The key identifying the tracer to update instrumentation.
|
|
static constexpr const char* kTracerInstrumentationKey = "Tracer";
|
|
|
|
// Extracts the TraceAction stored in the low-order bits of an encoded trace method id.
static TraceAction DecodeTraceAction(uint32_t tmid) {
  return static_cast<TraceAction>(tmid & kTraceMethodActionMask);
}
|
|
|
|
namespace {
|
|
// Scaling factor to convert timestamp counter into wall clock time reported in micro seconds.
// This is initialized at the start of tracing using the timestamp counter update frequency.
// See InitializeTimestampCounters for more details. A non-positive value (the initial -1.0)
// means "not yet initialized".
double tsc_to_microsec_scaling_factor = -1.0;
|
|
|
|
// Reads the raw hardware timestamp counter for the current architecture. The returned value is
// in counter ticks, not microseconds (see GetMicroTime for the conversion), except on
// architectures without a supported counter, where MicroTime() is used directly.
uint64_t GetTimestamp() {
  uint64_t t = 0;
#if defined(__arm__)
  // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34
  // Q and R specify that they should be written to lower and upper halves of 64-bit value.
  // See: https://llvm.org/docs/LangRef.html#asm-template-argument-modifiers
  asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r"(t));
#elif defined(__aarch64__)
  // See Arm Architecture Registers Armv8 section System Registers
  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
#elif defined(__i386__) || defined(__x86_64__)
  // rdtsc returns two 32-bit values in rax and rdx even on 64-bit architectures.
  unsigned int lo, hi;
  asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
  t = (static_cast<uint64_t>(hi) << 32) | lo;
#else
  t = MicroTime();
#endif
  return t;
}
|
|
|
|
#if defined(__i386__) || defined(__x86_64__)
|
|
// Here we compute the scaling factor by sleeping for a millisecond. Alternatively, we could
|
|
// generate raw timestamp counter and also time using clock_gettime at the start and the end of the
|
|
// trace. We can compute the frequency of timestamp counter upadtes in the post processing step
|
|
// using these two samples. However, that would require a change in Android Studio which is the main
|
|
// consumer of these profiles. For now, just compute the frequency of tsc updates here.
|
|
double computeScalingFactor() {
|
|
uint64_t start = MicroTime();
|
|
uint64_t start_tsc = GetTimestamp();
|
|
// Sleep for one millisecond.
|
|
usleep(1000);
|
|
uint64_t diff_tsc = GetTimestamp() - start_tsc;
|
|
uint64_t diff_time = MicroTime() - start;
|
|
double scaling_factor = static_cast<double>(diff_time) / diff_tsc;
|
|
DCHECK(scaling_factor > 0.0) << scaling_factor;
|
|
return scaling_factor;
|
|
}
|
|
|
|
double GetScalingFactorForX86() {
|
|
uint32_t eax, ebx, ecx;
|
|
asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx) : "a"(0x0), "c"(0));
|
|
if (eax < 0x15) {
|
|
// There is no 15H - Timestamp counter and core crystal clock information
|
|
// leaf. Just compute the frequency.
|
|
return computeScalingFactor();
|
|
}
|
|
|
|
// From Intel architecture-instruction-set-extensions-programming-reference:
|
|
// EBX[31:0]/EAX[31:0] indicates the ratio of the TSC frequency and the
|
|
// core crystal clock frequency.
|
|
// If EBX[31:0] is 0, the TSC and "core crystal clock" ratio is not enumerated.
|
|
// If ECX is 0, the nominal core crystal clock frequency is not enumerated.
|
|
// "TSC frequency" = "core crystal clock frequency" * EBX/EAX.
|
|
// The core crystal clock may differ from the reference clock, bus clock, or core clock
|
|
// frequencies.
|
|
// EAX Bits 31 - 00: An unsigned integer which is the denominator of the
|
|
// TSC/"core crystal clock" ratio.
|
|
// EBX Bits 31 - 00: An unsigned integer which is the numerator of the
|
|
// TSC/"core crystal clock" ratio.
|
|
// ECX Bits 31 - 00: An unsigned integer which is the nominal frequency of the core
|
|
// crystal clock in Hz.
|
|
// EDX Bits 31 - 00: Reserved = 0.
|
|
asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx) : "a"(0x15), "c"(0));
|
|
if (ebx == 0 || ecx == 0) {
|
|
return computeScalingFactor();
|
|
}
|
|
double coreCrystalFreq = ecx;
|
|
// frequency = coreCrystalFreq * (ebx / eax)
|
|
// scaling_factor = seconds_to_microseconds / frequency
|
|
// = seconds_to_microseconds * eax / (coreCrystalFreq * ebx)
|
|
double seconds_to_microseconds = 1000 * 1000;
|
|
double scaling_factor = (seconds_to_microseconds * eax) / (coreCrystalFreq * ebx);
|
|
return scaling_factor;
|
|
}
|
|
#endif
|
|
|
|
// Computes tsc_to_microsec_scaling_factor (microseconds per timestamp counter tick) for the
// current architecture. Does nothing if the factor has already been set to a positive value.
void InitializeTimestampCounters() {
  // It is sufficient to initialize this once for the entire execution. Just return if it is
  // already initialized.
  if (tsc_to_microsec_scaling_factor > 0.0) {
    return;
  }

#if defined(__arm__)
  double seconds_to_microseconds = 1000 * 1000;
  uint64_t freq = 0;
  // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.21
  asm volatile("mrc p15, 0, %0, c14, c0, 0" : "=r"(freq));
  // NOTE(review): a counter frequency register that reads as 0 would make this a division by
  // zero - confirm the register is always programmed on supported devices.
  tsc_to_microsec_scaling_factor = seconds_to_microseconds / static_cast<double>(freq);
#elif defined(__aarch64__)
  double seconds_to_microseconds = 1000 * 1000;
  uint64_t freq = 0;
  // See Arm Architecture Registers Armv8 section System Registers
  asm volatile("mrs %0, cntfrq_el0" : "=r"(freq));
  // NOTE(review): same division-by-zero concern as on 32-bit Arm if cntfrq_el0 reads as 0.
  tsc_to_microsec_scaling_factor = seconds_to_microseconds / static_cast<double>(freq);
#elif defined(__i386__) || defined(__x86_64__)
  tsc_to_microsec_scaling_factor = GetScalingFactorForX86();
#else
  // GetTimestamp() already returns microseconds on other architectures.
  tsc_to_microsec_scaling_factor = 1.0;
#endif
}
|
|
|
|
// Converts a raw timestamp counter value (as returned by GetTimestamp) into microseconds.
// InitializeTimestampCounters must have run first so that the scaling factor is positive.
ALWAYS_INLINE uint64_t GetMicroTime(uint64_t counter) {
  DCHECK(tsc_to_microsec_scaling_factor > 0.0) << tsc_to_microsec_scaling_factor;
  // The double product is truncated to an integral number of microseconds on return.
  return tsc_to_microsec_scaling_factor * counter;
}
|
|
|
|
} // namespace
|
|
|
|
// Maps an encoded trace method id back to its ArtMethod by stripping the action bits and
// searching art_method_id_map_ for the resulting index. Returns nullptr if no method has that
// index.
ArtMethod* Trace::DecodeTraceMethod(uint32_t tmid) {
  uint32_t method_index = tmid >> TraceActionBits;
  // This is used only for logging which is usually needed only for debugging ART. So it's not
  // performance critical.
  for (auto const& entry : art_method_id_map_) {
    if (method_index == entry.second) {
      return entry.first;
    }
  }
  return nullptr;
}
|
|
|
|
// Returns the index assigned to `method` in art_method_id_map_. A method seen for the first time
// is assigned the next free index (current_method_index_), which is then incremented.
uint32_t Trace::EncodeTraceMethod(ArtMethod* method) {
  uint32_t idx = 0;
  auto it = art_method_id_map_.find(method);
  if (it != art_method_id_map_.end()) {
    idx = it->second;
  } else {
    idx = current_method_index_;
    art_method_id_map_.emplace(method, idx);
    current_method_index_++;
  }
  return idx;
}
|
|
|
|
std::vector<ArtMethod*>* Trace::AllocStackTrace() {
|
|
return (temp_stack_trace_.get() != nullptr) ? temp_stack_trace_.release() :
|
|
new std::vector<ArtMethod*>();
|
|
}
|
|
|
|
// Takes ownership of `stack_trace`, empties it and caches it in temp_stack_trace_ for reuse by
// AllocStackTrace. Any vector previously held in the cache is destroyed by the reset.
void Trace::FreeStackTrace(std::vector<ArtMethod*>* stack_trace) {
  stack_trace->clear();
  temp_stack_trace_.reset(stack_trace);
}
|
|
|
|
// Sets the default clock source. Only Linux builds honour the request; elsewhere the default is
// left unchanged and a warning is logged if anything other than the wall clock was requested.
void Trace::SetDefaultClockSource(TraceClockSource clock_source) {
#if defined(__linux__)
  default_clock_source_ = clock_source;
#else
  if (clock_source != TraceClockSource::kWall) {
    LOG(WARNING) << "Ignoring tracing request to use CPU time.";
  }
#endif
}
|
|
|
|
// Returns the trace format version for `clock_source`: the dual-clock version for kDual and the
// single-clock version for every other source.
static uint16_t GetTraceVersion(TraceClockSource clock_source) {
  return (clock_source == TraceClockSource::kDual) ? kTraceVersionDualClock
                                                    : kTraceVersionSingleClock;
}
|
|
|
|
// Returns the size in bytes of one trace record for `clock_source`: the dual-clock record size
// for kDual and the single-clock record size for every other source.
static uint16_t GetRecordSize(TraceClockSource clock_source) {
  return (clock_source == TraceClockSource::kDual) ? kTraceRecordSizeDualClock
                                                    : kTraceRecordSizeSingleClock;
}
|
|
|
|
bool Trace::UseThreadCpuClock() {
|
|
return (clock_source_ == TraceClockSource::kThreadCpu) ||
|
|
(clock_source_ == TraceClockSource::kDual);
|
|
}
|
|
|
|
bool Trace::UseWallClock() {
|
|
return (clock_source_ == TraceClockSource::kWall) ||
|
|
(clock_source_ == TraceClockSource::kDual);
|
|
}
|
|
|
|
void Trace::MeasureClockOverhead() {
|
|
if (UseThreadCpuClock()) {
|
|
Thread::Current()->GetCpuMicroTime();
|
|
}
|
|
if (UseWallClock()) {
|
|
GetTimestamp();
|
|
}
|
|
}
|
|
|
|
// Compute an average time taken to measure clocks.
|
|
uint32_t Trace::GetClockOverheadNanoSeconds() {
|
|
Thread* self = Thread::Current();
|
|
uint64_t start = self->GetCpuMicroTime();
|
|
|
|
for (int i = 4000; i > 0; i--) {
|
|
MeasureClockOverhead();
|
|
MeasureClockOverhead();
|
|
MeasureClockOverhead();
|
|
MeasureClockOverhead();
|
|
MeasureClockOverhead();
|
|
MeasureClockOverhead();
|
|
MeasureClockOverhead();
|
|
MeasureClockOverhead();
|
|
}
|
|
|
|
uint64_t elapsed_us = self->GetCpuMicroTime() - start;
|
|
return static_cast<uint32_t>(elapsed_us / 32);
|
|
}
|
|
|
|
// TODO: put this somewhere with the big-endian equivalent used by JDWP.
// Stores the 16-bit `val` into buf[0..1] in little-endian byte order.
static void Append2LE(uint8_t* buf, uint16_t val) {
  *buf++ = static_cast<uint8_t>(val);
  *buf++ = static_cast<uint8_t>(val >> 8);
}
|
|
|
|
// TODO: put this somewhere with the big-endian equivalent used by JDWP.
// Stores the 32-bit `val` into buf[0..3] in little-endian byte order.
static void Append4LE(uint8_t* buf, uint32_t val) {
  *buf++ = static_cast<uint8_t>(val);
  *buf++ = static_cast<uint8_t>(val >> 8);
  *buf++ = static_cast<uint8_t>(val >> 16);
  *buf++ = static_cast<uint8_t>(val >> 24);
}
|
|
|
|
// TODO: put this somewhere with the big-endian equivalent used by JDWP.
// Stores the 64-bit `val` into buf[0..7] in little-endian byte order.
static void Append8LE(uint8_t* buf, uint64_t val) {
  *buf++ = static_cast<uint8_t>(val);
  *buf++ = static_cast<uint8_t>(val >> 8);
  *buf++ = static_cast<uint8_t>(val >> 16);
  *buf++ = static_cast<uint8_t>(val >> 24);
  *buf++ = static_cast<uint8_t>(val >> 32);
  *buf++ = static_cast<uint8_t>(val >> 40);
  *buf++ = static_cast<uint8_t>(val >> 48);
  *buf++ = static_cast<uint8_t>(val >> 56);
}
|
|
|
|
// Per-thread callback used by the sampling thread. Walks `thread`'s stack (including inlined
// frames), collects every non-runtime method into a stack-trace vector, and passes it to the
// Trace given in `arg`, which diffs it against the thread's previous sample.
static void GetSample(Thread* thread, void* arg) REQUIRES_SHARED(Locks::mutator_lock_) {
  std::vector<ArtMethod*>* const stack_trace = Trace::AllocStackTrace();
  StackVisitor::WalkStack(
      [&](const art::StackVisitor* stack_visitor) REQUIRES_SHARED(Locks::mutator_lock_) {
        ArtMethod* m = stack_visitor->GetMethod();
        // Ignore runtime frames (in particular callee save).
        if (!m->IsRuntimeMethod()) {
          stack_trace->push_back(m);
        }
        // Always continue to the next frame.
        return true;
      },
      thread,
      /* context= */ nullptr,
      art::StackVisitor::StackWalkKind::kIncludeInlinedFrames);
  Trace* the_trace = reinterpret_cast<Trace*>(arg);
  the_trace->CompareAndUpdateStackTrace(thread, stack_trace);
}
|
|
|
|
// Per-thread callback that resets the sampling state kept on `thread`: zeroes its trace clock
// base and detaches and deletes its last stack-trace sample (if any).
static void ClearThreadStackTraceAndClockBase(Thread* thread, void* arg ATTRIBUTE_UNUSED) {
  thread->SetTraceClockBase(0);
  std::vector<ArtMethod*>* stack_trace = thread->GetStackTraceSample();
  // Detach the sample from the thread before freeing it.
  thread->SetStackTraceSample(nullptr);
  delete stack_trace;
}
|
|
|
|
// Installs `stack_trace` as `thread`'s current sample and logs the method enter/exit events
// implied by the difference from the previous sample. Must run on the sampling thread.
// Stack traces are stored top frame first, so reverse iteration walks them bottom-up.
void Trace::CompareAndUpdateStackTrace(Thread* thread,
                                       std::vector<ArtMethod*>* stack_trace) {
  CHECK_EQ(pthread_self(), sampling_pthread_);
  std::vector<ArtMethod*>* old_stack_trace = thread->GetStackTraceSample();
  // Update the thread's stack trace sample.
  thread->SetStackTraceSample(stack_trace);
  // Read timer clocks to use for all events in this trace.
  uint32_t thread_clock_diff = 0;
  uint64_t timestamp_counter = 0;
  ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
  if (old_stack_trace == nullptr) {
    // If there's no previous stack trace sample for this thread, log an entry event for all
    // methods in the trace.
    for (auto rit = stack_trace->rbegin(); rit != stack_trace->rend(); ++rit) {
      LogMethodTraceEvent(thread, *rit, kTraceMethodEnter, thread_clock_diff, timestamp_counter);
    }
  } else {
    // If there's a previous stack trace for this thread, diff the traces and emit entry and exit
    // events accordingly.
    auto old_rit = old_stack_trace->rbegin();
    auto rit = stack_trace->rbegin();
    // Iterate bottom-up over both traces until there's a difference between them.
    while (old_rit != old_stack_trace->rend() && rit != stack_trace->rend() && *old_rit == *rit) {
      old_rit++;
      rit++;
    }
    // Iterate top-down over the old trace until the point where they differ, emitting exit events.
    for (auto old_it = old_stack_trace->begin(); old_it != old_rit.base(); ++old_it) {
      LogMethodTraceEvent(thread, *old_it, kTraceMethodExit, thread_clock_diff, timestamp_counter);
    }
    // Iterate bottom-up over the new trace from the point where they differ, emitting entry events.
    for (; rit != stack_trace->rend(); ++rit) {
      LogMethodTraceEvent(thread, *rit, kTraceMethodEnter, thread_clock_diff, timestamp_counter);
    }
    // The previous sample is no longer referenced by the thread; recycle it.
    FreeStackTrace(old_stack_trace);
  }
}
|
|
|
|
void* Trace::RunSamplingThread(void* arg) {
|
|
Runtime* runtime = Runtime::Current();
|
|
intptr_t interval_us = reinterpret_cast<intptr_t>(arg);
|
|
CHECK_GE(interval_us, 0);
|
|
CHECK(runtime->AttachCurrentThread("Sampling Profiler", true, runtime->GetSystemThreadGroup(),
|
|
!runtime->IsAotCompiler()));
|
|
|
|
while (true) {
|
|
usleep(interval_us);
|
|
ScopedTrace trace("Profile sampling");
|
|
Thread* self = Thread::Current();
|
|
Trace* the_trace;
|
|
{
|
|
MutexLock mu(self, *Locks::trace_lock_);
|
|
the_trace = the_trace_;
|
|
if (the_trace_->stop_tracing_) {
|
|
break;
|
|
}
|
|
}
|
|
{
|
|
// Avoid a deadlock between a thread doing garbage collection
|
|
// and the profile sampling thread, by blocking GC when sampling
|
|
// thread stacks (see b/73624630).
|
|
gc::ScopedGCCriticalSection gcs(self,
|
|
art::gc::kGcCauseInstrumentation,
|
|
art::gc::kCollectorTypeInstrumentation);
|
|
ScopedSuspendAll ssa(__FUNCTION__);
|
|
MutexLock mu(self, *Locks::thread_list_lock_);
|
|
runtime->GetThreadList()->ForEach(GetSample, the_trace);
|
|
}
|
|
}
|
|
|
|
runtime->DetachCurrentThread();
|
|
return nullptr;
|
|
}
|
|
|
|
void Trace::Start(const char* trace_filename,
|
|
size_t buffer_size,
|
|
int flags,
|
|
TraceOutputMode output_mode,
|
|
TraceMode trace_mode,
|
|
int interval_us) {
|
|
std::unique_ptr<File> file(OS::CreateEmptyFileWriteOnly(trace_filename));
|
|
if (file == nullptr) {
|
|
std::string msg = android::base::StringPrintf("Unable to open trace file '%s'", trace_filename);
|
|
PLOG(ERROR) << msg;
|
|
ScopedObjectAccess soa(Thread::Current());
|
|
Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;", msg.c_str());
|
|
return;
|
|
}
|
|
Start(std::move(file), buffer_size, flags, output_mode, trace_mode, interval_us);
|
|
}
|
|
|
|
void Trace::Start(int trace_fd,
|
|
size_t buffer_size,
|
|
int flags,
|
|
TraceOutputMode output_mode,
|
|
TraceMode trace_mode,
|
|
int interval_us) {
|
|
if (trace_fd < 0) {
|
|
std::string msg = android::base::StringPrintf("Unable to start tracing with invalid fd %d",
|
|
trace_fd);
|
|
LOG(ERROR) << msg;
|
|
ScopedObjectAccess soa(Thread::Current());
|
|
Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;", msg.c_str());
|
|
return;
|
|
}
|
|
std::unique_ptr<File> file(new File(trace_fd, /* path= */ "tracefile", /* check_usage= */ true));
|
|
Start(std::move(file), buffer_size, flags, output_mode, trace_mode, interval_us);
|
|
}
|
|
|
|
void Trace::StartDDMS(size_t buffer_size,
|
|
int flags,
|
|
TraceMode trace_mode,
|
|
int interval_us) {
|
|
Start(std::unique_ptr<File>(),
|
|
buffer_size,
|
|
flags,
|
|
TraceOutputMode::kDDMS,
|
|
trace_mode,
|
|
interval_us);
|
|
}
|
|
|
|
void Trace::Start(std::unique_ptr<File>&& trace_file_in,
|
|
size_t buffer_size,
|
|
int flags,
|
|
TraceOutputMode output_mode,
|
|
TraceMode trace_mode,
|
|
int interval_us) {
|
|
// We own trace_file now and are responsible for closing it. To account for error situations, use
|
|
// a specialized unique_ptr to ensure we close it on the way out (if it hasn't been passed to a
|
|
// Trace instance).
|
|
auto deleter = [](File* file) {
|
|
if (file != nullptr) {
|
|
file->MarkUnchecked(); // Don't deal with flushing requirements.
|
|
int result ATTRIBUTE_UNUSED = file->Close();
|
|
delete file;
|
|
}
|
|
};
|
|
std::unique_ptr<File, decltype(deleter)> trace_file(trace_file_in.release(), deleter);
|
|
|
|
Thread* self = Thread::Current();
|
|
{
|
|
MutexLock mu(self, *Locks::trace_lock_);
|
|
if (the_trace_ != nullptr) {
|
|
LOG(ERROR) << "Trace already in progress, ignoring this request";
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Check interval if sampling is enabled
|
|
if (trace_mode == TraceMode::kSampling && interval_us <= 0) {
|
|
LOG(ERROR) << "Invalid sampling interval: " << interval_us;
|
|
ScopedObjectAccess soa(self);
|
|
ThrowRuntimeException("Invalid sampling interval: %d", interval_us);
|
|
return;
|
|
}
|
|
|
|
// Initialize the frequency of timestamp counter updates here. This is needed
|
|
// to get wallclock time from timestamp counter values.
|
|
InitializeTimestampCounters();
|
|
|
|
Runtime* runtime = Runtime::Current();
|
|
|
|
// Enable count of allocs if specified in the flags.
|
|
bool enable_stats = false;
|
|
|
|
// Create Trace object.
|
|
{
|
|
// Suspend JIT here since we are switching runtime to debuggable. Debuggable runtimes cannot use
|
|
// JITed code from before so we need to invalidated all JITed code here. Enter suspend JIT scope
|
|
// to prevent any races with ongoing JIT compilations.
|
|
jit::ScopedJitSuspend suspend_jit;
|
|
// Required since EnableMethodTracing calls ConfigureStubs which visits class linker classes.
|
|
gc::ScopedGCCriticalSection gcs(self,
|
|
gc::kGcCauseInstrumentation,
|
|
gc::kCollectorTypeInstrumentation);
|
|
ScopedSuspendAll ssa(__FUNCTION__);
|
|
MutexLock mu(self, *Locks::trace_lock_);
|
|
if (the_trace_ != nullptr) {
|
|
LOG(ERROR) << "Trace already in progress, ignoring this request";
|
|
} else {
|
|
enable_stats = (flags & kTraceCountAllocs) != 0;
|
|
the_trace_ = new Trace(trace_file.release(), buffer_size, flags, output_mode, trace_mode);
|
|
if (trace_mode == TraceMode::kSampling) {
|
|
CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, nullptr, &RunSamplingThread,
|
|
reinterpret_cast<void*>(interval_us)),
|
|
"Sampling profiler thread");
|
|
the_trace_->interval_us_ = interval_us;
|
|
} else {
|
|
if (!runtime->IsJavaDebuggable()) {
|
|
art::jit::Jit* jit = runtime->GetJit();
|
|
if (jit != nullptr) {
|
|
jit->GetCodeCache()->InvalidateAllCompiledCode();
|
|
jit->GetCodeCache()->TransitionToDebuggable();
|
|
jit->GetJitCompiler()->SetDebuggableCompilerOption(true);
|
|
}
|
|
runtime->SetRuntimeDebugState(art::Runtime::RuntimeDebugState::kJavaDebuggable);
|
|
runtime->GetInstrumentation()->UpdateEntrypointsForDebuggable();
|
|
runtime->DeoptimizeBootImage();
|
|
}
|
|
runtime->GetInstrumentation()->AddListener(
|
|
the_trace_,
|
|
instrumentation::Instrumentation::kMethodEntered |
|
|
instrumentation::Instrumentation::kMethodExited |
|
|
instrumentation::Instrumentation::kMethodUnwind);
|
|
// TODO: In full-PIC mode, we don't need to fully deopt.
|
|
// TODO: We can only use trampoline entrypoints if we are java-debuggable since in that case
|
|
// we know that inlining and other problematic optimizations are disabled. We might just
|
|
// want to use the trampolines anyway since it is faster. It makes the story with disabling
|
|
// jit-gc more complex though.
|
|
runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey,
|
|
the_trace_,
|
|
/*needs_interpreter=*/false);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Can't call this when holding the mutator lock.
|
|
if (enable_stats) {
|
|
runtime->SetStatsEnabled(true);
|
|
}
|
|
}
|
|
|
|
// Records `thread`'s tid -> name mapping in threads_list_. Threads named "Shutdown thread" are
// skipped.
void Trace::UpdateThreadsList(Thread* thread) {
  // TODO(mythria): Clean this up and update threads_list_ when recording the trace event similar
  // to what we do for streaming case.
  std::string name;
  thread->GetThreadName(name);
  // In tests, we destroy VM after already detaching the current thread. When a thread is
  // detached we record the information about the threads_list_. We re-attach the current
  // thread again as a "Shutdown thread" in the process of shutting down. So don't record
  // information about shutdown threads.
  if (name.compare("Shutdown thread") == 0) {
    return;
  }

  // There can be races when unregistering a thread and stopping the trace and it is possible to
  // update the list twice. For example, This information is updated here when stopping tracing and
  // also when a thread is detaching. In thread detach, we first update this information and then
  // remove the thread from the list of active threads. If the tracing was stopped in between these
  // events, we can see two updates for the same thread. Since we need a trace_lock_ it isn't easy
  // to prevent this race (for ex: update this information when holding thread_list_lock_). It is
  // harmless to do two updates so just use overwrite here.
  threads_list_.Overwrite(thread->GetTid(), name);
}
|
|
|
|
void Trace::StopTracing(bool finish_tracing, bool flush_file) {
|
|
Runtime* const runtime = Runtime::Current();
|
|
Thread* const self = Thread::Current();
|
|
pthread_t sampling_pthread = 0U;
|
|
{
|
|
MutexLock mu(self, *Locks::trace_lock_);
|
|
if (the_trace_ == nullptr) {
|
|
LOG(ERROR) << "Trace stop requested, but no trace currently running";
|
|
return;
|
|
}
|
|
// Tell sampling_pthread_ to stop tracing.
|
|
the_trace_->stop_tracing_ = true;
|
|
sampling_pthread = sampling_pthread_;
|
|
}
|
|
|
|
// Make sure that we join before we delete the trace since we don't want to have
|
|
// the sampling thread access a stale pointer. This finishes since the sampling thread exits when
|
|
// the_trace_ is null.
|
|
if (sampling_pthread != 0U) {
|
|
CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown");
|
|
}
|
|
|
|
// Make a copy of the_trace_, so it can be flushed later. We want to reset
|
|
// the_trace_ to nullptr in suspend all scope to prevent any races
|
|
Trace* the_trace = the_trace_;
|
|
bool stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
|
|
// Stop the trace sources adding more entries to the trace buffer and synchronise stores.
|
|
{
|
|
gc::ScopedGCCriticalSection gcs(
|
|
self, gc::kGcCauseInstrumentation, gc::kCollectorTypeInstrumentation);
|
|
jit::ScopedJitSuspend suspend_jit;
|
|
ScopedSuspendAll ssa(__FUNCTION__);
|
|
|
|
if (the_trace->trace_mode_ == TraceMode::kSampling) {
|
|
MutexLock mu(self, *Locks::thread_list_lock_);
|
|
runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
|
|
} else {
|
|
runtime->GetInstrumentation()->RemoveListener(
|
|
the_trace,
|
|
instrumentation::Instrumentation::kMethodEntered |
|
|
instrumentation::Instrumentation::kMethodExited |
|
|
instrumentation::Instrumentation::kMethodUnwind);
|
|
runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
|
|
runtime->GetInstrumentation()->MaybeSwitchRuntimeDebugState(self);
|
|
}
|
|
|
|
// Flush thread specific buffer from all threads before resetting the_trace_ to nullptr.
|
|
// We also flush the buffer when destroying a thread which expects the_trace_ to be valid so
|
|
// make sure that the per-thread buffer is reset before resetting the_trace_.
|
|
{
|
|
MutexLock tl_lock(Thread::Current(), *Locks::thread_list_lock_);
|
|
for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) {
|
|
if (thread->GetMethodTraceBuffer() != nullptr) {
|
|
the_trace_->FlushStreamingBuffer(thread);
|
|
thread->ResetMethodTraceBuffer();
|
|
}
|
|
// Record threads here before resetting the_trace_ to prevent any races between
|
|
// unregistering the thread and resetting the_trace_.
|
|
the_trace->UpdateThreadsList(thread);
|
|
}
|
|
}
|
|
|
|
// Reset the_trace_ by taking a trace_lock
|
|
MutexLock mu(self, *Locks::trace_lock_);
|
|
the_trace_ = nullptr;
|
|
sampling_pthread_ = 0U;
|
|
}
|
|
|
|
// At this point, code may read buf_ as its writers are shutdown
|
|
// and the ScopedSuspendAll above has ensured all stores to buf_
|
|
// are now visible.
|
|
if (finish_tracing) {
|
|
the_trace->FinishTracing();
|
|
}
|
|
if (the_trace->trace_file_.get() != nullptr) {
|
|
// Do not try to erase, so flush and close explicitly.
|
|
if (flush_file) {
|
|
if (the_trace->trace_file_->Flush() != 0) {
|
|
PLOG(WARNING) << "Could not flush trace file.";
|
|
}
|
|
} else {
|
|
the_trace->trace_file_->MarkUnchecked(); // Do not trigger guard.
|
|
}
|
|
if (the_trace->trace_file_->Close() != 0) {
|
|
PLOG(ERROR) << "Could not close trace file.";
|
|
}
|
|
}
|
|
delete the_trace;
|
|
|
|
if (stop_alloc_counting) {
|
|
// Can be racy since SetStatsEnabled is not guarded by any locks.
|
|
runtime->SetStatsEnabled(false);
|
|
}
|
|
}
|
|
|
|
// Flushes `self`'s per-thread streaming buffer into the active trace while holding trace_lock_.
// Does nothing if no trace is active.
void Trace::FlushThreadBuffer(Thread* self) {
  MutexLock mu(self, *Locks::trace_lock_);
  // Re-check under the lock: StopTracing resets the_trace_ to nullptr, and it may have done so
  // between the caller deciding to flush and this thread acquiring trace_lock_.
  if (the_trace_ == nullptr) {
    return;
  }
  the_trace_->FlushStreamingBuffer(self);
}
|
|
|
|
// Stops the active trace without finishing it and without flushing the trace file.
void Trace::Abort() {
  // Do not write anything anymore.
  StopTracing(false, false);
}
|
|
|
|
// Stops the active trace, finishing it and flushing the trace file.
void Trace::Stop() {
  // Finish writing.
  StopTracing(true, true);
}
|
|
|
|
void Trace::Shutdown() {
|
|
if (GetMethodTracingMode() != kTracingInactive) {
|
|
Stop();
|
|
}
|
|
}
|
|
|
|
// Reports the current tracing state: kTracingInactive when no trace is running, otherwise the
// TracingMode value matching the active trace's mode. Reads the_trace_ under trace_lock_.
TracingMode Trace::GetMethodTracingMode() {
  MutexLock mu(Thread::Current(), *Locks::trace_lock_);
  if (the_trace_ == nullptr) {
    return kTracingInactive;
  } else {
    switch (the_trace_->trace_mode_) {
      case TraceMode::kSampling:
        return kSampleProfilingActive;
      case TraceMode::kMethodTracing:
        return kMethodTracingActive;
    }
    // Both TraceMode values handled above return; reaching here means a corrupt mode value.
    LOG(FATAL) << "Unreachable";
    UNREACHABLE();
  }
}
|
|
|
|
static constexpr size_t kMinBufSize = 18U; // Trace header is up to 18B.
|
|
// Size of per-thread buffer size. The value is chosen arbitrarily. This value
|
|
// should be greater than kMinBufSize.
|
|
static constexpr size_t kPerThreadBufSize = 512 * 1024;
|
|
static_assert(kPerThreadBufSize > kMinBufSize);
|
|
|
|
namespace {
|
|
|
|
// Derives the clock source from the trace flags: both clock flags set selects the dual clock, a
// single flag selects the corresponding clock, and neither falls back to
// kDefaultTraceClockSource.
TraceClockSource GetClockSourceFromFlags(int flags) {
  bool need_wall = flags & Trace::TraceFlag::kTraceClockSourceWallClock;
  bool need_thread_cpu = flags & Trace::TraceFlag::kTraceClockSourceThreadCpu;
  if (need_wall && need_thread_cpu) {
    return TraceClockSource::kDual;
  } else if (need_wall) {
    return TraceClockSource::kWall;
  } else if (need_thread_cpu) {
    return TraceClockSource::kThreadCpu;
  } else {
    return kDefaultTraceClockSource;
  }
}
|
|
|
|
} // namespace
|
|
|
|
// Constructs a trace and writes the binary trace header into the start of the buffer.
//
// trace_file:  output file; may only be null in DDMS output mode (checked below).
// buffer_size: requested buffer size in bytes; raised to kMinBufSize if smaller.
// flags:       trace flags; also determine the clock source.
//
// Header layout (little-endian): magic at offset 0 (4 bytes), version at 4 (2 bytes), header
// length at 6 (2 bytes), start time in microseconds at 8 (8 bytes) and, for version >= the
// dual-clock version, the record size at 16 (2 bytes).
Trace::Trace(File* trace_file,
             size_t buffer_size,
             int flags,
             TraceOutputMode output_mode,
             TraceMode trace_mode)
    : trace_file_(trace_file),
      buf_(new uint8_t[std::max(kMinBufSize, buffer_size)]()),
      flags_(flags),
      trace_output_mode_(output_mode),
      trace_mode_(trace_mode),
      clock_source_(GetClockSourceFromFlags(flags)),
      buffer_size_(std::max(kMinBufSize, buffer_size)),
      start_time_(GetMicroTime(GetTimestamp())),
      clock_overhead_ns_(GetClockOverheadNanoSeconds()),
      overflow_(false),
      interval_us_(0),
      stop_tracing_(false),
      tracing_lock_("tracing lock", LockLevel::kTracingStreamingLock) {
  CHECK_IMPLIES(trace_file == nullptr, output_mode == TraceOutputMode::kDDMS);

  uint16_t trace_version = GetTraceVersion(clock_source_);
  if (output_mode == TraceOutputMode::kStreaming) {
    // Streaming traces are marked by setting the 0xF0 bits in the version field.
    trace_version |= 0xF0U;
  }
  // Set up the beginning of the trace.
  // NOTE(review): this clears kTraceHeaderLength bytes, which assumes the buffer allocated above
  // is at least that large - confirm kMinBufSize guarantees it.
  memset(buf_.get(), 0, kTraceHeaderLength);
  Append4LE(buf_.get(), kTraceMagicValue);
  Append2LE(buf_.get() + 4, trace_version);
  Append2LE(buf_.get() + 6, kTraceHeaderLength);
  Append8LE(buf_.get() + 8, start_time_);
  if (trace_version >= kTraceVersionDualClock) {
    uint16_t record_size = GetRecordSize(clock_source_);
    Append2LE(buf_.get() + 16, record_size);
  }
  static_assert(18 <= kMinBufSize, "Minimum buffer size not large enough for trace header");

  // Records start right after the header.
  cur_offset_.store(kTraceHeaderLength, std::memory_order_relaxed);

  if (output_mode == TraceOutputMode::kStreaming) {
    // Flush the header information to the file. We use a per thread buffer, so
    // it is easier to just write the header information directly to file.
    if (!trace_file_->WriteFully(buf_.get(), kTraceHeaderLength)) {
      PLOG(WARNING) << "Failed streaming a tracing event.";
    }
    // The header is already in the file, so the buffer offset restarts at 0.
    cur_offset_.store(0, std::memory_order_relaxed);
  }
}
|
|
|
|
// Reads `bytes` bytes from `buf` as a little-endian unsigned integer (buf[0] is the least
// significant byte). `bytes` is expected to be at most 8, the width of the result.
static uint64_t ReadBytes(uint8_t* buf, size_t bytes) {
  uint64_t ret = 0;
  for (size_t i = 0; i < bytes; ++i) {
    ret |= static_cast<uint64_t>(buf[i]) << (i * 8);
  }
  return ret;
}
|
|
|
|
void Trace::DumpBuf(uint8_t* buf, size_t buf_size, TraceClockSource clock_source) {
|
|
uint8_t* ptr = buf + kTraceHeaderLength;
|
|
uint8_t* end = buf + buf_size;
|
|
|
|
MutexLock mu(Thread::Current(), tracing_lock_);
|
|
while (ptr < end) {
|
|
uint32_t tmid = ReadBytes(ptr + 2, sizeof(tmid));
|
|
ArtMethod* method = DecodeTraceMethod(tmid);
|
|
TraceAction action = DecodeTraceAction(tmid);
|
|
LOG(INFO) << ArtMethod::PrettyMethod(method) << " " << static_cast<int>(action);
|
|
ptr += GetRecordSize(clock_source);
|
|
}
|
|
}
|
|
|
|
void Trace::FinishTracing() {
|
|
size_t final_offset = 0;
|
|
if (trace_output_mode_ != TraceOutputMode::kStreaming) {
|
|
final_offset = cur_offset_.load(std::memory_order_relaxed);
|
|
}
|
|
|
|
// Compute elapsed time.
|
|
uint64_t elapsed = GetMicroTime(GetTimestamp()) - start_time_;
|
|
|
|
std::ostringstream os;
|
|
|
|
os << StringPrintf("%cversion\n", kTraceTokenChar);
|
|
os << StringPrintf("%d\n", GetTraceVersion(clock_source_));
|
|
os << StringPrintf("data-file-overflow=%s\n", overflow_ ? "true" : "false");
|
|
if (UseThreadCpuClock()) {
|
|
if (UseWallClock()) {
|
|
os << StringPrintf("clock=dual\n");
|
|
} else {
|
|
os << StringPrintf("clock=thread-cpu\n");
|
|
}
|
|
} else {
|
|
os << StringPrintf("clock=wall\n");
|
|
}
|
|
os << StringPrintf("elapsed-time-usec=%" PRIu64 "\n", elapsed);
|
|
if (trace_output_mode_ != TraceOutputMode::kStreaming) {
|
|
size_t num_records = (final_offset - kTraceHeaderLength) / GetRecordSize(clock_source_);
|
|
os << StringPrintf("num-method-calls=%zd\n", num_records);
|
|
}
|
|
os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead_ns_);
|
|
os << StringPrintf("vm=art\n");
|
|
os << StringPrintf("pid=%d\n", getpid());
|
|
if ((flags_ & kTraceCountAllocs) != 0) {
|
|
os << "alloc-count=" << Runtime::Current()->GetStat(KIND_ALLOCATED_OBJECTS) << "\n";
|
|
os << "alloc-size=" << Runtime::Current()->GetStat(KIND_ALLOCATED_BYTES) << "\n";
|
|
os << "gc-count=" << Runtime::Current()->GetStat(KIND_GC_INVOCATIONS) << "\n";
|
|
}
|
|
os << StringPrintf("%cthreads\n", kTraceTokenChar);
|
|
DumpThreadList(os);
|
|
os << StringPrintf("%cmethods\n", kTraceTokenChar);
|
|
DumpMethodList(os);
|
|
os << StringPrintf("%cend\n", kTraceTokenChar);
|
|
std::string header(os.str());
|
|
|
|
if (trace_output_mode_ == TraceOutputMode::kStreaming) {
|
|
// It is expected that this method is called when all other threads are suspended, so there
|
|
// cannot be any writes to trace_file_ after finish tracing.
|
|
// Write a special token to mark the end of trace records and the start of
|
|
// trace summary.
|
|
uint8_t buf[7];
|
|
Append2LE(buf, 0);
|
|
buf[2] = kOpTraceSummary;
|
|
Append4LE(buf + 3, static_cast<uint32_t>(header.length()));
|
|
// Write the trace summary. The summary is identical to the file header when
|
|
// the output mode is not streaming (except for methods).
|
|
if (!trace_file_->WriteFully(buf, sizeof(buf)) ||
|
|
!trace_file_->WriteFully(header.c_str(), header.length())) {
|
|
PLOG(WARNING) << "Failed streaming a tracing event.";
|
|
}
|
|
} else {
|
|
if (trace_file_.get() == nullptr) {
|
|
std::vector<uint8_t> data;
|
|
data.resize(header.length() + final_offset);
|
|
memcpy(data.data(), header.c_str(), header.length());
|
|
memcpy(data.data() + header.length(), buf_.get(), final_offset);
|
|
Runtime::Current()->GetRuntimeCallbacks()->DdmPublishChunk(CHUNK_TYPE("MPSE"),
|
|
ArrayRef<const uint8_t>(data));
|
|
const bool kDumpTraceInfo = false;
|
|
if (kDumpTraceInfo) {
|
|
LOG(INFO) << "Trace sent:\n" << header;
|
|
DumpBuf(buf_.get(), final_offset, clock_source_);
|
|
}
|
|
} else {
|
|
if (!trace_file_->WriteFully(header.c_str(), header.length()) ||
|
|
!trace_file_->WriteFully(buf_.get(), final_offset)) {
|
|
std::string detail(StringPrintf("Trace data write failed: %s", strerror(errno)));
|
|
PLOG(ERROR) << detail;
|
|
ThrowRuntimeException("%s", detail.c_str());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Dex PC callback. Method tracing does not expect to receive this event, so all it does is
// report the method and the new dex PC in the error log.
void Trace::DexPcMoved(Thread* thread ATTRIBUTE_UNUSED,
                       Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                       ArtMethod* method,
                       uint32_t new_dex_pc) {
  LOG(ERROR) << "Unexpected dex PC event in tracing "
             << ArtMethod::PrettyMethod(method)
             << " "
             << new_dex_pc;
}
|
|
|
|
// Field-read callback. Method tracing does not expect to receive this event, so all it does is
// report the method and dex PC in the error log.
void Trace::FieldRead(Thread* thread ATTRIBUTE_UNUSED,
                      Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                      ArtMethod* method,
                      uint32_t dex_pc,
                      ArtField* field ATTRIBUTE_UNUSED)
    REQUIRES_SHARED(Locks::mutator_lock_) {
  LOG(ERROR) << "Unexpected field read event in tracing "
             << ArtMethod::PrettyMethod(method)
             << " "
             << dex_pc;
}
|
|
|
|
// Field-write callback. Method tracing does not expect to receive this event, so all it does is
// report the method and dex PC in the error log.
void Trace::FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
                         Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                         ArtMethod* method,
                         uint32_t dex_pc,
                         ArtField* field ATTRIBUTE_UNUSED,
                         const JValue& field_value ATTRIBUTE_UNUSED)
    REQUIRES_SHARED(Locks::mutator_lock_) {
  LOG(ERROR) << "Unexpected field write event in tracing "
             << ArtMethod::PrettyMethod(method)
             << " "
             << dex_pc;
}
|
|
|
|
// Method-entry callback: samples the clocks for `thread` and records a kTraceMethodEnter event
// for `method`.
void Trace::MethodEntered(Thread* thread, ArtMethod* method) {
  // Both values stay 0 unless ReadClocks() fills them in for the clocks in use.
  uint32_t thread_clock_diff = 0;
  uint64_t timestamp_counter = 0;
  ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
  LogMethodTraceEvent(thread, method, kTraceMethodEnter, thread_clock_diff, timestamp_counter);
}
|
|
|
|
// Method-exit callback: samples the clocks for `thread` and records a kTraceMethodExit event for
// `method`. The frame and return value are not used.
void Trace::MethodExited(Thread* thread,
                         ArtMethod* method,
                         instrumentation::OptionalFrame frame ATTRIBUTE_UNUSED,
                         JValue& return_value ATTRIBUTE_UNUSED) {
  // Both values stay 0 unless ReadClocks() fills them in for the clocks in use.
  uint32_t thread_clock_diff = 0;
  uint64_t timestamp_counter = 0;
  ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
  LogMethodTraceEvent(thread, method, kTraceMethodExit, thread_clock_diff, timestamp_counter);
}
|
|
|
|
// Method-unwind callback: samples the clocks for `thread` and records a kTraceUnroll event for
// `method`. The dex PC is not used.
void Trace::MethodUnwind(Thread* thread,
                         ArtMethod* method,
                         uint32_t dex_pc ATTRIBUTE_UNUSED) {
  // Both values stay 0 unless ReadClocks() fills them in for the clocks in use.
  uint32_t thread_clock_diff = 0;
  uint64_t timestamp_counter = 0;
  ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
  LogMethodTraceEvent(thread, method, kTraceUnroll, thread_clock_diff, timestamp_counter);
}
|
|
|
|
// Exception-thrown callback. Method tracing does not expect this event; it is only reported in
// the error log.
void Trace::ExceptionThrown(Thread* thread ATTRIBUTE_UNUSED,
                            Handle<mirror::Throwable> exception_object ATTRIBUTE_UNUSED)
    REQUIRES_SHARED(Locks::mutator_lock_) {
  static constexpr char kMessage[] = "Unexpected exception thrown event in tracing";
  LOG(ERROR) << kMessage;
}
|
|
|
|
// Exception-handled callback. Method tracing does not expect this event; it is only reported in
// the error log. The message names the "handled" event so that it can be told apart from the one
// logged by ExceptionThrown().
void Trace::ExceptionHandled(Thread* thread ATTRIBUTE_UNUSED,
                             Handle<mirror::Throwable> exception_object ATTRIBUTE_UNUSED)
    REQUIRES_SHARED(Locks::mutator_lock_) {
  LOG(ERROR) << "Unexpected exception handled event in tracing";
}
|
|
|
|
// Branch callback. Method tracing does not expect this event; the method is reported in the
// error log. The message ends with a space so the method name is not glued to the word
// "tracing", matching the other unexpected-event messages in this file.
void Trace::Branch(Thread* /*thread*/, ArtMethod* method,
                   uint32_t /*dex_pc*/, int32_t /*dex_pc_offset*/)
    REQUIRES_SHARED(Locks::mutator_lock_) {
  LOG(ERROR) << "Unexpected branch event in tracing " << ArtMethod::PrettyMethod(method);
}
|
|
|
|
// Watched-frame-pop callback. Method tracing does not expect this event; it is only reported in
// the error log.
void Trace::WatchedFramePop(Thread* self ATTRIBUTE_UNUSED,
                            const ShadowFrame& frame ATTRIBUTE_UNUSED) {
  static constexpr char kMessage[] = "Unexpected WatchedFramePop event in tracing";
  LOG(ERROR) << kMessage;
}
|
|
|
|
// Samples the clocks that this trace uses on behalf of `thread`.
//
// `thread_clock_diff` receives the thread CPU time elapsed since the thread's trace clock base.
// It is left untouched when the thread CPU clock is not in use, and also on the thread's first
// event, which only establishes the base.
// `timestamp_counter` receives the raw GetTimestamp() value, and is left untouched when the wall
// clock is not in use.
void Trace::ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint64_t* timestamp_counter) {
  if (UseThreadCpuClock()) {
    const uint64_t base = thread->GetTraceClockBase();
    if (UNLIKELY(base == 0)) {
      // A zero base means no event has been seen for this thread yet: remember "now" as the base.
      thread->SetTraceClockBase(thread->GetCpuMicroTime());
    } else {
      *thread_clock_diff = thread->GetCpuMicroTime() - base;
    }
  }

  if (UseWallClock()) {
    *timestamp_counter = GetTimestamp();
  }
}
|
|
|
|
// Builds the tab-separated, newline-terminated description of `method` used in the methods
// section of a trace: encoded id, declaring class, method name, signature and source file.
//
// `method_index` is shifted left by TraceActionBits, the same shift EncodeEventEntry() applies
// before or-ing in the action.
std::string Trace::GetMethodLine(ArtMethod* method, uint32_t method_index) {
  // For a proxy method, describe the interface method instead.
  ArtMethod* const described = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);

  // Keep the temporaries alive in named strings while their c_str() pointers are being used.
  const std::string class_name = PrettyDescriptor(described->GetDeclaringClassDescriptor());
  const std::string signature = described->GetSignature().ToString();

  return StringPrintf("%#x\t%s\t%s\t%s\t%s\n",
                      (method_index << TraceActionBits),
                      class_name.c_str(),
                      described->GetName(),
                      signature.c_str(),
                      described->GetDeclaringClassSourceFile());
}
|
|
|
|
// Streaming-mode recording: appends one event to the per-thread trace buffer of `thread`.
//
// The buffer is created lazily on the thread's first event, at which point a kOpNewThread block
// (thread id and name) is written straight to trace_file_. When the buffer cannot take another
// event it is flushed with FlushStreamingBuffer() and reused from the start.
//
// Each event occupies these uintptr_t slots, in order: the method pointer, the action, the thread
// clock delta (only with the thread CPU clock), and the raw timestamp (only with the wall clock;
// split into high and low halves on 32-bit runtimes).
void Trace::RecordStreamingMethodEvent(Thread* thread,
                                       ArtMethod* method,
                                       TraceAction action,
                                       uint32_t thread_clock_diff,
                                       uint64_t timestamp_counter) {
  uintptr_t* method_trace_buffer = thread->GetMethodTraceBuffer();
  size_t* current_offset = thread->GetMethodTraceIndexPtr();
  // Initialize the buffer lazily. It's just simpler to keep the creation at one place.
  if (method_trace_buffer == nullptr) {
    method_trace_buffer = new uintptr_t[std::max(kMinBufSize, kPerThreadBufSize)]();
    thread->SetMethodTraceBuffer(method_trace_buffer);
    *current_offset = 0;

    // This is the first event from this thread, so first record information about the thread.
    std::string thread_name;
    thread->GetThreadName(thread_name);
    static constexpr size_t kThreadNameHeaderSize = 7;
    uint8_t header[kThreadNameHeaderSize];
    Append2LE(header, 0);
    header[2] = kOpNewThread;
    // We use only 16 bits to encode thread id. On Android, we don't expect to use more than
    // 16-bits for a Tid. For 32-bit platforms it is always ensured we use less than 16 bits.
    // See __check_max_thread_id in bionic for more details. Even on 64-bit the max threads
    // is currently less than 65536.
    // TODO(mythria): On host, we know thread ids can be greater than 16 bits. Consider adding
    // a map similar to method ids.
    DCHECK(!kIsTargetBuild || thread->GetTid() < (1 << 16));
    Append2LE(header + 3, static_cast<uint16_t>(thread->GetTid()));
    Append2LE(header + 5, static_cast<uint16_t>(thread_name.length()));

    {
      // tracing_lock_ keeps the header and the name together in the file.
      MutexLock mu(Thread::Current(), tracing_lock_);
      if (!trace_file_->WriteFully(header, kThreadNameHeaderSize) ||
          !trace_file_->WriteFully(reinterpret_cast<const uint8_t*>(thread_name.c_str()),
                                   thread_name.length())) {
        PLOG(WARNING) << "Failed streaming a tracing event.";
      }
    }
  }

  // Count exactly the slots written below, so the space check does not depend on the slack of
  // the ">=" comparison when a 32-bit runtime stores the timestamp in two slots.
  const bool split_timestamp = (art::kRuntimePointerSize == PointerSize::k32);
  size_t required_entries = 2;  // Method pointer and action.
  if (UseThreadCpuClock()) {
    required_entries += 1;
  }
  if (UseWallClock()) {
    required_entries += split_timestamp ? 2 : 1;
  }
  if (*current_offset + required_entries >= kPerThreadBufSize) {
    // We don't have space for further entries. Flush the contents of the buffer and reuse the
    // buffer to store contents. Reset the index to the start of the buffer.
    FlushStreamingBuffer(thread);
    *current_offset = 0;
  }

  // Record entry in per-thread trace buffer.
  size_t current_index = *current_offset;
  method_trace_buffer[current_index++] = reinterpret_cast<uintptr_t>(method);
  // TODO(mythria): We only need two bits to record the action. Consider merging
  // it with the method entry to save space.
  method_trace_buffer[current_index++] = action;
  if (UseThreadCpuClock()) {
    method_trace_buffer[current_index++] = thread_clock_diff;
  }
  if (UseWallClock()) {
    if (split_timestamp) {
      // On 32-bit architectures store timestamp counter as two 32-bit values.
      method_trace_buffer[current_index++] = timestamp_counter >> 32;
      method_trace_buffer[current_index++] = static_cast<uint32_t>(timestamp_counter);
    } else {
      method_trace_buffer[current_index++] = timestamp_counter;
    }
  }
  *current_offset = current_index;
}
|
|
|
|
void Trace::WriteToBuf(uint8_t* header,
|
|
size_t header_size,
|
|
const std::string& data,
|
|
size_t* current_index,
|
|
uint8_t* buffer,
|
|
size_t buffer_size) {
|
|
EnsureSpace(buffer, current_index, buffer_size, header_size);
|
|
memcpy(buffer + *current_index, header, header_size);
|
|
*current_index += header_size;
|
|
|
|
EnsureSpace(buffer, current_index, buffer_size, data.length());
|
|
if (data.length() < buffer_size) {
|
|
memcpy(buffer + *current_index, reinterpret_cast<const uint8_t*>(data.c_str()), data.length());
|
|
*current_index += data.length();
|
|
} else {
|
|
// The data is larger than buffer, so write directly to the file. EnsureSpace should have
|
|
// flushed any data in the buffer.
|
|
DCHECK_EQ(*current_index, 0U);
|
|
if (!trace_file_->WriteFully(reinterpret_cast<const uint8_t*>(data.c_str()), data.length())) {
|
|
PLOG(WARNING) << "Failed streaming a tracing event.";
|
|
}
|
|
}
|
|
}
|
|
|
|
// Encodes every event in the per-thread trace buffer of `thread` and writes the result to
// trace_file_.
//
// Events are decoded from the slot layout produced by RecordStreamingMethodEvent(). The first
// time a method is seen it is assigned the next method index and a kOpNewMethod block carrying
// its GetMethodLine() text is emitted ahead of the event. The caller is responsible for resetting
// the thread's buffer index afterwards.
void Trace::FlushStreamingBuffer(Thread* thread) {
  // Take a tracing_lock_ to serialize writes across threads. We also need to allocate a unique
  // method id for each method. We do that by maintaining a map from id to method for each newly
  // seen method. tracing_lock_ is required to serialize these.
  MutexLock mu(Thread::Current(), tracing_lock_);
  uintptr_t* method_trace_buffer = thread->GetMethodTraceBuffer();
  // Create a temporary buffer to encode the trace events from the specified thread.
  size_t buffer_size = kPerThreadBufSize;
  size_t current_index = 0;
  std::unique_ptr<uint8_t[]> buffer(new uint8_t[std::max(kMinBufSize, buffer_size)]);

  // The encoded record size is fixed for the lifetime of the trace, so look it up once.
  const size_t record_size = GetRecordSize(clock_source_);
  DCHECK_LT(record_size, kPerThreadBufSize);

  size_t num_entries = *(thread->GetMethodTraceIndexPtr());
  for (size_t entry_index = 0; entry_index < num_entries;) {
    ArtMethod* method = reinterpret_cast<ArtMethod*>(method_trace_buffer[entry_index++]);
    TraceAction action = DecodeTraceAction(method_trace_buffer[entry_index++]);
    uint32_t thread_time = 0;
    uint32_t wall_time = 0;
    if (UseThreadCpuClock()) {
      thread_time = method_trace_buffer[entry_index++];
    }
    if (UseWallClock()) {
      uint64_t timestamp = method_trace_buffer[entry_index++];
      if (art::kRuntimePointerSize == PointerSize::k32) {
        // On 32-bit architectures timestamp is stored as two 32-bit values.
        timestamp = (timestamp << 32 | method_trace_buffer[entry_index++]);
      }
      // The recorded value is a raw timestamp; convert it to microseconds since trace start.
      wall_time = GetMicroTime(timestamp) - start_time_;
    }

    auto it = art_method_id_map_.find(method);
    uint32_t method_index = 0;
    // If we haven't seen this method before record information about the method.
    if (it == art_method_id_map_.end()) {
      art_method_id_map_.emplace(method, current_method_index_);
      method_index = current_method_index_;
      current_method_index_++;
      // Write a special block with the name.
      std::string method_line(GetMethodLine(method, method_index));
      static constexpr size_t kMethodNameHeaderSize = 5;
      uint8_t method_header[kMethodNameHeaderSize];
      DCHECK_LT(kMethodNameHeaderSize, kPerThreadBufSize);
      Append2LE(method_header, 0);
      method_header[2] = kOpNewMethod;
      Append2LE(method_header + 3, static_cast<uint16_t>(method_line.length()));
      WriteToBuf(method_header,
                 kMethodNameHeaderSize,
                 method_line,
                 &current_index,
                 buffer.get(),
                 buffer_size);
    } else {
      method_index = it->second;
    }

    // Make room for the record (flushing staged bytes if needed), then encode it in place.
    EnsureSpace(buffer.get(), &current_index, buffer_size, record_size);
    EncodeEventEntry(
        buffer.get() + current_index, thread, method_index, action, thread_time, wall_time);
    current_index += record_size;
  }

  // Flush the contents of buffer to file.
  if (!trace_file_->WriteFully(buffer.get(), current_index)) {
    PLOG(WARNING) << "Failed streaming a tracing event.";
  }
}
|
|
|
|
// Non-streaming recording: reserves one record in the shared trace buffer buf_ and encodes the
// event into it.
//
// If the buffer has no room left for another record, overflow_ is set and the event is dropped.
void Trace::RecordMethodEvent(Thread* thread,
                              ArtMethod* method,
                              TraceAction action,
                              uint32_t thread_clock_diff,
                              uint64_t timestamp_counter) {
  // Reserve [old_offset, new_offset) by advancing cur_offset_ atomically. Several threads may
  // get here concurrently, so retry until the compare-exchange succeeds; whenever it fails,
  // old_offset is refreshed with the value currently stored in cur_offset_.
  int32_t new_offset;
  int32_t old_offset = 0;
  old_offset = cur_offset_.load(std::memory_order_relaxed);  // Speculative read
  for (;;) {
    new_offset = old_offset + GetRecordSize(clock_source_);
    if (static_cast<size_t>(new_offset) > buffer_size_) {
      overflow_ = true;
      return;
    }
    if (cur_offset_.compare_exchange_weak(old_offset, new_offset, std::memory_order_relaxed)) {
      break;
    }
  }

  // Write the record into the slot reserved above.
  //
  // These writes to the tracing buffer are synchronised with the future reads that (only) occur
  // under FinishTracing(). The callers of FinishTracing() acquire locks and (implicitly)
  // synchronise the buffer memory.
  uint8_t* const slot = buf_.get() + old_offset;
  uint32_t wall_clock_diff = GetMicroTime(timestamp_counter) - start_time_;
  // NOTE(review): the lock is presumably needed by EncodeTraceMethod() - confirm.
  MutexLock mu(Thread::Current(), tracing_lock_);
  EncodeEventEntry(
      slot, thread, EncodeTraceMethod(method), action, thread_clock_diff, wall_clock_diff);
}
|
|
|
|
// Records one method event, dispatching on the output mode: streaming traces use the per-thread
// buffer path, every other mode uses the shared buffer path.
//
// This method is called in both tracing modes (method and sampling). In sampling mode, this
// method is only called by the sampling thread. In method tracing mode, it can be called
// concurrently.
void Trace::LogMethodTraceEvent(Thread* thread,
                                ArtMethod* method,
                                TraceAction action,
                                uint32_t thread_clock_diff,
                                uint64_t timestamp_counter) {
  // Ensure we always use the non-obsolete version of the method so that entry/exit events have the
  // same pointer value.
  ArtMethod* const current_method = method->GetNonObsoleteMethod();

  if (trace_output_mode_ != TraceOutputMode::kStreaming) {
    RecordMethodEvent(thread, current_method, action, thread_clock_diff, timestamp_counter);
    return;
  }
  RecordStreamingMethodEvent(thread, current_method, action, thread_clock_diff, timestamp_counter);
}
|
|
|
|
void Trace::EncodeEventEntry(uint8_t* ptr,
|
|
Thread* thread,
|
|
uint32_t method_index,
|
|
TraceAction action,
|
|
uint32_t thread_clock_diff,
|
|
uint32_t wall_clock_diff) {
|
|
static constexpr size_t kPacketSize = 14U; // The maximum size of data in a packet.
|
|
uint32_t method_value = (method_index << TraceActionBits) | action;
|
|
Append2LE(ptr, thread->GetTid());
|
|
Append4LE(ptr + 2, method_value);
|
|
ptr += 6;
|
|
|
|
if (UseThreadCpuClock()) {
|
|
Append4LE(ptr, thread_clock_diff);
|
|
ptr += 4;
|
|
}
|
|
if (UseWallClock()) {
|
|
Append4LE(ptr, wall_clock_diff);
|
|
}
|
|
static_assert(kPacketSize == 2 + 4 + 4 + 4, "Packet size incorrect.");
|
|
}
|
|
|
|
void Trace::EnsureSpace(uint8_t* buffer,
|
|
size_t* current_index,
|
|
size_t buffer_size,
|
|
size_t required_size) {
|
|
if (*current_index + required_size < buffer_size) {
|
|
return;
|
|
}
|
|
|
|
if (!trace_file_->WriteFully(buffer, *current_index)) {
|
|
PLOG(WARNING) << "Failed streaming a tracing event.";
|
|
}
|
|
*current_index = 0;
|
|
}
|
|
|
|
void Trace::DumpMethodList(std::ostream& os) {
|
|
MutexLock mu(Thread::Current(), tracing_lock_);
|
|
for (auto const& entry : art_method_id_map_) {
|
|
os << GetMethodLine(entry.first, entry.second);
|
|
}
|
|
}
|
|
|
|
void Trace::DumpThreadList(std::ostream& os) {
|
|
for (const auto& it : threads_list_) {
|
|
// We use only 16 bits to encode thread id. On Android, we don't expect to use more than
|
|
// 16-bits for a Tid. For 32-bit platforms it is always ensured we use less than 16 bits.
|
|
// See __check_max_thread_id in bionic for more details. Even on 64-bit the max threads
|
|
// is currently less than 65536.
|
|
// TODO(mythria): On host, we know thread ids can be greater than 16 bits. Consider adding
|
|
// a map similar to method ids.
|
|
DCHECK(!kIsTargetBuild || it.first < (1 << 16));
|
|
os << static_cast<uint16_t>(it.first) << "\t" << it.second << "\n";
|
|
}
|
|
}
|
|
|
|
// Lets the active trace, if there is one, update its thread list with `thread`. Does nothing when
// no trace is running. Locks::trace_lock_ is held for the duration of the call.
void Trace::StoreExitingThreadInfo(Thread* thread) {
  MutexLock mu(thread, *Locks::trace_lock_);
  if (the_trace_ == nullptr) {
    return;
  }
  the_trace_->UpdateThreadsList(thread);
}
|
|
|
|
// Returns the output mode of the running trace. Aborts via CHECK when no trace is running.
Trace::TraceOutputMode Trace::GetOutputMode() {
  Thread* const self = Thread::Current();
  MutexLock mu(self, *Locks::trace_lock_);
  CHECK(the_trace_ != nullptr) << "Trace output mode requested, but no trace currently running";
  return the_trace_->trace_output_mode_;
}
|
|
|
|
// Returns the trace mode of the running trace. Aborts via CHECK when no trace is running.
Trace::TraceMode Trace::GetMode() {
  Thread* const self = Thread::Current();
  MutexLock mu(self, *Locks::trace_lock_);
  CHECK(the_trace_ != nullptr) << "Trace mode requested, but no trace currently running";
  return the_trace_->trace_mode_;
}
|
|
|
|
int Trace::GetFlags() {
|
|
MutexLock mu(Thread::Current(), *Locks::trace_lock_);
|
|
CHECK(the_trace_ != nullptr) << "Trace flags requested, but no trace currently running";
|
|
return the_trace_->flags_;
|
|
}
|
|
|
|
int Trace::GetIntervalInMillis() {
|
|
MutexLock mu(Thread::Current(), *Locks::trace_lock_);
|
|
CHECK(the_trace_ != nullptr) << "Trace interval requested, but no trace currently running";
|
|
return the_trace_->interval_us_;
|
|
}
|
|
|
|
size_t Trace::GetBufferSize() {
|
|
MutexLock mu(Thread::Current(), *Locks::trace_lock_);
|
|
CHECK(the_trace_ != nullptr) << "Trace buffer size requested, but no trace currently running";
|
|
return the_trace_->buffer_size_;
|
|
}
|
|
|
|
bool Trace::IsTracingEnabled() {
|
|
MutexLock mu(Thread::Current(), *Locks::trace_lock_);
|
|
return the_trace_ != nullptr;
|
|
}
|
|
|
|
} // namespace art
|