/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "trace.h"

// NOTE(review): the names of the two system headers were lost when this file was flattened.
// <unistd.h> is required for usleep()/getpid(); <sys/uio.h> is presumed - confirm against the
// upstream revision.
#include <sys/uio.h>
#include <unistd.h>

#include "android-base/macros.h"
#include "android-base/stringprintf.h"
#include "art_method-inl.h"
#include "base/casts.h"
#include "base/enums.h"
#include "base/os.h"
#include "base/stl_util.h"
#include "base/systrace.h"
#include "base/time_utils.h"
#include "base/unix_file/fd_file.h"
#include "base/utils.h"
#include "class_linker.h"
#include "common_throws.h"
#include "debugger.h"
#include "dex/descriptors_names.h"
#include "dex/dex_file-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/scoped_gc_critical_section.h"
#include "instrumentation.h"
#include "jit/jit.h"
#include "jit/jit_code_cache.h"
#include "mirror/class-inl.h"
#include "mirror/dex_cache-inl.h"
#include "mirror/object-inl.h"
#include "mirror/object_array-inl.h"
#include "nativehelper/scoped_local_ref.h"
#include "scoped_thread_state_change-inl.h"
#include "stack.h"
#include "thread.h"
#include "thread_list.h"

namespace art {

using android::base::StringPrintf;

// Number of low bits of an encoded trace method id that carry the TraceAction.
static constexpr size_t TraceActionBits = MinimumBitsToStore(
    static_cast<size_t>(kTraceMethodActionMask));

// Opcodes of the special records emitted in streaming mode.
static constexpr uint8_t kOpNewMethod = 1U;
static constexpr uint8_t kOpNewThread = 2U;
static constexpr uint8_t kOpTraceSummary = 3U;

static const char     kTraceTokenChar             = '*';
static const uint16_t kTraceHeaderLength          = 32;
static const uint32_t kTraceMagicValue            = 0x574f4c53;
static const uint16_t kTraceVersionSingleClock    = 2;
static const uint16_t kTraceVersionDualClock      = 3;
static const uint16_t kTraceRecordSizeSingleClock = 10;  // using v2
static const uint16_t kTraceRecordSizeDualClock   = 14;  // using v3 with two timestamps

TraceClockSource Trace::default_clock_source_ = kDefaultTraceClockSource;

Trace* volatile Trace::the_trace_ = nullptr;
pthread_t Trace::sampling_pthread_ = 0U;
std::unique_ptr<std::vector<ArtMethod*>> Trace::temp_stack_trace_;

// The key identifying the tracer to update instrumentation.
static constexpr const char* kTracerInstrumentationKey = "Tracer";

// Extracts the TraceAction stored in the low bits of an encoded trace method id.
static TraceAction DecodeTraceAction(uint32_t tmid) {
  return static_cast<TraceAction>(tmid & kTraceMethodActionMask);
}

namespace {

// Scaling factor to convert timestamp counter into wall clock time reported in micro seconds.
// This is initialized at the start of tracing using the timestamp counter update frequency.
// See InitializeTimestampCounters for more details.
double tsc_to_microsec_scaling_factor = -1.0;

// Returns the raw value of the architecture's timestamp counter. On architectures without a
// dedicated branch below this falls back to MicroTime().
uint64_t GetTimestamp() {
  uint64_t t = 0;
#if defined(__arm__)
  // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34
  // Q and R specify that they should be written to lower and upper halves of 64-bit value.
  // See: https://llvm.org/docs/LangRef.html#asm-template-argument-modifiers
  asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r"(t));
#elif defined(__aarch64__)
  // See Arm Architecture Registers Armv8 section System Registers
  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
#elif defined(__i386__) || defined(__x86_64__)
  // rdtsc returns two 32-bit values in rax and rdx even on 64-bit architectures.
  unsigned int lo, hi;
  asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
  t = (static_cast<uint64_t>(hi) << 32) | lo;
#else
  t = MicroTime();
#endif
  return t;
}

#if defined(__i386__) || defined(__x86_64__)
// Here we compute the scaling factor by sleeping for a millisecond. Alternatively, we could
// generate raw timestamp counter and also time using clock_gettime at the start and the end of the
// trace. We can compute the frequency of timestamp counter updates in the post processing step
// using these two samples. However, that would require a change in Android Studio which is the main
// consumer of these profiles. For now, just compute the frequency of tsc updates here.
double computeScalingFactor() {
  uint64_t start = MicroTime();
  uint64_t start_tsc = GetTimestamp();
  // Sleep for one millisecond.
  usleep(1000);
  uint64_t diff_tsc = GetTimestamp() - start_tsc;
  uint64_t diff_time = MicroTime() - start;
  double scaling_factor = static_cast<double>(diff_time) / diff_tsc;
  DCHECK(scaling_factor > 0.0) << scaling_factor;
  return scaling_factor;
}

// Returns the factor that converts rdtsc ticks into microseconds. Uses CPUID leaf 0x15 when it
// is present and fully enumerated, and otherwise falls back to measuring the frequency.
double GetScalingFactorForX86() {
  uint32_t eax, ebx, ecx;
  // cpuid writes EAX, EBX, ECX and EDX. EDX is not consumed here, so it is declared as a
  // clobber; otherwise the compiler may keep a live value in it across the instruction.
  asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx) : "a"(0x0), "c"(0) : "edx");
  if (eax < 0x15) {
    // There is no 15H - Timestamp counter and core crystal clock information
    // leaf. Just compute the frequency.
    return computeScalingFactor();
  }

  // From Intel architecture-instruction-set-extensions-programming-reference:
  // EBX[31:0]/EAX[31:0] indicates the ratio of the TSC frequency and the
  // core crystal clock frequency.
  // If EBX[31:0] is 0, the TSC and "core crystal clock" ratio is not enumerated.
  // If ECX is 0, the nominal core crystal clock frequency is not enumerated.
  // "TSC frequency" = "core crystal clock frequency" * EBX/EAX.
  // The core crystal clock may differ from the reference clock, bus clock, or core clock
  // frequencies.
  //    EAX Bits 31 - 00: An unsigned integer which is the denominator of the
  //                      TSC/"core crystal clock" ratio.
  //    EBX Bits 31 - 00: An unsigned integer which is the numerator of the
  //                      TSC/"core crystal clock" ratio.
  //    ECX Bits 31 - 00: An unsigned integer which is the nominal frequency of the core
  //                      crystal clock in Hz.
  //    EDX Bits 31 - 00: Reserved = 0.
  asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx) : "a"(0x15), "c"(0) : "edx");
  // A zero denominator (EAX) would produce a scaling factor of 0, which GetMicroTime rejects and
  // which maps every timestamp to 0. Treat it like the other "not enumerated" cases.
  if (eax == 0 || ebx == 0 || ecx == 0) {
    return computeScalingFactor();
  }
  double coreCrystalFreq = ecx;
  // frequency = coreCrystalFreq * (ebx / eax)
  // scaling_factor = seconds_to_microseconds / frequency
  //                = seconds_to_microseconds * eax / (coreCrystalFreq * ebx)
  double seconds_to_microseconds = 1000 * 1000;
  double scaling_factor = (seconds_to_microseconds * eax) / (coreCrystalFreq * ebx);
  return scaling_factor;
}
#endif

// Initializes tsc_to_microsec_scaling_factor for the current architecture.
void InitializeTimestampCounters() {
  // It is sufficient to initialize this once for the entire execution. Just return if it is
  // already initialized.
  if (tsc_to_microsec_scaling_factor > 0.0) {
    return;
  }

#if defined(__arm__)
  double seconds_to_microseconds = 1000 * 1000;
  // mrc transfers a single 32-bit register. With a 64-bit operand and a write-only constraint
  // only the low half would be written and the upper half would be left undefined, so read the
  // frequency into a 32-bit variable.
  uint32_t freq = 0;
  // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.21
  asm volatile("mrc p15, 0, %0, c14, c0, 0" : "=r"(freq));
  tsc_to_microsec_scaling_factor = seconds_to_microseconds / static_cast<double>(freq);
#elif defined(__aarch64__)
  double seconds_to_microseconds = 1000 * 1000;
  uint64_t freq = 0;
  // See Arm Architecture Registers Armv8 section System Registers
  asm volatile("mrs %0, cntfrq_el0" : "=r"(freq));
  tsc_to_microsec_scaling_factor = seconds_to_microseconds / static_cast<double>(freq);
#elif defined(__i386__) || defined(__x86_64__)
  tsc_to_microsec_scaling_factor = GetScalingFactorForX86();
#else
  // GetTimestamp() already returns MicroTime() on this path, so no scaling is needed.
  tsc_to_microsec_scaling_factor = 1.0;
#endif
}

// Converts a raw timestamp counter value into microseconds using the initialized scaling factor.
ALWAYS_INLINE uint64_t GetMicroTime(uint64_t counter) {
  DCHECK(tsc_to_microsec_scaling_factor > 0.0) << tsc_to_microsec_scaling_factor;
  return tsc_to_microsec_scaling_factor * counter;
}

}  // namespace

// Maps an encoded trace method id back to its ArtMethod, or nullptr if the index is unknown.
ArtMethod* Trace::DecodeTraceMethod(uint32_t tmid) {
  uint32_t method_index = tmid >> TraceActionBits;
  // This is used only for logging which is usually needed only for debugging ART. So it's not
  // performance critical.
  for (auto const& entry : art_method_id_map_) {
    if (method_index == entry.second) {
      return entry.first;
    }
  }
  return nullptr;
}

// Returns the index assigned to `method`, assigning the next free index on first use.
uint32_t Trace::EncodeTraceMethod(ArtMethod* method) {
  uint32_t idx = 0;
  auto it = art_method_id_map_.find(method);
  if (it != art_method_id_map_.end()) {
    idx = it->second;
  } else {
    idx = current_method_index_;
    art_method_id_map_.emplace(method, idx);
    current_method_index_++;
  }
  return idx;
}

// Hands out the cached stack trace vector if there is one, otherwise allocates a new one.
// Ownership passes to the caller.
std::vector<ArtMethod*>* Trace::AllocStackTrace() {
  return (temp_stack_trace_.get() != nullptr)  ? temp_stack_trace_.release() :
      new std::vector<ArtMethod*>();
}

// Clears `stack_trace` and caches it for the next AllocStackTrace() call.
void Trace::FreeStackTrace(std::vector<ArtMethod*>* stack_trace) {
  stack_trace->clear();
  temp_stack_trace_.reset(stack_trace);
}

void Trace::SetDefaultClockSource(TraceClockSource clock_source) {
#if defined(__linux__)
  default_clock_source_ = clock_source;
#else
  if (clock_source != TraceClockSource::kWall) {
    LOG(WARNING) << "Ignoring tracing request to use CPU time.";
  }
#endif
}

// Trace file format version: v3 when both clocks are recorded, v2 otherwise.
static uint16_t GetTraceVersion(TraceClockSource clock_source) {
  return (clock_source == TraceClockSource::kDual) ? kTraceVersionDualClock
                                                    : kTraceVersionSingleClock;
}

// Size in bytes of one trace record for the given clock source.
static uint16_t GetRecordSize(TraceClockSource clock_source) {
  return (clock_source == TraceClockSource::kDual) ? kTraceRecordSizeDualClock
                                                    : kTraceRecordSizeSingleClock;
}

bool Trace::UseThreadCpuClock() {
  return (clock_source_ == TraceClockSource::kThreadCpu) ||
      (clock_source_ == TraceClockSource::kDual);
}

bool Trace::UseWallClock() {
  return (clock_source_ == TraceClockSource::kWall) ||
      (clock_source_ == TraceClockSource::kDual);
}

// Reads every clock that is enabled for this trace once and discards the results.
void Trace::MeasureClockOverhead() {
  if (UseThreadCpuClock()) {
    Thread::Current()->GetCpuMicroTime();
  }
  if (UseWallClock()) {
    GetTimestamp();
  }
}

// Compute an average time taken to measure clocks.
uint32_t Trace::GetClockOverheadNanoSeconds() { Thread* self = Thread::Current(); uint64_t start = self->GetCpuMicroTime(); for (int i = 4000; i > 0; i--) { MeasureClockOverhead(); MeasureClockOverhead(); MeasureClockOverhead(); MeasureClockOverhead(); MeasureClockOverhead(); MeasureClockOverhead(); MeasureClockOverhead(); MeasureClockOverhead(); } uint64_t elapsed_us = self->GetCpuMicroTime() - start; return static_cast(elapsed_us / 32); } // TODO: put this somewhere with the big-endian equivalent used by JDWP. static void Append2LE(uint8_t* buf, uint16_t val) { *buf++ = static_cast(val); *buf++ = static_cast(val >> 8); } // TODO: put this somewhere with the big-endian equivalent used by JDWP. static void Append4LE(uint8_t* buf, uint32_t val) { *buf++ = static_cast(val); *buf++ = static_cast(val >> 8); *buf++ = static_cast(val >> 16); *buf++ = static_cast(val >> 24); } // TODO: put this somewhere with the big-endian equivalent used by JDWP. static void Append8LE(uint8_t* buf, uint64_t val) { *buf++ = static_cast(val); *buf++ = static_cast(val >> 8); *buf++ = static_cast(val >> 16); *buf++ = static_cast(val >> 24); *buf++ = static_cast(val >> 32); *buf++ = static_cast(val >> 40); *buf++ = static_cast(val >> 48); *buf++ = static_cast(val >> 56); } static void GetSample(Thread* thread, void* arg) REQUIRES_SHARED(Locks::mutator_lock_) { std::vector* const stack_trace = Trace::AllocStackTrace(); StackVisitor::WalkStack( [&](const art::StackVisitor* stack_visitor) REQUIRES_SHARED(Locks::mutator_lock_) { ArtMethod* m = stack_visitor->GetMethod(); // Ignore runtime frames (in particular callee save). 
if (!m->IsRuntimeMethod()) { stack_trace->push_back(m); } return true; }, thread, /* context= */ nullptr, art::StackVisitor::StackWalkKind::kIncludeInlinedFrames); Trace* the_trace = reinterpret_cast(arg); the_trace->CompareAndUpdateStackTrace(thread, stack_trace); } static void ClearThreadStackTraceAndClockBase(Thread* thread, void* arg ATTRIBUTE_UNUSED) { thread->SetTraceClockBase(0); std::vector* stack_trace = thread->GetStackTraceSample(); thread->SetStackTraceSample(nullptr); delete stack_trace; } void Trace::CompareAndUpdateStackTrace(Thread* thread, std::vector* stack_trace) { CHECK_EQ(pthread_self(), sampling_pthread_); std::vector* old_stack_trace = thread->GetStackTraceSample(); // Update the thread's stack trace sample. thread->SetStackTraceSample(stack_trace); // Read timer clocks to use for all events in this trace. uint32_t thread_clock_diff = 0; uint64_t timestamp_counter = 0; ReadClocks(thread, &thread_clock_diff, ×tamp_counter); if (old_stack_trace == nullptr) { // If there's no previous stack trace sample for this thread, log an entry event for all // methods in the trace. for (auto rit = stack_trace->rbegin(); rit != stack_trace->rend(); ++rit) { LogMethodTraceEvent(thread, *rit, kTraceMethodEnter, thread_clock_diff, timestamp_counter); } } else { // If there's a previous stack trace for this thread, diff the traces and emit entry and exit // events accordingly. auto old_rit = old_stack_trace->rbegin(); auto rit = stack_trace->rbegin(); // Iterate bottom-up over both traces until there's a difference between them. while (old_rit != old_stack_trace->rend() && rit != stack_trace->rend() && *old_rit == *rit) { old_rit++; rit++; } // Iterate top-down over the old trace until the point where they differ, emitting exit events. 
for (auto old_it = old_stack_trace->begin(); old_it != old_rit.base(); ++old_it) { LogMethodTraceEvent(thread, *old_it, kTraceMethodExit, thread_clock_diff, timestamp_counter); } // Iterate bottom-up over the new trace from the point where they differ, emitting entry events. for (; rit != stack_trace->rend(); ++rit) { LogMethodTraceEvent(thread, *rit, kTraceMethodEnter, thread_clock_diff, timestamp_counter); } FreeStackTrace(old_stack_trace); } } void* Trace::RunSamplingThread(void* arg) { Runtime* runtime = Runtime::Current(); intptr_t interval_us = reinterpret_cast(arg); CHECK_GE(interval_us, 0); CHECK(runtime->AttachCurrentThread("Sampling Profiler", true, runtime->GetSystemThreadGroup(), !runtime->IsAotCompiler())); while (true) { usleep(interval_us); ScopedTrace trace("Profile sampling"); Thread* self = Thread::Current(); Trace* the_trace; { MutexLock mu(self, *Locks::trace_lock_); the_trace = the_trace_; if (the_trace_->stop_tracing_) { break; } } { // Avoid a deadlock between a thread doing garbage collection // and the profile sampling thread, by blocking GC when sampling // thread stacks (see b/73624630). 
gc::ScopedGCCriticalSection gcs(self, art::gc::kGcCauseInstrumentation, art::gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa(__FUNCTION__); MutexLock mu(self, *Locks::thread_list_lock_); runtime->GetThreadList()->ForEach(GetSample, the_trace); } } runtime->DetachCurrentThread(); return nullptr; } void Trace::Start(const char* trace_filename, size_t buffer_size, int flags, TraceOutputMode output_mode, TraceMode trace_mode, int interval_us) { std::unique_ptr file(OS::CreateEmptyFileWriteOnly(trace_filename)); if (file == nullptr) { std::string msg = android::base::StringPrintf("Unable to open trace file '%s'", trace_filename); PLOG(ERROR) << msg; ScopedObjectAccess soa(Thread::Current()); Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;", msg.c_str()); return; } Start(std::move(file), buffer_size, flags, output_mode, trace_mode, interval_us); } void Trace::Start(int trace_fd, size_t buffer_size, int flags, TraceOutputMode output_mode, TraceMode trace_mode, int interval_us) { if (trace_fd < 0) { std::string msg = android::base::StringPrintf("Unable to start tracing with invalid fd %d", trace_fd); LOG(ERROR) << msg; ScopedObjectAccess soa(Thread::Current()); Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;", msg.c_str()); return; } std::unique_ptr file(new File(trace_fd, /* path= */ "tracefile", /* check_usage= */ true)); Start(std::move(file), buffer_size, flags, output_mode, trace_mode, interval_us); } void Trace::StartDDMS(size_t buffer_size, int flags, TraceMode trace_mode, int interval_us) { Start(std::unique_ptr(), buffer_size, flags, TraceOutputMode::kDDMS, trace_mode, interval_us); } void Trace::Start(std::unique_ptr&& trace_file_in, size_t buffer_size, int flags, TraceOutputMode output_mode, TraceMode trace_mode, int interval_us) { // We own trace_file now and are responsible for closing it. 
To account for error situations, use // a specialized unique_ptr to ensure we close it on the way out (if it hasn't been passed to a // Trace instance). auto deleter = [](File* file) { if (file != nullptr) { file->MarkUnchecked(); // Don't deal with flushing requirements. int result ATTRIBUTE_UNUSED = file->Close(); delete file; } }; std::unique_ptr trace_file(trace_file_in.release(), deleter); Thread* self = Thread::Current(); { MutexLock mu(self, *Locks::trace_lock_); if (the_trace_ != nullptr) { LOG(ERROR) << "Trace already in progress, ignoring this request"; return; } } // Check interval if sampling is enabled if (trace_mode == TraceMode::kSampling && interval_us <= 0) { LOG(ERROR) << "Invalid sampling interval: " << interval_us; ScopedObjectAccess soa(self); ThrowRuntimeException("Invalid sampling interval: %d", interval_us); return; } // Initialize the frequency of timestamp counter updates here. This is needed // to get wallclock time from timestamp counter values. InitializeTimestampCounters(); Runtime* runtime = Runtime::Current(); // Enable count of allocs if specified in the flags. bool enable_stats = false; // Create Trace object. { // Suspend JIT here since we are switching runtime to debuggable. Debuggable runtimes cannot use // JITed code from before so we need to invalidated all JITed code here. Enter suspend JIT scope // to prevent any races with ongoing JIT compilations. jit::ScopedJitSuspend suspend_jit; // Required since EnableMethodTracing calls ConfigureStubs which visits class linker classes. 
gc::ScopedGCCriticalSection gcs(self, gc::kGcCauseInstrumentation, gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa(__FUNCTION__); MutexLock mu(self, *Locks::trace_lock_); if (the_trace_ != nullptr) { LOG(ERROR) << "Trace already in progress, ignoring this request"; } else { enable_stats = (flags & kTraceCountAllocs) != 0; the_trace_ = new Trace(trace_file.release(), buffer_size, flags, output_mode, trace_mode); if (trace_mode == TraceMode::kSampling) { CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, nullptr, &RunSamplingThread, reinterpret_cast(interval_us)), "Sampling profiler thread"); the_trace_->interval_us_ = interval_us; } else { if (!runtime->IsJavaDebuggable()) { art::jit::Jit* jit = runtime->GetJit(); if (jit != nullptr) { jit->GetCodeCache()->InvalidateAllCompiledCode(); jit->GetCodeCache()->TransitionToDebuggable(); jit->GetJitCompiler()->SetDebuggableCompilerOption(true); } runtime->SetRuntimeDebugState(art::Runtime::RuntimeDebugState::kJavaDebuggable); runtime->GetInstrumentation()->UpdateEntrypointsForDebuggable(); runtime->DeoptimizeBootImage(); } runtime->GetInstrumentation()->AddListener( the_trace_, instrumentation::Instrumentation::kMethodEntered | instrumentation::Instrumentation::kMethodExited | instrumentation::Instrumentation::kMethodUnwind); // TODO: In full-PIC mode, we don't need to fully deopt. // TODO: We can only use trampoline entrypoints if we are java-debuggable since in that case // we know that inlining and other problematic optimizations are disabled. We might just // want to use the trampolines anyway since it is faster. It makes the story with disabling // jit-gc more complex though. runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey, the_trace_, /*needs_interpreter=*/false); } } } // Can't call this when holding the mutator lock. 
if (enable_stats) { runtime->SetStatsEnabled(true); } } void Trace::UpdateThreadsList(Thread* thread) { // TODO(mythria): Clean this up and update threads_list_ when recording the trace event similar // to what we do for streaming case. std::string name; thread->GetThreadName(name); // In tests, we destroy VM after already detaching the current thread. When a thread is // detached we record the information about the threads_list_. We re-attach the current // thread again as a "Shutdown thread" in the process of shutting down. So don't record // information about shutdown threads. if (name.compare("Shutdown thread") == 0) { return; } // There can be races when unregistering a thread and stopping the trace and it is possible to // update the list twice. For example, This information is updated here when stopping tracing and // also when a thread is detaching. In thread detach, we first update this information and then // remove the thread from the list of active threads. If the tracing was stopped in between these // events, we can see two updates for the same thread. Since we need a trace_lock_ it isn't easy // to prevent this race (for ex: update this information when holding thread_list_lock_). It is // harmless to do two updates so just use overwrite here. threads_list_.Overwrite(thread->GetTid(), name); } void Trace::StopTracing(bool finish_tracing, bool flush_file) { Runtime* const runtime = Runtime::Current(); Thread* const self = Thread::Current(); pthread_t sampling_pthread = 0U; { MutexLock mu(self, *Locks::trace_lock_); if (the_trace_ == nullptr) { LOG(ERROR) << "Trace stop requested, but no trace currently running"; return; } // Tell sampling_pthread_ to stop tracing. the_trace_->stop_tracing_ = true; sampling_pthread = sampling_pthread_; } // Make sure that we join before we delete the trace since we don't want to have // the sampling thread access a stale pointer. This finishes since the sampling thread exits when // the_trace_ is null. 
if (sampling_pthread != 0U) { CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown"); } // Make a copy of the_trace_, so it can be flushed later. We want to reset // the_trace_ to nullptr in suspend all scope to prevent any races Trace* the_trace = the_trace_; bool stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0; // Stop the trace sources adding more entries to the trace buffer and synchronise stores. { gc::ScopedGCCriticalSection gcs( self, gc::kGcCauseInstrumentation, gc::kCollectorTypeInstrumentation); jit::ScopedJitSuspend suspend_jit; ScopedSuspendAll ssa(__FUNCTION__); if (the_trace->trace_mode_ == TraceMode::kSampling) { MutexLock mu(self, *Locks::thread_list_lock_); runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr); } else { runtime->GetInstrumentation()->RemoveListener( the_trace, instrumentation::Instrumentation::kMethodEntered | instrumentation::Instrumentation::kMethodExited | instrumentation::Instrumentation::kMethodUnwind); runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey); runtime->GetInstrumentation()->MaybeSwitchRuntimeDebugState(self); } // Flush thread specific buffer from all threads before resetting the_trace_ to nullptr. // We also flush the buffer when destroying a thread which expects the_trace_ to be valid so // make sure that the per-thread buffer is reset before resetting the_trace_. { MutexLock tl_lock(Thread::Current(), *Locks::thread_list_lock_); for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) { if (thread->GetMethodTraceBuffer() != nullptr) { the_trace_->FlushStreamingBuffer(thread); thread->ResetMethodTraceBuffer(); } // Record threads here before resetting the_trace_ to prevent any races between // unregistering the thread and resetting the_trace_. 
the_trace->UpdateThreadsList(thread); } } // Reset the_trace_ by taking a trace_lock MutexLock mu(self, *Locks::trace_lock_); the_trace_ = nullptr; sampling_pthread_ = 0U; } // At this point, code may read buf_ as its writers are shutdown // and the ScopedSuspendAll above has ensured all stores to buf_ // are now visible. if (finish_tracing) { the_trace->FinishTracing(); } if (the_trace->trace_file_.get() != nullptr) { // Do not try to erase, so flush and close explicitly. if (flush_file) { if (the_trace->trace_file_->Flush() != 0) { PLOG(WARNING) << "Could not flush trace file."; } } else { the_trace->trace_file_->MarkUnchecked(); // Do not trigger guard. } if (the_trace->trace_file_->Close() != 0) { PLOG(ERROR) << "Could not close trace file."; } } delete the_trace; if (stop_alloc_counting) { // Can be racy since SetStatsEnabled is not guarded by any locks. runtime->SetStatsEnabled(false); } } void Trace::FlushThreadBuffer(Thread* self) { MutexLock mu(self, *Locks::trace_lock_); the_trace_->FlushStreamingBuffer(self); } void Trace::Abort() { // Do not write anything anymore. StopTracing(false, false); } void Trace::Stop() { // Finish writing. StopTracing(true, true); } void Trace::Shutdown() { if (GetMethodTracingMode() != kTracingInactive) { Stop(); } } TracingMode Trace::GetMethodTracingMode() { MutexLock mu(Thread::Current(), *Locks::trace_lock_); if (the_trace_ == nullptr) { return kTracingInactive; } else { switch (the_trace_->trace_mode_) { case TraceMode::kSampling: return kSampleProfilingActive; case TraceMode::kMethodTracing: return kMethodTracingActive; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); } } static constexpr size_t kMinBufSize = 18U; // Trace header is up to 18B. // Size of per-thread buffer size. The value is chosen arbitrarily. This value // should be greater than kMinBufSize. 
static constexpr size_t kPerThreadBufSize = 512 * 1024; static_assert(kPerThreadBufSize > kMinBufSize); namespace { TraceClockSource GetClockSourceFromFlags(int flags) { bool need_wall = flags & Trace::TraceFlag::kTraceClockSourceWallClock; bool need_thread_cpu = flags & Trace::TraceFlag::kTraceClockSourceThreadCpu; if (need_wall && need_thread_cpu) { return TraceClockSource::kDual; } else if (need_wall) { return TraceClockSource::kWall; } else if (need_thread_cpu) { return TraceClockSource::kThreadCpu; } else { return kDefaultTraceClockSource; } } } // namespace Trace::Trace(File* trace_file, size_t buffer_size, int flags, TraceOutputMode output_mode, TraceMode trace_mode) : trace_file_(trace_file), buf_(new uint8_t[std::max(kMinBufSize, buffer_size)]()), flags_(flags), trace_output_mode_(output_mode), trace_mode_(trace_mode), clock_source_(GetClockSourceFromFlags(flags)), buffer_size_(std::max(kMinBufSize, buffer_size)), start_time_(GetMicroTime(GetTimestamp())), clock_overhead_ns_(GetClockOverheadNanoSeconds()), overflow_(false), interval_us_(0), stop_tracing_(false), tracing_lock_("tracing lock", LockLevel::kTracingStreamingLock) { CHECK_IMPLIES(trace_file == nullptr, output_mode == TraceOutputMode::kDDMS); uint16_t trace_version = GetTraceVersion(clock_source_); if (output_mode == TraceOutputMode::kStreaming) { trace_version |= 0xF0U; } // Set up the beginning of the trace. 
memset(buf_.get(), 0, kTraceHeaderLength); Append4LE(buf_.get(), kTraceMagicValue); Append2LE(buf_.get() + 4, trace_version); Append2LE(buf_.get() + 6, kTraceHeaderLength); Append8LE(buf_.get() + 8, start_time_); if (trace_version >= kTraceVersionDualClock) { uint16_t record_size = GetRecordSize(clock_source_); Append2LE(buf_.get() + 16, record_size); } static_assert(18 <= kMinBufSize, "Minimum buffer size not large enough for trace header"); cur_offset_.store(kTraceHeaderLength, std::memory_order_relaxed); if (output_mode == TraceOutputMode::kStreaming) { // Flush the header information to the file. We use a per thread buffer, so // it is easier to just write the header information directly to file. if (!trace_file_->WriteFully(buf_.get(), kTraceHeaderLength)) { PLOG(WARNING) << "Failed streaming a tracing event."; } cur_offset_.store(0, std::memory_order_relaxed); } } static uint64_t ReadBytes(uint8_t* buf, size_t bytes) { uint64_t ret = 0; for (size_t i = 0; i < bytes; ++i) { ret |= static_cast(buf[i]) << (i * 8); } return ret; } void Trace::DumpBuf(uint8_t* buf, size_t buf_size, TraceClockSource clock_source) { uint8_t* ptr = buf + kTraceHeaderLength; uint8_t* end = buf + buf_size; MutexLock mu(Thread::Current(), tracing_lock_); while (ptr < end) { uint32_t tmid = ReadBytes(ptr + 2, sizeof(tmid)); ArtMethod* method = DecodeTraceMethod(tmid); TraceAction action = DecodeTraceAction(tmid); LOG(INFO) << ArtMethod::PrettyMethod(method) << " " << static_cast(action); ptr += GetRecordSize(clock_source); } } void Trace::FinishTracing() { size_t final_offset = 0; if (trace_output_mode_ != TraceOutputMode::kStreaming) { final_offset = cur_offset_.load(std::memory_order_relaxed); } // Compute elapsed time. uint64_t elapsed = GetMicroTime(GetTimestamp()) - start_time_; std::ostringstream os; os << StringPrintf("%cversion\n", kTraceTokenChar); os << StringPrintf("%d\n", GetTraceVersion(clock_source_)); os << StringPrintf("data-file-overflow=%s\n", overflow_ ? 
"true" : "false"); if (UseThreadCpuClock()) { if (UseWallClock()) { os << StringPrintf("clock=dual\n"); } else { os << StringPrintf("clock=thread-cpu\n"); } } else { os << StringPrintf("clock=wall\n"); } os << StringPrintf("elapsed-time-usec=%" PRIu64 "\n", elapsed); if (trace_output_mode_ != TraceOutputMode::kStreaming) { size_t num_records = (final_offset - kTraceHeaderLength) / GetRecordSize(clock_source_); os << StringPrintf("num-method-calls=%zd\n", num_records); } os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead_ns_); os << StringPrintf("vm=art\n"); os << StringPrintf("pid=%d\n", getpid()); if ((flags_ & kTraceCountAllocs) != 0) { os << "alloc-count=" << Runtime::Current()->GetStat(KIND_ALLOCATED_OBJECTS) << "\n"; os << "alloc-size=" << Runtime::Current()->GetStat(KIND_ALLOCATED_BYTES) << "\n"; os << "gc-count=" << Runtime::Current()->GetStat(KIND_GC_INVOCATIONS) << "\n"; } os << StringPrintf("%cthreads\n", kTraceTokenChar); DumpThreadList(os); os << StringPrintf("%cmethods\n", kTraceTokenChar); DumpMethodList(os); os << StringPrintf("%cend\n", kTraceTokenChar); std::string header(os.str()); if (trace_output_mode_ == TraceOutputMode::kStreaming) { // It is expected that this method is called when all other threads are suspended, so there // cannot be any writes to trace_file_ after finish tracing. // Write a special token to mark the end of trace records and the start of // trace summary. uint8_t buf[7]; Append2LE(buf, 0); buf[2] = kOpTraceSummary; Append4LE(buf + 3, static_cast(header.length())); // Write the trace summary. The summary is identical to the file header when // the output mode is not streaming (except for methods). 
if (!trace_file_->WriteFully(buf, sizeof(buf)) || !trace_file_->WriteFully(header.c_str(), header.length())) { PLOG(WARNING) << "Failed streaming a tracing event."; } } else { if (trace_file_.get() == nullptr) { std::vector data; data.resize(header.length() + final_offset); memcpy(data.data(), header.c_str(), header.length()); memcpy(data.data() + header.length(), buf_.get(), final_offset); Runtime::Current()->GetRuntimeCallbacks()->DdmPublishChunk(CHUNK_TYPE("MPSE"), ArrayRef(data)); const bool kDumpTraceInfo = false; if (kDumpTraceInfo) { LOG(INFO) << "Trace sent:\n" << header; DumpBuf(buf_.get(), final_offset, clock_source_); } } else { if (!trace_file_->WriteFully(header.c_str(), header.length()) || !trace_file_->WriteFully(buf_.get(), final_offset)) { std::string detail(StringPrintf("Trace data write failed: %s", strerror(errno))); PLOG(ERROR) << detail; ThrowRuntimeException("%s", detail.c_str()); } } } } void Trace::DexPcMoved(Thread* thread ATTRIBUTE_UNUSED, Handle this_object ATTRIBUTE_UNUSED, ArtMethod* method, uint32_t new_dex_pc) { // We're not recorded to listen to this kind of event, so complain. LOG(ERROR) << "Unexpected dex PC event in tracing " << ArtMethod::PrettyMethod(method) << " " << new_dex_pc; } void Trace::FieldRead(Thread* thread ATTRIBUTE_UNUSED, Handle this_object ATTRIBUTE_UNUSED, ArtMethod* method, uint32_t dex_pc, ArtField* field ATTRIBUTE_UNUSED) REQUIRES_SHARED(Locks::mutator_lock_) { // We're not recorded to listen to this kind of event, so complain. LOG(ERROR) << "Unexpected field read event in tracing " << ArtMethod::PrettyMethod(method) << " " << dex_pc; } void Trace::FieldWritten(Thread* thread ATTRIBUTE_UNUSED, Handle this_object ATTRIBUTE_UNUSED, ArtMethod* method, uint32_t dex_pc, ArtField* field ATTRIBUTE_UNUSED, const JValue& field_value ATTRIBUTE_UNUSED) REQUIRES_SHARED(Locks::mutator_lock_) { // We're not recorded to listen to this kind of event, so complain. 
LOG(ERROR) << "Unexpected field write event in tracing " << ArtMethod::PrettyMethod(method) << " " << dex_pc; } void Trace::MethodEntered(Thread* thread, ArtMethod* method) { uint32_t thread_clock_diff = 0; uint64_t timestamp_counter = 0; ReadClocks(thread, &thread_clock_diff, ×tamp_counter); LogMethodTraceEvent(thread, method, kTraceMethodEnter, thread_clock_diff, timestamp_counter); } void Trace::MethodExited(Thread* thread, ArtMethod* method, instrumentation::OptionalFrame frame ATTRIBUTE_UNUSED, JValue& return_value ATTRIBUTE_UNUSED) { uint32_t thread_clock_diff = 0; uint64_t timestamp_counter = 0; ReadClocks(thread, &thread_clock_diff, ×tamp_counter); LogMethodTraceEvent(thread, method, kTraceMethodExit, thread_clock_diff, timestamp_counter); } void Trace::MethodUnwind(Thread* thread, ArtMethod* method, uint32_t dex_pc ATTRIBUTE_UNUSED) { uint32_t thread_clock_diff = 0; uint64_t timestamp_counter = 0; ReadClocks(thread, &thread_clock_diff, ×tamp_counter); LogMethodTraceEvent(thread, method, kTraceUnroll, thread_clock_diff, timestamp_counter); } void Trace::ExceptionThrown(Thread* thread ATTRIBUTE_UNUSED, Handle exception_object ATTRIBUTE_UNUSED) REQUIRES_SHARED(Locks::mutator_lock_) { LOG(ERROR) << "Unexpected exception thrown event in tracing"; } void Trace::ExceptionHandled(Thread* thread ATTRIBUTE_UNUSED, Handle exception_object ATTRIBUTE_UNUSED) REQUIRES_SHARED(Locks::mutator_lock_) { LOG(ERROR) << "Unexpected exception thrown event in tracing"; } void Trace::Branch(Thread* /*thread*/, ArtMethod* method, uint32_t /*dex_pc*/, int32_t /*dex_pc_offset*/) REQUIRES_SHARED(Locks::mutator_lock_) { LOG(ERROR) << "Unexpected branch event in tracing" << ArtMethod::PrettyMethod(method); } void Trace::WatchedFramePop(Thread* self ATTRIBUTE_UNUSED, const ShadowFrame& frame ATTRIBUTE_UNUSED) { LOG(ERROR) << "Unexpected WatchedFramePop event in tracing"; } void Trace::ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint64_t* timestamp_counter) { if 
(UseThreadCpuClock()) { uint64_t clock_base = thread->GetTraceClockBase(); if (UNLIKELY(clock_base == 0)) { // First event, record the base time in the map. uint64_t time = thread->GetCpuMicroTime(); thread->SetTraceClockBase(time); } else { *thread_clock_diff = thread->GetCpuMicroTime() - clock_base; } } if (UseWallClock()) { *timestamp_counter = GetTimestamp(); } } std::string Trace::GetMethodLine(ArtMethod* method, uint32_t method_index) { method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize); return StringPrintf("%#x\t%s\t%s\t%s\t%s\n", (method_index << TraceActionBits), PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(), method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile()); } void Trace::RecordStreamingMethodEvent(Thread* thread, ArtMethod* method, TraceAction action, uint32_t thread_clock_diff, uint64_t timestamp_counter) { uintptr_t* method_trace_buffer = thread->GetMethodTraceBuffer(); size_t* current_offset = thread->GetMethodTraceIndexPtr(); // Initialize the buffer lazily. It's just simpler to keep the creation at one place. if (method_trace_buffer == nullptr) { method_trace_buffer = new uintptr_t[std::max(kMinBufSize, kPerThreadBufSize)](); thread->SetMethodTraceBuffer(method_trace_buffer); *current_offset = 0; // This is the first event from this thread, so first record information about the thread. std::string thread_name; thread->GetThreadName(thread_name); static constexpr size_t kThreadNameHeaderSize = 7; uint8_t header[kThreadNameHeaderSize]; Append2LE(header, 0); header[2] = kOpNewThread; // We use only 16 bits to encode thread id. On Android, we don't expect to use more than // 16-bits for a Tid. For 32-bit platforms it is always ensured we use less than 16 bits. // See __check_max_thread_id in bionic for more details. Even on 64-bit the max threads // is currently less than 65536. // TODO(mythria): On host, we know thread ids can be greater than 16 bits. 
Consider adding // a map similar to method ids. DCHECK(!kIsTargetBuild || thread->GetTid() < (1 << 16)); Append2LE(header + 3, static_cast(thread->GetTid())); Append2LE(header + 5, static_cast(thread_name.length())); { MutexLock mu(Thread::Current(), tracing_lock_); if (!trace_file_->WriteFully(header, kThreadNameHeaderSize) || !trace_file_->WriteFully(reinterpret_cast(thread_name.c_str()), thread_name.length())) { PLOG(WARNING) << "Failed streaming a tracing event."; } } } size_t required_entries = (clock_source_ == TraceClockSource::kDual) ? 4 : 3; if (*current_offset + required_entries >= kPerThreadBufSize) { // We don't have space for further entries. Flush the contents of the buffer and reuse the // buffer to store contents. Reset the index to the start of the buffer. FlushStreamingBuffer(thread); *current_offset = 0; } // Record entry in per-thread trace buffer. int current_index = *current_offset; method_trace_buffer[current_index++] = reinterpret_cast(method); // TODO(mythria): We only need two bits to record the action. Consider merging // it with the method entry to save space. method_trace_buffer[current_index++] = action; if (UseThreadCpuClock()) { method_trace_buffer[current_index++] = thread_clock_diff; } if (UseWallClock()) { if (art::kRuntimePointerSize == PointerSize::k32) { // On 32-bit architectures store timestamp counter as two 32-bit values. 
method_trace_buffer[current_index++] = timestamp_counter >> 32; method_trace_buffer[current_index++] = static_cast(timestamp_counter); } else { method_trace_buffer[current_index++] = timestamp_counter; } } *current_offset = current_index; } void Trace::WriteToBuf(uint8_t* header, size_t header_size, const std::string& data, size_t* current_index, uint8_t* buffer, size_t buffer_size) { EnsureSpace(buffer, current_index, buffer_size, header_size); memcpy(buffer + *current_index, header, header_size); *current_index += header_size; EnsureSpace(buffer, current_index, buffer_size, data.length()); if (data.length() < buffer_size) { memcpy(buffer + *current_index, reinterpret_cast(data.c_str()), data.length()); *current_index += data.length(); } else { // The data is larger than buffer, so write directly to the file. EnsureSpace should have // flushed any data in the buffer. DCHECK_EQ(*current_index, 0U); if (!trace_file_->WriteFully(reinterpret_cast(data.c_str()), data.length())) { PLOG(WARNING) << "Failed streaming a tracing event."; } } } void Trace::FlushStreamingBuffer(Thread* thread) { // Take a tracing_lock_ to serialize writes across threads. We also need to allocate a unique // method id for each method. We do that by maintaining a map from id to method for each newly // seen method. tracing_lock_ is required to serialize these. MutexLock mu(Thread::Current(), tracing_lock_); uintptr_t* method_trace_buffer = thread->GetMethodTraceBuffer(); // Create a temporary buffer to encode the trace events from the specified thread. 
size_t buffer_size = kPerThreadBufSize; size_t current_index = 0; std::unique_ptr buffer(new uint8_t[std::max(kMinBufSize, buffer_size)]); size_t num_entries = *(thread->GetMethodTraceIndexPtr()); for (size_t entry_index = 0; entry_index < num_entries;) { ArtMethod* method = reinterpret_cast(method_trace_buffer[entry_index++]); TraceAction action = DecodeTraceAction(method_trace_buffer[entry_index++]); uint32_t thread_time = 0; uint32_t wall_time = 0; if (UseThreadCpuClock()) { thread_time = method_trace_buffer[entry_index++]; } if (UseWallClock()) { uint64_t timestamp = method_trace_buffer[entry_index++]; if (art::kRuntimePointerSize == PointerSize::k32) { // On 32-bit architectures timestamp is stored as two 32-bit values. timestamp = (timestamp << 32 | method_trace_buffer[entry_index++]); } wall_time = GetMicroTime(timestamp) - start_time_; } auto it = art_method_id_map_.find(method); uint32_t method_index = 0; // If we haven't seen this method before record information about the method. if (it == art_method_id_map_.end()) { art_method_id_map_.emplace(method, current_method_index_); method_index = current_method_index_; current_method_index_++; // Write a special block with the name. 
std::string method_line(GetMethodLine(method, method_index)); static constexpr size_t kMethodNameHeaderSize = 5; uint8_t method_header[kMethodNameHeaderSize]; DCHECK_LT(kMethodNameHeaderSize, kPerThreadBufSize); Append2LE(method_header, 0); method_header[2] = kOpNewMethod; Append2LE(method_header + 3, static_cast(method_line.length())); WriteToBuf(method_header, kMethodNameHeaderSize, method_line, ¤t_index, buffer.get(), buffer_size); } else { method_index = it->second; } const size_t record_size = GetRecordSize(clock_source_); DCHECK_LT(record_size, kPerThreadBufSize); EnsureSpace(buffer.get(), ¤t_index, buffer_size, record_size); EncodeEventEntry( buffer.get() + current_index, thread, method_index, action, thread_time, wall_time); current_index += record_size; } // Flush the contents of buffer to file. if (!trace_file_->WriteFully(buffer.get(), current_index)) { PLOG(WARNING) << "Failed streaming a tracing event."; } } void Trace::RecordMethodEvent(Thread* thread, ArtMethod* method, TraceAction action, uint32_t thread_clock_diff, uint64_t timestamp_counter) { // Advance cur_offset_ atomically. int32_t new_offset; int32_t old_offset = 0; // In the non-streaming case, we do a busy loop here trying to get // an offset to write our record and advance cur_offset_ for the // next use. // Although multiple threads can call this method concurrently, // the compare_exchange_weak here is still atomic (by definition). // A succeeding update is visible to other cores when they pass // through this point. old_offset = cur_offset_.load(std::memory_order_relaxed); // Speculative read do { new_offset = old_offset + GetRecordSize(clock_source_); if (static_cast(new_offset) > buffer_size_) { overflow_ = true; return; } } while (!cur_offset_.compare_exchange_weak(old_offset, new_offset, std::memory_order_relaxed)); // Write data into the tracing buffer (if not streaming) or into a // small buffer on the stack (if streaming) which we'll put into the // tracing buffer below. 
// // These writes to the tracing buffer are synchronised with the // future reads that (only) occur under FinishTracing(). The callers // of FinishTracing() acquire locks and (implicitly) synchronise // the buffer memory. uint8_t* ptr; ptr = buf_.get() + old_offset; uint32_t wall_clock_diff = GetMicroTime(timestamp_counter) - start_time_; MutexLock mu(Thread::Current(), tracing_lock_); EncodeEventEntry( ptr, thread, EncodeTraceMethod(method), action, thread_clock_diff, wall_clock_diff); } void Trace::LogMethodTraceEvent(Thread* thread, ArtMethod* method, TraceAction action, uint32_t thread_clock_diff, uint64_t timestamp_counter) { // This method is called in both tracing modes (method and sampling). In sampling mode, this // method is only called by the sampling thread. In method tracing mode, it can be called // concurrently. // Ensure we always use the non-obsolete version of the method so that entry/exit events have the // same pointer value. method = method->GetNonObsoleteMethod(); if (trace_output_mode_ == TraceOutputMode::kStreaming) { RecordStreamingMethodEvent(thread, method, action, thread_clock_diff, timestamp_counter); } else { RecordMethodEvent(thread, method, action, thread_clock_diff, timestamp_counter); } } void Trace::EncodeEventEntry(uint8_t* ptr, Thread* thread, uint32_t method_index, TraceAction action, uint32_t thread_clock_diff, uint32_t wall_clock_diff) { static constexpr size_t kPacketSize = 14U; // The maximum size of data in a packet. 
uint32_t method_value = (method_index << TraceActionBits) | action; Append2LE(ptr, thread->GetTid()); Append4LE(ptr + 2, method_value); ptr += 6; if (UseThreadCpuClock()) { Append4LE(ptr, thread_clock_diff); ptr += 4; } if (UseWallClock()) { Append4LE(ptr, wall_clock_diff); } static_assert(kPacketSize == 2 + 4 + 4 + 4, "Packet size incorrect."); } void Trace::EnsureSpace(uint8_t* buffer, size_t* current_index, size_t buffer_size, size_t required_size) { if (*current_index + required_size < buffer_size) { return; } if (!trace_file_->WriteFully(buffer, *current_index)) { PLOG(WARNING) << "Failed streaming a tracing event."; } *current_index = 0; } void Trace::DumpMethodList(std::ostream& os) { MutexLock mu(Thread::Current(), tracing_lock_); for (auto const& entry : art_method_id_map_) { os << GetMethodLine(entry.first, entry.second); } } void Trace::DumpThreadList(std::ostream& os) { for (const auto& it : threads_list_) { // We use only 16 bits to encode thread id. On Android, we don't expect to use more than // 16-bits for a Tid. For 32-bit platforms it is always ensured we use less than 16 bits. // See __check_max_thread_id in bionic for more details. Even on 64-bit the max threads // is currently less than 65536. // TODO(mythria): On host, we know thread ids can be greater than 16 bits. Consider adding // a map similar to method ids. 
DCHECK(!kIsTargetBuild || it.first < (1 << 16)); os << static_cast(it.first) << "\t" << it.second << "\n"; } } void Trace::StoreExitingThreadInfo(Thread* thread) { MutexLock mu(thread, *Locks::trace_lock_); if (the_trace_ != nullptr) { the_trace_->UpdateThreadsList(thread); } } Trace::TraceOutputMode Trace::GetOutputMode() { MutexLock mu(Thread::Current(), *Locks::trace_lock_); CHECK(the_trace_ != nullptr) << "Trace output mode requested, but no trace currently running"; return the_trace_->trace_output_mode_; } Trace::TraceMode Trace::GetMode() { MutexLock mu(Thread::Current(), *Locks::trace_lock_); CHECK(the_trace_ != nullptr) << "Trace mode requested, but no trace currently running"; return the_trace_->trace_mode_; } int Trace::GetFlags() { MutexLock mu(Thread::Current(), *Locks::trace_lock_); CHECK(the_trace_ != nullptr) << "Trace flags requested, but no trace currently running"; return the_trace_->flags_; } int Trace::GetIntervalInMillis() { MutexLock mu(Thread::Current(), *Locks::trace_lock_); CHECK(the_trace_ != nullptr) << "Trace interval requested, but no trace currently running"; return the_trace_->interval_us_; } size_t Trace::GetBufferSize() { MutexLock mu(Thread::Current(), *Locks::trace_lock_); CHECK(the_trace_ != nullptr) << "Trace buffer size requested, but no trace currently running"; return the_trace_->buffer_size_; } bool Trace::IsTracingEnabled() { MutexLock mu(Thread::Current(), *Locks::trace_lock_); return the_trace_ != nullptr; } } // namespace art