Commit 4460a84b authored by Hiroshi Yamauchi

Rosalloc thread local allocation path without a cas.

Speedup on N4:
MemAllocTest 3044 -> 2396 (~21% reduction)
BinaryTrees  4101 -> 2929 (~26% reduction)

Bug: 9986565
Change-Id: Ia1d1a37b9e001f903c3c056e8ec68fc8c623a78b
parent 4cfe74cb
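The change, at a high level, trades per-allocation atomic accounting for bulk accounting: when a thread acquires or refreshes a thread-local rosalloc run, Heap::num_bytes_allocated_ is charged once for everything the run can hand out (the new bytes_tl_bulk_allocated out-param), and the per-object fast path then touches only thread-local state, with no CAS. Unused slots are credited back when the run is revoked. Below is a minimal standalone sketch of that accounting idea; the Run layout, sizes, and names are illustrative, not ART's actual types.

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Illustrative model only: one thread-local "run" hands out fixed-size slots.
// Slot bookkeeping is plain (non-atomic) state because only the owning thread
// touches it; the shared counter is charged once, in bulk, when the run is
// acquired, and credited back for unused slots when the run is revoked.
struct Run {
  static constexpr std::size_t kSlotSize = 64;   // illustrative bracket size
  static constexpr std::size_t kNumSlots = 32;   // illustrative slots per run
  uint32_t alloc_bits = 0;                       // 1 bit per slot, no CAS

  void* AllocSlot(uint8_t* base) {
    uint32_t free_bits = ~alloc_bits;
    if (free_bits == 0) return nullptr;          // run is full
    int slot = __builtin_ffs(free_bits) - 1;     // first free slot
    alloc_bits |= 1u << slot;
    return base + slot * kSlotSize;
  }
};

std::atomic<std::size_t> num_bytes_allocated{0};  // shared heap counter

int main() {
  static uint8_t backing[Run::kSlotSize * Run::kNumSlots];
  Run run;
  // Bulk charge when the thread acquires the run (one atomic add).
  num_bytes_allocated.fetch_add(Run::kSlotSize * Run::kNumSlots);
  // Fast path: per-object allocations touch no shared state at all.
  for (int i = 0; i < 5; ++i) {
    std::printf("slot %d at %p\n", i, run.AllocSlot(backing));
  }
  // On revoke, the free slots left in the run are credited back.
  num_bytes_allocated.fetch_sub((Run::kNumSlots - 5) * Run::kSlotSize);
  std::printf("net accounted bytes: %zu\n", num_bytes_allocated.load());  // 320
  return 0;
}

The rest of the diff threads the new bytes_tl_bulk_allocated out-parameter through every allocator entry point so the heap can do exactly this bookkeeping.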
......@@ -263,6 +263,8 @@ void CommonRuntimeTest::SetUp() {
// pool is created by the runtime.
runtime_->GetHeap()->CreateThreadPool();
runtime_->GetHeap()->VerifyHeap(); // Check for heap corruption before the test
// Reduce timing-dependent flakiness in OOME behavior (e.g. StubTest.AllocObject).
runtime_->GetHeap()->SetMinIntervalHomogeneousSpaceCompactionByOom(0U);
// Get the boot class path from the runtime so it can be used in tests.
boot_class_path_ = class_linker_->GetBootClassPath();
......
......@@ -48,9 +48,9 @@ class ModUnionTableTest : public CommonRuntimeTest {
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
auto* klass = GetObjectArrayClass(self, space);
const size_t size = ComputeArraySize(self, klass, component_count, 2);
size_t bytes_allocated = 0;
size_t bytes_allocated = 0, bytes_tl_bulk_allocated;
auto* obj = down_cast<mirror::ObjectArray<mirror::Object>*>(
space->Alloc(self, size, &bytes_allocated, nullptr));
space->Alloc(self, size, &bytes_allocated, nullptr, &bytes_tl_bulk_allocated));
if (obj != nullptr) {
obj->SetClass(klass);
obj->SetLength(static_cast<int32_t>(component_count));
......@@ -77,9 +77,10 @@ class ModUnionTableTest : public CommonRuntimeTest {
// copy of the class in the same space that we are allocating in.
DCHECK(java_lang_object_array_ != nullptr);
const size_t class_size = java_lang_object_array_->GetClassSize();
size_t bytes_allocated = 0;
size_t bytes_allocated = 0, bytes_tl_bulk_allocated;
auto* klass = down_cast<mirror::Class*>(space->Alloc(self, class_size, &bytes_allocated,
nullptr));
nullptr,
&bytes_tl_bulk_allocated));
DCHECK(klass != nullptr);
memcpy(klass, java_lang_object_array_, class_size);
Runtime::Current()->GetHeap()->GetCardTable()->MarkCard(klass);
......
......@@ -28,15 +28,19 @@ inline ALWAYS_INLINE bool RosAlloc::ShouldCheckZeroMemory() {
}
template<bool kThreadSafe>
inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) {
inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated,
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
if (UNLIKELY(size > kLargeSizeThreshold)) {
return AllocLargeObject(self, size, bytes_allocated);
return AllocLargeObject(self, size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
}
void* m;
if (kThreadSafe) {
m = AllocFromRun(self, size, bytes_allocated);
m = AllocFromRun(self, size, bytes_allocated, usable_size, bytes_tl_bulk_allocated);
} else {
m = AllocFromRunThreadUnsafe(self, size, bytes_allocated);
m = AllocFromRunThreadUnsafe(self, size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
}
// Check if the returned memory is really all zero.
if (ShouldCheckZeroMemory() && m != nullptr) {
......@@ -48,6 +52,115 @@ inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* by
return m;
}
inline bool RosAlloc::Run::IsFull() {
const size_t num_vec = NumberOfBitmapVectors();
for (size_t v = 0; v < num_vec; ++v) {
if (~alloc_bit_map_[v] != 0) {
return false;
}
}
return true;
}
inline bool RosAlloc::CanAllocFromThreadLocalRun(Thread* self, size_t size) {
if (UNLIKELY(!IsSizeForThreadLocal(size))) {
return false;
}
size_t bracket_size;
size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
DCHECK_EQ(idx, SizeToIndex(size));
DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
DCHECK_EQ(bracket_size, bracketSizes[idx]);
DCHECK_LE(size, bracket_size);
DCHECK(size > 512 || bracket_size - size < 16);
DCHECK_LT(idx, kNumThreadLocalSizeBrackets);
Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
if (kIsDebugBuild) {
// Need the lock to prevent race conditions.
MutexLock mu(self, *size_bracket_locks_[idx]);
CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
}
DCHECK(thread_local_run != nullptr);
DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_);
return !thread_local_run->IsFull();
}
inline void* RosAlloc::AllocFromThreadLocalRun(Thread* self, size_t size,
size_t* bytes_allocated) {
DCHECK(bytes_allocated != nullptr);
if (UNLIKELY(!IsSizeForThreadLocal(size))) {
return nullptr;
}
size_t bracket_size;
size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
if (kIsDebugBuild) {
// Need the lock to prevent race conditions.
MutexLock mu(self, *size_bracket_locks_[idx]);
CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
}
DCHECK(thread_local_run != nullptr);
DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_);
void* slot_addr = thread_local_run->AllocSlot();
if (LIKELY(slot_addr != nullptr)) {
*bytes_allocated = bracket_size;
}
return slot_addr;
}
inline size_t RosAlloc::MaxBytesBulkAllocatedFor(size_t size) {
if (UNLIKELY(!IsSizeForThreadLocal(size))) {
return size;
}
size_t bracket_size;
size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
return numOfSlots[idx] * bracket_size;
}
inline void* RosAlloc::Run::AllocSlot() {
const size_t idx = size_bracket_idx_;
while (true) {
if (kIsDebugBuild) {
// Make sure that no slots leaked, the bitmap should be full for all previous vectors.
for (size_t i = 0; i < first_search_vec_idx_; ++i) {
CHECK_EQ(~alloc_bit_map_[i], 0U);
}
}
uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_];
uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr);
if (LIKELY(ffz1 != 0)) {
const uint32_t ffz = ffz1 - 1;
const uint32_t slot_idx = ffz +
first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte;
const uint32_t mask = 1U << ffz;
DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range";
// Found an empty slot. Set the bit.
DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U);
*alloc_bitmap_ptr |= mask;
DCHECK_NE(*alloc_bitmap_ptr & mask, 0U);
uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) +
headerSizes[idx] + slot_idx * bracketSizes[idx];
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex
<< reinterpret_cast<intptr_t>(slot_addr)
<< ", bracket_size=" << std::dec << bracketSizes[idx]
<< ", slot_idx=" << slot_idx;
}
return slot_addr;
}
const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32;
if (first_search_vec_idx_ + 1 >= num_words) {
DCHECK(IsFull());
// Already at the last word, return null.
return nullptr;
}
// Increase the index to the next word and try again.
++first_search_vec_idx_;
}
}
} // namespace allocator
} // namespace gc
} // namespace art
......
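The fast path above hinges on Run::AllocSlot() being a plain bitmap scan with no atomic operations, and on first_search_vec_idx_ remembering how far the scan has advanced so words that are already full are never rescanned. A standalone model of that scan, assuming 32-bit bitmap words; the function and variable names are illustrative:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustrative bitmap slot allocator. first_search_vec points at the first
// 32-bit word that may still contain a free bit, so words that are already
// full are never rescanned on later calls. Returns the slot index, or -1.
int AllocSlotIndex(uint32_t* bitmap, std::size_t num_words,
                   std::size_t* first_search_vec) {
  while (*first_search_vec < num_words) {
    uint32_t word = bitmap[*first_search_vec];
    int ffz = __builtin_ffs(~word);        // 1-based index of first zero bit
    if (ffz != 0) {
      int bit = ffz - 1;
      bitmap[*first_search_vec] |= 1u << bit;
      return static_cast<int>(*first_search_vec) * 32 + bit;
    }
    ++*first_search_vec;                   // word is full, skip it from now on
  }
  return -1;                               // run is full
}

int main() {
  uint32_t bitmap[2];
  std::memset(bitmap, 0, sizeof(bitmap));
  std::size_t first_search_vec = 0;
  for (int i = 0; i < 66; ++i) {
    if (AllocSlotIndex(bitmap, 2, &first_search_vec) < 0) {
      std::printf("run full after %d allocations\n", i);  // prints 64
      break;
    }
  }
  return 0;
}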
......@@ -454,7 +454,10 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
return byte_size;
}
void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) {
void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
size_t* usable_size, size_t* bytes_tl_bulk_allocated) {
DCHECK(bytes_allocated != nullptr);
DCHECK(usable_size != nullptr);
DCHECK_GT(size, kLargeSizeThreshold);
size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
void* r;
......@@ -470,6 +473,8 @@ void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_alloca
}
const size_t total_bytes = num_pages * kPageSize;
*bytes_allocated = total_bytes;
*usable_size = total_bytes;
*bytes_tl_bulk_allocated = total_bytes;
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
<< "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
......@@ -622,7 +627,12 @@ inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
return slot_addr;
}
void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) {
void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
DCHECK(bytes_allocated != nullptr);
DCHECK(usable_size != nullptr);
DCHECK(bytes_tl_bulk_allocated != nullptr);
DCHECK_LE(size, kLargeSizeThreshold);
size_t bracket_size;
size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
......@@ -634,14 +644,19 @@ void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* byte
Locks::mutator_lock_->AssertExclusiveHeld(self);
void* slot_addr = AllocFromCurrentRunUnlocked(self, idx);
if (LIKELY(slot_addr != nullptr)) {
DCHECK(bytes_allocated != nullptr);
*bytes_allocated = bracket_size;
// Caller verifies that it is all 0.
*usable_size = bracket_size;
*bytes_tl_bulk_allocated = bracket_size;
}
// Caller verifies that it is all 0.
return slot_addr;
}
void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated,
size_t* usable_size, size_t* bytes_tl_bulk_allocated) {
DCHECK(bytes_allocated != nullptr);
DCHECK(usable_size != nullptr);
DCHECK(bytes_tl_bulk_allocated != nullptr);
DCHECK_LE(size, kLargeSizeThreshold);
size_t bracket_size;
size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
......@@ -712,31 +727,43 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
self->SetRosAllocRun(idx, thread_local_run);
DCHECK(!thread_local_run->IsFull());
}
DCHECK(thread_local_run != nullptr);
DCHECK(!thread_local_run->IsFull());
DCHECK(thread_local_run->IsThreadLocal());
// Account for all the free slots in the new or refreshed thread local run.
*bytes_tl_bulk_allocated = thread_local_run->NumberOfFreeSlots() * bracket_size;
slot_addr = thread_local_run->AllocSlot();
// Must succeed now with a new run.
DCHECK(slot_addr != nullptr);
} else {
// The slot is already counted. Leave it as is.
*bytes_tl_bulk_allocated = 0;
}
DCHECK(slot_addr != nullptr);
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex
<< reinterpret_cast<intptr_t>(slot_addr)
<< "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
<< "(" << std::dec << (bracket_size) << ")";
}
*bytes_allocated = bracket_size;
*usable_size = bracket_size;
} else {
// Use the (shared) current run.
MutexLock mu(self, *size_bracket_locks_[idx]);
slot_addr = AllocFromCurrentRunUnlocked(self, idx);
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex
<< reinterpret_cast<intptr_t>(slot_addr)
<< "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
<< "(" << std::dec << (bracket_size) << ")";
}
if (LIKELY(slot_addr != nullptr)) {
*bytes_allocated = bracket_size;
*usable_size = bracket_size;
*bytes_tl_bulk_allocated = bracket_size;
}
}
DCHECK(bytes_allocated != nullptr);
*bytes_allocated = bracket_size;
// Caller verifies that it is all 0.
return slot_addr;
}
......@@ -852,44 +879,6 @@ std::string RosAlloc::Run::Dump() {
return stream.str();
}
inline void* RosAlloc::Run::AllocSlot() {
const size_t idx = size_bracket_idx_;
while (true) {
if (kIsDebugBuild) {
// Make sure that no slots leaked, the bitmap should be full for all previous vectors.
for (size_t i = 0; i < first_search_vec_idx_; ++i) {
CHECK_EQ(~alloc_bit_map_[i], 0U);
}
}
uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_];
uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr);
if (LIKELY(ffz1 != 0)) {
const uint32_t ffz = ffz1 - 1;
const uint32_t slot_idx = ffz + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte;
const uint32_t mask = 1U << ffz;
DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range";
// Found an empty slot. Set the bit.
DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U);
*alloc_bitmap_ptr |= mask;
DCHECK_NE(*alloc_bitmap_ptr & mask, 0U);
uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
<< ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
}
return slot_addr;
}
const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32;
if (first_search_vec_idx_ + 1 >= num_words) {
DCHECK(IsFull());
// Already at the last word, return null.
return nullptr;
}
// Increase the index to the next word and try again.
++first_search_vec_idx_;
}
}
void RosAlloc::Run::FreeSlot(void* ptr) {
DCHECK(!IsThreadLocal());
const uint8_t idx = size_bracket_idx_;
......@@ -920,6 +909,25 @@ void RosAlloc::Run::FreeSlot(void* ptr) {
}
}
size_t RosAlloc::Run::NumberOfFreeSlots() {
size_t num_alloc_slots = 0;
const size_t idx = size_bracket_idx_;
const size_t num_slots = numOfSlots[idx];
const size_t num_vec = RoundUp(num_slots, 32) / 32;
DCHECK_NE(num_vec, 0U);
for (size_t v = 0; v < num_vec - 1; v++) {
num_alloc_slots += POPCOUNT(alloc_bit_map_[v]);
}
// Don't count the invalid bits in the last vector.
uint32_t last_vec_masked = alloc_bit_map_[num_vec - 1] &
~GetBitmapLastVectorMask(num_slots, num_vec);
num_alloc_slots += POPCOUNT(last_vec_masked);
size_t num_free_slots = num_slots - num_alloc_slots;
DCHECK_LE(num_alloc_slots, num_slots);
DCHECK_LE(num_free_slots, num_slots);
return num_free_slots;
}
inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
DCHECK(IsThreadLocal());
// Free slots in the alloc bit map based on the thread local free bit map.
......@@ -1055,16 +1063,6 @@ inline bool RosAlloc::Run::IsAllFree() {
return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec);
}
inline bool RosAlloc::Run::IsFull() {
const size_t num_vec = NumberOfBitmapVectors();
for (size_t v = 0; v < num_vec; ++v) {
if (~alloc_bit_map_[v] != 0) {
return false;
}
}
return true;
}
inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
const size_t num_vec = NumberOfBitmapVectors();
for (size_t v = 0; v < num_vec; v++) {
......@@ -1654,10 +1652,11 @@ void RosAlloc::SetFootprintLimit(size_t new_capacity) {
}
}
void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
Thread* self = Thread::Current();
// Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
ReaderMutexLock wmu(self, bulk_free_lock_);
size_t free_bytes = 0U;
for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
MutexLock mu(self, *size_bracket_locks_[idx]);
Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
......@@ -1665,9 +1664,12 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
// Invalid means already revoked.
DCHECK(thread_local_run->IsThreadLocal());
if (thread_local_run != dedicated_full_run_) {
// Note the thread local run may not be full here.
thread->SetRosAllocRun(idx, dedicated_full_run_);
DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
// Note the thread local run may not be full here.
// Count the number of free slots left.
size_t num_free_slots = thread_local_run->NumberOfFreeSlots();
free_bytes += num_free_slots * bracketSizes[idx];
bool dont_care;
thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
thread_local_run->SetIsThreadLocal(false);
......@@ -1677,6 +1679,7 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
RevokeRun(self, idx, thread_local_run);
}
}
return free_bytes;
}
void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) {
......@@ -1719,16 +1722,18 @@ void RosAlloc::RevokeThreadUnsafeCurrentRuns() {
}
}
void RosAlloc::RevokeAllThreadLocalRuns() {
size_t RosAlloc::RevokeAllThreadLocalRuns() {
// This is called when a mutator thread won't allocate such as at
// the Zygote creation time or during the GC pause.
MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
size_t free_bytes = 0U;
for (Thread* thread : thread_list) {
RevokeThreadLocalRuns(thread);
free_bytes += RevokeThreadLocalRuns(thread);
}
RevokeThreadUnsafeCurrentRuns();
return free_bytes;
}
void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) {
......
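Revoking a thread-local run must credit back only the slots that were never used, so Run::NumberOfFreeSlots() counts allocated bits while ignoring the padding bits in the last bitmap word when the slot count is not a multiple of 32. A small self-contained check of that masking; the slot count and bit patterns are made up for illustration:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Illustrative: count the free slots of a run whose allocation bitmap may
// have padding bits in its last 32-bit word (those must not be counted).
std::size_t NumberOfFreeSlots(const uint32_t* bitmap, std::size_t num_slots) {
  std::size_t num_vec = (num_slots + 31) / 32;
  std::size_t allocated = 0;
  for (std::size_t v = 0; v + 1 < num_vec; ++v) {
    allocated += __builtin_popcount(bitmap[v]);
  }
  // Mask off the bits past num_slots in the last word before counting.
  std::size_t valid_in_last = num_slots - (num_vec - 1) * 32;
  uint32_t last_mask = (valid_in_last == 32)
                           ? 0xFFFFFFFFu
                           : ((1u << valid_in_last) - 1u);
  allocated += __builtin_popcount(bitmap[num_vec - 1] & last_mask);
  return num_slots - allocated;
}

int main() {
  // 40 slots -> two words; pretend the padding bits of word 1 are all set.
  uint32_t bitmap[2] = {0xFFFFFFFFu, 0xFFFFFF0Fu};
  std::printf("free slots: %zu\n", NumberOfFreeSlots(bitmap, 40));  // prints 4
  return 0;
}

RevokeThreadLocalRuns() then multiplies the free-slot count by the bracket size and reports the total so the heap can subtract it from its ahead-of-time bulk charge.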
......@@ -230,8 +230,10 @@ class RosAlloc {
static uint32_t GetBitmapLastVectorMask(size_t num_slots, size_t num_vec);
// Returns true if none of the slots in the run are in use.
bool IsAllFree();
// Returns the number of free slots.
size_t NumberOfFreeSlots();
// Returns true if all the slots in the run are in use.
bool IsFull();
ALWAYS_INLINE bool IsFull();
// Returns true if the bulk free bit map is clean.
bool IsBulkFreeBitmapClean();
// Returns true if the thread local free bit map is clean.
......@@ -309,6 +311,15 @@ class RosAlloc {
DCHECK(bracketSizes[idx] == size);
return idx;
}
// Returns true if the given allocation size is for a thread local allocation.
static bool IsSizeForThreadLocal(size_t size) {
DCHECK_GT(kNumThreadLocalSizeBrackets, 0U);
size_t max_thread_local_bracket_idx = kNumThreadLocalSizeBrackets - 1;
bool is_size_for_thread_local = size <= bracketSizes[max_thread_local_bracket_idx];
DCHECK(size > kLargeSizeThreshold ||
(is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
return is_size_for_thread_local;
}
// Rounds up the size to the nearest bracket size.
static size_t RoundToBracketSize(size_t size) {
DCHECK(size <= kLargeSizeThreshold);
......@@ -504,11 +515,13 @@ class RosAlloc {
size_t FreePages(Thread* self, void* ptr, bool already_zero) EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Allocate/free a run slot.
void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
size_t* bytes_tl_bulk_allocated)
LOCKS_EXCLUDED(lock_);
// Allocate/free a run slot without acquiring locks.
// TODO: EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated)
void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
size_t* usable_size, size_t* bytes_tl_bulk_allocated)
LOCKS_EXCLUDED(lock_);
void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx);
......@@ -527,7 +540,9 @@ class RosAlloc {
size_t FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);
// Allocates large objects.
void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
size_t* usable_size, size_t* bytes_tl_bulk_allocated)
LOCKS_EXCLUDED(lock_);
// Revoke a run by adding it to non_full_runs_ or freeing the pages.
void RevokeRun(Thread* self, size_t idx, Run* run);
......@@ -551,13 +566,26 @@ class RosAlloc {
// If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization.
// If used, this may cause race conditions if multiple threads are allocating at the same time.
template<bool kThreadSafe = true>
void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
size_t* bytes_tl_bulk_allocated)
LOCKS_EXCLUDED(lock_);
size_t Free(Thread* self, void* ptr)
LOCKS_EXCLUDED(bulk_free_lock_);
size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
LOCKS_EXCLUDED(bulk_free_lock_);
// Returns true if the given allocation request can be allocated in
// an existing thread local run without allocating a new run.
ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
// Allocate the given allocation request in an existing thread local
// run without allocating a new run.
ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);
// Returns the maximum bytes that could be allocated for the given
// size in bulk, that is, the maximum value for the
// bytes_tl_bulk_allocated out param returned by RosAlloc::Alloc().
ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);
// Returns the size of the allocated slot for a given allocated memory chunk.
size_t UsableSize(const void* ptr);
// Returns the size of the allocated slot for a given size.
......@@ -586,9 +614,13 @@ class RosAlloc {
void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_);
// Releases the thread-local runs assigned to the given thread back to the common set of runs.
void RevokeThreadLocalRuns(Thread* thread);
// Returns the total bytes of free slots in the revoked thread local runs. This is to be
// subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
size_t RevokeThreadLocalRuns(Thread* thread);
// Releases the thread-local runs assigned to all the threads back to the common set of runs.
void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
// Returns the total bytes of free slots in the revoked thread local runs. This is to be
// subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
size_t RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
// Assert the thread local runs of a thread are revoked.
void AssertThreadLocalRunsAreRevoked(Thread* thread);
// Assert all the thread local runs are revoked.
......
......@@ -1259,8 +1259,9 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) {
size_t region_space_bytes_allocated = 0U;
size_t non_moving_space_bytes_allocated = 0U;
size_t bytes_allocated = 0U;
size_t dummy;
mirror::Object* to_ref = region_space_->AllocNonvirtual<true>(
region_space_alloc_size, &region_space_bytes_allocated, nullptr);
region_space_alloc_size, &region_space_bytes_allocated, nullptr, &dummy);
bytes_allocated = region_space_bytes_allocated;
if (to_ref != nullptr) {
DCHECK_EQ(region_space_alloc_size, region_space_bytes_allocated);
......@@ -1286,7 +1287,7 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) {
}
fall_back_to_non_moving = true;
to_ref = heap_->non_moving_space_->Alloc(Thread::Current(), obj_size,
&non_moving_space_bytes_allocated, nullptr);
&non_moving_space_bytes_allocated, nullptr, &dummy);
CHECK(to_ref != nullptr) << "Fall-back non-moving space allocation failed";
bytes_allocated = non_moving_space_bytes_allocated;
// Mark it in the mark bitmap.
......
......@@ -48,6 +48,7 @@ void Iteration::Reset(GcCause gc_cause, bool clear_soft_references) {
gc_cause_ = gc_cause;
freed_ = ObjectBytePair();
freed_los_ = ObjectBytePair();
freed_bytes_revoke_ = 0;
}
uint64_t Iteration::GetEstimatedThroughput() const {
......
......@@ -75,6 +75,12 @@ class Iteration {
uint64_t GetFreedLargeObjects() const {
return freed_los_.objects;
}
uint64_t GetFreedRevokeBytes() const {
return freed_bytes_revoke_;
}
void SetFreedRevoke(uint64_t freed) {
freed_bytes_revoke_ = freed;
}
void Reset(GcCause gc_cause, bool clear_soft_references);
// Returns the estimated throughput of the iteration.
uint64_t GetEstimatedThroughput() const;
......@@ -99,6 +105,7 @@ class Iteration {
TimingLogger timings_;
ObjectBytePair freed_;
ObjectBytePair freed_los_;
uint64_t freed_bytes_revoke_; // see Heap::num_bytes_freed_revoke_.
std::vector<uint64_t> pause_times_;
friend class GarbageCollector;
......
......@@ -292,6 +292,7 @@ void MarkSweep::ReclaimPhase() {
Runtime::Current()->AllowNewSystemWeaks();
{
WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
GetHeap()->RecordFreeRevoke();
// Reclaim unmarked objects.
Sweep(false);
// Swap the live and mark bitmaps for each space which we modified. This is an
......
......@@ -242,6 +242,7 @@ void SemiSpace::MarkingPhase() {
// Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked
// before they are properly counted.
RevokeAllThreadLocalBuffers();
GetHeap()->RecordFreeRevoke(); // This is for the non-moving rosalloc space used by GSS.
// Record freed memory.
const int64_t from_bytes = from_space_->GetBytesAllocated();
const int64_t to_bytes = bytes_moved_;
......@@ -489,17 +490,18 @@ static inline size_t CopyAvoidingDirtyingPages(void* dest, const void* src, size
mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) {
const size_t object_size = obj->SizeOf();
size_t bytes_allocated;
size_t bytes_allocated, dummy;
mirror::Object* forward_address = nullptr;
if (generational_ && reinterpret_cast<uint8_t*>(obj) < last_gc_to_space_end_) {
// If it's allocated before the last GC (older), move
// (pseudo-promote) it to the main free list space (as sort
// of an old generation.)
forward_address = promo_dest_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated,
nullptr);
nullptr, &dummy);
if (UNLIKELY(forward_address == nullptr)) {
// If out of space, fall back to the to-space.
forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr,
&dummy);
// No logic for marking the bitmap, so it must be null.
DCHECK(to_space_live_bitmap_ == nullptr);
} else {
......@@ -544,7 +546,8 @@ mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) {
}
} else {
// If it's allocated after the last GC (younger), copy it to the to-space.
forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr,
&dummy);
if (forward_address != nullptr && to_space_live_bitmap_ != nullptr) {
to_space_live_bitmap_->Set(forward_address);
}
......@@ -552,7 +555,7 @@ mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) {
// If it's still null, attempt to use the fallback space.
if (UNLIKELY(forward_address == nullptr)) {
forward_address = fallback_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated,
nullptr);
nullptr, &dummy);
CHECK(forward_address != nullptr) << "Out of memory in the to-space and fallback space.";
accounting::ContinuousSpaceBitmap* bitmap = fallback_space_->GetLiveBitmap();
if (bitmap != nullptr) {
......
......@@ -64,6 +64,7 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas
// fragmentation.
}
AllocationTimer alloc_timer(this, &obj);
// bytes allocated for the (individual) object.
size_t bytes_allocated;
size_t usable_size;
size_t new_num_bytes_allocated = 0;
......@@ -86,13 +87,29 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas
usable_size = bytes_allocated;
pre_fence_visitor(obj, usable_size);
QuasiAtomic::ThreadFenceForConstructor();
} else if (!kInstrumented && allocator == kAllocatorTypeRosAlloc &&
(obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) &&
LIKELY(obj != nullptr)) {
DCHECK(!running_on_valgrind_);
obj->SetClass(klass);
if (kUseBakerOrBrooksReadBarrier) {
if (kUseBrooksReadBarrier) {
obj->SetReadBarrierPointer(obj);
}
obj->AssertReadBarrierPointer();
}
usable_size = bytes_allocated;
pre_fence_visitor(obj, usable_size);
QuasiAtomic::ThreadFenceForConstructor();
} else {
// bytes allocated that takes bulk thread-local buffer allocations into account.
size_t bytes_tl_bulk_allocated = 0;
obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated,
&usable_size);
&usable_size, &bytes_tl_bulk_allocated);
if (UNLIKELY(obj == nullptr)) {
bool is_current_allocator = allocator == GetCurrentAllocator();
obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &usable_size,
&klass);
&bytes_tl_bulk_allocated, &klass);
if (obj == nullptr) {
bool after_is_current_allocator = allocator == GetCurrentAllocator();
// If there is a pending exception, fail the allocation right away since the next one
......@@ -126,9 +143,9 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas
WriteBarrierField(obj, mirror::Object::ClassOffset(), klass);
}
pre_fence_visitor(obj, usable_size);
new_num_bytes_allocated =
static_cast<size_t>(num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated))
+ bytes_allocated;
new_num_bytes_allocated = static_cast<size_t>(
num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_tl_bulk_allocated))
+ bytes_tl_bulk_allocated;
}
if (kIsDebugBuild && Runtime::Current()->IsStarted()) {
CHECK_LE(obj->SizeOf(), usable_size);
......@@ -196,8 +213,10 @@ inline mirror::Object* Heap::AllocLargeObject(Thread* self, mirror::Class** klas
template <const bool kInstrumented, const bool kGrow>
inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type,
size_t alloc_size, size_t* bytes_allocated,
size_t* usable_size) {
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
if (allocator_type != kAllocatorTypeTLAB && allocator_type != kAllocatorTypeRegionTLAB &&
allocator_type != kAllocatorTypeRosAlloc &&
UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
return nullptr;
}
......@@ -210,35 +229,56 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator
if (LIKELY(ret != nullptr)) {
*bytes_allocated = alloc_size;
*usable_size = alloc_size;
*bytes_tl_bulk_allocated = alloc_size;
}
break;
}
case kAllocatorTypeRosAlloc: {
if (kInstrumented && UNLIKELY(running_on_valgrind_)) {
// If running on valgrind, we should be using the instrumented path.
ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size);
size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size);
if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
max_bytes_tl_bulk_allocated))) {
return nullptr;
}
ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
} else {
DCHECK(!running_on_valgrind_);
ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size);
size_t max_bytes_tl_bulk_allocated =
rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size);
if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
max_bytes_tl_bulk_allocated))) {
return nullptr;
}
if (!kInstrumented) {
DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size));
}
ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
}
break;
}
case kAllocatorTypeDlMalloc: {
if (kInstrumented && UNLIKELY(running_on_valgrind_)) {
// If running on valgrind, we should be using the instrumented path.
ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size);
ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
} else {
DCHECK(!running_on_valgrind_);
ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size);
ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
}
break;
}
case kAllocatorTypeNonMoving: {
ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size);
ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
break;
}
case kAllocatorTypeLOS: {
ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size);
ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
// Note that the bump pointer spaces aren't necessarily next to
// the other continuous spaces like the non-moving alloc space or
// the zygote space.
......@@ -257,20 +297,22 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator
if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
return nullptr;
}
*bytes_allocated = new_tlab_size;
*bytes_tl_bulk_allocated = new_tlab_size;
} else {
*bytes_allocated = 0;
*bytes_tl_bulk_allocated = 0;
}
// The allocation can't fail.
ret = self->AllocTlab(alloc_size);
DCHECK(ret != nullptr);
*bytes_allocated = alloc_size;
*usable_size = alloc_size;
break;
}
case kAllocatorTypeRegion: {
DCHECK(region_space_ != nullptr);
alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size);
ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
break;
}
case kAllocatorTypeRegionTLAB: {
......@@ -283,15 +325,17 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator
// Try to allocate a tlab.
if (!region_space_->AllocNewTlab(self)) {
// Failed to allocate a tlab. Try non-tlab.
ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size);
ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
return ret;
}
*bytes_allocated = space::RegionSpace::kRegionSize;
*bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
// Fall-through.
} else {
// Check OOME for a non-tlab allocation.
if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) {
ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size);
ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
return ret;
} else {
// Neither tlab or non-tlab works. Give up.
......@@ -301,18 +345,20 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator
} else {
// Large. Check OOME.
if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size);
ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
return ret;
} else {
return nullptr;
}
}
} else {
*bytes_allocated = 0;
*bytes_tl_bulk_allocated = 0; // Allocated in an existing buffer.
}
// The allocation can't fail.
ret = self->AllocTlab(alloc_size);
DCHECK(ret != nullptr);
*bytes_allocated = alloc_size;
*usable_size = alloc_size;
break;
}
......
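Since a rosalloc thread-local allocation may charge a whole run's worth of bytes to num_bytes_allocated_ at once, the OOM check in TryToAllocate() above is performed against MaxBytesBulkAllocatedFor(alloc_size), the worst case for the bulk charge, rather than against the individual request. A hedged, self-contained model of that check; the growth limit and bracket geometry below are invented for illustration:

#include <atomic>
#include <cstddef>
#include <cstdio>

// Illustrative model of the pre-allocation OOM check against the bulk size.
std::atomic<std::size_t> num_bytes_allocated{0};
constexpr std::size_t kGrowthLimit = 4096;       // invented footprint limit

// Worst case a single request can add to the counter: a fresh thread-local
// run for that size bracket (slots * bracket size), not just the object.
std::size_t MaxBytesBulkAllocatedFor(std::size_t size) {
  constexpr std::size_t kBracketSize = 64;       // invented bracket geometry
  constexpr std::size_t kSlotsPerRun = 32;
  return (size <= kBracketSize) ? kBracketSize * kSlotsPerRun : size;
}

bool IsOutOfMemoryOnAllocation(std::size_t bytes) {
  return num_bytes_allocated.load() + bytes > kGrowthLimit;
}

int main() {
  std::size_t request = 48;
  std::size_t worst_case = MaxBytesBulkAllocatedFor(request);  // 2048, not 48
  if (IsOutOfMemoryOnAllocation(worst_case)) {
    std::printf("would take the GC/OOME slow path\n");
  } else {
    std::printf("fast path ok, may charge up to %zu bytes\n", worst_case);
  }
  return 0;
}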
......@@ -156,6 +156,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max
total_objects_freed_ever_(0),
num_bytes_allocated_(0),
native_bytes_allocated_(0),
num_bytes_freed_revoke_(0),
verify_missing_card_marks_(false),
verify_system_weaks_(false),
verify_pre_gc_heap_(verify_pre_gc_heap),
......@@ -1344,6 +1345,19 @@ void Heap::RecordFree(uint64_t freed_objects, int64_t freed_bytes) {
}
}
void Heap::RecordFreeRevoke() {
// Subtract num_bytes_freed_revoke_ from num_bytes_allocated_ to cancel out the
// ahead-of-time, bulk counting of bytes allocated in rosalloc thread-local buffers.
// If there's a concurrent revoke, it is OK not to reset num_bytes_freed_revoke_
// all the way to zero, as the remainder will be subtracted at the next GC.
size_t bytes_freed = num_bytes_freed_revoke_.LoadSequentiallyConsistent();
CHECK_GE(num_bytes_freed_revoke_.FetchAndSubSequentiallyConsistent(bytes_freed),
bytes_freed) << "num_bytes_freed_revoke_ underflow";
CHECK_GE(num_bytes_allocated_.FetchAndSubSequentiallyConsistent(bytes_freed),
bytes_freed) << "num_bytes_allocated_ underflow";
GetCurrentGcIteration()->SetFreedRevoke(bytes_freed);
}
space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const {
for (const auto& space : continuous_spaces_) {
if (space->AsContinuousSpace()->IsRosAllocSpace()) {
......@@ -1358,6 +1372,7 @@ space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc)
mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocator,
size_t alloc_size, size_t* bytes_allocated,
size_t* usable_size,
size_t* bytes_tl_bulk_allocated,
mirror::Class** klass) {
bool was_default_allocator = allocator == GetCurrentAllocator();
// Make sure there is no pending exception since we may need to throw an OOME.
......@@ -1377,7 +1392,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat
}
// A GC was in progress and we blocked, retry allocation now that memory has been freed.
mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated,
usable_size);
usable_size, bytes_tl_bulk_allocated);
if (ptr != nullptr) {
return ptr;
}
......@@ -1391,7 +1406,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat
}
if (gc_ran) {
mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated,
usable_size);
usable_size, bytes_tl_bulk_allocated);
if (ptr != nullptr) {
return ptr;
}
......@@ -1411,7 +1426,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat
if (plan_gc_ran) {
// Did we free sufficient memory for the allocation to succeed?
mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated,
usable_size);
usable_size, bytes_tl_bulk_allocated);
if (ptr != nullptr) {
return ptr;
}
......@@ -1420,7 +1435,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat
// Allocations have failed after GCs; this is an exceptional state.
// Try harder, growing the heap if necessary.
mirror::Object* ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated,
usable_size);
usable_size, bytes_tl_bulk_allocated);
if (ptr != nullptr) {
return ptr;
}
......@@ -1437,7 +1452,8 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat
if (was_default_allocator && allocator != GetCurrentAllocator()) {
return nullptr;
}
ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size);
ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
if (ptr == nullptr) {
const uint64_t current_time = NanoTime();
switch (allocator) {
......@@ -1453,7 +1469,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat
case HomogeneousSpaceCompactResult::kSuccess:
// If the allocation succeeded, we delayed an oom.
ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated,
usable_size);
usable_size, bytes_tl_bulk_allocated);
if (ptr != nullptr) {
count_delayed_oom_++;
}
......@@ -1498,7 +1514,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat
} else {
LOG(WARNING) << "Disabled moving GC due to the non moving space being full";
ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated,
usable_size);
usable_size, bytes_tl_bulk_allocated);
}
}
break;
......@@ -1984,8 +2000,8 @@ class ZygoteCompactingCollector FINAL : public collector::SemiSpace {
if (it == bins_.end()) {
// No available space in the bins, place it in the target space instead (grows the zygote
// space).
size_t bytes_allocated;
forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr);
size_t bytes_allocated, dummy;
forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr, &dummy);
if (to_space_live_bitmap_ != nullptr) {
to_space_live_bitmap_->Set(forward_address);
} else {
......@@ -3084,7 +3100,8 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran,
SetIdealFootprint(target_size);
if (IsGcConcurrent()) {
const uint64_t freed_bytes = current_gc_iteration_.GetFreedBytes() +
current_gc_iteration_.GetFreedLargeObjectBytes();
current_gc_iteration_.GetFreedLargeObjectBytes() +
current_gc_iteration_.GetFreedRevokeBytes();
// Bytes allocated will shrink by freed_bytes after the GC runs, so if we want to figure out
// how many bytes were allocated during the GC we need to add freed_bytes back on.
CHECK_GE(bytes_allocated + freed_bytes, bytes_allocated_before_gc);
......@@ -3290,31 +3307,43 @@ void Heap::RequestTrim(Thread* self) {
void Heap::RevokeThreadLocalBuffers(Thread* thread) {
if (rosalloc_space_ != nullptr) {
rosalloc_space_->RevokeThreadLocalBuffers(thread);
size_t freed_bytes_revoke = rosalloc_space_->RevokeThreadLocalBuffers(thread);
if (freed_bytes_revoke > 0U) {
num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke);
CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed());
}
}
if (bump_pointer_space_ != nullptr) {
bump_pointer_space_->RevokeThreadLocalBuffers(thread);
CHECK_EQ(bump_pointer_space_->RevokeThreadLocalBuffers(thread), 0U);
}
if (region_space_ != nullptr) {
region_space_->RevokeThreadLocalBuffers(thread);
CHECK_EQ(region_space_->RevokeThreadLocalBuffers(thread), 0U);
}
}
void Heap::RevokeRosAllocThreadLocalBuffers(Thread* thread) {
if (rosalloc_space_ != nullptr) {
rosalloc_space_->RevokeThreadLocalBuffers(thread);
size_t freed_bytes_revoke = rosalloc_space_->RevokeThreadLocalBuffers(thread);
if (freed_bytes_revoke > 0U) {
num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke);
CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed());
}
}
}
void Heap::RevokeAllThreadLocalBuffers() {
if (rosalloc_space_ != nullptr) {
rosalloc_space_->RevokeAllThreadLocalBuffers();
size_t freed_bytes_revoke = rosalloc_space_->RevokeAllThreadLocalBuffers();
if (freed_bytes_revoke > 0U) {
num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke);
CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed());
}
}
if (bump_pointer_space_ != nullptr) {
bump_pointer_space_->RevokeAllThreadLocalBuffers();
CHECK_EQ(bump_pointer_space_->RevokeAllThreadLocalBuffers(), 0U);
}
if (region_space_ != nullptr) {
region_space_->RevokeAllThreadLocalBuffers();
CHECK_EQ(region_space_->RevokeAllThreadLocalBuffers(), 0U);
}
}
......
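RecordFreeRevoke() drains whatever has accumulated in num_bytes_freed_revoke_ and subtracts it from num_bytes_allocated_. Using a fetch-and-subtract of a snapshot, rather than storing zero, means bytes added by a concurrent revoke are never lost; they are simply deferred to the next GC. A minimal standalone model of that lifecycle; the byte counts are illustrative:

#include <atomic>
#include <cstddef>
#include <cstdio>

// Illustrative model of the revoke accounting shown in the hunk above.
std::atomic<std::size_t> num_bytes_allocated{0};
std::atomic<std::size_t> num_bytes_freed_revoke{0};

void RevokeThreadLocalBuffers(std::size_t free_bytes_in_runs) {
  // Free slots left in a revoked run were pre-counted as allocated, so
  // remember them here to be credited back at the next GC.
  if (free_bytes_in_runs > 0) {
    num_bytes_freed_revoke.fetch_add(free_bytes_in_runs);
  }
}

void RecordFreeRevoke() {
  // Drain a snapshot; a concurrent revoke may add more after the load,
  // which is fine -- the remainder is subtracted at the next GC.
  std::size_t bytes_freed = num_bytes_freed_revoke.load();
  num_bytes_freed_revoke.fetch_sub(bytes_freed);
  num_bytes_allocated.fetch_sub(bytes_freed);
}

int main() {
  num_bytes_allocated.fetch_add(2048);        // bulk charge for a fresh run
  // ... the thread uses 5 slots of 64 bytes, then its run is revoked ...
  RevokeThreadLocalBuffers((32 - 5) * 64);    // 1728 bytes were never used
  RecordFreeRevoke();                         // next GC cancels the excess
  std::printf("net allocated: %zu bytes\n", num_bytes_allocated.load());  // 320
  return 0;
}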
......@@ -390,6 +390,9 @@ class Heap {
// free-list backed space.
void RecordFree(uint64_t freed_objects, int64_t freed_bytes);
// Record the bytes freed by thread-local buffer revoke.
void RecordFreeRevoke();
// Must be called if a field of an Object in the heap changes, and before any GC safe-point.
// The call is not needed if NULL is stored in the field.
ALWAYS_INLINE void WriteBarrierField(const mirror::Object* dst, MemberOffset /*offset*/,
......@@ -664,6 +667,11 @@ class Heap {
// Whether or not we may use a garbage collector, used so that we only create collectors we need.
bool MayUseCollector(CollectorType type) const;
// Used by tests to reduce timing-dependent flakiness in OOME behavior.
void SetMinIntervalHomogeneousSpaceCompactionByOom(uint64_t interval) {
min_interval_homogeneous_space_compaction_by_oom_ = interval;
}
private:
class ConcurrentGCTask;
class CollectorTransitionTask;
......@@ -724,6 +732,7 @@ class Heap {
// an initial allocation attempt failed.
mirror::Object* AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t num_bytes,
size_t* bytes_allocated, size_t* usable_size,
size_t* bytes_tl_bulk_allocated,
mirror::Class** klass)
LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
......@@ -742,7 +751,8 @@ class Heap {
template <const bool kInstrumented, const bool kGrow>
ALWAYS_INLINE mirror::Object* TryToAllocate(Thread* self, AllocatorType allocator_type,
size_t alloc_size, size_t* bytes_allocated,
size_t* usable_size)
size_t* usable_size,
size_t* bytes_tl_bulk_allocated)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type)
......@@ -998,6 +1008,13 @@ class Heap {
// Bytes which are allocated and managed by native code but still need to be accounted for.
Atomic<size_t> native_bytes_allocated_;
// Number of bytes freed by thread local buffer revokes. This will
// cancel out the ahead-of-time bulk counting of bytes allocated in
// rosalloc thread-local buffers. It is temporarily accumulated
// here to be subtracted from num_bytes_allocated_ later at the next
// GC.
Atomic<size_t> num_bytes_freed_revoke_;
// Info related to the current or previous GC iteration.
collector::Iteration current_gc_iteration_;
......
......@@ -24,7 +24,8 @@ namespace gc {
namespace space {
inline mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated,
size_t* usable_size) {
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
num_bytes = RoundUp(num_bytes, kAlignment);
mirror::Object* ret = AllocNonvirtual(num_bytes);
if (LIKELY(ret != nullptr)) {
......@@ -32,13 +33,15 @@ inline mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t
if (usable_size != nullptr) {
*usable_size = num_bytes;
}
*bytes_tl_bulk_allocated = num_bytes;
}
return ret;
}
inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t num_bytes,
size_t* bytes_allocated,
size_t* usable_size) {
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
Locks::mutator_lock_->AssertExclusiveHeld(self);
num_bytes = RoundUp(num_bytes, kAlignment);
uint8_t* end = end_.LoadRelaxed();
......@@ -54,6 +57,7 @@ inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t
if (UNLIKELY(usable_size != nullptr)) {
*usable_size = num_bytes;
}
*bytes_tl_bulk_allocated = num_bytes;
return obj;
}
......
......@@ -93,12 +93,13 @@ mirror::Object* BumpPointerSpace::GetNextObject(mirror::Object* obj) {
return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment));
}
void BumpPointerSpace::RevokeThreadLocalBuffers(Thread* thread) {
size_t BumpPointerSpace::RevokeThreadLocalBuffers(Thread* thread) {
MutexLock mu(Thread::Current(), block_lock_);
RevokeThreadLocalBuffersLocked(thread);
return 0U;
}
void BumpPointerSpace::RevokeAllThreadLocalBuffers() {
size_t BumpPointerSpace::RevokeAllThreadLocalBuffers() {
Thread* self = Thread::Current();
MutexLock mu(self, *Locks::runtime_shutdown_lock_);
MutexLock mu2(self, *Locks::thread_list_lock_);
......@@ -107,6 +108,7 @@ void BumpPointerSpace::RevokeAllThreadLocalBuffers() {
for (Thread* thread : thread_list) {
RevokeThreadLocalBuffers(thread);
}
return 0U;
}
void BumpPointerSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) {
......
......@@ -47,10 +47,10 @@ class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace {
// Allocate num_bytes, returns nullptr if the space is full.
mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
size_t* usable_size) OVERRIDE;
size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE;
// Thread-unsafe allocation for when mutators are suspended, used by the semispace collector.
mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated,
size_t* usable_size)
size_t* usable_size, size_t* bytes_tl_bulk_allocated)
OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
mirror::Object* AllocNonvirtual(size_t num_bytes);
......@@ -103,9 +103,9 @@ class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace {
void Dump(std::ostream& os) const;
void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_);
void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
Locks::thread_list_lock_);
size_t RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_);
size_t RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
Locks::thread_list_lock_);
void AssertThreadLocalBuffersAreRevoked(Thread* thread) LOCKS_EXCLUDED(block_lock_);
void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
Locks::thread_list_lock_);
......
......@@ -27,11 +27,13 @@ namespace space {
inline mirror::Object* DlMallocSpace::AllocNonvirtual(Thread* self, size_t num_bytes,
size_t* bytes_allocated,
size_t* usable_size) {
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
mirror::Object* obj;
{
MutexLock mu(self, lock_);
obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size);
obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
}
if (LIKELY(obj != NULL)) {
// Zero freshly allocated memory, done while not holding the space's lock.
......@@ -49,9 +51,11 @@ inline size_t DlMallocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_
return size + kChunkOverhead;
}
inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes,
size_t* bytes_allocated,
size_t* usable_size) {
inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(
Thread* /*self*/, size_t num_bytes,
size_t* bytes_allocated,
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes));
if (LIKELY(result != NULL)) {
if (kDebugSpaces) {
......@@ -61,6 +65,7 @@ inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/,
size_t allocation_size = AllocationSizeNonvirtual(result, usable_size);
DCHECK(bytes_allocated != NULL);
*bytes_allocated = allocation_size;
*bytes_tl_bulk_allocated = allocation_size;
}
return result;
}
......
......@@ -123,7 +123,8 @@ void* DlMallocSpace::CreateMspace(void* begin, size_t morecore_start, size_t ini
}
mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes,
size_t* bytes_allocated, size_t* usable_size) {
size_t* bytes_allocated, size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
mirror::Object* result;
{
MutexLock mu(self, lock_);
......@@ -131,7 +132,8 @@ mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes,
size_t max_allowed = Capacity();
mspace_set_footprint_limit(mspace_, max_allowed);
// Try the allocation.
result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size);
result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
// Shrink back down as small as possible.
size_t footprint = mspace_footprint(mspace_);
mspace_set_footprint_limit(mspace_, footprint);
......
......@@ -48,11 +48,15 @@ class DlMallocSpace : public MallocSpace {
// Virtual to allow ValgrindMallocSpace to intercept.
virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated,
size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_);
size_t* usable_size,
size_t* bytes_tl_bulk_allocated)
OVERRIDE LOCKS_EXCLUDED(lock_);
// Virtual to allow ValgrindMallocSpace to intercept.
virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_) {
return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size);
size_t* usable_size, size_t* bytes_tl_bulk_allocated)
OVERRIDE LOCKS_EXCLUDED(lock_) {
return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
}
// Virtual to allow ValgrindMallocSpace to intercept.
virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE {
......@@ -67,15 +71,22 @@ class DlMallocSpace : public MallocSpace {
LOCKS_EXCLUDED(lock_)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE {
return num_bytes;
}
// DlMallocSpaces don't have thread local state.
void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE {
size_t RevokeThreadLocalBuffers(art::Thread*) OVERRIDE {
return 0U;
}
void RevokeAllThreadLocalBuffers() OVERRIDE {
size_t RevokeAllThreadLocalBuffers() OVERRIDE {
return 0U;
}
// Faster non-virtual allocation path.
mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated,
size_t* usable_size) LOCKS_EXCLUDED(lock_);
size_t* usable_size, size_t* bytes_tl_bulk_allocated)
LOCKS_EXCLUDED(lock_);
// Faster non-virtual allocation size path.
size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size);
......@@ -134,7 +145,8 @@ class DlMallocSpace : public MallocSpace {
private:
mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated,
size_t* usable_size)
size_t* usable_size,
size_t* bytes_tl_bulk_allocated)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size,
......
......@@ -38,10 +38,11 @@ class ValgrindLargeObjectMapSpace FINAL : public LargeObjectMapSpace {
}
virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
size_t* usable_size) OVERRIDE {
size_t* usable_size, size_t* bytes_tl_bulk_allocated)
OVERRIDE {
mirror::Object* obj =
LargeObjectMapSpace::Alloc(self, num_bytes + kValgrindRedZoneBytes * 2, bytes_allocated,
usable_size);
usable_size, bytes_tl_bulk_allocated);
mirror::Object* object_without_rdz = reinterpret_cast<mirror::Object*>(
reinterpret_cast<uintptr_t>(obj) + kValgrindRedZoneBytes);
VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<void*>(obj), kValgrindRedZoneBytes);
......@@ -108,7 +109,8 @@ LargeObjectMapSpace* LargeObjectMapSpace::Create(const std::string& name) {
}
mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes,
size_t* bytes_allocated, size_t* usable_size) {
size_t* bytes_allocated, size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
std::string error_msg;
MemMap* mem_map = MemMap::MapAnonymous("large object space allocation", nullptr, num_bytes,
PROT_READ | PROT_WRITE, true, false, &error_msg);
......@@ -131,6 +133,8 @@ mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes,
if (usable_size != nullptr) {
*usable_size = allocation_size;
}
DCHECK(bytes_tl_bulk_allocated != nullptr);
*bytes_tl_bulk_allocated = allocation_size;
num_bytes_allocated_ += allocation_size;
total_bytes_allocated_ += allocation_size;
++num_objects_allocated_;
......@@ -413,7 +417,7 @@ size_t FreeListSpace::AllocationSize(mirror::Object* obj, size_t* usable_size) {
}
mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
size_t* usable_size) {
size_t* usable_size, size_t* bytes_tl_bulk_allocated) {
MutexLock mu(self, lock_);
const size_t allocation_size = RoundUp(num_bytes, kAlignment);
AllocationInfo temp_info;
......@@ -451,6 +455,8 @@ mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* byt
if (usable_size != nullptr) {
*usable_size = allocation_size;
}
DCHECK(bytes_tl_bulk_allocated != nullptr);
*bytes_tl_bulk_allocated = allocation_size;
// Need to do these inside of the lock.
++num_objects_allocated_;
++total_objects_allocated_;
......