/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_HEAP_INL_H_
#define ART_RUNTIME_GC_HEAP_INL_H_

#include "heap.h"

#include "allocation_listener.h"
#include "base/quasi_atomic.h"
#include "base/time_utils.h"
#include "gc/accounting/atomic_stack.h"
#include "gc/accounting/card_table-inl.h"
#include "gc/allocation_record.h"
#include "gc/collector/semi_space.h"
#include "gc/space/bump_pointer_space-inl.h"
#include "gc/space/dlmalloc_space-inl.h"
#include "gc/space/large_object_space.h"
#include "gc/space/region_space-inl.h"
#include "gc/space/rosalloc_space-inl.h"
#include "handle_scope-inl.h"
#include "obj_ptr-inl.h"
#include "runtime.h"
#include "thread-inl.h"
#include "verify_object.h"
#include "write_barrier-inl.h"

namespace art {
namespace gc {

44 template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor>
AllocObjectWithAllocator(Thread * self,ObjPtr<mirror::Class> klass,size_t byte_count,AllocatorType allocator,const PreFenceVisitor & pre_fence_visitor)45 inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self,
46                                                       ObjPtr<mirror::Class> klass,
47                                                       size_t byte_count,
48                                                       AllocatorType allocator,
49                                                       const PreFenceVisitor& pre_fence_visitor) {
50   auto no_suspend_pre_fence_visitor =
51       [&pre_fence_visitor](auto... x) REQUIRES_SHARED(Locks::mutator_lock_) {
52         ScopedAssertNoThreadSuspension sants("No thread suspension during pre-fence visitor");
53         pre_fence_visitor(x...);
54       };
55 
56   if (kIsDebugBuild) {
57     CheckPreconditionsForAllocObject(klass, byte_count);
58     // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
59     // done in the runnable state where suspension is expected.
60     CHECK_EQ(self->GetState(), kRunnable);
61     self->AssertThreadSuspensionIsAllowable();
62     self->AssertNoPendingException();
63     // Make sure to preserve klass.
64     StackHandleScope<1> hs(self);
65     HandleWrapperObjPtr<mirror::Class> h = hs.NewHandleWrapper(&klass);
66     self->PoisonObjectPointers();
67   }
68   auto pre_object_allocated = [&]() REQUIRES_SHARED(Locks::mutator_lock_)
69       REQUIRES(!Roles::uninterruptible_) {
70     if constexpr (kInstrumented) {
71       AllocationListener* l = alloc_listener_.load(std::memory_order_seq_cst);
72       if (UNLIKELY(l != nullptr) && UNLIKELY(l->HasPreAlloc())) {
73         StackHandleScope<1> hs(self);
74         HandleWrapperObjPtr<mirror::Class> h_klass(hs.NewHandleWrapper(&klass));
75         l->PreObjectAllocated(self, h_klass, &byte_count);
76       }
77     }
78   };
79   ObjPtr<mirror::Object> obj;
80   // bytes allocated for the (individual) object.
81   size_t bytes_allocated;
82   size_t usable_size;
83   size_t new_num_bytes_allocated = 0;
84   {
85     // Do the initial pre-alloc
86     pre_object_allocated();
87     ScopedAssertNoThreadSuspension ants("Called PreObjectAllocated, no suspend until alloc");
88 
89     // Need to check that we aren't the large object allocator since the large object allocation
90     // code path includes this function. If we didn't check we would have an infinite loop.
91     if (kCheckLargeObject && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
92       // AllocLargeObject can suspend and will recall PreObjectAllocated if needed.
93       ScopedAllowThreadSuspension ats;
94       obj = AllocLargeObject<kInstrumented, PreFenceVisitor>(self, &klass, byte_count,
95                                                              pre_fence_visitor);
96       if (obj != nullptr) {
97         return obj.Ptr();
98       }
99       // There should be an OOM exception, since we are retrying, clear it.
100       self->ClearException();
101 
102       // If the large object allocation failed, try to use the normal spaces (main space,
103       // non moving space). This can happen if there is significant virtual address space
104       // fragmentation.
105       pre_object_allocated();
106     }
107     if (IsTLABAllocator(allocator)) {
108       byte_count = RoundUp(byte_count, space::BumpPointerSpace::kAlignment);
109     }
110     // If we have a thread local allocation we don't need to update bytes allocated.
111     if (IsTLABAllocator(allocator) && byte_count <= self->TlabSize()) {
112       obj = self->AllocTlab(byte_count);
113       DCHECK(obj != nullptr) << "AllocTlab can't fail";
114       obj->SetClass(klass);
115       if (kUseBakerReadBarrier) {
116         obj->AssertReadBarrierState();
117       }
118       bytes_allocated = byte_count;
119       usable_size = bytes_allocated;
120       no_suspend_pre_fence_visitor(obj, usable_size);
121       QuasiAtomic::ThreadFenceForConstructor();
122     } else if (
123         !kInstrumented && allocator == kAllocatorTypeRosAlloc &&
124         (obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) != nullptr &&
125         LIKELY(obj != nullptr)) {
126       DCHECK(!is_running_on_memory_tool_);
127       obj->SetClass(klass);
128       if (kUseBakerReadBarrier) {
129         obj->AssertReadBarrierState();
130       }
131       usable_size = bytes_allocated;
132       no_suspend_pre_fence_visitor(obj, usable_size);
133       QuasiAtomic::ThreadFenceForConstructor();
134     } else {
135       // Bytes allocated that includes bulk thread-local buffer allocations in addition to direct
136       // non-TLAB object allocations.
137       size_t bytes_tl_bulk_allocated = 0u;
138       obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated,
139                                                 &usable_size, &bytes_tl_bulk_allocated);
140       if (UNLIKELY(obj == nullptr)) {
141         // AllocateInternalWithGc can cause thread suspension, if someone instruments the
142         // entrypoints or changes the allocator in a suspend point here, we need to retry the
143         // allocation. It will send the pre-alloc event again.
144         obj = AllocateInternalWithGc(self,
145                                      allocator,
146                                      kInstrumented,
147                                      byte_count,
148                                      &bytes_allocated,
149                                      &usable_size,
150                                      &bytes_tl_bulk_allocated,
151                                      &klass);
152         if (obj == nullptr) {
153           // The only way that we can get a null return if there is no pending exception is if the
154           // allocator or instrumentation changed.
155           if (!self->IsExceptionPending()) {
156             // Since we are restarting, allow thread suspension.
157             ScopedAllowThreadSuspension ats;
158             // AllocObject will pick up the new allocator type, and instrumented as true is the safe
159             // default.
160             return AllocObject</*kInstrumented=*/true>(self,
161                                                        klass,
162                                                        byte_count,
163                                                        pre_fence_visitor);
164           }
165           return nullptr;
166         }
167       }
168       DCHECK_GT(bytes_allocated, 0u);
169       DCHECK_GT(usable_size, 0u);
170       obj->SetClass(klass);
171       if (kUseBakerReadBarrier) {
172         obj->AssertReadBarrierState();
173       }
174       if (collector::SemiSpace::kUseRememberedSet &&
175           UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
176         // (Note this if statement will be constant folded away for the fast-path quick entry
177         // points.) Because SetClass() has no write barrier, the GC may need a write barrier in the
178         // case the object is non movable and points to a recently allocated movable class.
179         WriteBarrier::ForFieldWrite(obj, mirror::Object::ClassOffset(), klass);
180       }
181       no_suspend_pre_fence_visitor(obj, usable_size);
182       QuasiAtomic::ThreadFenceForConstructor();
183       if (bytes_tl_bulk_allocated > 0) {
184         size_t num_bytes_allocated_before =
185             num_bytes_allocated_.fetch_add(bytes_tl_bulk_allocated, std::memory_order_relaxed);
186         new_num_bytes_allocated = num_bytes_allocated_before + bytes_tl_bulk_allocated;
187         // Only trace when we get an increase in the number of bytes allocated. This happens when
188         // obtaining a new TLAB and isn't often enough to hurt performance according to golem.
189         if (region_space_) {
190           // With CC collector, during a GC cycle, the heap usage increases as
191           // there are two copies of evacuated objects. Therefore, add evac-bytes
192           // to the heap size. When the GC cycle is not running, evac-bytes
193           // are 0, as required.
194           TraceHeapSize(new_num_bytes_allocated + region_space_->EvacBytes());
195         } else {
196           TraceHeapSize(new_num_bytes_allocated);
197         }
198       }
199     }
200   }
201   if (kIsDebugBuild && Runtime::Current()->IsStarted()) {
202     CHECK_LE(obj->SizeOf(), usable_size);
203   }
204   // TODO: Deprecate.
205   if (kInstrumented) {
206     if (Runtime::Current()->HasStatsEnabled()) {
207       RuntimeStats* thread_stats = self->GetStats();
208       ++thread_stats->allocated_objects;
209       thread_stats->allocated_bytes += bytes_allocated;
210       RuntimeStats* global_stats = Runtime::Current()->GetStats();
211       ++global_stats->allocated_objects;
212       global_stats->allocated_bytes += bytes_allocated;
213     }
214   } else {
215     DCHECK(!Runtime::Current()->HasStatsEnabled());
216   }
217   if (kInstrumented) {
218     if (IsAllocTrackingEnabled()) {
219       // allocation_records_ is not null since it never becomes null after allocation tracking is
220       // enabled.
221       DCHECK(allocation_records_ != nullptr);
222       allocation_records_->RecordAllocation(self, &obj, bytes_allocated);
223     }
224     AllocationListener* l = alloc_listener_.load(std::memory_order_seq_cst);
225     if (l != nullptr) {
226       // Same as above. We assume that a listener that was once stored will never be deleted.
227       // Otherwise we'd have to perform this under a lock.
228       l->ObjectAllocated(self, &obj, bytes_allocated);
229     }
230   } else {
231     DCHECK(!IsAllocTrackingEnabled());
232   }
233   if (AllocatorHasAllocationStack(allocator)) {
234     PushOnAllocationStack(self, &obj);
235   }
236   if (kInstrumented) {
237     if (gc_stress_mode_) {
238       CheckGcStressMode(self, &obj);
239     }
240   } else {
241     DCHECK(!gc_stress_mode_);
242   }
243   // IsGcConcurrent() isn't known at compile time so we can optimize by not checking it for
244   // the BumpPointer or TLAB allocators. This is nice since it allows the entire if statement to be
245   // optimized out. And for the other allocators, AllocatorMayHaveConcurrentGC is a constant since
246   // the allocator_type should be constant propagated.
247   if (AllocatorMayHaveConcurrentGC(allocator) && IsGcConcurrent()) {
248     // New_num_bytes_allocated is zero if we didn't update num_bytes_allocated_.
249     // That's fine.
250     CheckConcurrentGCForJava(self, new_num_bytes_allocated, &obj);
251   }
252   VerifyObject(obj);
253   self->VerifyStack();
254   return obj.Ptr();
255 }
256 
// The size of a thread-local allocation stack in the number of references.
static constexpr size_t kThreadLocalAllocationStackSize = 128;

PushOnAllocationStack(Thread * self,ObjPtr<mirror::Object> * obj)260 inline void Heap::PushOnAllocationStack(Thread* self, ObjPtr<mirror::Object>* obj) {
261   if (kUseThreadLocalAllocationStack) {
262     if (UNLIKELY(!self->PushOnThreadLocalAllocationStack(obj->Ptr()))) {
263       PushOnThreadLocalAllocationStackWithInternalGC(self, obj);
264     }
265   } else if (UNLIKELY(!allocation_stack_->AtomicPushBack(obj->Ptr()))) {
266     PushOnAllocationStackWithInternalGC(self, obj);
267   }
268 }
269 
270 template <bool kInstrumented, typename PreFenceVisitor>
AllocLargeObject(Thread * self,ObjPtr<mirror::Class> * klass,size_t byte_count,const PreFenceVisitor & pre_fence_visitor)271 inline mirror::Object* Heap::AllocLargeObject(Thread* self,
272                                               ObjPtr<mirror::Class>* klass,
273                                               size_t byte_count,
274                                               const PreFenceVisitor& pre_fence_visitor) {
275   // Save and restore the class in case it moves.
276   StackHandleScope<1> hs(self);
277   auto klass_wrapper = hs.NewHandleWrapper(klass);
278   return AllocObjectWithAllocator<kInstrumented, false, PreFenceVisitor>(self, *klass, byte_count,
279                                                                          kAllocatorTypeLOS,
280                                                                          pre_fence_visitor);
281 }
282 
283 template <const bool kInstrumented, const bool kGrow>
TryToAllocate(Thread * self,AllocatorType allocator_type,size_t alloc_size,size_t * bytes_allocated,size_t * usable_size,size_t * bytes_tl_bulk_allocated)284 inline mirror::Object* Heap::TryToAllocate(Thread* self,
285                                            AllocatorType allocator_type,
286                                            size_t alloc_size,
287                                            size_t* bytes_allocated,
288                                            size_t* usable_size,
289                                            size_t* bytes_tl_bulk_allocated) {
290   if (allocator_type != kAllocatorTypeRegionTLAB &&
291       allocator_type != kAllocatorTypeTLAB &&
292       allocator_type != kAllocatorTypeRosAlloc &&
293       UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, alloc_size, kGrow))) {
294     return nullptr;
295   }
296   mirror::Object* ret;
297   switch (allocator_type) {
298     case kAllocatorTypeBumpPointer: {
299       DCHECK(bump_pointer_space_ != nullptr);
300       alloc_size = RoundUp(alloc_size, space::BumpPointerSpace::kAlignment);
301       ret = bump_pointer_space_->AllocNonvirtual(alloc_size);
302       if (LIKELY(ret != nullptr)) {
303         *bytes_allocated = alloc_size;
304         *usable_size = alloc_size;
305         *bytes_tl_bulk_allocated = alloc_size;
306       }
307       break;
308     }
309     case kAllocatorTypeRosAlloc: {
310       if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
311         // If running on ASan, we should be using the instrumented path.
312         size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size);
313         if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
314                                                max_bytes_tl_bulk_allocated,
315                                                kGrow))) {
316           return nullptr;
317         }
318         ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
319                                      bytes_tl_bulk_allocated);
320       } else {
321         DCHECK(!is_running_on_memory_tool_);
322         size_t max_bytes_tl_bulk_allocated =
323             rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size);
324         if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
325                                                max_bytes_tl_bulk_allocated,
326                                                kGrow))) {
327           return nullptr;
328         }
329         if (!kInstrumented) {
330           DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size));
331         }
332         ret = rosalloc_space_->AllocNonvirtual(self,
333                                                alloc_size,
334                                                bytes_allocated,
335                                                usable_size,
336                                                bytes_tl_bulk_allocated);
337       }
338       break;
339     }
340     case kAllocatorTypeDlMalloc: {
341       if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
342         // If running on ASan, we should be using the instrumented path.
343         ret = dlmalloc_space_->Alloc(self,
344                                      alloc_size,
345                                      bytes_allocated,
346                                      usable_size,
347                                      bytes_tl_bulk_allocated);
348       } else {
349         DCHECK(!is_running_on_memory_tool_);
350         ret = dlmalloc_space_->AllocNonvirtual(self,
351                                                alloc_size,
352                                                bytes_allocated,
353                                                usable_size,
354                                                bytes_tl_bulk_allocated);
355       }
356       break;
357     }
358     case kAllocatorTypeNonMoving: {
359       ret = non_moving_space_->Alloc(self,
360                                      alloc_size,
361                                      bytes_allocated,
362                                      usable_size,
363                                      bytes_tl_bulk_allocated);
364       break;
365     }
366     case kAllocatorTypeLOS: {
367       ret = large_object_space_->Alloc(self,
368                                        alloc_size,
369                                        bytes_allocated,
370                                        usable_size,
371                                        bytes_tl_bulk_allocated);
372       // Note that the bump pointer spaces aren't necessarily next to
373       // the other continuous spaces like the non-moving alloc space or
374       // the zygote space.
375       DCHECK(ret == nullptr || large_object_space_->Contains(ret));
376       break;
377     }
378     case kAllocatorTypeRegion: {
379       DCHECK(region_space_ != nullptr);
380       alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
381       ret = region_space_->AllocNonvirtual<false>(alloc_size,
382                                                   bytes_allocated,
383                                                   usable_size,
384                                                   bytes_tl_bulk_allocated);
385       break;
386     }
387     case kAllocatorTypeTLAB:
388       FALLTHROUGH_INTENDED;
389     case kAllocatorTypeRegionTLAB: {
390       DCHECK_ALIGNED(alloc_size, kObjectAlignment);
391       static_assert(space::RegionSpace::kAlignment == space::BumpPointerSpace::kAlignment,
392                     "mismatched alignments");
393       static_assert(kObjectAlignment == space::BumpPointerSpace::kAlignment,
394                     "mismatched alignments");
395       if (UNLIKELY(self->TlabSize() < alloc_size)) {
396         return AllocWithNewTLAB(self,
397                                 allocator_type,
398                                 alloc_size,
399                                 kGrow,
400                                 bytes_allocated,
401                                 usable_size,
402                                 bytes_tl_bulk_allocated);
403       }
404       // The allocation can't fail.
405       ret = self->AllocTlab(alloc_size);
406       DCHECK(ret != nullptr);
407       *bytes_allocated = alloc_size;
408       *bytes_tl_bulk_allocated = 0;  // Allocated in an existing buffer.
409       *usable_size = alloc_size;
410       break;
411     }
412     default: {
413       LOG(FATAL) << "Invalid allocator type";
414       ret = nullptr;
415     }
416   }
417   return ret;
418 }
419 
ShouldAllocLargeObject(ObjPtr<mirror::Class> c,size_t byte_count)420 inline bool Heap::ShouldAllocLargeObject(ObjPtr<mirror::Class> c, size_t byte_count) const {
421   // We need to have a zygote space or else our newly allocated large object can end up in the
422   // Zygote resulting in it being prematurely freed.
423   // We can only do this for primitive objects since large objects will not be within the card table
424   // range. This also means that we rely on SetClass not dirtying the object's card.
425   return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass());
426 }
427 
IsOutOfMemoryOnAllocation(AllocatorType allocator_type,size_t alloc_size,bool grow)428 inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
429                                             size_t alloc_size,
430                                             bool grow) {
431   size_t old_target = target_footprint_.load(std::memory_order_relaxed);
432   while (true) {
433     size_t old_allocated = num_bytes_allocated_.load(std::memory_order_relaxed);
434     size_t new_footprint = old_allocated + alloc_size;
435     // Tests against heap limits are inherently approximate, since multiple allocations may
436     // race, and this is not atomic with the allocation.
437     if (UNLIKELY(new_footprint <= old_target)) {
438       return false;
439     } else if (UNLIKELY(new_footprint > growth_limit_)) {
440       return true;
441     }
442     // We are between target_footprint_ and growth_limit_ .
443     if (AllocatorMayHaveConcurrentGC(allocator_type) && IsGcConcurrent()) {
444       return false;
445     } else {
446       if (grow) {
447         if (target_footprint_.compare_exchange_weak(/*inout ref*/old_target, new_footprint,
448                                                     std::memory_order_relaxed)) {
449           VlogHeapGrowth(old_target, new_footprint, alloc_size);
450           return false;
451         }  // else try again.
452       } else {
453         return true;
454       }
455     }
456   }
457 }
458 
ShouldConcurrentGCForJava(size_t new_num_bytes_allocated)459 inline bool Heap::ShouldConcurrentGCForJava(size_t new_num_bytes_allocated) {
460   // For a Java allocation, we only check whether the number of Java allocated bytes excceeds a
461   // threshold. By not considering native allocation here, we (a) ensure that Java heap bounds are
462   // maintained, and (b) reduce the cost of the check here.
463   return new_num_bytes_allocated >= concurrent_start_bytes_;
464 }
465 
CheckConcurrentGCForJava(Thread * self,size_t new_num_bytes_allocated,ObjPtr<mirror::Object> * obj)466 inline void Heap::CheckConcurrentGCForJava(Thread* self,
467                                     size_t new_num_bytes_allocated,
468                                     ObjPtr<mirror::Object>* obj) {
469   if (UNLIKELY(ShouldConcurrentGCForJava(new_num_bytes_allocated))) {
470     RequestConcurrentGCAndSaveObject(self, false /* force_full */, obj);
471   }
472 }
473 
474 }  // namespace gc
475 }  // namespace art
476 
477 #endif  // ART_RUNTIME_GC_HEAP_INL_H_
478