1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "common.h"
20 #include "memview.h"
21 #include "arrayview.h"
22 #include "dex_format.h"
23 #include "dex_leb128.h"
24 #include "buffer.h"
25 #include "index_map.h"
26 #include "hash_table.h"
27 
28 #include <stdlib.h>
29 #include <map>
30 #include <memory>
31 #include <vector>
32 #include <string>
33 
34 // A simple, lightweight IR to abstract the key .dex structures
35 //
36 // 1. All the cross-IR references are modeled as plain pointers.
37 // 2. Newly allocated nodes are mem-zeroed first
38 //
39 // This IR can mirror any .dex file, although for JVMTI BCI
40 // it's expected to construct the IR for the single modified class only
41 // (and include only the nodes referenced from that class)
42 
// Boilerplate used as the first declaration inside IR structs that derive
// directly from Node: it inherits Node's constructors and makes DexFile
// a friend of the struct.
#define SLICER_IR_TYPE     \
  using Node::Node; \
  friend struct DexFile;
46 
// Same boilerplate for IR structs that derive from IndexedNode:
// it inherits IndexedNode's constructors and makes DexFile a friend
// of the struct.
#define SLICER_IR_INDEXED_TYPE           \
  using IndexedNode::IndexedNode; \
  friend struct DexFile;
50 
51 namespace ir {
52 
// convenience notation: own<T> is shorthand for std::unique_ptr<T>
// (an owning pointer to a T)
template <class T>
using own = std::unique_ptr<T>;
56 
57 struct Node;
58 struct IndexedNode;
59 struct EncodedValue;
60 struct EncodedArray;
61 struct String;
62 struct Type;
63 struct TypeList;
64 struct Proto;
65 struct FieldDecl;
66 struct EncodedField;
67 struct DebugInfo;
68 struct Code;
69 struct MethodDecl;
70 struct EncodedMethod;
71 struct AnnotationElement;
72 struct Annotation;
73 struct AnnotationSet;
74 struct AnnotationSetRefList;
75 struct FieldAnnotation;
76 struct MethodAnnotation;
77 struct ParamAnnotation;
78 struct AnnotationsDirectory;
79 struct Class;
80 struct DexFile;
81 
// The base class for all the .dex IR types:
//   This is not a polymorphic interface, but
//   a way to constrain the allocation and ownership
//   of .dex IR nodes.
//
// Node storage comes from ::calloc(), so the memory of a freshly allocated
// node is zero-filled before any constructor runs; it is released
// with ::free().
struct Node {
  // Returns zero-filled storage of at least |size| bytes.
  //
  // An allocation function that is not declared noexcept must never return
  // null (the new-expression would go on to construct the object at a null
  // address), so an allocation failure aborts the process instead.
  void* operator new(size_t size) {
    // calloc() is allowed to return null for a zero-sized request,
    // which must not be mistaken for an out-of-memory condition
    void* storage = ::calloc(1, size != 0 ? size : 1);
    if (storage == nullptr) {
      ::abort();
    }
    return storage;
  }

  // Array form: same contract as the scalar operator new above.
  void* operator new[](size_t size) {
    void* storage = ::calloc(1, size != 0 ? size : 1);
    if (storage == nullptr) {
      ::abort();
    }
    return storage;
  }

  // Releases storage obtained from operator new (free() accepts null).
  void operator delete(void* ptr) {
    ::free(ptr);
  }

  // Releases storage obtained from operator new[].
  void operator delete[](void* ptr) {
    ::free(ptr);
  }

 public:
  // IR nodes are not copyable
  Node(const Node&) = delete;
  Node& operator=(const Node&) = delete;

 protected:
  // Only derived IR types can construct or destroy the Node sub-object
  // (the destructor is intentionally non-virtual).
  Node() = default;
  ~Node() = default;
};
111 
// Base for the IR nodes which carry an index: String, Type, Proto,
// FieldDecl, MethodDecl and Class derive from it.
//
// a concession for the convenience of the .dex writer
//
// TODO: consider moving the indexing to the writer.
//
struct IndexedNode : public Node {
  SLICER_IR_TYPE;

  // this is the index in the generated image
  // (not the original index)
  dex::u4 index;

  // original index
  // (from the source .dex image or allocated post reader)
  dex::u4 orig_index;
};
127 
// A single encoded value: a `type` tag plus a union with one member
// per supported payload kind.
//
// NOTE(review): `type` presumably holds a DEX encoded_value type code which
// selects the active union member - confirm against the reader/writer.
struct EncodedValue : public Node {
  SLICER_IR_TYPE;

  dex::u1 type;
  union {
    int8_t byte_value;
    int16_t short_value;
    uint16_t char_value;
    int32_t int_value;
    int64_t long_value;
    float float_value;
    double double_value;
    String* string_value;
    Type* type_value;
    FieldDecl* field_value;
    MethodDecl* method_value;
    FieldDecl* enum_value;
    EncodedArray* array_value;
    Annotation* annotation_value;
    bool bool_value;
  } u;

  // SLICER_EXTRA is not defined in this header; whether this member
  // exists depends on that macro's definition.
  SLICER_EXTRA(slicer::MemView original);
};
152 
// An ordered list of (non-owning) pointers to EncodedValue nodes.
struct EncodedArray : public Node {
  SLICER_IR_TYPE;

  std::vector<EncodedValue*> values;
};
158 
159 struct String : public IndexedNode {
160   SLICER_IR_INDEXED_TYPE;
161 
162   // opaque DEX "string_data_item"
163   slicer::MemView data;
164 
c_strString165   const char* c_str() const {
166     const dex::u1* strData = data.ptr<dex::u1>();
167     dex::ReadULeb128(&strData);
168     return reinterpret_cast<const char*>(strData);
169   }
170 };
171 
// IR node for a type.
struct Type : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  // the classification returned by GetCategory()
  enum class Category { Void, Scalar, WideScalar, Reference };

  // NOTE(review): presumably the DEX type descriptor string - confirm
  String* descriptor;
  // NOTE(review): presumably the Class defining this type, if the IR
  // contains one - confirm
  Class* class_def;

  // declared here, defined outside this header
  std::string Decl() const;
  Category GetCategory() const;
};
183 
// An ordered list of (non-owning) pointers to Type nodes.
struct TypeList : public Node {
  SLICER_IR_TYPE;

  std::vector<Type*> types;
};
189 
// IR node for a method prototype: a shorty string, a return type
// and a list of parameter types.
struct Proto : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  String* shorty;
  Type* return_type;
  TypeList* param_types;

  // declared here, defined outside this header
  // (ProtosHasher uses the returned string as the lookup key of a Proto)
  std::string Signature() const;
};
199 
// IR node for a field declaration: references its name and its type.
struct FieldDecl : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  String* name;
  Type* type;
  // NOTE(review): presumably the type of the class declaring the field - confirm
  Type* parent;
};
207 
// Pairs a field declaration with access flags.
// (Class keeps lists of these for its static and instance fields)
struct EncodedField : public Node {
  SLICER_IR_TYPE;

  FieldDecl* decl;
  dex::u4 access_flags;
};
214 
// Debug information node (referenced from Code::debug_info).
struct DebugInfo : public Node {
  SLICER_IR_TYPE;

  dex::u4 line_start;
  std::vector<String*> param_names;

  // original debug info opcodes stream
  // (must be "relocated" when creating a new .dex image)
  slicer::MemView data;
};
225 
// IR node for a method body (referenced from EncodedMethod::code).
//
// The instructions, try blocks and catch handlers are kept as views
// (ArrayView/MemView) rather than as containers owned by this node.
struct Code : public Node {
  SLICER_IR_TYPE;

  dex::u2 registers;
  dex::u2 ins_count;
  dex::u2 outs_count;
  slicer::ArrayView<const dex::u2> instructions;
  slicer::ArrayView<const dex::TryBlock> try_blocks;
  slicer::MemView catch_handlers;
  DebugInfo* debug_info;
};
237 
// IR node for a method declaration: references its name and its prototype.
struct MethodDecl : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  String* name;
  Proto* prototype;
  // NOTE(review): presumably the type of the class declaring the method - confirm
  Type* parent;
};
245 
// Pairs a method declaration with its code and access flags.
// (Class keeps lists of these for its direct and virtual methods)
struct EncodedMethod : public Node {
  SLICER_IR_TYPE;

  MethodDecl* decl;
  // NOTE(review): presumably null for methods without a body - confirm
  Code* code;
  dex::u4 access_flags;
};
253 
// A (name, value) pair; Annotation keeps a list of these elements.
struct AnnotationElement : public Node {
  SLICER_IR_TYPE;

  String* name;
  EncodedValue* value;
};
260 
// An annotation: a type, a list of elements and a visibility value.
struct Annotation : public Node {
  SLICER_IR_TYPE;

  Type* type;
  std::vector<AnnotationElement*> elements;
  // NOTE(review): presumably one of the DEX annotation visibility
  // constants - confirm
  dex::u1 visibility;
};
268 
// A list of (non-owning) pointers to Annotation nodes.
struct AnnotationSet : public Node {
  SLICER_IR_TYPE;

  std::vector<Annotation*> annotations;
};
274 
// A list of (non-owning) pointers to AnnotationSet nodes
// (referenced from ParamAnnotation::annotations).
struct AnnotationSetRefList : public Node {
  SLICER_IR_TYPE;

  std::vector<AnnotationSet*> annotations;
};
280 
// Associates a field declaration with an annotation set.
struct FieldAnnotation : public Node {
  SLICER_IR_TYPE;

  FieldDecl* field_decl;
  AnnotationSet* annotations;
};
287 
// Associates a method declaration with an annotation set.
struct MethodAnnotation : public Node {
  SLICER_IR_TYPE;

  MethodDecl* method_decl;
  AnnotationSet* annotations;
};
294 
// Associates a method declaration with a list of annotation sets.
// NOTE(review): presumably one annotation set per method parameter - confirm
struct ParamAnnotation : public Node {
  SLICER_IR_TYPE;

  MethodDecl* method_decl;
  AnnotationSetRefList* annotations;
};
301 
// Groups the annotations referenced from a Class (Class::annotations):
// one class-level annotation set plus the lists of field, method and
// parameter annotations.
struct AnnotationsDirectory : public Node {
  SLICER_IR_TYPE;

  AnnotationSet* class_annotation;
  std::vector<FieldAnnotation*> field_annotations;
  std::vector<MethodAnnotation*> method_annotations;
  std::vector<ParamAnnotation*> param_annotations;
};
310 
// IR node for a class definition: its type, access flags, super class,
// interfaces, source file, annotations, static initializer values and
// the lists of its fields and methods.
//
// All the pointers, including the vector elements, are non-owning
// (the nodes are owned by the DexFile containers).
struct Class : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  Type* type;
  dex::u4 access_flags;
  Type* super_class;
  TypeList* interfaces;
  String* source_file;
  AnnotationsDirectory* annotations;
  // NOTE(review): presumably the initial values of the static fields - confirm
  EncodedArray* static_init;

  std::vector<EncodedField*> static_fields;
  std::vector<EncodedField*> instance_fields;
  std::vector<EncodedMethod*> direct_methods;
  std::vector<EncodedMethod*> virtual_methods;
};
327 
328 // ir::String hashing
329 struct StringsHasher {
GetKeyStringsHasher330   const char* GetKey(const String* string) const { return string->c_str(); }
331   uint32_t Hash(const char* string_key) const;
332   bool Compare(const char* string_key, const String* string) const;
333 };
334 
335 // ir::Proto hashing
336 struct ProtosHasher {
GetKeyProtosHasher337   std::string GetKey(const Proto* proto) const { return proto->Signature(); }
338   uint32_t Hash(const std::string& proto_key) const;
339   bool Compare(const std::string& proto_key, const Proto* proto) const;
340 };
341 
// ir::EncodedMethod hashing
//
// The lookup key for an EncodedMethod: class descriptor, method name and
// prototype. All three pointers default to nullptr.
struct MethodKey {
  String* class_descriptor = nullptr;
  String* method_name = nullptr;
  Proto* prototype = nullptr;
};
348 
// Key extraction, hashing and comparison for the ir::EncodedMethod lookup
// table, keyed by MethodKey. All three members are declared here and
// defined outside this header.
struct MethodsHasher {
  MethodKey GetKey(const EncodedMethod* method) const;
  uint32_t Hash(const MethodKey& method_key) const;
  bool Compare(const MethodKey& method_key, const EncodedMethod* method) const;
};
354 
// The hash table types behind the DexFile lookup members:
//   StringsLookup    - ir::String nodes keyed by C string (StringsHasher)
//   PrototypesLookup - ir::Proto nodes keyed by std::string (ProtosHasher)
//   MethodsLookup    - ir::EncodedMethod nodes keyed by MethodKey (MethodsHasher)
using StringsLookup = slicer::HashTable<const char*, String, StringsHasher>;
using PrototypesLookup = slicer::HashTable<const std::string&, Proto, ProtosHasher>;
using MethodsLookup = slicer::HashTable<const MethodKey&, EncodedMethod, MethodsHasher>;
358 
359 // The main container/root for a .dex IR
360 struct DexFile {
361   // indexed structures
362   std::vector<own<String>> strings;
363   std::vector<own<Type>> types;
364   std::vector<own<Proto>> protos;
365   std::vector<own<FieldDecl>> fields;
366   std::vector<own<MethodDecl>> methods;
367   std::vector<own<Class>> classes;
368 
369   // data segment structures
370   std::vector<own<EncodedField>> encoded_fields;
371   std::vector<own<EncodedMethod>> encoded_methods;
372   std::vector<own<TypeList>> type_lists;
373   std::vector<own<Code>> code;
374   std::vector<own<DebugInfo>> debug_info;
375   std::vector<own<EncodedValue>> encoded_values;
376   std::vector<own<EncodedArray>> encoded_arrays;
377   std::vector<own<Annotation>> annotations;
378   std::vector<own<AnnotationElement>> annotation_elements;
379   std::vector<own<AnnotationSet>> annotation_sets;
380   std::vector<own<AnnotationSetRefList>> annotation_set_ref_lists;
381   std::vector<own<AnnotationsDirectory>> annotations_directories;
382   std::vector<own<FieldAnnotation>> field_annotations;
383   std::vector<own<MethodAnnotation>> method_annotations;
384   std::vector<own<ParamAnnotation>> param_annotations;
385 
386   // original index to IR node mappings
387   //
388   // CONSIDER: we only need to carry around
389   //   the relocation for the referenced items
390   //
391   std::map<dex::u4, Type*> types_map;
392   std::map<dex::u4, String*> strings_map;
393   std::map<dex::u4, Proto*> protos_map;
394   std::map<dex::u4, FieldDecl*> fields_map;
395   std::map<dex::u4, MethodDecl*> methods_map;
396   std::map<dex::u4, Class*> classes_map;
397 
398   // original .dex header "magic" signature
399   slicer::MemView magic;
400 
401   // keep track of the used index values
402   // (so we can easily allocate new ones)
403   IndexMap strings_indexes;
404   IndexMap types_indexes;
405   IndexMap protos_indexes;
406   IndexMap fields_indexes;
407   IndexMap methods_indexes;
408   IndexMap classes_indexes;
409 
410   // lookup hash tables
411   StringsLookup strings_lookup;
412   MethodsLookup methods_lookup;
413   PrototypesLookup prototypes_lookup;
414 
415  public:
416   DexFile() = default;
417 
418   // No copy/move semantics
419   DexFile(const DexFile&) = delete;
420   DexFile& operator=(const DexFile&) = delete;
421 
422   template <class T>
AllocDexFile423   T* Alloc() {
424     T* p = new T();
425     Track(p);
426     return p;
427   }
428 
AttachBufferDexFile429   void AttachBuffer(slicer::Buffer&& buffer) {
430     buffers_.push_back(std::move(buffer));
431   }
432 
433   void Normalize();
434 
435  private:
436   void TopSortClassIndex(Class* irClass, dex::u4* nextIndex);
437   void SortClassIndexes();
438 
439   template <class T>
PushOwnDexFile440   void PushOwn(std::vector<own<T>>& v, T* p) {
441     v.push_back(own<T>(p));
442   }
443 
TrackDexFile444   void Track(String* p) { PushOwn(strings, p); }
TrackDexFile445   void Track(Type* p) { PushOwn(types, p); }
TrackDexFile446   void Track(Proto* p) { PushOwn(protos, p); }
TrackDexFile447   void Track(FieldDecl* p) { PushOwn(fields, p); }
TrackDexFile448   void Track(MethodDecl* p) { PushOwn(methods, p); }
TrackDexFile449   void Track(Class* p) { PushOwn(classes, p); }
450 
TrackDexFile451   void Track(EncodedField* p) { PushOwn(encoded_fields, p); }
TrackDexFile452   void Track(EncodedMethod* p) { PushOwn(encoded_methods, p); }
TrackDexFile453   void Track(TypeList* p) { PushOwn(type_lists, p); }
TrackDexFile454   void Track(Code* p) { PushOwn(code, p); }
TrackDexFile455   void Track(DebugInfo* p) { PushOwn(debug_info, p); }
TrackDexFile456   void Track(EncodedValue* p) { PushOwn(encoded_values, p); }
TrackDexFile457   void Track(EncodedArray* p) { PushOwn(encoded_arrays, p); }
TrackDexFile458   void Track(Annotation* p) { PushOwn(annotations, p); }
TrackDexFile459   void Track(AnnotationElement* p) { PushOwn(annotation_elements, p); }
TrackDexFile460   void Track(AnnotationSet* p) { PushOwn(annotation_sets, p); }
TrackDexFile461   void Track(AnnotationSetRefList* p) { PushOwn(annotation_set_ref_lists, p); }
TrackDexFile462   void Track(AnnotationsDirectory* p) { PushOwn(annotations_directories, p); }
TrackDexFile463   void Track(FieldAnnotation* p) { PushOwn(field_annotations, p); }
TrackDexFile464   void Track(MethodAnnotation* p) { PushOwn(method_annotations, p); }
TrackDexFile465   void Track(ParamAnnotation* p) { PushOwn(param_annotations, p); }
466 
467 private:
468   // additional memory buffers owned by this .dex IR
469   std::vector<slicer::Buffer> buffers_;
470 };
471 
472 }  // namespace ir
473 
474 #undef SLICER_IR_TYPE
475 #undef SLICER_IR_INDEXED_TYPE
476