1 //===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the ValueEnumerator class.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "ValueEnumerator.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DebugInfoMetadata.h"
19 #include "llvm/IR/DerivedTypes.h"
20 #include "llvm/IR/Instructions.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/IR/ValueSymbolTable.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <algorithm>
26 using namespace llvm;
27
28 namespace llvm_2_9_func {
29
isIntOrIntVectorValue(const std::pair<const Value *,unsigned> & V)30 static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
31 return V.first->getType()->isIntOrIntVectorTy();
32 }
33
34 /// ValueEnumerator - Enumerate module-level information.
ValueEnumerator(const llvm::Module & M)35 ValueEnumerator::ValueEnumerator(const llvm::Module &M)
36 : HasMDString(false), HasDILocation(false) {
37 // Enumerate the global variables.
38 for (llvm::Module::const_global_iterator I = M.global_begin(), E = M.global_end();
39 I != E; ++I)
40 EnumerateValue(&*I);
41
42 // Enumerate the functions.
43 for (llvm::Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
44 EnumerateValue(&*I);
45 EnumerateAttributes(cast<Function>(I)->getAttributes());
46 }
47
48 // Enumerate the aliases.
49 for (llvm::Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
50 I != E; ++I)
51 EnumerateValue(&*I);
52
53 // Remember what is the cutoff between globalvalue's and other constants.
54 unsigned FirstConstant = Values.size();
55
56 // Enumerate the global variable initializers.
57 for (llvm::Module::const_global_iterator I = M.global_begin(), E = M.global_end();
58 I != E; ++I)
59 if (I->hasInitializer())
60 EnumerateValue(I->getInitializer());
61
62 // Enumerate the aliasees.
63 for (llvm::Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
64 I != E; ++I)
65 EnumerateValue(I->getAliasee());
66
67 // Enumerate the metadata type.
68 //
69 // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode
70 // only encodes the metadata type when it's used as a value.
71 EnumerateType(Type::getMetadataTy(M.getContext()));
72
73 // Insert constants and metadata that are named at module level into the slot
74 // pool so that the module symbol table can refer to them...
75 EnumerateValueSymbolTable(M.getValueSymbolTable());
76 EnumerateNamedMetadata(M);
77
78 SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
79
80 // Enumerate types used by function bodies and argument lists.
81 for (const Function &F : M) {
82 for (const Argument &A : F.args())
83 EnumerateType(A.getType());
84
85 for (const BasicBlock &BB : F)
86 for (const Instruction &I : BB) {
87 for (const Use &Op : I.operands()) {
88 auto *MD = dyn_cast<MetadataAsValue>(&Op);
89 if (!MD) {
90 EnumerateOperandType(Op);
91 continue;
92 }
93
94 // Local metadata is enumerated during function-incorporation.
95 if (isa<LocalAsMetadata>(MD->getMetadata()))
96 continue;
97
98 EnumerateMetadata(MD->getMetadata());
99 }
100 EnumerateType(I.getType());
101 if (const CallInst *CI = dyn_cast<CallInst>(&I))
102 EnumerateAttributes(CI->getAttributes());
103 else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I))
104 EnumerateAttributes(II->getAttributes());
105
106 // Enumerate metadata attached with this instruction.
107 MDs.clear();
108 I.getAllMetadataOtherThanDebugLoc(MDs);
109 for (unsigned i = 0, e = MDs.size(); i != e; ++i)
110 EnumerateMetadata(MDs[i].second);
111
112 // Don't enumerate the location directly -- it has a special record
113 // type -- but enumerate its operands.
114 if (DILocation *L = I.getDebugLoc())
115 EnumerateMDNodeOperands(L);
116 }
117 }
118
119 // Optimize constant ordering.
120 OptimizeConstants(FirstConstant, Values.size());
121 }
122
getInstructionID(const Instruction * Inst) const123 unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
124 InstructionMapType::const_iterator I = InstructionMap.find(Inst);
125 assert(I != InstructionMap.end() && "Instruction is not mapped!");
126 return I->second;
127 }
128
setInstructionID(const Instruction * I)129 void ValueEnumerator::setInstructionID(const Instruction *I) {
130 InstructionMap[I] = InstructionCount++;
131 }
132
getValueID(const Value * V) const133 unsigned ValueEnumerator::getValueID(const Value *V) const {
134 if (auto *MD = dyn_cast<MetadataAsValue>(V))
135 return getMetadataID(MD->getMetadata());
136
137 ValueMapType::const_iterator I = ValueMap.find(V);
138 assert(I != ValueMap.end() && "Value not in slotcalculator!");
139 return I->second-1;
140 }
141
dump() const142 void ValueEnumerator::dump() const {
143 print(dbgs(), ValueMap, "Default");
144 dbgs() << '\n';
145 print(dbgs(), MDValueMap, "MetaData");
146 dbgs() << '\n';
147 }
148
print(raw_ostream & OS,const ValueMapType & Map,const char * Name) const149 void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
150 const char *Name) const {
151
152 OS << "Map Name: " << Name << "\n";
153 OS << "Size: " << Map.size() << "\n";
154 for (ValueMapType::const_iterator I = Map.begin(),
155 E = Map.end(); I != E; ++I) {
156
157 const Value *V = I->first;
158 if (V->hasName())
159 OS << "Value: " << V->getName();
160 else
161 OS << "Value: [null]\n";
162 V->dump();
163
164 OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):";
165 for (const Use &U : V->uses()) {
166 if (&U != &*V->use_begin())
167 OS << ",";
168 if(U->hasName())
169 OS << " " << U->getName();
170 else
171 OS << " [null]";
172
173 }
174 OS << "\n\n";
175 }
176 }
177
print(llvm::raw_ostream & OS,const MetadataMapType & Map,const char * Name) const178 void ValueEnumerator::print(llvm::raw_ostream &OS, const MetadataMapType &Map,
179 const char *Name) const {
180
181 OS << "Map Name: " << Name << "\n";
182 OS << "Size: " << Map.size() << "\n";
183 for (auto I = Map.begin(), E = Map.end(); I != E; ++I) {
184 const llvm::Metadata *MD = I->first;
185 OS << "Metadata: slot = " << I->second << "\n";
186 MD->print(OS);
187 }
188 }
189
190
191 // Optimize constant ordering.
192 namespace {
193 struct CstSortPredicate {
194 ValueEnumerator &VE;
CstSortPredicatellvm_2_9_func::__anonbbe54d9a0111::CstSortPredicate195 explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {}
operator ()llvm_2_9_func::__anonbbe54d9a0111::CstSortPredicate196 bool operator()(const std::pair<const Value*, unsigned> &LHS,
197 const std::pair<const Value*, unsigned> &RHS) {
198 // Sort by plane.
199 if (LHS.first->getType() != RHS.first->getType())
200 return VE.getTypeID(LHS.first->getType()) <
201 VE.getTypeID(RHS.first->getType());
202 // Then by frequency.
203 return LHS.second > RHS.second;
204 }
205 };
206 }
207
208 /// OptimizeConstants - Reorder constant pool for denser encoding.
OptimizeConstants(unsigned CstStart,unsigned CstEnd)209 void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
210 if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
211
212 CstSortPredicate P(*this);
213 std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
214
215 // Ensure that integer and vector of integer constants are at the start of the
216 // constant pool. This is important so that GEP structure indices come before
217 // gep constant exprs.
218 std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
219 isIntOrIntVectorValue);
220
221 // Rebuild the modified portion of ValueMap.
222 for (; CstStart != CstEnd; ++CstStart)
223 ValueMap[Values[CstStart].first] = CstStart+1;
224 }
225
226
227 /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
228 /// table into the values table.
EnumerateValueSymbolTable(const ValueSymbolTable & VST)229 void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
230 for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
231 VI != VE; ++VI)
232 EnumerateValue(VI->getValue());
233 }
234
235 /// EnumerateNamedMetadata - Insert all of the values referenced by
236 /// named metadata in the specified module.
EnumerateNamedMetadata(const llvm::Module & M)237 void ValueEnumerator::EnumerateNamedMetadata(const llvm::Module &M) {
238 for (llvm::Module::const_named_metadata_iterator I = M.named_metadata_begin(),
239 E = M.named_metadata_end();
240 I != E; ++I)
241 EnumerateNamedMDNode(&*I);
242 }
243
EnumerateNamedMDNode(const NamedMDNode * MD)244 void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
245 for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
246 EnumerateMetadata(MD->getOperand(i));
247 }
248
249 /// EnumerateMDNodeOperands - Enumerate all non-function-local values
250 /// and types referenced by the given MDNode.
EnumerateMDNodeOperands(const MDNode * N)251 void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
252 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
253 Metadata *MD = N->getOperand(i);
254 if (!MD)
255 continue;
256 assert(!isa<LocalAsMetadata>(MD) && "MDNodes cannot be function-local");
257 EnumerateMetadata(MD);
258 }
259 }
260
EnumerateMetadata(const llvm::Metadata * MD)261 void ValueEnumerator::EnumerateMetadata(const llvm::Metadata *MD) {
262 assert(
263 (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) &&
264 "Invalid metadata kind");
265
266 // Insert a placeholder ID to block the co-recursive call to
267 // EnumerateMDNodeOperands() from re-visiting MD in a cyclic graph.
268 //
269 // Return early if there's already an ID.
270 if (!MDValueMap.insert(std::make_pair(MD, 0)).second)
271 return;
272
273 // Visit operands first to minimize RAUW.
274 if (auto *N = dyn_cast<MDNode>(MD))
275 EnumerateMDNodeOperands(N);
276 else if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
277 EnumerateValue(C->getValue());
278
279 HasMDString |= isa<MDString>(MD);
280 HasDILocation |= isa<DILocation>(MD);
281
282 // Replace the placeholder ID inserted above with the correct one. MDValueMap may
283 // have changed by inserting operands, so we need a fresh lookup here.
284 MDs.push_back(MD);
285 MDValueMap[MD] = MDs.size();
286 }
287
288 /// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata
289 /// information reachable from the metadata.
EnumerateFunctionLocalMetadata(const llvm::LocalAsMetadata * Local)290 void ValueEnumerator::EnumerateFunctionLocalMetadata(
291 const llvm::LocalAsMetadata *Local) {
292 // Check to see if it's already in!
293 unsigned &MDValueID = MDValueMap[Local];
294 if (MDValueID)
295 return;
296
297 MDs.push_back(Local);
298 MDValueID = MDs.size();
299
300 EnumerateValue(Local->getValue());
301
302 // Also, collect all function-local metadata for easy access.
303 FunctionLocalMDs.push_back(Local);
304 }
305
EnumerateValue(const Value * V)306 void ValueEnumerator::EnumerateValue(const Value *V) {
307 assert(!V->getType()->isVoidTy() && "Can't insert void values!");
308 assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");
309
310 // Check to see if it's already in!
311 unsigned &ValueID = ValueMap[V];
312 if (ValueID) {
313 // Increment use count.
314 Values[ValueID-1].second++;
315 return;
316 }
317
318 // Enumerate the type of this value.
319 EnumerateType(V->getType());
320
321 if (const Constant *C = dyn_cast<Constant>(V)) {
322 if (isa<GlobalValue>(C)) {
323 // Initializers for globals are handled explicitly elsewhere.
324 } else if (C->getNumOperands()) {
325 // If a constant has operands, enumerate them. This makes sure that if a
326 // constant has uses (for example an array of const ints), that they are
327 // inserted also.
328
329 // We prefer to enumerate them with values before we enumerate the user
330 // itself. This makes it more likely that we can avoid forward references
331 // in the reader. We know that there can be no cycles in the constants
332 // graph that don't go through a global variable.
333 for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
334 I != E; ++I)
335 if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
336 EnumerateValue(*I);
337
338 // Finally, add the value. Doing this could make the ValueID reference be
339 // dangling, don't reuse it.
340 Values.push_back(std::make_pair(V, 1U));
341 ValueMap[V] = Values.size();
342 return;
343 } else if (const ConstantDataSequential *CDS =
344 dyn_cast<ConstantDataSequential>(C)) {
345 // For our legacy handling of the new ConstantDataSequential type, we
346 // need to enumerate the individual elements, as well as mark the
347 // outer constant as used.
348 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
349 EnumerateValue(CDS->getElementAsConstant(i));
350 Values.push_back(std::make_pair(V, 1U));
351 ValueMap[V] = Values.size();
352 return;
353 }
354 }
355
356 // Add the value.
357 Values.push_back(std::make_pair(V, 1U));
358 ValueID = Values.size();
359 }
360
361
EnumerateType(Type * Ty)362 void ValueEnumerator::EnumerateType(Type *Ty) {
363 unsigned *TypeID = &TypeMap[Ty];
364
365 // We've already seen this type.
366 if (*TypeID)
367 return;
368
369 // If it is a non-anonymous struct, mark the type as being visited so that we
370 // don't recursively visit it. This is safe because we allow forward
371 // references of these in the bitcode reader.
372 if (StructType *STy = dyn_cast<StructType>(Ty))
373 if (!STy->isLiteral())
374 *TypeID = ~0U;
375
376 // Enumerate all of the subtypes before we enumerate this type. This ensures
377 // that the type will be enumerated in an order that can be directly built.
378 for (Type *SubTy : Ty->subtypes())
379 EnumerateType(SubTy);
380
381 // Refresh the TypeID pointer in case the table rehashed.
382 TypeID = &TypeMap[Ty];
383
384 // Check to see if we got the pointer another way. This can happen when
385 // enumerating recursive types that hit the base case deeper than they start.
386 //
387 // If this is actually a struct that we are treating as forward ref'able,
388 // then emit the definition now that all of its contents are available.
389 if (*TypeID && *TypeID != ~0U)
390 return;
391
392 // Add this type now that its contents are all happily enumerated.
393 Types.push_back(Ty);
394
395 *TypeID = Types.size();
396 }
397
398 // Enumerate the types for the specified value. If the value is a constant,
399 // walk through it, enumerating the types of the constant.
EnumerateOperandType(const Value * V)400 void ValueEnumerator::EnumerateOperandType(const Value *V) {
401 EnumerateType(V->getType());
402
403 if (auto *MD = dyn_cast<MetadataAsValue>(V)) {
404 assert(!isa<LocalAsMetadata>(MD->getMetadata()) &&
405 "Function-local metadata should be left for later");
406
407 EnumerateMetadata(MD->getMetadata());
408 return;
409 }
410
411 const Constant *C = dyn_cast<Constant>(V);
412 if (!C)
413 return;
414
415 // If this constant is already enumerated, ignore it, we know its type must
416 // be enumerated.
417 if (ValueMap.count(C))
418 return;
419
420 // This constant may have operands, make sure to enumerate the types in
421 // them.
422 for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
423 const Value *Op = C->getOperand(i);
424
425 // Don't enumerate basic blocks here, this happens as operands to
426 // blockaddress.
427 if (isa<BasicBlock>(Op))
428 continue;
429
430 EnumerateOperandType(Op);
431 }
432 }
433
EnumerateAttributes(AttributeSet PAL)434 void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) {
435 if (PAL.isEmpty()) return; // null is always 0.
436
437 // Do a lookup.
438 unsigned &Entry = AttributeMap[PAL];
439 if (Entry == 0) {
440 // Never saw this before, add it.
441 Attribute.push_back(PAL);
442 Entry = Attribute.size();
443 }
444
445 // Do lookups for all attribute groups.
446 for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) {
447 AttributeSet AS = PAL.getSlotAttributes(i);
448 unsigned &Entry = AttributeGroupMap[AS];
449 if (Entry == 0) {
450 AttributeGroups.push_back(AS);
451 Entry = AttributeGroups.size();
452 }
453 }
454 }
455
incorporateFunction(const Function & F)456 void ValueEnumerator::incorporateFunction(const Function &F) {
457 InstructionCount = 0;
458 NumModuleValues = Values.size();
459 NumModuleMDs = MDs.size();
460
461 // Adding function arguments to the value table.
462 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
463 I != E; ++I)
464 EnumerateValue(&*I);
465
466 FirstFuncConstantID = Values.size();
467
468 // Add all function-level constants to the value table.
469 for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
470 for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
471 for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
472 OI != E; ++OI) {
473 if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
474 isa<InlineAsm>(*OI))
475 EnumerateValue(*OI);
476 }
477 BasicBlocks.push_back(&*BB);
478 ValueMap[&*BB] = BasicBlocks.size();
479 }
480
481 // Optimize the constant layout.
482 OptimizeConstants(FirstFuncConstantID, Values.size());
483
484 // Add the function's parameter attributes so they are available for use in
485 // the function's instruction.
486 EnumerateAttributes(F.getAttributes());
487
488 FirstInstID = Values.size();
489
490 SmallVector<llvm::LocalAsMetadata *, 8> FnLocalMDVector;
491 // Add all of the instructions.
492 for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
493 for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
494 for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
495 OI != E; ++OI) {
496 if (auto *MD = dyn_cast<llvm::MetadataAsValue>(&*OI))
497 if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata()))
498 // Enumerate metadata after the instructions they might refer to.
499 FnLocalMDVector.push_back(Local);
500 }
501
502 if (!I->getType()->isVoidTy())
503 EnumerateValue(&*I);
504 }
505 }
506
507 // Add all of the function-local metadata.
508 for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i)
509 EnumerateFunctionLocalMetadata(FnLocalMDVector[i]);
510 }
511
purgeFunction()512 void ValueEnumerator::purgeFunction() {
513 /// Remove purged values from the ValueMap.
514 for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
515 ValueMap.erase(Values[i].first);
516 for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
517 MDValueMap.erase(MDs[i]);
518 for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
519 ValueMap.erase(BasicBlocks[i]);
520
521 Values.resize(NumModuleValues);
522 MDs.resize(NumModuleMDs);
523 BasicBlocks.clear();
524 FunctionLocalMDs.clear();
525 }
526
IncorporateFunctionInfoGlobalBBIDs(const Function * F,DenseMap<const BasicBlock *,unsigned> & IDMap)527 static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
528 DenseMap<const BasicBlock*, unsigned> &IDMap) {
529 unsigned Counter = 0;
530 for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
531 IDMap[&*BB] = ++Counter;
532 }
533
534 /// getGlobalBasicBlockID - This returns the function-specific ID for the
535 /// specified basic block. This is relatively expensive information, so it
536 /// should only be used by rare constructs such as address-of-label.
getGlobalBasicBlockID(const BasicBlock * BB) const537 unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
538 unsigned &Idx = GlobalBasicBlockIDs[BB];
539 if (Idx != 0)
540 return Idx-1;
541
542 IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
543 return getGlobalBasicBlockID(BB);
544 }
545
546 } // end llvm_2_9_func namespace
547