https://github.com/akkartik/mu/blob/main/035compute_segment_address.cc
 1 //: ELF binaries have finicky rules about the precise alignment each segment
 2 //: should start at. They depend on the amount of code in a program.
 3 //: We shouldn't expect people to adjust segment addresses every time they make
 4 //: a change to their programs.
 5 //: Let's start taking the given segment addresses as guidelines, and adjust
 6 //: them as necessary.
 7 //: This gives up a measure of control in placing code and data.
 8 
 9 void test_segment_name() {
10   run(
11       "== code 0x09000000\n"
12       "05/add-to-EAX  0x0d0c0b0a/imm32\n"
13       // code starts at 0x09000000 + p_offset, which is 0x54 for a single-segment binary
14   );
15   CHECK_TRACE_CONTENTS(
16       "load: 0x09000054 -> 05\n"
17       "load: 0x09000055 -> 0a\n"
18       "load: 0x09000056 -> 0b\n"
19       "load: 0x09000057 -> 0c\n"
20       "load: 0x09000058 -> 0d\n"
21       "run: add imm32 0x0d0c0b0a to EAX\n"
22       "run: storing 0x0d0c0b0a\n"
23   );
24 }
25 
26 //: compute segment address
27 
28 :(before "End Transforms")
29 Transform.push_back(compute_segment_starts);
30 
31 :(code)
32 void compute_segment_starts(program& p) {
33   trace(3, "transform") << "-- compute segment addresses" << end();
34   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
35   for (size_t i = 0;  i < p.segments.size();  ++i) {
36     segment& curr = p.segments.at(i);
37     if (curr.start >= 0x08000000) {
38       // valid address for user space, so assume we're creating a real ELF binary, not just running a test
39       curr.start &= 0xfffff000;  // same number of zeros as the p_align used when emitting the ELF binary
40       curr.start |= (p_offset & 0xfff);
41       trace(99, "transform") << "segment " << i << " begins at address 0x" << HEXWORD << curr.start << end();
42     }
43     p_offset += size_of(curr);
44     assert(p_offset < SEGMENT_ALIGNMENT);  // for now we get less and less available space in each successive segment
45   }
46 }
47 
48 uint32_t size_of(const segment& s) {
49   uint32_t sum = 0;
50   for (int i = 0;  i < SIZE(s.lines);  ++i)
51     sum += num_bytes(s.lines.at(i));
52   return sum;
53 }
54 
55 // Assumes all bitfields are packed.
56 uint32_t num_bytes(const line& inst) {
57   uint32_t sum = 0;
58   for (int i = 0;  i < SIZE(inst.words);  ++i)
59     sum += size_of(inst.words.at(i));
60   return sum;
61 }
62 
63 int size_of(const word& w) {
64   if (has_argument_metadata(w, "disp32") || has_argument_metadata(w, "imm32"))
65     return 4;
66   else if (has_argument_metadata(w, "disp16"))
67     return 2;
68   // End size_of(word w) Special-cases
69   else
70     return 1;
71 }
72 
73 //: Dependencies:
74 //: - We'd like to compute segment addresses before setting up global variables,
75 //:   because computing addresses for global variables requires knowing where
76 //:   the data segment starts.
77 //: - We'd like to finish expanding labels before computing segment addresses,
78 //:   because it would make computing the sizes of segments more self-contained
79 //:   (num_bytes).
80 //:
81 //: Decision: compute segment addresses before expanding labels, by being
82 //: aware in this layer of certain argument types that will eventually occupy
83 //: multiple bytes.
84 //:
 85 //: The layer to expand labels later hooks into size_of(word), which num_bytes()
 86 //: relies on, to teach this layer that labels occupy zero space in the binary.