The final fix to the raytracing program involves rounding modes. It turns
out x86 processors round to the nearest integer by default when converting
floats to ints, unlike C, which has trained me to expect truncation. Rather
than mess with the MXCSR register, I added another instruction for
truncation. Now milestone 3 emits perfectly correct results.
This commit is contained in:
Kartik Agaram 2020-10-05 10:16:53 -07:00
parent bb3ce6cdea
commit f13576b5d2
11 changed files with 22270 additions and 22157 deletions

View File

@ -111,10 +111,11 @@ case 0x2a: { // convert integer to float
:(before "End Initialize Op Names")
put_new(Name_f3_0f, "2d", "convert floating-point to int (cvtss2si)");
put_new(Name_f3_0f, "2c", "truncate floating-point to int (cvttss2si)");
:(code)
void test_cvtss2si() {
Xmm[0] = 10.0;
Xmm[0] = 9.8;
run(
"== code 0x1\n"
// op ModR/M SIB displacement immediate
@ -134,7 +135,34 @@ case 0x2d: { // convert float to integer
const uint8_t dest = (modrm>>3)&0x7;
trace(Callstack_depth+1, "run") << "convert x/m32 to " << rname(dest) << end();
const float* src = effective_address_float(modrm);
Reg[dest].i = *src;
Reg[dest].i = round(*src);
trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
break;
}
:(code)
void test_cvttss2si() {
// Checks that opcode f3 0f 2c drops the fractional part of a float instead of
// rounding it. 9.8 is chosen so that truncation (9) and rounding to nearest
// (10) give different answers.
Xmm[0] = 9.8;
run(
"== code 0x1\n"
// op ModR/M SIB displacement immediate
"f3 0f 2c c0 \n"
// ModR/M in binary: 11 (direct mode) 000 (EAX) 000 (XMM0)
);
// The trace must show EAX receiving 9, i.e. the .8 was discarded.
CHECK_TRACE_CONTENTS(
"run: truncate x/m32 to EAX\n"
"run: x/m32 is XMM0\n"
"run: EAX is now 0x00000009\n"
);
}
:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x2c: {  // truncate float to integer (cvttss2si)
  // Bits 3-5 of the ModR/M byte select the destination general-purpose register.
  const uint8_t modrm = next();
  const uint8_t dest = (modrm>>3)&0x7;
  trace(Callstack_depth+1, "run") << "truncate x/m32 to " << rname(dest) << end();
  const float* src = effective_address_float(modrm);
  // Drop the fractional part (round toward zero), regardless of rounding mode.
  const double truncated = trunc(*src);
  // Converting a NaN or a value outside the 32-bit signed range to an integer
  // is undefined behavior in C++. x86 hardware returns the 'integer indefinite'
  // value 0x80000000 in those cases, so emulate that explicitly.
  // The range check is written so that NaN (for which every comparison is
  // false) also falls through to the indefinite value.
  if (truncated >= -2147483648.0 && truncated < 2147483648.0)
    Reg[dest].i = static_cast<int32_t>(truncated);
  else
    Reg[dest].i = -2147483647 - 1;  // bit pattern 0x80000000
  trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
  break;
}

View File

@ -701,6 +701,7 @@ map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_f3_0f;
put_new(Permitted_arguments_f3_0f, "10", 0x01); // copy xm32 to x32
put_new(Permitted_arguments_f3_0f, "11", 0x01); // copy x32 to xm32
put_new(Permitted_arguments_f3_0f, "2a", 0x01); // convert-to-float
put_new(Permitted_arguments_f3_0f, "2c", 0x01); // truncate-to-int
put_new(Permitted_arguments_f3_0f, "2d", 0x01); // convert-to-int
put_new(Permitted_arguments_f3_0f, "51", 0x01); // square root
put_new(Permitted_arguments_f3_0f, "52", 0x01); // inverse square root

BIN
apps/mu

Binary file not shown.

View File

@ -19833,6 +19833,46 @@ _Primitive-convert-xreg-to-reg: # (payload primitive)
1/imm32/xm32-is-first-inout
0/imm32/no-x32
0x11/imm32/alloc-id:fake
_Primitive-truncate-xmem-to-reg/imm32/next
_Primitive-truncate-xmem-to-reg: # (payload primitive)
# Table entry for the Mu statement 'truncate' when the float operand lives in
# memory and the result goes to an int register.
# Each 0x11 word tagged alloc-id:fake sits immediately before an address-valued
# field; presumably it stands in for a handle's alloc-id -- confirm against the
# handle layout elsewhere in this file.
0x11/imm32/alloc-id:fake:payload
# var1/reg <- truncate var2 => f3 0f 2c/truncate-to-int var2/xm32 var1/r32
0x11/imm32/alloc-id:fake
_string-truncate/imm32/name
0x11/imm32/alloc-id:fake
Single-float-var-in-mem/imm32/inouts
0x11/imm32/alloc-id:fake
Single-int-var-in-some-register/imm32/outputs
0x11/imm32/alloc-id:fake
_string_f3_0f_2c_truncate_to_int/imm32/subx-name
# operand encoding: only r32 (taken from the first output) and xm32 (taken from
# the first inout) are used; every other operand slot is 0/none
0/imm32/no-rm32
3/imm32/r32-is-first-output
0/imm32/no-imm32
0/imm32/no-imm8
0/imm32/no-disp32
1/imm32/xm32-is-first-inout
0/imm32/no-x32
# link to the following entry in the primitives list
0x11/imm32/alloc-id:fake
_Primitive-truncate-xreg-to-reg/imm32/next
_Primitive-truncate-xreg-to-reg: # (payload primitive)
# Table entry for the Mu statement 'truncate' when the float operand is in a
# floating-point register and the result goes to an int register.
# Same emitted opcode and operand encoding as the in-memory variant; only the
# inouts constraint differs.
0x11/imm32/alloc-id:fake:payload
# var1/reg <- truncate var2/xreg => f3 0f 2c/truncate-to-int var2/xm32 var1/r32
0x11/imm32/alloc-id:fake
_string-truncate/imm32/name
0x11/imm32/alloc-id:fake
Single-float-var-in-some-register/imm32/inouts
0x11/imm32/alloc-id:fake
Single-int-var-in-some-register/imm32/outputs
0x11/imm32/alloc-id:fake
_string_f3_0f_2c_truncate_to_int/imm32/subx-name
# operand encoding: only r32 (taken from the first output) and xm32 (taken from
# the first inout) are used; every other operand slot is 0/none
0/imm32/no-rm32
3/imm32/r32-is-first-output
0/imm32/no-imm32
0/imm32/no-imm8
0/imm32/no-disp32
1/imm32/xm32-is-first-inout
0/imm32/no-x32
# link to the following entry in the primitives list
0x11/imm32/alloc-id:fake
_Primitive-reinterpret-xmem-as-reg/imm32/next
# - reinterpret bytes (just for debugging)
_Primitive-reinterpret-xmem-as-reg: # (payload primitive)
@ -21732,6 +21772,11 @@ _string-convert: # (payload array byte)
# "convert"
0x7/imm32/size
0x63/c 0x6f/o 0x6e/n 0x76/v 0x65/e 0x72/r 0x74/t
_string-truncate: # (payload array byte)
# Mu-level instruction name, referenced by the 'name' field of the truncate primitives.
0x11/imm32/alloc-id:fake:payload
# "truncate"
# size below is the byte count of the text: 8 characters
0x8/imm32/size
0x74/t 0x72/r 0x75/u 0x6e/n 0x63/c 0x61/a 0x74/t 0x65/e
_string-reinterpret: # (payload array byte)
0x11/imm32/alloc-id:fake:payload
# "reinterpret"
@ -22014,6 +22059,11 @@ _string_f3_0f_2d_convert_to_int:
# "f3 0f 2d/convert-to-int"
0x17/imm32/size
0x66/f 0x33/3 0x20/space 0x30/0 0x66/f 0x20/space 0x32/2 0x64/d 0x2f/slash 0x63/c 0x6f/o 0x6e/n 0x76/v 0x65/e 0x72/r 0x74/t 0x2d/dash 0x74/t 0x6f/o 0x2d/dash 0x69/i 0x6e/n 0x74/t
_string_f3_0f_2c_truncate_to_int:
# SubX opcode text, referenced by the 'subx-name' field of the truncate primitives.
0x11/imm32/alloc-id:fake:payload
# "f3 0f 2c/truncate-to-int"
# size below is the byte count of the text: 8 (opcode bytes and spaces) + 1 (slash) + 15 (label) = 24 = 0x18
0x18/imm32/size
0x66/f 0x33/3 0x20/space 0x30/0 0x66/f 0x20/space 0x32/2 0x63/c 0x2f/slash 0x74/t 0x72/r 0x75/u 0x6e/n 0x63/c 0x61/a 0x74/t 0x65/e 0x2d/dash 0x74/t 0x6f/o 0x2d/dash 0x69/i 0x6e/n 0x74/t
_string_f3_0f_58_add:
0x11/imm32/alloc-id:fake:payload
# "f3 0f 58/add"

View File

@ -212,6 +212,8 @@ fn main -> exit-status/ebx: int {
ray-color r, c
# write color
print-rgb 0, c
#? print-rgb-raw 0, c
#? print-string 0, "\n"
}
i <- increment
loop
@ -257,27 +259,28 @@ type rgb {
# print translating to [0, 256)
fn print-rgb screen: (addr screen), _c: (addr rgb) {
var c/esi: (addr rgb) <- copy _c
var n/ecx: int <- copy 0xff # turns out 255 works just as well as 255.999, which is lucky because we don't have floating-point literals
var xn/xmm1: float <- convert n
# print 255 * c->r
var xn: float
var xn-addr/ecx: (addr float) <- address xn
fill-in-rational xn-addr, 0x3e7ff, 0x3e8 # 255999 / 1000
# print 255.999 * c->r
var result/xmm0: float <- copy xn
var src-addr/eax: (addr float) <- get c, r
result <- multiply *src-addr
var result-int/edx: int <- convert result
var result-int/edx: int <- truncate result
print-int32-decimal screen, result-int
print-string screen, " "
# print 255 * c->g
# print 255.999 * c->g
src-addr <- get c, g
result <- copy xn
result <- multiply *src-addr
result-int <- convert result
result-int <- truncate result
print-int32-decimal screen, result-int
print-string screen, " "
# print 255 * c->b
# print 255.999 * c->b
src-addr <- get c, b
result <- copy xn
result <- multiply *src-addr
result-int <- convert result
result-int <- truncate result
print-int32-decimal screen, result-int
print-string screen, "\n"
}

File diff suppressed because it is too large Load Diff

View File

@ -94,6 +94,14 @@ int main() {
//? std::cerr << '\n';
color pixel_color = ray_color(r);
//? std::cerr << "pixel color: " << pixel_color.x() << " " << pixel_color.y() << " " << pixel_color.z() << '\n';
//? std::cout << "(";
//? p(std::cout, pixel_color.x());
//? std::cout << ", ";
//? p(std::cout, pixel_color.y());
//? std::cout << ", ";
//? p(std::cout, pixel_color.z());
//? std::cout << ")\n";
write_color(std::cout, pixel_color);
//? break;
}

View File

@ -351,10 +351,19 @@ var/xreg <span class="SpecialChar">&lt;-</span> convert var2/reg2 =&gt; <spa
var/xreg <span class="SpecialChar">&lt;-</span> convert var2 =&gt; <span class="Constant">&quot;f3 0f 2a/convert-to-float *(ebp+&quot;</span> var2.stack-offset <span class="Constant">&quot;) &quot;</span> xreg <span class="Constant">&quot;/x32&quot;</span>
var/xreg <span class="SpecialChar">&lt;-</span> convert *var2/reg2 =&gt; <span class="Constant">&quot;f3 0f 2a/convert-to-float *&quot;</span> reg2 <span class="Constant">&quot; &quot;</span> xreg <span class="Constant">&quot;/x32&quot;</span>
Converting floats to ints rounds to the nearest integer by default. (We don't
mess with the MXCSR control register, which selects the rounding mode.)
var/reg <span class="SpecialChar">&lt;-</span> convert var2/xreg2 =&gt; <span class="Constant">&quot;f3 0f 2d/convert-to-int %&quot;</span> xreg2 <span class="Constant">&quot; &quot;</span> reg <span class="Constant">&quot;/r32&quot;</span>
var/reg <span class="SpecialChar">&lt;-</span> convert var2 =&gt; <span class="Constant">&quot;f3 0f 2d/convert-to-int *(ebp+&quot;</span> var2.stack-offset <span class="Constant">&quot;) &quot;</span> reg <span class="Constant">&quot;/r32&quot;</span>
var/reg <span class="SpecialChar">&lt;-</span> convert *var2/reg2 =&gt; <span class="Constant">&quot;f3 0f 2d/convert-to-int *&quot;</span> reg2 <span class="Constant">&quot; &quot;</span> reg <span class="Constant">&quot;/r32&quot;</span>
There's a separate instruction for truncating the fractional part.
var/reg <span class="SpecialChar">&lt;-</span> truncate var2/xreg2 =&gt; <span class="Constant">&quot;f3 0f 2c/truncate-to-int %&quot;</span> xreg2 <span class="Constant">&quot; &quot;</span> reg <span class="Constant">&quot;/r32&quot;</span>
var/reg <span class="SpecialChar">&lt;-</span> truncate var2 =&gt; <span class="Constant">&quot;f3 0f 2c/truncate-to-int *(ebp+&quot;</span> var2.stack-offset <span class="Constant">&quot;) &quot;</span> reg <span class="Constant">&quot;/r32&quot;</span>
var/reg <span class="SpecialChar">&lt;-</span> truncate *var2/reg2 =&gt; <span class="Constant">&quot;f3 0f 2c/truncate-to-int *&quot;</span> reg2 <span class="Constant">&quot; &quot;</span> reg <span class="Constant">&quot;/r32&quot;</span>
There are no instructions accepting floating-point literals. To obtain integer
literals in floating-point registers, copy them to general-purpose registers
and then convert them to floating-point.

4
mu.md
View File

@ -283,6 +283,10 @@ var/xreg <- convert *var2/reg2
var/reg <- convert var2/xreg2
var/reg <- convert var2
var/reg <- convert *var2/reg2
var/reg <- truncate var2/xreg2
var/reg <- truncate var2
var/reg <- truncate *var2/reg2
```
There are no instructions accepting floating-point literals. To obtain integer

View File

@ -327,10 +327,19 @@ var/xreg <- convert var2/reg2 => "f3 0f 2a/convert-to-float %" reg2 " " xreg
var/xreg <- convert var2 => "f3 0f 2a/convert-to-float *(ebp+" var2.stack-offset ") " xreg "/x32"
var/xreg <- convert *var2/reg2 => "f3 0f 2a/convert-to-float *" reg2 " " xreg "/x32"
Converting floats to ints rounds to the nearest integer by default. (We don't
mess with the MXCSR control register, which selects the rounding mode.)
var/reg <- convert var2/xreg2 => "f3 0f 2d/convert-to-int %" xreg2 " " reg "/r32"
var/reg <- convert var2 => "f3 0f 2d/convert-to-int *(ebp+" var2.stack-offset ") " reg "/r32"
var/reg <- convert *var2/reg2 => "f3 0f 2d/convert-to-int *" reg2 " " reg "/r32"
There's a separate instruction for truncating the fractional part.
var/reg <- truncate var2/xreg2 => "f3 0f 2c/truncate-to-int %" xreg2 " " reg "/r32"
var/reg <- truncate var2 => "f3 0f 2c/truncate-to-int *(ebp+" var2.stack-offset ") " reg "/r32"
var/reg <- truncate *var2/reg2 => "f3 0f 2c/truncate-to-int *" reg2 " " reg "/r32"
There are no instructions accepting floating-point literals. To obtain integer
literals in floating-point registers, copy them to general-purpose registers
and then convert them to floating-point.

View File

@ -115,6 +115,7 @@ Opcodes currently supported by SubX:
f3 0f 10: copy xm32 to x32 (movss)
f3 0f 11: copy x32 to xm32 (movss)
f3 0f 2a: convert integer to floating-point (cvtsi2ss)
f3 0f 2c: truncate floating-point to int (cvttss2si)
f3 0f 2d: convert floating-point to int (cvtss2si)
f3 0f 51: square root of float (sqrtss)
f3 0f 52: inverse square root of float (rsqrtss)