6957
The final fix to the raytracing program involves rounding modes. It turns out x86 processors round to the nearest integer by default when converting floats to ints, unlike C, which has trained me to expect truncation. Rather than mess with the MXCSR register, I added another instruction for truncation. Now milestone 3 emits perfectly correct results.
This commit is contained in:
parent
bb3ce6cdea
commit
f13576b5d2
32
023float.cc
32
023float.cc
|
@ -111,10 +111,11 @@ case 0x2a: { // convert integer to float
|
|||
|
||||
:(before "End Initialize Op Names")
|
||||
put_new(Name_f3_0f, "2d", "convert floating-point to int (cvtss2si)");
|
||||
put_new(Name_f3_0f, "2c", "truncate floating-point to int (cvttss2si)");
|
||||
|
||||
:(code)
|
||||
void test_cvtss2si() {
|
||||
Xmm[0] = 10.0;
|
||||
Xmm[0] = 9.8;
|
||||
run(
|
||||
"== code 0x1\n"
|
||||
// op ModR/M SIB displacement immediate
|
||||
|
@ -134,7 +135,34 @@ case 0x2d: { // convert float to integer
|
|||
const uint8_t dest = (modrm>>3)&0x7;
|
||||
trace(Callstack_depth+1, "run") << "convert x/m32 to " << rname(dest) << end();
|
||||
const float* src = effective_address_float(modrm);
|
||||
Reg[dest].i = *src;
|
||||
Reg[dest].i = round(*src);
|
||||
trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
|
||||
break;
|
||||
}
|
||||
|
||||
:(code)
|
||||
void test_cvttss2si() {
|
||||
Xmm[0] = 9.8;
|
||||
run(
|
||||
"== code 0x1\n"
|
||||
// op ModR/M SIB displacement immediate
|
||||
"f3 0f 2c c0 \n"
|
||||
// ModR/M in binary: 11 (direct mode) 000 (EAX) 000 (XMM0)
|
||||
);
|
||||
CHECK_TRACE_CONTENTS(
|
||||
"run: truncate x/m32 to EAX\n"
|
||||
"run: x/m32 is XMM0\n"
|
||||
"run: EAX is now 0x00000009\n"
|
||||
);
|
||||
}
|
||||
|
||||
:(before "End Three-Byte Opcodes Starting With f3 0f")
|
||||
case 0x2c: { // truncate float to integer
|
||||
const uint8_t modrm = next();
|
||||
const uint8_t dest = (modrm>>3)&0x7;
|
||||
trace(Callstack_depth+1, "run") << "truncate x/m32 to " << rname(dest) << end();
|
||||
const float* src = effective_address_float(modrm);
|
||||
Reg[dest].i = trunc(*src);
|
||||
trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -701,6 +701,7 @@ map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_f3_0f;
|
|||
put_new(Permitted_arguments_f3_0f, "10", 0x01); // copy xm32 to x32
|
||||
put_new(Permitted_arguments_f3_0f, "11", 0x01); // copy x32 to xm32
|
||||
put_new(Permitted_arguments_f3_0f, "2a", 0x01); // convert-to-float
|
||||
put_new(Permitted_arguments_f3_0f, "2c", 0x01); // truncate-to-int
|
||||
put_new(Permitted_arguments_f3_0f, "2d", 0x01); // convert-to-int
|
||||
put_new(Permitted_arguments_f3_0f, "51", 0x01); // square root
|
||||
put_new(Permitted_arguments_f3_0f, "52", 0x01); // inverse square root
|
||||
|
|
50
apps/mu.subx
50
apps/mu.subx
|
@ -19833,6 +19833,46 @@ _Primitive-convert-xreg-to-reg: # (payload primitive)
|
|||
1/imm32/xm32-is-first-inout
|
||||
0/imm32/no-x32
|
||||
0x11/imm32/alloc-id:fake
|
||||
_Primitive-truncate-xmem-to-reg/imm32/next
|
||||
_Primitive-truncate-xmem-to-reg: # (payload primitive)
|
||||
0x11/imm32/alloc-id:fake:payload
|
||||
# var1/reg <- truncate var2 => f3 0f 2c/truncate-to-int var2/xm32 var1/r32
|
||||
0x11/imm32/alloc-id:fake
|
||||
_string-truncate/imm32/name
|
||||
0x11/imm32/alloc-id:fake
|
||||
Single-float-var-in-mem/imm32/inouts
|
||||
0x11/imm32/alloc-id:fake
|
||||
Single-int-var-in-some-register/imm32/outputs
|
||||
0x11/imm32/alloc-id:fake
|
||||
_string_f3_0f_2c_truncate_to_int/imm32/subx-name
|
||||
0/imm32/no-rm32
|
||||
3/imm32/r32-is-first-output
|
||||
0/imm32/no-imm32
|
||||
0/imm32/no-imm8
|
||||
0/imm32/no-disp32
|
||||
1/imm32/xm32-is-first-inout
|
||||
0/imm32/no-x32
|
||||
0x11/imm32/alloc-id:fake
|
||||
_Primitive-truncate-xreg-to-reg/imm32/next
|
||||
_Primitive-truncate-xreg-to-reg: # (payload primitive)
|
||||
0x11/imm32/alloc-id:fake:payload
|
||||
# var1/reg <- truncate var2/xreg => f3 0f 2c/truncate-to-int var2/xm32 var1/r32
|
||||
0x11/imm32/alloc-id:fake
|
||||
_string-truncate/imm32/name
|
||||
0x11/imm32/alloc-id:fake
|
||||
Single-float-var-in-some-register/imm32/inouts
|
||||
0x11/imm32/alloc-id:fake
|
||||
Single-int-var-in-some-register/imm32/outputs
|
||||
0x11/imm32/alloc-id:fake
|
||||
_string_f3_0f_2c_truncate_to_int/imm32/subx-name
|
||||
0/imm32/no-rm32
|
||||
3/imm32/r32-is-first-output
|
||||
0/imm32/no-imm32
|
||||
0/imm32/no-imm8
|
||||
0/imm32/no-disp32
|
||||
1/imm32/xm32-is-first-inout
|
||||
0/imm32/no-x32
|
||||
0x11/imm32/alloc-id:fake
|
||||
_Primitive-reinterpret-xmem-as-reg/imm32/next
|
||||
# - reinterpret bytes (just for debugging)
|
||||
_Primitive-reinterpret-xmem-as-reg: # (payload primitive)
|
||||
|
@ -21732,6 +21772,11 @@ _string-convert: # (payload array byte)
|
|||
# "convert"
|
||||
0x7/imm32/size
|
||||
0x63/c 0x6f/o 0x6e/n 0x76/v 0x65/e 0x72/r 0x74/t
|
||||
_string-truncate: # (payload array byte)
|
||||
0x11/imm32/alloc-id:fake:payload
|
||||
# "truncate"
|
||||
0x8/imm32/size
|
||||
0x74/t 0x72/r 0x75/u 0x6e/n 0x63/c 0x61/a 0x74/t 0x65/e
|
||||
_string-reinterpret: # (payload array byte)
|
||||
0x11/imm32/alloc-id:fake:payload
|
||||
# "reinterpret"
|
||||
|
@ -22014,6 +22059,11 @@ _string_f3_0f_2d_convert_to_int:
|
|||
# "f3 0f 2d/convert-to-int"
|
||||
0x17/imm32/size
|
||||
0x66/f 0x33/3 0x20/space 0x30/0 0x66/f 0x20/space 0x32/2 0x64/d 0x2f/slash 0x63/c 0x6f/o 0x6e/n 0x76/v 0x65/e 0x72/r 0x74/t 0x2d/dash 0x74/t 0x6f/o 0x2d/dash 0x69/i 0x6e/n 0x74/t
|
||||
_string_f3_0f_2c_truncate_to_int:
|
||||
0x11/imm32/alloc-id:fake:payload
|
||||
# "f3 0f 2c/truncate-to-int"
|
||||
0x18/imm32/size
|
||||
0x66/f 0x33/3 0x20/space 0x30/0 0x66/f 0x20/space 0x32/2 0x63/c 0x2f/slash 0x74/t 0x72/r 0x75/u 0x6e/n 0x63/c 0x61/a 0x74/t 0x65/e 0x2d/dash 0x74/t 0x6f/o 0x2d/dash 0x69/i 0x6e/n 0x74/t
|
||||
_string_f3_0f_58_add:
|
||||
0x11/imm32/alloc-id:fake:payload
|
||||
# "f3 0f 58/add"
|
||||
|
|
|
@ -212,6 +212,8 @@ fn main -> exit-status/ebx: int {
|
|||
ray-color r, c
|
||||
# write color
|
||||
print-rgb 0, c
|
||||
#? print-rgb-raw 0, c
|
||||
#? print-string 0, "\n"
|
||||
}
|
||||
i <- increment
|
||||
loop
|
||||
|
@ -257,27 +259,28 @@ type rgb {
|
|||
# print translating to [0, 256)
|
||||
fn print-rgb screen: (addr screen), _c: (addr rgb) {
|
||||
var c/esi: (addr rgb) <- copy _c
|
||||
var n/ecx: int <- copy 0xff # turns out 255 works just as well as 255.999, which is lucky because we don't have floating-point literals
|
||||
var xn/xmm1: float <- convert n
|
||||
# print 255 * c->r
|
||||
var xn: float
|
||||
var xn-addr/ecx: (addr float) <- address xn
|
||||
fill-in-rational xn-addr, 0x3e7ff, 0x3e8 # 255999 / 1000
|
||||
# print 255.999 * c->r
|
||||
var result/xmm0: float <- copy xn
|
||||
var src-addr/eax: (addr float) <- get c, r
|
||||
result <- multiply *src-addr
|
||||
var result-int/edx: int <- convert result
|
||||
var result-int/edx: int <- truncate result
|
||||
print-int32-decimal screen, result-int
|
||||
print-string screen, " "
|
||||
# print 255 * c->g
|
||||
# print 255.999 * c->g
|
||||
src-addr <- get c, g
|
||||
result <- copy xn
|
||||
result <- multiply *src-addr
|
||||
result-int <- convert result
|
||||
result-int <- truncate result
|
||||
print-int32-decimal screen, result-int
|
||||
print-string screen, " "
|
||||
# print 255 * c->b
|
||||
# print 255.999 * c->b
|
||||
src-addr <- get c, b
|
||||
result <- copy xn
|
||||
result <- multiply *src-addr
|
||||
result-int <- convert result
|
||||
result-int <- truncate result
|
||||
print-int32-decimal screen, result-int
|
||||
print-string screen, "\n"
|
||||
}
|
||||
|
|
44294
apps/raytracing/3.ppm
44294
apps/raytracing/3.ppm
File diff suppressed because it is too large
Load Diff
|
@ -94,6 +94,14 @@ int main() {
|
|||
//? std::cerr << '\n';
|
||||
color pixel_color = ray_color(r);
|
||||
//? std::cerr << "pixel color: " << pixel_color.x() << " " << pixel_color.y() << " " << pixel_color.z() << '\n';
|
||||
|
||||
//? std::cout << "(";
|
||||
//? p(std::cout, pixel_color.x());
|
||||
//? std::cout << ", ";
|
||||
//? p(std::cout, pixel_color.y());
|
||||
//? std::cout << ", ";
|
||||
//? p(std::cout, pixel_color.z());
|
||||
//? std::cout << ")\n";
|
||||
write_color(std::cout, pixel_color);
|
||||
//? break;
|
||||
}
|
||||
|
|
|
@ -351,10 +351,19 @@ var/xreg <span class="SpecialChar"><-</span> convert var2/reg2 => <spa
|
|||
var/xreg <span class="SpecialChar"><-</span> convert var2 => <span class="Constant">"f3 0f 2a/convert-to-float *(ebp+"</span> var2.stack-offset <span class="Constant">") "</span> xreg <span class="Constant">"/x32"</span>
|
||||
var/xreg <span class="SpecialChar"><-</span> convert *var2/reg2 => <span class="Constant">"f3 0f 2a/convert-to-float *"</span> reg2 <span class="Constant">" "</span> xreg <span class="Constant">"/x32"</span>
|
||||
|
||||
Converting floats to ints rounds to the nearest integer by default. (We don't mess with the
|
||||
MXCSR control register.)
|
||||
|
||||
var/reg <span class="SpecialChar"><-</span> convert var2/xreg2 => <span class="Constant">"f3 0f 2d/convert-to-int %"</span> xreg2 <span class="Constant">" "</span> reg <span class="Constant">"/r32"</span>
|
||||
var/reg <span class="SpecialChar"><-</span> convert var2 => <span class="Constant">"f3 0f 2d/convert-to-int *(ebp+"</span> var2.stack-offset <span class="Constant">") "</span> reg <span class="Constant">"/r32"</span>
|
||||
var/reg <span class="SpecialChar"><-</span> convert *var2/reg2 => <span class="Constant">"f3 0f 2d/convert-to-int *"</span> reg2 <span class="Constant">" "</span> reg <span class="Constant">"/r32"</span>
|
||||
|
||||
There's a separate instruction for truncating the fractional part.
|
||||
|
||||
var/reg <span class="SpecialChar"><-</span> truncate var2/xreg2 => <span class="Constant">"f3 0f 2c/truncate-to-int %"</span> xreg2 <span class="Constant">" "</span> reg <span class="Constant">"/r32"</span>
|
||||
var/reg <span class="SpecialChar"><-</span> truncate var2 => <span class="Constant">"f3 0f 2c/truncate-to-int *(ebp+"</span> var2.stack-offset <span class="Constant">") "</span> reg <span class="Constant">"/r32"</span>
|
||||
var/reg <span class="SpecialChar"><-</span> truncate *var2/reg2 => <span class="Constant">"f3 0f 2c/truncate-to-int *"</span> reg2 <span class="Constant">" "</span> reg <span class="Constant">"/r32"</span>
|
||||
|
||||
There are no instructions accepting floating-point literals. To obtain integer
|
||||
literals in floating-point registers, copy them to general-purpose registers
|
||||
and then convert them to floating-point.
|
||||
|
|
4
mu.md
4
mu.md
|
@ -283,6 +283,10 @@ var/xreg <- convert *var2/reg2
|
|||
var/reg <- convert var2/xreg2
|
||||
var/reg <- convert var2
|
||||
var/reg <- convert *var2/reg2
|
||||
|
||||
var/reg <- truncate var2/xreg2
|
||||
var/reg <- truncate var2
|
||||
var/reg <- truncate *var2/reg2
|
||||
```
|
||||
|
||||
There are no instructions accepting floating-point literals. To obtain integer
|
||||
|
|
|
@ -327,10 +327,19 @@ var/xreg <- convert var2/reg2 => "f3 0f 2a/convert-to-float %" reg2 " " xreg
|
|||
var/xreg <- convert var2 => "f3 0f 2a/convert-to-float *(ebp+" var2.stack-offset ") " xreg "/x32"
|
||||
var/xreg <- convert *var2/reg2 => "f3 0f 2a/convert-to-float *" reg2 " " xreg "/x32"
|
||||
|
||||
Converting floats to ints rounds to the nearest integer by default. (We don't mess with the
|
||||
MXCSR control register.)
|
||||
|
||||
var/reg <- convert var2/xreg2 => "f3 0f 2d/convert-to-int %" xreg2 " " reg "/r32"
|
||||
var/reg <- convert var2 => "f3 0f 2d/convert-to-int *(ebp+" var2.stack-offset ") " reg "/r32"
|
||||
var/reg <- convert *var2/reg2 => "f3 0f 2d/convert-to-int *" reg2 " " reg "/r32"
|
||||
|
||||
There's a separate instruction for truncating the fractional part.
|
||||
|
||||
var/reg <- truncate var2/xreg2 => "f3 0f 2c/truncate-to-int %" xreg2 " " reg "/r32"
|
||||
var/reg <- truncate var2 => "f3 0f 2c/truncate-to-int *(ebp+" var2.stack-offset ") " reg "/r32"
|
||||
var/reg <- truncate *var2/reg2 => "f3 0f 2c/truncate-to-int *" reg2 " " reg "/r32"
|
||||
|
||||
There are no instructions accepting floating-point literals. To obtain integer
|
||||
literals in floating-point registers, copy them to general-purpose registers
|
||||
and then convert them to floating-point.
|
||||
|
|
|
@ -115,6 +115,7 @@ Opcodes currently supported by SubX:
|
|||
f3 0f 10: copy xm32 to x32 (movss)
|
||||
f3 0f 11: copy x32 to xm32 (movss)
|
||||
f3 0f 2a: convert integer to floating-point (cvtsi2ss)
|
||||
f3 0f 2c: truncate floating-point to int (cvttss2si)
|
||||
f3 0f 2d: convert floating-point to int (cvtss2si)
|
||||
f3 0f 51: square root of float (sqrtss)
|
||||
f3 0f 52: inverse square root of float (rsqrtss)
|
||||
|
|
Loading…
Reference in New Issue