From: Chris Duncan Date: Tue, 14 Jan 2025 18:46:26 +0000 (-0800) Subject: Replace more 32-bit scalar rotations with vector rotations. X-Git-Tag: v2.0.0~120 X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=09ab6f27fce2e8a475b78a315b1ecd9e78bbab3f;p=nano-pow.git Replace more 32-bit scalar rotations with vector rotations. --- diff --git a/src/shaders/compute.wgsl b/src/shaders/compute.wgsl index a484a8d..b26bb20 100644 --- a/src/shaders/compute.wgsl +++ b/src/shaders/compute.wgsl @@ -518,10 +518,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -1267,10 +1270,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -1998,10 +2004,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -2735,10 +2744,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -3475,10 +3487,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -4230,10 +4245,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -4973,10 +4991,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -5707,10 +5728,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -6447,10 +6471,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -7193,10 +7220,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -7936,10 +7966,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { // // skip since adding 0u does nothing // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20; @@ -8685,10 +8718,13 @@ fn main(@builtin(global_invocation_id) id: vec3) { v1 = o1; // d = rotr64(d ^ a, 32) - xor0 = v30 ^ v0; - xor1 = v31 ^ v1; - v30 = xor1; - v31 = xor0; + v_01.x = v0; + v_01.y = v1; + v_3031.x = v30; + v_3031.y = v31; + v_3031 = v_3031 ^ v_01; + v30 = v_3031.y; + v31 = v_3031.x; // c = c + d v_2021.x = v20;