git.codecow.com Git - nano-pow.git/commitdiff
Implement double-buffering for GPU.
author Chris Duncan <chris@zoso.dev>
Sun, 15 Jun 2025 08:22:37 +0000 (01:22 -0700)
committer Chris Duncan <chris@zoso.dev>
Sun, 15 Jun 2025 08:22:37 +0000 (01:22 -0700)
src/lib/generate/webgpu/index.ts

index 94c0bdf8e2efd5c0d3a9d1f8fbf82d59974f15ab..d57837ac4d6359582624ec6f1e3eaed8a2689b9a 100644 (file)
@@ -15,8 +15,7 @@ const hashData: BigUint64Array = new BigUint64Array(4)
 const bufferReset: BigUint64Array = new BigUint64Array(4)
 const inputData: BigUint64Array = new BigUint64Array(6)
 const inputDataView: DataView = new DataView(inputData.buffer)
-let resultData: Uint32Array = new Uint32Array(5)
-let resultView: DataView = new DataView(resultData.buffer)
+const resultViews: DataView[] = []
 
 // Initialize process variables
 let isContextLost: number = 0
@@ -25,11 +24,11 @@ let status: NanoPowDeviceStatus = 'Idle'
 // Declare WebGPU variables
 let device: GPUDevice
 let bindGroupLayout: GPUBindGroupLayout | null
-let bindGroup: GPUBindGroup | null
+let bindGroups: (GPUBindGroup | null)[]
 let pipeline: GPUComputePipeline
-let inputBuffer: GPUBuffer
-let outputBuffer: GPUBuffer
-let resultBuffer: GPUBuffer
+let inputBuffers: GPUBuffer[]
+let outputBuffers: GPUBuffer[]
+let resultBuffers: GPUBuffer[]
 
 // Initialize WebGPU
 async function start (): Promise<void> {
@@ -77,21 +76,41 @@ async function getDevice () {
 // Compile and cache shader prior to actual dispatch
 async function compile () {
        // Create buffers for writing GPU calculations and reading from Javascript
-       inputBuffer = device.createBuffer({
-               label: 'INPUT',
-               size: 48,
-               usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
-       })
-       outputBuffer = device.createBuffer({
-               label: 'gpu',
-               size: 32,
-               usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
-       })
-       resultBuffer = device.createBuffer({
-               label: 'cpu',
-               size: 32,
-               usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
-       })
+       inputBuffers = [
+               device.createBuffer({
+                       label: 'INPUT_0',
+                       size: 48,
+                       usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
+               }),
+               device.createBuffer({
+                       label: 'INPUT_1',
+                       size: 48,
+                       usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
+               })
+       ]
+       outputBuffers = [
+               device.createBuffer({
+                       label: 'OUTPUT_0',
+                       size: 32,
+                       usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
+               }),
+               device.createBuffer({
+                       label: 'OUTPUT_1',
+                       size: 32,
+                       usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
+               })
+       ]
+       resultBuffers = [
+               device.createBuffer({
+                       label: 'RESULT',
+                       size: 32,
+                       usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
+               }), device.createBuffer({
+                       label: 'RESULT',
+                       size: 32,
+                       usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
+               })
+       ]
        // Create binding group data structure to use later once INPUT is known
        bindGroupLayout = device.createBindGroupLayout({
                entries: [
@@ -100,13 +119,22 @@ async function compile () {
                ],
        })
        // Bind INPUT read and GPU write buffers
-       bindGroup = device.createBindGroup({
-               layout: bindGroupLayout,
-               entries: [
-                       { binding: 0, resource: { buffer: inputBuffer }, },
-                       { binding: 1, resource: { buffer: outputBuffer }, },
-               ],
-       })
+       bindGroups = [
+               device.createBindGroup({
+                       layout: bindGroupLayout,
+                       entries: [
+                               { binding: 0, resource: { buffer: inputBuffers[0] }, },
+                               { binding: 1, resource: { buffer: outputBuffers[0] }, },
+                       ],
+               }),
+               device.createBindGroup({
+                       layout: bindGroupLayout,
+                       entries: [
+                               { binding: 0, resource: { buffer: inputBuffers[1] }, },
+                               { binding: 1, resource: { buffer: outputBuffers[1] }, },
+                       ],
+               })
+       ]
        // Create pipeline to connect compute shader to binding layout
        pipeline = device.createComputePipeline({
                layout: device.createPipelineLayout({
@@ -129,12 +157,14 @@ async function restore (): Promise<void> {
        logger.log('restoring')
        try {
                status = 'Restoring'
-               try { resultBuffer?.unmap() } catch { }
-               resultBuffer?.destroy()
-               outputBuffer?.destroy()
-               inputBuffer?.destroy()
+               for (let i = 0; i < 2; i++) {
+                       try { resultBuffers[i]?.unmap() } catch { }
+                       resultBuffers[i]?.destroy()
+                       outputBuffers[i]?.destroy()
+                       inputBuffers[i]?.destroy()
+                       bindGroups[i] = null
+               }
                bindGroupLayout = null
-               bindGroup = null
                await getDevice()
                await compile()
                window.clearTimeout(isContextLost)
@@ -160,18 +190,21 @@ async function init (hash: BigUint64Array, difficulty: bigint): Promise<void> {
                        inputDataView.setBigUint64(i * 8, hashData[i])
                }
                inputDataView.setBigUint64(32, difficulty, true)
-               device.queue.writeBuffer(inputBuffer, 0, inputDataView)
+               device.queue.writeBuffer(inputBuffers[0], 0, inputDataView)
+               device.queue.writeBuffer(inputBuffers[1], 0, inputDataView)
 
                // Reset OUTPUT properties to 0u before each calculation
-               device.queue.writeBuffer(outputBuffer, 0, bufferReset)
-               device.queue.writeBuffer(resultBuffer, 0, bufferReset)
+               device.queue.writeBuffer(outputBuffers[0], 0, bufferReset)
+               device.queue.writeBuffer(outputBuffers[1], 0, bufferReset)
+               device.queue.writeBuffer(resultBuffers[0], 0, bufferReset)
+               device.queue.writeBuffer(resultBuffers[1], 0, bufferReset)
        } catch (err) {
                logger.log(err)
                throw new Error('failed to initialize', { cause: err })
        }
 }
 
-async function dispatch (seed: bigint, effort: number): Promise<void> {
+async function dispatch (dispatchIndex: number, seed: bigint, effort: number): Promise<void> {
        logger.log('dispatching compute pass')
        try {
                logger.log('seed', bigintToHex(seed, 16))
@@ -179,7 +212,7 @@ async function dispatch (seed: bigint, effort: number): Promise<void> {
                // Copy seed into INPUT buffer
                inputDataView.setBigUint64(40, seed, true)
                logger.log('INPUT', inputDataView)
-               device.queue.writeBuffer(inputBuffer, 0, inputDataView)
+               device.queue.writeBuffer(inputBuffers[dispatchIndex], 0, inputDataView)
 
                // Create command encoder to issue commands to GPU and initiate computation
                const commandEncoder = device.createCommandEncoder()
@@ -187,13 +220,13 @@ async function dispatch (seed: bigint, effort: number): Promise<void> {
 
                // Issue commands and end compute pass structure
                passEncoder.setPipeline(pipeline)
-               passEncoder.setBindGroup(0, bindGroup)
+               passEncoder.setBindGroup(0, bindGroups[dispatchIndex])
                passEncoder.dispatchWorkgroups(effort * 0x100, effort * 0x100)
                passEncoder.end()
 
                // Copy 8-byte result, 8-byte nonce, and 4-byte found flag from GPU to CPU
                // for reading
-               commandEncoder.copyBufferToBuffer(outputBuffer, 0, resultBuffer, 0, 32)
+               commandEncoder.copyBufferToBuffer(outputBuffers[dispatchIndex], 0, resultBuffers[dispatchIndex], 0, 32)
 
                // End computation by passing array of command buffers to command queue for execution
                device.queue.submit([commandEncoder.finish()])
@@ -203,17 +236,16 @@ async function dispatch (seed: bigint, effort: number): Promise<void> {
        }
 }
 
-async function check (): Promise<boolean> {
+async function check (dispatchIndex: number): Promise<boolean> {
        logger.log('checking results from compute pass')
        try {
-               await resultBuffer.mapAsync(GPUMapMode.READ)
-               await device.queue.onSubmittedWorkDone()
-               resultData = new Uint32Array(resultBuffer.getMappedRange().slice(0))
-               resultBuffer.unmap()
-               resultView = new DataView(resultData.buffer)
-               logger.log('OUTPUT', resultView)
-               if (resultView == null) throw new Error('failed to get data from resultBuffer.')
-               return !!resultView.getUint32(0, true)
+               await resultBuffers[dispatchIndex].mapAsync(GPUMapMode.READ)
+               resultViews[dispatchIndex] = new DataView(resultBuffers[dispatchIndex].getMappedRange().slice(0))
+               resultBuffers[dispatchIndex].unmap()
+               logger.log('OUTPUT', resultViews[dispatchIndex])
+               if (resultViews[dispatchIndex] == null) throw new Error('failed to get data from resultBuffer.')
+               return !!resultViews[dispatchIndex].getUint32(0, true)
+
        } catch (err) {
                logger.log(err)
                throw new Error('failed to read results from compute pass', { cause: err })
@@ -223,13 +255,13 @@ async function check (): Promise<boolean> {
 /**
 * Map CPU buffer to GPU, read results to static result object, and unmap.
 */
-function read (): { work: bigint, difficulty: bigint } {
+function read (dispatchIndex: number): { work: bigint, difficulty: bigint } {
        logger.log('reading results from compute pass')
        try {
-               if (resultView == null) throw new Error('failed to get data from result view')
+               if (resultViews[dispatchIndex] == null) throw new Error('failed to get data from result view')
                return {
-                       work: resultView.getBigUint64(8, true),
-                       difficulty: resultView.getBigUint64(16, true)
+                       work: resultViews[dispatchIndex].getBigUint64(8, true),
+                       difficulty: resultViews[dispatchIndex].getBigUint64(16, true)
                }
        } catch (err) {
                logger.log(err)
@@ -248,7 +280,7 @@ export async function generate (hash: bigint, difficulty: bigint, effort: number
                timeout = true
                throw new Error('timed out')
        }, 60_000)
-       logger.groupStart('NanoPow WebGPU work_generate')
+       // logger.groupStart('NanoPow WebGPU work_generate')
        logger.log('generating')
        let found = false
        let result: { [key: string]: bigint } = {}
@@ -264,12 +296,17 @@ export async function generate (hash: bigint, difficulty: bigint, effort: number
                                        }
                                }
                                await q.add(init, bigintAsUintNArray(hash, 64, 4), difficulty)
+                               // Dispatch initial workgroups and set index
+                               await dispatch(0, bigintRandom(), effort)
+                               let dispatchIndex = 1
                                // Loop attempts until valid work found
                                do {
-                                       await dispatch(bigintRandom(), effort)
-                                       found = await check()
+                                       await dispatch(dispatchIndex, bigintRandom(), effort)
+                                       dispatchIndex ^= 1
+                                       found = await check(dispatchIndex)
                                } while (!found && !timeout)
-                               if (found) result = read()
+                               await device.queue.onSubmittedWorkDone()
+                               if (found) result = read(dispatchIndex)
                                isFirstRetry = false
                        } catch (err: any) {
                                if (status === 'Unsupported') {