From: Chris Duncan Date: Sun, 15 Jun 2025 08:22:37 +0000 (-0700) Subject: Implement double-buffering for GPU. X-Git-Tag: v5.0.0~24 X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=8fa52ca4d6deee0ea572dce01dae91737cf613e8;p=nano-pow.git Implement double-buffering for GPU. --- diff --git a/src/lib/generate/webgpu/index.ts b/src/lib/generate/webgpu/index.ts index 94c0bdf..d57837a 100644 --- a/src/lib/generate/webgpu/index.ts +++ b/src/lib/generate/webgpu/index.ts @@ -15,8 +15,7 @@ const hashData: BigUint64Array = new BigUint64Array(4) const bufferReset: BigUint64Array = new BigUint64Array(4) const inputData: BigUint64Array = new BigUint64Array(6) const inputDataView: DataView = new DataView(inputData.buffer) -let resultData: Uint32Array = new Uint32Array(5) -let resultView: DataView = new DataView(resultData.buffer) +const resultViews: DataView[] = [] // Initialize process variables let isContextLost: number = 0 @@ -25,11 +24,11 @@ let status: NanoPowDeviceStatus = 'Idle' // Declare WebGPU variables let device: GPUDevice let bindGroupLayout: GPUBindGroupLayout | null -let bindGroup: GPUBindGroup | null +let bindGroups: (GPUBindGroup | null)[] let pipeline: GPUComputePipeline -let inputBuffer: GPUBuffer -let outputBuffer: GPUBuffer -let resultBuffer: GPUBuffer +let inputBuffers: GPUBuffer[] +let outputBuffers: GPUBuffer[] +let resultBuffers: GPUBuffer[] // Initialize WebGPU async function start (): Promise { @@ -77,21 +76,41 @@ async function getDevice () { // Compile and cache shader prior to actual dispatch async function compile () { // Create buffers for writing GPU calculations and reading from Javascript - inputBuffer = device.createBuffer({ - label: 'INPUT', - size: 48, - usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST - }) - outputBuffer = device.createBuffer({ - label: 'gpu', - size: 32, - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC - }) - resultBuffer = device.createBuffer({ - label: 'cpu', - size: 32, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ - }) + inputBuffers = [ + device.createBuffer({ + label: 'INPUT_0', + size: 48, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST + }), + device.createBuffer({ + label: 'INPUT_1', + size: 48, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST + }) + ] + outputBuffers = [ + device.createBuffer({ + label: 'OUTPUT_0', + size: 32, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC + }), + device.createBuffer({ + label: 'OUTPUT_1', + size: 32, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC + }) + ] + resultBuffers = [ + device.createBuffer({ + label: 'RESULT', + size: 32, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ + }), device.createBuffer({ + label: 'RESULT', + size: 32, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ + }) + ] // Create binding group data structure to use later once INPUT is known bindGroupLayout = device.createBindGroupLayout({ entries: [ @@ -100,13 +119,22 @@ async function compile () { ], }) // Bind INPUT read and GPU write buffers - bindGroup = device.createBindGroup({ - layout: bindGroupLayout, - entries: [ - { binding: 0, resource: { buffer: inputBuffer }, }, - { binding: 1, resource: { buffer: outputBuffer }, }, - ], - }) + bindGroups = [ + device.createBindGroup({ + layout: bindGroupLayout, + entries: [ + { binding: 0, resource: { buffer: inputBuffers[0] }, }, + { binding: 1, resource: { buffer: outputBuffers[0] }, }, + ], + }), + device.createBindGroup({ + layout: bindGroupLayout, + entries: [ + { binding: 0, resource: { buffer: inputBuffers[1] }, }, + { binding: 1, resource: { buffer: outputBuffers[1] }, }, + ], + }) + ] // Create pipeline to connect compute shader to binding layout pipeline = device.createComputePipeline({ layout: device.createPipelineLayout({ @@ -129,12 +157,14 @@ async function restore (): Promise { logger.log('restoring') try { status = 'Restoring' - try { resultBuffer?.unmap() } catch { } - resultBuffer?.destroy() - outputBuffer?.destroy() - inputBuffer?.destroy() + for (let i = 0; i < 2; i++) { + try { resultBuffers[i]?.unmap() } catch { } + resultBuffers[i]?.destroy() + outputBuffers[i]?.destroy() + inputBuffers[i]?.destroy() + bindGroups[i] = null + } bindGroupLayout = null - bindGroup = null await getDevice() await compile() window.clearTimeout(isContextLost) @@ -160,18 +190,21 @@ async function init (hash: BigUint64Array, difficulty: bigint): Promise { inputDataView.setBigUint64(i * 8, hashData[i]) } inputDataView.setBigUint64(32, difficulty, true) - device.queue.writeBuffer(inputBuffer, 0, inputDataView) + device.queue.writeBuffer(inputBuffers[0], 0, inputDataView) + device.queue.writeBuffer(inputBuffers[1], 0, inputDataView) // Reset OUTPUT properties to 0u before each calculation - device.queue.writeBuffer(outputBuffer, 0, bufferReset) - device.queue.writeBuffer(resultBuffer, 0, bufferReset) + device.queue.writeBuffer(outputBuffers[0], 0, bufferReset) + device.queue.writeBuffer(outputBuffers[1], 0, bufferReset) + device.queue.writeBuffer(resultBuffers[0], 0, bufferReset) + device.queue.writeBuffer(resultBuffers[1], 0, bufferReset) } catch (err) { logger.log(err) throw new Error('failed to initialize', { cause: err }) } } -async function dispatch (seed: bigint, effort: number): Promise { +async function dispatch (dispatchIndex: number, seed: bigint, effort: number): Promise { logger.log('dispatching compute pass') try { logger.log('seed', bigintToHex(seed, 16)) @@ -179,7 +212,7 @@ async function dispatch (seed: bigint, effort: number): Promise { // Copy seed into INPUT buffer inputDataView.setBigUint64(40, seed, true) logger.log('INPUT', inputDataView) - device.queue.writeBuffer(inputBuffer, 0, inputDataView) + device.queue.writeBuffer(inputBuffers[dispatchIndex], 0, inputDataView) // Create command encoder to issue commands to GPU and initiate computation const commandEncoder = device.createCommandEncoder() @@ -187,13 +220,13 @@ async function dispatch (seed: bigint, effort: number): Promise { // Issue commands and end compute pass structure passEncoder.setPipeline(pipeline) - passEncoder.setBindGroup(0, bindGroup) + passEncoder.setBindGroup(0, bindGroups[dispatchIndex]) passEncoder.dispatchWorkgroups(effort * 0x100, effort * 0x100) passEncoder.end() // Copy 8-byte result, 8-byte nonce, and 4-byte found flag from GPU to CPU // for reading - commandEncoder.copyBufferToBuffer(outputBuffer, 0, resultBuffer, 0, 32) + commandEncoder.copyBufferToBuffer(outputBuffers[dispatchIndex], 0, resultBuffers[dispatchIndex], 0, 32) // End computation by passing array of command buffers to command queue for execution device.queue.submit([commandEncoder.finish()]) @@ -203,17 +236,16 @@ async function dispatch (seed: bigint, effort: number): Promise { } } -async function check (): Promise { +async function check (dispatchIndex: number): Promise { logger.log('checking results from compute pass') try { - await resultBuffer.mapAsync(GPUMapMode.READ) - await device.queue.onSubmittedWorkDone() - resultData = new Uint32Array(resultBuffer.getMappedRange().slice(0)) - resultBuffer.unmap() - resultView = new DataView(resultData.buffer) - logger.log('OUTPUT', resultView) - if (resultView == null) throw new Error('failed to get data from resultBuffer.') - return !!resultView.getUint32(0, true) + await resultBuffers[dispatchIndex].mapAsync(GPUMapMode.READ) + resultViews[dispatchIndex] = new DataView(resultBuffers[dispatchIndex].getMappedRange().slice(0)) + resultBuffers[dispatchIndex].unmap() + logger.log('OUTPUT', resultViews[dispatchIndex]) + if (resultViews[dispatchIndex] == null) throw new Error('failed to get data from resultBuffer.') + return !!resultViews[dispatchIndex].getUint32(0, true) + } catch (err) { logger.log(err) throw new Error('failed to read results from compute pass', { cause: err }) @@ -223,13 +255,13 @@ async function check (): Promise { /** * Map CPU buffer to GPU, read results to static result object, and unmap. */ -function read (): { work: bigint, difficulty: bigint } { +function read (dispatchIndex: number): { work: bigint, difficulty: bigint } { logger.log('reading results from compute pass') try { - if (resultView == null) throw new Error('failed to get data from result view') + if (resultViews[dispatchIndex] == null) throw new Error('failed to get data from result view') return { - work: resultView.getBigUint64(8, true), - difficulty: resultView.getBigUint64(16, true) + work: resultViews[dispatchIndex].getBigUint64(8, true), + difficulty: resultViews[dispatchIndex].getBigUint64(16, true) } } catch (err) { logger.log(err) @@ -248,7 +280,7 @@ export async function generate (hash: bigint, difficulty: bigint, effort: number timeout = true throw new Error('timed out') }, 60_000) - logger.groupStart('NanoPow WebGPU work_generate') + // logger.groupStart('NanoPow WebGPU work_generate') logger.log('generating') let found = false let result: { [key: string]: bigint } = {} @@ -264,12 +296,17 @@ export async function generate (hash: bigint, difficulty: bigint, effort: number } } await q.add(init, bigintAsUintNArray(hash, 64, 4), difficulty) + // Dispatch initial workgroups and set index + await dispatch(0, bigintRandom(), effort) + let dispatchIndex = 1 // Loop attempts until valid work found do { - await dispatch(bigintRandom(), effort) - found = await check() + await dispatch(dispatchIndex, bigintRandom(), effort) + dispatchIndex ^= 1 + found = await check(dispatchIndex) } while (!found && !timeout) - if (found) result = read() + await device.queue.onSubmittedWorkDone() + if (found) result = read(dispatchIndex) isFirstRetry = false } catch (err: any) { if (status === 'Unsupported') {