GPU Compute provides the ultimate and easiest way to set up, manage, and execute GPU compute shaders in Unity. It reduces complexity and boilerplate code while providing powerful features for standard/async execution; global and local buffer/render texture creation, management, reading (standard/async), tracking, and disposal; global and local variable management; GPU memory tracking; performance tracking; shader execution control; and much more!
- Simplified Shader Execution - Easy setup and execution of compute shaders (synchronous & asynchronous)
- Automatic Buffer Management - Create, edit, and read buffers with automatically calculated strides and lengths
- Flexible Thread Group Sizing - Automatically calculate optimal GPU thread group sizes for 1D, 2D, and 3D workloads
- GPU Memory Tracking - Track local and global GPU memory usage in real-time with formatted output
- Performance Monitoring - Measure compute shader execution time
- Async GPU Readback - Efficiently retrieve data from GPU using AsyncGPUReadbackRequest
- Render Texture Support - Full support for creating and managing render textures
- Global Resources - Centralized management of global buffers and textures accessible across multiple compute shader instances
- Buffer Resizing - Dynamically resize buffers without recreating instances
- Shader Keywords - Enable/disable local shader keywords programmatically
- Buffer Debugging - Retrieve and inspect buffer contents for debugging purposes
- Centralized in a Single Class - All GPU compute operations managed through one easy-to-use class
using (GPUCompute gpuCompute = new GPUCompute(computeShader))
{
gpuCompute.SetBuffer("myBuffer", ref myBuffer);
gpuCompute.SetFloat("myFloatVar", 2f);
Debug.Log("GPU Compute Memory Allocated: " + gpuCompute.GetGPUMemoryUsedFormatted());
gpuCompute.Execute();
gpuCompute.GetBufferData("myBuffer", ref myBuffer);
Debug.Log("Compute Time: " + gpuCompute.GetLastComputeTime());
}
- Installation
- Quick Start
- Buffers & Textures
- Shader Variables
- Thread Group Sizes
- Execution
- Memory & Performance
- Async Operations
- Global Resources
- Buffer Debugging
- Advanced Features
- Example Usage
Simply add the entire folder into your project's Assets folder.
Create a new instance of GPUCompute by providing your compute shader:
GPUCompute gpuCompute = new GPUCompute(myComputeShader);
You can also specify a compute queue type for async execution:
GPUCompute gpuCompute = new GPUCompute(myComputeShader, ComputeQueueType.Default);
Always dispose of your GPUCompute instances when finished to prevent GPU memory leaks:
gpuCompute.Dispose(); // Disposes both local and global resources
gpuCompute.DisposeLocal(); // Disposes only local buffers/textures
GPUCompute.DisposeGlobal(); // Disposes only global buffers/textures
Create buffers from existing data (stride and length are calculated automatically):
Vector3[] vertices = new Vector3[1000];
gpuCompute.SetBuffer("vertices", ref vertices);
float[] uvs = new float[2000];
gpuCompute.SetBuffer("uvs", ref uvs);
Create empty buffers by specifying the struct type and length:
gpuCompute.CreateEmptyBuffer<int>("myEmptyBuffer", 500);
gpuCompute.CreateEmptyBuffer<Vector3>("positions", 1000, kernelIndex: 0);
Set buffers with multiple kernel indices:
int[] kernelIndices = { 0, 1, 2 };
gpuCompute.SetBuffer("sharedData", ref data, kernelIndices);
Specify buffer type and mode:
gpuCompute.CreateEmptyBuffer<float>(
"counterBuffer",
100,
ComputeBufferType.Counter,
ComputeBufferMode.Dynamic
);
Retrieve buffer data from GPU to CPU:
Vector3[] resultBuffer = new Vector3[100];
gpuCompute.GetBufferData("vertices", ref resultBuffer);
List<Vector3> resultList = new List<Vector3>(100);
gpuCompute.GetBufferData("vertices", ref resultList);
NativeArray<Vector3> resultNative = new NativeArray<Vector3>(100, Allocator.Persistent);
gpuCompute.GetBufferData("vertices", ref resultNative);
Dynamically resize buffers (data will be lost):
gpuCompute.ResizeBuffer("myBuffer", 2000);
Create render textures for compute shader output:
RenderTextureDescriptor descriptor = new RenderTextureDescriptor(1024, 1024, RenderTextureFormat.ARGBFloat);
gpuCompute.CreateEmptyRenderTexture("outputTexture", descriptor);
// With mip levels and custom settings
gpuCompute.CreateEmptyRenderTexture(
"outputTexture",
descriptor,
kernelIndex: 0,
mipLevel: 0,
wrapMode: TextureWrapMode.Clamp,
filterMode: FilterMode.Point,
anisoLevel: 1
);
Set render textures:
RenderTexture myTexture = new RenderTexture(1024, 1024, 0, RenderTextureFormat.ARGBFloat);
gpuCompute.SetRenderTexture("myRenderTexture", ref myTexture);
Retrieve render texture data:
Texture2D outputTexture = new Texture2D(1024, 1024, TextureFormat.RGBAFloat, false);
gpuCompute.GetRenderTextureData("outputTexture", ref outputTexture);
Set various texture types:
Texture2D texture2D = Resources.Load<Texture2D>("myTexture");
gpuCompute.SetTexture("inputTexture", ref texture2D);
Texture3D texture3D = Resources.Load<Texture3D>("myVolume");
gpuCompute.SetTexture("volumeTexture", ref texture3D);
Texture2DArray textureArray = Resources.Load<Texture2DArray>("myTextureArray");
gpuCompute.SetTextureArray("textureArrayInput", ref textureArray);
gpuCompute.SetInt("count", 100);
gpuCompute.SetFloat("deltaTime", Time.deltaTime);
gpuCompute.SetBool("useOptimization", true);
gpuCompute.SetVector("position", new Vector3(1, 2, 3));
gpuCompute.SetVector("colorData", new Vector4(1, 0, 0, 1));
gpuCompute.SetVector("uv", new Vector2(0.5f, 0.5f));
gpuCompute.SetVector("gridSize", new Vector3Int(10, 10, 10));
float[] floatArray = new float[100];
gpuCompute.SetFloatArray("floatData", floatArray);
Vector4[] vectorArray = new Vector4[50];
gpuCompute.SetVectorArray("vectorData", vectorArray);
Matrix4x4 transformMatrix = Matrix4x4.identity;
gpuCompute.SetMatrix("transform", transformMatrix);
Matrix4x4[] matrixArray = new Matrix4x4[10];
gpuCompute.SetMatrixArray("transforms", matrixArray);
int value = gpuCompute.GetInt("myInt");
float value = gpuCompute.GetFloat("myFloat");
Vector4 vector = gpuCompute.GetVector("myVector");
Matrix4x4 matrix = gpuCompute.GetMatrix("myMatrix");
gpuCompute.SetThreadGroupSize(new Vector3Int(8, 8, 1));
Vector3Int currentSize = gpuCompute.GetThreadGroupSize();
For array-based workloads:
int jobLength = vertices.Length;
int threadLength = gpuCompute.SetCalculatedThreadGroupSize(jobLength, kernelIndex: 0);
For texture-based workloads:
int width = 1024;
int height = 1024;
gpuCompute.SetCalculatedThreadGroupSize(width, height, kernelIndex: 0);
For volume-based workloads:
int width = 64;
int height = 64;
int depth = 64;
gpuCompute.SetCalculatedThreadGroupSize(width, height, depth, kernelIndex: 0);
Retrieve the thread group sizes defined in your compute shader kernel:
Vector3Int kernelGroupSizes = gpuCompute.GetKernelThreadGroupSizes(kernelIndex: 0);
Debug.Log($"Kernel thread group size: {kernelGroupSizes}");
Execute the compute shader and wait for completion:
gpuCompute.Execute(kernelIndex: 0);
Execute the compute shader asynchronously (DX12 only):
StartCoroutine(gpuCompute.ExecuteAsync(kernelIndex: 0));
Subscribe to completion events:
gpuCompute.OnExecuteComplete += OnComputeComplete;
private void OnComputeComplete(int kernelIndex)
{
Debug.Log($"Kernel {kernelIndex} execution completed");
}
if (gpuCompute.IsExecuting())
{
Debug.Log("Compute shader is currently executing");
}
Get local GPU memory used by the current instance:
long localMemoryBytes = gpuCompute.GetLocalGPUMemoryUsed();
string localMemoryFormatted = gpuCompute.GetLocalGPUMemoryUsedFormatted();
// Output: "1.25 MB"
Get global GPU memory used across all instances:
long globalMemoryBytes = GPUCompute.GetGlobalGPUMemoryUsed();
string globalMemoryFormatted = GPUCompute.GetGlobalGPUMemoryUsedFormatted();
Measure the duration of the last compute shader execution:
TimeSpan lastExecutionTime = gpuCompute.GetLastComputeTime();
Debug.Log($"Last execution took: {lastExecutionTime.TotalMilliseconds} ms");
Convert byte counts to human-readable strings:
string formatted = GPUCompute.ByteCountToFormattedString(1024000);
// Output: "1.02 MB"
Asynchronously retrieve buffer data using AsyncGPUReadbackRequest for better performance:
StartCoroutine(gpuCompute.GetBufferDataAsync("myBuffer"));
// Subscribe to readback completion
gpuCompute.OnReadbackComplete += OnBufferReadbackComplete;
private void OnBufferReadbackComplete(AsyncGPUReadbackRequest request, string bufferName)
{
Vector3[] data = new Vector3[100];
GPUCompute.ReadbackRequestToArray(ref request, ref data);
// Process data...
}
Read buffer data with offset and length:
StartCoroutine(gpuCompute.GetBufferDataAsync("myBuffer", length: 100, startIndex: 50));
Asynchronously retrieve render texture data:
StartCoroutine(gpuCompute.GetRenderTextureDataAsync("outputTexture"));
gpuCompute.OnReadbackComplete += OnReadbackComplete;
private void OnReadbackComplete(AsyncGPUReadbackRequest request, string textureName)
{
Texture2D output = new Texture2D(request.width, request.height, TextureFormat.RGBAFloat, false);
GPUCompute.ReadbackRequestToTexture2D(ref request, ref output);
}
Asynchronously retrieve global render texture data:
Optional parameters can also be passed through for your own use.
int myDataToPassThrough = 0;
StartCoroutine(gpuCompute.GetGlobalRenderTextureDataAsync("outputTexture"), myDataToPassThrough);
gpuCompute.OnGlobalReadbackComplete += OnGlobalReadbackComplete;
private void OnGlobalReadbackComplete(AsyncGPUReadbackRequest request, string textureName, params object[] parameters)
{
int myDataToPassThrough = (int)parameters[0];
Texture2D output = new Texture2D(request.width, request.height, TextureFormat.RGBAFloat, false);
GPUCompute.ReadbackRequestToTexture2D(ref request, ref output);
}
Retrieve specific regions:
StartCoroutine(gpuCompute.GetRenderTextureDataAsync(
"outputTexture",
width: 256,
height: 256,
depth: 1,
mipIndex: 0,
x: 100,
y: 100,
z: 0
));
Convert async readback requests to different formats:
// To NativeArray
NativeArray<Vector3> nativeData = new NativeArray<Vector3>(100, Allocator.Persistent);
GPUCompute.ReadbackRequestToNativeArray(ref request, ref nativeData);
// To List
List<Vector3> listData = new List<Vector3>(100);
GPUCompute.ReadbackRequestToList(ref request, ref listData);
// To Texture3D
Texture3D volume = new Texture3D(64, 64, 64, TextureFormat.RGBAFloat, false);
GPUCompute.ReadbackRequestToTexture3D(ref request, ref volume);
Global buffers and textures are accessible across all GPUCompute instances and persist for the lifetime of the application.
Create and set global buffers:
Vector3[] globalVertices = new Vector3[1000];
GPUCompute.SetGlobalBuffer("globalVertices", ref globalVertices);
GPUCompute.CreateEmptyGlobalBuffer<float>("globalData", 5000);
Retrieve global buffer data:
Vector3[] outputBuffer = new Vector3[1000];
GPUCompute.GetGlobalBufferData("globalVertices", ref outputBuffer);
Resize global buffers:
GPUCompute.ResizeGlobalBuffer("globalData", 10000);
Set global textures:
Texture2D globalTexture = Resources.Load<Texture2D>("myGlobalTexture");
GPUCompute.SetGlobalTexture("globalTextureName", ref globalTexture);
Texture3D globalVolume = Resources.Load<Texture3D>("myVolume");
GPUCompute.SetGlobalTexture("globalVolume", ref globalVolume);
Texture2DArray globalArray = Resources.Load<Texture2DArray>("myArray");
GPUCompute.SetGlobalTextureArray("globalTextureArray", ref globalArray);
Create global render textures:
RenderTextureDescriptor descriptor = new RenderTextureDescriptor(2048, 2048, RenderTextureFormat.ARGBFloat);
GPUCompute.CreateEmptyGlobalRenderTexture("globalOutput", descriptor);
Retrieve global texture data:
Texture2D output = new Texture2D(2048, 2048, TextureFormat.RGBAFloat, false);
GPUCompute.GetGlobalRenderTextureData("globalOutput", ref output);
Link global resources to compute shader instances:
gpuCompute.LinkGlobalBuffer("globalBufferName", "globalBufferName", kernelIndex: 0);
gpuCompute.LinkGlobalTexture("globalTextureName", "globalTextureName", kernelIndex: 0);
gpuCompute.LinkGlobalRenderTexture("globalOutputName", "globalOutputName", kernelIndex: 0);
Link to multiple kernels:
int[] kernelIndices = { 0, 1, 2 };
gpuCompute.LinkGlobalBuffer("buffer", "buffer", kernelIndices);
GPUCompute.DisposeGlobalBuffer("globalBufferName");
GPUCompute.DisposeGlobalRenderTexture("globalTextureName");
GPUCompute.DisposeGlobal(); // Dispose all global resources
Enable shader keywords:
gpuCompute.EnableKeyword("MY_KEYWORD");
Disable shader keywords:
gpuCompute.DisableKeyword("MY_KEYWORD");
Set keyword state by boolean:
LocalKeyword keyword = gpuCompute.GetKeywordSpace().FindKeyword("MY_KEYWORD");
gpuCompute.SetKeyword(keyword, true);
Check if keyword is enabled:
if (gpuCompute.IsKeywordEnabled("MY_KEYWORD"))
{
Debug.Log("MY_KEYWORD is enabled");
}
Get all enabled keywords:
LocalKeyword[] enabledKeywords = gpuCompute.GetEnabledKeywords();
foreach (var keyword in enabledKeywords)
{
Debug.Log($"Enabled: {keyword.name}");
}
Debug buffers to inspect their contents (CPU-side only, for debugging):
string debugInfo = gpuCompute.DebugBuffer<Vector3>("myBuffer");
Debug.Log(debugInfo);
// Output:
// Buffer Name: [myBuffer], Data Type: [System.Numerics.Vector3],
// Length: [100], Stride: [12], VRAM Usage: [1.17 KB], ...
// Values:
// Index 0: (1.00, 2.00, 3.00)
// Index 1: (4.00, 5.00, 6.00)
// ...
Debug global buffers:
string debugInfo = GPUCompute.DebugGlobalBuffer<float>("globalBuffer");
Debug.Log(debugInfo);
Find kernel indices:
int kernelIndex = gpuCompute.FindKernel("CSMain");
Check if kernel exists:
if (gpuCompute.HasKernel("CSMain"))
{
Debug.Log("CSMain kernel found");
}
Check device support:
if (gpuCompute.IsSupported(kernelIndex: 0))
{
Debug.Log("Kernel is supported on this device");
}
Set compute queue type (for async execution):
gpuCompute.SetComputeQueueType(ComputeQueueType.Default);
ComputeQueueType currentType = gpuCompute.GetComputeQueueType();
Set synchronization stage flags:
gpuCompute.SetSynchronisationStageFlags(SynchronisationStageFlags.ComputeProcessing);
SynchronisationStageFlags flags = gpuCompute.GetSynchronisationStageFlags();
ComputeShader shader = gpuCompute.GetComputeShader();
LocalKeywordSpace keywordSpace = gpuCompute.GetKeywordSpace();
Complete example demonstrating common workflow:
using UnityEngine;
using GPUComputeModule;
public class ComputeExample : MonoBehaviour
{
private GPUCompute gpuCompute;
private ComputeShader computeShader;
void Start()
{
// Initialize
computeShader = Resources.Load<ComputeShader>("MyComputeShader");
gpuCompute = new GPUCompute(computeShader);
// Setup data
Vector3[] vertices = new Vector3[1000];
for (int i = 0; i < vertices.Length; i++)
vertices[i] = new Vector3(i, 0, 0);
// Create buffer and set data
gpuCompute.SetBuffer("vertices", ref vertices);
// Set shader variables
gpuCompute.SetFloat("time", Time.deltaTime);
gpuCompute.SetInt("vertexCount", vertices.Length);
// Calculate thread groups for 1D workload
gpuCompute.SetCalculatedThreadGroupSize(vertices.Length, kernelIndex: 0);
// Execute synchronously
gpuCompute.Execute();
// Read results
Vector3[] results = new Vector3[1000];
gpuCompute.GetBufferData("vertices", ref results);
// Check memory usage
Debug.Log($"GPU Memory Used: {gpuCompute.GetLocalGPUMemoryUsedFormatted()}");
Debug.Log($"Execution Time: {gpuCompute.GetLastComputeTime().TotalMilliseconds} ms");
// Cleanup
gpuCompute.Dispose();
}
}