CGPROGRAM
#pragma compute CSMain
// Thread-group extent along each of the three axes (used in the numthreads attribute of CSMain).
#define NUM_THREADS 8

// Grid description. CSMain compares id.x/id.y/id.z against _Size.x/_Size.y/_Size.z as the
// grid extents, uses _Size.x as the linear-index stride for one step in y, and _Size.w as
// the stride for one step in z.
// NOTE(review): _Size.w is presumably width*height set by the host script - confirm there.
float4 _Size;

// Output: CSMain stores the updated per-cell value here.
RWStructuredBuffer<float> _Write;
// Input: current per-cell pressure; read at the cell itself and at its six axis neighbours.
RWStructuredBuffer<float> _Pressure;
// Input: per-cell obstacle data. Only the .x component is read by CSMain; a value > 0
// causes that neighbour's pressure to be replaced with the centre cell's pressure.
RWStructuredBuffer<float4> _Obstacles;
// Input: per-cell divergence, subtracted from the neighbour-pressure sum.
RWStructuredBuffer<float> _Divergence;

// One relaxation (Jacobi-style) step of the pressure solve.
//
// For the grid cell addressed by the dispatch thread id, reads the pressure of the six
// axis neighbours from _Pressure, subtracts the cell's divergence, divides by six and
// stores the result in _Write. _Pressure is never written here, so every thread sees
// the same input values regardless of execution order.
//
// Boundary handling:
//   * At a grid face the missing neighbour index collapses onto the cell itself, so the
//     centre pressure is used in its place.
//   * A neighbour whose _Obstacles entry has x > 0 also contributes the centre pressure.
//
// id : cell coordinate (x, y, z) supplied via SV_DispatchThreadID.
[numthreads(NUM_THREADS,NUM_THREADS,NUM_THREADS)]
void CSMain (int3 id : SV_DispatchThreadID)
{
	// Grid extents as integers so that all index math below stays exact.
	const int3 dims = (int3)_Size.xyz;

	// Thread groups are NUM_THREADS wide on every axis, so when an extent is not a
	// multiple of NUM_THREADS some threads fall outside the grid. Without this guard
	// their linear index would alias a valid cell and overwrite its result.
	if (any(id >= dims)) return;

	// Linear-index strides: +1 per x step, _Size.x per y step, _Size.w per z step.
	// NOTE(review): _Size.w is presumably width*height - confirm against the host script.
	const int strideY = dims.x;
	const int strideZ = (int)_Size.w;

	const int idx = id.x + id.y * strideY + id.z * strideZ;

	// Neighbour indices; on a grid face the index falls back to the cell itself.
	const int idxL = (id.x > 0)          ? idx - 1       : idx;
	const int idxR = (id.x < dims.x - 1) ? idx + 1       : idx;

	const int idxB = (id.y > 0)          ? idx - strideY : idx;
	const int idxT = (id.y < dims.y - 1) ? idx + strideY : idx;

	const int idxD = (id.z > 0)          ? idx - strideZ : idx;
	const int idxU = (id.z < dims.z - 1) ? idx + strideZ : idx;

	const float C = _Pressure[idx];

	float L = _Pressure[idxL];
	float R = _Pressure[idxR];

	float B = _Pressure[idxB];
	float T = _Pressure[idxT];

	float D = _Pressure[idxD];
	float U = _Pressure[idxU];

	const float divergence = _Divergence[idx];

	// Obstacle neighbours contribute the centre pressure instead of their own.
	if(_Obstacles[idxL].x > 0.0) L = C;
	if(_Obstacles[idxR].x > 0.0) R = C;

	if(_Obstacles[idxB].x > 0.0) B = C;
	if(_Obstacles[idxT].x > 0.0) T = C;

	if(_Obstacles[idxD].x > 0.0) D = C;
	if(_Obstacles[idxU].x > 0.0) U = C;

	_Write[idx] = ( L + R + B + T + U + D - divergence ) / 6.0;
}

ENDCG

