CGPROGRAM
// Kernel entry point for this program is CSMain.
#pragma compute CSMain
// Edge length of one thread group; the kernel is launched in NUM_THREADS^3 groups.
#define NUM_THREADS 8

// Grid description supplied by the host:
//   x, y, z - compared against the thread id to clamp neighbour lookups at the grid edges
//   w       - multiplied by id.z when flattening the 3D id into a buffer index
//             (presumably x * y, i.e. the size of one z slice - confirm against the caller)
float4 _Size;

// Output buffer: receives float4(vorticity, 0) for every cell.
RWStructuredBuffer<float4> _Write;
// Input velocity field; only the xyz components are read by this kernel.
RWStructuredBuffer<float4> _Velocity;

// Computes the vorticity (curl) of the velocity field for one grid cell.
//
// id is the 3D cell coordinate of this thread. The six axis-aligned
// neighbours are fetched with their coordinates clamped to the grid, so at a
// boundary the missing neighbour is replaced by the cell itself. The curl is
// built from the neighbour differences scaled by 0.5 and written to
// _Write as float4(curl, 0).
[numthreads(NUM_THREADS,NUM_THREADS,NUM_THREADS)]
void CSMain (int3 id : SV_DispatchThreadID)
{
	// Work in integers: float index math is only exact below 2^24 elements.
	int3 size = (int3)_Size.xyz;
	int strideY = size.x;
	int strideZ = (int)_Size.w;

	// The dispatch is rounded up to whole thread groups. Threads that fall
	// outside the grid must not run: their flat index would alias a valid
	// cell (or leave the buffer) and overwrite it with a wrong result.
	if (any(id >= size)) return;

	int idx = id.x + id.y * strideY + id.z * strideZ;

	// Neighbour coordinates clamped to [0, size - 1] on each axis.
	int3 lo = max(id - 1, 0);
	int3 hi = min(id + 1, size - 1);

	int idxL = lo.x + id.y * strideY + id.z * strideZ;
	int idxR = hi.x + id.y * strideY + id.z * strideZ;

	int idxB = id.x + lo.y * strideY + id.z * strideZ;
	int idxT = id.x + hi.y * strideY + id.z * strideZ;

	int idxD = id.x + id.y * strideY + lo.z * strideZ;
	int idxU = id.x + id.y * strideY + hi.z * strideZ;

	// L/R are the -x/+x neighbours, B/T the -y/+y ones, D/U the -z/+z ones.
	float3 L = _Velocity[idxL].xyz;
	float3 R = _Velocity[idxR].xyz;

	float3 B = _Velocity[idxB].xyz;
	float3 T = _Velocity[idxT].xyz;

	float3 D = _Velocity[idxD].xyz;
	float3 U = _Velocity[idxU].xyz;

	// curl(V) = (dVz/dy - dVy/dz, dVx/dz - dVz/dx, dVy/dx - dVx/dy),
	// each derivative taken as half the difference of the two neighbours.
	float3 vorticity = 0.5 * float3(
		(T.z - B.z) - (U.y - D.y),
		(U.x - D.x) - (R.z - L.z),
		(R.y - L.y) - (T.x - B.x));

	_Write[idx] = float4(vorticity, 0.0);
}

ENDCG

