CGPROGRAM
// Names CSMain as the kernel of this compute program (presumably the entry
// point the host dispatches — confirm against the engine's pragma syntax).
#pragma compute CSMain
// Thread-group edge length; CSMain uses it for all three numthreads dimensions.
#define NUM_THREADS 8

// _DeltaTime and _Epsilon are multiplied together to scale the term that
// CSMain adds to _Write.
float _DeltaTime, _Epsilon;
// Grid description: x, y, z are compared against id.x/y/z as the grid extents,
// while x and w are the multipliers applied to id.y and id.z when flattening
// a 3D cell coordinate into a buffer index.
// NOTE(review): w is presumably x * y (cells per z-slice) — confirm on the host side.
float4 _Size;

// Updated in place by CSMain: xyz is read, incremented and written back; w is written as 0.
RWStructuredBuffer<float4> _Write;
// Only read by CSMain, and only its xyz components are used.
RWStructuredBuffer<float4> _Vorticity;

// Adds _DeltaTime * _Epsilon * cross(eta, vorticity) to the xyz of _Write at
// this thread's cell, where eta is the normalized (biased) gradient of the
// vorticity magnitude taken from the six axis neighbours. This has the shape
// of a vorticity-confinement force step.
//
// id: 3D cell coordinate of this thread (SV_DispatchThreadID). Threads whose
//     coordinate lies outside _Size.xyz do nothing.
//
// Neighbour lookups are clamped to the grid, so on a boundary face the
// missing neighbour is replaced by the cell itself.
[numthreads(NUM_THREADS,NUM_THREADS,NUM_THREADS)]
void CSMain (int3 id : SV_DispatchThreadID)
{
	// Work in integers: flat indices computed through float dot()/step() stop
	// being exact once they exceed 2^24, which would address the wrong cell.
	const int3 dims = (int3)_Size.xyz;
	const int rowStride = (int)_Size.x;
	const int sliceStride = (int)_Size.w;

	// When the grid is not a multiple of NUM_THREADS the dispatch overshoots;
	// an out-of-range coordinate would otherwise alias another valid cell.
	if (any(id >= dims)) return;

	const int idx = id.x + id.y * rowStride + id.z * sliceStride;

	// Clamped neighbour coordinates: each differs from id by 0 (on a boundary) or 1.
	const int3 lo = max(id - 1, 0);
	const int3 hi = min(id + 1, dims - 1);

	const int idxL = idx - (id.x - lo.x);
	const int idxR = idx + (hi.x - id.x);

	const int idxB = idx - (id.y - lo.y) * rowStride;
	const int idxT = idx + (hi.y - id.y) * rowStride;

	const int idxD = idx - (id.z - lo.z) * sliceStride;
	const int idxU = idx + (hi.z - id.z) * sliceStride;

	// Vorticity magnitude at the +1 and -1 neighbours along x, y, z.
	const float3 omegaAddOne = float3(length(_Vorticity[idxR].xyz), length(_Vorticity[idxT].xyz), length(_Vorticity[idxU].xyz));
	const float3 omegaSubOne = float3(length(_Vorticity[idxL].xyz), length(_Vorticity[idxB].xyz), length(_Vorticity[idxD].xyz));

	// Central difference of |vorticity|; the constant bias keeps normalize()
	// away from a zero-length vector.
	// NOTE(review): the bias also tilts eta towards (1,1,1) where the gradient
	// is near zero — kept as-is to preserve existing simulation behavior.
	const float3 eta = normalize(0.5f * (omegaAddOne - omegaSubOne) + 0.0025f);

	const float3 ret = _Write[idx].xyz + _DeltaTime * _Epsilon * cross(eta, _Vorticity[idx].xyz);
	_Write[idx] = float4(ret, 0.0);
}

ENDCG

