CGPROGRAM
#pragma compute readBackVelFieldbyPos

#include "fluid_simulation.inc"

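// Buffer roles, as recorded in the original notes:
// fieldSrcV1 -> position
// fieldSrcV2 -> velocity
// fieldDstV  -> velocity
// NOTE(review): none of these three names is declared in this file; they presumably
// correspond to fieldPPos, fieldVelS and fieldPVel respectively -- confirm against the caller.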

// Particle size. The kernel in this file only reads _Psize.x, halving it to get the
// half-extent used when picking the grid cells around a particle.
float3 _Psize;
// Particle count, supplied as a float. The kernel converts it to an integer and uses it
// as the offset of the torque entries inside fieldPVel.
float  _ParticleNum;

// Element type of the fieldPPos structured buffer.
// Field order and types define the buffer stride, so they must not be rearranged here
// without changing whatever fills the buffer as well.
// Only pPosRot.xyz is read in this file; the meaning of the remaining fields is not
// visible here and the notes on them are guesses from their names.
struct ParticleInfo
{
	float2 vPos;        // not read in this file (presumably a per-vertex position -- TODO confirm)
	float2 vUv;         // not read in this file (presumably a per-vertex UV -- TODO confirm)
	float4 pPosRot;     // xyz is used by the kernel as the particle's world position; w presumably a rotation -- TODO confirm
	float3 pSizeFrame;  // not read in this file
	float4 pColor;      // not read in this file
	float3 pVel;        // not read in this file
};
			
// Read-only particle records; the kernel indexes this by particle and reads pPosRot.xyz.
StructuredBuffer<ParticleInfo> fieldPPos     : register(t0);

// Grid velocity field, indexed through GridCord2GlobalIndex(x, y, z). Declared read/write
// but only read by the kernel in this file.
RWStructuredBuffer<float3> fieldVelS    : register(u0);
// Per-particle output. The kernel writes the sampled velocity at [particle] and the
// accumulated torque at [particle + _ParticleNum].
RWStructuredBuffer<float3> fieldPVel    : register(u1);

// Torque contribution of one grid cell about a pivot point:
// cross(cellWorldPos - pivot, cellVelocity).
// The cell coordinates are signed so that the lever arm stays meaningful when the cell
// lies one step outside the grid (e.g. x == -1); the buffer lookup still receives the
// same unsigned bit pattern an unsigned "index - 1" would have produced.
float3 CellTorqueAbout(int gx, int gy, uint gz, float3 pivot)
{
	float3 cellVel = fieldVelS[GridCord2GlobalIndex((uint)gx, (uint)gy, gz)];
	float3 leverArm = GridCord2WorldPos(float3((float)gx, (float)gy, (float)gz)) - pivot;
	return cross(leverArm, cellVel);
}

// Reads the grid velocity field back onto the particles.
//
// One thread per particle. For particle p (the flattened dispatch index) it
//   1. samples fieldVelS at the grid cell containing the particle and stores it in
//      fieldPVel[p];
//   2. walks the ring of cells just outside the particle's footprint (half-extent
//      _Psize.x / 2 in X and Y, on the particle's own Z slice), sums
//      cross(cellPos - particlePos, cellVelocity) over that ring and stores the result
//      in fieldPVel[p + _ParticleNum].
//
// The four corner cells of the ring are not sampled: the rows span leftIdx..rightIdx and
// the columns span bottomIdx..topIdx, matching the original loops.
//
// NOTE(review): the grid extent is not visible in this file, so cell coordinates are not
// clamped here. Cells outside the grid rely on whatever GridCord2GlobalIndex and the
// buffer's out-of-range read behaviour do -- confirm this is acceptable at the borders.
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void readBackVelFieldbyPos(int3 id : SV_DispatchThreadID)
{
	int globalIndex = id.x + id.y * (int)_Size.x + id.z * (int)_Size.w;

	// Surplus threads (dispatch size not a multiple of the particle count) must not run:
	// their velocity write at fieldPVel[globalIndex] would land in the torque section
	// that starts at _ParticleNum and clobber another particle's result.
	if (globalIndex >= (int)_ParticleNum)
	{
		return;
	}

	float3 ppos = fieldPPos[globalIndex].pPosRot.xyz;

	// Velocity of the cell that contains the particle.
	float3 fidx = WorldPos2GridCord(ppos);
	uint3  uidx = uint3((uint)fidx.x, (uint)fidx.y, (uint)fidx.z);
	float3 vCenter = fieldVelS[GridCord2GlobalIndex(uidx.x, uidx.y, uidx.z)];
	fieldPVel[globalIndex] = vCenter;

	// Footprint of the particle in grid coordinates. Signed on purpose: the ring below
	// uses "index - 1", which would wrap to 0xFFFFFFFF with unsigned indices and turn
	// the lever arm into a ~4e9 offset.
	float halfSize = _Psize.x / 2.0f;
	int leftIdx   = (int)floor(WorldPosX2GridCordX(ppos.x - halfSize));
	int rightIdx  = (int)ceil(WorldPosX2GridCordX(ppos.x + halfSize));
	int bottomIdx = (int)floor(WorldPosY2GridCordY(ppos.y - halfSize));
	int topIdx    = (int)ceil(WorldPosY2GridCordY(ppos.y + halfSize));

	float3 Torques = float3(0.0f, 0.0f, 0.0f);

	// Rows directly below and above the footprint.
	// Loop variables have distinct names because the legacy HLSL compiler leaks a
	// for-loop variable into the enclosing scope.
	for (int col = leftIdx; col <= rightIdx; col++)
	{
		Torques += CellTorqueAbout(col, bottomIdx - 1, uidx.z, ppos);
		Torques += CellTorqueAbout(col, topIdx + 1,    uidx.z, ppos);
	}

	// Columns directly left and right of the footprint.
	for (int row = bottomIdx; row <= topIdx; row++)
	{
		Torques += CellTorqueAbout(leftIdx - 1,  row, uidx.z, ppos);
		Torques += CellTorqueAbout(rightIdx + 1, row, uidx.z, ppos);
	}

	// Torques are stored after the velocities, offset by the particle count.
	fieldPVel[globalIndex + (uint)_ParticleNum] = Torques;
}

ENDCG

