#pragma once

#include "common.inc"

// Returns a normal-dependent depth bias for the given world-space normal.
// The result is 0.03 scaled by the absolute cosine of the angle between the
// normal and the axis taken from the third column of CAMERA_VIEW.
// NOTE(review): that axis is treated as the world-space light direction, so
// CAMERA_VIEW is presumably the light's view matrix in the shadow pass - confirm.
float DepthBias(float3 worldNorm)
{
	// The matrix row/column order has changed: former rows are now columns and
	// former columns are now rows, so the axis is read down the third column.
	// Previous form: float3(CAMERA_VIEW[2][0], CAMERA_VIEW[2][1], CAMERA_VIEW[2][2]).
	float3 viewAxisWS = normalize(float3(CAMERA_VIEW[0][2], CAMERA_VIEW[1][2], CAMERA_VIEW[2][2]));

	// The bias scales with |cos| of the angle (not its sine).
	float absCos = abs(dot(worldNorm.xyz, viewAxisWS.xyz));
	return 0.03 * absCos;
}

// Offsets the z component of a clip-space position by LIGHT_PARAM.x divided by
// the position's w component. x, y and w are returned unchanged.
float4 ApplyShadowBias(float4 clipPos)
{
    float4 biasedPos = clipPos;
    biasedPos.z = clipPos.z + (LIGHT_PARAM.x / clipPos.w);
    return biasedPos;
}

#if ShadowOn

// Declares the shadow depth map that every filter in this file samples through
// SAMPLE_SHADOW. DECLARE_SHADOWMAP is not defined in this file
// (presumably it comes from common.inc - confirm).
DECLARE_SHADOWMAP(TEXTURE_SHADOW_DEPTH);

// Builds the float3 passed to SAMPLE_SHADOW: xy = baseUV + deltaUV, z = depth.
float3 CombineShadowcoordComponents(float2 baseUV, float2 deltaUV, float depth)
{
    return float3(baseUV + deltaUV, depth);
}

// Returns triangleHeight - 0.5. Used by GetAreaPerTexel_3 to obtain the
// "uncut" area over each of the two middle texels of the tent.
float GetAreaAboveFirstTexelTriangle(float triangleHeight)
{
    float areaAboveFirstTexel = triangleHeight - 0.5;
    return areaAboveFirstTexel;
}

// Computes the per-texel areas of a 3-texel-wide tent for a sub-texel offset.
//   offset            - distance from the tent centre to the centre of fetches.
//   computedArea      - areas for the four texels (x, y, z, w), with the small
//                       corner triangle removed from the two middle texels.
//   computedAreaUncut - same as computedArea for x and w; for y and z it holds
//                       the middle areas before that triangle is removed.
void GetAreaPerTexel_3(float offset, out float4 computedArea, out float4 computedAreaUncut)
{
    // Outer texels: identical in the cut and uncut outputs.
    float halfSquaredShift = (offset + 0.5) * (offset + 0.5) * 0.5;
    float outerLeftArea = halfSquaredShift - offset;
    float outerRightArea = halfSquaredShift;

    // Middle texels before removing the corner triangle.
    float midLeftUncut = GetAreaAboveFirstTexelTriangle(1.5 - offset);
    float midRightUncut = GetAreaAboveFirstTexelTriangle(1.5 + offset);

    // Only the negative part of the offset trims the left middle texel and
    // only the positive part trims the right one.
    float negativePart = min(offset, 0);
    float positivePart = max(offset, 0);
    float midLeftCut = midLeftUncut - negativePart * negativePart;
    float midRightCut = midRightUncut - positivePart * positivePart;

    computedAreaUncut = float4(outerLeftArea, midLeftUncut, midRightUncut, outerRightArea);
    computedArea = float4(outerLeftArea, midLeftCut, midRightCut, outerRightArea);
}

// Produces the four texel weights of the 3-texel-wide tent for a sub-texel
// offset: the cut areas from GetAreaPerTexel_3 scaled by 0.44444.
void GetWeightPerTexel_3(float offset, out float4 computedWeight)
{
    float4 cutAreas;
    float4 uncutAreasUnused; // required by the callee, not needed here
    GetAreaPerTexel_3(offset, cutAreas, uncutAreasUnused);

    // 0.44444 is approximately 1 / (area of the tent triangle).
    computedWeight = cutAreas * 0.44444;
}

// Produces the six texel weights of the 5-texel-wide tent for a sub-texel
// offset, built from the 3-texel areas of GetAreaPerTexel_3.
//   texelsWeightsA - weights of the first three texels.
//   texelsWeightsB - weights of the last three texels.
void GetWeightPerTexel_5(float offset, out float3 texelsWeightsA, out float3 texelsWeightsB)
{
    float4 cutArea;
    float4 uncutArea;
    GetAreaPerTexel_3(offset, cutArea, uncutArea);

    // Every area is scaled by the same constant, 0.16.
    const float areaScale = 0.16;
    texelsWeightsA = areaScale * float3(cutArea.x, uncutArea.y, cutArea.y + 1);
    texelsWeightsB = areaScale * float3(cutArea.z + 1, uncutArea.z, cutArea.w);
}

// Produces the eight texel weights of the 7-texel-wide tent for a sub-texel
// offset, built from the 3-texel areas of GetAreaPerTexel_3.
//   texelsWeightsA - weights of the first four texels.
//   texelsWeightsB - weights of the last four texels.
void GetWeightPerTexel_7(float offset, out float4 texelsWeightsA, out float4 texelsWeightsB)
{
    float4 cutArea;
    float4 uncutArea;
    GetAreaPerTexel_3(offset, cutArea, uncutArea);

    // Every area is scaled by the same constant, 0.081632.
    const float areaScale = 0.081632;
    texelsWeightsA = areaScale * float4(cutArea.x, uncutArea.y, uncutArea.y + 1, cutArea.y + 2);
    texelsWeightsB = areaScale * float4(cutArea.z + 2, uncutArea.z + 1, uncutArea.z, cutArea.w);
}

// Plain 3x3 shadow filter: nine SAMPLE_SHADOW taps on a one-texel grid around
// coord.xy, averaged with equal weight. Per the original note this is the
// fallback for targets without hardware PCF sampling, where the optimized
// tent filters do not work.
//   coord                       - xy = shadow-map UV, z = depth to compare.
//   _ShadowMapTexture_TexelSize - xy is the UV step between neighbouring taps.
float PCF3x3NoHardwareSupport(float4 coord, float4 _ShadowMapTexture_TexelSize)
{
    float2 centerUV = coord.xy;
    float compareDepth = coord.z;
    float stepU = _ShadowMapTexture_TexelSize.x;
    float stepV = _ShadowMapTexture_TexelSize.y;

    float tapSum = 0;

    // Row at -stepV.
    tapSum += SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(centerUV + float2(-stepU, -stepV), compareDepth));
    tapSum += SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(centerUV + float2(0, -stepV), compareDepth));
    tapSum += SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(centerUV + float2(stepU, -stepV), compareDepth));

    // Centre row.
    tapSum += SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(centerUV + float2(-stepU, 0), compareDepth));
    tapSum += SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(centerUV + float2(0, 0), compareDepth));
    tapSum += SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(centerUV + float2(stepU, 0), compareDepth));

    // Row at +stepV.
    tapSum += SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(centerUV + float2(-stepU, stepV), compareDepth));
    tapSum += SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(centerUV + float2(0, stepV), compareDepth));
    tapSum += SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(centerUV + float2(stepU, stepV), compareDepth));

    return tapSum / 9.0;
}

// 3x3 tent-filtered shadow lookup using four weighted SAMPLE_SHADOW fetches.
// Each fetch stands in for a 2x2 group of texels: its weight is the sum of the
// group's texel weights and its position is shifted inside the group in
// proportion to those weights.
//   coord                       - xy = shadow-map UV, z = depth to compare.
//   _ShadowMapTexture_TexelSize - zw converts UV to texel units, xy converts
//                                 texel units back to UV.
float PCF3x3Tent(float4 coord, float4 _ShadowMapTexture_TexelSize)
{
    // Tent centre in texel units, snapped to the nearest integer position;
    // the remainder drives the per-texel weights.
    float2 tentCenterTexels = coord.xy * _ShadowMapTexture_TexelSize.zw;
    float2 fetchCenterTexels = floor(tentCenterTexels + 0.5);
    float2 subTexelOffset = tentCenterTexels - fetchCenterTexels;

    // Per-texel weights along U and V.
    float4 texelWeightU, texelWeightV;
    GetWeightPerTexel_3(subTexelOffset.x, texelWeightU);
    GetWeightPerTexel_3(subTexelOffset.y, texelWeightV);

    // Weight of each 2-texel group along an axis.
    float2 wU = texelWeightU.xz + texelWeightU.yw;
    float2 wV = texelWeightV.xz + texelWeightV.yw;

    // Fetch offsets from the fetch centre, converted from texels to UV.
    float2 offsetU = (texelWeightU.yw / wU.xy + float2(-1.5, 0.5)) * _ShadowMapTexture_TexelSize.xx;
    float2 offsetV = (texelWeightV.yw / wV.xy + float2(-1.5, 0.5)) * _ShadowMapTexture_TexelSize.yy;

    // Absolute UV of each fetch column / row.
    float2 originUV = fetchCenterTexels * _ShadowMapTexture_TexelSize.xy;
    float2 tapU = originUV.xx + offsetU;
    float2 tapV = originUV.yy + offsetV;
    float depth = coord.z;

    float shadow = wU.x * wV.x * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.x, tapV.x, depth));
    shadow += wU.y * wV.x * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.y, tapV.x, depth));
    shadow += wU.x * wV.y * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.x, tapV.y, depth));
    shadow += wU.y * wV.y * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.y, tapV.y, depth));

    return shadow;
}

// 5x5 tent-filtered shadow lookup using nine weighted SAMPLE_SHADOW fetches
// (three per axis). Each fetch stands in for a 2x2 group of texels: its weight
// is the sum of the group's texel weights and its position is shifted inside
// the group in proportion to those weights.
//   coord                       - xy = shadow-map UV, z = depth to compare.
//   _ShadowMapTexture_TexelSize - zw converts UV to texel units, xy converts
//                                 texel units back to UV.
float PCF5x5Tent(float4 coord, float4 _ShadowMapTexture_TexelSize)
{
    // Tent centre in texel units, snapped to the nearest integer position;
    // the remainder drives the per-texel weights.
    float2 tentCenterTexels = coord.xy * _ShadowMapTexture_TexelSize.zw;
    float2 fetchCenterTexels = floor(tentCenterTexels + 0.5);
    float2 subTexelOffset = tentCenterTexels - fetchCenterTexels;

    // Six per-texel weights along each axis, split into A (first three) and
    // B (last three).
    float3 texelWeightU_A, texelWeightU_B;
    float3 texelWeightV_A, texelWeightV_B;
    GetWeightPerTexel_5(subTexelOffset.x, texelWeightU_A, texelWeightU_B);
    GetWeightPerTexel_5(subTexelOffset.y, texelWeightV_A, texelWeightV_B);

    // Second texel of every pair; its share of the pair weight positions the fetch.
    float3 pairSecondU = float3(texelWeightU_A.y, texelWeightU_B.xz);
    float3 pairSecondV = float3(texelWeightV_A.y, texelWeightV_B.xz);

    // Weight of each 2-texel group along an axis.
    float3 wU = float3(texelWeightU_A.xz, texelWeightU_B.y) + pairSecondU;
    float3 wV = float3(texelWeightV_A.xz, texelWeightV_B.y) + pairSecondV;

    // Fetch offsets from the fetch centre, converted from texels to UV.
    float3 offsetU = (pairSecondU / wU.xyz + float3(-2.5, -0.5, 1.5)) * _ShadowMapTexture_TexelSize.xxx;
    float3 offsetV = (pairSecondV / wV.xyz + float3(-2.5, -0.5, 1.5)) * _ShadowMapTexture_TexelSize.yyy;

    // Absolute UV of each fetch column / row.
    float2 originUV = fetchCenterTexels * _ShadowMapTexture_TexelSize.xy;
    float3 tapU = originUV.xxx + offsetU;
    float3 tapV = originUV.yyy + offsetV;
    float depth = coord.z;

    // First row of fetches.
    float shadow = wU.x * wV.x * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.x, tapV.x, depth));
    shadow += wU.y * wV.x * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.y, tapV.x, depth));
    shadow += wU.z * wV.x * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.z, tapV.x, depth));

    // Second row of fetches.
    shadow += wU.x * wV.y * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.x, tapV.y, depth));
    shadow += wU.y * wV.y * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.y, tapV.y, depth));
    shadow += wU.z * wV.y * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.z, tapV.y, depth));

    // Third row of fetches.
    shadow += wU.x * wV.z * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.x, tapV.z, depth));
    shadow += wU.y * wV.z * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.y, tapV.z, depth));
    shadow += wU.z * wV.z * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.z, tapV.z, depth));

    return shadow;
}

// 7x7 tent-filtered shadow lookup using sixteen weighted SAMPLE_SHADOW fetches
// (four per axis). Each fetch stands in for a 2x2 group of texels: its weight
// is the sum of the group's texel weights and its position is shifted inside
// the group in proportion to those weights.
//   coord                       - xy = shadow-map UV, z = depth to compare.
//   _ShadowMapTexture_TexelSize - zw converts UV to texel units, xy converts
//                                 texel units back to UV.
float PCF7x7Tent(float4 coord, float4 _ShadowMapTexture_TexelSize)
{
    // Tent centre in texel units, snapped to the nearest integer position;
    // the remainder drives the per-texel weights.
    float2 tentCenterTexels = coord.xy * _ShadowMapTexture_TexelSize.zw;
    float2 fetchCenterTexels = floor(tentCenterTexels + 0.5);
    float2 subTexelOffset = tentCenterTexels - fetchCenterTexels;

    // Eight per-texel weights along each axis, split into A (first four) and
    // B (last four).
    float4 texelWeightU_A, texelWeightU_B;
    float4 texelWeightV_A, texelWeightV_B;
    GetWeightPerTexel_7(subTexelOffset.x, texelWeightU_A, texelWeightU_B);
    GetWeightPerTexel_7(subTexelOffset.y, texelWeightV_A, texelWeightV_B);

    // Second texel of every pair; its share of the pair weight positions the fetch.
    float4 pairSecondU = float4(texelWeightU_A.yw, texelWeightU_B.yw);
    float4 pairSecondV = float4(texelWeightV_A.yw, texelWeightV_B.yw);

    // Weight of each 2-texel group along an axis.
    float4 wU = float4(texelWeightU_A.xz, texelWeightU_B.xz) + pairSecondU;
    float4 wV = float4(texelWeightV_A.xz, texelWeightV_B.xz) + pairSecondV;

    // Fetch offsets from the fetch centre, converted from texels to UV.
    float4 offsetU = (pairSecondU / wU.xyzw + float4(-3.5, -1.5, 0.5, 2.5)) * _ShadowMapTexture_TexelSize.xxxx;
    float4 offsetV = (pairSecondV / wV.xyzw + float4(-3.5, -1.5, 0.5, 2.5)) * _ShadowMapTexture_TexelSize.yyyy;

    // Absolute UV of each fetch column / row.
    float2 originUV = fetchCenterTexels * _ShadowMapTexture_TexelSize.xy;
    float4 tapU = originUV.xxxx + offsetU;
    float4 tapV = originUV.yyyy + offsetV;
    float depth = coord.z;

    // First row of fetches.
    float shadow = wU.x * wV.x * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.x, tapV.x, depth));
    shadow += wU.y * wV.x * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.y, tapV.x, depth));
    shadow += wU.z * wV.x * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.z, tapV.x, depth));
    shadow += wU.w * wV.x * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.w, tapV.x, depth));

    // Second row of fetches.
    shadow += wU.x * wV.y * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.x, tapV.y, depth));
    shadow += wU.y * wV.y * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.y, tapV.y, depth));
    shadow += wU.z * wV.y * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.z, tapV.y, depth));
    shadow += wU.w * wV.y * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.w, tapV.y, depth));

    // Third row of fetches.
    shadow += wU.x * wV.z * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.x, tapV.z, depth));
    shadow += wU.y * wV.z * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.y, tapV.z, depth));
    shadow += wU.z * wV.z * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.z, tapV.z, depth));
    shadow += wU.w * wV.z * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.w, tapV.z, depth));

    // Fourth row of fetches.
    shadow += wU.x * wV.w * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.x, tapV.w, depth));
    shadow += wU.y * wV.w * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.y, tapV.w, depth));
    shadow += wU.z * wV.w * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.z, tapV.w, depth));
    shadow += wU.w * wV.w * SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(tapU.w, tapV.w, depth));

    return shadow;
}

// Shadow lookup that averages sixteen SAMPLE_SHADOW taps placed at fixed
// offsets around shadowCoord.xy, each offset scaled by a radius of 0.002 in UV
// units. Every tap compares against shadowCoord.z and carries a weight of
// 1/16 (0.0625). Taps are processed in four batches of four.
float SampleShadowRadius(float4 shadowCoord)
{
    const float radius = 0.002;
    float4 tapWeights = float4(0.0625, 0.0625, 0.0625, 0.0625);
    float4 centerPair = shadowCoord.xyxy; // centre UV duplicated so one float4 holds two taps
    float depth = shadowCoord.z;

    float4 pairA;
    float4 pairB;
    float4 taps;
    float visibility;

    // Batch 1.
    pairA = centerPair + float4(-0.1658961, 0.98614317, 0.8875289, 0.14930651) * radius;
    pairB = centerPair + float4(0.13271689, -0.78891462, -0.69030023, -0.1161273) * radius;
    taps.x = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairA.xy, depth));
    taps.y = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairA.zw, depth));
    taps.z = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairB.xy, depth));
    taps.w = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairB.zw, depth));
    visibility = dot(taps, tapWeights);

    // Batch 2.
    pairA = centerPair + float4(-0.35084891, 0.35623729, 0.46310851, 0.45610359) * radius;
    pairB = centerPair + float4(0.070169792, -0.071247473, -0.2493661, -0.24559429) * radius;
    taps.x = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairA.xy, depth));
    taps.y = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairA.zw, depth));
    taps.z = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairB.xy, depth));
    taps.w = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairB.zw, depth));
    visibility += dot(taps, tapWeights);

    // Batch 3.
    pairA = centerPair + float4(0.055099439, 0.2438525, 0.78032809, -0.1763182) * radius;
    pairB = centerPair + float4(-0.198358, -0.87786913, -0.9754101, 0.2203977) * radius;
    taps.x = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairA.xy, depth));
    taps.y = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairA.zw, depth));
    taps.z = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairB.xy, depth));
    taps.w = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairB.zw, depth));
    visibility += dot(taps, tapWeights);

    // Batch 4.
    pairA = centerPair + float4(0.4280726, 0.73433912, 0.25917849, -0.15108439) * radius;
    pairB = centerPair + float4(-0.3273496, -0.56155342, -0.1727857, 0.100723) * radius;
    taps.x = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairA.xy, depth));
    taps.y = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairA.zw, depth));
    taps.z = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairB.xy, depth));
    taps.w = SAMPLE_SHADOW(TEXTURE_SHADOW_DEPTH, float3(pairB.zw, depth));
    visibility += dot(taps, tapWeights);

    return visibility;
}

#if DirLight
// Directional-light shadow attenuation.
//   shadowCoord - xyz = shadow-map UV and depth, w = the value compared
//                 against CAMERA_SHADOWRANGE.x (TRANSFER_SHADOW writes the
//                 negated view-space z there).
// Returns 1.0 when shadowCoord.w exceeds CAMERA_SHADOWRANGE.x; otherwise the
// filtered shadow term blended towards 1.0 by LIGHT_PARAM.y.
float GetShadowAtten(float4 shadowCoord)
{
    // Past the shadow range nothing is shadowed.
    if (shadowCoord.w > CAMERA_SHADOWRANGE.x)
    {
        return 1.0;
    }

    // xy = LIGHT_PARAM.z, zw = its reciprocal. The PCF helpers multiply UVs by
    // zw to reach texel units, so LIGHT_PARAM.z is presumably the size of one
    // texel in UV units - TODO confirm.
    float texelStep = LIGHT_PARAM.z;
    float texelsPerUnit = 1.0 / texelStep;
    float4 texelSize = float4(texelStep, texelStep, texelsPerUnit, texelsPerUnit);

    // The shadow map is addressed with a flipped V coordinate.
    float4 flippedCoord = shadowCoord;
    flippedCoord.y = 1.0 - shadowCoord.y;

    // Filter selection; the 3x3 tent filter is the default.
#if SHADOW_ATTEN_NOHARDWARE
    float visibility = PCF3x3NoHardwareSupport(flippedCoord, texelSize);
#elif  SHADOW_ATTEN_PCF3x3
    float visibility = PCF3x3Tent(flippedCoord, texelSize);
#elif  SHADOW_ATTEN_PCF5x5
    float visibility = PCF5x5Tent(flippedCoord, texelSize);
#elif  SHADOW_ATTEN_PCF7x7
    float visibility = PCF7x7Tent(flippedCoord, texelSize);
#else
    float visibility = PCF3x3Tent(flippedCoord, texelSize);
#endif

    // LIGHT_PARAM.y = 0 gives no shadow, 1 gives the full filtered result.
    return lerp(1.0, visibility, LIGHT_PARAM.y);
}

#else
// Shadow attenuation for the non-directional path (compiled when DirLight is off).
//   worldCoord - position that is transformed by LIGHT_CAMERA_VIEW and
//                LIGHT_CAMERA_PROJECTION into the light's clip space here.
// Returns the filtered shadow term blended towards 1.0 by LIGHT_PARAM.y.
float GetShadowAtten(float4 worldCoord)
{
    // Project into the light's clip space, apply the perspective divide and
    // remap from [-1, 1] to [0, 1]. V is flipped for shadow-map addressing.
    float4 lightClip = mul(mul(worldCoord, LIGHT_CAMERA_VIEW), LIGHT_CAMERA_PROJECTION);
    float3 lightNdc = lightClip.xyz / lightClip.w;
    float3 uvDepth = lightNdc * 0.5 + 0.5;
    float4 shadowCoord = float4(uvDepth.x, 1.0 - uvDepth.y, uvDepth.z, lightClip.w);

    // xy = LIGHT_PARAM.z, zw = its reciprocal. The PCF helpers multiply UVs by
    // zw to reach texel units, so LIGHT_PARAM.z is presumably the size of one
    // texel in UV units - TODO confirm.
    float texelStep = LIGHT_PARAM.z;
    float texelsPerUnit = 1.0 / texelStep;
    float4 texelSize = float4(texelStep, texelStep, texelsPerUnit, texelsPerUnit);

    // Filter selection; the 3x3 tent filter is the default.
#if SHADOW_ATTEN_RADIUS
    float visibility = SampleShadowRadius(shadowCoord);
#elif  SHADOW_ATTEN_PCF3x3
    float visibility = PCF3x3Tent(shadowCoord, texelSize);
#elif  SHADOW_ATTEN_PCF5x5
    float visibility = PCF5x5Tent(shadowCoord, texelSize);
#elif  SHADOW_ATTEN_PCF7x7
    float visibility = PCF7x7Tent(shadowCoord, texelSize);
#else
    float visibility = PCF3x3Tent(shadowCoord, texelSize);
#endif

    // LIGHT_PARAM.y = 0 gives no shadow, 1 gives the full filtered result.
    return lerp(1.0, visibility, LIGHT_PARAM.y);
}
#endif

#endif

// Shadow helper macros used by material shaders.
//
//   SHADOW_COORDS(idx1)                 - declares the _ShadowCoord interpolator on
//                                         TEXCOORD<idx1> (expands to nothing when unused).
//   TRANSFER_SHADOW(o, vertex)          - vertex stage: fills o._ShadowCoord.
//   SHADOW_ATTEN(o)                     - pixel stage: shadow attenuation from o._ShadowCoord.
//   SHADOW_DEFERRED(worldPos, outCoord) - builds the coordinate from a world position.
//   SHADOW_ATTEN_DEFERRED(coord)        - attenuation from a coordinate built by SHADOW_DEFERRED.
//
// TRANSFER_SHADOW and the DirLight SHADOW_DEFERRED expand to several statements
// and declare locals (lightCoordBuiltin, internalShadowViewPos). Use them at
// most once per scope and never as the unbraced body of an if/for.
#if ShadowOn
    #if DirLight
        // Directional light: xyz = light clip-space position remapped by *0.5+0.5,
        // w = negated view-space z (compared against CAMERA_SHADOWRANGE.x in GetShadowAtten).
        // The index must be token-pasted; a bare TEXCOORDidx1 never substitutes idx1.
        #define SHADOW_COORDS(idx1) float4 _ShadowCoord : TEXCOORD##idx1;
        #define TRANSFER_SHADOW(o, vertex) float4 lightCoordBuiltin = mul(mul(ObjectToWorldPos(vertex), LIGHT_CAMERA_VIEW), LIGHT_CAMERA_PROJECTION);\
                        o._ShadowCoord.xyz = lightCoordBuiltin.xyz * 0.5 + 0.5;\
                        float4 internalShadowViewPos = ObjectToViewPos(vertex);\
                        o._ShadowCoord.w = -internalShadowViewPos.z
        #define SHADOW_ATTEN(o) GetShadowAtten(o._ShadowCoord)
        #define SHADOW_ATTEN_DEFERRED(shadowCoord) GetShadowAtten(shadowCoord)
        #define SHADOW_DEFERRED(worldPos, outLightCoord) float4 lightCoordBuiltin = mul(mul(worldPos, LIGHT_CAMERA_VIEW), LIGHT_CAMERA_PROJECTION);\
                        outLightCoord.xyz = lightCoordBuiltin.xyz * 0.5 + 0.5;\
                        float4 internalShadowViewPos = mul(worldPos, CAMERA_VIEW);\
                        outLightCoord.w = -internalShadowViewPos.z
    #elif SpotLight
        // Spot light: the interpolator carries the world position (w = 1);
        // GetShadowAtten does the light-space projection per pixel.
        // The index must be token-pasted; a bare TEXCOORDidx1 never substitutes idx1.
        #define SHADOW_COORDS(idx1) float4 _ShadowCoord : TEXCOORD##idx1;
        #define TRANSFER_SHADOW(o, vertex) float4 lightCoordBuiltin = ObjectToWorldPos(vertex);\
                        o._ShadowCoord.xyz = lightCoordBuiltin.xyz;\
                        o._ShadowCoord.w = 1.0
        #define SHADOW_ATTEN(o) GetShadowAtten(o._ShadowCoord)
        #define SHADOW_ATTEN_DEFERRED(worldCoord) GetShadowAtten(worldCoord)
        #define SHADOW_DEFERRED(worldPos, outLightCoord) outLightCoord = worldPos
    #else
        // Shadows enabled but the light type has no shadow path: fully lit.
        #define SHADOW_COORDS(idx1) 
        #define TRANSFER_SHADOW(o, vertex) float4 lightCoordBuiltin = float4(1.0, 1.0, 1.0, 1.0)
        #define SHADOW_ATTEN(o) 1.0
        #define SHADOW_ATTEN_DEFERRED(shadowCoord) 1.0
        #define SHADOW_DEFERRED(worldPos, outLightCoord) outLightCoord = float4(1.0, 1.0, 1.0, 1.0)
    #endif
#else
    // Shadows disabled: no interpolator is declared and attenuation is always 1.0.
    #define SHADOW_COORDS(idx1)
    #define TRANSFER_SHADOW(o, vertex) float4 lightCoordBuiltin = float4(1.0, 1.0, 1.0, 1.0)
    #define SHADOW_ATTEN(o) 1.0
    #define SHADOW_ATTEN_DEFERRED(shadowCoord) 1.0
    #define SHADOW_DEFERRED(worldPos, outLightCoord) outLightCoord = float4(1.0, 1.0, 1.0, 1.0)
#endif

