#DEFPARAMS
FRAME_SIZE      = {"FRAME_SIZE", VEC2, "16.0, 16.0"},
BLOCK_SIZE      = {"BLOCK_SIZE", VEC2, "16.0, 16.0"},
HISTOGRAM_SIZE  = {"HISTOGRAM_SIZE", VEC2, "16.0, 16.0"},
SATURATION_GAIN = {"SATURATION_GAIN", FLOAT, "1.0"},
SPLIT_DIRECTION = {"SPLIT_DIRECTION", FLOAT, "0.0"},
SPLIT_RATIO     = {"SPLIT_RATIO", FLOAT, "0.0"},
CUR_FRAME       = {"CUR_FRAME", TEXTURE2D, "CUR_FRAME"},
RGB_LUT         = {"RGB_LUT", TEXTURE2D, "RGB_LUT"},
#END

#DEFTAG
ShaderName  = "LutApplyStep"
RenderQueue = "PostEffect"
#END

#DEFPASS Always
COLOR_MASK   = COLOR_RGBA
ALPAH_MODE   = { ALPAH_OFF }
DRAW_MODE    = { CULL_FACE_OFF, DEPTH_MASK_OFF, DEPTH_TEST_OFF }
STENCIL_MODE = { STENCIL_OFF }
LIGHT_MODE   = { ALWAYS }

CGPROGRAM
#pragma vertex vert
#pragma fragment frag
#include "common.inc"

// Per-vertex input consumed by vert().
struct appdata
{
    float4 vertex : POSITION;   // vertex position; handed to UniformNDC() in vert()
    float2 uv : TEXCOORD0;      // texture coordinate; forwarded unchanged to the fragment stage
};

// Values passed from vert() to frag().
struct v2f
{
    float2 uv : TEXCOORD0;          // copy of appdata.uv; frag() scales it by FRAME_SIZE to get a pixel position
    float4 vertex : SV_POSITION;    // result of UniformNDC(appdata.vertex); helper is not defined in this file (presumably common.inc)
};

// Shader inputs. Each name matches an entry of the #DEFPARAMS block at the top
// of the file, which also supplies the default values.
float2    FRAME_SIZE;        // multiplied with uv in frag() to obtain the fragment position (presumably frame size in pixels)
float2    BLOCK_SIZE;        // size of one block, in the same units as FRAME_SIZE; the frame is split into ceil(FRAME_SIZE / BLOCK_SIZE) blocks
float2    HISTOGRAM_SIZE;    // divisor turning RGB_LUT texel positions into sample coordinates (presumably RGB_LUT's size in texels)
float     SATURATION_GAIN;   // scales the (colour - luminance) term that frag() adds to the LUT-mapped colour
float     SPLIT_DIRECTION;   // rounds to 0 -> split compares uv.x; any other value -> split compares uv.y
float     SPLIT_RATIO;       // fragments whose split coordinate is greater than this output the unmodified source colour
Texture2D CUR_FRAME;         // source image, sampled at uv
Texture2D RGB_LUT;           // per-block lookup tables; frag() addresses it as one 16x16-texel tile per block
SamplerState CUR_FRAME_Sampler;  // sampler used with CUR_FRAME
SamplerState RGB_LUT_Sampler;    // sampler used with RGB_LUT

// Vertex stage: forwards the texture coordinate untouched and lets
// UniformNDC() (not defined in this file) produce the output position.
v2f vert(appdata v)
{
    v2f result;
    result.uv     = v.uv;
    result.vertex = UniformNDC(v.vertex);
    return result;
}

// Fetches the RGB_LUT texel holding `lut_entry` (expected 0..255) for the
// block at `block_idx`.
//
// RGB_LUT is addressed as a grid of LUT_TILE_SIZE x LUT_TILE_SIZE tiles, one
// tile per block. Inside a tile, entry e sits at column (e mod 16), row
// (e / 16). The +0.5 targets the texel centre.
float3 FetchBlockLut(int2 block_idx, int lut_entry)
{
    const int LUT_TILE_SIZE = 16;

    int entry_row = lut_entry / LUT_TILE_SIZE;
    int entry_col = lut_entry - entry_row * LUT_TILE_SIZE;

    float2 texel = float2(block_idx * LUT_TILE_SIZE + int2(entry_col, entry_row)) + 0.5;
    return RGB_LUT.Sample(RGB_LUT_Sampler, texel / HISTOGRAM_SIZE).xyz;
}

// Bilinearly blends the LUT value of `lut_entry` across the four blocks
// spanned by `block_low` / `block_high`.
//
// weight.x / weight.y are the fractions applied towards block_high along x and
// y respectively (0 selects block_low, 1 selects block_high).
float3 BlendBlockLut(int2 block_low, int2 block_high, float2 weight, int lut_entry)
{
    float3 left_up    = FetchBlockLut(int2(block_low.x,  block_low.y),  lut_entry);
    float3 right_up   = FetchBlockLut(int2(block_high.x, block_low.y),  lut_entry);
    float3 left_down  = FetchBlockLut(int2(block_low.x,  block_high.y), lut_entry);
    float3 right_down = FetchBlockLut(int2(block_high.x, block_high.y), lut_entry);

    float3 up   = lerp(left_up,   right_up,   weight.x);
    float3 down = lerp(left_down, right_down, weight.x);
    return lerp(up, down, weight.y);
}

// Fragment stage: remaps each colour channel of CUR_FRAME through per-block
// lookup tables stored in RGB_LUT, blending between neighbouring blocks, then
// applies a saturation adjustment and an optional split-screen comparison.
//
//   i         - interpolated vertex output; only i.uv is read.
//   mainColor - resulting colour; alpha is always 1.0.
void frag(in v2f i, out float4 mainColor : SV_Target0)
{
    // Number of blocks covering the frame; a partial block at the edge counts.
    int2 block_num = int2(ceil(FRAME_SIZE / BLOCK_SIZE));

    // Position of this fragment in FRAME_SIZE units.
    float2 frag_pos = i.uv * FRAME_SIZE;

    // Block containing the fragment and its +1 neighbour. Both are clamped to
    // the valid range so a coordinate on the far edge (uv == 1.0) cannot
    // address a tile outside the LUT.
    int2 last_block = block_num - 1;
    int2 block_low  = max(min(int2(frag_pos / BLOCK_SIZE), last_block), int2(0, 0));
    int2 block_high = min(block_low + 1, last_block);

    // Fractional position inside block_low, used as the blend factor towards
    // block_high. When both indices coincide (frame edge) the factor is
    // irrelevant because both lerp operands are equal.
    float2 block_weight = (frag_pos - float2(block_low) * BLOCK_SIZE) / BLOCK_SIZE;

    float3 in_rgb = CUR_FRAME.Sample(CUR_FRAME_Sampler, i.uv).xyz;

    // Quantise each channel to an 8-bit LUT entry (round to nearest).
    // saturate() keeps the entry within 0..255 so an out-of-range source value
    // cannot spill into a neighbouring block's tile.
    int3 lut_entry = int3(saturate(in_rgb) * 255.0 + 0.5);

    // Each channel is looked up with its own entry and reads its own LUT
    // component.
    float4 out_rgba = float4(0.0, 0.0, 0.0, 1.0);
    out_rgba.x = BlendBlockLut(block_low, block_high, block_weight, lut_entry.x).x;
    out_rgba.y = BlendBlockLut(block_low, block_high, block_weight, lut_entry.y).y;
    out_rgba.z = BlendBlockLut(block_low, block_high, block_weight, lut_entry.z).z;

    // Saturation: push each channel away from the pixel's luminance.
    // NOTE(review): with this formula a gain of 0 leaves the colour unchanged
    // and the declared default of 1.0 doubles the distance from luminance -
    // confirm that this is the intended meaning of the parameter.
    float luminance = dot(out_rgba.xyz, float3(0.299, 0.587, 0.114));
    out_rgba.xyz = saturate(out_rgba.xyz + SATURATION_GAIN * (out_rgba.xyz - luminance));

    // Split view: beyond SPLIT_RATIO along the chosen axis, show the source
    // colour as sampled instead of the processed one.
    float target_pos = (floor(SPLIT_DIRECTION + 0.5) == 0 ? i.uv.x : i.uv.y);
    out_rgba.xyz = (target_pos > SPLIT_RATIO ? in_rgb : out_rgba.xyz);

    mainColor = out_rgba;
}
ENDCG
#END