#DEFPARAMS
FRAME_SIZE                   = {"FRAME_SIZE", VEC2, "16.0, 16.0"},
BLOCK_SIZE                   = {"BLOCK_SIZE", VEC2, "16.0, 16.0"},
HISTOGRAM_SIZE               = {"HISTOGRAM_SIZE", VEC2, "16.0, 16.0"},
GAMMA_RGB                    = {"GAMMA_RGB",  VEC3, "1.0, 1.0, 1.0"},
ALPHA_HIGH_RGB               = {"ALPHA_HIGH_RGB",  VEC3, "1.0, 1.0, 1.0"},
ALPHA_HIGH_DIFF_GAIN_RGB     = {"ALPHA_HIGH_DIFF_GAIN_RGB", VEC3, "1.0, 1.0, 1.0"},
ALPHA_HIGH_DIFF_GAIN_RGB_MIN = {"ALPHA_HIGH_DIFF_GAIN_RGB_MIN", VEC3, "1.0, 1.0, 1.0"},
ALPHA_HIGH_DIFF_GAIN_RGB_MAX = {"ALPHA_HIGH_DIFF_GAIN_RGB_MAX", VEC3, "1.0, 1.0, 1.0"},
ALPHA_LOW_RGB                = {"ALPHA_LOW_RGB",  VEC3, "1.0, 1.0, 1.0"},
ALPHA_LOW_DIFF_GAIN_RGB      = {"ALPHA_LOW_DIFF_GAIN_RGB", VEC3, "1.0, 1.0, 1.0"},
ALPHA_LOW_DIFF_GAIN_RGB_MIN  = {"ALPHA_LOW_DIFF_GAIN_RGB_MIN", VEC3, "1.0, 1.0, 1.0"},
ALPHA_LOW_DIFF_GAIN_RGB_MAX  = {"ALPHA_LOW_DIFF_GAIN_RGB_MAX", VEC3, "1.0, 1.0, 1.0"},
G0_GAIN_RGB                  = {"G0_GAIN_RGB", VEC3, "1.0, 1.0, 1.0"},
SIGMA_RGB                    = {"SIGMA_RGB", VEC3, "1.0, 1.0, 1.0"},
BETA_RGB                     = {"BETA_RGB", VEC3, "1.0, 1.0, 1.0"},
HISTOGRAM                    = {"HISTOGRAM", TEXTURE2D, "HISTOGRAM"},
#END

#DEFTAG
ShaderName  = "LutGenStep"
RenderQueue = "PostEffect"
#END

#DEFPASS Always
COLOR_MASK   = COLOR_RGBA
ALPAH_MODE   = { ALPAH_OFF }
DRAW_MODE    = { CULL_FACE_OFF, DEPTH_MASK_OFF, DEPTH_TEST_OFF }
STENCIL_MODE = { STENCIL_OFF }
LIGHT_MODE   = { ALWAYS }

CGPROGRAM
#pragma vertex vert
#pragma fragment frag
#include "common.inc"

// Per-vertex input consumed by vert().
struct appdata
{
    float4 vertex : POSITION;   // vertex position, passed to UniformNDC() in vert()
    float2 uv : TEXCOORD0;      // texture coordinate, forwarded unchanged to the fragment stage
};

// Values handed from vert() to frag().
struct v2f
{
    float2 uv : TEXCOORD0;          // copied from appdata.uv; frag() scales it by HISTOGRAM_SIZE
    float4 vertex : SV_POSITION;    // result of UniformNDC(appdata.vertex)
};

// Shader uniforms. Each name matches an entry of the #DEFPARAMS section at the
// top of this file.

// Not referenced by vert() or frag() in this pass.
float2    FRAME_SIZE;
// Has no influence on the value returned by frag().
float2    BLOCK_SIZE;
// Multiplied with the incoming uv to get an output texel index; its reciprocal
// converts histogram texel indices back into sampling coordinates.
float2    HISTOGRAM_SIZE;
// Per-channel exponent of the power curve applied to the LUT entry index.
float3    GAMMA_RGB;
// Exponent controls used for channels whose gamma-mapped value is >= the pivot g0:
// base value, gain applied to |median - mean|, and the clamp range of that product.
float3    ALPHA_HIGH_RGB;
float3    ALPHA_HIGH_DIFF_GAIN_RGB;
float3    ALPHA_HIGH_DIFF_GAIN_RGB_MIN;
float3    ALPHA_HIGH_DIFF_GAIN_RGB_MAX;
// Same as the ALPHA_HIGH_* group, used for channels whose value is below g0.
float3    ALPHA_LOW_RGB;
float3    ALPHA_LOW_DIFF_GAIN_RGB;
float3    ALPHA_LOW_DIFF_GAIN_RGB_MIN;
float3    ALPHA_LOW_DIFF_GAIN_RGB_MAX;
// Per-channel gain applied to (mean + median) / 2 to form the pivot g0.
float3    G0_GAIN_RGB;
// Distance from g0 at which the curve switches from the power segment to the
// linear segment. Used as a divisor in frag(), so it is expected to be non-zero.
float3    SIGMA_RGB;
// Slope of the linear segment beyond SIGMA_RGB.
float3    BETA_RGB;
// Histogram texture; frag() reads only its .x channel.
sampler2D HISTOGRAM;

// Vertex stage: forwards the texture coordinate untouched and lets
// UniformNDC() (expected to come from common.inc) produce the output position.
v2f vert(appdata v)
{
    v2f result;

    result.uv = v.uv;
    result.vertex = UniformNDC(v.vertex);

    return result;
}

// Fragment stage: computes one entry of a per-tile tone look-up table.
//
// The render target is treated as a grid of 16x16-texel tiles with the same
// layout as HISTOGRAM. Each tile is read as a 256-bin histogram (row-major);
// the fragment's position inside its tile selects which of the 256 LUT entries
// it produces.
//
// Steps:
//   1. Scan the tile's 256 bins to obtain the mean and a median estimate.
//   2. Derive the curve exponents from |median - mean|.
//   3. Gamma-map the entry index on a 0..255 scale.
//   4. Reshape the result around the pivot g0: power curve within SIGMA_RGB of
//      the pivot, straight line with slope BETA_RGB beyond it.
//
// Returns the LUT value in rgb, normalised to 0..1, with alpha = 1.
//
// Assumes the .x values of a tile's bins sum to 1 (the 0.5 median threshold
// and the weighted-sum mean both rely on it) -- TODO confirm against the pass
// that writes HISTOGRAM.
float4 frag(v2f i) : SV_Target
{
    // Edge length of a histogram / LUT tile; TILE_DIM * TILE_DIM = 256 bins.
    const int TILE_DIM = 16;

    // Continuous texel position of this fragment and the tile that contains it.
    float2 out_texel = i.uv * HISTOGRAM_SIZE;
    int tile_x = int(out_texel.x / TILE_DIM);
    int tile_y = int(out_texel.y / TILE_DIM);
    int tile_origin_x = tile_x * TILE_DIM;
    int tile_origin_y = tile_y * TILE_DIM;

    float2 texel_size = float2(1.0 / HISTOGRAM_SIZE.x, 1.0 / HISTOGRAM_SIZE.y);

    int hist_median = 0;
    float hist_mean_float = 0.0;
    float accumulated = 0.0;
    for (int bin_y = 0; bin_y < TILE_DIM; ++bin_y)
    {
        for (int bin_x = 0; bin_x < TILE_DIM; ++bin_x)
        {
            int bin = bin_y * TILE_DIM + bin_x;

            // Sample the texel centre (+0.5). Sampling exactly on the texel
            // edge lets rounding or bilinear filtering return the wrong bin.
            float2 bin_uv = float2((bin_x + tile_origin_x + 0.5) * texel_size.x,
                                   (bin_y + tile_origin_y + 0.5) * texel_size.y);
            float weight = tex2D(HISTOGRAM, bin_uv).x;

            hist_mean_float += weight * bin;
            accumulated += weight;

            // Branch-free median tracking: keeps the last bin whose cumulative
            // weight is still <= 0.5.
            // NOTE(review): this is the bin just before the cumulative weight
            // crosses 0.5, not the crossing bin itself -- confirm this is the
            // intended definition of the median.
            hist_median = (accumulated > 0.5 ? hist_median : bin);
        }
    }
    int hist_mean = int(hist_mean_float);

    // The mean/median gap drives the exponent of the power segment.
    int diffM = abs(hist_median - hist_mean);

    float3 alpha_high = ALPHA_HIGH_RGB * clamp(diffM * ALPHA_HIGH_DIFF_GAIN_RGB, ALPHA_HIGH_DIFF_GAIN_RGB_MIN, ALPHA_HIGH_DIFF_GAIN_RGB_MAX);
    float3 alpha_low = ALPHA_LOW_RGB * clamp(diffM * ALPHA_LOW_DIFF_GAIN_RGB, ALPHA_LOW_DIFF_GAIN_RGB_MIN, ALPHA_LOW_DIFF_GAIN_RGB_MAX);

    // LUT entry (0..255) addressed by this fragment inside its tile.
    int lut_entry_x = int(out_texel.x - tile_origin_x);
    int lut_entry_y = int(out_texel.y - tile_origin_y);
    int lut_entry = lut_entry_y * TILE_DIM + lut_entry_x;

    float3 val_max = float3(255.0, 255.0, 255.0);
    float3 val_min = float3(0.0, 0.0, 0.0);

    // Gamma curve measured from the top of the range: 255 - ((255 - e) / 255)^gamma * 255.
    float3 entry = float3(lut_entry * 1.0, lut_entry * 1.0, lut_entry * 1.0);
    float3 gamma_mapped = clamp(val_max - pow((val_max - entry) / val_max, GAMMA_RGB) * val_max, val_min, val_max);

    // Pivot of the curve and the signed distance of the gamma-mapped value from it.
    float3 g0 = (hist_mean + hist_median) * 0.5 * G0_GAIN_RGB;
    float3 diff = gamma_mapped - g0;
    float3 magi = abs(diff);

    // +1 on or above the pivot, -1 below it. The sign() intrinsic is not used
    // because it yields 0 for diff == 0, which must count as "above" here.
    float3 side = float3(0.0, 0.0, 0.0);
    side.x = (diff.x >= 0 ? 1.0 : -1.0);
    side.y = (diff.y >= 0 ? 1.0 : -1.0);
    side.z = (diff.z >= 0 ? 1.0 : -1.0);

    float alpha_r = (side.x > 0 ? alpha_high.x : alpha_low.x);
    float alpha_g = (side.y > 0 ? alpha_high.y : alpha_low.y);
    float alpha_b = (side.z > 0 ? alpha_high.z : alpha_low.z);

    // Within SIGMA of the pivot: power curve of the normalised distance.
    // Beyond SIGMA: straight line with slope BETA that continues from SIGMA.
    float3 g0_offset = float3(0.0, 0.0, 0.0);
    g0_offset.x = (magi.x > SIGMA_RGB.x ? (magi.x - SIGMA_RGB.x) * BETA_RGB.x + SIGMA_RGB.x : pow(magi.x / SIGMA_RGB.x, alpha_r) * SIGMA_RGB.x);
    g0_offset.y = (magi.y > SIGMA_RGB.y ? (magi.y - SIGMA_RGB.y) * BETA_RGB.y + SIGMA_RGB.y : pow(magi.y / SIGMA_RGB.y, alpha_g) * SIGMA_RGB.y);
    g0_offset.z = (magi.z > SIGMA_RGB.z ? (magi.z - SIGMA_RGB.z) * BETA_RGB.z + SIGMA_RGB.z : pow(magi.z / SIGMA_RGB.z, alpha_b) * SIGMA_RGB.z);

    // Re-apply the side, clamp to 0..255 and normalise to 0..1 for output.
    float4 lut_rgb = float4(0.0, 0.0, 0.0, 1.0);
    lut_rgb.xyz = clamp(g0 + side * g0_offset, val_min, val_max) / val_max;

    return lut_rgb;
}
ENDCG
#END