// Generated from libavcodec/vulkan/ffv1_rct_search.comp
// Compute shader source text, stored as one C string built from adjacent
// string literals (one literal per shader line, each ending in a newline).
//
// This file is a generated artifact (see the first line of the file), so any
// change to the shader logic belongs in the .comp input, not here.
//
// The shader text refers to names that it does not define: src, planar_rgb,
// fmt_lut, rct_offset, predict, mid_pred, slice_coord, SliceContext,
// slice_ctx and force_pcm. Presumably it is combined with other shader text
// that supplies them before compilation - not visible from this file.
//
// Summary of what the text does: for every pixel of a slice it tries each of
// the NUM_CHECKS candidate coefficient pairs, accumulates a prediction error
// score per candidate, and stores the pair with the smallest score into
// slice_rct_coef of the slice context.
const char *ff_source_ffv1_rct_search_comp =
"/*\n"
" * FFv1 codec\n"
" *\n"
" * Copyright (c) 2024 Lynne <dev@lynne.ee>\n"
" *\n"
" * This file is part of FFmpeg.\n"
" *\n"
" * FFmpeg is free software; you can redistribute it and/or\n"
" * modify it under the terms of the GNU Lesser General Public\n"
" * License as published by the Free Software Foundation; either\n"
" * version 2.1 of the License, or (at your option) any later version.\n"
" *\n"
" * FFmpeg is distributed in the hope that it will be useful,\n"
" * but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n"
" * Lesser General Public License for more details.\n"
" *\n"
" * You should have received a copy of the GNU Lesser General Public\n"
" * License along with FFmpeg; if not, write to the Free Software\n"
" * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n"
" */\n"
"\n"
// load_components: reads the texel at pos from src[0]; when planar_rgb is
// non-zero, components 1 and 2 are replaced by the first channel of src[1]
// and src[2]. The three components are then reordered through fmt_lut.
"ivec3 load_components(ivec2 pos)\n"
"{\n"
"    ivec3 pix = ivec3(imageLoad(src[0], pos));\n"
"    if (planar_rgb != 0) {\n"
"        for (int i = 1; i < 3; i++)\n"
"            pix[i] = int(imageLoad(src[i], pos)[0]);\n"
"    }\n"
"\n"
"    return ivec3(pix[fmt_lut[0]], pix[fmt_lut[1]], pix[fmt_lut[2]]);\n"
"}\n"
"\n"
// rct_y_coeff: the 15 candidate coefficient pairs. In transform_sample the
// first element multiplies the (r - g) term and the second the (b - g) term.
"#define NUM_CHECKS 15\n"
"const ivec2 rct_y_coeff[NUM_CHECKS] = {\n"
"    ivec2(0, 0), //      4G\n"
"\n"
"    ivec2(0, 1), //      3G +  B\n"
"    ivec2(1, 0), //  R + 3G\n"
"    ivec2(1, 1), //  R + 2G + B\n"
"\n"
"    ivec2(0, 2), //      2G + 2B\n"
"    ivec2(2, 0), // 2R + 2G\n"
"    ivec2(2, 2), // 2R      + 2B\n"
"\n"
"    ivec2(0, 3), //      1G + 3B\n"
"    ivec2(3, 0), // 3R + 1G\n"
"\n"
"    ivec2(0, 4), //           4B\n"
"    ivec2(4, 0), // 4R\n"
"\n"
"    ivec2(1, 2), //  R +  G + 2B\n"
"    ivec2(2, 1), // 2R +  G +  B\n"
"\n"
"    ivec2(3, 1), // 3R      +  B\n"
"    ivec2(1, 3), //  R      + 3B\n"
"};\n"
"\n"
// pix_buf: shared buffer of transformed pixels, one entry larger than the
// workgroup in each dimension. This text only writes entries [x + 1][y + 1],
// so index 0 in either dimension is read but never written here and keeps
// whatever the empty initializer gives it.
"shared ivec3 pix_buf[gl_WorkGroupSize.x + 1][gl_WorkGroupSize.y + 1] = { };\n"
"\n"
// transform_sample: subtracts g from b and r, adds
// (r * rct_coef.x + b * rct_coef.y) >> 2 to g using the already reduced r
// and b, then adds rct_offset to b and r.
"ivec3 transform_sample(ivec3 pix, ivec2 rct_coef)\n"
"{\n"
"    pix.b -= pix.g;\n"
"    pix.r -= pix.g;\n"
"    pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;\n"
"    pix.b += rct_offset;\n"
"    pix.r += rct_offset;\n"
"    return pix;\n"
"}\n"
"\n"
// get_dist: fetches three neighbouring pix_buf entries relative to this
// invocation (LL at [x][y + 1], TL at [x][y], TT at [x + 1][y]), runs the
// external predict() per channel, takes the absolute difference to cur and
// returns mid_pred() of the three channel differences.
"uint get_dist(ivec3 cur)\n"
"{\n"
"    ivec3 LL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 1];\n"
"    ivec3 TL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 0];\n"
"    ivec3 TT = pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 0];\n"
"\n"
"    ivec3 pred = ivec3(predict(LL.r, ivec2(TL.r, TT.r)),\n"
"                       predict(LL.g, ivec2(TL.g, TT.g)),\n"
"                       predict(LL.b, ivec2(TL.b, TT.b)));\n"
"\n"
"    uvec3 c = abs(pred - cur);\n"
"    return mid_pred(c.r, c.g, c.b);\n"
"}\n"
"\n"
// Shared score accumulators. score_mode[i] collects the distances for
// candidate i. NOTE(review): score_cols is not referenced anywhere in this
// text, and score_mode has 16 entries while NUM_CHECKS is 15 - confirm
// against the .comp source whether either is intentional.
"shared uint score_cols[gl_WorkGroupSize.y] = { };\n"
"shared uint score_mode[16] = { };\n"
"\n"
// process: for one pixel, applies every candidate transform, publishes the
// transformed pixel to pix_buf, and atomically adds its distance to the
// score of that candidate. Only memoryBarrierShared() separates the write
// from the neighbour reads; there is no execution barrier here.
// NOTE(review): neighbour entries may therefore hold values from another
// candidate or another pixel - presumably accepted as a heuristic; confirm.
"void process(ivec2 pos)\n"
"{\n"
"    ivec3 pix = load_components(pos);\n"
"\n"
"    for (int i = 0; i < NUM_CHECKS; i++) {\n"
"        ivec3 tx_pix = transform_sample(pix, rct_y_coeff[i]);\n"
"        pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 1] = tx_pix;\n"
"        memoryBarrierShared();\n"
"\n"
"        uint dist = get_dist(tx_pix);\n"
"        atomicAdd(score_mode[i], dist);\n"
"    }\n"
"}\n"
"\n"
// coeff_search: derives the slice rectangle from the workgroup id via the
// external slice_coord(), lets every invocation walk the rectangle with a
// stride of the workgroup size, then invocation (0, 0) picks the candidate
// with the smallest score (ties keep the lowest index; the fallback index 3
// is the pair (1, 1)) and writes it to sc.slice_rct_coef.
// NOTE(review): there is no barrier() between the accumulation loops and the
// reduction, so invocation (0, 0) looks able to read score_mode before the
// other invocations have finished adding to it - confirm whether this is
// intended, and fix in the .comp source if not.
"void coeff_search(inout SliceContext sc)\n"
"{\n"
"    uvec2 img_size = imageSize(src[0]);\n"
"    uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,\n"
"                           gl_NumWorkGroups.x, 0);\n"
"    uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,\n"
"                           gl_NumWorkGroups.x, 0);\n"
"    uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,\n"
"                           gl_NumWorkGroups.y, 0);\n"
"    uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,\n"
"                           gl_NumWorkGroups.y, 0);\n"
"\n"
"    for (uint y = sys + gl_LocalInvocationID.y; y < sye; y += gl_WorkGroupSize.y) {\n"
"        for (uint x = sxs + gl_LocalInvocationID.x; x < sxe; x += gl_WorkGroupSize.x) {\n"
"            process(ivec2(x, y));\n"
"        }\n"
"    }\n"
"\n"
"    if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) {\n"
"        uint min_score = 0xFFFFFFFF;\n"
"        uint min_idx = 3;\n"
"        for (int i = 0; i < NUM_CHECKS; i++) {\n"
"            if (score_mode[i] < min_score) {\n"
"                min_score = score_mode[i];\n"
"                min_idx = i;\n"
"            }\n"
"        }\n"
"        sc.slice_rct_coef = rct_y_coeff[min_idx];\n"
"    }\n"
"}\n"
"\n"
// main: does nothing when force_pcm equals 1; otherwise maps the 2D
// workgroup id to a row-major slice index and runs the search on that
// entry of slice_ctx.
"void main(void)\n"
"{\n"
"    if (force_pcm == 1)\n"
"        return;\n"
"    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n"
"    coeff_search(slice_ctx[slice_idx]);\n"
"}\n"
;
