Skip to content

Commit

Permalink
[WIP] Remove binary search runtime lookup
Browse files: browse the repository at this point in the history
  • Loading branch information
Remi Achard committed Nov 26, 2024
1 parent 81c07fd commit 7c9d6d8
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 4 deletions.
12 changes: 12 additions & 0 deletions src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ struct Table1D
float table[TABLE_TOTAL_SIZE];
};

static constexpr int GAMUT_TABLE_IDX_SIZE = 360 * 5;
struct Table1DLookup
{
static constexpr int base_index = GAMUT_TABLE_BASE_INDEX;
static constexpr int size = GAMUT_TABLE_IDX_SIZE;
static constexpr int total_size = GAMUT_TABLE_IDX_SIZE + TABLE_ADDITION_ENTRIES;
float table[GAMUT_TABLE_IDX_SIZE + TABLE_ADDITION_ENTRIES];
float start;
float end;
};

struct JMhParams
{
float F_L;
Expand Down Expand Up @@ -79,6 +90,7 @@ struct GamutCompressParams
float focus_dist;
float lower_hull_gamma;
Table1D reach_m_table;
Table1DLookup gamut_cusp_index_table;
Table3D gamut_cusp_table;
Table1D upper_hull_gamma_table;
};
Expand Down
74 changes: 70 additions & 4 deletions src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Copyright Contributors to the OpenColorIO Project.

#include "Transform.h"
#include "ops/lut1d/Lut1DOpData.h"


namespace OCIO_NAMESPACE
Expand Down Expand Up @@ -46,8 +47,26 @@ int clamp_to_table_bounds(int entry, int table_size)
return std::min(table_size - 1, std::max(0, entry));
}

f2 cusp_from_table(float h, const Table3D &gt)
f2 cusp_from_table(float h, const Table3D &gt, const Table1DLookup &ht)
{
h = SanitizeFloat(h);

#ifdef NEW_CUSP_SAMPLING

float lut_h_min = ht.start;
float lut_h_max = ht.end;
float lut_h_range = lut_h_max - lut_h_min;
float lut_h = ((h / 360.f) - lut_h_min) / lut_h_range;
float f_lo = lut_h * (ht.total_size - 1);

int ii_lo = int(f_lo);
int ii_hi = ii_lo + 1;
float f = f_lo - int(f_lo);
int i_lo = int(lerp(ht.table[ii_lo], ht.table[ii_hi], f) * (gt.total_size - 1));
int i_hi = clamp_to_table_bounds(i_lo + 1, gt.total_size);

#else

int i_lo = 0;
int i_hi = gt.base_index + gt.size; // allowed as we have an extra entry in the table
int i = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size);
Expand All @@ -67,6 +86,8 @@ f2 cusp_from_table(float h, const Table3D &gt)

i_hi = std::max(1, i_hi);

#endif

const f3 lo {
gt.table[i_hi-1][0],
gt.table[i_hi-1][1],
Expand Down Expand Up @@ -424,6 +445,47 @@ Table3D make_gamut_table(const Primaries &P, float peakLuminance)
return gamutCuspTable;
}

// Builds the lookup that lets cusp_from_table() turn a hue into a position in
// the gamut cusp table without searching the table.
//
// The hue of every cusp table entry (component [2]) is rescaled so that the
// first entry maps to 0 and the last entry maps to 1, loaded into a 1D LUT,
// and that LUT is inverted. The inverse is then composed with a LUT_STANDARD
// LUT of Table1DLookup::total_size entries, and the composed values are copied
// into the returned table.
//
// gt : gamut cusp table; entry i carries its hue in gt.table[i][2].
// Returns the filled lookup; start / end hold the first / last cusp table hue
// divided by 360.
Table1DLookup make_gamut_table_lookup(const Table3D & gt)
{
    const unsigned int cuspEntryCount = gt.total_size;
    const float hueFirst = gt.table[0][2];
    const float hueLast  = gt.table[cuspEntryCount - 1][2];

    // Forward LUT: entry index -> rescaled hue, identical on the three channels.
    Lut1DOpDataRcPtr forwardLut = std::make_shared<Lut1DOpData>(cuspEntryCount);
    Array::Values & forwardValues = forwardLut->getArray().getValues();
    for (unsigned int entry = 0; entry < cuspEntryCount; ++entry)
    {
        const float rescaledHue = (gt.table[entry][2] - hueFirst) / (hueLast - hueFirst);
        const unsigned int channel0 = 3 * entry;
        forwardValues[channel0 + 0] = rescaledHue;
        forwardValues[channel0 + 1] = rescaledHue;
        forwardValues[channel0 + 2] = rescaledHue;
    }

    Table1DLookup lookup;
    lookup.start = hueFirst / 360.f;
    lookup.end   = hueLast / 360.f;

    const unsigned int lookupEntryCount = lookup.total_size;

    // Invert the forward LUT so that it maps rescaled hue -> rescaled index.
    auto inverseLut = forwardLut->inverse();
    inverseLut->validate();
    inverseLut->finalize();

    // Compose() takes its operands as ConstLut1DOpDataRcPtr lvalues.
    // NOTE(review): with COMPOSE_RESAMPLE_NO the result is presumably the
    // inverse evaluated at the sample positions of the new LUT - confirm.
    ConstLut1DOpDataRcPtr inverseOperand = inverseLut;
    ConstLut1DOpDataRcPtr domainOperand =
        std::make_shared<Lut1DOpData>(Lut1DOpData::LUT_STANDARD, lookupEntryCount, true);
    ConstLut1DOpDataRcPtr composedLut =
        Lut1DOpData::Compose(domainOperand, inverseOperand, Lut1DOpData::COMPOSE_RESAMPLE_NO);

    // The LUT array holds three values per entry; the third one is kept.
    const auto & composedValues = composedLut->getArray().getValues();
    for (unsigned int entry = 0; entry < lookupEntryCount; ++entry)
    {
        lookup.table[entry] = composedValues[3 * entry + 2];
    }

    return lookup;
}

bool any_below_zero(const f3 &rgb)
{
return (rgb[0] < 0. || rgb[1] < 0. || rgb[2] < 0.);
Expand Down Expand Up @@ -649,7 +711,7 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, b
else
{
const f2 project_from = {J, M};
const f2 JMcusp = cusp_from_table(h, p.gamut_cusp_table);
const f2 JMcusp = cusp_from_table(h, p.gamut_cusp_table, p.gamut_cusp_index_table);
const float focusJ = lerpf(JMcusp[0], p.mid_J, std::min(1.f, cusp_mid_blend - (JMcusp[0] / p.limit_J_max)));
const float slope_gain = p.limit_J_max * p.focus_dist * get_focus_gain(Jx, JMcusp[0], p.limit_J_max);

Expand Down Expand Up @@ -689,7 +751,7 @@ f3 gamut_compress_fwd(const f3 &JMh, const GamutCompressParams &p)

f3 gamut_compress_inv(const f3 &JMh, const GamutCompressParams &p)
{
const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table, p.gamut_cusp_index_table);
float Jx = JMh[0];

f3 unCompressedJMh;
Expand Down Expand Up @@ -741,6 +803,7 @@ bool evaluate_gamma_fit(

Table1D make_upper_hull_gamma(
const Table3D &gamutCuspTable,
const Table1DLookup &gamutCuspIndexTable,
float peakLuminance,
float limit_J_max,
float mid_J,
Expand All @@ -759,7 +822,7 @@ Table1D make_upper_hull_gamma(
gammaTable.table[i] = -1.f;

const float hue = (float) i;
const f2 JMcusp = cusp_from_table(hue, gamutCuspTable);
const f2 JMcusp = cusp_from_table(hue, gamutCuspTable, gamutCuspIndexTable);

f3 testJMh[test_count]{};
for (int testIndex = 0; testIndex < test_count; testIndex++)
Expand Down Expand Up @@ -913,8 +976,11 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const Primarie
params.lower_hull_gamma = lower_hull_gamma;
params.reach_m_table = make_reach_m_table(ACES_AP1::primaries, peakLuminance);
params.gamut_cusp_table = make_gamut_table(limitingPrimaries, peakLuminance);
params.gamut_cusp_index_table = make_gamut_table_lookup(params.gamut_cusp_table);

params.upper_hull_gamma_table = make_upper_hull_gamma(
params.gamut_cusp_table,
params.gamut_cusp_index_table,
peakLuminance,
limit_J_max,
mid_J,
Expand Down
2 changes: 2 additions & 0 deletions src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include "Common.h"

// Compile-time switch for how a hue is mapped to a gamut cusp table position.
// When defined, code guarded by `#ifdef NEW_CUSP_SAMPLING` uses the
// Table1DLookup hue index table; when undefined, the `#else` / `#ifndef`
// branches (the previous table search) are compiled instead.
#define NEW_CUSP_SAMPLING

namespace OCIO_NAMESPACE
{

Expand Down
80 changes: 80 additions & 0 deletions src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -723,21 +723,99 @@ std::string _Add_Cusp_table(
shaderCreator->addToDeclareShaderCode(ss.string().c_str());
}

#ifdef NEW_CUSP_SAMPLING

// Reserve name
std::ostringstream resNameIndex;
resNameIndex << shaderCreator->getResourcePrefix()
<< std::string("_")
<< std::string("gamut_cusp_table_hues_")
<< resourceIndex;

// Note: Remove potentially problematic double underscores from GLSL resource names.
std::string nameIndex(resNameIndex.str());
StringUtils::ReplaceInPlace(nameIndex, "__", "_");

// Register texture
GpuShaderDesc::TextureDimensions dimensionsIndex = GpuShaderDesc::TEXTURE_1D;
if (shaderCreator->getLanguage() == GPU_LANGUAGE_GLSL_ES_1_0
|| shaderCreator->getLanguage() == GPU_LANGUAGE_GLSL_ES_3_0
|| !shaderCreator->getAllowTexture1D())
{
dimensionsIndex = GpuShaderDesc::TEXTURE_2D;
}

shaderCreator->addTexture(
nameIndex.c_str(),
GpuShaderText::getSamplerName(nameIndex).c_str(),
g.gamut_cusp_index_table.total_size,
1,
GpuShaderCreator::TEXTURE_RED_CHANNEL,
dimensionsIndex,
INTERP_NEAREST,
&(g.gamut_cusp_index_table.table[0]));

if (dimensionsIndex == GpuShaderDesc::TEXTURE_1D)
{
GpuShaderText ss(shaderCreator->getLanguage());
ss.declareTex1D(nameIndex);
shaderCreator->addToDeclareShaderCode(ss.string().c_str());
}
else
{
GpuShaderText ss(shaderCreator->getLanguage());
ss.declareTex2D(nameIndex);
shaderCreator->addToDeclareShaderCode(ss.string().c_str());
}

#endif

// Sampler function
GpuShaderText ss(shaderCreator->getLanguage());

#ifndef NEW_CUSP_SAMPLING
const std::string hues_array_name = name + "_hues_array";

std::vector<float> hues_array(g.gamut_cusp_table.total_size);
for (int i = 0; i < g.gamut_cusp_table.total_size; ++i)
{
hues_array[i] = g.gamut_cusp_table.table[i][2];
}
ss.declareFloatArrayConst(hues_array_name, (int) hues_array.size(), hues_array.data());
#endif

ss.newLine() << ss.float2Keyword() << " " << name << "_sample(float h)";
ss.newLine() << "{";
ss.indent();

#ifdef NEW_CUSP_SAMPLING

ss.newLine() << ss.floatDecl("lut_h_min") << " = " << g.gamut_cusp_index_table.start << ";";
ss.newLine() << ss.floatDecl("lut_h_max") << " = " << g.gamut_cusp_index_table.end << ";";
ss.newLine() << ss.floatDecl("lut_h_range") << " = lut_h_max - lut_h_min;";
ss.newLine() << ss.floatDecl("lut_h") << " = ((h / 360.0) - lut_h_min) / lut_h_range;";
ss.newLine() << ss.floatDecl("f_lo") << " = lut_h * (" << (g.gamut_cusp_index_table.total_size - 1) << ");";

ss.newLine() << ss.intDecl("ii_lo") << " = " << ss.intKeyword() << "(f_lo);";
ss.newLine() << ss.intDecl("ii_hi") << " = ii_lo + 1;";
ss.newLine() << ss.floatDecl("f") << " = f_lo - " << ss.intKeyword() << "(f_lo);";

if (dimensionsIndex == GpuShaderDesc::TEXTURE_1D)
{
ss.newLine() << ss.floatDecl("loo") << " = " << ss.sampleTex1D(nameIndex, std::string("(ii_lo + 0.5) / ") + std::to_string(g.gamut_cusp_index_table.total_size)) << ".r;";
ss.newLine() << ss.floatDecl("hii") << " = " << ss.sampleTex1D(nameIndex, std::string("(ii_hi + 0.5) / ") + std::to_string(g.gamut_cusp_index_table.total_size)) << ".r;";
}
else
{
ss.newLine() << ss.floatDecl("loo") << " = " << ss.sampleTex2D(nameIndex, ss.float2Const(std::string("(ii_lo + 0.5) / ") + std::to_string(g.gamut_cusp_index_table.total_size), "0.5")) << ".r;";
ss.newLine() << ss.floatDecl("hii") << " = " << ss.sampleTex2D(nameIndex, ss.float2Const(std::string("(ii_hi + 0.5) / ") + std::to_string(g.gamut_cusp_index_table.total_size), "0.5")) << ".r;";
}

ss.newLine() << ss.intDecl("i_lo") << " = " << ss.intKeyword() << "(" << ss.lerp("loo", "hii", "f") << " * (" << g.gamut_cusp_table.total_size - 1 << "));";
ss.newLine() << ss.intDecl("i_hi") << " = i_lo + 1;";

#else

ss.newLine() << ss.floatDecl("i_lo") << " = 0;";
ss.newLine() << ss.floatDecl("i_hi") << " = " << g.gamut_cusp_table.base_index + g.gamut_cusp_table.size << ";";

Expand Down Expand Up @@ -769,6 +847,8 @@ std::string _Add_Cusp_table(
ss.dedent();
ss.newLine() << "}";

#endif

if (dimensions == GpuShaderDesc::TEXTURE_1D)
{
ss.newLine() << ss.float3Decl("lo") << " = " << ss.sampleTex1D(name, std::string("(i_hi - 1 + 0.5) / ") + std::to_string(g.gamut_cusp_table.total_size)) << ".rgb;";
Expand Down

0 comments on commit 7c9d6d8

Please sign in to comment.