From 7ffc0268dfd78647187554d6248015329e9f5d2d Mon Sep 17 00:00:00 2001 From: paradust7 <102263465+paradust7@users.noreply.github.com> Date: Sat, 11 Jun 2022 11:01:30 -0700 Subject: Inline triLinearInterpolationNoEase and triLinearInterpolation (#12421) Performance profiling on Linux AMD64 showed this to be a significant bottleneck. The non-inlined functions are expensive due to XMM registers spilling onto the stack. --- src/noise.cpp | 89 +++++++++++++++++++---------------------------------------- 1 file changed, 28 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/noise.cpp b/src/noise.cpp index d98d4dafb..99624f80d 100644 --- a/src/noise.cpp +++ b/src/noise.cpp @@ -38,15 +38,6 @@ // Unsigned magic seed prevents undefined behavior. #define NOISE_MAGIC_SEED 1013U -typedef float (*Interp2dFxn)( - float v00, float v10, float v01, float v11, - float x, float y); - -typedef float (*Interp3dFxn)( - float v000, float v100, float v010, float v110, - float v001, float v101, float v011, float v111, - float x, float y, float z); - FlagDesc flagdesc_noiseparams[] = { {"defaults", NOISE_FLAG_DEFAULTS}, {"eased", NOISE_FLAG_EASED}, @@ -198,47 +189,34 @@ inline float linearInterpolation(float v0, float v1, float t) inline float biLinearInterpolation( float v00, float v10, float v01, float v11, - float x, float y) -{ - float tx = easeCurve(x); - float ty = easeCurve(y); - float u = linearInterpolation(v00, v10, tx); - float v = linearInterpolation(v01, v11, tx); - return linearInterpolation(u, v, ty); -} - - -inline float biLinearInterpolationNoEase( - float v00, float v10, - float v01, float v11, - float x, float y) + float x, float y, + bool eased) { + // Inlining will optimize this branch out when possible + if (eased) { + x = easeCurve(x); + y = easeCurve(y); + } float u = linearInterpolation(v00, v10, x); float v = linearInterpolation(v01, v11, x); return linearInterpolation(u, v, y); } -float triLinearInterpolation( - float v000, float v100, float v010, float v110, - float v001, float v101, float v011, float v111, - float x, float y, float z) -{ - float tx = easeCurve(x); - float ty = easeCurve(y); - float tz = easeCurve(z); - float u = biLinearInterpolationNoEase(v000, v100, v010, v110, tx, ty); - float v = biLinearInterpolationNoEase(v001, v101, v011, v111, tx, ty); - return linearInterpolation(u, v, tz); -} - -float triLinearInterpolationNoEase( +inline float triLinearInterpolation( float v000, float v100, float v010, float v110, float v001, float v101, float v011, float v111, - float x, float y, float z) + float x, float y, float z, + bool eased) { - float u = biLinearInterpolationNoEase(v000, v100, v010, v110, x, y); - float v = biLinearInterpolationNoEase(v001, v101, v011, v111, x, y); + // Inlining will optimize this branch out when possible + if (eased) { + x = easeCurve(x); + y = easeCurve(y); + z = easeCurve(z); + } + float u = biLinearInterpolation(v000, v100, v010, v110, x, y, false); + float v = biLinearInterpolation(v001, v101, v011, v111, x, y, false); return linearInterpolation(u, v, z); } @@ -256,10 +234,7 @@ float noise2d_gradient(float x, float y, s32 seed, bool eased) float v01 = noise2d(x0, y0+1, seed); float v11 = noise2d(x0+1, y0+1, seed); // Interpolate - if (eased) - return biLinearInterpolation(v00, v10, v01, v11, xl, yl); - - return biLinearInterpolationNoEase(v00, v10, v01, v11, xl, yl); + return biLinearInterpolation(v00, v10, v01, v11, xl, yl, eased); } @@ -283,17 +258,11 @@ float noise3d_gradient(float x, float y, float z, s32 seed, bool eased) float v011 = noise3d(x0, y0 + 1, z0 + 1, seed); float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed); // Interpolate - if (eased) { - return triLinearInterpolation( - v000, v100, v010, v110, - v001, v101, v011, v111, - xl, yl, zl); - } - - return triLinearInterpolationNoEase( + return triLinearInterpolation( v000, v100, v010, v110, v001, v101, v011, v111, - xl, yl, zl); + xl, yl, zl, + eased); } @@ -518,9 +487,6 @@ void Noise::gradientMap2D( s32 x0, y0; bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED); - Interp2dFxn interpolate = eased ? - biLinearInterpolation : biLinearInterpolationNoEase; - x0 = std::floor(x); y0 = std::floor(y); u = x - (float)x0; @@ -547,7 +513,8 @@ void Noise::gradientMap2D( u = orig_u; noisex = 0; for (i = 0; i != sx; i++) { - gradient_buf[index++] = interpolate(v00, v10, v01, v11, u, v); + gradient_buf[index++] = + biLinearInterpolation(v00, v10, v01, v11, u, v, eased); u += step_x; if (u >= 1.0) { @@ -583,8 +550,7 @@ void Noise::gradientMap3D( u32 nlx, nly, nlz; s32 x0, y0, z0; - Interp3dFxn interpolate = (np.flags & NOISE_FLAG_EASED) ? - triLinearInterpolation : triLinearInterpolationNoEase; + bool eased = np.flags & NOISE_FLAG_EASED; x0 = std::floor(x); y0 = std::floor(y); @@ -625,10 +591,11 @@ void Noise::gradientMap3D( u = orig_u; noisex = 0; for (i = 0; i != sx; i++) { - gradient_buf[index++] = interpolate( + gradient_buf[index++] = triLinearInterpolation( v000, v100, v010, v110, v001, v101, v011, v111, - u, v, w); + u, v, w, + eased); u += step_x; if (u >= 1.0) { -- cgit v1.2.3