about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: paradust7 <102263465+paradust7@users.noreply.github.com> 2022-06-11 11:01:30 -0700
committer: GitHub <noreply@github.com> 2022-06-11 20:01:30 +0200
commit: 7ffc0268dfd78647187554d6248015329e9f5d2d (patch)
tree: e278298817298a549e227a1bd0e2bb3ff8ee1672 /src
parent: e7d4ec6834282402c24fe432e0dcebe78b7fcd01 (diff)
download: minetest-7ffc0268dfd78647187554d6248015329e9f5d2d.tar.gz
minetest-7ffc0268dfd78647187554d6248015329e9f5d2d.tar.bz2
minetest-7ffc0268dfd78647187554d6248015329e9f5d2d.zip
Inline triLinearInterpolationNoEase and triLinearInterpolation (#12421)
Performance profiling on Linux AMD64 showed this to be a significant bottleneck. The non-inlined functions are expensive due to XMM registers spilling onto the stack.
Diffstat (limited to 'src')
-rw-r--r-- src/noise.cpp 89
1 files changed, 28 insertions, 61 deletions
diff --git a/src/noise.cpp b/src/noise.cpp
index d98d4dafb..99624f80d 100644
--- a/src/noise.cpp
+++ b/src/noise.cpp
@@ -38,15 +38,6 @@
// Unsigned magic seed prevents undefined behavior.
#define NOISE_MAGIC_SEED 1013U
-typedef float (*Interp2dFxn)(
- float v00, float v10, float v01, float v11,
- float x, float y);
-
-typedef float (*Interp3dFxn)(
- float v000, float v100, float v010, float v110,
- float v001, float v101, float v011, float v111,
- float x, float y, float z);
-
FlagDesc flagdesc_noiseparams[] = {
{"defaults", NOISE_FLAG_DEFAULTS},
{"eased", NOISE_FLAG_EASED},
@@ -198,47 +189,34 @@ inline float linearInterpolation(float v0, float v1, float t)
inline float biLinearInterpolation(
float v00, float v10,
float v01, float v11,
- float x, float y)
-{
- float tx = easeCurve(x);
- float ty = easeCurve(y);
- float u = linearInterpolation(v00, v10, tx);
- float v = linearInterpolation(v01, v11, tx);
- return linearInterpolation(u, v, ty);
-}
-
-
-inline float biLinearInterpolationNoEase(
- float v00, float v10,
- float v01, float v11,
- float x, float y)
+ float x, float y,
+ bool eased)
{
+ // Inlining will optimize this branch out when possible
+ if (eased) {
+ x = easeCurve(x);
+ y = easeCurve(y);
+ }
float u = linearInterpolation(v00, v10, x);
float v = linearInterpolation(v01, v11, x);
return linearInterpolation(u, v, y);
}
-float triLinearInterpolation(
- float v000, float v100, float v010, float v110,
- float v001, float v101, float v011, float v111,
- float x, float y, float z)
-{
- float tx = easeCurve(x);
- float ty = easeCurve(y);
- float tz = easeCurve(z);
- float u = biLinearInterpolationNoEase(v000, v100, v010, v110, tx, ty);
- float v = biLinearInterpolationNoEase(v001, v101, v011, v111, tx, ty);
- return linearInterpolation(u, v, tz);
-}
-
-float triLinearInterpolationNoEase(
+inline float triLinearInterpolation(
float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111,
- float x, float y, float z)
+ float x, float y, float z,
+ bool eased)
{
- float u = biLinearInterpolationNoEase(v000, v100, v010, v110, x, y);
- float v = biLinearInterpolationNoEase(v001, v101, v011, v111, x, y);
+ // Inlining will optimize this branch out when possible
+ if (eased) {
+ x = easeCurve(x);
+ y = easeCurve(y);
+ z = easeCurve(z);
+ }
+ float u = biLinearInterpolation(v000, v100, v010, v110, x, y, false);
+ float v = biLinearInterpolation(v001, v101, v011, v111, x, y, false);
return linearInterpolation(u, v, z);
}
@@ -256,10 +234,7 @@ float noise2d_gradient(float x, float y, s32 seed, bool eased)
float v01 = noise2d(x0, y0+1, seed);
float v11 = noise2d(x0+1, y0+1, seed);
// Interpolate
- if (eased)
- return biLinearInterpolation(v00, v10, v01, v11, xl, yl);
-
- return biLinearInterpolationNoEase(v00, v10, v01, v11, xl, yl);
+ return biLinearInterpolation(v00, v10, v01, v11, xl, yl, eased);
}
@@ -283,17 +258,11 @@ float noise3d_gradient(float x, float y, float z, s32 seed, bool eased)
float v011 = noise3d(x0, y0 + 1, z0 + 1, seed);
float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed);
// Interpolate
- if (eased) {
- return triLinearInterpolation(
- v000, v100, v010, v110,
- v001, v101, v011, v111,
- xl, yl, zl);
- }
-
- return triLinearInterpolationNoEase(
+ return triLinearInterpolation(
v000, v100, v010, v110,
v001, v101, v011, v111,
- xl, yl, zl);
+ xl, yl, zl,
+ eased);
}
@@ -518,9 +487,6 @@ void Noise::gradientMap2D(
s32 x0, y0;
bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED);
- Interp2dFxn interpolate = eased ?
- biLinearInterpolation : biLinearInterpolationNoEase;
-
x0 = std::floor(x);
y0 = std::floor(y);
u = x - (float)x0;
@@ -547,7 +513,8 @@ void Noise::gradientMap2D(
u = orig_u;
noisex = 0;
for (i = 0; i != sx; i++) {
- gradient_buf[index++] = interpolate(v00, v10, v01, v11, u, v);
+ gradient_buf[index++] =
+ biLinearInterpolation(v00, v10, v01, v11, u, v, eased);
u += step_x;
if (u >= 1.0) {
@@ -583,8 +550,7 @@ void Noise::gradientMap3D(
u32 nlx, nly, nlz;
s32 x0, y0, z0;
- Interp3dFxn interpolate = (np.flags & NOISE_FLAG_EASED) ?
- triLinearInterpolation : triLinearInterpolationNoEase;
+ bool eased = np.flags & NOISE_FLAG_EASED;
x0 = std::floor(x);
y0 = std::floor(y);
@@ -625,10 +591,11 @@ void Noise::gradientMap3D(
u = orig_u;
noisex = 0;
for (i = 0; i != sx; i++) {
- gradient_buf[index++] = interpolate(
+ gradient_buf[index++] = triLinearInterpolation(
v000, v100, v010, v110,
v001, v101, v011, v111,
- u, v, w);
+ u, v, w,
+ eased);
u += step_x;
if (u >= 1.0) {