aboutsummaryrefslogtreecommitdiff
path: root/src/profiler.h
blob: 271ad70c1f1947c0b03dfd575b7ce656a1cb592f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
/*
Minetest
Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#ifndef PROFILER_HEADER
#define PROFILER_HEADER

#include "irrlichttypes_bloated.h"
#include <string>
#include <jmutex.h>
#include <jmutexautolock.h>
#include <map>
#include "util/timetaker.h"
#include "util/numeric.h" // paging()

/*
	Time profiler
*/

class Profiler
{
public:
	Profiler()
	{
		m_mutex.Init();
	}

	void add(const std::string &name, float value)
	{
		JMutexAutoLock lock(m_mutex);
		{
			/* No average shall have been used; mark add used as -2 */
			std::map<std::string, int>::iterator n = m_avgcounts.find(name);
			if(n == m_avgcounts.end())
				m_avgcounts[name] = -2;
			else{
				if(n->second == -1)
					n->second = -2;
				assert(n->second == -2);
			}
		}
		{
			std::map<std::string, float>::iterator n = m_data.find(name);
			if(n == m_data.end())
				m_data[name] = value;
			else
				n->second += value;
		}
	}

	void avg(const std::string &name, float value)
	{
		JMutexAutoLock lock(m_mutex);
		{
			std::map<std::string, int>::iterator n = m_avgcounts.find(name);
			if(n == m_avgcounts.end())
				m_avgcounts[name] = 1;
			else{
				/* No add shall have been used */
				assert(n->second != -2);
				n->second = (std::max)(n->second, 0) + 1;
			}
		}
		{
			std::map<std::string, float>::iterator n = m_data.find(name);
			if(n == m_data.end())
				m_data[name] = value;
			else
				n->second += value;
		}
	}

	void clear()
	{
		JMutexAutoLock lock(m_mutex);
		for(std::map<std::string, float>::iterator
				i = m_data.begin();
				i != m_data.end(); ++i)
		{
			i->second = 0;
		}
		m_avgcounts.clear();
	}

	void print(std::ostream &o)
	{
		printPage(o, 1, 1);
	}

	void printPage(std::ostream &o, u32 page, u32 pagecount)
	{
		JMutexAutoLock lock(m_mutex);

		u32 minindex, maxindex;
		paging(m_data.size(), page, pagecount, minindex, maxindex);

		for(std::map<std::string, float>::iterator
				i = m_data.begin();
				i != m_data.end(); ++i)
		{
			if(maxindex == 0)
				break;
			maxindex--;

			if(minindex != 0)
			{
				minindex--;
				continue;
			}

			std::string name = i->first;
			int avgcount = 1;
			std::map<std::string, int>::iterator n = m_avgcounts.find(name);
			if(n != m_avgcounts.end()){
				if(n->second >= 1)
					avgcount = n->second;
			}
			o<<"  "<<name<<": ";
			s32 clampsize = 40;
			s32 space = clampsize - name.size();
			for(s32 j=0; j<space; j++)
			{
				if(j%2 == 0 && j < space - 1)
					o<<"-";
				else
					o<<" ";
			}
			o<<(i->second / avgcount);
			o<<std::endl;
		}
	}

	typedef std::map<std::string, float> GraphValues;

	void graphAdd(const std::string &id, float value)
	{
		JMutexAutoLock lock(m_mutex);
		std::map<std::string, float>::iterator i =
				m_graphvalues.find(id);
		if(i == m_graphvalues.end())
			m_graphvalues[id] = value;
		else
			i->second += value;
	}
	void graphGet(GraphValues &result)
	{
		JMutexAutoLock lock(m_mutex);
		result = m_graphvalues;
		m_graphvalues.clear();
	}

private:
	JMutex m_mutex;
	std::map<std::string, float> m_data;
	std::map<std::string, int> m_avgcounts;
	std::map<std::string, float> m_graphvalues;
};

/*
	Selects which Profiler method a ScopeProfiler reports its
	measured duration to.
*/
enum ScopeProfilerType{
	SPT_ADD = 0,       // reported through Profiler::add()
	SPT_AVG = 1,       // reported through Profiler::avg()
	SPT_GRAPH_ADD = 2  // reported through Profiler::graphAdd()
};

/*
	Measures the time between its construction and destruction and
	reports it to a Profiler under a fixed name.

	profiler may be NULL, in which case no timer is created and the
	object does nothing. type selects the Profiler method that receives
	the measurement (add, avg or graphAdd).

	The object owns a heap-allocated TimeTaker, so it is not copyable:
	a copy would delete the same timer twice and report the same
	measurement twice.
*/
class ScopeProfiler
{
public:
	ScopeProfiler(Profiler *profiler, const std::string &name,
			enum ScopeProfilerType type = SPT_ADD):
		m_profiler(profiler),
		m_name(name),
		m_timer(NULL),
		m_type(type)
	{
		// The timer is given m_name (the copy owned by this object),
		// not the caller's string
		if(m_profiler)
			m_timer = new TimeTaker(m_name.c_str());
	}
	// name is copied
	ScopeProfiler(Profiler *profiler, const char *name,
			enum ScopeProfilerType type = SPT_ADD):
		m_profiler(profiler),
		m_name(name),
		m_timer(NULL),
		m_type(type)
	{
		if(m_profiler)
			m_timer = new TimeTaker(m_name.c_str());
	}
	~ScopeProfiler()
	{
		// m_timer is only ever created when m_profiler is non-NULL
		if(m_timer)
		{
			// NOTE(review): the argument to stop() is presumably the
			// "quiet" flag of TimeTaker - confirm against timetaker.h
			float duration_ms = m_timer->stop(true);
			// Profiler entries are fed seconds
			float duration = duration_ms / 1000.0;
			if(m_profiler){
				switch(m_type){
				case SPT_ADD:
					m_profiler->add(m_name, duration);
					break;
				case SPT_AVG:
					m_profiler->avg(m_name, duration);
					break;
				case SPT_GRAPH_ADD:
					m_profiler->graphAdd(m_name, duration);
					break;
				}
			}
			delete m_timer;
		}
	}
private:
	// Declared but intentionally not defined: copying would lead to a
	// double delete of m_timer
	ScopeProfiler(const ScopeProfiler &);
	ScopeProfiler &operator=(const ScopeProfiler &);

	Profiler *m_profiler;
	std::string m_name;
	TimeTaker *m_timer;
	enum ScopeProfilerType m_type;
};

#endif

pan class="hl opt">, -0.9238, -0.7071, -0.3826, 0, 0.3826, 0.7071, 0.9238 }; FlagDesc flagdesc_noiseparams[] = { {"defaults", NOISE_FLAG_DEFAULTS}, {"eased", NOISE_FLAG_EASED}, {"absvalue", NOISE_FLAG_ABSVALUE}, {"pointbuffer", NOISE_FLAG_POINTBUFFER}, {"simplex", NOISE_FLAG_SIMPLEX}, {NULL, 0} }; /////////////////////////////////////////////////////////////////////////////// PcgRandom::PcgRandom(u64 state, u64 seq) { seed(state, seq); } void PcgRandom::seed(u64 state, u64 seq) { m_state = 0U; m_inc = (seq << 1u) | 1u; next(); m_state += state; next(); } u32 PcgRandom::next() { u64 oldstate = m_state; m_state = oldstate * 6364136223846793005ULL + m_inc; u32 xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; u32 rot = oldstate >> 59u; return (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); } u32 PcgRandom::range(u32 bound) { // If the bound is 0, we cover the whole RNG's range if (bound == 0) return next(); /* If the bound is not a multiple of the RNG's range, it may cause bias, e.g. a RNG has a range from 0 to 3 and we take want a number 0 to 2. Using rand() % 3, the number 0 would be twice as likely to appear. With a very large RNG range, the effect becomes less prevalent but still present. This can be solved by modifying the range of the RNG to become a multiple of bound by dropping values above the a threshold. In our example, threshold == 4 - 3 = 1 % 3 == 1, so reject 0, thus making the range 3 with no bias. This loop looks dangerous, but will always terminate due to the RNG's property of uniformity. 
*/ u32 threshold = -bound % bound; u32 r; while ((r = next()) < threshold) ; return r % bound; } s32 PcgRandom::range(s32 min, s32 max) { if (max < min) throw PrngException("Invalid range (max < min)"); u32 bound = max - min + 1; return range(bound) + min; } void PcgRandom::bytes(void *out, size_t len) { u8 *outb = (u8 *)out; int bytes_left = 0; u32 r; while (len--) { if (bytes_left == 0) { bytes_left = sizeof(u32); r = next(); } *outb = r & 0xFF; outb++; bytes_left--; r >>= CHAR_BIT; } } s32 PcgRandom::randNormalDist(s32 min, s32 max, int num_trials) { s32 accum = 0; for (int i = 0; i != num_trials; i++) accum += range(min, max); return myround((float)accum / num_trials); } /////////////////////////////////////////////////////////////////////////////// float noise2d(int x, int y, int seed) { unsigned int n = (NOISE_MAGIC_X * x + NOISE_MAGIC_Y * y + NOISE_MAGIC_SEED * seed) & 0x7fffffff; n = (n >> 13) ^ n; n = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff; return 1.f - (float)(int)n / 0x40000000; } float noise3d(int x, int y, int z, int seed) { unsigned int n = (NOISE_MAGIC_X * x + NOISE_MAGIC_Y * y + NOISE_MAGIC_Z * z + NOISE_MAGIC_SEED * seed) & 0x7fffffff; n = (n >> 13) ^ n; n = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff; return 1.f - (float)(int)n / 0x40000000; } inline float dotProduct(float vx, float vy, float wx, float wy) { return vx * wx + vy * wy; } inline float linearInterpolation(float v0, float v1, float t) { return v0 + (v1 - v0) * t; } inline float biLinearInterpolation( float v00, float v10, float v01, float v11, float x, float y) { float tx = easeCurve(x); float ty = easeCurve(y); float u = linearInterpolation(v00, v10, tx); float v = linearInterpolation(v01, v11, tx); return linearInterpolation(u, v, ty); } inline float biLinearInterpolationNoEase( float v00, float v10, float v01, float v11, float x, float y) { float u = linearInterpolation(v00, v10, x); float v = linearInterpolation(v01, v11, x); return 
linearInterpolation(u, v, y); } float triLinearInterpolation( float v000, float v100, float v010, float v110, float v001, float v101, float v011, float v111, float x, float y, float z) { float tx = easeCurve(x); float ty = easeCurve(y); float tz = easeCurve(z); float u = biLinearInterpolationNoEase(v000, v100, v010, v110, tx, ty); float v = biLinearInterpolationNoEase(v001, v101, v011, v111, tx, ty); return linearInterpolation(u, v, tz); } float triLinearInterpolationNoEase( float v000, float v100, float v010, float v110, float v001, float v101, float v011, float v111, float x, float y, float z) { float u = biLinearInterpolationNoEase(v000, v100, v010, v110, x, y); float v = biLinearInterpolationNoEase(v001, v101, v011, v111, x, y); return linearInterpolation(u, v, z); } float noise2d_gradient(float x, float y, int seed, bool eased) { // Calculate the integer coordinates int x0 = myfloor(x); int y0 = myfloor(y); // Calculate the remaining part of the coordinates float xl = x - (float)x0; float yl = y - (float)y0; // Get values for corners of square float v00 = noise2d(x0, y0, seed); float v10 = noise2d(x0+1, y0, seed); float v01 = noise2d(x0, y0+1, seed); float v11 = noise2d(x0+1, y0+1, seed); // Interpolate if (eased) return biLinearInterpolation(v00, v10, v01, v11, xl, yl); else return biLinearInterpolationNoEase(v00, v10, v01, v11, xl, yl); } float noise3d_gradient(float x, float y, float z, int seed, bool eased) { // Calculate the integer coordinates int x0 = myfloor(x); int y0 = myfloor(y); int z0 = myfloor(z); // Calculate the remaining part of the coordinates float xl = x - (float)x0; float yl = y - (float)y0; float zl = z - (float)z0; // Get values for corners of cube float v000 = noise3d(x0, y0, z0, seed); float v100 = noise3d(x0 + 1, y0, z0, seed); float v010 = noise3d(x0, y0 + 1, z0, seed); float v110 = noise3d(x0 + 1, y0 + 1, z0, seed); float v001 = noise3d(x0, y0, z0 + 1, seed); float v101 = noise3d(x0 + 1, y0, z0 + 1, seed); float v011 = noise3d(x0, 
y0 + 1, z0 + 1, seed); float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed); // Interpolate if (eased) { return triLinearInterpolation( v000, v100, v010, v110, v001, v101, v011, v111, xl, yl, zl); } else { return triLinearInterpolationNoEase( v000, v100, v010, v110, v001, v101, v011, v111, xl, yl, zl); } } float noise2d_perlin(float x, float y, int seed, int octaves, float persistence, bool eased) { float a = 0; float f = 1.0; float g = 1.0; for (int i = 0; i < octaves; i++) { a += g * noise2d_gradient(x * f, y * f, seed + i, eased); f *= 2.0; g *= persistence; } return a; } float noise2d_perlin_abs(float x, float y, int seed, int octaves, float persistence, bool eased) { float a = 0; float f = 1.0; float g = 1.0; for (int i = 0; i < octaves; i++) { a += g * fabs(noise2d_gradient(x * f, y * f, seed + i, eased)); f *= 2.0; g *= persistence; } return a; } float noise3d_perlin(float x, float y, float z, int seed, int octaves, float persistence, bool eased) { float a = 0; float f = 1.0; float g = 1.0; for (int i = 0; i < octaves; i++) { a += g * noise3d_gradient(x * f, y * f, z * f, seed + i, eased); f *= 2.0; g *= persistence; } return a; } float noise3d_perlin_abs(float x, float y, float z, int seed, int octaves, float persistence, bool eased) { float a = 0; float f = 1.0; float g = 1.0; for (int i = 0; i < octaves; i++) { a += g * fabs(noise3d_gradient(x * f, y * f, z * f, seed + i, eased)); f *= 2.0; g *= persistence; } return a; } float contour(float v) { v = fabs(v); if (v >= 1.0) return 0.0; return (1.0 - v); } ///////////////////////// [ New noise ] //////////////////////////// float NoisePerlin2D(NoiseParams *np, float x, float y, int seed) { float a = 0; float f = 1.0; float g = 1.0; x /= np->spread.X; y /= np->spread.Y; seed += np->seed; for (size_t i = 0; i < np->octaves; i++) { float noiseval = noise2d_gradient(x * f, y * f, seed + i, np->flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED)); if (np->flags & NOISE_FLAG_ABSVALUE) noiseval = fabs(noiseval); a += g 
* noiseval; f *= np->lacunarity; g *= np->persist; } return np->offset + a * np->scale; } float NoisePerlin3D(NoiseParams *np, float x, float y, float z, int seed) { float a = 0; float f = 1.0; float g = 1.0; x /= np->spread.X; y /= np->spread.Y; z /= np->spread.Z; seed += np->seed; for (size_t i = 0; i < np->octaves; i++) { float noiseval = noise3d_gradient(x * f, y * f, z * f, seed + i, np->flags & NOISE_FLAG_EASED); if (np->flags & NOISE_FLAG_ABSVALUE) noiseval = fabs(noiseval); a += g * noiseval; f *= np->lacunarity; g *= np->persist; } return np->offset + a * np->scale; } Noise::Noise(NoiseParams *np_, int seed, u32 sx, u32 sy, u32 sz) { memcpy(&np, np_, sizeof(np)); this->seed = seed; this->sx = sx; this->sy = sy; this->sz = sz; this->persist_buf = NULL; this->gradient_buf = NULL; this->result = NULL; allocBuffers(); } Noise::~Noise() { delete[] gradient_buf; delete[] persist_buf; delete[] noise_buf; delete[] result; } void Noise::allocBuffers() { if (sx < 1) sx = 1; if (sy < 1) sy = 1; if (sz < 1) sz = 1; this->noise_buf = NULL; resizeNoiseBuf(sz > 1); delete[] gradient_buf; delete[] persist_buf; delete[] result; try { size_t bufsize = sx * sy * sz; this->persist_buf = NULL; this->gradient_buf = new float[bufsize]; this->result = new float[bufsize]; } catch (std::bad_alloc &e) { throw InvalidNoiseParamsException(); } } void Noise::setSize(u32 sx, u32 sy, u32 sz) { this->sx = sx; this->sy = sy; this->sz = sz; allocBuffers(); } void Noise::setSpreadFactor(v3f spread) { this->np.spread = spread; resizeNoiseBuf(sz > 1); } void Noise::setOctaves(int octaves) { this->np.octaves = octaves; resizeNoiseBuf(sz > 1); } void Noise::resizeNoiseBuf(bool is3d) { //maximum possible spread value factor float ofactor = (np.lacunarity > 1.0) ? 
pow(np.lacunarity, np.octaves - 1) : np.lacunarity; // noise lattice point count // (int)(sz * spread * ofactor) is # of lattice points crossed due to length float num_noise_points_x = sx * ofactor / np.spread.X; float num_noise_points_y = sy * ofactor / np.spread.Y; float num_noise_points_z = sz * ofactor / np.spread.Z; // protect against obviously invalid parameters if (num_noise_points_x > 1000000000.f || num_noise_points_y > 1000000000.f || num_noise_points_z > 1000000000.f) throw InvalidNoiseParamsException(); // + 2 for the two initial endpoints // + 1 for potentially crossing a boundary due to offset size_t nlx = (size_t)ceil(num_noise_points_x) + 3; size_t nly = (size_t)ceil(num_noise_points_y) + 3; size_t nlz = is3d ? (size_t)ceil(num_noise_points_z) + 3 : 1; delete[] noise_buf; try { noise_buf = new float[nlx * nly * nlz]; } catch (std::bad_alloc &e) { throw InvalidNoiseParamsException(); } } /* * NB: This algorithm is not optimal in terms of space complexity. The entire * integer lattice of noise points could be done as 2 lines instead, and for 3D, * 2 lines + 2 planes. * However, this would require the noise calls to be interposed with the * interpolation loops, which may trash the icache, leading to lower overall * performance. * Another optimization that could save half as many noise calls is to carry over * values from the previous noise lattice as midpoints in the new lattice for the * next octave. */ #define idx(x, y) ((y) * nlx + (x)) void Noise::gradientMap2D( float x, float y, float step_x, float step_y, int seed) { float v00, v01, v10, v11, u, v, orig_u; u32 index, i, j, noisex, noisey; u32 nlx, nly; s32 x0, y0; bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED); Interp2dFxn interpolate = eased ? 
biLinearInterpolation : biLinearInterpolationNoEase; x0 = floor(x); y0 = floor(y); u = x - (float)x0; v = y - (float)y0; orig_u = u; //calculate noise point lattice nlx = (u32)(u + sx * step_x) + 2; nly = (u32)(v + sy * step_y) + 2; index = 0; for (j = 0; j != nly; j++) for (i = 0; i != nlx; i++) noise_buf[index++] = noise2d(x0 + i, y0 + j, seed); //calculate interpolations index = 0; noisey = 0; for (j = 0; j != sy; j++) { v00 = noise_buf[idx(0, noisey)]; v10 = noise_buf[idx(1, noisey)]; v01 = noise_buf[idx(0, noisey + 1)]; v11 = noise_buf[idx(1, noisey + 1)]; u = orig_u; noisex = 0; for (i = 0; i != sx; i++) { gradient_buf[index++] = interpolate(v00, v10, v01, v11, u, v); u += step_x; if (u >= 1.0) { u -= 1.0; noisex++; v00 = v10; v01 = v11; v10 = noise_buf[idx(noisex + 1, noisey)]; v11 = noise_buf[idx(noisex + 1, noisey + 1)]; } } v += step_y; if (v >= 1.0) { v -= 1.0; noisey++; } } } #undef idx #define idx(x, y, z) ((z) * nly * nlx + (y) * nlx + (x)) void Noise::gradientMap3D( float x, float y, float z, float step_x, float step_y, float step_z, int seed) { float v000, v010, v100, v110; float v001, v011, v101, v111; float u, v, w, orig_u, orig_v; u32 index, i, j, k, noisex, noisey, noisez; u32 nlx, nly, nlz; s32 x0, y0, z0; Interp3dFxn interpolate = (np.flags & NOISE_FLAG_EASED) ? triLinearInterpolation : triLinearInterpolationNoEase; x0 = floor(x); y0 = floor(y); z0 = floor(z); u = x - (float)x0; v = y - (float)y0; w = z - (float)z0; orig_u = u; orig_v = v; //calculate noise point lattice nlx = (u32)(u + sx * step_x) + 2; nly = (u32)(v + sy * step_y) + 2; nlz = (u32)(w + sz * step_z) + 2; index = 0; for (k = 0; k != nlz; k++) for (j = 0; j != nly; j++) for (i = 0; i != nlx; i++) noise_buf[index++] = noise3d(x0 + i, y0 + j, z0 + k, seed); //calculate interpolations index = 0; noisey = 0; noisez = 0; for (k = 0; k != sz; k++) { v = orig_v; noisey = 0; for (j = 0; j != sy; j++) {