Implement custom printf function, use it in favour of OS *printf

* Custom printf supports UTF-8 and in fact assumes it implicitly for all strings. This means we can use UTF-8 everywhere, even on Windows, without widening parameters to printf()-style functions like RDCLOG et al.
* It also allows me to add size_t support and binary printing.
* Grisu2 is used for converting doubles to strings. References:
  Paper: http://florian.loitsch.com/publications/dtoa-pldi2010.pdf
  impl: https://github.com/floitsch/double-conversion
  impl: https://github.com/night-shift/fpconv
  impl: https://github.com/miloyip/dtoa-benchmark
2026-05-29 13:20:54 +00:00 · 2014-11-22 23:45:33 +00:00
parent 3abdff49a7
commit 9d164826da
7 changed files with 1770 additions and 59 deletions
@@ -26,8 +26,73 @@
 #include "os/os_specific.h"
 #include "common/string_utils.h"

+#include <stdarg.h>
+
 using std::string;

+int utf8printf(char *buf, size_t bufsize, const char *fmt, va_list args); // custom printf core, defined outside this file; StringFormat::vsnprintf forwards to it
+
+namespace StringFormat
+{
+
+int snprintf(char *str, size_t bufSize, const char *fmt, ...) // variadic front-end for StringFormat::vsnprintf
+{
+	va_list args;
+	va_start(args, fmt);
+
+	int ret = StringFormat::vsnprintf(str, bufSize, fmt, args); // forward the packed arguments unchanged
+
+	va_end(args);
+
+	return ret; // whatever vsnprintf returned, passed straight through
+}
+
+int vsnprintf(char *str, size_t bufSize, const char *format, va_list args) // thin wrapper over the global utf8printf()
+{
+	return ::utf8printf(str, bufSize, format, args); // same arguments and return value, no extra processing
+}
+
+int Wide2UTF8(wchar_t chr, char mbchr[4]) // encodes one wide character into mbchr as UTF-8 and returns the number of bytes written (1-4); no terminator is written
+{
+	// U+00000 -> U+00007F 1 byte  0xxxxxxx
+	// U+00080 -> U+0007FF 2 bytes 110xxxxx 10xxxxxx
+	// U+00800 -> U+00FFFF 3 bytes 1110xxxx 10xxxxxx 10xxxxxx
+	// U+10000 -> U+10FFFF 4 bytes 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (anything larger is replaced just below)
+
+	if(chr > 0x10FFFF)
+		chr = 0xFFFD; // replacement character, subsequently encoded by the 3-byte branch
+
+	if(chr <= 0x7f)
+	{
+		mbchr[0] = (char)chr;
+		return 1;
+	}
+	else if(chr <= 0x7ff)
+	{
+		mbchr[1] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
+		mbchr[0] = 0xC0 | (char)(chr & 0x1f);
+		return 2;
+	}
+	else if(chr <= 0xffff)
+	{
+		mbchr[2] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
+		mbchr[1] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
+		mbchr[0] = 0xE0 | (char)(chr & 0x0f);chr >>= 4;
+		return 3;
+	}
+	else
+	{
+		// code points above 0x10FFFF were already replaced above, so this is U+10000..U+10FFFF
+		mbchr[3] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
+		mbchr[2] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
+		mbchr[1] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
+		mbchr[0] = 0xF0 | (char)(chr & 0x07);chr >>= 3;
+		return 4;
+	}
+}
+
+}; // namespace StringFormat
+
 wstring Callstack::AddressDetails::formattedString(const char *commonPath)
 {
 	wchar_t fmt[512] = {0};
@@ -202,19 +202,30 @@ namespace Keyboard
 	bool GetKeyState(int key);
 };

+// implemented per-platform
 namespace StringFormat
 {
-	int snprintf(char *str, size_t bufSize, const char *format, ...);
-	int wsnprintf(wchar_t *str, size_t bufSize, const wchar_t *format, ...);
-	int vsnprintf(char *str, size_t bufSize, const char *format, va_list v);
 	void sntimef(char *str, size_t bufSize, const char *format);
-	void wcsncpy(wchar_t *dst, const wchar_t *src, size_t count);

+	// forwards to vsnprintf below, needed to be here due to va_copy differences
 	string Fmt(const char *format, ...);
-	wstring WFmt(const wchar_t *format, ...);

 	string Wide2UTF8(const wstring &s);
 	wstring UTF82Wide(const string &s);
+
+	// TODO remove
+	int wsnprintf(wchar_t *str, size_t bufSize, const wchar_t *format, ...);
+	wstring WFmt(const wchar_t *format, ...);
+};
+
+// utility functions, implemented once in os_specific.cpp rather than per-platform (assuming a standard stdarg.h).
+// Formatting is forwarded to the custom printf implementation in utf8printf.cpp
+namespace StringFormat
+{
+	int vsnprintf(char *str, size_t bufSize, const char *format, va_list v);
+	int snprintf(char *str, size_t bufSize, const char *format, ...);
+
+	int Wide2UTF8(wchar_t chr, char mbchr[4]); // single-character overload: fills mbchr, returns the number of bytes used
+};

 namespace OSUtility
@@ -265,71 +265,19 @@ namespace FileIO

 namespace StringFormat
 {
-	int snprintf(char *str, size_t bufSize, const char *fmt, ...)
-	{
-		va_list args;
-		va_start(args, fmt);
-
-		int ret = vsnprintf(str, bufSize, fmt, args);
-
-		va_end(args);
-
-		return ret;
-	}
-	
+	///////////////////////////////////////////////////////////////////////////
 	int wsnprintf(wchar_t *str, size_t bufSize, const wchar_t *format, ...)
 	{
 		va_list args;
 		va_start(args, format);

-		int ret =  ::_vsnwprintf_s(str, bufSize, bufSize, format, args);
+		int ret =  ::_vsnwprintf_s(str, bufSize, bufSize-1, format, args);

 		va_end(args);

 		return ret;
 	}

-	int vsnprintf(char *str, size_t bufSize, const char *format, va_list args)
-	{
-		return ::vsnprintf_s(str, bufSize, bufSize, format, args);
-	}
-
-	void sntimef(char *str, size_t bufSize, const char *format)
-	{
-		time_t tim;
-		time(&tim);
-
-		tm tmv;
-		localtime_s(&tmv, &tim);
-
-		strftime(str, bufSize, format, &tmv);
-	}
-
-	void wcsncpy(wchar_t *dst, const wchar_t *src, size_t count)
-	{
-		::wcsncpy_s(dst, count, src, count);
-	}
-
-	string Fmt(const char *format, ...)
-	{
-		va_list args;
-		va_start(args, format);
-
-		int size = _vscprintf(format, args)+1;
-
-		char *buf = new char[size];
-
-		StringFormat::vsnprintf(buf, size, format, args);
-
-		va_end(args);
-
-		string ret = buf;
-
-		delete[] buf;
-		
-		return ret;
-	}
-
 	wstring WFmt(const wchar_t *format, ...)
 	{
 		va_list args;
@@ -349,6 +297,44 @@ namespace StringFormat
 		
 		return ret;
 	}
+	///////////////////////////////////////////////////////////////////////////
+	
+	void sntimef(char *str, size_t bufSize, const char *format) // writes the current local time into str using a strftime pattern
+	{
+		time_t tim;
+		time(&tim); // current calendar time
+
+		tm tmv;
+		localtime_s(&tmv, &tim); // break it down into local time fields
+
+		strftime(str, bufSize, format, &tmv); // return value is not checked
+	}
+		
+	string Fmt(const char *format, ...) // printf-style formatting that returns the result as a string
+	{
+		va_list args;
+		va_start(args, format);
+
+		va_list args2;
+		//va_copy(args2, args); // not implemented on VS2010
+		args2 = args; // second cursor, because the argument list is walked twice (measure, then write)
+
+		int size = StringFormat::vsnprintf(NULL, 0, format, args2); // measuring pass: no output buffer
+
+		char *buf = new char[size+1];
+		buf[size] = 0; // terminate up front in case the write pass does not
+
+		StringFormat::vsnprintf(buf, size+1, format, args); // pass the real capacity (terminator included) so the last character is not cut off
+
+		va_end(args);
+		va_end(args2);
+
+		string ret = buf;
+
+		delete[] buf;
+		
+		return ret;
+	}

 	// save on reallocation, keep a persistent scratch buffer for conversions
 	vector<char> charBuffer;
@@ -367,7 +367,9 @@
    <ClCompile Include="replay\replay_output.cpp" />
    <ClCompile Include="replay\replay_renderer.cpp" />
    <ClCompile Include="replay\type_helpers.cpp" />
+    <ClCompile Include="serialise\grisu2.cpp" />
    <ClCompile Include="serialise\serialiser.cpp" />
+    <ClCompile Include="serialise\utf8printf.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="data\renderdoc.rc" />
@@ -85,6 +85,9 @@
    <Filter Include="3rdparty\lz4">
      <UniqueIdentifier>{043f5a32-683e-4b56-bcc6-512444b40d70}</UniqueIdentifier>
    </Filter>
+    <Filter Include="Common\Strings">
+      <UniqueIdentifier>{ce0b860f-38b7-48af-b49d-7dcb23378f82}</UniqueIdentifier>
+    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="maths\camera.h">
@@ -524,6 +527,12 @@
    <ClCompile Include="driver\d3d11\d3d11_context2_wrap.cpp">
      <Filter>Drivers\D3D11</Filter>
    </ClCompile>
+    <ClCompile Include="serialise\grisu2.cpp">
+      <Filter>Common\Strings</Filter>
+    </ClCompile>
+    <ClCompile Include="serialise\utf8printf.cpp">
+      <Filter>Common\Strings</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <None Include="os\win32\comexport.def">
@@ -0,0 +1,324 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ * 
+ * Copyright (c) 2014 Baldur Karlsson
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "common/common.h"
+
+#include <math.h>
+
+///////////////////////////////////////////////////////////////////////////
+// Grisu2 implementation (slightly simpler than Grisu3) for converting
+// doubles to strings
+//
+// Sources:
+// Based on Florian Loitsch 2010 "Printing Floating-Point Numbers Quickly
+//                                and Accurately with Integers"
+//     http://florian.loitsch.com/publications/dtoa-pldi2010.pdf
+//     https://github.com/floitsch/double-conversion (BSD licensed)
+//
+// Also implementations by Milo Yip and night-shift used as reference
+//     https://github.com/miloyip/dtoa-benchmark
+//     https://github.com/night-shift/fpconv
+
+struct diy_fp // floating point value held as mantissa * 2^exp
+{
+	diy_fp() {} // leaves both members uninitialised
+	diy_fp(uint64_t mant, int exponent) : mantissa(mant), exp(exponent) {}
+	uint64_t mantissa;
+	int exp;
+
+	// q in the paper: the number of bits in the mantissa of the fixed point
+	// approximation
+	static const int bitsq = 64;
+};
+
+// subtraction from the Florian Loitsch paper: the mantissas are subtracted directly
+diy_fp operator -(const diy_fp &x, const diy_fp &y)
+{
+	// both operands are assumed to share the same exponent (not checked); the result keeps x's
+	return diy_fp(x.mantissa-y.mantissa, x.exp);
+}
+
+// multiplication from the Florian Loitsch paper: 64x64 bit product keeping only the rounded upper 64 bits
+diy_fp operator *(const diy_fp &x, const diy_fp &y)
+{
+	// _a = upper 32 bits, _b = lower 32 bits
+	uint64_t xa = x.mantissa >> 32, xb = x.mantissa & 0xFFFFFFFF;
+	uint64_t ya = y.mantissa >> 32, yb = y.mantissa & 0xFFFFFFFF;
+
+	// perform each pair of 32x32 multiplies (every partial product fits in 64 bits)
+	uint64_t upper = xa*ya;
+	uint64_t lower = xb*yb;
+	uint64_t cross1 = xb*ya;
+	uint64_t cross2 = xa*yb;
+
+	uint64_t tmp = (lower>>32) + (cross1&0xFFFFFFFF) + (cross2&0xFFFFFFFF);
+	tmp += 1U << 31; // round: add half a unit of the 32 bits that are shifted out below
+
+	// note - the result is not re-normalised. The +64 accounts for the discarded low 64 bits of the product
+	return diy_fp(upper + (cross1>>32) + (cross2>>32) + (tmp>>32), x.exp + y.exp + 64);
+}
+
+static diy_fp pow10cache[] = { // cached powers of ten as (mantissa, binary exponent) pairs, indexed by find_cachedpow10()
+	diy_fp(18054884314459144840U, -1220),
+	diy_fp(13451937075301367670U, -1193),
+	diy_fp(10022474136428063862U, -1166),
+	diy_fp(14934650266808366570U, -1140),
+	diy_fp(11127181549972568877U, -1113),
+	diy_fp(16580792590934885855U, -1087),
+	diy_fp(12353653155963782858U, -1060),
+	diy_fp(18408377700990114895U, -1034),
+	diy_fp(13715310171984221708U, -1007),
+	diy_fp(10218702384817765436U, -980),
+	diy_fp(15227053142812498563U, -954),
+	diy_fp(11345038669416679861U, -927),
+	diy_fp(16905424996341287883U, -901),
+	diy_fp(12595523146049147757U, -874),
+	diy_fp(9384396036005875287U,  -847),
+	diy_fp(13983839803942852151U, -821),
+	diy_fp(10418772551374772303U, -794),
+	diy_fp(15525180923007089351U, -768),
+	diy_fp(11567161174868858868U, -741),
+	diy_fp(17236413322193710309U, -715),
+	diy_fp(12842128665889583758U, -688),
+	diy_fp(9568131466127621947U,  -661),
+	diy_fp(14257626930069360058U, -635),
+	diy_fp(10622759856335341974U, -608),
+	diy_fp(15829145694278690180U, -582),
+	diy_fp(11793632577567316726U, -555),
+	diy_fp(17573882009934360870U, -529),
+	diy_fp(13093562431584567480U, -502),
+	diy_fp(9755464219737475723U,  -475),
+	diy_fp(14536774485912137811U, -449),
+	diy_fp(10830740992659433045U, -422),
+	diy_fp(16139061738043178685U, -396),
+	diy_fp(12024538023802026127U, -369),
+	diy_fp(17917957937422433684U, -343),
+	diy_fp(13349918974505688015U, -316),
+	diy_fp(9946464728195732843U,  -289),
+	diy_fp(14821387422376473014U, -263),
+	diy_fp(11042794154864902060U, -236),
+	diy_fp(16455045573212060422U, -210),
+	diy_fp(12259964326927110867U, -183),
+	diy_fp(18268770466636286478U, -157),
+	diy_fp(13611294676837538539U, -130),
+	diy_fp(10141204801825835212U, -103),
+	diy_fp(15111572745182864684U, -77),
+	diy_fp(11258999068426240000U, -50),
+	diy_fp(16777216000000000000U, -24),
+	diy_fp(12500000000000000000U,   3),
+	diy_fp(9313225746154785156U,   30),
+	diy_fp(13877787807814456755U,  56),
+	diy_fp(10339757656912845936U,  83),
+	diy_fp(15407439555097886824U, 109),
+	diy_fp(11479437019748901445U, 136),
+	diy_fp(17105694144590052135U, 162),
+	diy_fp(12744735289059618216U, 189),
+	diy_fp(9495567745759798747U,  216),
+	diy_fp(14149498560666738074U, 242),
+	diy_fp(10542197943230523224U, 269),
+	diy_fp(15709099088952724970U, 295),
+	diy_fp(11704190886730495818U, 322),
+	diy_fp(17440603504673385349U, 348),
+	diy_fp(12994262207056124023U, 375),
+	diy_fp(9681479787123295682U,  402),
+	diy_fp(14426529090290212157U, 428),
+	diy_fp(10748601772107342003U, 455),
+	diy_fp(16016664761464807395U, 481),
+	diy_fp(11933345169920330789U, 508),
+	diy_fp(17782069995880619868U, 534),
+	diy_fp(13248674568444952270U, 561),
+	diy_fp(9871031767461413346U,  588),
+	diy_fp(14708983551653345445U, 614),
+	diy_fp(10959046745042015199U, 641),
+	diy_fp(16330252207878254650U, 667),
+	diy_fp(12166986024289022870U, 694),
+	diy_fp(18130221999122236476U, 720),
+	diy_fp(13508068024458167312U, 747),
+	diy_fp(10064294952495520794U, 774),
+	diy_fp(14996968138956309548U, 800),
+	diy_fp(11173611982879273257U, 827),
+	diy_fp(16649979327439178909U, 853),
+	diy_fp(12405201291620119593U, 880),
+	diy_fp(9242595204427927429U,  907),
+	diy_fp(13772540099066387757U, 933),
+	diy_fp(10261342003245940623U, 960),
+	diy_fp(15290591125556738113U, 986),
+	diy_fp(11392378155556871081U, 1013),
+	diy_fp(16975966327722178521U, 1039),
+	diy_fp(12648080533535911531U, 1066),
+};
+
+static const int firstpow10 = -348; // decimal exponent of the first cached power of 10 (pow10cache[0])
+static const int cachestep = 8; // decimal exponent difference between consecutive cache items
+
+diy_fp find_cachedpow10(int exp, int& kout) // picks a cached power of ten for binary exponent exp; kout receives its decimal exponent
+{
+	const double inv_log2_10 = 0.30102999566398114; // 1/log2(10), i.e. log10(2)
+	const double alpha = -60.0;
+
+	// k calculation from the paper ceil[ (alpha - exp + q - 1) * 1/log2(10) ]
+	// exponent is shifted by #bits, so the bitsq terms cancel and this is ceil[ (alpha - exp - 1) * 1/log2(10) ]
+	int k = (int)ceil( (alpha - double(exp + diy_fp::bitsq) + diy_fp::bitsq - 1) * inv_log2_10 );
+
+	// determine index in above array (no range check is made on idx)
+	int idx = (-firstpow10 + k - 1) / cachestep + 1;
+
+	// output the decimal power that corresponds to the chosen cache entry
+	kout = (firstpow10 + idx * cachestep);
+
+	return pow10cache[idx];
+}
+
+static int gen_digits(const diy_fp &lower, const diy_fp &upper, char *digits, int &kout) // writes decimal digits (no terminator) into digits, returns how many; kout is adjusted by the final kappa
+{
+	diy_fp delta = upper-lower;
+
+	// generate 1.0 to the desired exponent so we can split integer from decimal part (assumes upper.exp is negative)
+	diy_fp one(uint64_t(1) << -upper.exp, upper.exp);
+
+	// mask off integer and decimal parts
+	uint64_t intpart = upper.mantissa >> -one.exp;
+	uint64_t decpart = upper.mantissa & (one.mantissa - 1);
+
+	// len is current number of digits produced
+	int len = 0;
+	// kappa is an exponent shift, to account for if we don't produce exactly the number
+	// of digits to reach the decimal place, and there should be extra 0s beyond the produced
+	// digits. (or negative if there should be preceding 0s)
+	int kappa = 10;
+	uint32_t div = 1000000000; // highest possible pow10 in 32bits = 10^9
+
+	// handle integer component before decimal separator
+	while(kappa > 0)
+	{
+		// get digit at current power of ten
+		uint64_t digit = intpart / div;
+
+		// don't include preceding 0 digits (so either include if
+		// digit is non-0, or if we've started including digits ie.
+		// len > 0)
+		if(digit || len) digits[len++] = '0' + char(digit);
+
+		// remove this pow10 from the int for future iterations
+		intpart %= div; kappa--; div /= 10;
+
+		// this is our termination condition, when we've produced the number.
+		// delta is the difference between upper and lower, and the left side
+		// is the current remainder after the currently generated digits have
+		// been removed. If that is small enough that we've produced the number,
+		// exit and increment kout to account for the extra exponential
+		if( (intpart << -one.exp) + decpart <= delta.mantissa)
+		{
+			kout += kappa;
+			return len;
+		}
+	}
+
+	// note, after this part if we're still here, intpart is 0 as we've
+	// masked off all digits, so only decpart remains.
+	// Kappa has also reached 0, beyond here it decrements below 0
+
+	// handle decimal portion after separator
+	do
+	{
+		decpart *= 10;
+		uint64_t digit = decpart >> -one.exp;
+
+		// don't include preceding 0s (see above - note if we've produced
+		// any integer digits at all, len will be > 0)
+		if(digit || len) digits[len++] = '0' + char(digit);
+
+		// remove this pow10 from the decimal part, and scale delta to match the scaled decpart
+		decpart &= (one.mantissa-1); kappa--; delta.mantissa *= 10;
+
+		// stop looping once decpart is no greater than delta (see above for termination condition)
+	} while(decpart > delta.mantissa);
+
+	kout += kappa;
+
+	return len;
+}
+
+int grisu2(uint64_t mantissa, int exponent, char digits[18], int &kout) // fills digits (no terminator) and returns the digit count; kout receives the decimal exponent for those digits
+{
+	// the IEEE format implicitly has a hidden 1 bit above the mantissa for all normalised
+	// numbers. Subnormals (exponent == -1023 here) do NOT have it.
+	const uint64_t hiddenbit = 0x0010000000000000;
+
+	// exponent is shifted by a further 52 because input exponent is assuming mantissa
+	// is 1.2345678...e exp (fraction)
+	// but grisu2 treats number as
+	//    12345678...e exp-52 (whole number)
+	diy_fp w = diy_fp(mantissa|hiddenbit, exponent - 52);
+	if(exponent == -1023) w = diy_fp(mantissa, 1 - (1023 + 52)); // subnormal: fixed exponent and no hidden bit
+
+	// we know the w input comes from a double, so is only using the lower 53 bits at
+	// most. We can safely multiply by 2 (and cancel by lowering exponent to match), then
+	// add 1 to get the upper value
+	diy_fp upper;
+	upper.mantissa = (w.mantissa<<1) + 1;
+	upper.exp = w.exp-1;
+
+	diy_fp lower;
+	if(mantissa == 0) // NOTE(review): a zero value (mantissa 0, exponent -1023) is presumably filtered out by the caller - confirm
+	{
+		// if mantissa is 0 we are going to underflow, so shift by 2
+		// to maintain precision/normalised value
+		lower.mantissa = (w.mantissa<<2) - 1;
+		lower.exp = w.exp-2;
+	}
+	else
+	{
+		lower.mantissa = (w.mantissa<<1) - 1;
+		lower.exp = w.exp-1;
+	}
+
+	// normalise upper - shift the mantissa until the top bit is set, and decrement the
+	// exponent each time to keep the number the same (1.2e5 and 12e4 are equivalent representations)
+	while((upper.mantissa & 0x8000000000000000) == 0)
+	{
+		upper.mantissa <<= 1;
+		upper.exp--;
+	}
+	// lower needs to be the same exponent as upper so we can calculate delta by upper-lower, so
+	// it is not normalised, but shifted to upper's exponent
+	lower.mantissa <<= (lower.exp - upper.exp);
+	lower.exp = upper.exp;
+
+	int k = 0;
+	diy_fp ck = find_cachedpow10(upper.exp, k);
+
+	lower = lower * ck;
+	upper = upper * ck;
+
+	// squeeze the range in by 1 ULP
+	lower.mantissa++; upper.mantissa--;
+
+	// set our initial exponent. This will be shifted
+	// if we have any preceding or trailing 0s to get the
+	// final exponent
+	kout = -k;
+
+	return gen_digits(lower, upper, digits, kout);
+}