From 9d164826da2e0ac7f380613c1aa195862759b084 Mon Sep 17 00:00:00 2001
From: baldurk <baldurk@baldurk.org>
Date: Sat, 22 Nov 2014 23:45:33 +0000
Subject: [PATCH] Implement custom printf function, use it in favour of OS
 *printf

* Custom printf supports UTF-8 and in fact assumes it implicitly for all
  strings. This means we can use UTF-8 everywhere, even on windows, without
  widening parameters to printf() style functions like RDCLOG et al.
* It also allows me to add size_t support and binary printing.
* Grisu2 is used for converting doubles to strings. Reference:

Paper: http://florian.loitsch.com/publications/dtoa-pldi2010.pdf
impl: https://github.com/floitsch/double-conversion
impl: https://github.com/night-shift/fpconv
impl: https://github.com/miloyip/dtoa-benchmark
---
 renderdoc/os/os_specific.cpp          |   65 ++
 renderdoc/os/os_specific.h            |   21 +-
 renderdoc/os/win32/win32_stringio.cpp |   94 +-
 renderdoc/renderdoc.vcxproj           |    2 +
 renderdoc/renderdoc.vcxproj.filters   |    9 +
 renderdoc/serialise/grisu2.cpp        |  324 ++++++
 renderdoc/serialise/utf8printf.cpp    | 1314 +++++++++++++++++++++++++
 7 files changed, 1770 insertions(+), 59 deletions(-)
 create mode 100644 renderdoc/serialise/grisu2.cpp
 create mode 100644 renderdoc/serialise/utf8printf.cpp

diff --git a/renderdoc/os/os_specific.cpp b/renderdoc/os/os_specific.cpp
index 1e95e6670..14bdd57ef 100644
--- a/renderdoc/os/os_specific.cpp
+++ b/renderdoc/os/os_specific.cpp
@@ -26,8 +26,73 @@
 #include "os/os_specific.h"
 #include "common/string_utils.h"
 
+#include <stdarg.h>
+
 using std::string;
 
+int utf8printf(char *buf, size_t bufsize, const char *fmt, va_list args);
+
+namespace StringFormat
+{
+
+int snprintf(char *str, size_t bufSize, const char *fmt, ...)
+{
+	// bundle the variadic arguments and defer to the va_list overload
+	va_list varargs;
+	va_start(varargs, fmt);
+	const int written = StringFormat::vsnprintf(str, bufSize, fmt, varargs);
+	va_end(varargs);
+
+	// hand back whatever the formatter reported
+	return written;
+}
+
+int vsnprintf(char *str, size_t bufSize, const char *format, va_list args)
+{
+	return ::utf8printf(str, bufSize, format, args); // thin forwarder to the global-scope utf8printf; its result is returned unchanged
+}
+
+int Wide2UTF8(wchar_t chr, char mbchr[4])
+{
+	// Encodes one code point as UTF-8 into mbchr (no NUL is appended), returns the bytes written:
+	//U+00000 -> U+00007F 1 byte  0xxxxxxx
+	//U+00080 -> U+0007FF 2 bytes 110xxxxx 10xxxxxx
+	//U+00800 -> U+00FFFF 3 bytes 1110xxxx 10xxxxxx 10xxxxxx
+	//U+10000 -> U+10FFFF 4 bytes 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+	// work on an unsigned copy so a negative value (wchar_t is signed on some platforms) is rejected
+	unsigned long cp = (unsigned long)chr;
+
+	// out of range values and UTF-16 surrogate halves can't be encoded as valid UTF-8
+	if(cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
+		cp = 0xFFFD; // replacement character
+
+	if(cp <= 0x7f)
+	{
+		mbchr[0] = (char)cp;
+		return 1;
+	}
+	else if(cp <= 0x7ff)
+	{
+		mbchr[1] = (char)(0x80 | (cp & 0x3f));
+		mbchr[0] = (char)(0xC0 | ((cp >> 6) & 0x1f));
+		return 2;
+	}
+	else if(cp <= 0xffff)
+	{
+		mbchr[2] = (char)(0x80 | (cp & 0x3f));
+		mbchr[1] = (char)(0x80 | ((cp >> 6) & 0x3f));
+		mbchr[0] = (char)(0xE0 | ((cp >> 12) & 0x0f));
+		return 3;
+	}
+
+	mbchr[3] = (char)(0x80 | (cp & 0x3f));
+	mbchr[2] = (char)(0x80 | ((cp >> 6) & 0x3f));
+	mbchr[1] = (char)(0x80 | ((cp >> 12) & 0x3f));
+	mbchr[0] = (char)(0xF0 | ((cp >> 18) & 0x07));
+	return 4;
+}
+
+}; // namespace StringFormat
+
 wstring Callstack::AddressDetails::formattedString(const char *commonPath)
 {
 	wchar_t fmt[512] = {0};
diff --git a/renderdoc/os/os_specific.h b/renderdoc/os/os_specific.h
index 9632731ff..5b2591295 100644
--- a/renderdoc/os/os_specific.h
+++ b/renderdoc/os/os_specific.h
@@ -202,19 +202,30 @@ namespace Keyboard
 	bool GetKeyState(int key);
 };
 
+// implemented per-platform
 namespace StringFormat
 {
-	int snprintf(char *str, size_t bufSize, const char *format, ...);
-	int wsnprintf(wchar_t *str, size_t bufSize, const wchar_t *format, ...);
-	int vsnprintf(char *str, size_t bufSize, const char *format, va_list v);
 	void sntimef(char *str, size_t bufSize, const char *format);
-	void wcsncpy(wchar_t *dst, const wchar_t *src, size_t count);
 
+	// forwards to vsnprintf below, needed to be here due to va_copy differences
 	string Fmt(const char *format, ...);
-	wstring WFmt(const wchar_t *format, ...);
 
 	string Wide2UTF8(const wstring &s);
 	wstring UTF82Wide(const string &s);
+
+	// TODO remove
+	int wsnprintf(wchar_t *str, size_t bufSize, const wchar_t *format, ...);
+	wstring WFmt(const wchar_t *format, ...);
+};
+
+// utility functions, implemented in os_specific.cpp, not per-platform (assuming standard stdarg.h)
+// forwarded to custom printf implementation in utf8printf.cpp
+namespace StringFormat
+{
+	int vsnprintf(char *str, size_t bufSize, const char *format, va_list v);
+	int snprintf(char *str, size_t bufSize, const char *format, ...);
+
+	int Wide2UTF8(wchar_t chr, char mbchr[4]);
 };
 
 namespace OSUtility
diff --git a/renderdoc/os/win32/win32_stringio.cpp b/renderdoc/os/win32/win32_stringio.cpp
index 2d0ff89e4..2c75be61a 100644
--- a/renderdoc/os/win32/win32_stringio.cpp
+++ b/renderdoc/os/win32/win32_stringio.cpp
@@ -265,71 +265,19 @@ namespace FileIO
 
 namespace StringFormat
 {
-	int snprintf(char *str, size_t bufSize, const char *fmt, ...)
-	{
-		va_list args;
-		va_start(args, fmt);
-
-		int ret = vsnprintf(str, bufSize, fmt, args);
-
-		va_end(args);
-
-		return ret;
-	}
-	
+	///////////////////////////////////////////////////////////////////////////
 	int wsnprintf(wchar_t *str, size_t bufSize, const wchar_t *format, ...)
 	{
 		va_list args;
 		va_start(args, format);
 
-		int ret =  ::_vsnwprintf_s(str, bufSize, bufSize, format, args);
+		int ret =  ::_vsnwprintf_s(str, bufSize, bufSize-1, format, args);
 
 		va_end(args);
 
 		return ret;
 	}
 
-	int vsnprintf(char *str, size_t bufSize, const char *format, va_list args)
-	{
-		return ::vsnprintf_s(str, bufSize, bufSize, format, args);
-	}
-
-	void sntimef(char *str, size_t bufSize, const char *format)
-	{
-		time_t tim;
-		time(&tim);
-
-		tm tmv;
-		localtime_s(&tmv, &tim);
-
-		strftime(str, bufSize, format, &tmv);
-	}
-
-	void wcsncpy(wchar_t *dst, const wchar_t *src, size_t count)
-	{
-		::wcsncpy_s(dst, count, src, count);
-	}
-
-	string Fmt(const char *format, ...)
-	{
-		va_list args;
-		va_start(args, format);
-
-		int size = _vscprintf(format, args)+1;
-
-		char *buf = new char[size];
-
-		StringFormat::vsnprintf(buf, size, format, args);
-
-		va_end(args);
-
-		string ret = buf;
-
-		delete[] buf;
-		
-		return ret;
-	}
-
 	wstring WFmt(const wchar_t *format, ...)
 	{
 		va_list args;
@@ -349,6 +297,44 @@ namespace StringFormat
 		
 		return ret;
 	}
+	///////////////////////////////////////////////////////////////////////////
+	
+	// writes the current local time into str (bufSize bytes available) using a strftime() format
+	void sntimef(char *str, size_t bufSize, const char *format)
+	{
+		time_t now = time(NULL);
+		tm local;
+
+		// if the conversion fails or the text doesn't fit, the buffer contents are
+		// indeterminate - leave an empty string rather than garbage for the caller
+		if((localtime_s(&local, &now) != 0 || strftime(str, bufSize, format, &local) == 0) && bufSize > 0) str[0] = 0;
+	}
+		
+	// printf-style formatting straight into a string, via StringFormat::vsnprintf
+	string Fmt(const char *format, ...)
+	{
+		va_list args;
+		va_start(args, format);
+
+		va_list args2;
+		//va_copy(args2, args); // not implemented on VS2010
+		args2 = args;
+
+		// first pass without a buffer only measures the formatted length (terminator excluded)
+		int size = StringFormat::vsnprintf(NULL, 0, format, args2);
+
+		// zero-filled storage; a zero or negative (error) size still leaves a valid empty string
+		vector<char> buf(size > 0 ? size_t(size)+1 : 1, '\0');
+
+		// pass the whole buffer including the terminator slot, so the last character isn't cut off
+		if(size > 0)
+			StringFormat::vsnprintf(&buf[0], buf.size(), format, args);
+
+		va_end(args);
+		va_end(args2);
+
+		return string(&buf[0]);
+	}
 
 	// save on reallocation, keep a persistent scratch buffer for conversions
 	vector<char> charBuffer;
diff --git a/renderdoc/renderdoc.vcxproj b/renderdoc/renderdoc.vcxproj
index 93f5ccabc..a473b5346 100644
--- a/renderdoc/renderdoc.vcxproj
+++ b/renderdoc/renderdoc.vcxproj
@@ -367,7 +367,9 @@
     <ClCompile Include="replay\replay_output.cpp" />
     <ClCompile Include="replay\replay_renderer.cpp" />
     <ClCompile Include="replay\type_helpers.cpp" />
+    <ClCompile Include="serialise\grisu2.cpp" />
     <ClCompile Include="serialise\serialiser.cpp" />
+    <ClCompile Include="serialise\utf8printf.cpp" />
   </ItemGroup>
   <ItemGroup>
     <ResourceCompile Include="data\renderdoc.rc" />
diff --git a/renderdoc/renderdoc.vcxproj.filters b/renderdoc/renderdoc.vcxproj.filters
index e3f943759..86a333156 100644
--- a/renderdoc/renderdoc.vcxproj.filters
+++ b/renderdoc/renderdoc.vcxproj.filters
@@ -85,6 +85,9 @@
     <Filter Include="3rdparty\lz4">
       <UniqueIdentifier>{043f5a32-683e-4b56-bcc6-512444b40d70}</UniqueIdentifier>
     </Filter>
+    <Filter Include="Common\Strings">
+      <UniqueIdentifier>{ce0b860f-38b7-48af-b49d-7dcb23378f82}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="maths\camera.h">
@@ -524,6 +527,12 @@
     <ClCompile Include="driver\d3d11\d3d11_context2_wrap.cpp">
       <Filter>Drivers\D3D11</Filter>
     </ClCompile>
+    <ClCompile Include="serialise\grisu2.cpp">
+      <Filter>Common\Strings</Filter>
+    </ClCompile>
+    <ClCompile Include="serialise\utf8printf.cpp">
+      <Filter>Common\Strings</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <None Include="os\win32\comexport.def">
diff --git a/renderdoc/serialise/grisu2.cpp b/renderdoc/serialise/grisu2.cpp
new file mode 100644
index 000000000..a016af448
--- /dev/null
+++ b/renderdoc/serialise/grisu2.cpp
@@ -0,0 +1,324 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ * 
+ * Copyright (c) 2014 Baldur Karlsson
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "common/common.h"
+
+#include <math.h>
+
+///////////////////////////////////////////////////////////////////////////
+// Grisu2 implementation (slightly simpler than Grisu3) for converting
+// doubles to strings
+//
+// Sources:
+// Based on Florian Loitsch 2010 "Printing Floating-Point Numbers Quickly
+//                                and Accurately with Integers"
+//     http://florian.loitsch.com/publications/dtoa-pldi2010.pdf
+//     https://github.com/floitsch/double-conversion (BSD licensed)
+//
+// Also implementations by Milo Yip and night-shift used as reference
+//     https://github.com/miloyip/dtoa-benchmark
+//     https://github.com/night-shift/fpconv
+
+struct diy_fp
+{
+	// zero-initialise so a default-constructed value never holds indeterminate members
+	diy_fp() : mantissa(0), exp(0) {}
+	diy_fp(uint64_t mant, int exponent) : mantissa(mant), exp(exponent) {}
+	uint64_t mantissa;
+	int exp;
+
+	// q in the paper: bits in the mantissa of the fixed point approximation
+	static const int bitsq = 64;
+};
+
+// subtract from Florian paper
+diy_fp operator -(const diy_fp &x, const diy_fp &y)
+{
+	diy_fp difference(x.mantissa - y.mantissa, x.exp); // both operands are assumed to share x's exponent
+	return difference;
+}
+
+// multiply from Florian paper
+diy_fp operator *(const diy_fp &x, const diy_fp &y)
+{
+	const uint64_t low32 = 0xFFFFFFFF;
+	// split both mantissas into 32-bit halves so each partial product fits in 64 bits
+	const uint64_t xHi = x.mantissa >> 32, xLo = x.mantissa & low32;
+	const uint64_t yHi = y.mantissa >> 32, yLo = y.mantissa & low32;
+
+	// the four partial products of the long multiplication
+	const uint64_t hiHi = xHi*yHi, loLo = xLo*yLo;
+	const uint64_t loHi = xLo*yHi, hiLo = xHi*yLo;
+
+	// sum what lands in the middle 32 bits, plus half a unit so the result rounds up
+	const uint64_t mid = (loLo >> 32) + (loHi & low32) + (hiLo & low32) + (uint64_t(1) << 31);
+
+	// keep the upper 64 bits of the product - note the exponent is no longer normalised
+	const uint64_t product = hiHi + (loHi >> 32) + (hiLo >> 32) + (mid >> 32);
+	return diy_fp(product, x.exp + y.exp + 64);
+}
+
+static diy_fp pow10cache[] = { // entry i approximates 10^(firstpow10 + i*cachestep) as mantissa * 2^exp
+	diy_fp(18054884314459144840U, -1220),
+	diy_fp(13451937075301367670U, -1193),
+	diy_fp(10022474136428063862U, -1166),
+	diy_fp(14934650266808366570U, -1140),
+	diy_fp(11127181549972568877U, -1113),
+	diy_fp(16580792590934885855U, -1087),
+	diy_fp(12353653155963782858U, -1060),
+	diy_fp(18408377700990114895U, -1034),
+	diy_fp(13715310171984221708U, -1007),
+	diy_fp(10218702384817765436U, -980),
+	diy_fp(15227053142812498563U, -954),
+	diy_fp(11345038669416679861U, -927),
+	diy_fp(16905424996341287883U, -901),
+	diy_fp(12595523146049147757U, -874),
+	diy_fp(9384396036005875287U,  -847),
+	diy_fp(13983839803942852151U, -821),
+	diy_fp(10418772551374772303U, -794),
+	diy_fp(15525180923007089351U, -768),
+	diy_fp(11567161174868858868U, -741),
+	diy_fp(17236413322193710309U, -715),
+	diy_fp(12842128665889583758U, -688),
+	diy_fp(9568131466127621947U,  -661),
+	diy_fp(14257626930069360058U, -635),
+	diy_fp(10622759856335341974U, -608),
+	diy_fp(15829145694278690180U, -582),
+	diy_fp(11793632577567316726U, -555),
+	diy_fp(17573882009934360870U, -529),
+	diy_fp(13093562431584567480U, -502),
+	diy_fp(9755464219737475723U,  -475),
+	diy_fp(14536774485912137811U, -449),
+	diy_fp(10830740992659433045U, -422),
+	diy_fp(16139061738043178685U, -396),
+	diy_fp(12024538023802026127U, -369),
+	diy_fp(17917957937422433684U, -343),
+	diy_fp(13349918974505688015U, -316),
+	diy_fp(9946464728195732843U,  -289),
+	diy_fp(14821387422376473014U, -263),
+	diy_fp(11042794154864902060U, -236),
+	diy_fp(16455045573212060422U, -210),
+	diy_fp(12259964326927110867U, -183),
+	diy_fp(18268770466636286478U, -157),
+	diy_fp(13611294676837538539U, -130),
+	diy_fp(10141204801825835212U, -103),
+	diy_fp(15111572745182864684U, -77),
+	diy_fp(11258999068426240000U, -50),
+	diy_fp(16777216000000000000U, -24),
+	diy_fp(12500000000000000000U,   3),
+	diy_fp(9313225746154785156U,   30),
+	diy_fp(13877787807814456755U,  56),
+	diy_fp(10339757656912845936U,  83),
+	diy_fp(15407439555097886824U, 109),
+	diy_fp(11479437019748901445U, 136),
+	diy_fp(17105694144590052135U, 162),
+	diy_fp(12744735289059618216U, 189),
+	diy_fp(9495567745759798747U,  216),
+	diy_fp(14149498560666738074U, 242),
+	diy_fp(10542197943230523224U, 269),
+	diy_fp(15709099088952724970U, 295),
+	diy_fp(11704190886730495818U, 322),
+	diy_fp(17440603504673385349U, 348),
+	diy_fp(12994262207056124023U, 375),
+	diy_fp(9681479787123295682U,  402),
+	diy_fp(14426529090290212157U, 428),
+	diy_fp(10748601772107342003U, 455),
+	diy_fp(16016664761464807395U, 481),
+	diy_fp(11933345169920330789U, 508),
+	diy_fp(17782069995880619868U, 534),
+	diy_fp(13248674568444952270U, 561),
+	diy_fp(9871031767461413346U,  588),
+	diy_fp(14708983551653345445U, 614),
+	diy_fp(10959046745042015199U, 641),
+	diy_fp(16330252207878254650U, 667),
+	diy_fp(12166986024289022870U, 694),
+	diy_fp(18130221999122236476U, 720),
+	diy_fp(13508068024458167312U, 747),
+	diy_fp(10064294952495520794U, 774),
+	diy_fp(14996968138956309548U, 800),
+	diy_fp(11173611982879273257U, 827),
+	diy_fp(16649979327439178909U, 853),
+	diy_fp(12405201291620119593U, 880),
+	diy_fp(9242595204427927429U,  907),
+	diy_fp(13772540099066387757U, 933),
+	diy_fp(10261342003245940623U, 960),
+	diy_fp(15290591125556738113U, 986),
+	diy_fp(11392378155556871081U, 1013),
+	diy_fp(16975966327722178521U, 1039),
+	diy_fp(12648080533535911531U, 1066),
+};
+
+static const int firstpow10 = -348; // decimal exponent of the first cached power of 10 (pow10cache[0])
+static const int cachestep = 8; // decimal exponent step between consecutive cache items
+
+diy_fp find_cachedpow10(int exp, int& kout)
+{
+	const double log10_of_2 = 0.30102999566398114; // 1/log2(10)
+	const double minExp = -60.0;                   // alpha in the paper
+
+	// k calculation from the paper: ceil[ (alpha - exp + q - 1) * 1/log2(10) ],
+	// where the exponent is shifted by the number of mantissa bits q
+	const double scaled = (minExp - double(exp + diy_fp::bitsq) + diy_fp::bitsq - 1) * log10_of_2;
+	const int decimalPower = (int)ceil(scaled);
+
+	// slot in pow10cache selected for that k, and the decimal power that
+	// the chosen slot actually corresponds to (reported through kout)
+	const int slot = (-firstpow10 + decimalPower - 1) / cachestep + 1;
+	kout = firstpow10 + slot * cachestep;
+
+	return pow10cache[slot];
+}
+
+static int gen_digits(const diy_fp &lower, const diy_fp &upper, char *digits, int &kout)
+{ // emits decimal digits of upper into digits until the remainder is within (upper - lower); returns the digit count and adds the exponent shift to kout
+	diy_fp delta = upper-lower;
+
+	// generate 1.0 to the desired exponent so we can split integer from decimal part
+	diy_fp one(uint64_t(1) << -upper.exp, upper.exp);
+
+	// mask off integer and decimal parts
+	uint64_t intpart = upper.mantissa >> -one.exp;
+	uint64_t decpart = upper.mantissa & (one.mantissa - 1);
+
+	// len is current number of digits produced
+	int len = 0;
+	// kappa is an exponent shift, to account for if we don't produce exactly the number
+	// of digits to reach the decimal place, and there should be extra 0s beyond the produced
+	// digits. (or negative if there should be preceding 0s)
+	int kappa = 10;
+	uint32_t div = 1000000000; // highest possible pow10 in 32bits = 10^9
+
+	// handle integer component before decimal separator
+	while(kappa > 0)
+	{
+		// get digit at current power of ten
+		uint64_t digit = intpart / div;
+
+		// don't include preceding 0 digits (so either include if
+		// digit is non-0, or if we've started including digits ie.
+		// len > 0)
+		if(digit || len) digits[len++] = '0' + char(digit);
+
+		// remove this pow10 from the int for future iterations
+		intpart %= div; kappa--; div /= 10;
+
+		// this is our termination condition, when we've produced the number.
+		// delta is the difference between upper and lower, and the left side
+		// is the current remainder after the currently generated digits have
+		// been removed. If that is small enough that we've produced the number,
+		// exit and increment kout to account for the extra exponential
+		if( (intpart << -one.exp) + decpart <= delta.mantissa)
+		{
+			kout += kappa;
+			return len;
+		}
+	}
+
+	// note, after this part if we're still here, intpart is 0 as we've
+	// masked off all digits, so only decpart remains.
+	// Kappa has also reached 0, beyond here it decrements below 0
+
+	// handle decimal portion after separator
+	do
+	{
+		decpart *= 10;
+		uint64_t digit = decpart >> -one.exp;
+
+		// don't include preceding 0s (see above - note if we've produced
+		// any integer digits at all, len will be > 0)
+		if(digit || len) digits[len++] = '0' + char(digit);
+
+		// remove this pow10 from the decimal part; delta is scaled by 10 as well so
+		// that it stays comparable with the scaled remainder
+		decpart &= (one.mantissa-1); kappa--; delta.mantissa *= 10;
+
+		// stop looping when decpart is lower than delta (see above for termination condition)
+	} while(decpart > delta.mantissa);
+
+	kout += kappa;
+
+	return len;
+}
+
+int grisu2(uint64_t mantissa, int exponent, char digits[18], int &kout)
+{
+	// Converts the fraction bits (mantissa) and unbiased exponent of a double into decimal digits.
+	// Returns the number of digits written to digits; kout receives the matching power-of-ten exponent.
+	const uint64_t hiddenbit = 0x0010000000000000;
+
+	// exponent is shifted by a further 52 because input exponent is assuming mantissa
+	// is 1.2345678...e exp (fraction)
+	// but grisu2 treats number as
+	//    12345678...e exp-52 (whole number)
+	diy_fp w = diy_fp(mantissa|hiddenbit, exponent - 52);
+	if(exponent == -1023) w = diy_fp(mantissa, 1 - (1023 + 52)); // subnormal: no hidden 1 bit, fixed exponent
+
+	// we know the w input comes from a double, so is only using the lower 53 bits at
+	// most. We can safely multiply by 2 (and cancel by lowering exponent to match), then
+	// add 1 to get the upper value
+	diy_fp upper;
+	upper.mantissa = (w.mantissa<<1) + 1;
+	upper.exp = w.exp-1;
+
+	diy_fp lower;
+	if(mantissa == 0)
+	{
+		// if mantissa is 0 we are going to underflow, so shift by 2
+		// to maintain precision/normalised value
+		lower.mantissa = (w.mantissa<<2) - 1;
+		lower.exp = w.exp-2;
+	}
+	else
+	{
+		lower.mantissa = (w.mantissa<<1) - 1;
+		lower.exp = w.exp-1;
+	}
+
+	// normalise upper - shift the mantissa until the top bit is set, and decrement the
+	// exponent each time to keep the number the same (1.2e5 and 12e4 are equivalent representations)
+	while((upper.mantissa & 0x8000000000000000) == 0)
+	{
+		upper.mantissa <<= 1;
+		upper.exp--;
+	}
+	// lower needs to be the same exponent as upper so we can calculate delta by upper-lower, so
+	// it is not normalised, but shifted to upper's exponent
+	lower.mantissa <<= (lower.exp - upper.exp);
+	lower.exp = upper.exp;
+
+	int k = 0;
+	diy_fp ck = find_cachedpow10(upper.exp, k);
+
+	lower = lower * ck;
+	upper = upper * ck;
+
+	// squeeze the range in by 1 ULP
+	lower.mantissa++; upper.mantissa--;
+
+	// set our initial exponent. This will be shifted
+	// if we have any preceding or trailing 0s to get the
+	// final exponent
+	kout = -k;
+
+	return gen_digits(lower, upper, digits, kout);
+}
diff --git a/renderdoc/serialise/utf8printf.cpp b/renderdoc/serialise/utf8printf.cpp
new file mode 100644
index 000000000..bf0fe3d83
--- /dev/null
+++ b/renderdoc/serialise/utf8printf.cpp
@@ -0,0 +1,1314 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ * 
+ * Copyright (c) 2014 Baldur Karlsson
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "common/common.h"
+
+// grisu2 double-to-string function, returns number of digits written to digits array
+int grisu2(uint64_t mantissa, int exponent, char digits[18], int &kout);
+
+///////////////////////////////////////////////////////////////////////////////
+// functions for appending to output (handling running out of buffer space)
+
+void addchar(char *&output, size_t &actualsize, char *end, char c)
+{
+	// the running size counts every character, even one that is dropped below
+	++actualsize;
+	// only store the character while the write cursor hasn't reached the end pointer
+	if(output != end)
+		*output++ = c;
+}
+
+void addchars(char *&output, size_t &actualsize, char *end, size_t num, char c)
+{
+	actualsize += num; // every repeat is counted, even those that don't fit
+	for(size_t left = num; left > 0 && output != end; left--)
+		*output++ = c;
+}
+
+void appendstring(char *&output, size_t &actualsize, char *end, const char *str, size_t len)
+{
+	// take at most len characters, stopping early at a NUL terminator
+	for(size_t idx = 0; idx < len && str[idx] != 0; idx++)
+	{
+		// count the character regardless, but only store it while there is room
+		++actualsize;
+		if(output != end)
+			*output++ = str[idx];
+	}
+}
+
+void appendstring(char *&output, size_t &actualsize, char *end, const char *str)
+{
+	// walk the NUL-terminated string: count every character, store only what fits
+	for(const char *cur = str; *cur != 0; ++cur)
+	{
+		++actualsize;
+		if(output != end)
+			*output++ = *cur;
+	}
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Flags and general formatting parameters
+
+enum FormatterFlags
+{
+	LeftJustify   = 1 << 0, // width padding goes on the right as spaces
+	PrependPos    = 1 << 1, // non-negative numbers get a leading '+'
+	PrependSpace  = 1 << 2, // non-negative numbers get a leading ' '
+	AlternateForm = 1 << 3, // integer base prefix (0x/0b/0), '.' always kept for floats
+	PadZeroes     = 1 << 4, // width padding uses '0' rather than ' '
+};
+
+enum LengthModifier
+{
+	None     = 0, // PrintInteger reads the signed value as int
+	HalfHalf = 1, // PrintInteger reads the signed value as signed char
+	Half     = 2, // PrintInteger reads the signed value as signed short
+	Long     = 3, // PrintInteger treats this the same as None (int)
+	LongLong = 4, // PrintInteger reads the signed value as signed long long
+	SizeT    = 5, // no dedicated case in PrintInteger, so it takes the int default
+};
+
+struct FormatterParams
+{
+	// sentinels: neither value can be set negative, so -1 indicates "not specified"
+	static const int NoWidth = -1;
+	static const int NoPrecision = -1;
+
+	int Flags;             // bitwise OR of FormatterFlags values
+	int Width;             // field width, or NoWidth
+	int Precision;         // precision, or NoPrecision
+	LengthModifier Length; // size modifier of the argument
+
+	FormatterParams() : Flags(0), Width(NoWidth), Precision(NoPrecision), Length(None) {}
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Print a number in a specified base (16, 8, 10 or 2 supported)
+
+void PrintInteger(bool typeUnsigned, uint64_t argu, int base, uint64_t numbits,
+                 FormatterParams formatter, bool uppercaseDigits, char *&output, size_t &actualsize, char *end)
+{
+	int64_t argi = 0;
+	// argu holds the raw argument bits; numbits is the argument width used by the hex/octal/binary loops
+	// cast the appropriate size to signed version
+	switch(formatter.Length)
+	{
+		default:
+		case None:
+		case Long:
+			argi = (int64_t)*(signed int*)&argu;
+			break;
+		case HalfHalf:
+			argi = (int64_t)*(signed char*)&argu;
+			break;
+		case Half:
+			argi = (int64_t)*(signed short*)&argu;
+			break;
+		case LongLong:
+			argi = (int64_t)*(signed long long*)&argu;
+			break;
+	}
+
+	bool negative = false;
+	if(base == 10 && !typeUnsigned)
+	{
+		negative = argi < 0;
+	}
+
+	int digwidth = 0;
+	int numPad0s = 0;
+	int numPadWidth = 0;
+	{
+		int intwidth = 0;
+		int digits = 0;
+
+		// work out the number of digits in the integer for this base
+		if(!negative)
+		{
+			uint64_t accum = argu;
+			while(accum)
+			{
+				digits += 1;
+				accum /= base;
+			}
+		}
+		else
+		{
+			int64_t accum = argi;
+			while(accum)
+			{
+				digits += 1;
+				accum /= base;
+			}
+		}
+
+		intwidth = digwidth = RDCMAX(1, digits);
+
+		// printed int is 2 chars larger for 0x or 0b, and 1 char for 0 (octal)
+		if(base == 16 || base == 2)
+			intwidth += formatter.Flags & AlternateForm ? 2 : 0;
+		if(base == 8)
+			intwidth += formatter.Flags & AlternateForm ? 1 : 0;
+
+		if(formatter.Precision != FormatterParams::NoPrecision && formatter.Precision > intwidth)
+			numPad0s = formatter.Precision - intwidth;
+
+		intwidth += numPad0s;
+
+		// for decimal we can have a negative sign (or placeholder)
+		if(base == 10)
+		{
+			if(negative)
+				intwidth++;
+			else if(formatter.Flags & (PrependPos|PrependSpace))
+				intwidth++;
+		}
+
+		if(formatter.Width != FormatterParams::NoWidth && formatter.Width > intwidth)
+			numPadWidth = formatter.Width - intwidth;
+	}
+
+	// pad with spaces if necessary
+	if((formatter.Flags & (LeftJustify|PadZeroes)) == 0 && numPadWidth > 0)
+		addchars(output, actualsize, end, (size_t)numPadWidth, ' ');
+
+	if(base == 16)
+	{
+		if(formatter.Flags & AlternateForm)
+		{
+			if(uppercaseDigits)
+				appendstring(output, actualsize, end, "0X");
+			else
+				appendstring(output, actualsize, end, "0x");
+		}
+
+		// pad with 0s as appropriate
+		if((formatter.Flags & (LeftJustify|PadZeroes)) == PadZeroes && numPadWidth > 0)
+			addchars(output, actualsize, end, (size_t)numPadWidth, '0');
+		if(numPad0s > 0)
+			addchars(output, actualsize, end, (size_t)numPad0s, '0');
+
+		bool left0s = true;
+
+		// mask off each hex digit and print
+		for(uint64_t i=0; i < numbits; i+=4)
+		{
+			uint64_t shift = numbits-4-i;
+			uint64_t mask = 0xfULL << shift;
+			char digit = char((argu & mask) >> shift);
+			if(digit == 0 && left0s && i+4 < numbits) continue;
+			left0s = false;
+
+			if(digit < 10)
+				addchar(output, actualsize, end, '0' + digit);
+			else if(uppercaseDigits)
+				addchar(output, actualsize, end, 'A' + digit - 10);
+			else
+				addchar(output, actualsize, end, 'a' + digit - 10);
+		}
+	}
+	else if(base == 8)
+	{
+		if(formatter.Flags & AlternateForm)
+			appendstring(output, actualsize, end, "0");
+
+		if((formatter.Flags & (LeftJustify|PadZeroes)) == PadZeroes && numPadWidth > 0)
+			addchars(output, actualsize, end, (size_t)numPadWidth, '0');
+		if(numPad0s > 0)
+			addchars(output, actualsize, end, (size_t)numPad0s, '0');
+
+		// octal digits don't quite fit into typical integer sizes,
+		// so instead we pretend the number is a little bigger, then
+		// the shift just fills out the upper bits with 0s.
+		uint64_t offs = 0;
+		if(numbits % 3 == 1) offs = 2;
+		if(numbits % 3 == 2) offs = 1;
+
+		bool left0s = true;
+
+		for(uint64_t i=0; i < numbits; i+=3)
+		{
+			uint64_t shift = numbits-3-i+offs;
+			uint64_t mask = 0x7ULL << shift;
+
+			char digit = char((argu & mask) >> shift);
+			if(digit == 0 && left0s && i+3 < numbits) continue;
+			left0s = false;
+
+			addchar(output, actualsize, end, '0' + digit);
+		}
+	}
+	else if(base == 2)
+	{
+		if(formatter.Flags & AlternateForm)
+		{
+			if(uppercaseDigits)
+				appendstring(output, actualsize, end, "0B");
+			else
+				appendstring(output, actualsize, end, "0b");
+		}
+
+		if((formatter.Flags & (LeftJustify|PadZeroes)) == PadZeroes && numPadWidth > 0)
+			addchars(output, actualsize, end, (size_t)numPadWidth, '0');
+		if(numPad0s > 0)
+			addchars(output, actualsize, end, (size_t)numPad0s, '0');
+
+		bool left0s = true;
+
+		for(uint64_t i=0; i < numbits; i++)
+		{
+			uint64_t shift = numbits-1-i;
+			uint64_t mask = 0x1ULL << shift;
+			char digit = char((argu & mask) >> shift);
+			if(digit == 0 && left0s && i+1 < numbits) continue;
+			left0s = false;
+
+			addchar(output, actualsize, end, '0' + digit);
+		}
+	}
+	else
+	{
+		// any other base is printed as decimal, via a scratch buffer filled from the last digit backwards
+
+		// buffer large enough for any int (up to 64bit unsigned)
+		char intbuf[32] = {0};
+
+		// handle edge case of 64bit INT_MIN so we can negate the number and be sure we won't overflow.
+		// Only applies to negative signed values - an unsigned 2^63 has the same bits but must print as positive
+		if(negative && argu == 0x8000000000000000ULL)
+		{
+			addchar(output, actualsize, end, '-');
+			if((formatter.Flags & (LeftJustify|PadZeroes)) == PadZeroes && numPadWidth > 0)
+				addchars(output, actualsize, end, (size_t)numPadWidth, '0');
+			if(numPad0s > 0)
+				addchars(output, actualsize, end, (size_t)numPad0s, '0');
+			appendstring(output, actualsize, end, "9223372036854775808");
+		}
+		else
+		{
+			// we know we can negate without loss of precision because we handled 64bit INT_MIN above
+			if(negative)
+			{
+				addchar(output, actualsize, end, '-');
+				argi = -argi;
+			}
+			else if(formatter.Flags & PrependPos)
+				addchar(output, actualsize, end, '+');
+			else if(formatter.Flags & PrependSpace)
+				addchar(output, actualsize, end, ' ');
+
+			if((formatter.Flags & (LeftJustify|PadZeroes)) == PadZeroes && numPadWidth > 0)
+				addchars(output, actualsize, end, (size_t)numPadWidth, '0');
+			if(numPad0s > 0)
+				addchars(output, actualsize, end, (size_t)numPad0s, '0');
+
+			if(typeUnsigned)
+			{
+				uint64_t accum = argu;
+				for(int i=0; i < digwidth; i++)
+				{
+					int digit = accum%10;
+					accum /= 10;
+
+					intbuf[digwidth-1-i] = char('0' + digit);
+				}
+			}
+			else
+			{
+				int64_t accum = argi;
+				for(int i=0; i < digwidth; i++)
+				{
+					int digit = accum%10;
+					accum /= 10;
+
+					intbuf[digwidth-1-i] = char('0' + digit);
+				}
+			}
+
+			char *istr = intbuf;
+			while(*istr == '0') istr++;
+
+			if(*istr == 0 && istr > intbuf) istr--;
+
+			appendstring(output, actualsize, end, istr);
+		}
+	}
+
+	// if we were left justifying, pad on the right with spaces
+	if((formatter.Flags & LeftJustify) && numPadWidth > 0)
+	{
+		addchars(output, actualsize, end, (size_t)numPadWidth, ' ');
+	}
+}
+
+// Prints a floating point zero. e / f select the exponent / fixed layouts (neither gives the short form); prepend is a sign character or 0.
+void PrintFloat0(bool e, bool f, FormatterParams formatter, char prepend, char *&output, size_t &actualsize, char *end)
+{
+	int numwidth = 0;
+
+	if(e)
+		numwidth = formatter.Precision+1+5; // 0 plus precision plus e+000
+	else if(f || formatter.Flags & AlternateForm)
+		numwidth = formatter.Precision+1;   // 0 plus precision
+	else
+		numwidth = 1;
+
+	// alternate form means . is included even if no digits after .
+	if(((e || f) && formatter.Precision > 0) || (formatter.Flags & AlternateForm))
+		numwidth++; // .
+
+	// sign space
+	if(prepend)	numwidth++;
+
+	int padlen = 0;
+
+	if(formatter.Width != FormatterParams::NoWidth && formatter.Width > numwidth)
+		padlen = formatter.Width - numwidth;
+	// left justification overrides zero padding (as in PrintInteger), otherwise the
+	// field would be padded twice - with zeroes here and with spaces at the end
+	if((formatter.Flags & (LeftJustify|PadZeroes)) == PadZeroes)
+	{
+		if(prepend) addchar(output, actualsize, end, prepend);
+		addchars(output, actualsize, end, size_t(padlen), '0');
+	}
+	else if(padlen > 0 && (formatter.Flags & LeftJustify) == 0)
+	{
+		addchars(output, actualsize, end, size_t(padlen), ' ');
+		if(prepend) addchar(output, actualsize, end, prepend);
+	}
+	else
+	{
+		if(prepend) addchar(output, actualsize, end, prepend);
+	}
+
+	// print a .0 for all cases except non-alternate %g
+	if(e || f || formatter.Flags & AlternateForm)
+	{
+		addchar(output, actualsize, end, '0');
+		if(formatter.Precision > 0 || (formatter.Flags & AlternateForm))
+			addchar(output, actualsize, end, '.');
+		addchars(output, actualsize, end, size_t(formatter.Precision), '0');
+
+		if(e)
+			appendstring(output, actualsize, end, "e+000");
+	}
+	else
+	{
+		addchar(output, actualsize, end, '0');
+	}
+
+	// when left justifying, the remaining width is filled with spaces on the right
+	if(padlen > 0 && (formatter.Flags & LeftJustify))
+	{
+		addchars(output, actualsize, end, size_t(padlen), ' ');
+	}
+}
+
+void PrintFloat(double argd, FormatterParams &formatter, bool e, bool f, bool g, bool uppercaseDigits,
+                char *& output, size_t &actualsize, char *end)
+{
+	// Formats argd in %e, %f or %g style (selected by e, f, g) honouring formatter's flags, width and
+	// precision. First extract the sign, biased exponent and mantissa bits out of the double.
+	uint64_t *arg64 = (uint64_t *)&argd;
+	bool signbit = (*arg64 & 0x8000000000000000) ? true : false;
+	uint64_t rawexp = (*arg64 & 0x7ff0000000000000) >> 52;
+	int exponent = int(rawexp) - 1023;
+	uint64_t mantissa = (*arg64 & 0x000fffffffffffff);
+	char prepend = '\0'; // sign character to emit before the number, if any
+
+	if(signbit)
+		prepend = '-';
+	else if(formatter.Flags & PrependPos)
+		prepend = '+';
+	else if(formatter.Flags & PrependSpace)
+		prepend = ' ';
+
+	// special-case handling of printing 0
+	if(rawexp == 0 && mantissa == 0)
+	{
+		PrintFloat0(e, f, formatter, prepend, output, actualsize, end);
+	}
+	// handle 'special' values, inf and nan. These are printed as-is, with no width padding applied
+	else if(rawexp == 0x7ff)
+	{
+		if(mantissa == 0)
+		{
+			if(signbit)
+				appendstring(output, actualsize, end, uppercaseDigits ? "-INF" : "-inf");
+			else
+				appendstring(output, actualsize, end, uppercaseDigits ? "+INF" : "+inf");
+		}
+		else
+		{
+			appendstring(output, actualsize, end, uppercaseDigits ? "NAN" : "nan");
+		}
+	}
+	else
+	{
+		// call out to grisu2 to generate digits + exponent
+		char digits[18] = {0};
+
+		int K = 0;
+		int ndigits = grisu2(mantissa, exponent, digits, K);
+
+		// this is the decimal exponent (ie. 0 if the digits are 1.2345)
+		int expon = K + ndigits - 1;
+
+		// number of digits after the decimal
+		int decdigits = ndigits - expon - 1;
+
+		// for exponential form, this is always 1 less than the total number of digits
+		if(e) decdigits = RDCMAX(0, ndigits-1);
+
+		// see if we need to trim some digits (for %g, the precision is the number of
+		// significant figures which is just ndigits at the moment, will be padded with 0s
+		// later).
+		if(decdigits > formatter.Precision || (g && ndigits > formatter.Precision))
+		{
+			int removedigs = decdigits - formatter.Precision;
+
+			if(g) removedigs = RDCMAX(0, ndigits - formatter.Precision);
+
+			// if we're removing all digits, just check the first to see if it should be
+			// rounded up or down
+			if(removedigs == ndigits)
+			{
+				ndigits = 1;
+				if(digits[0] < '5')
+				{
+					digits[0] = '0';
+				}
+				else
+				{
+					// round up to "1" on the next exponent
+					digits[0] = '1';
+					expon++;
+				}
+			}
+			else
+			{
+				// remove the specified number of digits
+				ndigits -= removedigs;
+
+				// round up the last digit (continually rolling up if necessary)
+				// note this will look 'ahead' into the last removed digits at first
+				bool carry = true;
+				for(int i=ndigits-1; i >= 0; i--)
+				{
+					// should we round up?
+					if(digits[i+1] >= '5')
+					{
+						digits[i+1] = 0;
+
+						// unless current digit is a 9, we can just increment it and stop
+						if(digits[i] < '9')
+						{
+							digits[i]++;
+							carry = false;
+							break;
+						}
+
+						// continue (carry to next digit)
+					}
+					else
+					{
+						// didn't need to round up, everything's fine.
+						carry = false;
+						break;
+					}
+
+					// trim off a digit (was a 9)
+					ndigits--;
+					continue;
+				}
+
+				// we only get here with carry still true if digits are 9999999
+				if(carry)
+				{
+					// round up to "1" on the next exponent
+					ndigits = 1;
+					digits[0] = '1';
+					expon++;
+				}
+			}
+		}
+
+		// recalculate decimal digits with new ndigits
+		decdigits = ndigits - expon - 1;
+		if(e) decdigits = RDCMAX(0, ndigits-1);
+
+		// number of trailing 0s we need to pad after decimal point determined by
+		// the precision
+		int padtrailing0s = formatter.Precision - RDCMAX(0, decdigits);
+
+		if(g)
+		{
+			// for %g if the exponent is too far out of range, we revert to exponential form
+			if(expon >= formatter.Precision || expon < -4)
+			{
+				e = true;
+
+				// if not alternate form, all trailing 0 digits are removed and there is no padding.
+				if((formatter.Flags & AlternateForm) == 0)
+				{
+					while(ndigits > 1 && digits[ndigits-1] == '0')
+						ndigits--;
+
+					padtrailing0s = 0;
+				}
+				else
+					padtrailing0s = formatter.Precision - RDCMAX(0, ndigits);
+			}
+			else
+			{
+				padtrailing0s = formatter.Precision - RDCMAX(0, ndigits);
+			}
+		}
+
+		// exponential display
+		if(e)
+		{
+			int numwidth = 0;
+
+			// first calculate the width of the produced output, so we can calculate any padding
+
+			numwidth = ndigits; // digits
+			if(ndigits > 1 || (formatter.Flags & AlternateForm) || padtrailing0s > 0)
+				numwidth++; // '.'
+			numwidth += padtrailing0s;
+			numwidth += 2; // 'e+' or 'e-'
+			if(expon >= 1000 || expon <= -1000)
+				numwidth += 4;
+			else
+				numwidth += 3;
+			if(prepend) numwidth++; // +, - or ' '
+
+			int padlen = 0;
+
+			if(formatter.Width != FormatterParams::NoWidth && formatter.Width > numwidth)
+				padlen = formatter.Width - numwidth;
+
+			// pad with 0s or ' 's and insert the sign character
+			if(formatter.Flags & PadZeroes)
+			{
+				if(prepend) addchar(output, actualsize, end, prepend);
+				addchars(output, actualsize, end, size_t(padlen), '0');
+			}
+			else if(padlen > 0 && (formatter.Flags & LeftJustify) == 0)
+			{
+				addchars(output, actualsize, end, size_t(padlen), ' ');
+				if(prepend) addchar(output, actualsize, end, prepend);
+			}
+			else
+			{
+				if(prepend) addchar(output, actualsize, end, prepend);
+			}
+
+			// insert the mantissa as a 1.23456 decimal
+			addchar(output, actualsize, end, digits[0]);
+			if(ndigits > 1 || (formatter.Flags & AlternateForm) || padtrailing0s > 0)
+				addchar(output, actualsize, end, '.');
+			for(int i=1; i < ndigits; i++)
+				addchar(output, actualsize, end, digits[i]);
+
+			// add the trailing 0s here
+			if(padtrailing0s > 0)
+				addchars(output, actualsize, end, size_t(padtrailing0s), '0');
+
+			// print the e-XXX exponential
+			addchar(output, actualsize, end, uppercaseDigits ? 'E' : 'e');
+			if(expon >= 0)
+				addchar(output, actualsize, end, '+');
+			else
+				addchar(output, actualsize, end, '-');
+
+			int exponaccum = expon >= 0 ? expon : -expon;
+
+			if(exponaccum >= 1000)
+				addchar(output, actualsize, end, '0' + char(exponaccum/1000));
+			exponaccum %= 1000;
+
+			addchar(output, actualsize, end, '0' + char(exponaccum/100));
+			exponaccum %= 100;
+			addchar(output, actualsize, end, '0' + char(exponaccum/10));
+			exponaccum %= 10;
+			addchar(output, actualsize, end, '0' + char(exponaccum));
+
+			if(padlen > 0 && (formatter.Flags & LeftJustify))
+			{
+				addchars(output, actualsize, end, size_t(padlen), ' ');
+			}
+		}
+		else if(digits[0] == '0' && ndigits == 1)
+		{
+			// if we rounded off to a 0.0, print it with special handling
+			PrintFloat0(e, f, formatter, prepend, output, actualsize, end);
+		}
+		else
+		{
+			// we're printing as a normal decimal, e.g. 12345.6789
+
+			// if %g and not in alternate form, all 0s after the decimal point are stripped
+			if(g && (formatter.Flags & AlternateForm) == 0)
+				while(ndigits > 1 && ndigits-1 > expon && digits[ndigits-1] == '0')
+					ndigits--;
+
+			int numwidth = 0;
+
+			// first calculate the width of the produced output, so we can calculate any padding
+
+			// always all digits are printed (after trailing 0s optionally removed above)
+			numwidth = ndigits;
+
+			if(prepend) numwidth++; // prefix +, - or ' '
+
+			// if the exponent is exactly the number of digits we have, we have one 0 to pad
+			// before the decimal point, and special handling of whether to display the decimal
+			// point for %g. (note that exponent 0 is mantissa x 10^0 which is 1.2345)
+			if(expon == ndigits)
+			{
+				numwidth++; // 0 before decimal place
+
+				// if in alternate form for %g we print a . and any trailing 0s necessary to make
+				// up the precision (number of significant figures)
+				if(g && (formatter.Flags & AlternateForm))
+				{
+					numwidth++; // .
+
+					if(padtrailing0s > 1)
+						numwidth += (padtrailing0s-1);
+				}
+				else if(!g)
+				{
+					// otherwise we only print the . if alternate form is specified or we need to
+					// print trailing 0s
+					if(padtrailing0s > 0 || (formatter.Flags & AlternateForm))
+						numwidth++; // .
+					if(padtrailing0s > 0)
+						numwidth += padtrailing0s;
+				}
+			}
+			// exponent greater than ndigits means we have padding before the decimal place
+			// and no values after the decimal place
+			else if(expon > ndigits)
+			{
+				numwidth += (expon + 1 - ndigits); // 0s between digits and decimal place
+				if((!g || (formatter.Flags & AlternateForm)))
+					numwidth++; // .
+
+				if(padtrailing0s > 0 && (!g || (formatter.Flags & AlternateForm)))
+					numwidth += padtrailing0s;
+			}
+			else if(expon >= 0)
+			{
+				// expon < ndigits is true here
+
+				if(expon < ndigits-1 || !g || (formatter.Flags & AlternateForm))
+					numwidth++; // .
+
+				if(padtrailing0s > 0 && (!g || (formatter.Flags & AlternateForm)))
+					numwidth += padtrailing0s;
+			}
+			else //if(expon < 0)
+			{
+				numwidth += 2; // 0.;
+				numwidth += (-1-expon); // 0s before digits
+
+				if(!g || (formatter.Flags & AlternateForm))
+					numwidth += padtrailing0s;
+			}
+
+			int padlen = 0;
+
+			// calculate padding and print it (0s or ' 's) with the sign character
+			if(formatter.Width != FormatterParams::NoWidth && formatter.Width > numwidth)
+				padlen = formatter.Width - numwidth;
+
+			if(formatter.Flags & PadZeroes)
+			{
+				if(prepend) addchar(output, actualsize, end, prepend);
+				addchars(output, actualsize, end, size_t(padlen), '0');
+			}
+			else if(padlen > 0 && (formatter.Flags & LeftJustify) == 0)
+			{
+				addchars(output, actualsize, end, size_t(padlen), ' ');
+				if(prepend) addchar(output, actualsize, end, prepend);
+			}
+			else
+			{
+				if(prepend) addchar(output, actualsize, end, prepend);
+			}
+
+			// if the exponent is greater than 0 we have to handle padding,
+			// placing it correctly, whether to show the decimal place or not, etc
+			if(expon >= 0)
+			{
+				// print the digits, adding the . at the right column, as long as it's not
+				// after the last column AND we are in %g that's not alternate form (ie.
+				// trailing 0s and . are stripped)
+				for(int i=0; i < ndigits; i++)
+				{
+					addchar(output, actualsize, end, digits[i]);
+
+					if(i == expon)
+					{
+						if(i < ndigits-1 || !g || (formatter.Flags & AlternateForm))
+							addchar(output, actualsize, end, '.');
+					}
+				}
+
+				// handle printing trailing 0s here as well as a trailing. if it
+				// wasn't printed above, and is needed for the print form.
+				if(expon == ndigits)
+				{
+					addchar(output, actualsize, end, '0');
+
+					if(g && (formatter.Flags & AlternateForm))
+					{
+						addchar(output, actualsize, end, '.');
+
+						if(padtrailing0s > 1)
+							addchars(output, actualsize, end, size_t(padtrailing0s-1), '0');
+					}
+					else if(!g)
+					{
+						if(padtrailing0s > 0 || (formatter.Flags & AlternateForm))
+							addchar(output, actualsize, end, '.');
+						if(padtrailing0s > 0)
+							addchars(output, actualsize, end, size_t(padtrailing0s), '0');
+					}
+				}
+				else if(expon > ndigits)
+				{
+					addchars(output, actualsize, end, size_t(expon + 1 - ndigits), '0');
+					if((!g || (formatter.Flags & AlternateForm)))
+						addchar(output, actualsize, end, '.');
+
+					if(padtrailing0s > 0 && (!g || (formatter.Flags & AlternateForm)))
+						addchars(output, actualsize, end, size_t(padtrailing0s), '0');
+				}
+				else
+				{
+					if(padtrailing0s > 0 && (!g || (formatter.Flags & AlternateForm)))
+						addchars(output, actualsize, end, size_t(padtrailing0s), '0');
+				}
+			}
+			// if exponent is less than 0 it's much easier - just print the number as
+			// digits at the right column, then any trailing 0s necessary
+			else
+			{
+				appendstring(output, actualsize, end, "0.");
+				addchars(output, actualsize, end, size_t(-1-expon), '0');
+
+				appendstring(output, actualsize, end, digits, size_t(ndigits));
+
+				if(padtrailing0s > 0 && (!g || (formatter.Flags & AlternateForm)))
+					addchars(output, actualsize, end, size_t(padtrailing0s), '0');
+			}
+
+			if(padlen > 0 && (formatter.Flags & LeftJustify))
+			{
+				addchars(output, actualsize, end, size_t(padlen), ' ');
+			}
+		}
+	}
+}
+
+void formatargument(char type, void *rawarg, FormatterParams formatter, char *&output, size_t &actualsize, char *end)
+{
+	// Formats the single argument stored at rawarg according to the conversion character 'type' and
+	// the flags/width/precision/length in formatter. First case: a single character (ascii or wide)
+	if(type == 'c')
+	{
+		int arg = *(int *)rawarg;
+		// left padding - character is always by definition one space wide
+		if(formatter.Width != FormatterParams::NoWidth && !(formatter.Flags&LeftJustify))
+			addchars(output, actualsize, end, (size_t)formatter.Width - 1, ' ');
+
+		if(formatter.Length == Long)
+		{
+			wchar_t chr = (wchar_t)arg;
+
+			// convert single wide character to UTF-8 sequence, at most
+			// 4 characters
+			char mbchr[4];
+			int seqlen = StringFormat::Wide2UTF8(chr, mbchr);
+			appendstring(output, actualsize, end, mbchr, seqlen);
+		}
+		else
+		{
+			char chr = (char)arg;
+			addchar(output, actualsize, end, chr);
+		}
+
+		// right padding
+		if(formatter.Width != FormatterParams::NoWidth && (formatter.Flags&LeftJustify))
+			addchars(output, actualsize, end, (size_t)formatter.Width - 1, ' ');
+	}
+	else if(type == 's')
+	{
+		void* arg = *(void **)rawarg;
+
+		if(formatter.Length == Long)
+		{
+			const wchar_t *ws = (const wchar_t*)arg;
+
+			if(arg == NULL) ws = L"(null)";
+
+			size_t width = (size_t)formatter.Width;
+			size_t precision = (size_t)formatter.Precision;
+			size_t len = wcslen(ws);
+			// clip length to precision
+			if(formatter.Precision != FormatterParams::NoPrecision)
+				len = RDCMIN(len, precision);
+
+			// convert the substring to UTF-8
+			string str = StringFormat::Wide2UTF8(wstring(ws, ws + len));
+
+			// add left padding, if necessary
+			if(formatter.Width != FormatterParams::NoWidth && len < width && !(formatter.Flags&LeftJustify))
+				addchars(output, actualsize, end, width-len, ' ');
+
+			appendstring(output, actualsize, end, str.c_str());
+
+			// add right padding
+			if(formatter.Width != FormatterParams::NoWidth && len < width && (formatter.Flags&LeftJustify))
+				addchars(output, actualsize, end, width-len, ' ');
+		}
+		else
+		{
+			const char *s = (const char *)arg;
+			if(arg == NULL) s = "(null)";
+			if(formatter.Precision != FormatterParams::NoPrecision && formatter.Precision == 0) s = ""; // %.0s prints nothing, the scan below can never clip at 0 codepoints
+
+			size_t len = 0;
+			size_t clipoffs = 0;
+			size_t width = (size_t)formatter.Width;
+			size_t precision = (size_t)formatter.Precision;
+
+			// iterate through UTF-8 string to find its length (for padding in case
+			// format width is longer than the string) or where to clip off a substring
+			// (if the precision is shorter than the string)
+			const char *si = s;
+			while(*si)
+			{
+				if((*si & 0x80) == 0) // ascii character
+				{
+					si++;
+				}
+				else // lead byte of a sequence (or a stray continuation byte in malformed input - must still advance)
+				{
+					si++;
+					// skip past continuation bytes (if we hit a NULL terminator this loop will break out)
+					while((*si & 0xC0) == 0x80) si++;
+				}
+
+				len++; // one more codepoint
+				if(len == precision && formatter.Precision != FormatterParams::NoPrecision)
+				{
+					// if we've reached the desired precision we can stop counting
+					clipoffs = (si - s);
+					break;
+				}
+			}
+
+			if(formatter.Width != FormatterParams::NoWidth && len < width && !(formatter.Flags&LeftJustify))
+				addchars(output, actualsize, end, width-len, ' ');
+
+			if(clipoffs > 0)
+				appendstring(output, actualsize, end, s, clipoffs);
+			else
+				appendstring(output, actualsize, end, s);
+
+			if(formatter.Width != FormatterParams::NoWidth && len < width && (formatter.Flags&LeftJustify))
+				addchars(output, actualsize, end, width-len, ' ');
+		}
+	}
+	else if(
+		type == 'p' ||
+		type == 'b' || type == 'B' ||
+		type == 'o' ||
+		type == 'x' || type == 'X' ||
+		type == 'd' || type == 'i' || type == 'u'
+		)
+	{
+		uint64_t argu = 0;
+		uint64_t numbits = 4;
+
+		int base = 10;
+		bool uppercaseDigits = false;
+		bool typeUnsigned = false;
+
+		if(type == 'p')
+		{
+			// fetch pointer and set settings
+			argu = (uint64_t)*(void **)rawarg;
+			numbits = 8*sizeof(size_t);
+			uppercaseDigits = true;
+			typeUnsigned = true;
+			base = 16;
+
+			// pointer always padded to right number of hex digits
+			formatter.Precision = RDCMAX(formatter.Precision, int(2*sizeof(size_t)));
+
+			if(formatter.Flags & AlternateForm) formatter.Precision += 2;
+		}
+		else
+		{
+			// fetch the parameter and set its size. Anything narrower than long long/size_t was
+			// stored as an unsigned int, numbits records how many of those bits are meaningful
+			switch(formatter.Length)
+			{
+				default:
+				case None:
+				case Long:
+					argu = (uint64_t)*(unsigned int *)rawarg;
+					numbits = 8*sizeof(unsigned int);
+					break;
+				case HalfHalf:
+					numbits = 8*sizeof(unsigned char);
+					argu = (uint64_t)*(unsigned int *)rawarg;
+					break;
+				case Half:
+					numbits = 8*sizeof(unsigned short);
+					argu = (uint64_t)*(unsigned int *)rawarg;
+					break;
+				case LongLong:
+					numbits = 8*sizeof(unsigned long long);
+					argu = (uint64_t)*(unsigned long long *)rawarg;
+					break;
+				case SizeT:
+					numbits = 8*sizeof(size_t);
+					argu = (uint64_t)*(size_t *)rawarg;
+					typeUnsigned = true;
+					break;
+			}
+			uppercaseDigits = (type < 'a'); // 'B' and 'X' sort before every lowercase specifier
+
+			if(type == 'x' || type == 'X')
+				base = 16;
+			if(type == 'o')
+				base = 8;
+			if(type == 'b' || type == 'B')
+				base = 2;
+
+			if(type == 'u')
+				typeUnsigned = true;
+		}
+
+		PrintInteger(typeUnsigned, argu, base, numbits, formatter, uppercaseDigits, output, actualsize, end);
+	}
+	else if(
+		type == 'e' || type == 'E'
+		|| type == 'f' || type == 'F'
+		|| type == 'g' || type == 'G'
+		//|| type == 'a' || type == 'A' // hex floats not supported
+		)
+	{
+		bool uppercaseDigits = type < 'a';
+		double argd = *(double *)rawarg;
+
+		if(formatter.Precision == FormatterParams::NoPrecision)
+			formatter.Precision = 6;
+
+		formatter.Precision = RDCMAX(0, formatter.Precision);
+		// with no fractional digits requested, snap magnitudes below 1 to 0 or 1 up front
+		if(formatter.Precision == 0)
+		{
+			if(argd > 0.0f && argd < 1.0f)
+				argd = argd < 0.5f ? 0.0f : 1.0f;
+			else if(argd < 0.0f && argd > -1.0f)
+				argd = argd > -0.5f ? 0.0f : -1.0f;
+		}
+
+		bool e = (type == 'e' || type == 'E');
+		bool f = (type == 'f' || type == 'F');
+		bool g = (type == 'g' || type == 'G');
+
+		PrintFloat(argd, formatter, e, f, g, uppercaseDigits, output, actualsize, end);
+	}
+	else
+	{
+		// Unrecognised format specifier
+		RDCDUMPMSG("Unrecognised % formatter");
+	}
+}
+
+int utf8printf(char *buf, size_t bufsize, const char *fmt, va_list args)
+{
+	// Formats fmt + args into buf (at most bufsize bytes, NUL terminated whenever bufsize > 0) and
+	// returns the accumulated actualsize. Format, buffer and string arguments are assumed to be UTF-8
+	// (except wide strings). Format specifiers are entirely ascii and % is not a valid UTF-8 starting
+	// or continuation byte, so until we reach a % we can dumbly copy any other byte
+
+	size_t actualsize = 0;
+	char *output = (buf && bufsize > 0) ? buf : NULL; // a zero-sized buffer is treated like no buffer
+	char *end = output ? output+bufsize-1 : NULL;     // (buf+0-1 would point before the buffer)
+	if(end) *end = 0;
+
+	const char *iter = fmt;
+
+	while(*iter)
+	{
+		if(*iter == '%')
+		{
+			iter++;
+
+			if(*iter == 0) RDCDUMPMSG("unterminated formatter (should be %% if you want a literal %)");
+
+			if(*iter == '%') // %% found, insert single % and continue copying
+			{
+				addchar(output, actualsize, end, *iter);
+				iter++;
+				continue;
+			}
+		}
+		else
+		{
+			// not a %, continue copying
+			addchar(output, actualsize, end, *iter);
+			iter++;
+			continue;
+		}
+
+		FormatterParams formatter;
+
+		//////////////////////////////
+		// now parsing an argument specifier
+
+		// parse out 0 or more flags
+		do
+		{
+			// if flag is found, continue looping to possibly find more flags
+			// otherwise break out of this loop
+			     if(*iter == '-') formatter.Flags |= LeftJustify;
+			else if(*iter == '+') formatter.Flags |= PrependPos;
+			else if(*iter == ' ') formatter.Flags |= PrependSpace;
+			else if(*iter == '#') formatter.Flags |= AlternateForm;
+			else if(*iter == '0') formatter.Flags |= PadZeroes;
+			else                  break;
+
+			// left justify overrides pad with zeroes
+			if(formatter.Flags & LeftJustify)
+				formatter.Flags &= ~PadZeroes;
+
+			// prepend + overrides prepend ' '
+			if(formatter.Flags & PrependPos)
+				formatter.Flags &= ~PrependSpace;
+
+			iter++;
+		} while(true);
+
+		// possibly parse a width. Note that width always starts with 1-9 as it's decimal,
+		// and 0 or - would have been picked up as a flag above
+		{
+			// note standard printf supports * here to read the width from a vararg before
+			// the actual argument. We don't support that
+
+			// Width found
+			if(*iter >= '1' && *iter <= '9')
+			{
+				formatter.Width = int(*iter - '0');
+				iter++; // step to next character
+
+				// continue while encountering digits, accumulating into width
+				while(*iter >= '0' && *iter <= '9')
+				{
+					formatter.Width *= 10;
+					formatter.Width += int(*iter - '0');
+					iter++;
+				}
+
+				// unterminated formatter
+				if(*iter == 0) RDCDUMPMSG("Unterminated % formatter found after width");
+			}
+			else
+			{
+				// no width specified
+				formatter.Width = FormatterParams::NoWidth;
+			}
+		}
+
+		// parse out precision. 0 is valid here, but negative isn't
+		{
+			// precision found
+			if(*iter == '.')
+			{
+				iter++;
+
+				// invalid character following '.' it should be an integer
+				// note standard printf supports * here to read precision from a vararg
+				if(*iter < '0' || *iter > '9') RDCDUMPMSG("Unexpected character expecting precision");
+
+				formatter.Precision = int(*iter - '0');
+				iter++; // step to next character
+
+				// continue while encountering digits, accumulating into precision
+				while(*iter >= '0' && *iter <= '9')
+				{
+					formatter.Precision *= 10;
+					formatter.Precision += int(*iter - '0');
+					iter++;
+				}
+
+				// unterminated formatter
+				if(*iter == 0) RDCDUMPMSG("Unterminated % formatter found after precision");
+			}
+			else
+			{
+				// no precision specified
+				formatter.Precision = FormatterParams::NoPrecision;
+			}
+		}
+
+		// parse out length modifier
+		{
+			// length modifier characters are assumed to be disjoint with format specifiers
+			// so that we don't have to look-ahead to determine if a character is a length
+			// modifier or format specifier.
+
+			     if(*iter == 'z')  formatter.Length = SizeT;
+			else if(*iter == 'l')
+			{
+				if(*(iter+1) == 'l') formatter.Length = LongLong;
+				else                 formatter.Length = Long;
+			}
+			else if(*iter == 'L')  formatter.Length = Long;
+			else if(*iter == 'h')
+			{
+				if(*(iter+1) == 'h') formatter.Length = HalfHalf;
+				else                 formatter.Length = Half;
+			}
+			else
+			{
+				formatter.Length = None;
+			}
+
+			if(formatter.Length == HalfHalf || formatter.Length == LongLong)
+				iter += 2;
+			else if(formatter.Length != None)
+				iter++;
+		}
+
+		// now we parse the format specifier itself and apply all the information
+		// we grabbed above
+		char type = *(iter++);
+
+		// all elements fit in at most a uint64_t
+		uint64_t elem;
+		void *arg = (void *)&elem;
+
+		// fetch arg here (can't pass va_list easily by reference in a portable way)
+		if(type == 'c')
+		{
+			int *i = (int *)arg;
+			*i = va_arg(args, int);
+		}
+		else if(type == 's' || type == 'p')
+		{
+			void **p = (void **)arg;
+			*p = va_arg(args, void*);
+		}
+		else if(type == 'e' || type == 'E'
+			|| type == 'f' || type == 'F'
+			|| type == 'g' || type == 'G')
+		{
+			double *i = (double *)arg;
+			*i = va_arg(args, double);
+		}
+		else if(
+			type == 'b' || type == 'B' ||
+			type == 'o' ||
+			type == 'x' || type == 'X' ||
+			type == 'd' || type == 'i' || type == 'u')
+		{
+			if(formatter.Length == LongLong)
+			{
+				unsigned long long *ull = (unsigned long long *)arg;
+				*ull = va_arg(args, unsigned long long);
+			}
+			else if(formatter.Length == SizeT)
+			{
+				size_t *s = (size_t *)arg;
+				*s = va_arg(args, size_t);
+			}
+			else
+			{
+				unsigned int *u = (unsigned int *)arg;
+				*u = va_arg(args, unsigned int);
+			}
+		}
+		else
+		{
+			RDCDUMPMSG("Unrecognised % formatter");
+		}
+
+		formatargument(type, arg, formatter, output, actualsize, end);
+	}
+
+	// if we filled the buffer, remove any UTF-8 characters that might have been
+	// truncated. We just do nothing if we encounter an invalid sequence, e.g.
+	// continuation bytes without a starting byte, or too many continuation bytes
+	// for a starting byte.
+	if(output == end && output != NULL)
+	{
+		char *last = output-1;
+		int numcont = 0;
+		while(last >= buf)
+		{
+			if((*last & 0x80) == 0) // ascii character
+			{
+				break;
+			}
+			else if((*last & 0xC0) == 0x80) // continuation byte
+			{
+				numcont++; // count the number of continuation bytes
+			}
+			else if((*last & 0xC0) == 0xC0) // first byte of a sequence
+			{
+				int expected = 0;
+
+				// 110xxxxx
+				if((*last & 0xE0) == 0xC0)
+					expected = 1;
+				// 1110xxxx
+				else if((*last & 0xF0) == 0xE0)
+					expected = 2;
+				// 11110xxx
+				else if((*last & 0xF8) == 0xF0)
+					expected = 3;
+
+				// if the sequence was truncated, remove it entirely
+				if(numcont < expected)
+					output = last;
+
+				break;
+			}
+			last--;
+		}
+	}
+
+	if(output) *output = 0;
+
+	return int(actualsize);
+}