Implement custom printf function, use it in favour of OS *printf

* Custom printf supports UTF-8 and in fact assumes it implicitly for all
  strings. This means we can use UTF-8 everywhere, even on windows, without
  widening parameters to printf() style functions like RDCLOG et al.
* It also allows me to add size_t support and binary printing.
* Grisu2 is used for converting doubles to strings. Reference:

Paper: http://florian.loitsch.com/publications/dtoa-pldi2010.pdf
impl: https://github.com/floitsch/double-conversion
impl: https://github.com/night-shift/fpconv
impl: https://github.com/miloyip/dtoa-benchmark
This commit is contained in:
baldurk
2014-11-22 23:45:33 +00:00
parent 3abdff49a7
commit 9d164826da
7 changed files with 1770 additions and 59 deletions
+65
View File
@@ -26,8 +26,73 @@
#include "os/os_specific.h"
#include "common/string_utils.h"
#include <stdarg.h>
using std::string;
int utf8printf(char *buf, size_t bufsize, const char *fmt, va_list args);
namespace StringFormat
{
int snprintf(char *str, size_t bufSize, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
int ret = StringFormat::vsnprintf(str, bufSize, fmt, args);
va_end(args);
return ret;
}
int vsnprintf(char *str, size_t bufSize, const char *format, va_list args)
{
return ::utf8printf(str, bufSize, format, args);
}
int Wide2UTF8(wchar_t chr, char mbchr[4])
{
//U+00000 -> U+00007F 1 byte 0xxxxxxx
//U+00080 -> U+0007FF 2 bytes 110xxxxx 10xxxxxx
//U+00800 -> U+00FFFF 3 bytes 1110xxxx 10xxxxxx 10xxxxxx
//U+10000 -> U+1FFFFF 4 bytes 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
if(chr > 0x10FFFF)
chr = 0xFFFD; // replacement character
if(chr <= 0x7f)
{
mbchr[0] = (char)chr;
return 1;
}
else if(chr <= 0x7ff)
{
mbchr[1] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
mbchr[0] = 0xC0 | (char)(chr & 0x1f);
return 2;
}
else if(chr <= 0xffff)
{
mbchr[2] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
mbchr[1] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
mbchr[0] = 0xE0 | (char)(chr & 0x0f);chr >>= 4;
return 3;
}
else
{
// invalid codepoints above 0x10FFFF were replaced above
mbchr[3] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
mbchr[2] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
mbchr[1] = 0x80 | (char)(chr & 0x3f);chr >>= 6;
mbchr[0] = 0xF0 | (char)(chr & 0x07);chr >>= 3;
return 4;
}
}
}; // namespace StringFormat
wstring Callstack::AddressDetails::formattedString(const char *commonPath)
{
wchar_t fmt[512] = {0};
+16 -5
View File
@@ -202,19 +202,30 @@ namespace Keyboard
bool GetKeyState(int key);
};
// implemented per-platform
namespace StringFormat
{
int snprintf(char *str, size_t bufSize, const char *format, ...);
int wsnprintf(wchar_t *str, size_t bufSize, const wchar_t *format, ...);
int vsnprintf(char *str, size_t bufSize, const char *format, va_list v);
void sntimef(char *str, size_t bufSize, const char *format);
void wcsncpy(wchar_t *dst, const wchar_t *src, size_t count);
// forwards to vsnprintf below, needed to be here due to va_copy differences
string Fmt(const char *format, ...);
wstring WFmt(const wchar_t *format, ...);
string Wide2UTF8(const wstring &s);
wstring UTF82Wide(const string &s);
// TODO remove
int wsnprintf(wchar_t *str, size_t bufSize, const wchar_t *format, ...);
wstring WFmt(const wchar_t *format, ...);
};
// utility functions, implemented in os_specific.cpp, not per-platform (assuming standard stdarg.h)
// forwarded to custom printf implementation in utf8printf.cpp
namespace StringFormat
{
int vsnprintf(char *str, size_t bufSize, const char *format, va_list v);
int snprintf(char *str, size_t bufSize, const char *format, ...);
int Wide2UTF8(wchar_t chr, char mbchr[4]);
};
namespace OSUtility
+40 -54
View File
@@ -265,71 +265,19 @@ namespace FileIO
namespace StringFormat
{
int snprintf(char *str, size_t bufSize, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
int ret = vsnprintf(str, bufSize, fmt, args);
va_end(args);
return ret;
}
///////////////////////////////////////////////////////////////////////////
int wsnprintf(wchar_t *str, size_t bufSize, const wchar_t *format, ...)
{
va_list args;
va_start(args, format);
int ret = ::_vsnwprintf_s(str, bufSize, bufSize, format, args);
int ret = ::_vsnwprintf_s(str, bufSize, bufSize-1, format, args);
va_end(args);
return ret;
}
int vsnprintf(char *str, size_t bufSize, const char *format, va_list args)
{
return ::vsnprintf_s(str, bufSize, bufSize, format, args);
}
void sntimef(char *str, size_t bufSize, const char *format)
{
time_t tim;
time(&tim);
tm tmv;
localtime_s(&tmv, &tim);
strftime(str, bufSize, format, &tmv);
}
void wcsncpy(wchar_t *dst, const wchar_t *src, size_t count)
{
::wcsncpy_s(dst, count, src, count);
}
string Fmt(const char *format, ...)
{
va_list args;
va_start(args, format);
int size = _vscprintf(format, args)+1;
char *buf = new char[size];
StringFormat::vsnprintf(buf, size, format, args);
va_end(args);
string ret = buf;
delete[] buf;
return ret;
}
wstring WFmt(const wchar_t *format, ...)
{
va_list args;
@@ -349,6 +297,44 @@ namespace StringFormat
return ret;
}
///////////////////////////////////////////////////////////////////////////
void sntimef(char *str, size_t bufSize, const char *format)
{
time_t tim;
time(&tim);
tm tmv;
localtime_s(&tmv, &tim);
strftime(str, bufSize, format, &tmv);
}
string Fmt(const char *format, ...)
{
va_list args;
va_start(args, format);
va_list args2;
//va_copy(args2, args); // not implemented on VS2010
args2 = args;
int size = StringFormat::vsnprintf(NULL, 0, format, args2);
char *buf = new char[size+1];
buf[size] = 0;
StringFormat::vsnprintf(buf, size, format, args);
va_end(args);
va_end(args2);
string ret = buf;
delete[] buf;
return ret;
}
// save on reallocation, keep a persistent scratch buffer for conversions
vector<char> charBuffer;
+2
View File
@@ -367,7 +367,9 @@
<ClCompile Include="replay\replay_output.cpp" />
<ClCompile Include="replay\replay_renderer.cpp" />
<ClCompile Include="replay\type_helpers.cpp" />
<ClCompile Include="serialise\grisu2.cpp" />
<ClCompile Include="serialise\serialiser.cpp" />
<ClCompile Include="serialise\utf8printf.cpp" />
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="data\renderdoc.rc" />
+9
View File
@@ -85,6 +85,9 @@
<Filter Include="3rdparty\lz4">
<UniqueIdentifier>{043f5a32-683e-4b56-bcc6-512444b40d70}</UniqueIdentifier>
</Filter>
<Filter Include="Common\Strings">
<UniqueIdentifier>{ce0b860f-38b7-48af-b49d-7dcb23378f82}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="maths\camera.h">
@@ -524,6 +527,12 @@
<ClCompile Include="driver\d3d11\d3d11_context2_wrap.cpp">
<Filter>Drivers\D3D11</Filter>
</ClCompile>
<ClCompile Include="serialise\grisu2.cpp">
<Filter>Common\Strings</Filter>
</ClCompile>
<ClCompile Include="serialise\utf8printf.cpp">
<Filter>Common\Strings</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="os\win32\comexport.def">
+324
View File
@@ -0,0 +1,324 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2014 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "common/common.h"
#include <math.h>
///////////////////////////////////////////////////////////////////////////
// Grisu2 implementation (slightly simpler than Grisu3) for converting
// doubles to strings
//
// Sources:
// Based on Florian Loitsch 2010 "Printing Floating-Point Numbers Quickly
// and Accurately with Integers"
// http://florian.loitsch.com/publications/dtoa-pldi2010.pdf
// https://github.com/floitsch/double-conversion (BSD licensed)
//
// Also implementations by Milo Yip and night-shift used as reference
// https://github.com/miloyip/dtoa-benchmark
// https://github.com/night-shift/fpconv
struct diy_fp
{
diy_fp() {}
diy_fp(uint64_t mant, int exponent) : mantissa(mant), exp(exponent) {}
uint64_t mantissa;
int exp;
// q in the paper, bits in the mantissa of the fixed point
// approximation
static const int bitsq = 64;
};
// subtract from Florian paper
diy_fp operator -(const diy_fp &x, const diy_fp &y)
{
// assume same exponent
return diy_fp(x.mantissa-y.mantissa, x.exp);
}
// multiply from Florian paper
diy_fp operator *(const diy_fp &x, const diy_fp &y)
{
// _a = upper 32 bits, _b = lower 32 bits
uint64_t xa = x.mantissa >> 32, xb = x.mantissa & 0xFFFFFFFF;
uint64_t ya = y.mantissa >> 32, yb = y.mantissa & 0xFFFFFFFF;
// perform each pair of multiplies
uint64_t upper = xa*ya;
uint64_t lower = xb*yb;
uint64_t cross1 = xb*ya;
uint64_t cross2 = xa*yb;
uint64_t tmp = (lower>>32) + (cross1&0xFFFFFFFF) + (cross2&0xFFFFFFFF);
tmp += 1U << 31; // Round up
// note - exponent is no longer normalised
return diy_fp(upper + (cross1>>32) + (cross2>>32) + (tmp>>32), x.exp + y.exp + 64);
}
static diy_fp pow10cache[] = {
diy_fp(18054884314459144840U, -1220),
diy_fp(13451937075301367670U, -1193),
diy_fp(10022474136428063862U, -1166),
diy_fp(14934650266808366570U, -1140),
diy_fp(11127181549972568877U, -1113),
diy_fp(16580792590934885855U, -1087),
diy_fp(12353653155963782858U, -1060),
diy_fp(18408377700990114895U, -1034),
diy_fp(13715310171984221708U, -1007),
diy_fp(10218702384817765436U, -980),
diy_fp(15227053142812498563U, -954),
diy_fp(11345038669416679861U, -927),
diy_fp(16905424996341287883U, -901),
diy_fp(12595523146049147757U, -874),
diy_fp(9384396036005875287U, -847),
diy_fp(13983839803942852151U, -821),
diy_fp(10418772551374772303U, -794),
diy_fp(15525180923007089351U, -768),
diy_fp(11567161174868858868U, -741),
diy_fp(17236413322193710309U, -715),
diy_fp(12842128665889583758U, -688),
diy_fp(9568131466127621947U, -661),
diy_fp(14257626930069360058U, -635),
diy_fp(10622759856335341974U, -608),
diy_fp(15829145694278690180U, -582),
diy_fp(11793632577567316726U, -555),
diy_fp(17573882009934360870U, -529),
diy_fp(13093562431584567480U, -502),
diy_fp(9755464219737475723U, -475),
diy_fp(14536774485912137811U, -449),
diy_fp(10830740992659433045U, -422),
diy_fp(16139061738043178685U, -396),
diy_fp(12024538023802026127U, -369),
diy_fp(17917957937422433684U, -343),
diy_fp(13349918974505688015U, -316),
diy_fp(9946464728195732843U, -289),
diy_fp(14821387422376473014U, -263),
diy_fp(11042794154864902060U, -236),
diy_fp(16455045573212060422U, -210),
diy_fp(12259964326927110867U, -183),
diy_fp(18268770466636286478U, -157),
diy_fp(13611294676837538539U, -130),
diy_fp(10141204801825835212U, -103),
diy_fp(15111572745182864684U, -77),
diy_fp(11258999068426240000U, -50),
diy_fp(16777216000000000000U, -24),
diy_fp(12500000000000000000U, 3),
diy_fp(9313225746154785156U, 30),
diy_fp(13877787807814456755U, 56),
diy_fp(10339757656912845936U, 83),
diy_fp(15407439555097886824U, 109),
diy_fp(11479437019748901445U, 136),
diy_fp(17105694144590052135U, 162),
diy_fp(12744735289059618216U, 189),
diy_fp(9495567745759798747U, 216),
diy_fp(14149498560666738074U, 242),
diy_fp(10542197943230523224U, 269),
diy_fp(15709099088952724970U, 295),
diy_fp(11704190886730495818U, 322),
diy_fp(17440603504673385349U, 348),
diy_fp(12994262207056124023U, 375),
diy_fp(9681479787123295682U, 402),
diy_fp(14426529090290212157U, 428),
diy_fp(10748601772107342003U, 455),
diy_fp(16016664761464807395U, 481),
diy_fp(11933345169920330789U, 508),
diy_fp(17782069995880619868U, 534),
diy_fp(13248674568444952270U, 561),
diy_fp(9871031767461413346U, 588),
diy_fp(14708983551653345445U, 614),
diy_fp(10959046745042015199U, 641),
diy_fp(16330252207878254650U, 667),
diy_fp(12166986024289022870U, 694),
diy_fp(18130221999122236476U, 720),
diy_fp(13508068024458167312U, 747),
diy_fp(10064294952495520794U, 774),
diy_fp(14996968138956309548U, 800),
diy_fp(11173611982879273257U, 827),
diy_fp(16649979327439178909U, 853),
diy_fp(12405201291620119593U, 880),
diy_fp(9242595204427927429U, 907),
diy_fp(13772540099066387757U, 933),
diy_fp(10261342003245940623U, 960),
diy_fp(15290591125556738113U, 986),
diy_fp(11392378155556871081U, 1013),
diy_fp(16975966327722178521U, 1039),
diy_fp(12648080533535911531U, 1066),
};
static const int firstpow10 = -348; // first cached power of 10
static const int cachestep = 8; // power of 10 steps between cache items
diy_fp find_cachedpow10(int exp, int& kout)
{
const double inv_log2_10 = 0.30102999566398114;
const double alpha = -60.0;
// k calculation from the paper ceil[ (alpha - exp + q - 1) * 1/log2(10) ]
// exponent is shifted by #bits
int k = (int)ceil( (alpha - double(exp + diy_fp::bitsq) + diy_fp::bitsq - 1) * inv_log2_10 );
// determine index in above array
int idx = (-firstpow10 + k - 1) / cachestep + 1;
// output the decimal power that corresponds to this k
kout = (firstpow10 + idx * cachestep);
return pow10cache[idx];
}
static int gen_digits(const diy_fp &lower, const diy_fp &upper, char *digits, int &kout)
{
diy_fp delta = upper-lower;
// generate 1.0 to the desired exponent so we can split integer from decimal part
diy_fp one(uint64_t(1) << -upper.exp, upper.exp);
// mask off integer and decimal parts
uint64_t intpart = upper.mantissa >> -one.exp;
uint64_t decpart = upper.mantissa & (one.mantissa - 1);
// len is current number of digits produced
int len = 0;
// kappa is an exponent shift, to account for if we don't produce exactly the number
// of digits to reach the decimal place, and there should be extra 0s beyond the produced
// digits. (or negative if there should be preceeding 0s)
int kappa = 10;
uint32_t div = 1000000000; // highest possible pow10 in 32bits = 10^9
// handle integer component before decimal separator
while(kappa > 0)
{
// get digit at current power of ten
uint64_t digit = intpart / div;
// don't include preceeding 0 digits (so either include if
// digit is non-0, or if we've started including digits ie.
// len > 0)
if(digit || len) digits[len++] = '0' + char(digit);
// remove this pow10 from the int for future iterations
intpart %= div; kappa--; div /= 10;
// this is our termination condition, when we've produced the number.
// delta is the difference between upper and lower, and the left side
// is the current remainder after the currently generated digits have
// been removed. If that is small enough that we've produced the number,
// exit and increment kout to account for the extra exponential
if( (intpart << -one.exp) + decpart <= delta.mantissa)
{
kout += kappa;
return len;
}
}
// note, after this part if we're still here, intpart is 0 as we've
// masked off all digits, so only decpart remains.
// Kappa has also reached 0, beyond here it decrements below 0
// handle decimal portion after separator
do
{
decpart *= 10;
uint64_t digit = decpart >> -one.exp;
// don't include preceeding 0s (see above - note if we've produced
// any integer digits at all, len will be > 0)
if(digit || len) digits[len++] = '0' + char(digit);
// remove this pow10 from the decimal part
decpart &= (one.mantissa-1); kappa--; delta.mantissa *= 10;
// stop looping when decpart is lower than delta (see above for termination condition)
} while(decpart > delta.mantissa);
kout += kappa;
return len;
}
int grisu2(uint64_t mantissa, int exponent, char digits[18], int &kout)
{
// the IEEE format implicitly has a hidden 1 bit above the mantissa for all normalised
// numbers
const uint64_t hiddenbit = 0x0010000000000000;
// exponent is shifted by a further 52 because input exponent is assuming mantissa
// is 1.2345678...e exp (fraction)
// but grisu2 treats number as
// 12345678...e exp-52 (whole number)
diy_fp w = diy_fp(mantissa|hiddenbit, exponent - 52);
if(exponent == -1023) w.exp = 1 - (1023 + 52); // subnormal exponent
// we know the w input comes from a double, so is only using the lower 52 bits at
// most. We can safely multiply by 2 (and cancel by lowering exponent to match), then
// add 1 to get the upper value
diy_fp upper;
upper.mantissa = (w.mantissa<<1) + 1;
upper.exp = w.exp-1;
diy_fp lower;
if(mantissa == 0)
{
// if mantissa is 0 we are going to underflow, so shift by 2
// to maintain precision/normalised value
lower.mantissa = (w.mantissa<<2) - 1;
lower.exp = w.exp-2;
}
else
{
lower.mantissa = (w.mantissa<<1) - 1;
lower.exp = w.exp-1;
}
// normalise upper - shift the mantissa until the top bit is set, and decrement the
// exponent each time to keep the number the same (1.2e5 and 12e4 are equivalent representations)
while((upper.mantissa & 0x8000000000000000) == 0)
{
upper.mantissa <<= 1;
upper.exp--;
}
// lower needs to be the same exponent as upper so we can calculate delta by upper-lower, so
// it is not normalised, but shifted to upper's exponent
lower.mantissa <<= (lower.exp - upper.exp);
lower.exp = upper.exp;
int k = 0;
diy_fp ck = find_cachedpow10(upper.exp, k);
lower = lower * ck;
upper = upper * ck;
// squeeze the range in by 1 ULP
lower.mantissa++; upper.mantissa--;
// set our initial exponent. This will be shifted
// if we have any preceeding or trailing 0s to get the
// final exponent
kout = -k;
return gen_digits(lower, upper, digits, kout);
}
File diff suppressed because it is too large Load Diff