Merge pull request #76 from kaitai-io/to_string-simplify

generalmimon · web-flow · commit 0c6884e24d4d · 2025-03-20T12:05:51.000+01:00
to_string(): replace template with 6 overloads, simplify
diff --git a/.build/gtest-v1.8.1-fix-Werror-maybe-uninitialized.patch b/.build/gtest-v1.8.1-fix-Werror-maybe-uninitialized.patch
@@ -0,0 +1,21 @@
+diff --git a/googletest/src/gtest-death-test.cc b/googletest/src/gtest-death-test.cc
+index 0908355..d6f492e 100644
+--- a/googletest/src/gtest-death-test.cc
++++ b/googletest/src/gtest-death-test.cc
+@@ -1212,14 +1212,14 @@ static int ExecDeathTestChildMain(void* child_arg) {
+ static void StackLowerThanAddress(const void* ptr,
+                                   bool* result) GTEST_NO_INLINE_;
+ static void StackLowerThanAddress(const void* ptr, bool* result) {
+-  int dummy;
++  int dummy = 0;
+   *result = (&dummy < ptr);
+ }
+
+ // Make sure AddressSanitizer does not tamper with the stack here.
+ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+ static bool StackGrowsDown() {
+-  int dummy;
++  int dummy = 0;
+   bool result;
+   StackLowerThanAddress(&dummy, &result);
+   return result;
diff --git a/.build/install-gtest b/.build/install-gtest
@@ -5,7 +5,51 @@ set -ef
 #
 # See https://github.com/google/googletest/blob/d83fee138a9ae6cb7c03688a2d08d4043a39815d/googletest/README.md#build-with-cmake
 
-gtest_version_tag=v1.14.0
+# https://unix.stackexchange.com/a/706411
+script_dir=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)
+
+usage()
+{
+	echo "Usage: $0 <cpp-standard>"
+	echo
+	echo '  <cpp-standard>  C++ standard to target (supported: 98, 11, any number between 14 and 26 inclusive)'
+}
+
+die()
+{
+	printf '%s\n' "$1" >&2
+	printf '\n%s\n' "$(usage)" >&2
+	exit 1
+}
+
+cpp_standard=$1
+if [ -z "$cpp_standard" ]; then
+	die 'Error: <cpp-standard> must not be empty'
+fi
+
+case $cpp_standard in
+	98)
+		# GoogleTest v1.8.1 seems to be the last version supporting C++98,
+		# see https://github.com/google/googletest/releases/tag/release-1.8.1
+		gtest_version_tag=release-1.8.1
+		;;
+	11)
+		# GoogleTest v1.12.1 seems to be the last version supporting C++11,
+		# see https://github.com/google/googletest/releases/tag/release-1.12.1
+		gtest_version_tag=release-1.12.1
+		;;
+	*)
+		if [ "$cpp_standard" -ge 14 ] && [ "$cpp_standard" -le 26 ]; then
+			# GoogleTest v1.16.0 (the latest version at the time of writing) requires at
+			# least C++14, see https://github.com/google/googletest/releases/tag/v1.16.0
+			gtest_version_tag=v1.16.0
+		else
+			die "Error: unsupported value $1 of <cpp-standard>"
+		fi
+		;;
+esac
+
+echo "Installing GoogleTest $gtest_version_tag"
 
 temp_dir=$(mktemp -d)
 cleanup()
@@ -20,6 +64,11 @@ trap 'cleanup' INT TERM HUP EXIT
 # Download
 git clone --depth 1 -b "$gtest_version_tag" -- https://github.com/google/googletest.git "$temp_dir"
 cd -- "$temp_dir"
+if [ "$gtest_version_tag" = release-1.8.1 ]; then
+	# We must apply a patch to fix `-Werror=maybe-uninitialized` (see
+	# https://github.com/google/googletest/issues/3219), otherwise it won't compile
+	git apply -- "$script_dir"/gtest-v1.8.1-fix-Werror-maybe-uninitialized.patch
+fi
 mkdir build
 cd build
 # Configure - build only GoogleTest, not GoogleMock
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -19,7 +19,9 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install GoogleTest
-        run: .build/install-gtest
+        env:
+          CPP_STANDARD: ${{ matrix.cpp-standard }}
+        run: .build/install-gtest "$CPP_STANDARD"
       - name: Update package lists
         run: sudo apt-get update
       - name: Install packages
@@ -123,7 +125,7 @@ jobs:
             pkg info
 
             echo '#### Installing GoogleTest'
-            .build/install-gtest
+            .build/install-gtest 11
 
             echo '#### Building'
             .build/build -DCMAKE_CXX_STANDARD=11 -DCMAKE_CXX_STANDARD_REQUIRED=ON -DCMAKE_CXX_EXTENSIONS=OFF
diff --git a/kaitai/kaitaistream.cpp b/kaitai/kaitaistream.cpp
@@ -791,20 +791,66 @@ int kaitai::kstream::mod(int a, int b) {
     return r;
 }
 
-void kaitai::kstream::unsigned_to_decimal(uint64_t number, char *buffer) {
-    // Implementation from https://ideone.com/nrQfA8 by Alf P. Steinbach
+void kaitai::kstream::unsigned_to_decimal(uint64_t number, char *buf, std::size_t &buf_contents_start) {
+    // Implementation inspired by https://ideone.com/nrQfA8 by Alf P. Steinbach
     // (see https://vitaut.net/posts/2013/integer-to-string-conversion-in-cplusplus/)
-    if (number == 0) {
-        *buffer++ = '0';
+    do {
+        buf[--buf_contents_start] = static_cast<char>('0' + (number % 10));
+        number /= 10;
+    } while (number != 0);
+}
+
+std::string kaitai::kstream::to_string_signed(int64_t val) {
+    // `digits10 + 2` because of minus sign + leading digit (NB: null terminator is not used)
+    char buf[std::numeric_limits<int64_t>::digits10 + 2];
+    std::size_t buf_contents_start = sizeof(buf);
+    if (val < 0) {
+        // NB: `val` is negative and we need to get its absolute value (i.e. minus `val`). However, since
+        // `int64_t` uses two's complement representation, its range is `[-2**63, 2**63 - 1] =
+        // [-0x8000_0000_0000_0000, 0x7fff_ffff_ffff_ffff]` (both ends inclusive) and thus the naive
+        // `-val` operation will overflow for `val = std::numeric_limits<int64_t>::min() =
+        // -0x8000_0000_0000_0000` (because the result of `-val` is mathematically
+        // `-(-0x8000_0000_0000_0000) = 0x8000_0000_0000_0000`, but the `int64_t` type can represent at
+        // most `0x7fff_ffff_ffff_ffff`). And signed integer overflow is undefined behavior in C++.
+        //
+        // To avoid undefined behavior for `val = -0x8000_0000_0000_0000 = -2**63`, we do the following
+        // steps for all negative `val`s:
+        //
+        // 1. Convert the signed (and negative) `val` to an unsigned `uint64_t` type. This is a
+        //    well-defined operation in C++: the resulting `uint64_t` value will be `val mod 2**64` (`mod`
+        //    is modulo). The maximum `val` we can have here is `-1` (because `val < 0`), a theoretical
+        //    minimum we are able to support would be `-2**64 + 1 = -0xffff_ffff_ffff_ffff` (even though
+        //    in practice the widest standard type is `int64_t` with the minimum of `-2**63`):
+        //
+        //    * `static_cast<uint64_t>(-1) = -1 mod 2**64 = 2**64 + (-1) = 0xffff_ffff_ffff_ffff = 2**64 - 1`
+        //    * `static_cast<uint64_t>(-2**64 + 1) = (-2**64 + 1) mod 2**64 = 2**64 + (-2**64 + 1) = 1`
+        //
+        // 2. Subtract `static_cast<uint64_t>(val)` from `2**64 - 1 = 0xffff_ffff_ffff_ffff`. Since
+        //    `static_cast<uint64_t>(val)` is in range `[1, 2**64 - 1]` (see step 1), the result of this
+        //    subtraction will be mathematically in range `[0, (2**64 - 1) - 1] = [0, 2**64 - 2]`. So the
+        //    mathematical result cannot be negative, hence this unsigned integer subtraction can never
+        //    wrap around (which wouldn't be a good thing to rely upon because it confuses programmers and
+        //    code analysis tools).
+        //
+        // 3. Since we did mathematically `(2**64 - 1) - (2**64 + val) = -val - 1` so far (and we wanted
+        //    to do `-val`), we add `1` to correct that. From step 2 we know that the result of `-val - 1`
+        //    is in range `[0, 2**64 - 2]`, so adding `1` will not wrap (at most we could get `2**64 - 1 =
+        //    0xffff_ffff_ffff_ffff`, which is still in the valid range of `uint64_t`).
+        unsigned_to_decimal((std::numeric_limits<uint64_t>::max() - static_cast<uint64_t>(val)) + 1, buf, buf_contents_start);
+
+        buf[--buf_contents_start] = '-';
     } else {
-        char *p_first = buffer;
-        while (number != 0) {
-            *buffer++ = static_cast<char>('0' + number % 10);
-            number /= 10;
-        }
-        std::reverse(p_first, buffer);
+        unsigned_to_decimal(static_cast<uint64_t>(val), buf, buf_contents_start);
     }
-    *buffer = '\0';
+    return std::string(&buf[buf_contents_start], sizeof(buf) - buf_contents_start);
+}
+
+std::string kaitai::kstream::to_string_unsigned(uint64_t val) {
+    // `digits10 + 1` because of leading digit (NB: null terminator is not used)
+    char buf[std::numeric_limits<uint64_t>::digits10 + 1];
+    std::size_t buf_contents_start = sizeof(buf);
+    unsigned_to_decimal(val, buf, buf_contents_start);
+    return std::string(&buf[buf_contents_start], sizeof(buf) - buf_contents_start);
 }
 
 int64_t kaitai::kstream::string_to_int(const std::string& str, int base) {
diff --git a/kaitai/kaitaistream.h b/kaitai/kaitaistream.h
@@ -12,14 +12,10 @@
 #include <stdint.h> // int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t
 
 #include <ios> // std::streamsize, forward declaration of std::istream  // IWYU pragma: keep
-#include <limits> // std::numeric_limits
+#include <cstddef> // std::size_t
 #include <sstream> // std::istringstream  // IWYU pragma: keep
 #include <string> // std::string
 
-#ifdef KAITAI_STREAM_H_CPP11_SUPPORT
-#include <type_traits> // std::enable_if, std::is_integral
-#endif
-
 namespace kaitai {
 
 /**
@@ -232,70 +228,72 @@ class kstream {
      */
     static int mod(int a, int b);
 
+    // NB: the following 6 overloads of `to_string` are exactly the ones that
+    // [`std::to_string`](https://en.cppreference.com/w/cpp/string/basic_string/to_string) has.
+    // Testing has shown that they are all necessary: if you remove any of them, you will get
+    // something like `error: call to 'to_string' is ambiguous` when trying to call `to_string`
+    // with the integer type for which you removed the overload.
+
+    /**
+     * Converts given integer `val` to a decimal string representation.
+     * Should be used in place of `std::to_string(int)` (which is available only
+     * since C++11) in older C++ implementations.
+     */
+    static std::string to_string(int val) {
+        return to_string_signed(val);
+    }
+
     /**
      * Converts given integer `val` to a decimal string representation.
-     * Should be used in place of std::to_string() (which is available only
+     * Should be used in place of `std::to_string(long)` (which is available only
      * since C++11) in older C++ implementations.
      */
-    template<typename I>
+    static std::string to_string(long val) {
+        return to_string_signed(val);
+    }
+
+// The `long long` type is only available since C++11, so we use it only in C++11 mode.
 #ifdef KAITAI_STREAM_H_CPP11_SUPPORT
-    // https://stackoverflow.com/a/27913885
-    typename std::enable_if<
-            std::is_integral<I>::value &&
-            // check if we don't have something too large like GCC's `__int128_t`
-            std::numeric_limits<I>::max() >= 0 &&
-            std::numeric_limits<I>::max() <= std::numeric_limits<uint64_t>::max(),
-            std::string
-    >::type
-#else
-    std::string
+    /**
+     * Converts given integer `val` to a decimal string representation.
+     * Should be used in place of `std::to_string(long long)` (which is available only
+     * since C++11) in older C++ implementations.
+     */
+    static std::string to_string(long long val) {
+        return to_string_signed(val);
+    }
 #endif
-    static to_string(I val) {
-        // in theory, `digits10 + 3` would be enough (minus sign + leading digit
-        // + null terminator), but let's add a little more to be safe
-        char buf[std::numeric_limits<I>::digits10 + 5];
-        if (val < 0) {
-            buf[0] = '-';
-
-            // NB: `val` is negative and we need to get its absolute value (i.e. minus `val`). However, since
-            // `int64_t` uses two's complement representation, its range is `[-2**63, 2**63 - 1] =
-            // [-0x8000_0000_0000_0000, 0x7fff_ffff_ffff_ffff]` (both ends inclusive) and thus the naive
-            // `-val` operation will overflow for `val = std::numeric_limits<int64_t>::min() =
-            // -0x8000_0000_0000_0000` (because the result of `-val` is mathematically
-            // `-(-0x8000_0000_0000_0000) = 0x8000_0000_0000_0000`, but the `int64_t` type can represent at
-            // most `0x7fff_ffff_ffff_ffff`). And signed integer overflow is undefined behavior in C++.
-            //
-            // To avoid undefined behavior for `val = -0x8000_0000_0000_0000 = -2**63`, we do the following
-            // steps for all negative `val`s:
-            //
-            // 1. Convert the signed (and negative) `val` to an unsigned `uint64_t` type. This is a
-            //    well-defined operation in C++: the resulting `uint64_t` value will be `val mod 2**64` (`mod`
-            //    is modulo). The maximum `val` we can have here is `-1` (because `val < 0`), a theoretical
-            //    minimum we are able to support would be `-2**64 + 1 = -0xffff_ffff_ffff_ffff` (even though
-            //    in practice the widest standard type is `int64_t` with the minimum of `-2**63`):
-            //
-            //    * `static_cast<uint64_t>(-1) = -1 mod 2**64 = 2**64 + (-1) = 0xffff_ffff_ffff_ffff = 2**64 - 1`
-            //    * `static_cast<uint64_t>(-2**64 + 1) = (-2**64 + 1) mod 2**64 = 2**64 + (-2**64 + 1) = 1`
-            //
-            // 2. Subtract `static_cast<uint64_t>(val)` from `2**64 - 1 = 0xffff_ffff_ffff_ffff`. Since
-            //    `static_cast<uint64_t>(val)` is in range `[1, 2**64 - 1]` (see step 1), the result of this
-            //    subtraction will be mathematically in range `[0, (2**64 - 1) - 1] = [0, 2**64 - 2]`. So the
-            //    mathematical result cannot be negative, hence this unsigned integer subtraction can never
-            //    wrap around (which wouldn't be a good thing to rely upon because it confuses programmers and
-            //    code analysis tools).
-            //
-            // 3. Since we did mathematically `(2**64 - 1) - (2**64 + val) = -val - 1` so far (and we wanted
-            //    to do `-val`), we add `1` to correct that. From step 2 we know that the result of `-val - 1`
-            //    is in range `[0, 2**64 - 2]`, so adding `1` will not wrap (at most we could get `2**64 - 1 =
-            //    0xffff_ffff_ffff_ffff`, which is still in the valid range of `uint64_t`).
-
-            unsigned_to_decimal((std::numeric_limits<uint64_t>::max() - static_cast<uint64_t>(val)) + 1, &buf[1]);
-        } else {
-            unsigned_to_decimal(val, buf);
-        }
-        return std::string(buf);
+
+    /**
+     * Converts given integer `val` to a decimal string representation.
+     * Should be used in place of `std::to_string(unsigned)` (which is available only
+     * since C++11) in older C++ implementations.
+     */
+    static std::string to_string(unsigned val) {
+        return to_string_unsigned(val);
+    }
+
+    /**
+     * Converts given integer `val` to a decimal string representation.
+     * Should be used in place of `std::to_string(unsigned long)` (which is available only
+     * since C++11) in older C++ implementations.
+     */
+    static std::string to_string(unsigned long val) {
+        return to_string_unsigned(val);
     }
 
+// The `unsigned long long` type is only available since C++11, so we use it only in C++11 mode.
+#ifdef KAITAI_STREAM_H_CPP11_SUPPORT
+    /**
+     * Converts given integer `val` to a decimal string representation.
+     * Should be used in place of `std::to_string(unsigned long long)` (which is available only
+     * since C++11) in older C++ implementations.
+     */
+    static std::string to_string(unsigned long long val) {
+        return to_string_unsigned(val);
+    }
+#endif
+
     /**
      * Converts string `str` to an integer value. Throws an exception if the
      * string is not a valid integer.
@@ -347,7 +345,9 @@ class kstream {
     void init();
     void exceptions_enable() const;
 
-    static void unsigned_to_decimal(uint64_t number, char *buffer);
+    static void unsigned_to_decimal(uint64_t number, char *buf, std::size_t &buf_contents_start);
+    static std::string to_string_signed(int64_t val);
+    static std::string to_string_unsigned(uint64_t val);
 
 #ifdef KS_STR_ENCODING_WIN32API
     enum {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -8,6 +8,18 @@ include(../Common.cmake)
 
 target_include_directories(unittest PRIVATE ${CMAKE_SOURCE_DIR})
 
+target_compile_options(unittest PRIVATE
+    $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:
+        # At the time of writing, we use a variadic `SETUP_STREAM(...)` macro everywhere
+        # in `unittest.cpp`. However, variadic macros are only available since C++11, so
+        # the `#define SETUP_STREAM(...)` line raises a warning `anonymous variadic macros
+        # were introduced in C++11` when compiling with `-std=c++98`. Since we also use
+        # `-Werror`, this warning turns into an error and blocks compilation. Therefore,
+        # we change this specific diagnostic back to a non-fatal warning.
+        -Wno-error=variadic-macros
+    >
+)
+
 # Link the test executable with the main library and the test framework/library
 target_link_libraries(unittest PRIVATE kaitai_struct_cpp_stl_runtime GTest::GTest GTest::Main)
 
diff --git a/tests/unittest.cpp b/tests/unittest.cpp