Skip to content

to_string(): replace template with 6 overloads, simplify #76

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .build/gtest-v1.8.1-fix-Werror-maybe-uninitialized.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
diff --git a/googletest/src/gtest-death-test.cc b/googletest/src/gtest-death-test.cc
index 0908355..d6f492e 100644
--- a/googletest/src/gtest-death-test.cc
+++ b/googletest/src/gtest-death-test.cc
@@ -1212,14 +1212,14 @@ static int ExecDeathTestChildMain(void* child_arg) {
static void StackLowerThanAddress(const void* ptr,
bool* result) GTEST_NO_INLINE_;
static void StackLowerThanAddress(const void* ptr, bool* result) {
- int dummy;
+ int dummy = 0;
*result = (&dummy < ptr);
}

// Make sure AddressSanitizer does not tamper with the stack here.
GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
static bool StackGrowsDown() {
- int dummy;
+ int dummy = 0;
bool result;
StackLowerThanAddress(&dummy, &result);
return result;
51 changes: 50 additions & 1 deletion .build/install-gtest
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,51 @@ set -ef
#
# See https://github.com/google/googletest/blob/d83fee138a9ae6cb7c03688a2d08d4043a39815d/googletest/README.md#build-with-cmake

gtest_version_tag=v1.14.0
# https://unix.stackexchange.com/a/706411
script_dir=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)

usage()
{
echo "Usage: $0 <cpp-standard>"
echo
echo ' <cpp-standard> C++ standard to target (supported: 98, 11, any number between 14 and 26 inclusive)'
}

die()
{
printf '%s\n' "$1" >&2
printf '\n%s\n' "$(usage)" >&2
exit 1
}

cpp_standard=$1
if [ -z "$cpp_standard" ]; then
die 'Error: <cpp-standard> must not be empty'
fi

case $cpp_standard in
98)
# GoogleTest v1.8.1 seems to be the last version supporting C++98,
# see https://github.com/google/googletest/releases/tag/release-1.8.1
gtest_version_tag=release-1.8.1
;;
11)
# GoogleTest v1.12.1 seems to be the last version supporting C++11,
# see https://github.com/google/googletest/releases/tag/release-1.12.1
gtest_version_tag=release-1.12.1
;;
*)
if [ "$cpp_standard" -ge 14 ] && [ "$cpp_standard" -le 26 ]; then
# GoogleTest v1.16.0 (the latest version at the time of writing) requires at
# least C++14, see https://github.com/google/googletest/releases/tag/v1.16.0
gtest_version_tag=v1.16.0
else
die "Error: unsupported value $1 of <cpp-standard>"
fi
;;
esac

echo "Installing GoogleTest $gtest_version_tag"

temp_dir=$(mktemp -d)
cleanup()
Expand All @@ -20,6 +64,11 @@ trap 'cleanup' INT TERM HUP EXIT
# Download
git clone --depth 1 -b "$gtest_version_tag" -- https://github.com/google/googletest.git "$temp_dir"
cd -- "$temp_dir"
if [ "$gtest_version_tag" = release-1.8.1 ]; then
# We must apply a patch to fix `-Werror=maybe-uninitialized` (see
# https://github.com/google/googletest/issues/3219), otherwise it won't compile
git apply -- "$script_dir"/gtest-v1.8.1-fix-Werror-maybe-uninitialized.patch
fi
mkdir build
cd build
# Configure - build only GoogleTest, not GoogleMock
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install GoogleTest
run: .build/install-gtest
env:
CPP_STANDARD: ${{ matrix.cpp-standard }}
run: .build/install-gtest "$CPP_STANDARD"
- name: Update package lists
run: sudo apt-get update
- name: Install packages
Expand Down Expand Up @@ -123,7 +125,7 @@ jobs:
pkg info

echo '#### Installing GoogleTest'
.build/install-gtest
.build/install-gtest 11

echo '#### Building'
.build/build -DCMAKE_CXX_STANDARD=11 -DCMAKE_CXX_STANDARD_REQUIRED=ON -DCMAKE_CXX_EXTENSIONS=OFF
Expand Down
68 changes: 57 additions & 11 deletions kaitai/kaitaistream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -791,20 +791,66 @@ int kaitai::kstream::mod(int a, int b) {
return r;
}

void kaitai::kstream::unsigned_to_decimal(uint64_t number, char *buffer) {
// Implementation from https://ideone.com/nrQfA8 by Alf P. Steinbach
void kaitai::kstream::unsigned_to_decimal(uint64_t number, char *buf, std::size_t &buf_contents_start) {
// Implementation inspired by https://ideone.com/nrQfA8 by Alf P. Steinbach
// (see https://vitaut.net/posts/2013/integer-to-string-conversion-in-cplusplus/)
if (number == 0) {
*buffer++ = '0';
do {
buf[--buf_contents_start] = static_cast<char>('0' + (number % 10));
number /= 10;
} while (number != 0);
}

std::string kaitai::kstream::to_string_signed(int64_t val) {
// `digits10 + 2` because of minus sign + leading digit (NB: null terminator is not used)
char buf[std::numeric_limits<int64_t>::digits10 + 2];
std::size_t buf_contents_start = sizeof(buf);
if (val < 0) {
// NB: `val` is negative and we need to get its absolute value (i.e. minus `val`). However, since
// `int64_t` uses two's complement representation, its range is `[-2**63, 2**63 - 1] =
// [-0x8000_0000_0000_0000, 0x7fff_ffff_ffff_ffff]` (both ends inclusive) and thus the naive
// `-val` operation will overflow for `val = std::numeric_limits<int64_t>::min() =
// -0x8000_0000_0000_0000` (because the result of `-val` is mathematically
// `-(-0x8000_0000_0000_0000) = 0x8000_0000_0000_0000`, but the `int64_t` type can represent at
// most `0x7fff_ffff_ffff_ffff`). And signed integer overflow is undefined behavior in C++.
//
// To avoid undefined behavior for `val = -0x8000_0000_0000_0000 = -2**63`, we do the following
// steps for all negative `val`s:
//
// 1. Convert the signed (and negative) `val` to an unsigned `uint64_t` type. This is a
// well-defined operation in C++: the resulting `uint64_t` value will be `val mod 2**64` (`mod`
// is modulo). The maximum `val` we can have here is `-1` (because `val < 0`), a theoretical
// minimum we are able to support would be `-2**64 + 1 = -0xffff_ffff_ffff_ffff` (even though
// in practice the widest standard type is `int64_t` with the minimum of `-2**63`):
//
// * `static_cast<uint64_t>(-1) = -1 mod 2**64 = 2**64 + (-1) = 0xffff_ffff_ffff_ffff = 2**64 - 1`
// * `static_cast<uint64_t>(-2**64 + 1) = (-2**64 + 1) mod 2**64 = 2**64 + (-2**64 + 1) = 1`
//
// 2. Subtract `static_cast<uint64_t>(val)` from `2**64 - 1 = 0xffff_ffff_ffff_ffff`. Since
// `static_cast<uint64_t>(val)` is in range `[1, 2**64 - 1]` (see step 1), the result of this
// subtraction will be mathematically in range `[0, (2**64 - 1) - 1] = [0, 2**64 - 2]`. So the
// mathematical result cannot be negative, hence this unsigned integer subtraction can never
// wrap around (which wouldn't be a good thing to rely upon because it confuses programmers and
// code analysis tools).
//
// 3. Since we did mathematically `(2**64 - 1) - (2**64 + val) = -val - 1` so far (and we wanted
// to do `-val`), we add `1` to correct that. From step 2 we know that the result of `-val - 1`
// is in range `[0, 2**64 - 2]`, so adding `1` will not wrap (at most we could get `2**64 - 1 =
// 0xffff_ffff_ffff_ffff`, which is still in the valid range of `uint64_t`).
unsigned_to_decimal((std::numeric_limits<uint64_t>::max() - static_cast<uint64_t>(val)) + 1, buf, buf_contents_start);

buf[--buf_contents_start] = '-';
} else {
char *p_first = buffer;
while (number != 0) {
*buffer++ = static_cast<char>('0' + number % 10);
number /= 10;
}
std::reverse(p_first, buffer);
unsigned_to_decimal(static_cast<uint64_t>(val), buf, buf_contents_start);
}
*buffer = '\0';
return std::string(&buf[buf_contents_start], sizeof(buf) - buf_contents_start);
}

std::string kaitai::kstream::to_string_unsigned(uint64_t val) {
// `digits10 + 1` because of leading digit (NB: null terminator is not used)
char buf[std::numeric_limits<uint64_t>::digits10 + 1];
std::size_t buf_contents_start = sizeof(buf);
unsigned_to_decimal(val, buf, buf_contents_start);
return std::string(&buf[buf_contents_start], sizeof(buf) - buf_contents_start);
}

int64_t kaitai::kstream::string_to_int(const std::string& str, int base) {
Expand Down
124 changes: 62 additions & 62 deletions kaitai/kaitaistream.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,10 @@
#include <stdint.h> // int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t

#include <ios> // std::streamsize, forward declaration of std::istream // IWYU pragma: keep
#include <limits> // std::numeric_limits
#include <cstddef> // std::size_t
#include <sstream> // std::istringstream // IWYU pragma: keep
#include <string> // std::string

#ifdef KAITAI_STREAM_H_CPP11_SUPPORT
#include <type_traits> // std::enable_if, std::is_integral
#endif

namespace kaitai {

/**
Expand Down Expand Up @@ -232,70 +228,72 @@ class kstream {
*/
static int mod(int a, int b);

// NB: the following 6 overloads of `to_string` are exactly the ones that
// [`std::to_string`](https://en.cppreference.com/w/cpp/string/basic_string/to_string) has.
// Testing has shown that they are all necessary: if you remove any of them, you will get
// something like `error: call to 'to_string' is ambiguous` when trying to call `to_string`
// with the integer type for which you removed the overload.

/**
* Converts given integer `val` to a decimal string representation.
* Should be used in place of `std::to_string(int)` (which is available only
* since C++11) in older C++ implementations.
*/
static std::string to_string(int val) {
return to_string_signed(val);
}

/**
* Converts given integer `val` to a decimal string representation.
* Should be used in place of std::to_string() (which is available only
* Should be used in place of `std::to_string(long)` (which is available only
* since C++11) in older C++ implementations.
*/
template<typename I>
static std::string to_string(long val) {
return to_string_signed(val);
}

// The `long long` type is only available since C++11, so we use it only in C++11 mode.
#ifdef KAITAI_STREAM_H_CPP11_SUPPORT
// https://stackoverflow.com/a/27913885
typename std::enable_if<
std::is_integral<I>::value &&
// check if we don't have something too large like GCC's `__int128_t`
std::numeric_limits<I>::max() >= 0 &&
std::numeric_limits<I>::max() <= std::numeric_limits<uint64_t>::max(),
std::string
>::type
#else
std::string
/**
* Converts given integer `val` to a decimal string representation.
* Should be used in place of `std::to_string(long long)` (which is available only
* since C++11) in older C++ implementations.
*/
static std::string to_string(long long val) {
return to_string_signed(val);
}
#endif
static to_string(I val) {
// in theory, `digits10 + 3` would be enough (minus sign + leading digit
// + null terminator), but let's add a little more to be safe
char buf[std::numeric_limits<I>::digits10 + 5];
if (val < 0) {
buf[0] = '-';

// NB: `val` is negative and we need to get its absolute value (i.e. minus `val`). However, since
// `int64_t` uses two's complement representation, its range is `[-2**63, 2**63 - 1] =
// [-0x8000_0000_0000_0000, 0x7fff_ffff_ffff_ffff]` (both ends inclusive) and thus the naive
// `-val` operation will overflow for `val = std::numeric_limits<int64_t>::min() =
// -0x8000_0000_0000_0000` (because the result of `-val` is mathematically
// `-(-0x8000_0000_0000_0000) = 0x8000_0000_0000_0000`, but the `int64_t` type can represent at
// most `0x7fff_ffff_ffff_ffff`). And signed integer overflow is undefined behavior in C++.
//
// To avoid undefined behavior for `val = -0x8000_0000_0000_0000 = -2**63`, we do the following
// steps for all negative `val`s:
//
// 1. Convert the signed (and negative) `val` to an unsigned `uint64_t` type. This is a
// well-defined operation in C++: the resulting `uint64_t` value will be `val mod 2**64` (`mod`
// is modulo). The maximum `val` we can have here is `-1` (because `val < 0`), a theoretical
// minimum we are able to support would be `-2**64 + 1 = -0xffff_ffff_ffff_ffff` (even though
// in practice the widest standard type is `int64_t` with the minimum of `-2**63`):
//
// * `static_cast<uint64_t>(-1) = -1 mod 2**64 = 2**64 + (-1) = 0xffff_ffff_ffff_ffff = 2**64 - 1`
// * `static_cast<uint64_t>(-2**64 + 1) = (-2**64 + 1) mod 2**64 = 2**64 + (-2**64 + 1) = 1`
//
// 2. Subtract `static_cast<uint64_t>(val)` from `2**64 - 1 = 0xffff_ffff_ffff_ffff`. Since
// `static_cast<uint64_t>(val)` is in range `[1, 2**64 - 1]` (see step 1), the result of this
// subtraction will be mathematically in range `[0, (2**64 - 1) - 1] = [0, 2**64 - 2]`. So the
// mathematical result cannot be negative, hence this unsigned integer subtraction can never
// wrap around (which wouldn't be a good thing to rely upon because it confuses programmers and
// code analysis tools).
//
// 3. Since we did mathematically `(2**64 - 1) - (2**64 + val) = -val - 1` so far (and we wanted
// to do `-val`), we add `1` to correct that. From step 2 we know that the result of `-val - 1`
// is in range `[0, 2**64 - 2]`, so adding `1` will not wrap (at most we could get `2**64 - 1 =
// 0xffff_ffff_ffff_ffff`, which is still in the valid range of `uint64_t`).

unsigned_to_decimal((std::numeric_limits<uint64_t>::max() - static_cast<uint64_t>(val)) + 1, &buf[1]);
} else {
unsigned_to_decimal(val, buf);
}
return std::string(buf);

/**
* Converts given integer `val` to a decimal string representation.
* Should be used in place of `std::to_string(unsigned)` (which is available only
* since C++11) in older C++ implementations.
*/
static std::string to_string(unsigned val) {
return to_string_unsigned(val);
}

/**
* Converts given integer `val` to a decimal string representation.
* Should be used in place of `std::to_string(unsigned long)` (which is available only
* since C++11) in older C++ implementations.
*/
static std::string to_string(unsigned long val) {
return to_string_unsigned(val);
}

// The `unsigned long long` type is only available since C++11, so we use it only in C++11 mode.
#ifdef KAITAI_STREAM_H_CPP11_SUPPORT
/**
* Converts given integer `val` to a decimal string representation.
* Should be used in place of `std::to_string(unsigned long long)` (which is available only
* since C++11) in older C++ implementations.
*/
static std::string to_string(unsigned long long val) {
return to_string_unsigned(val);
}
#endif

/**
* Converts string `str` to an integer value. Throws an exception if the
* string is not a valid integer.
Expand Down Expand Up @@ -347,7 +345,9 @@ class kstream {
void init();
void exceptions_enable() const;

static void unsigned_to_decimal(uint64_t number, char *buffer);
static void unsigned_to_decimal(uint64_t number, char *buf, std::size_t &buf_contents_start);
static std::string to_string_signed(int64_t val);
static std::string to_string_unsigned(uint64_t val);

#ifdef KS_STR_ENCODING_WIN32API
enum {
Expand Down
12 changes: 12 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@ include(../Common.cmake)

target_include_directories(unittest PRIVATE ${CMAKE_SOURCE_DIR})

target_compile_options(unittest PRIVATE
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:
# At the time of writing, we use a variadic `SETUP_STREAM(...)` macro everywhere
# in `unittest.cpp`. However, variadic macros are only available since C++11, so
# the `#define SETUP_STREAM(...)` line raises a warning `anonymous variadic macros
# were introduced in C++11` when compiling with `-std=c++98`. Since we also use
# `-Werror`, this warning turns into an error and blocks compilation. Therefore,
# we change this specific diagnostic back to a non-fatal warning.
-Wno-error=variadic-macros
>
)

# Link the test executable with the main library and the test framework/library
target_link_libraries(unittest PRIVATE kaitai_struct_cpp_stl_runtime GTest::GTest GTest::Main)

Expand Down
Loading