From 90bd67014557fd38993119d139291f4cc53064af Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Sun, 23 Feb 2025 21:02:27 +0100 Subject: [PATCH 1/6] to_string(): replace template with 6 overloads, simplify As mentioned in the code comment in `kaitai/kaitaistream.cpp`, this approach follows what `std::to_string` is doing: https://en.cppreference.com/w/cpp/string/basic_string/to_string Avoiding templates and separating signed and unsigned cases has been already suggested by @webbnh in https://github.com/kaitai-io/kaitai_struct_cpp_stl_runtime/pull/50. The motivation for this change is that this new version of `to_string` also accepts values of unscoped enums that can be implicitly converted to integers, just like `std::to_string` does. For example, you can do `kaitai::kstream::to_string(FRUIT_APPLE)` just like you can do `std::to_string(FRUIT_APPLE)`, where `FRUIT_APPLE` is an unscoped enum member: ```cpp enum fruit_t { FRUIT_APPLE = 2, FRUIT_ORANGE = 5 }; ``` This simplifies the translation of expressions like `fruit::apple.to_i.to_s`, because it means that the `enum.to_i` operation can remain a no-op in the KSC: [`CppTranslator.scala:193-194`](https://github.com/kaitai-io/kaitai_struct_compiler/blob/12dbc32226eb9e94b76f803f3c8ff5f8943e5f8d/shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala#L193-L194) ```scala override def enumToInt(v: expr, et: EnumType): String = translate(v) ``` Therefore, this change fixes the [EnumToI](https://github.com/kaitai-io/kaitai_struct_tests/blob/af0359aeb87233640fcd85b57850c712b6f03f63/formats/enum_to_i.ksy) and EnumToIInvalid tests for C++11, which currently fail with this error (https://github.com/kaitai-io/ci_artifacts/blob/786a45a62978409fd90fab124b5b1ad1f225a81e/test_out/cpp_stl_11/build-1.log#L1495-L1498): ``` /tests/compiled/cpp_stl_11/enum_to_i.cpp: In member function 'std::string enum_to_i_t::pet_1_i_to_s()': /tests/compiled/cpp_stl_11/enum_to_i.cpp:65:48: error: no matching function for call to 'kaitai::kstream::to_string(enum_to_i_t::animal_t)' 65 | m_pet_1_i_to_s = kaitai::kstream::to_string(pet_1()); | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~ ``` Eventually, we might switch to scoped enums (see https://github.com/kaitai-io/kaitai_struct/issues/959), which are not implicitly convertible to integers, so then it won't be possible to keep the `.to_i` operation implemented as a no-op, but for now (as long as we're using unscoped enums) it works. --- kaitai/kaitaistream.cpp | 98 ++++++++++++++++++++++++++++++++----- kaitai/kaitaistream.h | 106 ++++++++++++++++------------------------ tests/unittest.cpp | 95 ++++++++++++++++++++++++++--------- 3 files changed, 199 insertions(+), 100 deletions(-) diff --git a/kaitai/kaitaistream.cpp b/kaitai/kaitaistream.cpp index f0754fd..40acbe5 100644 --- a/kaitai/kaitaistream.cpp +++ b/kaitai/kaitaistream.cpp @@ -791,20 +791,96 @@ int kaitai::kstream::mod(int a, int b) { return r; } -void kaitai::kstream::unsigned_to_decimal(uint64_t number, char *buffer) { - // Implementation from https://ideone.com/nrQfA8 by Alf P. Steinbach +void kaitai::kstream::unsigned_to_decimal(uint64_t number, char *buf, std::size_t &buf_contents_start) { + // Implementation inspired by https://ideone.com/nrQfA8 by Alf P. Steinbach // (see https://vitaut.net/posts/2013/integer-to-string-conversion-in-cplusplus/) - if (number == 0) { - *buffer++ = '0'; + do { + buf[--buf_contents_start] = static_cast('0' + (number % 10)); + number /= 10; + } while (number != 0); +} + +std::string kaitai::kstream::to_string_signed(int64_t val) { + // `digits10 + 2` because of minus sign + leading digit (NB: null terminator is not used) + char buf[std::numeric_limits::digits10 + 2]; + std::size_t buf_contents_start = sizeof(buf); + if (val < 0) { + // NB: `val` is negative and we need to get its absolute value (i.e. minus `val`). However, since + // `int64_t` uses two's complement representation, its range is `[-2**63, 2**63 - 1] = + // [-0x8000_0000_0000_0000, 0x7fff_ffff_ffff_ffff]` (both ends inclusive) and thus the naive + // `-val` operation will overflow for `val = std::numeric_limits::min() = + // -0x8000_0000_0000_0000` (because the result of `-val` is mathematically + // `-(-0x8000_0000_0000_0000) = 0x8000_0000_0000_0000`, but the `int64_t` type can represent at + // most `0x7fff_ffff_ffff_ffff`). And signed integer overflow is undefined behavior in C++. + // + // To avoid undefined behavior for `val = -0x8000_0000_0000_0000 = -2**63`, we do the following + // steps for all negative `val`s: + // + // 1. Convert the signed (and negative) `val` to an unsigned `uint64_t` type. This is a + // well-defined operation in C++: the resulting `uint64_t` value will be `val mod 2**64` (`mod` + // is modulo). The maximum `val` we can have here is `-1` (because `val < 0`), a theoretical + // minimum we are able to support would be `-2**64 + 1 = -0xffff_ffff_ffff_ffff` (even though + // in practice the widest standard type is `int64_t` with the minimum of `-2**63`): + // + // * `static_cast(-1) = -1 mod 2**64 = 2**64 + (-1) = 0xffff_ffff_ffff_ffff = 2**64 - 1` + // * `static_cast(-2**64 + 1) = (-2**64 + 1) mod 2**64 = 2**64 + (-2**64 + 1) = 1` + // + // 2. Subtract `static_cast(val)` from `2**64 - 1 = 0xffff_ffff_ffff_ffff`. Since + // `static_cast(val)` is in range `[1, 2**64 - 1]` (see step 1), the result of this + // subtraction will be mathematically in range `[0, (2**64 - 1) - 1] = [0, 2**64 - 2]`. So the + // mathematical result cannot be negative, hence this unsigned integer subtraction can never + // wrap around (which wouldn't be a good thing to rely upon because it confuses programmers and + // code analysis tools). + // + // 3. Since we did mathematically `(2**64 - 1) - (2**64 + val) = -val - 1` so far (and we wanted + // to do `-val`), we add `1` to correct that. From step 2 we know that the result of `-val - 1` + // is in range `[0, 2**64 - 2]`, so adding `1` will not wrap (at most we could get `2**64 - 1 = + // 0xffff_ffff_ffff_ffff`, which is still in the valid range of `uint64_t`). + unsigned_to_decimal((std::numeric_limits::max() - static_cast(val)) + 1, buf, buf_contents_start); + + buf[--buf_contents_start] = '-'; } else { - char *p_first = buffer; - while (number != 0) { - *buffer++ = static_cast('0' + number % 10); - number /= 10; - } - std::reverse(p_first, buffer); + unsigned_to_decimal(static_cast(val), buf, buf_contents_start); } - *buffer = '\0'; + return std::string(&buf[buf_contents_start], sizeof(buf) - buf_contents_start); +} + +std::string kaitai::kstream::to_string_unsigned(uint64_t val) { + // `digits10 + 1` because of leading digit (NB: null terminator is not used) + char buf[std::numeric_limits::digits10 + 1]; + std::size_t buf_contents_start = sizeof(buf); + unsigned_to_decimal(val, buf, buf_contents_start); + return std::string(&buf[buf_contents_start], sizeof(buf) - buf_contents_start); +} + +// NB: the following 6 overloads are exactly the ones that +// [`std::to_string`](https://en.cppreference.com/w/cpp/string/basic_string/to_string) has. +// Testing has shown that they are all necessary: if you remove any of them, you will get +// something like `error: call to 'to_string' is ambiguous` when trying to call `to_string` +// with the integer type for which you removed the overload. + +std::string kaitai::kstream::to_string(int val) { + return to_string_signed(val); +} + +std::string kaitai::kstream::to_string(long val) { + return to_string_signed(val); +} + +std::string kaitai::kstream::to_string(long long val) { + return to_string_signed(val); +} + +std::string kaitai::kstream::to_string(unsigned val) { + return to_string_unsigned(val); +} + +std::string kaitai::kstream::to_string(unsigned long val) { + return to_string_unsigned(val); +} + +std::string kaitai::kstream::to_string(unsigned long long val) { + return to_string_unsigned(val); } int64_t kaitai::kstream::string_to_int(const std::string& str, int base) { diff --git a/kaitai/kaitaistream.h b/kaitai/kaitaistream.h index 1a13406..2c6f616 100644 --- a/kaitai/kaitaistream.h +++ b/kaitai/kaitaistream.h @@ -12,14 +12,10 @@ #include // int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t #include // std::streamsize, forward declaration of std::istream // IWYU pragma: keep -#include // std::numeric_limits +#include // std::size_t #include // std::istringstream // IWYU pragma: keep #include // std::string -#ifdef KAITAI_STREAM_H_CPP11_SUPPORT -#include // std::enable_if, std::is_integral -#endif - namespace kaitai { /** @@ -234,67 +230,45 @@ class kstream { /** * Converts given integer `val` to a decimal string representation. - * Should be used in place of std::to_string() (which is available only + * Should be used in place of `std::to_string(int)` (which is available only * since C++11) in older C++ implementations. */ - template -#ifdef KAITAI_STREAM_H_CPP11_SUPPORT - // https://stackoverflow.com/a/27913885 - typename std::enable_if< - std::is_integral::value && - // check if we don't have something too large like GCC's `__int128_t` - std::numeric_limits::max() >= 0 && - std::numeric_limits::max() <= std::numeric_limits::max(), - std::string - >::type -#else - std::string -#endif - static to_string(I val) { - // in theory, `digits10 + 3` would be enough (minus sign + leading digit - // + null terminator), but let's add a little more to be safe - char buf[std::numeric_limits::digits10 + 5]; - if (val < 0) { - buf[0] = '-'; - - // NB: `val` is negative and we need to get its absolute value (i.e. minus `val`). However, since - // `int64_t` uses two's complement representation, its range is `[-2**63, 2**63 - 1] = - // [-0x8000_0000_0000_0000, 0x7fff_ffff_ffff_ffff]` (both ends inclusive) and thus the naive - // `-val` operation will overflow for `val = std::numeric_limits::min() = - // -0x8000_0000_0000_0000` (because the result of `-val` is mathematically - // `-(-0x8000_0000_0000_0000) = 0x8000_0000_0000_0000`, but the `int64_t` type can represent at - // most `0x7fff_ffff_ffff_ffff`). And signed integer overflow is undefined behavior in C++. - // - // To avoid undefined behavior for `val = -0x8000_0000_0000_0000 = -2**63`, we do the following - // steps for all negative `val`s: - // - // 1. Convert the signed (and negative) `val` to an unsigned `uint64_t` type. This is a - // well-defined operation in C++: the resulting `uint64_t` value will be `val mod 2**64` (`mod` - // is modulo). The maximum `val` we can have here is `-1` (because `val < 0`), a theoretical - // minimum we are able to support would be `-2**64 + 1 = -0xffff_ffff_ffff_ffff` (even though - // in practice the widest standard type is `int64_t` with the minimum of `-2**63`): - // - // * `static_cast(-1) = -1 mod 2**64 = 2**64 + (-1) = 0xffff_ffff_ffff_ffff = 2**64 - 1` - // * `static_cast(-2**64 + 1) = (-2**64 + 1) mod 2**64 = 2**64 + (-2**64 + 1) = 1` - // - // 2. Subtract `static_cast(val)` from `2**64 - 1 = 0xffff_ffff_ffff_ffff`. Since - // `static_cast(val)` is in range `[1, 2**64 - 1]` (see step 1), the result of this - // subtraction will be mathematically in range `[0, (2**64 - 1) - 1] = [0, 2**64 - 2]`. So the - // mathematical result cannot be negative, hence this unsigned integer subtraction can never - // wrap around (which wouldn't be a good thing to rely upon because it confuses programmers and - // code analysis tools). - // - // 3. Since we did mathematically `(2**64 - 1) - (2**64 + val) = -val - 1` so far (and we wanted - // to do `-val`), we add `1` to correct that. From step 2 we know that the result of `-val - 1` - // is in range `[0, 2**64 - 2]`, so adding `1` will not wrap (at most we could get `2**64 - 1 = - // 0xffff_ffff_ffff_ffff`, which is still in the valid range of `uint64_t`). - - unsigned_to_decimal((std::numeric_limits::max() - static_cast(val)) + 1, &buf[1]); - } else { - unsigned_to_decimal(val, buf); - } - return std::string(buf); - } + static std::string to_string(int val); + + /** + * Converts given integer `val` to a decimal string representation. + * Should be used in place of `std::to_string(long)` (which is available only + * since C++11) in older C++ implementations. + */ + static std::string to_string(long val); + + /** + * Converts given integer `val` to a decimal string representation. + * Should be used in place of `std::to_string(long long)` (which is available only + * since C++11) in older C++ implementations. + */ + static std::string to_string(long long val); + + /** + * Converts given integer `val` to a decimal string representation. + * Should be used in place of `std::to_string(unsigned)` (which is available only + * since C++11) in older C++ implementations. + */ + static std::string to_string(unsigned val); + + /** + * Converts given integer `val` to a decimal string representation. + * Should be used in place of `std::to_string(unsigned long)` (which is available only + * since C++11) in older C++ implementations. + */ + static std::string to_string(unsigned long val); + + /** + * Converts given integer `val` to a decimal string representation. + * Should be used in place of `std::to_string(unsigned long long)` (which is available only + * since C++11) in older C++ implementations. + */ + static std::string to_string(unsigned long long val); /** * Converts string `str` to an integer value. Throws an exception if the @@ -347,7 +321,9 @@ class kstream { void init(); void exceptions_enable() const; - static void unsigned_to_decimal(uint64_t number, char *buffer); + static void unsigned_to_decimal(uint64_t number, char *buf, std::size_t &buf_contents_start); + static std::string to_string_signed(int64_t val); + static std::string to_string_unsigned(uint64_t val); #ifdef KS_STR_ENCODING_WIN32API enum { diff --git a/tests/unittest.cpp b/tests/unittest.cpp index 283fb73..028a01b 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -66,52 +66,99 @@ TEST(KaitaiStreamTest, to_string) EXPECT_EQ(kaitai::kstream::to_string(-123), "-123"); } -TEST(KaitaiStreamTest, to_string_uint8) +// Since `kstream::to_string` must have several overloads (just like +// [`std::to_string`](https://en.cppreference.com/w/cpp/string/basic_string/to_string)) to +// cover all [standard integer +// types](https://en.cppreference.com/w/cpp/language/types#Properties) while avoiding +// templates, it's a good idea to test whether it actually works with each standard +// integer type. If even just one of the 6 required overloads is missing or not working, +// these tests should be able to detect it. +// +// We test the standard integer types (keywords), not [fixed width integer +// types](https://en.cppreference.com/w/cpp/header/cstdint) (like `int32_t`), because then +// we could potentially have a blind spot: `int32_t` tends to be almost universally +// equivalent to `int`, but `int64_t` is either `long` (typically on 64-bit Linux) or +// `long long` (typically on 64-bit Windows) but not both. So I believe that using +// standard integer types gives us better coverage. + +TEST(KaitaiStreamTest, to_string_unsigned_char) { - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "255"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "255"); } -TEST(KaitaiStreamTest, to_string_int8) +TEST(KaitaiStreamTest, to_string_signed_char) { - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-128"); - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "127"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-128"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "127"); } -TEST(KaitaiStreamTest, to_string_uint16) +TEST(KaitaiStreamTest, to_string_unsigned_short) { - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "65535"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "65535"); } -TEST(KaitaiStreamTest, to_string_int16) +TEST(KaitaiStreamTest, to_string_short) { - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-32768"); - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "32767"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-32768"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "32767"); } -TEST(KaitaiStreamTest, to_string_uint32) +TEST(KaitaiStreamTest, to_string_unsigned) { - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "4294967295"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "4294967295"); } -TEST(KaitaiStreamTest, to_string_int32) +TEST(KaitaiStreamTest, to_string_int) { - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-2147483648"); - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "2147483647"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-2147483648"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "2147483647"); +} + +#ifdef _MSC_VER +#pragma warning(push) +// Disable `warning C4127: conditional expression is constant` +// (see https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-4-c4127?view=msvc-170) +#pragma warning(disable: 4127) +#endif + +TEST(KaitaiStreamTest, to_string_unsigned_long) +{ + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); + if (sizeof(unsigned long) == 4) { + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "4294967295"); + } else { + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "18446744073709551615"); + } } -TEST(KaitaiStreamTest, to_string_uint64) +TEST(KaitaiStreamTest, to_string_long) +{ + if (sizeof(long) == 4) { + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-2147483648"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "2147483647"); + } else { + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-9223372036854775808"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "9223372036854775807"); + } +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +TEST(KaitaiStreamTest, to_string_unsigned_long_long) { - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "18446744073709551615"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "18446744073709551615"); } -TEST(KaitaiStreamTest, to_string_int64) +TEST(KaitaiStreamTest, to_string_long_long) { - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-9223372036854775808"); - EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "9223372036854775807"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-9223372036854775808"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "9223372036854775807"); } TEST(KaitaiStreamTest, string_to_int) From 3d712193cace9c95bfc9a804f1423c4e634ac135 Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Sun, 2 Mar 2025 23:51:12 +0100 Subject: [PATCH 2/6] to_string(): define `long long` overloads only in C++11 and later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we get errors like this when compiling with `-std=c++98`: ``` In file included from /home/runner/work/kaitai_struct_cpp_stl_runtime/kaitai_struct_cpp_stl_runtime/kaitai/kaitaistream.cpp:1: /home/runner/work/kaitai_struct_cpp_stl_runtime/kaitai_struct_cpp_stl_runtime/kaitai/kaitaistream.h:250:39: error: ISO C++ 1998 does not support ‘long long’ [-Werror=long-long] 250 | static std::string to_string(long long val); | ^~~~ ``` --- kaitai/kaitaistream.cpp | 4 ++++ kaitai/kaitaistream.h | 6 ++++++ tests/unittest.cpp | 16 ++++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/kaitai/kaitaistream.cpp b/kaitai/kaitaistream.cpp index 40acbe5..613e9e2 100644 --- a/kaitai/kaitaistream.cpp +++ b/kaitai/kaitaistream.cpp @@ -867,9 +867,11 @@ std::string kaitai::kstream::to_string(long val) { return to_string_signed(val); } +#ifdef KAITAI_STREAM_H_CPP11_SUPPORT std::string kaitai::kstream::to_string(long long val) { return to_string_signed(val); } +#endif std::string kaitai::kstream::to_string(unsigned val) { return to_string_unsigned(val); @@ -879,9 +881,11 @@ std::string kaitai::kstream::to_string(unsigned long val) { return to_string_unsigned(val); } +#ifdef KAITAI_STREAM_H_CPP11_SUPPORT std::string kaitai::kstream::to_string(unsigned long long val) { return to_string_unsigned(val); } +#endif int64_t kaitai::kstream::string_to_int(const std::string& str, int base) { char *str_end; diff --git a/kaitai/kaitaistream.h b/kaitai/kaitaistream.h index 2c6f616..2549399 100644 --- a/kaitai/kaitaistream.h +++ b/kaitai/kaitaistream.h @@ -242,12 +242,15 @@ class kstream { */ static std::string to_string(long val); +// The `long long` type is only available since C++11, so we use it only in C++11 mode. +#ifdef KAITAI_STREAM_H_CPP11_SUPPORT /** * Converts given integer `val` to a decimal string representation. * Should be used in place of `std::to_string(long long)` (which is available only * since C++11) in older C++ implementations. */ static std::string to_string(long long val); +#endif /** * Converts given integer `val` to a decimal string representation. @@ -263,12 +266,15 @@ class kstream { */ static std::string to_string(unsigned long val); +// The `unsigned long long` type is only available since C++11, so we use it only in C++11 mode. +#ifdef KAITAI_STREAM_H_CPP11_SUPPORT /** * Converts given integer `val` to a decimal string representation. * Should be used in place of `std::to_string(unsigned long long)` (which is available only * since C++11) in older C++ implementations. */ static std::string to_string(unsigned long long val); +#endif /** * Converts string `str` to an integer value. Throws an exception if the diff --git a/tests/unittest.cpp b/tests/unittest.cpp index 028a01b..58eec6c 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -149,6 +149,8 @@ TEST(KaitaiStreamTest, to_string_long) #pragma warning(pop) #endif +// The `long long` type is only available since C++11, so we use it only in C++11 mode. +#ifdef KAITAI_STREAM_H_CPP11_SUPPORT TEST(KaitaiStreamTest, to_string_unsigned_long_long) { EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); @@ -160,6 +162,20 @@ TEST(KaitaiStreamTest, to_string_long_long) EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-9223372036854775808"); EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "9223372036854775807"); } +#else +// Make sure we still support 64-bit integers. +TEST(KaitaiStreamTest, to_string_uint64) +{ + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "0"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "18446744073709551615"); +} + +TEST(KaitaiStreamTest, to_string_int64) +{ + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::min()), "-9223372036854775808"); + EXPECT_EQ(kaitai::kstream::to_string(std::numeric_limits::max()), "9223372036854775807"); +} +#endif TEST(KaitaiStreamTest, string_to_int) { From 4bc5896414023207e908cd2811293390149c44ef Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Mon, 3 Mar 2025 00:28:16 +0100 Subject: [PATCH 3/6] GH Actions: select GoogleTest version based on C++ standard If we always use [GoogleTest v1.14.0](https://github.com/google/googletest/releases/tag/v1.14.0) (which declares that it requires at least C++14) even for `-DCMAKE_CXX_STANDARD=98`, then only `kaitaistream.cpp` gets a `-std=c++98` option, while `unittest.cpp` is compiled under `-std=c++14`. This is a problem, because then neither `tests/unittest.cpp` nor `kaitai/kaitaistream.h` included from `tests/unittest.cpp` know that the `kaitaistream` library was compiled without C++11 support and try to use it, which doesn't work. Not to mention that such mixing of C++ standards doesn't make much sense if we want to verify that we are compatible with a particular version of the C++ standard. --- ...1.8.1-fix-Werror-maybe-uninitialized.patch | 21 ++++++++ .build/install-gtest | 53 ++++++++++++++++++- .github/workflows/build.yml | 6 ++- tests/CMakeLists.txt | 12 +++++ tests/unittest.cpp | 7 ++- 5 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 .build/gtest-v1.8.1-fix-Werror-maybe-uninitialized.patch diff --git a/.build/gtest-v1.8.1-fix-Werror-maybe-uninitialized.patch b/.build/gtest-v1.8.1-fix-Werror-maybe-uninitialized.patch new file mode 100644 index 0000000..e72c9d1 --- /dev/null +++ b/.build/gtest-v1.8.1-fix-Werror-maybe-uninitialized.patch @@ -0,0 +1,21 @@ +diff --git a/googletest/src/gtest-death-test.cc b/googletest/src/gtest-death-test.cc +index 0908355..d6f492e 100644 +--- a/googletest/src/gtest-death-test.cc ++++ b/googletest/src/gtest-death-test.cc +@@ -1212,14 +1212,14 @@ static int ExecDeathTestChildMain(void* child_arg) { + static void StackLowerThanAddress(const void* ptr, + bool* result) GTEST_NO_INLINE_; + static void StackLowerThanAddress(const void* ptr, bool* result) { +- int dummy; ++ int dummy = 0; + *result = (&dummy < ptr); + } + + // Make sure AddressSanitizer does not tamper with the stack here. + GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ + static bool StackGrowsDown() { +- int dummy; ++ int dummy = 0; + bool result; + StackLowerThanAddress(&dummy, &result); + return result; diff --git a/.build/install-gtest b/.build/install-gtest index c5fec57..9f9009f 100755 --- a/.build/install-gtest +++ b/.build/install-gtest @@ -5,7 +5,53 @@ set -ef # # See https://github.com/google/googletest/blob/d83fee138a9ae6cb7c03688a2d08d4043a39815d/googletest/README.md#build-with-cmake -gtest_version_tag=v1.14.0 +# https://unix.stackexchange.com/a/706411 +script_dir=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd) + +usage() +{ + echo "Usage: $0 " + echo + echo ' C++ standard to target (supported: 98, 11, 14 and higher)' +} + +die() +{ + printf '%s\n' "$1" >&2 + printf '\n%s\n' "$(usage)" >&2 + exit 1 +} + +cpp_standard=$1 +if [ -z "$cpp_standard" ]; then + die 'Error: must not be empty' +fi + +case $cpp_standard in + 98) + # GoogleTest v1.8.1 seems to be the last version supporting C++98, + # see https://github.com/google/googletest/releases/tag/release-1.8.1 + gtest_version_tag=release-1.8.1 + ;; + 11) + # GoogleTest v1.12.1 seems to be the last version supporting C++11, + # see https://github.com/google/googletest/releases/tag/release-1.12.1 + gtest_version_tag=release-1.12.1 + ;; + *) + if [ "$cpp_standard" -ge 14 ]; then + # GoogleTest v1.16.0 (the latest version at the time of writing) requires at + # least C++14, see https://github.com/google/googletest/releases/tag/v1.16.0 + gtest_version_tag=v1.16.0 + else + echo "Error: unsupported value $1 of " >&2 + printf '\n%s\n' "$(usage)" >&2 + exit 1 + fi + ;; +esac + +echo "Installing GoogleTest $gtest_version_tag" temp_dir=$(mktemp -d) cleanup() @@ -20,6 +66,11 @@ trap 'cleanup' INT TERM HUP EXIT # Download git clone --depth 1 -b "$gtest_version_tag" -- https://github.com/google/googletest.git "$temp_dir" cd -- "$temp_dir" +if [ "$gtest_version_tag" = release-1.8.1 ]; then + # We must apply a patch to fix `-Werror=maybe-uninitialized` (see + # https://github.com/google/googletest/issues/3219), otherwise it won't compile + git apply -- "$script_dir"/gtest-v1.8.1-fix-Werror-maybe-uninitialized.patch +fi mkdir build cd build # Configure - build only GoogleTest, not GoogleMock diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9b2a4a0..c7b0798 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,7 +19,9 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install GoogleTest - run: .build/install-gtest + env: + CPP_STANDARD: ${{ matrix.cpp-standard }} + run: .build/install-gtest "$CPP_STANDARD" - name: Update package lists run: sudo apt-get update - name: Install packages @@ -123,7 +125,7 @@ jobs: pkg info echo '#### Installing GoogleTest' - .build/install-gtest + .build/install-gtest 11 echo '#### Building' .build/build -DCMAKE_CXX_STANDARD=11 -DCMAKE_CXX_STANDARD_REQUIRED=ON -DCMAKE_CXX_EXTENSIONS=OFF diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1f91888..39e3c17 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,6 +8,18 @@ include(../Common.cmake) target_include_directories(unittest PRIVATE ${CMAKE_SOURCE_DIR}) +target_compile_options(unittest PRIVATE + $<$>: + # At the time of writing, we use a variadic `SETUP_STREAM(...)` macro everywhere + # in `unittest.cpp`. However, variadic macros are only available since C++11, so + # the `#define SETUP_STREAM(...)` line raises a warning `anonymous variadic macros + # were introduced in C++11` when compiling with `-std=c++98`. Since we also use + # `-Werror`, this warning turns into an error and blocks compilation. Therefore, + # we change this specific diagnostic back to a non-fatal warning. + -Wno-error=variadic-macros + > +) + # Link the test executable with the main library and the test framework/library target_link_libraries(unittest PRIVATE kaitai_struct_cpp_stl_runtime GTest::GTest GTest::Main) diff --git a/tests/unittest.cpp b/tests/unittest.cpp index 58eec6c..38711fa 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -1,7 +1,12 @@ #ifdef GTEST_NANO #include "tests/gtest-nano.h" #else -#include "gtest/gtest.h" +// These IWYU pragmas are needed for versions of GoogleTest older than 1.12, +// see https://github.com/kaitai-io/kaitai_struct_cpp_stl_runtime/pull/72#issuecomment-2093287161 +#include "gtest/gtest.h" // IWYU pragma: keep +// IWYU pragma: no_include +// IWYU pragma: no_include +// IWYU pragma: no_include "gtest/gtest_pred_impl.h" #endif #include "kaitai/kaitaistream.h" From f592fc032a3aac6e4a134c84e6a62d045f794d74 Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Mon, 3 Mar 2025 00:48:50 +0100 Subject: [PATCH 4/6] .build/install-gtest: call the `die` function for consistency --- .build/install-gtest | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.build/install-gtest b/.build/install-gtest index 9f9009f..2abb8fd 100755 --- a/.build/install-gtest +++ b/.build/install-gtest @@ -44,9 +44,7 @@ case $cpp_standard in # least C++14, see https://github.com/google/googletest/releases/tag/v1.16.0 gtest_version_tag=v1.16.0 else - echo "Error: unsupported value $1 of " >&2 - printf '\n%s\n' "$(usage)" >&2 - exit 1 + die "Error: unsupported value $1 of " fi ;; esac From 850bd27592d484aad523d7e444bea2a1d4834b22 Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Mon, 3 Mar 2025 13:27:46 +0100 Subject: [PATCH 5/6] .build/install-gtest: accept at most the C++26 standard --- .build/install-gtest | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.build/install-gtest b/.build/install-gtest index 2abb8fd..e67dd40 100755 --- a/.build/install-gtest +++ b/.build/install-gtest @@ -12,7 +12,7 @@ usage() { echo "Usage: $0 " echo - echo ' C++ standard to target (supported: 98, 11, 14 and higher)' + echo ' C++ standard to target (supported: 98, 11, any number between 14 and 26 inclusive)' } die() @@ -39,7 +39,7 @@ case $cpp_standard in gtest_version_tag=release-1.12.1 ;; *) - if [ "$cpp_standard" -ge 14 ]; then + if [ "$cpp_standard" -ge 14 ] && [ "$cpp_standard" -le 26 ]; then # GoogleTest v1.16.0 (the latest version at the time of writing) requires at # least C++14, see https://github.com/google/googletest/releases/tag/v1.16.0 gtest_version_tag=v1.16.0 From cb1c7b1635264dfc4421713e437f6856965db132 Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Thu, 6 Mar 2025 11:22:21 +0100 Subject: [PATCH 6/6] to_string(): implement overloads in the header .h file (not .cpp) This is more foolproof, because it allows you to compile the `kaitaistream.cpp` library under `-std=c++98` and link to it from an application compiled under `-std=c++11` (and everything will still work fine). This was not possible before, see 4bc5896414023207e908cd2811293390149c44ef. --- kaitai/kaitaistream.cpp | 34 ---------------------------------- kaitai/kaitaistream.h | 30 ++++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 40 deletions(-) diff --git a/kaitai/kaitaistream.cpp b/kaitai/kaitaistream.cpp index 613e9e2..2ad14a4 100644 --- a/kaitai/kaitaistream.cpp +++ b/kaitai/kaitaistream.cpp @@ -853,40 +853,6 @@ std::string kaitai::kstream::to_string_unsigned(uint64_t val) { return std::string(&buf[buf_contents_start], sizeof(buf) - buf_contents_start); } -// NB: the following 6 overloads are exactly the ones that -// [`std::to_string`](https://en.cppreference.com/w/cpp/string/basic_string/to_string) has. -// Testing has shown that they are all necessary: if you remove any of them, you will get -// something like `error: call to 'to_string' is ambiguous` when trying to call `to_string` -// with the integer type for which you removed the overload. - -std::string kaitai::kstream::to_string(int val) { - return to_string_signed(val); -} - -std::string kaitai::kstream::to_string(long val) { - return to_string_signed(val); -} - -#ifdef KAITAI_STREAM_H_CPP11_SUPPORT -std::string kaitai::kstream::to_string(long long val) { - return to_string_signed(val); -} -#endif - -std::string kaitai::kstream::to_string(unsigned val) { - return to_string_unsigned(val); -} - -std::string kaitai::kstream::to_string(unsigned long val) { - return to_string_unsigned(val); -} - -#ifdef KAITAI_STREAM_H_CPP11_SUPPORT -std::string kaitai::kstream::to_string(unsigned long long val) { - return to_string_unsigned(val); -} -#endif - int64_t kaitai::kstream::string_to_int(const std::string& str, int base) { char *str_end; diff --git a/kaitai/kaitaistream.h b/kaitai/kaitaistream.h index 2549399..625de2d 100644 --- a/kaitai/kaitaistream.h +++ b/kaitai/kaitaistream.h @@ -228,19 +228,29 @@ class kstream { */ static int mod(int a, int b); + // NB: the following 6 overloads of `to_string` are exactly the ones that + // [`std::to_string`](https://en.cppreference.com/w/cpp/string/basic_string/to_string) has. + // Testing has shown that they are all necessary: if you remove any of them, you will get + // something like `error: call to 'to_string' is ambiguous` when trying to call `to_string` + // with the integer type for which you removed the overload. + /** * Converts given integer `val` to a decimal string representation. * Should be used in place of `std::to_string(int)` (which is available only * since C++11) in older C++ implementations. */ - static std::string to_string(int val); + static std::string to_string(int val) { + return to_string_signed(val); + } /** * Converts given integer `val` to a decimal string representation. * Should be used in place of `std::to_string(long)` (which is available only * since C++11) in older C++ implementations. */ - static std::string to_string(long val); + static std::string to_string(long val) { + return to_string_signed(val); + } // The `long long` type is only available since C++11, so we use it only in C++11 mode. #ifdef KAITAI_STREAM_H_CPP11_SUPPORT @@ -249,7 +259,9 @@ class kstream { * Should be used in place of `std::to_string(long long)` (which is available only * since C++11) in older C++ implementations. */ - static std::string to_string(long long val); + static std::string to_string(long long val) { + return to_string_signed(val); + } #endif /** @@ -257,14 +269,18 @@ class kstream { * Should be used in place of `std::to_string(unsigned)` (which is available only * since C++11) in older C++ implementations. */ - static std::string to_string(unsigned val); + static std::string to_string(unsigned val) { + return to_string_unsigned(val); + } /** * Converts given integer `val` to a decimal string representation. * Should be used in place of `std::to_string(unsigned long)` (which is available only * since C++11) in older C++ implementations. */ - static std::string to_string(unsigned long val); + static std::string to_string(unsigned long val) { + return to_string_unsigned(val); + } // The `unsigned long long` type is only available since C++11, so we use it only in C++11 mode. #ifdef KAITAI_STREAM_H_CPP11_SUPPORT @@ -273,7 +289,9 @@ class kstream { * Should be used in place of `std::to_string(unsigned long long)` (which is available only * since C++11) in older C++ implementations. */ - static std::string to_string(unsigned long long val); + static std::string to_string(unsigned long long val) { + return to_string_unsigned(val); + } #endif /**