Skip to content

Adopt a system of exceptions derived from KaitaiStructError #80

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 25, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 73 additions & 23 deletions kaitaistruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ def is_eof(self):
return self._io.tell() >= self.size()

def seek(self, n):
if n < 0:
raise InvalidArgumentError("cannot seek to invalid position %d" % (n,))
Copy link
Member Author

@generalmimon generalmimon Sep 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record, this exception was already thrown by the io.IOBase.seek() method in the Python standard library. The exception type and message text depended on which underlying stream was used. When using io.BytesIO, a ValueError("negative seek value %zd") was thrown (see Modules/_io/bytesio.c:659-660). But when using a regular file, I was getting OSError: [Errno 22] Invalid argument on both Windows and Linux.

So it seems better to do this check ourselves, if only for the sake of consistent behavior. And also because a negative seek position can easily be a result of invalid input data (and thus the raised exception should be a subclass of KaitaiStructError) - imagine a situation like this:

meta:
  id: negative_seek
seq:
  # Consider parsing a byte with the highest bit set, i.e. `80..ff` in hex
  - id: ofs_foo
    type: s1
instances:
  foo:
    pos: ofs_foo
    type: u1

If ofs_foo is negative and we attempt to read foo, the error that occurs is a clear parse error.


if self.bits_write_mode:
self.write_align_to_byte()
else:
Expand Down Expand Up @@ -376,7 +379,7 @@ def read_bytes(self, n):

def _read_bytes_not_aligned(self, n):
if n < 0:
raise ValueError(
raise InvalidArgumentError(
"requested invalid %d amount of bytes" %
(n,)
)
Expand Down Expand Up @@ -404,9 +407,10 @@ def _read_bytes_not_aligned(self, n):

if not is_satisfiable:
# noinspection PyUnboundLocalVariable
raise EOFError(
raise EndOfStreamError(
"requested %d bytes, but only %d bytes available" %
(n, num_bytes_available)
(n, num_bytes_available),
n, num_bytes_available
)

# noinspection PyUnboundLocalVariable
Expand All @@ -424,10 +428,7 @@ def read_bytes_term(self, term, include_term, consume_term, eos_error):
c = self._io.read(1)
if not c:
if eos_error:
raise Exception(
"end of stream reached, but no terminator %d found" %
(term,)
)
raise NoTerminatorFoundError(term_byte, 0)

return bytes(r)

Expand All @@ -448,10 +449,7 @@ def read_bytes_term_multi(self, term, include_term, consume_term, eos_error):
c = self._io.read(unit_size)
if len(c) < unit_size:
if eos_error:
raise Exception(
"end of stream reached, but no terminator %s found" %
(repr(term),)
)
raise NoTerminatorFoundError(term, len(c))

r += c
return bytes(r)
Expand Down Expand Up @@ -523,9 +521,10 @@ def _ensure_bytes_left_to_write(self, n, pos):

num_bytes_left = full_size - pos
if n > num_bytes_left:
raise EOFError(
raise EndOfStreamError(
"requested to write %d bytes, but only %d bytes left in the stream" %
(n, num_bytes_left)
(n, num_bytes_left),
n, num_bytes_left
)

# region Integer numbers
Expand Down Expand Up @@ -739,14 +738,25 @@ def _write_bytes_not_aligned(self, buf):

def write_bytes_limit(self, buf, size, term, pad_byte):
n = len(buf)
# Strictly speaking, this assertion is redundant because it is already
# done in the corresponding _check() method in the generated code, but
# it seems to make sense to include it here anyway so that this method
# itself does something reasonable for every set of arguments.
#
# However, it should never be `false` when operated correctly (and in
# this case, assigning inconsistent values to fields of a KS-generated
# object is considered correct operation if the user application calls
# the corresponding _check(), which we know would raise an error and
# thus the code should not reach _write() and this method at all). So
# it's by design that this throws AssertionError, not any specific
# error, because it's not intended to be caught in user applications,
# but avoided by calling all _check() methods correctly.
assert n <= size, "writing %d bytes, but %d bytes were given" % (size, n)

self.write_bytes(buf)
if n < size:
self.write_u1(term)
pad_len = size - n - 1
for _ in range(pad_len):
self.write_u1(pad_byte)
elif n > size:
raise ValueError("writing %d bytes, but %d bytes were given" % (size, n))
self.write_bytes(KaitaiStream.byte_from_int(pad_byte) * (size - n - 1))

# endregion

Expand All @@ -771,7 +781,7 @@ def process_xor_many(data, key):
@staticmethod
def process_rotate_left(data, amount, group_size):
if group_size != 1:
raise Exception(
raise NotImplementedError(
"unable to rotate group of %d bytes yet" %
(group_size,)
)
Expand Down Expand Up @@ -861,15 +871,55 @@ def _write_back(self, parent):


class KaitaiStructError(Exception):
"""Common ancestor for all error originating from Kaitai Struct usage.
Stores KSY source path, pointing to an element supposedly guilty of
an error.
"""Common ancestor for all errors originating from correct Kaitai Struct
usage (i.e. errors that indicate a problem with user input, not errors
indicating incorrect usage that are not meant to be caught but fixed in the
application code). Use this exception type in the `except` clause if you
want to handle all parse errors and serialization errors.

If available, the `src_path` attribute will contain the KSY source path
pointing to the element where the error occurred. If it is not available,
`src_path` will be `None`.
"""
def __init__(self, msg, src_path):
super(KaitaiStructError, self).__init__("%s: %s" % (src_path, msg))
super(KaitaiStructError, self).__init__(("" if src_path is None else src_path + ": ") + msg)
self.src_path = src_path


class InvalidArgumentError(KaitaiStructError, ValueError):
    """Raised when an argument value is invalid (comparable to `ValueError`)
    in a place where the bad value may stem from invalid user input, so the
    failure counts as a parse error or serialization error.

    Because it derives from both `KaitaiStructError` and `ValueError`, an
    `except` clause for either type will catch it.
    """
    def __init__(self, msg):
        # Forward the message to KaitaiStructError with `src_path` set to None.
        super(InvalidArgumentError, self).__init__(msg, src_path=None)


class EndOfStreamError(KaitaiStructError, EOFError):
    """Attempt to read or write past the end of the stream.

    Attributes:
        bytes_needed: number of bytes that were requested to be read or
            written.
        bytes_available: number of bytes remaining in the stream.

    Because it derives from both `KaitaiStructError` and `EOFError`, an
    `except` clause for either type will catch it.
    """
    def __init__(self, msg, bytes_needed, bytes_available):
        # Forward the message to KaitaiStructError with `src_path` set to None.
        super(EndOfStreamError, self).__init__(msg, src_path=None)
        self.bytes_needed, self.bytes_available = bytes_needed, bytes_available


class NoTerminatorFoundError(EndOfStreamError):
    """`EndOfStreamError` variant raised when the end of the stream is reached
    before the required terminator has been found.

    To tolerate a missing terminator, specify `eos-error: false` in the KSY
    specification; the end of stream is then treated as a valid end of the
    field and this error is no longer raised.

    Attributes:
        term: `bytes` object holding the terminator that was searched for.
    """
    def __init__(self, term, bytes_available):
        message = "end of stream reached, but no terminator %r found" % (term,)
        # The length of the terminator is reported as `bytes_needed`.
        super(NoTerminatorFoundError, self).__init__(
            message, len(term), bytes_available
        )
        self.term = term


class UndecidedEndiannessError(KaitaiStructError):
"""Error that occurs when default endianness should be decided with
switch, but nothing matches (although using endianness expression
Expand Down