Skip to content

More efficient (fixed-format) serialization #19668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 41 additions & 8 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from typing_extensions import TypeAlias as _TypeAlias

import mypy.semanal_main
from mypy.cache import Buffer
from mypy.checker import TypeChecker
from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter
from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error
Expand Down Expand Up @@ -1139,6 +1140,17 @@ def read_deps_cache(manager: BuildManager, graph: Graph) -> dict[str, FgDepMeta]
return module_deps_metas


def _load_ff_file(file: str, manager: BuildManager, log_error: str) -> bytes | None:
t0 = time.time()
try:
data = manager.metastore.read(file)
except OSError:
manager.log(log_error + file)
return None
manager.add_stats(metastore_read_time=time.time() - t0)
return data


def _load_json_file(
file: str, manager: BuildManager, log_success: str, log_error: str
) -> dict[str, Any] | None:
Expand Down Expand Up @@ -1259,7 +1271,11 @@ def get_cache_names(id: str, path: str, options: Options) -> tuple[str, str, str
deps_json = None
if options.cache_fine_grained:
deps_json = prefix + ".deps.json"
return (prefix + ".meta.json", prefix + ".data.json", deps_json)
if options.fixed_format_cache:
data_suffix = ".data.ff"
else:
data_suffix = ".data.json"
return (prefix + ".meta.json", prefix + data_suffix, deps_json)


def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | None:
Expand Down Expand Up @@ -1559,8 +1575,13 @@ def write_cache(
tree.path = path

# Serialize data and analyze interface
data = tree.serialize()
data_bytes = json_dumps(data, manager.options.debug_cache)
if manager.options.fixed_format_cache:
data_io = Buffer()
tree.write(data_io)
data_bytes = data_io.getvalue()
else:
data = tree.serialize()
data_bytes = json_dumps(data, manager.options.debug_cache)
interface_hash = hash_digest(data_bytes)

plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))
Expand Down Expand Up @@ -2085,15 +2106,23 @@ def load_tree(self, temporary: bool = False) -> None:
self.meta is not None
), "Internal error: this method must be called only for cached modules"

data = _load_json_file(
self.meta.data_json, self.manager, "Load tree ", "Could not load tree: "
)
data: bytes | dict[str, Any] | None
if self.options.fixed_format_cache:
data = _load_ff_file(self.meta.data_json, self.manager, "Could not load tree: ")
else:
data = _load_json_file(
self.meta.data_json, self.manager, "Load tree ", "Could not load tree: "
)
if data is None:
return

t0 = time.time()
# TODO: Assert data file wasn't changed.
self.tree = MypyFile.deserialize(data)
if isinstance(data, bytes):
data_io = Buffer(data)
self.tree = MypyFile.read(data_io)
else:
self.tree = MypyFile.deserialize(data)
t1 = time.time()
self.manager.add_stats(deserialize_time=t1 - t0)
if not temporary:
Expand Down Expand Up @@ -2481,7 +2510,11 @@ def write_cache(self) -> None:
):
if self.options.debug_serialize:
try:
self.tree.serialize()
if self.manager.options.fixed_format_cache:
data = Buffer()
self.tree.write(data)
else:
self.tree.serialize()
except Exception:
print(f"Error serializing {self.id}", file=self.manager.stdout)
raise # Propagate to display traceback
Expand Down
153 changes: 153 additions & 0 deletions mypy/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
from __future__ import annotations

from collections.abc import Sequence
from typing import TYPE_CHECKING, Final

try:
    from native_internal import (
        Buffer as Buffer,
        read_bool as read_bool,
        read_float as read_float,
        read_int as read_int,
        read_str as read_str,
        write_bool as write_bool,
        write_float as write_float,
        write_int as write_int,
        write_str as write_str,
    )
except ImportError:
    # TODO: temporary, remove this after we publish mypy-native on PyPI.
    if not TYPE_CHECKING:
        # Pure-Python placeholders so that importing this module still works
        # when the native extension is missing. Every entry point fails with
        # NotImplementedError carrying a message that names the missing
        # module, rather than a bare exception with no explanation.
        _NATIVE_MISSING = (
            "the native_internal extension module (mypy-native) is not available; "
            "fixed-format cache serialization cannot be used"
        )

        class Buffer:
            """Placeholder for the native buffer type; cannot be instantiated."""

            def __init__(self, source: bytes = b"") -> None:
                raise NotImplementedError(_NATIVE_MISSING)

            def getvalue(self) -> bytes:
                raise NotImplementedError(_NATIVE_MISSING)

        def read_int(data: Buffer) -> int:
            raise NotImplementedError(_NATIVE_MISSING)

        def write_int(data: Buffer, value: int) -> None:
            raise NotImplementedError(_NATIVE_MISSING)

        def read_str(data: Buffer) -> str:
            raise NotImplementedError(_NATIVE_MISSING)

        def write_str(data: Buffer, value: str) -> None:
            raise NotImplementedError(_NATIVE_MISSING)

        def read_bool(data: Buffer) -> bool:
            raise NotImplementedError(_NATIVE_MISSING)

        def write_bool(data: Buffer, value: bool) -> None:
            raise NotImplementedError(_NATIVE_MISSING)

        def read_float(data: Buffer) -> float:
            raise NotImplementedError(_NATIVE_MISSING)

        def write_float(data: Buffer, value: float) -> None:
            raise NotImplementedError(_NATIVE_MISSING)


# Integer markers identifying the kind of a serialized literal value.
LITERAL_INT: Final = 1
LITERAL_STR: Final = 2
LITERAL_BOOL: Final = 3
LITERAL_FLOAT: Final = 4
LITERAL_COMPLEX: Final = 5
LITERAL_NONE: Final = 6


def read_literal(data: Buffer, marker: int) -> int | str | bool | float:
    """Decode the literal payload selected by an already-known marker.

    ``marker`` is supplied by the caller and picks the primitive reader that
    is applied to ``data``. Only the int, str, bool and float markers are
    decoded here.

    Raises AssertionError for any other marker value.
    """
    if marker == LITERAL_INT:
        return read_int(data)
    elif marker == LITERAL_STR:
        return read_str(data)
    elif marker == LITERAL_BOOL:
        return read_bool(data)
    elif marker == LITERAL_FLOAT:
        return read_float(data)
    # NOTE(review): LITERAL_COMPLEX and LITERAL_NONE are not decoded here and
    # are excluded from the return type -- presumably callers that permit
    # them handle those markers themselves; confirm.
    # Raise explicitly instead of `assert False` so the guard is not stripped
    # when Python runs with -O (which would make this silently return None).
    raise AssertionError(f"Unknown literal marker {marker}")


def write_literal(data: Buffer, value: int | str | bool | float | complex | None) -> None:
    """Write a literal as a kind marker followed by its payload.

    A complex value is written as two floats (real part, then imaginary
    part). A value that matches none of the checked types gets the
    LITERAL_NONE marker and no payload.
    """
    # bool has to be tested ahead of int because bool is a subclass of int.
    if isinstance(value, bool):
        write_int(data, LITERAL_BOOL)
        write_bool(data, value)
        return
    if isinstance(value, int):
        write_int(data, LITERAL_INT)
        write_int(data, value)
        return
    if isinstance(value, str):
        write_int(data, LITERAL_STR)
        write_str(data, value)
        return
    if isinstance(value, float):
        write_int(data, LITERAL_FLOAT)
        write_float(data, value)
        return
    if isinstance(value, complex):
        write_int(data, LITERAL_COMPLEX)
        write_float(data, value.real)
        write_float(data, value.imag)
        return
    write_int(data, LITERAL_NONE)


def read_int_opt(data: Buffer) -> int | None:
    """Read an optional int: a presence flag, then the value if the flag is set."""
    present = read_bool(data)
    return read_int(data) if present else None


def write_int_opt(data: Buffer, value: int | None) -> None:
    """Write an optional int as a presence flag followed by the value, if any."""
    if value is None:
        # Absent: only the False flag is written.
        write_bool(data, False)
        return
    write_bool(data, True)
    write_int(data, value)


def read_str_opt(data: Buffer) -> str | None:
    """Read an optional str: a presence flag, then the value if the flag is set."""
    if not read_bool(data):
        return None
    return read_str(data)


def write_str_opt(data: Buffer, value: str | None) -> None:
    """Write an optional str as a presence flag followed by the value, if any."""
    present = value is not None
    write_bool(data, present)
    if value is not None:
        write_str(data, value)


def read_int_list(data: Buffer) -> list[int]:
    """Read a list of ints stored as an element count followed by the elements."""
    count = read_int(data)
    items: list[int] = []
    for _ in range(count):
        items.append(read_int(data))
    return items


def write_int_list(data: Buffer, value: list[int]) -> None:
    """Write a list of ints as its length followed by each element in order."""
    count = len(value)
    write_int(data, count)
    for number in value:
        write_int(data, number)


def read_str_list(data: Buffer) -> list[str]:
    """Read a list of strings stored as an element count followed by the elements."""
    count = read_int(data)
    strings: list[str] = []
    for _ in range(count):
        strings.append(read_str(data))
    return strings


def write_str_list(data: Buffer, value: Sequence[str]) -> None:
    """Write a sequence of strings as its length followed by each element in order."""
    count = len(value)
    write_int(data, count)
    for text in value:
        write_str(data, text)


def read_str_opt_list(data: Buffer) -> list[str | None]:
    """Read a list of optional strings stored as a count followed by the elements."""
    count = read_int(data)
    entries: list[str | None] = []
    for _ in range(count):
        entries.append(read_str_opt(data))
    return entries


def write_str_opt_list(data: Buffer, value: list[str | None]) -> None:
    """Write a list of optional strings as its length followed by each element."""
    count = len(value)
    write_int(data, count)
    for entry in value:
        write_str_opt(data, entry)
2 changes: 2 additions & 0 deletions mypy/fixup.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def visit_type_info(self, info: TypeInfo) -> None:
info.declared_metaclass.accept(self.type_fixer)
if info.metaclass_type:
info.metaclass_type.accept(self.type_fixer)
if info.self_type:
info.self_type.accept(self.type_fixer)
if info.alt_promote:
info.alt_promote.accept(self.type_fixer)
instance = Instance(info, [])
Expand Down
5 changes: 5 additions & 0 deletions mypy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,11 @@ def add_invertible_flag(
action="store_true",
help="Include fine-grained dependency information in the cache for the mypy daemon",
)
incremental_group.add_argument(
"--fixed-format-cache",
action="store_true",
help="Use experimental binary fixed format cache",
)
incremental_group.add_argument(
"--skip-version-check",
action="store_true",
Expand Down
7 changes: 5 additions & 2 deletions mypy/modulefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ def default_lib_path(
custom_typeshed_dir = os.path.abspath(custom_typeshed_dir)
typeshed_dir = os.path.join(custom_typeshed_dir, "stdlib")
mypy_extensions_dir = os.path.join(custom_typeshed_dir, "stubs", "mypy-extensions")
mypy_native_dir = os.path.join(custom_typeshed_dir, "stubs", "mypy-native")
versions_file = os.path.join(typeshed_dir, "VERSIONS")
if not os.path.isdir(typeshed_dir) or not os.path.isfile(versions_file):
print(
Expand All @@ -811,11 +812,13 @@ def default_lib_path(
data_dir = auto
typeshed_dir = os.path.join(data_dir, "typeshed", "stdlib")
mypy_extensions_dir = os.path.join(data_dir, "typeshed", "stubs", "mypy-extensions")
mypy_native_dir = os.path.join(data_dir, "typeshed", "stubs", "mypy-native")
path.append(typeshed_dir)

# Get mypy-extensions stubs from typeshed, since we treat it as an
# "internal" library, similar to typing and typing-extensions.
# Get mypy-extensions and mypy-native stubs from typeshed, since we treat them as
# "internal" libraries, similar to typing and typing-extensions.
path.append(mypy_extensions_dir)
path.append(mypy_native_dir)

# Add fallback path that can be used if we have a broken installation.
if sys.platform != "win32":
Expand Down
Loading
Loading