# Source code for code_index.utils.custom_json

"""Custom JSON serialization utilities for code indexer data structures.

This module provides enhanced JSON encoding and decoding capabilities for handling
complex data structures used in the code indexer, including dataclasses, Path objects,
and custom type registration for serialization.

The module supports:
    - Automatic dataclass serialization/deserialization
    - Path object handling (automatic conversion to/from strings)
    - Type registration system for custom classes
    - Strict/non-strict deserialization modes

Classes:
    EnhancedJSONEncoder: Custom JSON encoder for handling non-standard types.

Functions:
    register_json_type: Decorator for registering dataclasses for JSON serialization.
    custom_json_decoder: Custom JSON decoder for reconstructing objects.
    dump_index_to_json: Utility function for saving index data to JSON files.
    load_index_from_json: Utility function for loading index data from JSON files.
"""

import json
from dataclasses import fields, is_dataclass
from pathlib import Path
from typing import Any, Dict, Type, TypeVar

from .logger import logger


class EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that also understands ``Path`` and dataclass instances.

    On top of the standard ``json.JSONEncoder`` behaviour this encoder handles:

    - ``pathlib.Path`` objects, which are written as plain strings.
    - dataclass *instances*, which are written as a dictionary of their
      fields plus a ``"__class__"`` entry holding the class name, so that a
      decoder can rebuild the object later.
    """

    def default(self, o):
        """Serialize objects that are not natively JSON serializable.

        Args:
            o: The object to serialize.

        Returns:
            A JSON-serializable representation of the object: a string for
            ``Path`` objects, a dictionary for dataclass instances.

        Raises:
            TypeError: If the object type is not supported by this encoder
                (raised by ``json.JSONEncoder.default``).
        """
        # Convert Path objects to strings
        if isinstance(o, Path):
            return str(o)

        # ``is_dataclass`` is also true for dataclass *types*; only instances
        # carry field values, so class objects must fall through to the
        # standard TypeError instead of failing inside ``getattr``.
        if is_dataclass(o) and not isinstance(o, type):
            # Shallow field extraction (rather than ``dataclasses.asdict``)
            # keeps nested dataclass values as objects, so the encoder visits
            # them through ``default`` and tags each with its own class name.
            dict_data = {f.name: getattr(o, f.name) for f in fields(o)}
            dict_data["__class__"] = type(o).__name__  # type info for deserialization
            return dict_data

        # Fall back to the default encoder for all other types
        return super().default(o)
# Generic placeholder used to type the ``register_json_type`` decorator.
T = TypeVar("T")

JSON_TYPE_REGISTRY: dict[str, Type[Any]] = {}
"""Module-wide lookup table from class name to class object.

Entries are added by the ``@register_json_type`` decorator and read by
``custom_json_decoder``, which uses the ``"__class__"`` name stored in
serialized data to find the class to instantiate.
"""
def register_json_type(cls: Type[T]) -> Type[T]:
    """Register a dataclass type for JSON serialization support.

    This decorator stores a dataclass in the global ``JSON_TYPE_REGISTRY``
    under its ``__name__`` so that ``custom_json_decoder`` can rebuild
    instances from serialized data. Registering a second class with the same
    name replaces the earlier entry.

    Only dataclasses are registered. Any other class is left out of the
    registry and a warning is logged; no exception is raised.

    Args:
        cls: The class to register. Expected to be a dataclass.

    Returns:
        The same class (unmodified), allowing use as a decorator.

    Example:
        >>> @register_json_type
        ... @dataclass
        ... class MyData:
        ...     value: int
        >>> # MyData is now registered and can be serialized/deserialized
    """
    if not is_dataclass(cls):
        logger.warning(
            f"Attempted to register {cls.__name__} which is not a dataclass. Skipping registration."
        )
        # Actually skip, as the log message promises: the decoder can only
        # rebuild dataclasses, so a non-dataclass entry would be dead weight.
        return cls

    JSON_TYPE_REGISTRY[cls.__name__] = cls
    return cls
def custom_json_decoder(dct: Dict, strict=False) -> object:
    """Custom JSON decoder for reconstructing objects from dictionaries.

    Intended for use as a ``json`` ``object_hook``. It performs two steps:

    1. A string stored under the ``"file_path"`` key is converted to a
       ``Path`` (the dictionary is updated in place).
    2. If the dictionary carries a ``"__class__"`` entry naming a dataclass
       found in ``JSON_TYPE_REGISTRY``, an instance of that class is built
       from the remaining entries. String values of fields annotated as
       ``Path`` are converted to ``Path`` first.

    Args:
        dct: Dictionary containing serialized object data.
        strict: If True, raises an exception when the ``"__class__"`` name is
            not registered. If False, the dictionary is returned instead.

    Returns:
        The reconstructed dataclass instance when the class is registered,
        otherwise the dictionary itself. When no instance is built, the
        ``"__class__"`` entry is kept so the type information is not lost.

    Raises:
        ValueError: If strict=True and an unregistered class is encountered.

    Example:
        >>> data = {"value": 42, "__class__": "MyData"}
        >>> obj = custom_json_decoder(data)
        >>> isinstance(obj, MyData)
        True
    """
    # Handle Path objects stored as strings
    if isinstance(dct.get("file_path"), str):
        dct["file_path"] = Path(dct["file_path"])

    if "__class__" not in dct:
        return dct

    # Read the tag without removing it: if no object can be built the caller
    # must get the dictionary back intact, type tag included.
    class_name = dct["__class__"]
    # Registry keys are class names; a non-string tag cannot match (and might
    # not even be hashable), so treat it as unregistered.
    cls = JSON_TYPE_REGISTRY.get(class_name) if isinstance(class_name, str) else None

    if cls is None:
        if strict:
            raise ValueError(f"Class {class_name} not registered in JSON_TYPE_REGISTRY.")
        return dct

    if not is_dataclass(cls):
        # Only dataclasses can be rebuilt from keyword arguments.
        return dct

    # Constructor arguments are every entry except the type tag.
    kwargs = {key: value for key, value in dct.items() if key != "__class__"}

    # Convert string paths to Path objects for fields typed as Path.
    # NOTE(review): this compares against the Path class itself, so fields
    # whose annotation is stored as the string "Path" (e.g. under
    # ``from __future__ import annotations``) are not converted here.
    for field_info in fields(cls):
        field_name = field_info.name
        if (
            field_name in kwargs
            and isinstance(kwargs[field_name], str)
            and field_info.type == Path
        ):
            kwargs[field_name] = Path(kwargs[field_name])

    # noinspection PyArgumentList
    return cls(**kwargs)
def dump_index_to_json(index: dict, output_path: Path):
    """Write index data to a JSON file using ``EnhancedJSONEncoder``.

    The file is opened for writing as UTF-8 and the data is emitted with a
    two-space indent and without ASCII escaping of non-ASCII characters.
    Dataclass instances and ``Path`` objects inside the index are handled by
    the encoder.

    Args:
        index: The index data dictionary to serialize.
        output_path: Path where the JSON file should be written.

    Raises:
        IOError: If the file cannot be written due to permissions or disk issues.

    Example:
        >>> index_data = {"functions": [some_function_data]}
        >>> dump_index_to_json(index_data, Path("index.json"))
    """
    with output_path.open("w", encoding="utf-8") as stream:
        json.dump(
            index,
            stream,
            cls=EnhancedJSONEncoder,
            ensure_ascii=False,
            indent=2,
        )
def load_index_from_json(input_path: Path, strict=False):
    """Read index data from a JSON file, rebuilding registered objects.

    The file is read as UTF-8 and every decoded JSON object is passed through
    ``custom_json_decoder`` together with the ``strict`` flag.

    Args:
        input_path: Path to the JSON file to load.
        strict: If True, raises exceptions for unregistered classes during
            deserialization. If False, leaves unregistered objects as
            dictionaries.

    Returns:
        The deserialized index data with proper object types reconstructed.

    Raises:
        IOError: If the file cannot be read.
        ValueError: If strict=True and unregistered classes are encountered.
        json.JSONDecodeError: If the file contains invalid JSON.

    Example:
        >>> data = load_index_from_json(Path("index.json"))
        >>> # Returns properly typed objects based on registry
    """

    def _decode_object(raw):
        # Forward the caller's strictness choice to the shared decoder.
        return custom_json_decoder(raw, strict)

    text = input_path.read_text(encoding="utf-8")
    return json.loads(text, object_hook=_decode_object)