Source code for code_index.utils.test

"""Testing utilities for code indexer data comparison and validation.

This module provides specialized utilities for testing code indexer functionality,
particularly for comparing complex data structures like IndexData objects that
contain nested dataclasses, lists, and Path objects.

The utilities handle normalization of data structures to enable reliable comparison
by sorting lists, normalizing paths, and converting dataclasses to comparable formats
while preserving semantic meaning.

Functions:
    normalize_path: Standardize path strings for comparison.
    normalize_dataclass_for_comparison: Convert dataclass objects to comparable format.
    normalize_index_data_for_comparison: Normalize IndexData for testing comparison.
    compare_index_data: Compare two IndexData objects with detailed diff reporting.
    assert_index_data_equal: Assertion function for IndexData equality testing.
"""

import dataclasses
from pathlib import Path
from typing import Any, Tuple, Union

from pydantic import BaseModel

from ..models import (
    IndexData,
)



[docs]
def normalize_path(path: Union[Path, str]) -> str:
    """Normalize path strings for reliable cross-platform comparison.

    Converts path objects to resolved absolute path strings to ensure
    consistent comparison regardless of the original path format or
    current working directory.

    Args:
        path: Path object or string to normalize.

    Returns:
        Normalized absolute path string.

    Example:
        >>> normalize_path("./src/../src/main.py")
        "/absolute/path/to/src/main.py"
    """
    return str(Path(path).resolve())




[docs]
def normalize_dataclass_for_comparison(obj: Any) -> Any:
    """Convert dataclass objects to comparable format with recursive processing.

    This function recursively processes complex data structures containing
    dataclasses, dictionaries, lists, and other types to create a normalized
    representation suitable for equality comparison in tests.

    The normalization process:
        - Converts dataclasses to dictionaries
        - Recursively processes nested structures
        - Sorts lists and tuples when possible (for order-independent comparison)
        - Normalizes Path objects to strings

    Args:
        obj: The object to normalize (can be any type).

    Returns:
        Normalized representation of the object suitable for comparison.

    Example:
        >>> @dataclass
        ... class TestData:
        ...     items: list[str]
        >>> obj = TestData(items=["b", "a"])
        >>> normalized = normalize_dataclass_for_comparison(obj)
        >>> normalized["items"]
        ["a", "b"]  # Sorted for consistent comparison
    """
    if isinstance(obj, BaseModel):
        # Convert Pydantic model to dictionary
        result = obj.model_dump()
        # Recursively process dictionary values
        return {k: normalize_dataclass_for_comparison(v) for k, v in result.items()}
    if dataclasses.is_dataclass(obj):
        # Convert dataclass to dictionary
        # assert it is an instance of dataclass, not a class
        if isinstance(obj, type):
            raise TypeError("Expected an instance of a dataclass, not a class.")
        result = dataclasses.asdict(obj)
        # Recursively process dictionary values
        return {k: normalize_dataclass_for_comparison(v) for k, v in result.items()}
    elif isinstance(obj, dict):
        # Recursively process dictionaries
        return {k: normalize_dataclass_for_comparison(v) for k, v in obj.items()}
    elif isinstance(obj, (list, tuple, set)):
        # Recursively process lists/tuples and sort for order-independent comparison
        items = [normalize_dataclass_for_comparison(item) for item in obj]
        # Only sort items that are sortable
        try:
            return sorted(items, key=lambda x: str(x))
        except (TypeError, KeyError):
            # If sorting fails, maintain original order
            return items
    elif isinstance(obj, Path):
        # Normalize paths
        return normalize_path(obj)
    else:
        # Return other types unchanged
        return obj




[docs]
def normalize_index_data_for_comparison(data: IndexData) -> dict[str, Any]:
    """Normalize IndexData objects for reliable test comparison.

    Converts IndexData to a standardized dictionary format with consistent
    ordering of nested structures. This enables reliable equality testing
    by eliminating order dependencies that don't affect semantic meaning.

    The normalization process:
        - Converts the entire IndexData to a dictionary
        - Sorts data entries by symbol name and type
        - Sorts definitions by file path and line number
        - Sorts references by file path and line number
        - Sorts function calls within definitions

    Args:
        data: IndexData object to normalize.

    Returns:
        Normalized dictionary representation suitable for comparison.

    Example:
        >>> index_data = IndexData(type="simple", data=[...])
        >>> normalized = normalize_index_data_for_comparison(index_data)
        >>> # All nested lists are now consistently sorted
    """
    # Convert entire object using dataclasses.asdict
    normalized = normalize_dataclass_for_comparison(data)

    # Sort top-level data list by symbol name and type
    if "data" in normalized and isinstance(normalized["data"], list):
        normalized["data"] = sorted(
            normalized["data"],
            key=lambda e: (
                e.get("symbol", {}).get("name", ""),
                (
                    e.get("symbol", {}).get("__class__", {}).get("__name__", "")
                    if isinstance(e.get("symbol"), dict)
                    else str(type(e.get("symbol", "")).__name__)
                ),
            ),
        )

        # Sort lists within each entry
        for entry in normalized["data"]:
            if "info" in entry and isinstance(entry["info"], dict):
                info = entry["info"]

                # Sort definitions list
                if "definitions" in info and isinstance(info["definitions"], list):
                    info["definitions"] = sorted(
                        info["definitions"],
                        key=lambda d: (
                            d.get("location", {}).get("file_path", ""),
                            d.get("location", {}).get("start_lineno", 0),
                        ),
                    )

                    # Sort calls list within each definition
                    for defn in info["definitions"]:
                        if "calls" in defn and isinstance(defn["calls"], list):
                            defn["calls"] = sorted(
                                defn["calls"],
                                key=lambda c: (
                                    c.get("symbol", {}).get("name", ""),
                                    str(c.get("symbol", {})),
                                ),
                            )

                # Sort references list
                if "references" in info and isinstance(info["references"], list):
                    info["references"] = sorted(
                        info["references"],
                        key=lambda r: (
                            r.get("location", {}).get("file_path", ""),
                            r.get("location", {}).get("start_lineno", 0),
                        ),
                    )

    return normalized




[docs]
def compare_index_data(data1: IndexData, data2: IndexData) -> Tuple[bool, list[str]]:
    """Compare two IndexData objects for test equality with detailed difference reporting.

    Performs a deep comparison of two IndexData objects after normalization,
    providing detailed information about any differences found. This is useful
    for debugging test failures and understanding how data structures differ.

    Args:
        data1: First IndexData object to compare.
        data2: Second IndexData object to compare.

    Returns:
        A tuple containing:
            - bool: True if objects are equal, False otherwise
            - list[str]: List of difference descriptions (empty if equal)

    Example:
        >>> data1 = IndexData(...)
        >>> data2 = IndexData(...)
        >>> is_equal, differences = compare_index_data(data1, data2)
        >>> if not is_equal:
        ...     for diff in differences:
        ...         print(f"Difference: {diff}")
    """
    differences = []

    try:
        normalized1 = normalize_index_data_for_comparison(data1)
        normalized2 = normalize_index_data_for_comparison(data2)

        # Recursive value comparison with detailed difference tracking
        def compare_values(v1: Any, v2: Any, path: str = "") -> list[str]:
            """Recursively compare two values and track differences.

            Args:
                v1: First value to compare.
                v2: Second value to compare.
                path: Current path in the data structure (for error reporting).

            Returns:
                List of difference descriptions.
            """
            diffs = []

            match v1, v2:
                case dict() as d1, dict() as d2:
                    # Compare dictionaries
                    keys1, keys2 = set(d1.keys()), set(d2.keys())
                    if keys1 != keys2:
                        missing_in_v2 = keys1 - keys2
                        missing_in_v1 = keys2 - keys1
                        if missing_in_v2:
                            diffs.append(f"{path}: Missing keys in second object: {missing_in_v2}")
                        if missing_in_v1:
                            diffs.append(f"{path}: Extra keys in second object: {missing_in_v1}")

                    # Compare common keys
                    for key in keys1 & keys2:
                        current_path = f"{path}.{key}" if path else str(key)
                        diffs.extend(compare_values(d1[key], d2[key], current_path))

                case (list() as l1, list() as l2) | (tuple() as l1, tuple() as l2):
                    # Compare lists/tuples
                    if len(l1) != len(l2):
                        diffs.append(f"{path}: List length mismatch: {len(l1)} != {len(l2)}")
                    else:
                        for i, (item1, item2) in enumerate(zip(l1, l2)):
                            diffs.extend(compare_values(item1, item2, f"{path}[{i}]"))

                case _ if type(v1) is not type(v2):
                    # Type mismatch
                    diffs.append(
                        f"{path}: Type mismatch: {type(v1).__name__} != {type(v2).__name__}"
                    )

                case _ if v1 != v2:
                    # Direct value comparison
                    diffs.append(f"{path}: {v1!r} != {v2!r}")

            return diffs

        differences = compare_values(normalized1, normalized2)

    except Exception as e:
        differences.append(f"Error during comparison: {e}")

    return len(differences) == 0, differences




[docs]
def assert_index_data_equal(
    actual: IndexData,
    expected: IndexData,
    msg: str = "IndexData objects are not equal",
) -> None:
    """Assert that two IndexData objects are equal in testing context.

    This function provides a detailed assertion for IndexData equality,
    showing specific differences when the assertion fails. It's designed
    to be used in unit tests where detailed failure information is needed.

    Args:
        actual: The actual IndexData object (from test execution).
        expected: The expected IndexData object (reference/baseline).
        msg: Custom message to include in assertion failure.

    Raises:
        AssertionError: If the objects are not equal, with detailed difference
                       information in the error message.

    Example:
        >>> def test_indexing():
        ...     actual_data = index_some_code()
        ...     expected_data = load_expected_data()
        ...     assert_index_data_equal(
        ...         actual_data, expected_data, "Code indexing produced unexpected results"
        ...     )
    """
    is_equal, differences = compare_index_data(actual, expected)

    if not is_equal:
        error_msg = f"{msg}\n" + "\n".join(f"  - {diff}" for diff in differences)
        raise AssertionError(error_msg)