Source code for code_index.language_processor.impl_c

# code_index/language_processor/impl_c.py

"""C language processor implementation.

This module provides a concrete implementation of the LanguageProcessor protocol
for C source code. It handles C-specific syntax for function definitions and
function calls using tree-sitter.

The processor supports:
- Function definitions with various declaration patterns
- Function calls and references
- Handling of function pointers and complex declarators
"""

from tree_sitter import Node
from tree_sitter_language_pack import get_language

from ..models import (
    CodeLocation,
    Definition,
    Function,
    Reference,
    Symbol,
    SymbolReference,
)
from .base import BaseLanguageProcessor, QueryContext


class CProcessor(BaseLanguageProcessor):
    """Language processor for C source code.

    Handles parsing and analysis of C function definitions and calls.
    Supports various C function declaration patterns including:

    - Simple function definitions
    - Functions with storage class specifiers
    - Functions returning pointers (any pointer depth)
    - Parenthesized declarators, e.g. ``int (foo)(void)``
    - Functions returning function pointers, e.g. ``int (*foo(int))(int)``
    """

    # Sibling node types that `_extract_preceding_comment` steps over while
    # walking backwards from a function definition looking for comments.
    _SKIPPED_SIBLING_TYPES = frozenset(
        {
            "preproc_include",
            "preproc_def",
            "preproc_ifdef",
            "preproc_ifndef",
            "preproc_endif",
            "preproc_else",
            "preproc_elif",
        }
    )

    def __init__(self):
        """Initialize the C processor with language-specific configuration."""
        super().__init__(
            name="c",
            language=get_language("c"),
            extensions=[".c", ".h"],
            def_query_str="""
                (function_definition) @function.definition
            """,
            ref_query_str="""
                (call_expression) @function.call
            """,
        )

    @staticmethod
    def _node_text(node: Node, ctx: QueryContext) -> str:
        """Return the source text covered by ``node``.

        Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
        raising, so a single stray byte (common in comments of legacy
        Latin-1 C sources) does not abort processing.
        """
        raw = ctx.source_bytes[node.start_byte : node.end_byte]
        return raw.decode("utf8", errors="replace")

    @staticmethod
    def _node_location(node: Node, ctx: QueryContext) -> CodeLocation:
        """Build a CodeLocation spanning the whole of ``node``.

        Line numbers are converted to 1-based by adding one to the row of
        the node's start/end point; columns and byte offsets are passed
        through unchanged.
        """
        return CodeLocation(
            file_path=ctx.file_path,
            start_lineno=node.start_point[0] + 1,
            start_col=node.start_point[1],
            end_lineno=node.end_point[0] + 1,
            end_col=node.end_point[1],
            start_byte=node.start_byte,
            end_byte=node.end_byte,
        )

    def handle_definition(
        self,
        node: Node,
        ctx: QueryContext,
    ) -> tuple[Symbol, Definition] | None:
        """Process a C function definition node.

        Handles function_definition nodes with various declaration patterns:

        1. Simple: primitive_type -> function_declarator -> compound_statement
        2. With modifiers: storage_class_specifier -> primitive_type ->
           function_declarator -> compound_statement
        3. Pointer return: storage_class_specifier -> primitive_type ->
           pointer_declarator -> compound_statement

        Args:
            node: A function_definition syntax tree node.
            ctx: Query context containing file information.

        Returns:
            A tuple of (Function, Definition) if successful, None if the
            function name cannot be extracted or the definition format is
            not recognized.
        """
        func_name = self._extract_function_name(node, ctx)
        if not func_name:
            return None

        doc_comment = self._extract_preceding_comment(node, ctx)

        # Collect every recognizable call made from inside the function body
        # (the compound_statement stored in the "body" field).
        calls: list[SymbolReference] = []
        body_node = node.child_by_field_name("body")
        if body_node is not None:
            for call_node in self.get_reference_nodes(body_node):
                call_result = self.handle_reference(call_node, ctx)
                if call_result is None:
                    # Not a plain `identifier(...)` call; nothing to record.
                    continue
                symbol, reference = call_result
                calls.append(
                    SymbolReference(symbol=symbol, reference=reference.to_pure())
                )

        return (
            Function(name=func_name),
            Definition(
                location=self._node_location(node, ctx),
                doc=doc_comment,
                calls=calls,
            ),
        )

    def _extract_function_name(
        self, function_def_node: Node, ctx: QueryContext
    ) -> str | None:
        """Extract function name from a function_definition node.

        Walks down the chain of nested declarators starting at the
        definition's ``declarator`` field until an ``identifier`` is reached.
        This covers a bare ``function_declarator`` as well as any nesting of
        ``pointer_declarator`` (``int **f()``), ``parenthesized_declarator``
        (``int (f)(void)``) and function-pointer returns
        (``int (*f(int))(int)``).

        Args:
            function_def_node: The function_definition node to process.
            ctx: Query context for accessing source bytes.

        Returns:
            The function name as a string, or None if no identifier is found
            beneath a function_declarator.
        """
        declarator = function_def_node.child_by_field_name("declarator")
        seen_function_declarator = False

        while declarator is not None:
            if declarator.type == "identifier":
                # An identifier that is not wrapped by a function_declarator
                # is not a function name; keep rejecting it.
                if not seen_function_declarator:
                    return None
                return self._node_text(declarator, ctx)

            if declarator.type == "function_declarator":
                seen_function_declarator = True

            inner = declarator.child_by_field_name("declarator")
            if inner is None:
                # Some wrappers (e.g. parenthesized_declarator) expose the
                # nested declarator only as a plain child, not as a field.
                inner = next(
                    (
                        child
                        for child in declarator.children
                        if child.type == "identifier"
                        or child.type.endswith("_declarator")
                    ),
                    None,
                )
            declarator = inner

        return None

    def handle_reference(
        self,
        node: Node,
        ctx: QueryContext,
    ) -> tuple[Symbol, Reference] | None:
        """Process a C function call expression.

        Handles call_expression nodes to extract the called function name.
        The recorded location is the entire call_expression range (function
        name, parentheses and arguments).

        Args:
            node: A call_expression syntax tree node.
            ctx: Query context containing file information.

        Returns:
            A tuple of (Function, Reference) if successful, None if the
            callee (the ``function`` field) is missing or is not a plain
            identifier.
        """
        name_node = node.child_by_field_name("function")
        if name_node is None or name_node.type != "identifier":
            return None

        return (
            Function(name=self._node_text(name_node, ctx)),
            Reference(location=self._node_location(node, ctx)),
        )

    def _extract_preceding_comment(self, node: Node, ctx: QueryContext) -> str | None:
        """Extract the preceding comment/documentation for a C function definition.

        Walks backwards over the node's previous siblings, collecting every
        ``comment`` node and stepping over the preprocessor node types listed
        in ``_SKIPPED_SIBLING_TYPES``. The walk stops at the first sibling
        that is neither. Note that this means comments separated from the
        function by ``#include``/``#define`` lines are still collected.
        Handles both single-line (``//``) and multi-line (``/* */``) styles.

        Args:
            node: A function_definition syntax tree node.
            ctx: Query context containing file information.

        Returns:
            The cleaned comments joined with newlines in source order, or
            None if no comment was found.
        """
        comments: list[str] = []
        current = node.prev_sibling

        while current is not None:
            if current.type == "comment":
                comments.append(self._clean_c_comment(self._node_text(current, ctx)))
            elif current.type not in self._SKIPPED_SIBLING_TYPES:
                # Hit real code (or anything else): the comment run is over.
                break
            current = current.prev_sibling

        if not comments:
            return None

        # Collected nearest-first; restore the order they appear in the file.
        comments.reverse()
        return "\n".join(comments)

    def _clean_c_comment(self, raw_comment: str) -> str:
        """Clean up a C comment by removing delimiters and normalizing whitespace.

        Strips ``/* ... */`` and ``//`` delimiters, including the Doxygen
        variants ``///``, ``//!``, ``///<``, ``//!<``, ``/*!``, ``/*!<`` and
        ``/**<``, then removes the leading ``*`` gutter from each line of a
        block comment.

        Args:
            raw_comment: The raw comment text including delimiters.

        Returns:
            The cleaned comment text.
        """
        if raw_comment.startswith("/*") and raw_comment.endswith("*/"):
            content = raw_comment[2:-2]
            # Drop Doxygen markers that directly follow the opening "/*".
            # A plain "/**" star is left for the per-line gutter stripping.
            for marker in ("*<", "!<", "!"):
                if content.startswith(marker):
                    content = content[len(marker) :]
                    break
        elif raw_comment.startswith("//"):
            content = raw_comment[2:]
            if content.startswith(("/", "!")):
                # "///", "//!", "///<", "//!<" and slash banners.
                content = content.lstrip("/").removeprefix("!").removeprefix("<")
        else:
            content = raw_comment

        cleaned_lines = []
        for line in content.split("\n"):
            line = line.strip()
            # Remove the conventional " * " gutter of block comments.
            if line.startswith("* "):
                line = line[2:]
            elif line.startswith("*"):
                line = line[1:]
            cleaned_lines.append(line)

        return "\n".join(cleaned_lines).strip()