Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2024-05-24 - Optimizing AST Parsing
**Learning:** Nested `ast.walk` loops in AST parsing (as in `PythonParser._extract_calls`) re-walk each function's subtree once per enclosing function, which can cause O(N^2) traversal overhead in the worst case and heavily impact indexing performance on large files.
**Action:** Use `ast.NodeVisitor` for a single-pass O(N) traversal of the AST when extracting calls or computing complexity.
1 change: 0 additions & 1 deletion src/api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from src.api.routes.memory import router as memory_router
from src.api.routes.scanner import router as scanner_router
from src.api.schemas import APIResponse, StatusEnum
from src.config import settings

logger = logging.getLogger("xmem.api")

Expand Down
2 changes: 1 addition & 1 deletion src/api/routes/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def _parse_github_url(url: str) -> tuple:
if m:
return m.group(1), m.group(2)
raise ValueError(
f"Invalid GitHub URL. Expected format: https://github.com/org/repo"
"Invalid GitHub URL. Expected format: https://github.com/org/repo"
)


Expand Down
1 change: 0 additions & 1 deletion src/api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from __future__ import annotations

from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional

Expand Down
5 changes: 2 additions & 3 deletions src/config/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,12 @@

import logging

from logging.handlers import RotatingFileHandler, TimedRotatingFileHandler
from logging.handlers import RotatingFileHandler
import sys
import os
from pathlib import Path
from typing import Optional
from enum import Enum
from dataclasses import dataclass, field
from dataclasses import dataclass


class LogLevel(str, Enum):
Expand Down
1 change: 0 additions & 1 deletion src/pipelines/code_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
from src.scanner.code_store import CodeStore
from src.schemas.code import (
annotations_namespace,
directories_namespace,
files_namespace,
snippets_namespace,
symbols_namespace,
Expand Down
2 changes: 1 addition & 1 deletion src/pipelines/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
)
from src.schemas.events import EventResult
from src.schemas.image import ImageResult
from src.schemas.judge import JudgeDomain, JudgeResult, OperationType
from src.schemas.judge import JudgeDomain, JudgeResult
from src.schemas.profile import ProfileResult
from src.schemas.summary import SummaryResult
from src.schemas.weaver import WeaverResult
Expand Down
1 change: 0 additions & 1 deletion src/pipelines/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from __future__ import annotations

import logging
import os
from typing import Any, Callable, Dict, List, Optional

from dotenv import load_dotenv
Expand Down
1 change: 0 additions & 1 deletion src/prompts/profiler_topics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from dataclasses import dataclass, field
from typing import Dict, List, Union

from src.config.constants import LLM_TAB_SEPARATOR


@dataclass
Expand Down
2 changes: 1 addition & 1 deletion src/prompts/summarizer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from functools import lru_cache
from typing import List, Tuple
from typing import List
import inspect

from src.prompts.examples.summary import SUMMARY_EXAMPLES
Expand Down
113 changes: 78 additions & 35 deletions src/scanner/ast_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,8 @@
import hashlib
import logging
import re
import textwrap
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from typing import Any, Dict, List, Optional, Tuple

# Tree-sitter imports (optional — graceful degradation if not installed)
try:
Expand Down Expand Up @@ -190,21 +188,27 @@ def parse_file(self, file_path: str, content: str) -> ParsedFile:
def _extract_imports(self, tree: ast.Module) -> List[ParsedImport]:
imports: List[ParsedImport] = []

for node in ast.walk(tree):
if isinstance(node, ast.Import):
class ImportVisitor(ast.NodeVisitor):
def visit_Import(self, node: ast.Import):
for alias in node.names:
imports.append(ParsedImport(
module=alias.name,
alias=alias.asname,
))
elif isinstance(node, ast.ImportFrom):
self.generic_visit(node)

def visit_ImportFrom(self, node: ast.ImportFrom):
module = node.module or ""
names = [a.name for a in node.names]
imports.append(ParsedImport(
module=module,
names=names,
is_relative=node.level > 0,
))
self.generic_visit(node)

visitor = ImportVisitor()
visitor.visit(tree)

return imports

Expand Down Expand Up @@ -334,24 +338,37 @@ def _extract_calls(
) -> List[ParsedCall]:
"""Extract function calls from within each symbol's AST subtree."""
calls: List[ParsedCall] = []
known_names = {s.name for s in symbols}

for node in ast.walk(tree):
if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
continue
class CallVisitor(ast.NodeVisitor):
def __init__(self, parser_instance):
self.parser = parser_instance
self.current_caller: Optional[str] = None

def visit_FunctionDef(self, node: ast.FunctionDef):
prev_caller = self.current_caller
self.current_caller = node.name
self.generic_visit(node)
self.current_caller = prev_caller

def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
prev_caller = self.current_caller
self.current_caller = node.name
self.generic_visit(node)
self.current_caller = prev_caller

def visit_Call(self, node: ast.Call):
if self.current_caller:
callee = self.parser._call_to_name(node)
if callee and callee != self.current_caller:
calls.append(ParsedCall(
caller_name=self.current_caller,
callee_name=callee,
is_direct=True,
))
self.generic_visit(node)

caller = node.name
for child in ast.walk(node):
if not isinstance(child, ast.Call):
continue

callee = self._call_to_name(child)
if callee and callee != caller:
calls.append(ParsedCall(
caller_name=caller,
callee_name=callee,
is_direct=True,
))
visitor = CallVisitor(self)
visitor.visit(tree)

return calls

Expand Down Expand Up @@ -417,19 +434,45 @@ def _decorator_to_str(self, node: ast.expr) -> str:

def _compute_complexity(self, node: ast.AST) -> int:
"""Compute cyclomatic complexity from AST nodes. No LLM needed."""
complexity = 1
for child in ast.walk(node):
if isinstance(child, (ast.If, ast.IfExp)):
complexity += 1
elif isinstance(child, (ast.For, ast.AsyncFor, ast.While)):
complexity += 1
elif isinstance(child, ast.ExceptHandler):
complexity += 1
elif isinstance(child, ast.BoolOp):
complexity += len(child.values) - 1
elif isinstance(child, ast.Assert):
complexity += 1
return complexity
class ComplexityVisitor(ast.NodeVisitor):
def __init__(self):
self.complexity = 1

def visit_If(self, n: ast.If):
self.complexity += 1
self.generic_visit(n)

def visit_IfExp(self, n: ast.IfExp):
self.complexity += 1
self.generic_visit(n)

def visit_For(self, n: ast.For):
self.complexity += 1
self.generic_visit(n)

def visit_AsyncFor(self, n: ast.AsyncFor):
self.complexity += 1
self.generic_visit(n)

def visit_While(self, n: ast.While):
self.complexity += 1
self.generic_visit(n)

def visit_ExceptHandler(self, n: ast.ExceptHandler):
self.complexity += 1
self.generic_visit(n)

def visit_BoolOp(self, n: ast.BoolOp):
self.complexity += len(n.values) - 1
self.generic_visit(n)

def visit_Assert(self, n: ast.Assert):
self.complexity += 1
self.generic_visit(n)

visitor = ComplexityVisitor()
visitor.visit(node)
return visitor.complexity


# ---------------------------------------------------------------------------
Expand Down
1 change: 0 additions & 1 deletion src/scanner/git_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from __future__ import annotations

import logging
import os
import subprocess
from dataclasses import dataclass, field
from enum import Enum
Expand Down
2 changes: 0 additions & 2 deletions src/scanner/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,8 @@
from src.scanner.ast_parser import ParsedFile, ParsedSymbol, parse_file, compute_content_hash
from src.scanner.code_store import CodeStore
from src.scanner.git_ops import (
DiffResult,
clone_or_pull,
get_diff,
get_head_sha,
get_language,
list_all_files,
should_skip_file,
Expand Down
1 change: 0 additions & 1 deletion src/scanner/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
import os
import sys
import time
from pathlib import Path
from typing import Any, Dict, List

from dotenv import load_dotenv
Expand Down
2 changes: 1 addition & 1 deletion src/schemas/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List


@dataclass
Expand Down
1 change: 0 additions & 1 deletion src/schemas/summary.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from __future__ import annotations
from typing import List
from pydantic import BaseModel, Field


Expand Down
3 changes: 0 additions & 3 deletions src/storage/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,7 @@ def process_memories(store: BaseVectorStore): # <- Takes ANY vector store
from enum import Enum
from ..config import get_logger
from ..utils.exceptions import (
VectorStoreError,
VectorStoreConnectionError,
VectorStoreValidationError,
VectorNotFoundError,
)

logger = get_logger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion src/utils/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def another_api_call():
import time
import logging
from dataclasses import dataclass, field
from .exceptions import XMemError, ValidationError
from .exceptions import ValidationError

logger = logging.getLogger(__name__)
T = TypeVar("T")
Expand Down