diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ccf902c..155bba0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,17 +20,15 @@ jobs: - name: Setup .NET uses: actions/setup-dotnet@v4 with: - dotnet-version: 8.0.x + dotnet-version: 9.0.x - name: Restore dependencies run: dotnet restore - name: Build run: dotnet build --no-restore --configuration Release - - name: Test - run: dotnet test --no-build --verbosity normal - name: Publish run: dotnet publish --configuration Release --output ./publish --runtime ${{ matrix.os == 'ubuntu-latest' && 'linux-x64' || matrix.os == 'windows-latest' && 'win-x64' || 'osx-x64' }} --self-contained true - name: Archive production artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: dist-${{ matrix.os }} path: publish @@ -42,30 +40,18 @@ jobs: permissions: contents: write steps: - - name: Create Release - id: create_release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} - with: - tag_name: v${{ github.run_number }} - release_name: Release ${{ github.run_number }} - body: | - Automated release for commit ${{ github.sha }} - draft: false - prerelease: false - name: Download Linux artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: dist-ubuntu-latest path: dist-linux - name: Download Windows artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: dist-windows-latest path: dist-windows - name: Download macOS artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: dist-macos-latest path: dist-macos @@ -74,30 +60,18 @@ jobs: zip -r release-linux.zip dist-linux zip -r release-windows.zip dist-windows zip -r release-macos.zip dist-macos - - name: Upload Linux Release Asset - uses: actions/upload-release-asset@v1 + - name: Create Release and Upload Assets + uses: softprops/action-gh-release@v2 env: GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} with: - upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ./release-linux.zip - asset_name: release-linux.zip - asset_content_type: application/zip - - name: Upload Windows Release Asset - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} - with: - upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ./release-windows.zip - asset_name: release-windows.zip - asset_content_type: application/zip - - name: Upload macOS Release Asset - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} - with: - upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ./release-macos.zip - asset_name: release-macos.zip - asset_content_type: application/zip + tag_name: v${{ github.run_number }} + name: Release ${{ github.run_number }} + body: | + Automated release for commit ${{ github.sha }} + draft: false + prerelease: false + files: | + release-linux.zip + release-windows.zip + release-macos.zip diff --git a/Configuration/FilterConfiguration.cs b/Configuration/FilterConfiguration.cs new file mode 100644 index 0000000..83c1d3a --- /dev/null +++ b/Configuration/FilterConfiguration.cs @@ -0,0 +1,271 @@ +namespace CodeContext.Configuration; + +/// +/// Contains configuration for file and directory filtering. +/// +public class FilterConfiguration +{ + /// + /// Maximum file size in bytes to process. + /// + public long MaxFileSizeBytes { get; init; } = 100 * 1024; // 100KB + + /// + /// File extensions to ignore during processing. + /// + public HashSet IgnoredExtensions { get; init; } = new(StringComparer.OrdinalIgnoreCase) + { + // Executable and library files + ".exe", ".dll", ".pdb", ".bin", ".obj", ".lib", ".so", ".dylib", ".a", ".o", + + // Image files + ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif", ".raw", ".psd", ".ai", + ".eps", ".ps", + + // Audio and video files + ".mp3", ".mp4", ".wav", ".avi", ".mov", ".flv", ".wmv", ".m4a", ".m4v", ".mkv", ".webm", ".ogg", + + // Compressed files + ".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz", ".tgz", + + // Database files + ".db", ".sqlite", ".mdf", ".ldf", ".bak", ".mdb", ".accdb", + + // Document files + ".docx", ".xlsx", ".pptx", ".pdf", ".doc", ".xls", ".ppt", ".rtf", ".odt", ".ods", ".odp", + + // Log and temporary files + ".log", ".cache", ".tmp", ".temp", + + // Minified and source map files + ".min.js", ".min.css", ".map", ".lock", + + // Design files + ".sketch", ".fig", ".xd", + + // Deployment and settings files + ".pub", ".pubxml", ".publishsettings", ".settings", ".suo", ".user", ".userosscache", + + // Version control files + ".vspscc", ".vssscc", ".pidb", ".scc", + + // System files + ".DS_Store", ".localized", ".manifest", + + // Project-specific files + ".csproj.user", ".sln.docstates", ".suo", ".user", ".vssscc", + + // Compiler and build output + ".pdb", ".ilk", ".msi", ".idb", ".pch", ".res", + + // Font files + ".eot", ".ttf", ".woff", ".woff2", + + // 3D model files + ".fbx", ".obj", ".3ds", ".max", + + // Unity-specific files + ".unity", ".unitypackage", ".asset", + + // Certificate files + ".pfx", ".cer", ".crt", + + // Package manager files + ".nupkg", ".snupkg", + + // Java-specific files + ".class", ".jar", + + // Python-specific files + ".pyc", ".pyo", + + // Node.js-specific files + ".node", + + // Ruby-specific files + ".gem", + + // Rust-specific files + ".rlib", + + // Go-specific files + ".a", + + // Swift-specific files + ".swiftmodule", + + // Docker-specific files + ".dockerignore", + + // Kubernetes-specific files + ".kubeconfig", + + // Machine learning model files + ".h5", ".pkl", ".onnx", + + // Executable scripts (to be cautious) + ".bat", ".sh", ".cmd", ".ps1", + + ".sql" + }; + + /// + /// Directory names to ignore during processing. + /// + public HashSet IgnoredDirectories { get; init; } = new(StringComparer.OrdinalIgnoreCase) + { + ".sonarqube", + + // Version control systems + ".git", ".svn", ".hg", ".bzr", ".cvs", + + // IDE and editor-specific + ".vs", ".idea", ".vscode", ".atom", ".sublime-project", + + // Build output + "bin", "obj", "Debug", "Release", "x64", "x86", "AnyCPU", + + // Package management + "packages", "node_modules", "bower_components", "jspm_packages", + + // Python-specific + "__pycache__", "venv", "env", "virtualenv", ".venv", ".env", ".pytest_cache", + + // Ruby-specific + ".bundle", "vendor/bundle", + + // Java-specific + "target", ".gradle", "build", + + // JavaScript/TypeScript-specific + "dist", "out", "build", ".next", ".nuxt", ".cache", + + // Testing and coverage + "coverage", "test-results", "reports", ".nyc_output", + + // Logs and temporary files + "logs", "temp", "tmp", ".temp", ".tmp", + + // Content and media + "uploads", "media", "static", "public", "assets", + + // Third-party and dependencies + "vendor", "third-party", "external", "lib", "libs", + + // WordPress-specific + "wp-content", "wp-includes", "wp-admin", + + // Mobile development + "Pods", "DerivedData", + + // Containerization + ".docker", + + // CI/CD + ".github", ".gitlab", ".circleci", ".jenkins", + + // Documentation + "docs", "_site", ".docusaurus", + + // Caching + ".cache", ".sass-cache", ".parcel-cache", + + // Compiled languages + "__pycache__", ".mypy_cache", ".rpt2_cache", ".rts2_cache_cjs", ".rts2_cache_es", ".rts2_cache_umd", + + // OS-specific + ".DS_Store", "Thumbs.db", + + // Dependency lock files directory + ".pnpm-store", + + // Serverless frameworks + ".serverless", + + // Terraform + ".terraform", + + // Yarn + ".yarn", + + // Expo (React Native) + ".expo", + + // Electron + "out", + + // Flutter/Dart + ".dart_tool", ".flutter-plugins", ".flutter-plugins-dependencies", + + // Kubernetes + ".kube", + + // Ansible + ".ansible", + + // Chef + ".chef", + + // Vagrant + ".vagrant", + + // Unity + "Library", "Temp", "Obj", "Builds", "Logs", + + // Unreal Engine + "Binaries", "Build", "Saved", "Intermediate", + + // Godot Engine + ".import", "export_presets.cfg", + + // R language + ".Rproj.user", ".Rhistory", ".RData", + + // Jupyter Notebooks + ".ipynb_checkpoints", + + // LaTeX + "build", "out", + + // Rust + "target", + + // Go + "vendor", + + // Elixir + "_build", ".elixir_ls", + + // Helm Charts + "charts", + + // Pipenv + ".venv" + }; + + /// + /// File names to ignore during processing. + /// + public HashSet IgnoredFiles { get; init; } = new(StringComparer.OrdinalIgnoreCase) + { + ".bzrignore", ".coveragerc", ".editorconfig", ".env", ".env.development", + ".env.production", ".env.local", ".env.test", ".eslintrc", ".gitattributes", + "thumbs.db", "desktop.ini", ".DS_Store", "npm-debug.log", "yarn-error.log", + "package-lock.json", "yarn.lock", "composer.lock", ".gitignore" + }; + + /// + /// Number of lines to check for generated code markers. + /// + public int GeneratedCodeLinesToCheck { get; init; } = 10; + + /// + /// Threshold for binary file detection (0.0 to 1.0). + /// + public double BinaryThreshold { get; init; } = 0.3; + + /// + /// Chunk size for binary file detection. + /// + public int BinaryCheckChunkSize { get; init; } = 4096; +} diff --git a/FileChecker.cs b/FileChecker.cs index dfd59d5..2056388 100644 --- a/FileChecker.cs +++ b/FileChecker.cs @@ -1,341 +1,26 @@ -using System.Text.RegularExpressions; -using CodeContext; +using CodeContext.Configuration; +using CodeContext.Services; +namespace CodeContext; + +/// +/// Legacy compatibility wrapper for FileFilterService. +/// Use FileFilterService directly for new code. +/// +[Obsolete("Use FileFilterService instead for better testability and maintainability.")] public class FileChecker { - private const long MaxFileSizeBytes = 100 * 1024; // 100KB - - private static readonly HashSet IgnoredExtensions = new(StringComparer.OrdinalIgnoreCase) - { - // Executable and library files - ".exe", ".dll", ".pdb", ".bin", ".obj", ".lib", ".so", ".dylib", ".a", ".o", - - // Image files - ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif", ".raw", ".psd", ".ai", - ".eps", ".ps", - - // Audio and video files - ".mp3", ".mp4", ".wav", ".avi", ".mov", ".flv", ".wmv", ".m4a", ".m4v", ".mkv", ".webm", ".ogg", - - // Compressed files - ".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz", ".tgz", - - // Database files - ".db", ".sqlite", ".mdf", ".ldf", ".bak", ".mdb", ".accdb", - - // Document files - ".docx", ".xlsx", ".pptx", ".pdf", ".doc", ".xls", ".ppt", ".rtf", ".odt", ".ods", ".odp", - - // Log and temporary files - ".log", ".cache", ".tmp", ".temp", - - // Minified and source map files - ".min.js", ".min.css", ".map", ".lock", - - // Design files - ".sketch", ".fig", ".xd", - - // Deployment and settings files - ".pub", ".pubxml", ".publishsettings", ".settings", ".suo", ".user", ".userosscache", - - // Version control files - ".vspscc", ".vssscc", ".pidb", ".scc", - - // System files - ".DS_Store", ".localized", ".manifest", - - // Project-specific files - ".csproj.user", ".sln.docstates", ".suo", ".user", ".vssscc", - - // Compiler and build output - ".pdb", ".ilk", ".msi", ".idb", ".pch", ".res", - - // Font files - ".eot", ".ttf", ".woff", ".woff2", - - // 3D model files - ".fbx", ".obj", ".3ds", ".max", - - // Unity-specific files - ".unity", ".unitypackage", ".asset", - - // Certificate files - ".pfx", ".cer", ".crt", - - // Package manager files - ".nupkg", ".snupkg", - - // Java-specific files - ".class", ".jar", - - // Python-specific files - ".pyc", ".pyo", - - // Node.js-specific files - ".node", - - // Ruby-specific files - ".gem", - - // Rust-specific files - ".rlib", - - // Go-specific files - ".a", - - // Swift-specific files - ".swiftmodule", - - // Docker-specific files - ".dockerignore", - - // Kubernetes-specific files - ".kubeconfig", - - // Machine learning model files - ".h5", ".pkl", ".onnx", - - // Executable scripts (to be cautious) - ".bat", ".sh", ".cmd", ".ps1", - - // - ".sql" - }; - - private static readonly HashSet IgnoredDirectories = new(StringComparer.OrdinalIgnoreCase) - { - ".sonarqube", - - // Version control systems - ".git", ".svn", ".hg", ".bzr", ".cvs", - - // IDE and editor-specific - ".vs", ".idea", ".vscode", ".atom", ".sublime-project", - - // Build output - "bin", "obj", "Debug", "Release", "x64", "x86", "AnyCPU", - - // Package management - "packages", "node_modules", "bower_components", "jspm_packages", - - // Python-specific - "__pycache__", "venv", "env", "virtualenv", ".venv", ".env", ".pytest_cache", - - // Ruby-specific - ".bundle", "vendor/bundle", - - // Java-specific - "target", ".gradle", "build", - - // JavaScript/TypeScript-specific - "dist", "out", "build", ".next", ".nuxt", ".cache", - - // Testing and coverage - "coverage", "test-results", "reports", ".nyc_output", - - // Logs and temporary files - "logs", "temp", "tmp", ".temp", ".tmp", - - // Content and media - "uploads", "media", "static", "public", "assets", - - // Third-party and dependencies - "vendor", "third-party", "external", "lib", "libs", - - // WordPress-specific - "wp-content", "wp-includes", "wp-admin", - - // Mobile development - "Pods", "DerivedData", - - // Containerization - ".docker", - - // CI/CD - ".github", ".gitlab", ".circleci", ".jenkins", - - // Documentation - "docs", "_site", ".docusaurus", - - // Caching - ".cache", ".sass-cache", ".parcel-cache", - - // Compiled languages - "__pycache__", ".mypy_cache", ".rpt2_cache", ".rts2_cache_cjs", ".rts2_cache_es", ".rts2_cache_umd", - - // OS-specific - ".DS_Store", "Thumbs.db", - - // Dependency lock files directory - ".pnpm-store", - - // Serverless frameworks - ".serverless", - - // Terraform - ".terraform", - - // Yarn - ".yarn", - - // Expo (React Native) - ".expo", - - // Electron - "out", - - // Flutter/Dart - ".dart_tool", ".flutter-plugins", ".flutter-plugins-dependencies", - - // Kubernetes - ".kube", - - // Ansible - ".ansible", - - // Chef - ".chef", - - // Vagrant - ".vagrant", - - // Unity - "Library", "Temp", "Obj", "Builds", "Logs", - - // Unreal Engine - "Binaries", "Build", "Saved", "Intermediate", - - // Godot Engine - ".import", "export_presets.cfg", - - // R language - ".Rproj.user", ".Rhistory", ".RData", - - // Jupyter Notebooks - ".ipynb_checkpoints", - - // LaTeX - "build", "out", - - // Rust - "target", - - // Go - "vendor", - - // Elixir - "_build", ".elixir_ls", - - // Helm Charts - "charts", - - // Pipenv - ".venv" - }; - - private static readonly HashSet IgnoredFiles = new(StringComparer.OrdinalIgnoreCase) - { - ".bzrignore", ".coveragerc", ".editorconfig", ".env", ".env.development", - ".env.production", ".env.local", ".env.test", ".eslintrc", ".gitattributes", - "thumbs.db", "desktop.ini", ".DS_Store", "npm-debug.log", "yarn-error.log", - "package-lock.json", "yarn.lock", "composer.lock", ".gitignore" - }; - - private static List gitIgnorePatterns; - + private static readonly Lazy _instance = new(() => + new FileFilterService(new FilterConfiguration())); + + /// + /// Determines if a file or directory should be skipped during processing. + /// + /// The file or directory information. + /// The root path of the project being scanned. + /// True if the file/directory should be skipped; otherwise, false. public static bool ShouldSkip(FileSystemInfo info, string rootPath) { - // Check if any parent directory is in the ignored list - var relativePath = Path.GetRelativePath(rootPath, info.FullName); - var pathParts = relativePath.Split(Path.DirectorySeparatorChar); - - if (pathParts.Any(IgnoredDirectories.Contains)) - { - return true; - } - - if (info.Attributes.HasFlag(FileAttributes.Directory)) - return false; // We've already checked if it's an ignored directory - - // Check for ignored files - if (IgnoredFiles.Contains(info.Name)) - return true; - - // Improved extension checking - var fileName = info.Name; - var extension = Path.GetExtension(fileName); - if (IgnoredExtensions.Contains(extension)) - return true; - - // Check for compound extensions like .min.css - var lastDotIndex = fileName.LastIndexOf('.'); - if (lastDotIndex > 0) - { - var secondLastDotIndex = fileName.LastIndexOf('.', lastDotIndex - 1); - if (secondLastDotIndex >= 0) - { - var compoundExtension = fileName.Substring(secondLastDotIndex); - if (IgnoredExtensions.Contains(compoundExtension)) - return true; - } - } - - if (info is FileInfo fileInfo && fileInfo.Length > MaxFileSizeBytes) - return true; - - if (IsInGitRepository(rootPath)) - { - if (gitIgnorePatterns == null) - LoadGitIgnore(rootPath); - - if (IsIgnoredByGitIgnore(info.FullName, rootPath)) - return true; - } - - return FileUtils.IsBinaryFile(info.FullName) || - IgnoredDirectories.Any(dir => - info.FullName.Contains($"{Path.DirectorySeparatorChar}{dir}{Path.DirectorySeparatorChar}") || - IsGeneratedCode(info.FullName)); - } - - - private static bool IsGeneratedCode(string filePath) - { - const int linesToCheck = 10; - var lines = File.ReadLines(filePath).Take(linesToCheck); - return lines.Any(line => line.Contains("")); - } - - private static bool IsInGitRepository(string path) - { - while (!string.IsNullOrEmpty(path)) - { - if (Directory.Exists(Path.Combine(path, ".git"))) - return true; - path = Path.GetDirectoryName(path); - } - - return false; - } - - private static void LoadGitIgnore(string rootPath) - { - gitIgnorePatterns = new List(); - var gitIgnorePath = Path.Combine(rootPath, ".gitignore"); - if (File.Exists(gitIgnorePath)) - gitIgnorePatterns.AddRange(File.ReadAllLines(gitIgnorePath) - .Where(line => !string.IsNullOrWhiteSpace(line) && !line.StartsWith('#'))); - } - - private static bool IsIgnoredByGitIgnore(string filePath, string rootPath) - { - var relativePath = Path.GetRelativePath(rootPath, filePath); - return gitIgnorePatterns.Any(pattern => IsMatch(relativePath, pattern)); - } - - private static bool IsMatch(string path, string pattern) - { - pattern = pattern.Replace(".", "\\.").Replace("*", ".*").Replace("?", "."); - return Regex.IsMatch(path, $"^{pattern}$", RegexOptions.IgnoreCase); + return _instance.Value.ShouldSkip(info, rootPath); } } \ No newline at end of file diff --git a/FileUtils.cs b/FileUtils.cs index 3a9ad1f..c19af74 100644 --- a/FileUtils.cs +++ b/FileUtils.cs @@ -1,41 +1,21 @@ -namespace CodeContext; +using CodeContext.Utils; +namespace CodeContext; + +/// +/// Legacy compatibility wrapper for FileUtilities. +/// Use FileUtilities instead for new code. +/// +[Obsolete("Use FileUtilities in CodeContext.Utils namespace instead.")] public static class FileUtils { + /// + /// Determines if a file is binary based on its content. + /// + /// Path to the file to check. + /// True if the file appears to be binary; otherwise, false. public static bool IsBinaryFile(string filePath) { - const int chunkSize = 4096; - const double binaryThreshold = 0.3; - using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read); - - if (stream.Length == 0) - return false; - - // Check for UTF-8 BOM - if (HasUtf8Bom(stream)) - return false; - - return CheckBinaryContent(stream, chunkSize, binaryThreshold); - } - - private static bool HasUtf8Bom(FileStream stream) - { - var bom = new byte[3]; - stream.Read(bom, 0, 3); - stream.Position = 0; - return bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF; - } - - private static bool CheckBinaryContent(FileStream stream, int chunkSize, double threshold) - { - var buffer = new byte[chunkSize]; - var bytesRead = stream.Read(buffer, 0, chunkSize); - var nonPrintableCount = buffer.Take(bytesRead).Count(IsBinaryByte); - return (double)nonPrintableCount / bytesRead > threshold; - } - - private static bool IsBinaryByte(byte b) - { - return b is (< 7 or > 14) and (< 32 or > 127); + return FileUtilities.IsBinaryFile(filePath); } } \ No newline at end of file diff --git a/Interfaces/IConsoleWriter.cs b/Interfaces/IConsoleWriter.cs new file mode 100644 index 0000000..d373671 --- /dev/null +++ b/Interfaces/IConsoleWriter.cs @@ -0,0 +1,25 @@ +namespace CodeContext.Interfaces; + +/// +/// Provides an abstraction for console output operations. +/// +public interface IConsoleWriter +{ + /// + /// Writes a line of text to the console. + /// + /// The message to write. + void WriteLine(string message); + + /// + /// Writes text to the console without a line break. + /// + /// The message to write. + void Write(string message); + + /// + /// Reads a line of input from the console. + /// + /// The input string. + string? ReadLine(); +} diff --git a/Interfaces/IFileChecker.cs b/Interfaces/IFileChecker.cs new file mode 100644 index 0000000..01714fa --- /dev/null +++ b/Interfaces/IFileChecker.cs @@ -0,0 +1,15 @@ +namespace CodeContext.Interfaces; + +/// +/// Provides functionality to determine if files or directories should be skipped during processing. +/// +public interface IFileChecker +{ + /// + /// Determines if a file or directory should be skipped during processing. + /// + /// The file or directory information. + /// The root path of the project being scanned. + /// True if the file/directory should be skipped; otherwise, false. + bool ShouldSkip(FileSystemInfo info, string rootPath); +} diff --git a/MyAppsContext.cs b/MyAppsContext.cs index d79cf25..6b6a40b 100644 --- a/MyAppsContext.cs +++ b/MyAppsContext.cs @@ -1,79 +1,56 @@ -using System.Text; +using CodeContext.Configuration; +using CodeContext.Services; namespace CodeContext; + +/// +/// Legacy compatibility wrapper for ProjectScanner. +/// Use ProjectScanner directly for new code. +/// +[Obsolete("Use ProjectScanner instead for better testability and maintainability.")] public class MyAppsContext { - public static string GitRepoRoot { get; private set; } - + private static readonly Lazy _instance = new(() => + { + var config = new FilterConfiguration(); + var fileChecker = new FileFilterService(config); + var console = new ConsoleWriter(); + return new ProjectScanner(fileChecker, console); + }); + + /// + /// Gets the git repository root path. + /// + public static string? GitRepoRoot => _instance.Value.GitRepoRoot; + + /// + /// Gets user input with a prompt. + /// + /// The prompt to display. + /// The user's input. public static string GetUserInput(string prompt) { - Console.Write(prompt); - return Console.ReadLine(); + return _instance.Value.GetUserInput(prompt); } + /// + /// Generates a hierarchical structure representation of the project directory. + /// + /// The directory path to scan. + /// Current indentation level (used for recursion). + /// A string representation of the directory structure. public static string GetProjectStructure(string path, int indent = 0) { - if (GitRepoRoot == null) GitRepoRoot = FindGitRepoRoot(path); - if (indent == 0) Console.WriteLine("📁 Analyzing directory structure..."); - - var entries = Directory.EnumerateFileSystemEntries(path) - .OrderBy(e => e) - .Where(e => GitRepoRoot == null || !FileChecker.ShouldSkip(new FileInfo(e), GitRepoRoot)) - .ToList(); - - var sb = new StringBuilder(); - - // Process all entries - for (int i = 0; i < entries.Count; i++) - { - WriteProgress(i + 1, entries.Count); - var entry = entries[i]; - - if (Directory.Exists(entry)) - { - var dir = new DirectoryInfo(entry); - sb.AppendLine($"{new string(' ', indent * 2)}[{dir.Name}/]") - .Append(GetProjectStructure(entry, indent + 1)); - } - else - { - var file = new FileInfo(entry); - sb.AppendLine($"{new string(' ', indent * 2)}[{file.Extension}] {file.Name}"); - } - } - - return sb.ToString(); + return _instance.Value.GetProjectStructure(path, indent); } + /// + /// Retrieves the contents of all non-filtered files in the directory tree. + /// + /// The directory path to scan. + /// A string containing all file contents with separators. public static string GetFileContents(string path) { - if (GitRepoRoot == null) GitRepoRoot = FindGitRepoRoot(path); - Console.WriteLine("\n📄 Processing files..."); - - var files = Directory.EnumerateFiles(path, "*", SearchOption.AllDirectories) - .Where(f => !FileChecker.ShouldSkip(new FileInfo(f), GitRepoRoot)) - .ToList(); - - return string.Join("\n\n", files.Select((f, i) => - { - WriteProgress(i + 1, files.Count); - return $"{f}\n{new string('-', 100)}\n{File.ReadAllText(f)}"; - })); - } - - private static string FindGitRepoRoot(string path) - { - if (string.IsNullOrEmpty(path) || !Directory.Exists(path)) - return null; - - return Directory.Exists(Path.Combine(path, ".git")) - ? path - : string.IsNullOrEmpty(path) ? null : FindGitRepoRoot(Path.GetDirectoryName(path)); - } - - private static void WriteProgress(int current, int total) - { - var percent = (int)((current / (double)total) * 100); - Console.Write($"\r⏳ Progress: {percent}% ({current}/{total})"); + return _instance.Value.GetFileContents(path); } } \ No newline at end of file diff --git a/Program.cs b/Program.cs index ccb3102..b5862bc 100644 --- a/Program.cs +++ b/Program.cs @@ -1,4 +1,4 @@ -using System.Diagnostics; +using System.Diagnostics; using System.Text; using System.Text.Json; using CodeContext; @@ -10,82 +10,143 @@ var config = LoadConfig(); var path = GetValidPath(args.FirstOrDefault() ?? config.DefaultInputPath); - // 1. Get input folder name - var inputFolderName = new DirectoryInfo(path).Name; - if (string.IsNullOrEmpty(inputFolderName) || inputFolderName == ".") // Handle cases like "." or "C:\" - { - // For "." use current directory name, for root drives, use a generic name or drive letter - inputFolderName = new DirectoryInfo(Environment.CurrentDirectory).Name; - if (path.EndsWith(Path.DirectorySeparatorChar.ToString()) || path.EndsWith(Path.AltDirectorySeparatorChar.ToString())) - { - // If path was like "C:/", DirectoryInfo(path).Name might be "C:". - // Let's try to get a more descriptive name if it's a root drive. - var root = Path.GetPathRoot(Path.GetFullPath(path)); - if (!string.IsNullOrEmpty(root)) - { - inputFolderName = root.Replace(Path.DirectorySeparatorChar.ToString(), "").Replace(Path.AltDirectorySeparatorChar.ToString(), "").Replace(":", ""); - if (string.IsNullOrEmpty(inputFolderName)) inputFolderName = "root"; - } - } - } - - - // 2. Construct prefixed default file name + var inputFolderName = GetInputFolderName(path); var prefixedDefaultFileName = $"{inputFolderName}_{config.DefaultOutputFileName}"; - - // 3. Default output is INSIDE the input path folder with the prefixed name var defaultFullOutputPath = Path.Combine(path, prefixedDefaultFileName); - - // 4. Get final output path (could be a file or directory specified by user, or the default) var outputTarget = GetValidOutputPath(args.ElementAtOrDefault(1), defaultFullOutputPath); var sw = Stopwatch.StartNew(); var content = BuildContent(path, config); var stats = CalculateStats(path, content, sw.Elapsed); - // 5. Pass prefixedDefaultFileName to WriteOutput string actualOutputPath = WriteOutput(outputTarget, content, config.OutputFormat, prefixedDefaultFileName); - Console.WriteLine($"\n✅ Output written to {actualOutputPath}"); // 6. Use actual output path + Console.WriteLine($"\n✅ Output written to {actualOutputPath}"); Console.WriteLine(stats); } -catch (Exception ex) +catch (DirectoryNotFoundException ex) { - Console.WriteLine($"❌ Error: {ex.Message}"); + Console.WriteLine($"❌ Directory Error: {ex.Message}"); Environment.Exit(1); } +catch (IOException ex) +{ + Console.WriteLine($"❌ I/O Error: {ex.Message}"); + Environment.Exit(2); +} +catch (UnauthorizedAccessException ex) +{ + Console.WriteLine($"❌ Access Denied: {ex.Message}"); + Environment.Exit(3); +} +catch (Exception ex) +{ + Console.WriteLine($"❌ Unexpected Error: {ex.Message}"); + if (ex.InnerException != null) + { + Console.WriteLine($" Details: {ex.InnerException.Message}"); + } + Environment.Exit(4); +} -static Config LoadConfig() => - JsonSerializer.Deserialize( - File.Exists("config.json") ? File.ReadAllText("config.json") : "{}" - ) ?? new(); +/// +/// Loads configuration from config.json file if it exists, otherwise returns default configuration. +/// +/// The loaded or default configuration. +static Config LoadConfig() +{ + try + { + var configJson = File.Exists("config.json") ? File.ReadAllText("config.json") : "{}"; + return JsonSerializer.Deserialize(configJson) ?? new Config(); + } + catch (JsonException ex) + { + Console.WriteLine($"⚠️ Warning: Invalid config.json format ({ex.Message}). Using defaults."); + return new Config(); + } +} +/// +/// Gets and validates the directory path to be indexed. +/// +/// The default path to use if user doesn't provide one. +/// The validated full path. +/// Thrown when the specified directory doesn't exist. static string GetValidPath(string defaultPath) { var path = MyAppsContext.GetUserInput($"Enter the path to index (default: {defaultPath}): "); var finalPath = string.IsNullOrWhiteSpace(path) ? defaultPath : path; - var fullPath = Path.GetFullPath(finalPath); // Resolve to full path for consistency + var fullPath = Path.GetFullPath(finalPath); - return Directory.Exists(fullPath) - ? fullPath - : throw new DirectoryNotFoundException($"Invalid directory path: {fullPath}"); + if (!Directory.Exists(fullPath)) + { + throw new DirectoryNotFoundException($"Directory not found: {fullPath}"); + } + + return fullPath; } -// Modified to accept user's argument and the fully resolved default path +/// +/// Gets and validates the output path for the generated context file. +/// +/// Optional output path from command-line arguments. +/// Default output path if none provided. +/// The validated full output path. static string GetValidOutputPath(string? outputArgFromUser, string defaultFullOutputPathIfNoArgAndNoInput) { - // If an argument is provided, use it directly. if (!string.IsNullOrWhiteSpace(outputArgFromUser)) { - return Path.GetFullPath(outputArgFromUser); // Resolve to full path + return Path.GetFullPath(outputArgFromUser); } - // Otherwise, prompt the user, showing the calculated default. + var userInput = MyAppsContext.GetUserInput($"Enter output file/directory (default: {defaultFullOutputPathIfNoArgAndNoInput}): "); return string.IsNullOrWhiteSpace(userInput) ? defaultFullOutputPathIfNoArgAndNoInput - : Path.GetFullPath(userInput); // Resolve to full path + : Path.GetFullPath(userInput); } +/// +/// Extracts a clean folder name from the input path for output file naming. +/// +/// The input path. +/// A sanitized folder name. +static string GetInputFolderName(string path) +{ + var inputFolderName = new DirectoryInfo(path).Name; + + if (string.IsNullOrEmpty(inputFolderName) || inputFolderName == ".") + { + inputFolderName = new DirectoryInfo(Environment.CurrentDirectory).Name; + + if (path.EndsWith(Path.DirectorySeparatorChar.ToString()) || + path.EndsWith(Path.AltDirectorySeparatorChar.ToString())) + { + var root = Path.GetPathRoot(Path.GetFullPath(path)); + if (!string.IsNullOrEmpty(root)) + { + inputFolderName = root + .Replace(Path.DirectorySeparatorChar.ToString(), "") + .Replace(Path.AltDirectorySeparatorChar.ToString(), "") + .Replace(":", ""); + + if (string.IsNullOrEmpty(inputFolderName)) + { + inputFolderName = "root"; + } + } + } + } + + return inputFolderName; +} +/// +/// Builds the complete content output including structure and file contents based on configuration. +/// +/// The directory path to process. +/// The configuration specifying what to include. +/// The complete output content. +/// Thrown when an error occurs during processing. static string BuildContent(string path, Config config) { try @@ -112,25 +173,51 @@ static string BuildContent(string path, Config config) } } -static string CalculateStats(string path, string content, TimeSpan timeTaken) => - $""" +/// +/// Calculates and formats statistics about the processing operation. +/// +/// The directory that was processed. +/// The generated content. +/// Time elapsed during processing. +/// Formatted statistics string. +static string CalculateStats(string path, string content, TimeSpan timeTaken) +{ + try + { + var fileCount = Directory.GetFiles(path, "*", SearchOption.AllDirectories).Length; + var lineCount = content.Count(c => c == '\n'); + + return $""" - 📊 Stats: - 📁 Files processed: {Directory.GetFiles(path, "*", SearchOption.AllDirectories).Length} - 📝 Total lines: {content.Count(c => c == '\n')} - ⏱️ Time taken: {timeTaken.TotalSeconds:F2}s - 💾 Output size: {content.Length} characters - """; + 📊 Stats: + 📁 Files processed: {fileCount} + 📝 Total lines: {lineCount} + ⏱️ Time taken: {timeTaken.TotalSeconds:F2}s + 💾 Output size: {content.Length} characters + """; + } + catch (Exception) + { + return "\n📊 Stats: Unable to calculate statistics"; + } +} -// Modified to accept the effective output filename and return the actual path written +/// +/// Writes the generated content to the specified output location. +/// +/// Target path (file or directory). +/// Content to write. +/// Output format (text or json). +/// Filename to use if outputTarget is a directory. +/// The actual path where the file was written. +/// Thrown when an error occurs during file writing. static string WriteOutput(string outputTarget, string content, string format, string effectiveOutputFileName) { Console.WriteLine("\n💾 Writing output..."); - string resolvedFilePath = ""; + string resolvedFilePath; + try { - // If outputTarget is an existing directory, combine it with the effectiveOutputFileName. - // Otherwise, assume outputTarget is the full file path. if (Directory.Exists(outputTarget)) { resolvedFilePath = Path.Combine(outputTarget, effectiveOutputFileName); @@ -138,8 +225,8 @@ static string WriteOutput(string outputTarget, string content, string format, st else { resolvedFilePath = outputTarget; - // Ensure the directory for the output file exists var outputDirectory = Path.GetDirectoryName(resolvedFilePath); + if (!string.IsNullOrEmpty(outputDirectory) && !Directory.Exists(outputDirectory)) { Directory.CreateDirectory(outputDirectory); @@ -147,24 +234,35 @@ static string WriteOutput(string outputTarget, string content, string format, st } var formattedContent = format.ToLower() == "json" - ? JsonSerializer.Serialize(new { content, timestamp = DateTime.Now }) + ? JsonSerializer.Serialize(new { content, timestamp = DateTime.Now }, new JsonSerializerOptions { WriteIndented = true }) : content; + File.WriteAllText(resolvedFilePath, formattedContent); - return resolvedFilePath; // Return the actual path + return resolvedFilePath; + } + catch (UnauthorizedAccessException ex) + { + throw new IOException($"Access denied writing to {outputTarget}", ex); } catch (Exception ex) { - // Try to provide a more specific path in the error if resolvedFilePath was determined - string errorPath = string.IsNullOrEmpty(resolvedFilePath) ? outputTarget : resolvedFilePath; - throw new IOException($"Error writing output to {errorPath}", ex); + throw new IOException($"Failed to write output to {outputTarget}", ex); } } +/// +/// Application configuration record. +/// +/// Default directory path to scan. +/// Default output file name. +/// Output format (text or json). +/// Whether to include directory structure in output. +/// Whether to include file contents in output. record Config { public string DefaultInputPath { get; init; } = "."; - public string DefaultOutputFileName { get; init; } = "context.txt"; // Base name + public string DefaultOutputFileName { get; init; } = "context.txt"; public string OutputFormat { get; init; } = "text"; public bool IncludeStructure { get; init; } = true; public bool IncludeContents { get; init; } = true; -} \ No newline at end of file +} diff --git a/README.md b/README.md index 7b4b79d..9734fb7 100644 --- a/README.md +++ b/README.md @@ -1,78 +1,166 @@ # CodeContext -CodeContext is an app for Mac & Windows to provide code context to Language Learning Models (LLMs). +CodeContext is a cross-platform CLI tool for Mac, Windows, and Linux that provides code context to Language Learning Models (LLMs). It scans project directories, generates a structured representation of the project, and extracts relevant file contents while intelligently filtering out unnecessary files and directories. ![screenshot](https://github.com/DavidVeksler/CodeContext/blob/master/screenshot.png?raw=true) -Update: a more comprehensive tool is [code2prompt](https://github.com/mufeedvh/code2prompt). -I found that CodeContext is more user friendly, faster, and automatically includes only user code (based on both extension and file contents), but you may have better luck. +Update: A more comprehensive tool is [code2prompt](https://github.com/mufeedvh/code2prompt). +I found that CodeContext is more user-friendly, faster, and automatically includes only user code (based on both extension and file contents), but you may have better luck with alternatives. ## Features -- Generates a hierarchical project structure -- Extracts contents of relevant files -- Intelligent file and directory filtering -- Git-aware: respects .gitignore rules -- Handles binary files and large files -- Excludes generated code -- Customizable ignored extensions, directories, and files +- **Hierarchical Project Structure**: Generates a clear tree view of your project +- **Smart Content Extraction**: Extracts contents of relevant source files +- **Intelligent Filtering**: Automatically filters out binaries, dependencies, build outputs, and more +- **Git-Aware**: Respects .gitignore rules +- **Binary File Detection**: Automatically detects and skips binary files +- **Generated Code Detection**: Excludes auto-generated code +- **Highly Customizable**: Configure ignored extensions, directories, and file size limits +- **Multiple Output Formats**: Supports plain text and JSON output +- **Well-Architected**: Clean separation of concerns with interfaces for testability + +## Architecture + +The project follows SOLID principles with a modular architecture: + +- **`Configuration/`**: Filter configuration settings +- **`Interfaces/`**: Abstraction interfaces (IFileChecker, IConsoleWriter) +- **`Services/`**: Core business logic (FileFilterService, ProjectScanner, GitIgnoreParser) +- **`Utils/`**: Utility functions (FileUtilities) + +This design makes the codebase maintainable, testable, and extensible. ## Getting Started ### Prerequisites -- .NET 6.0 or later +- .NET 9.0 or later #### macOS -1. Install .NET SDK if you haven't already: +Install .NET SDK if you haven't already: +```bash brew install --cask dotnet-sdk +``` -2. Clone the repository: -git clone https://github.com/yourusername/CodeContext.git +#### Windows -3. Navigate to the project directory: -cd CodeContext +Download and install the [.NET 9 SDK](https://dotnet.microsoft.com/download/dotnet/9.0) -4. Build the project: -dotnet build +#### Linux + +Follow the [official .NET installation guide](https://learn.microsoft.com/en-us/dotnet/core/install/linux) for your distribution. ### Installation 1. Clone the repository: -git clone https://github.com/DavidVeksler/CodeContext/CodeContext.git - -2. Navigate to the project directory: +```bash +git clone https://github.com/DavidVeksler/CodeContext.git cd CodeContext +``` -3. Build the project: +2. Build the project: +```bash dotnet build +``` + +3. (Optional) Publish for your platform: +```bash +# Self-contained executable +dotnet publish -c Release -r win-x64 --self-contained # Windows +dotnet publish -c Release -r osx-x64 --self-contained # macOS +dotnet publish -c Release -r linux-x64 --self-contained # Linux +``` -1. ### Usage Run the application with: +```bash dotnet run [path_to_index] [output_file] +``` +Arguments: - `path_to_index`: The directory to analyze (optional, will prompt if not provided) -- `output_file`: The file to write the output (optional, defaults to `context.txt` in the parent directory of the indexed path) +- `output_file`: The file to write the output (optional, defaults to `{foldername}_context.txt` in the indexed directory) + +If no arguments are provided, the application will prompt for input interactively. + +### Example + +```bash +# Interactive mode +dotnet run + +# With arguments +dotnet run ./MyProject ./output/context.txt -If no arguments are provided, the application will prompt for input. +# Using published executable +./CodeContext ./MyProject ./output/context.txt +``` ## Configuration -Customize ignored files, directories, and extensions by modifying the `FileChecker` class: +Create a `config.json` file in the application directory to customize settings: -- `IgnoredExtensions`: File extensions to ignore -- `IgnoredDirectories`: Directories to ignore -- `IgnoredFiles`: Specific files to ignore -- `MaxFileSizeBytes`: Maximum file size to process (default: 100KB) +```json +{ + "DefaultInputPath": ".", + "DefaultOutputFileName": "context.txt", + "OutputFormat": "text", + "IncludeStructure": true, + "IncludeContents": true +} +``` + +### Advanced Configuration + +Customize filtering behavior by modifying the `FilterConfiguration` class: + +- **`IgnoredExtensions`**: File extensions to ignore (e.g., `.exe`, `.dll`, `.png`) +- **`IgnoredDirectories`**: Directories to ignore (e.g., `node_modules`, `bin`, `obj`) +- **`IgnoredFiles`**: Specific files to ignore (e.g., `.gitignore`, `package-lock.json`) +- **`MaxFileSizeBytes`**: Maximum file size to process (default: 100KB) +- **`BinaryThreshold`**: Threshold for binary file detection (default: 0.3) + +## Output Formats + +### Text Format (default) +Plain text output with file paths, separators, and content. + +### JSON Format +Structured JSON with content and timestamp: +```json +{ + "content": "...", + "timestamp": "2025-11-21T10:30:00" +} +``` + +## Error Handling + +The application provides clear error messages with appropriate exit codes: +- `1`: Directory not found +- `2`: I/O error +- `3`: Access denied +- `4`: Unexpected error ## Contributing Contributions are welcome! Please feel free to submit a Pull Request. +### Development + +The codebase uses: +- **C# 12** with modern language features +- **Nullable reference types** for better null safety +- **XML documentation comments** on all public APIs +- **Dependency injection** patterns for testability + ## License -This project is licensed under the MIT License - see the [LICENSE.txt](LICENSE) file for details. +This project is licensed under the MIT License - see the [LICENSE.txt](LICENSE.txt) file for details. + +## Acknowledgments + +Built with ❤️ for the developer community to make working with LLMs more efficient. diff --git a/Services/ConsoleWriter.cs b/Services/ConsoleWriter.cs new file mode 100644 index 0000000..cf663f7 --- /dev/null +++ b/Services/ConsoleWriter.cs @@ -0,0 +1,18 @@ +using CodeContext.Interfaces; + +namespace CodeContext.Services; + +/// +/// Standard console implementation of IConsoleWriter. +/// +public class ConsoleWriter : IConsoleWriter +{ + /// + public void WriteLine(string message) => Console.WriteLine(message); + + /// + public void Write(string message) => Console.Write(message); + + /// + public string? ReadLine() => Console.ReadLine(); +} diff --git a/Services/FileFilterService.cs b/Services/FileFilterService.cs new file mode 100644 index 0000000..76304c6 --- /dev/null +++ b/Services/FileFilterService.cs @@ -0,0 +1,175 @@ +using CodeContext.Configuration; +using CodeContext.Interfaces; +using CodeContext.Utils; + +namespace CodeContext.Services; + +/// +/// Service for determining if files and directories should be filtered out during processing. +/// +public class FileFilterService : IFileChecker +{ + private readonly FilterConfiguration _config; + private readonly GitIgnoreParser _gitIgnoreParser; + private bool _gitIgnoreLoaded; + + /// + /// Initializes a new instance of the FileFilterService class. + /// + /// The filter configuration to use. + public FileFilterService(FilterConfiguration config) + { + _config = config ?? throw new ArgumentNullException(nameof(config)); + _gitIgnoreParser = new GitIgnoreParser(); + } + + /// + public bool ShouldSkip(FileSystemInfo info, string rootPath) + { + if (info == null) + { + throw new ArgumentNullException(nameof(info)); + } + + if (string.IsNullOrEmpty(rootPath)) + { + throw new ArgumentException("Root path cannot be null or empty.", nameof(rootPath)); + } + + // Check if any parent directory is in the ignored list + var relativePath = Path.GetRelativePath(rootPath, info.FullName); + var pathParts = relativePath.Split(Path.DirectorySeparatorChar); + + if (pathParts.Any(_config.IgnoredDirectories.Contains)) + { + return true; + } + + if (info.Attributes.HasFlag(FileAttributes.Directory)) + { + return false; // We've already checked if it's an ignored directory + } + + // Check for ignored files + if (_config.IgnoredFiles.Contains(info.Name)) + { + return true; + } + + // Check file extension + if (ShouldSkipByExtension(info.Name)) + { + return true; + } + + // Check file size + if (info is FileInfo fileInfo && fileInfo.Length > _config.MaxFileSizeBytes) + { + return true; + } + + // Check gitignore patterns + if (ShouldSkipByGitIgnore(info.FullName, rootPath)) + { + return true; + } + + // Check if binary + if (FileUtilities.IsBinaryFile(info.FullName, _config.BinaryCheckChunkSize, _config.BinaryThreshold)) + { + return true; + } + + // Check for generated code + if (IsGeneratedCode(info.FullName)) + { + return true; + } + + return false; + } + + private bool ShouldSkipByExtension(string fileName) + { + var extension = Path.GetExtension(fileName); + if (_config.IgnoredExtensions.Contains(extension)) + { + return true; + } + + // Check for compound extensions like .min.css + var lastDotIndex = fileName.LastIndexOf('.'); + if (lastDotIndex > 0) + { + var secondLastDotIndex = fileName.LastIndexOf('.', lastDotIndex - 1); + if (secondLastDotIndex >= 0) + { + var compoundExtension = fileName.Substring(secondLastDotIndex); + if (_config.IgnoredExtensions.Contains(compoundExtension)) + { + return true; + } + } + } + + return false; + } + + private bool ShouldSkipByGitIgnore(string filePath, string rootPath) + { + if (!IsInGitRepository(rootPath)) + { + return false; + } + + if (!_gitIgnoreLoaded) + { + var gitIgnorePath = Path.Combine(FindGitRepoRoot(rootPath) ?? rootPath, ".gitignore"); + _gitIgnoreParser.LoadFromFile(gitIgnorePath); + _gitIgnoreLoaded = true; + } + + if (!_gitIgnoreParser.HasPatterns) + { + return false; + } + + var gitRoot = FindGitRepoRoot(rootPath) ?? rootPath; + var relativePath = Path.GetRelativePath(gitRoot, filePath); + return _gitIgnoreParser.IsIgnored(relativePath); + } + + private bool IsGeneratedCode(string filePath) + { + try + { + var lines = File.ReadLines(filePath).Take(_config.GeneratedCodeLinesToCheck); + return lines.Any(line => line.Contains("")); + } + catch (Exception) + { + // If we can't read the file, assume it's not generated code + return false; + } + } + + private static bool IsInGitRepository(string path) + { + return FindGitRepoRoot(path) != null; + } + + private static string? FindGitRepoRoot(string path) + { + var currentPath = path; + while (!string.IsNullOrEmpty(currentPath)) + { + if (Directory.Exists(Path.Combine(currentPath, ".git"))) + { + return currentPath; + } + currentPath = Path.GetDirectoryName(currentPath); + } + + return null; + } +} diff --git a/Services/GitIgnoreParser.cs b/Services/GitIgnoreParser.cs new file mode 100644 index 0000000..a361d84 --- /dev/null +++ b/Services/GitIgnoreParser.cs @@ -0,0 +1,68 @@ +using System.Text.RegularExpressions; + +namespace CodeContext.Services; + +/// +/// Handles parsing and matching of .gitignore patterns. +/// +public class GitIgnoreParser +{ + private readonly List _patterns = new(); + private readonly Dictionary _regexCache = new(); + + /// + /// Loads .gitignore patterns from a file. + /// + /// Path to the .gitignore file. + public void LoadFromFile(string gitIgnorePath) + { + if (!File.Exists(gitIgnorePath)) + { + return; + } + + _patterns.Clear(); + _regexCache.Clear(); + + var lines = File.ReadAllLines(gitIgnorePath) + .Where(line => !string.IsNullOrWhiteSpace(line) && !line.StartsWith('#')); + + _patterns.AddRange(lines); + } + + /// + /// Checks if a relative path matches any loaded gitignore patterns. + /// + /// The relative path to check. + /// True if the path should be ignored; otherwise, false. + public bool IsIgnored(string relativePath) + { + return _patterns.Any(pattern => IsMatch(relativePath, pattern)); + } + + /// + /// Checks if there are any loaded patterns. + /// + public bool HasPatterns => _patterns.Count > 0; + + private bool IsMatch(string path, string pattern) + { + if (!_regexCache.TryGetValue(pattern, out var regex)) + { + var regexPattern = ConvertGitIgnorePatternToRegex(pattern); + regex = new Regex($"^{regexPattern}$", RegexOptions.IgnoreCase | RegexOptions.Compiled); + _regexCache[pattern] = regex; + } + + return regex.IsMatch(path); + } + + private static string ConvertGitIgnorePatternToRegex(string pattern) + { + // Simple conversion - could be enhanced for full gitignore spec + return pattern + .Replace(".", "\\.") + .Replace("*", ".*") + .Replace("?", "."); + } +} diff --git a/Services/ProjectScanner.cs b/Services/ProjectScanner.cs new file mode 100644 index 0000000..8041162 --- /dev/null +++ b/Services/ProjectScanner.cs @@ -0,0 +1,166 @@ +using System.Text; +using CodeContext.Interfaces; + +namespace CodeContext.Services; + +/// +/// Service for scanning and analyzing project directories. +/// +public class ProjectScanner +{ + private readonly IFileChecker _fileChecker; + private readonly IConsoleWriter _console; + private string? _gitRepoRoot; + + /// + /// Initializes a new instance of the ProjectScanner class. + /// + /// The file checker to use for filtering. + /// The console writer for output. + public ProjectScanner(IFileChecker fileChecker, IConsoleWriter console) + { + _fileChecker = fileChecker ?? throw new ArgumentNullException(nameof(fileChecker)); + _console = console ?? throw new ArgumentNullException(nameof(console)); + } + + /// + /// Gets user input with a prompt. + /// + /// The prompt to display. + /// The user's input. + public string GetUserInput(string prompt) + { + _console.Write(prompt); + return _console.ReadLine() ?? string.Empty; + } + + /// + /// Generates a hierarchical structure representation of the project directory. + /// + /// The directory path to scan. + /// Current indentation level (used for recursion). + /// A string representation of the directory structure. + public string GetProjectStructure(string path, int indent = 0) + { + if (string.IsNullOrEmpty(path)) + { + throw new ArgumentException("Path cannot be null or empty.", nameof(path)); + } + + if (!Directory.Exists(path)) + { + throw new DirectoryNotFoundException($"Directory not found: {path}"); + } + + _gitRepoRoot ??= FindGitRepoRoot(path); + + if (indent == 0) + { + _console.WriteLine("📁 Analyzing directory structure..."); + } + + var rootPath = _gitRepoRoot ?? path; + var entries = Directory.EnumerateFileSystemEntries(path) + .OrderBy(e => e) + .Where(e => !_fileChecker.ShouldSkip(new FileInfo(e), rootPath)) + .ToList(); + + var sb = new StringBuilder(); + + for (int i = 0; i < entries.Count; i++) + { + WriteProgress(i + 1, entries.Count); + var entry = entries[i]; + + if (Directory.Exists(entry)) + { + var dir = new DirectoryInfo(entry); + sb.AppendLine($"{new string(' ', indent * 2)}[{dir.Name}/]"); + sb.Append(GetProjectStructure(entry, indent + 1)); + } + else + { + var file = new FileInfo(entry); + sb.AppendLine($"{new string(' ', indent * 2)}[{file.Extension}] {file.Name}"); + } + } + + return sb.ToString(); + } + + /// + /// Retrieves the contents of all non-filtered files in the directory tree. + /// + /// The directory path to scan. + /// A string containing all file contents with separators. + public string GetFileContents(string path) + { + if (string.IsNullOrEmpty(path)) + { + throw new ArgumentException("Path cannot be null or empty.", nameof(path)); + } + + if (!Directory.Exists(path)) + { + throw new DirectoryNotFoundException($"Directory not found: {path}"); + } + + _gitRepoRoot ??= FindGitRepoRoot(path); + _console.WriteLine("\n📄 Processing files..."); + + var rootPath = _gitRepoRoot ?? path; + var files = Directory.EnumerateFiles(path, "*", SearchOption.AllDirectories) + .Where(f => !_fileChecker.ShouldSkip(new FileInfo(f), rootPath)) + .ToList(); + + var results = new List(); + for (int i = 0; i < files.Count; i++) + { + WriteProgress(i + 1, files.Count); + var file = files[i]; + + try + { + var content = File.ReadAllText(file); + results.Add($"{file}\n{new string('-', 100)}\n{content}"); + } + catch (Exception ex) + { + _console.WriteLine($"\n⚠️ Warning: Could not read file {file}: {ex.Message}"); + } + } + + return string.Join("\n\n", results); + } + + /// + /// Gets the root path of the git repository containing the specified path. + /// + public string? GitRepoRoot => _gitRepoRoot; + + private string? FindGitRepoRoot(string path) + { + if (string.IsNullOrEmpty(path) || !Directory.Exists(path)) + { + return null; + } + + var currentPath = path; + while (!string.IsNullOrEmpty(currentPath)) + { + if (Directory.Exists(Path.Combine(currentPath, ".git"))) + { + return currentPath; + } + currentPath = Path.GetDirectoryName(currentPath); + } + + return null; + } + + private void WriteProgress(int current, int total) + { + var percent = (int)((current / (double)total) * 100); + _console.Write($"\r⏳ Progress: {percent}% ({current}/{total})"); + } +} diff --git a/Utils/FileUtilities.cs b/Utils/FileUtilities.cs new file mode 100644 index 0000000..d4f0671 --- /dev/null +++ b/Utils/FileUtilities.cs @@ -0,0 +1,82 @@ +namespace CodeContext.Utils; + +/// +/// Utility methods for file operations. +/// +public static class FileUtilities +{ + /// + /// Determines if a file is binary based on its content. + /// + /// Path to the file to check. + /// Number of bytes to read for analysis. + /// Threshold ratio (0.0-1.0) of non-printable bytes to consider a file binary. + /// True if the file appears to be binary; otherwise, false. + public static bool IsBinaryFile(string filePath, int chunkSize = 4096, double binaryThreshold = 0.3) + { + if (string.IsNullOrEmpty(filePath)) + { + throw new ArgumentException("File path cannot be null or empty.", nameof(filePath)); + } + + if (!File.Exists(filePath)) + { + return false; + } + + try + { + using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + + if (stream.Length == 0) + { + return false; + } + + // Check for UTF-8 BOM + if (HasUtf8Bom(stream)) + { + return false; + } + + return CheckBinaryContent(stream, chunkSize, binaryThreshold); + } + catch (Exception) + { + // If we can't read the file (permissions, etc.), assume it's not binary + return false; + } + } + + private static bool HasUtf8Bom(FileStream stream) + { + if (stream.Length < 3) + { + return false; + } + + var bom = new byte[3]; + stream.Read(bom, 0, 3); + stream.Position = 0; + return bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF; + } + + private static bool CheckBinaryContent(FileStream stream, int chunkSize, double threshold) + { + var buffer = new byte[chunkSize]; + var bytesRead = stream.Read(buffer, 0, chunkSize); + + if (bytesRead == 0) + { + return false; + } + + var nonPrintableCount = buffer.Take(bytesRead).Count(IsBinaryByte); + return (double)nonPrintableCount / bytesRead > threshold; + } + + private static bool IsBinaryByte(byte b) + { + return b is (< 7 or > 14) and (< 32 or > 127); + } +}