diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index ccf902c..155bba0 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -20,17 +20,18 @@ jobs:
- name: Setup .NET
uses: actions/setup-dotnet@v4
with:
- dotnet-version: 8.0.x
+ dotnet-version: 9.0.x
- name: Restore dependencies
run: dotnet restore
- name: Build
run: dotnet build --no-restore --configuration Release
- name: Test
+ # --no-build reuses the Build step's output, so the configuration must match it (Release).
- run: dotnet test --no-build --verbosity normal
+ run: dotnet test --no-build --configuration Release --verbosity normal
- name: Publish
run: dotnet publish --configuration Release --output ./publish --runtime ${{ matrix.os == 'ubuntu-latest' && 'linux-x64' || matrix.os == 'windows-latest' && 'win-x64' || 'osx-x64' }} --self-contained true
- name: Archive production artifacts
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
with:
name: dist-${{ matrix.os }}
path: publish
@@ -42,30 +40,18 @@ jobs:
permissions:
contents: write
steps:
- - name: Create Release
- id: create_release
- uses: actions/create-release@v1
- env:
- GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }}
- with:
- tag_name: v${{ github.run_number }}
- release_name: Release ${{ github.run_number }}
- body: |
- Automated release for commit ${{ github.sha }}
- draft: false
- prerelease: false
- name: Download Linux artifact
- uses: actions/download-artifact@v3
+ uses: actions/download-artifact@v4  # keep on the same major as upload-artifact; v3 and v4 artifacts are not interchangeable
with:
name: dist-ubuntu-latest
path: dist-linux
- name: Download Windows artifact
- uses: actions/download-artifact@v3
+ uses: actions/download-artifact@v4
with:
name: dist-windows-latest
path: dist-windows
- name: Download macOS artifact
- uses: actions/download-artifact@v3
+ uses: actions/download-artifact@v4
with:
name: dist-macos-latest
path: dist-macos
@@ -74,30 +60,22 @@ jobs:
zip -r release-linux.zip dist-linux
zip -r release-windows.zip dist-windows
zip -r release-macos.zip dist-macos
- - name: Upload Linux Release Asset
- uses: actions/upload-release-asset@v1
+ - name: Create Release and Upload Assets
+ uses: softprops/action-gh-release@v2
env:
GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }}
with:
- upload_url: ${{ steps.create_release.outputs.upload_url }}
- asset_path: ./release-linux.zip
- asset_name: release-linux.zip
- asset_content_type: application/zip
- - name: Upload Windows Release Asset
- uses: actions/upload-release-asset@v1
- env:
- GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }}
- with:
- upload_url: ${{ steps.create_release.outputs.upload_url }}
- asset_path: ./release-windows.zip
- asset_name: release-windows.zip
- asset_content_type: application/zip
- - name: Upload macOS Release Asset
- uses: actions/upload-release-asset@v1
- env:
- GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }}
- with:
- upload_url: ${{ steps.create_release.outputs.upload_url }}
- asset_path: ./release-macos.zip
- asset_name: release-macos.zip
- asset_content_type: application/zip
+ # Also hand the PAT to the documented `token` input so the credential does not depend on env-variable precedence.
+ token: ${{ secrets.RELEASE_TOKEN }}
+ tag_name: v${{ github.run_number }}
+ name: Release ${{ github.run_number }}
+ body: |
+ Automated release for commit ${{ github.sha }}
+ draft: false
+ prerelease: false
+ # Fail the job rather than publish a release that is missing one of the archives.
+ fail_on_unmatched_files: true
+ files: |
+ release-linux.zip
+ release-windows.zip
+ release-macos.zip
diff --git a/Configuration/FilterConfiguration.cs b/Configuration/FilterConfiguration.cs
new file mode 100644
index 0000000..83c1d3a
--- /dev/null
+++ b/Configuration/FilterConfiguration.cs
@@ -0,0 +1,271 @@
+namespace CodeContext.Configuration;
+
+/// <summary>
+/// Contains configuration for file and directory filtering.
+/// </summary>
+public class FilterConfiguration
+{
+ /// <summary>
+ /// Maximum file size in bytes to process.
+ /// </summary>
+ public long MaxFileSizeBytes { get; init; } = 100 * 1024; // 100KB
+
+ /// <summary>
+ /// File extensions to ignore during processing (compared with StringComparer.OrdinalIgnoreCase).
+ /// </summary>
+ public HashSet<string> IgnoredExtensions { get; init; } = new(StringComparer.OrdinalIgnoreCase)
+ {
+ // Executable and library files
+ ".exe", ".dll", ".pdb", ".bin", ".obj", ".lib", ".so", ".dylib", ".a", ".o",
+
+ // Image files
+ ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif", ".raw", ".psd", ".ai",
+ ".eps", ".ps",
+
+ // Audio and video files
+ ".mp3", ".mp4", ".wav", ".avi", ".mov", ".flv", ".wmv", ".m4a", ".m4v", ".mkv", ".webm", ".ogg",
+
+ // Compressed files
+ ".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz", ".tgz",
+
+ // Database files
+ ".db", ".sqlite", ".mdf", ".ldf", ".bak", ".mdb", ".accdb",
+
+ // Document files
+ ".docx", ".xlsx", ".pptx", ".pdf", ".doc", ".xls", ".ppt", ".rtf", ".odt", ".ods", ".odp",
+
+ // Log and temporary files
+ ".log", ".cache", ".tmp", ".temp",
+
+ // Minified and source map files
+ ".min.js", ".min.css", ".map", ".lock",
+
+ // Design files
+ ".sketch", ".fig", ".xd",
+
+ // Deployment and settings files
+ ".pub", ".pubxml", ".publishsettings", ".settings", ".suo", ".user", ".userosscache",
+
+ // Version control files
+ ".vspscc", ".vssscc", ".pidb", ".scc",
+
+ // System files
+ ".DS_Store", ".localized", ".manifest",
+
+ // Project-specific files
+ ".csproj.user", ".sln.docstates", ".suo", ".user", ".vssscc",
+
+ // Compiler and build output
+ ".pdb", ".ilk", ".msi", ".idb", ".pch", ".res",
+
+ // Font files
+ ".eot", ".ttf", ".woff", ".woff2",
+
+ // 3D model files
+ ".fbx", ".obj", ".3ds", ".max",
+
+ // Unity-specific files
+ ".unity", ".unitypackage", ".asset",
+
+ // Certificate files
+ ".pfx", ".cer", ".crt",
+
+ // Package manager files
+ ".nupkg", ".snupkg",
+
+ // Java-specific files
+ ".class", ".jar",
+
+ // Python-specific files
+ ".pyc", ".pyo",
+
+ // Node.js-specific files
+ ".node",
+
+ // Ruby-specific files
+ ".gem",
+
+ // Rust-specific files
+ ".rlib",
+
+ // Go-specific files
+ ".a",
+
+ // Swift-specific files
+ ".swiftmodule",
+
+ // Docker-specific files
+ ".dockerignore",
+
+ // Kubernetes-specific files
+ ".kubeconfig",
+
+ // Machine learning model files
+ ".h5", ".pkl", ".onnx",
+
+ // Executable scripts (to be cautious)
+ ".bat", ".sh", ".cmd", ".ps1",
+
+ ".sql"
+ };
+
+ /// <summary>
+ /// Directory names to ignore during processing (compared with StringComparer.OrdinalIgnoreCase).
+ /// </summary>
+ public HashSet<string> IgnoredDirectories { get; init; } = new(StringComparer.OrdinalIgnoreCase)
+ {
+ ".sonarqube",
+
+ // Version control systems
+ ".git", ".svn", ".hg", ".bzr", ".cvs",
+
+ // IDE and editor-specific
+ ".vs", ".idea", ".vscode", ".atom", ".sublime-project",
+
+ // Build output
+ "bin", "obj", "Debug", "Release", "x64", "x86", "AnyCPU",
+
+ // Package management
+ "packages", "node_modules", "bower_components", "jspm_packages",
+
+ // Python-specific
+ "__pycache__", "venv", "env", "virtualenv", ".venv", ".env", ".pytest_cache",
+
+ // Ruby-specific
+ ".bundle", "vendor/bundle",
+
+ // Java-specific
+ "target", ".gradle", "build",
+
+ // JavaScript/TypeScript-specific
+ "dist", "out", "build", ".next", ".nuxt", ".cache",
+
+ // Testing and coverage
+ "coverage", "test-results", "reports", ".nyc_output",
+
+ // Logs and temporary files
+ "logs", "temp", "tmp", ".temp", ".tmp",
+
+ // Content and media
+ "uploads", "media", "static", "public", "assets",
+
+ // Third-party and dependencies
+ "vendor", "third-party", "external", "lib", "libs",
+
+ // WordPress-specific
+ "wp-content", "wp-includes", "wp-admin",
+
+ // Mobile development
+ "Pods", "DerivedData",
+
+ // Containerization
+ ".docker",
+
+ // CI/CD
+ ".github", ".gitlab", ".circleci", ".jenkins",
+
+ // Documentation
+ "docs", "_site", ".docusaurus",
+
+ // Caching
+ ".cache", ".sass-cache", ".parcel-cache",
+
+ // Compiled languages
+ "__pycache__", ".mypy_cache", ".rpt2_cache", ".rts2_cache_cjs", ".rts2_cache_es", ".rts2_cache_umd",
+
+ // OS-specific
+ ".DS_Store", "Thumbs.db",
+
+ // Dependency lock files directory
+ ".pnpm-store",
+
+ // Serverless frameworks
+ ".serverless",
+
+ // Terraform
+ ".terraform",
+
+ // Yarn
+ ".yarn",
+
+ // Expo (React Native)
+ ".expo",
+
+ // Electron
+ "out",
+
+ // Flutter/Dart
+ ".dart_tool", ".flutter-plugins", ".flutter-plugins-dependencies",
+
+ // Kubernetes
+ ".kube",
+
+ // Ansible
+ ".ansible",
+
+ // Chef
+ ".chef",
+
+ // Vagrant
+ ".vagrant",
+
+ // Unity
+ "Library", "Temp", "Obj", "Builds", "Logs",
+
+ // Unreal Engine
+ "Binaries", "Build", "Saved", "Intermediate",
+
+ // Godot Engine
+ ".import", "export_presets.cfg",
+
+ // R language
+ ".Rproj.user", ".Rhistory", ".RData",
+
+ // Jupyter Notebooks
+ ".ipynb_checkpoints",
+
+ // LaTeX
+ "build", "out",
+
+ // Rust
+ "target",
+
+ // Go
+ "vendor",
+
+ // Elixir
+ "_build", ".elixir_ls",
+
+ // Helm Charts
+ "charts",
+
+ // Pipenv
+ ".venv"
+ };
+
+ /// <summary>
+ /// File names to ignore during processing (compared with StringComparer.OrdinalIgnoreCase).
+ /// </summary>
+ public HashSet<string> IgnoredFiles { get; init; } = new(StringComparer.OrdinalIgnoreCase)
+ {
+ ".bzrignore", ".coveragerc", ".editorconfig", ".env", ".env.development",
+ ".env.production", ".env.local", ".env.test", ".eslintrc", ".gitattributes",
+ "thumbs.db", "desktop.ini", ".DS_Store", "npm-debug.log", "yarn-error.log",
+ "package-lock.json", "yarn.lock", "composer.lock", ".gitignore"
+ };
+
+ /// <summary>
+ /// Number of lines to check for generated code markers.
+ /// </summary>
+ public int GeneratedCodeLinesToCheck { get; init; } = 10;
+
+ /// <summary>
+ /// Threshold for binary file detection (0.0 to 1.0).
+ /// </summary>
+ public double BinaryThreshold { get; init; } = 0.3;
+
+ /// <summary>
+ /// Chunk size for binary file detection.
+ /// </summary>
+ public int BinaryCheckChunkSize { get; init; } = 4096;
+}
diff --git a/FileChecker.cs b/FileChecker.cs
index dfd59d5..2056388 100644
--- a/FileChecker.cs
+++ b/FileChecker.cs
@@ -1,341 +1,26 @@
-using System.Text.RegularExpressions;
-using CodeContext;
+using CodeContext.Configuration;
+using CodeContext.Services;
+namespace CodeContext;
+
+/// <summary>
+/// Legacy compatibility wrapper for FileFilterService.
+/// Use FileFilterService directly for new code.
+/// </summary>
+[Obsolete("Use FileFilterService instead for better testability and maintainability.")]
public class FileChecker
{
- private const long MaxFileSizeBytes = 100 * 1024; // 100KB
-
- private static readonly HashSet IgnoredExtensions = new(StringComparer.OrdinalIgnoreCase)
- {
- // Executable and library files
- ".exe", ".dll", ".pdb", ".bin", ".obj", ".lib", ".so", ".dylib", ".a", ".o",
-
- // Image files
- ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif", ".raw", ".psd", ".ai",
- ".eps", ".ps",
-
- // Audio and video files
- ".mp3", ".mp4", ".wav", ".avi", ".mov", ".flv", ".wmv", ".m4a", ".m4v", ".mkv", ".webm", ".ogg",
-
- // Compressed files
- ".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz", ".tgz",
-
- // Database files
- ".db", ".sqlite", ".mdf", ".ldf", ".bak", ".mdb", ".accdb",
-
- // Document files
- ".docx", ".xlsx", ".pptx", ".pdf", ".doc", ".xls", ".ppt", ".rtf", ".odt", ".ods", ".odp",
-
- // Log and temporary files
- ".log", ".cache", ".tmp", ".temp",
-
- // Minified and source map files
- ".min.js", ".min.css", ".map", ".lock",
-
- // Design files
- ".sketch", ".fig", ".xd",
-
- // Deployment and settings files
- ".pub", ".pubxml", ".publishsettings", ".settings", ".suo", ".user", ".userosscache",
-
- // Version control files
- ".vspscc", ".vssscc", ".pidb", ".scc",
-
- // System files
- ".DS_Store", ".localized", ".manifest",
-
- // Project-specific files
- ".csproj.user", ".sln.docstates", ".suo", ".user", ".vssscc",
-
- // Compiler and build output
- ".pdb", ".ilk", ".msi", ".idb", ".pch", ".res",
-
- // Font files
- ".eot", ".ttf", ".woff", ".woff2",
-
- // 3D model files
- ".fbx", ".obj", ".3ds", ".max",
-
- // Unity-specific files
- ".unity", ".unitypackage", ".asset",
-
- // Certificate files
- ".pfx", ".cer", ".crt",
-
- // Package manager files
- ".nupkg", ".snupkg",
-
- // Java-specific files
- ".class", ".jar",
-
- // Python-specific files
- ".pyc", ".pyo",
-
- // Node.js-specific files
- ".node",
-
- // Ruby-specific files
- ".gem",
-
- // Rust-specific files
- ".rlib",
-
- // Go-specific files
- ".a",
-
- // Swift-specific files
- ".swiftmodule",
-
- // Docker-specific files
- ".dockerignore",
-
- // Kubernetes-specific files
- ".kubeconfig",
-
- // Machine learning model files
- ".h5", ".pkl", ".onnx",
-
- // Executable scripts (to be cautious)
- ".bat", ".sh", ".cmd", ".ps1",
-
- //
- ".sql"
- };
-
- private static readonly HashSet IgnoredDirectories = new(StringComparer.OrdinalIgnoreCase)
- {
- ".sonarqube",
-
- // Version control systems
- ".git", ".svn", ".hg", ".bzr", ".cvs",
-
- // IDE and editor-specific
- ".vs", ".idea", ".vscode", ".atom", ".sublime-project",
-
- // Build output
- "bin", "obj", "Debug", "Release", "x64", "x86", "AnyCPU",
-
- // Package management
- "packages", "node_modules", "bower_components", "jspm_packages",
-
- // Python-specific
- "__pycache__", "venv", "env", "virtualenv", ".venv", ".env", ".pytest_cache",
-
- // Ruby-specific
- ".bundle", "vendor/bundle",
-
- // Java-specific
- "target", ".gradle", "build",
-
- // JavaScript/TypeScript-specific
- "dist", "out", "build", ".next", ".nuxt", ".cache",
-
- // Testing and coverage
- "coverage", "test-results", "reports", ".nyc_output",
-
- // Logs and temporary files
- "logs", "temp", "tmp", ".temp", ".tmp",
-
- // Content and media
- "uploads", "media", "static", "public", "assets",
-
- // Third-party and dependencies
- "vendor", "third-party", "external", "lib", "libs",
-
- // WordPress-specific
- "wp-content", "wp-includes", "wp-admin",
-
- // Mobile development
- "Pods", "DerivedData",
-
- // Containerization
- ".docker",
-
- // CI/CD
- ".github", ".gitlab", ".circleci", ".jenkins",
-
- // Documentation
- "docs", "_site", ".docusaurus",
-
- // Caching
- ".cache", ".sass-cache", ".parcel-cache",
-
- // Compiled languages
- "__pycache__", ".mypy_cache", ".rpt2_cache", ".rts2_cache_cjs", ".rts2_cache_es", ".rts2_cache_umd",
-
- // OS-specific
- ".DS_Store", "Thumbs.db",
-
- // Dependency lock files directory
- ".pnpm-store",
-
- // Serverless frameworks
- ".serverless",
-
- // Terraform
- ".terraform",
-
- // Yarn
- ".yarn",
-
- // Expo (React Native)
- ".expo",
-
- // Electron
- "out",
-
- // Flutter/Dart
- ".dart_tool", ".flutter-plugins", ".flutter-plugins-dependencies",
-
- // Kubernetes
- ".kube",
-
- // Ansible
- ".ansible",
-
- // Chef
- ".chef",
-
- // Vagrant
- ".vagrant",
-
- // Unity
- "Library", "Temp", "Obj", "Builds", "Logs",
-
- // Unreal Engine
- "Binaries", "Build", "Saved", "Intermediate",
-
- // Godot Engine
- ".import", "export_presets.cfg",
-
- // R language
- ".Rproj.user", ".Rhistory", ".RData",
-
- // Jupyter Notebooks
- ".ipynb_checkpoints",
-
- // LaTeX
- "build", "out",
-
- // Rust
- "target",
-
- // Go
- "vendor",
-
- // Elixir
- "_build", ".elixir_ls",
-
- // Helm Charts
- "charts",
-
- // Pipenv
- ".venv"
- };
-
- private static readonly HashSet IgnoredFiles = new(StringComparer.OrdinalIgnoreCase)
- {
- ".bzrignore", ".coveragerc", ".editorconfig", ".env", ".env.development",
- ".env.production", ".env.local", ".env.test", ".eslintrc", ".gitattributes",
- "thumbs.db", "desktop.ini", ".DS_Store", "npm-debug.log", "yarn-error.log",
- "package-lock.json", "yarn.lock", "composer.lock", ".gitignore"
- };
-
- private static List gitIgnorePatterns;
-
+ private static readonly Lazy<FileFilterService> _instance = new(() => // shared service, created on first use
+ new FileFilterService(new FilterConfiguration())); // built with the default filter settings
+
+ /// <summary>
+ /// Determines if a file or directory should be skipped during processing.
+ /// </summary>
+ /// <param name="info">The file or directory information.</param>
+ /// <param name="rootPath">The root path of the project being scanned.</param>
+ /// <returns>True if the file/directory should be skipped; otherwise, false.</returns>
public static bool ShouldSkip(FileSystemInfo info, string rootPath)
{
- // Check if any parent directory is in the ignored list
- var relativePath = Path.GetRelativePath(rootPath, info.FullName);
- var pathParts = relativePath.Split(Path.DirectorySeparatorChar);
-
- if (pathParts.Any(IgnoredDirectories.Contains))
- {
- return true;
- }
-
- if (info.Attributes.HasFlag(FileAttributes.Directory))
- return false; // We've already checked if it's an ignored directory
-
- // Check for ignored files
- if (IgnoredFiles.Contains(info.Name))
- return true;
-
- // Improved extension checking
- var fileName = info.Name;
- var extension = Path.GetExtension(fileName);
- if (IgnoredExtensions.Contains(extension))
- return true;
-
- // Check for compound extensions like .min.css
- var lastDotIndex = fileName.LastIndexOf('.');
- if (lastDotIndex > 0)
- {
- var secondLastDotIndex = fileName.LastIndexOf('.', lastDotIndex - 1);
- if (secondLastDotIndex >= 0)
- {
- var compoundExtension = fileName.Substring(secondLastDotIndex);
- if (IgnoredExtensions.Contains(compoundExtension))
- return true;
- }
- }
-
- if (info is FileInfo fileInfo && fileInfo.Length > MaxFileSizeBytes)
- return true;
-
- if (IsInGitRepository(rootPath))
- {
- if (gitIgnorePatterns == null)
- LoadGitIgnore(rootPath);
-
- if (IsIgnoredByGitIgnore(info.FullName, rootPath))
- return true;
- }
-
- return FileUtils.IsBinaryFile(info.FullName) ||
- IgnoredDirectories.Any(dir =>
- info.FullName.Contains($"{Path.DirectorySeparatorChar}{dir}{Path.DirectorySeparatorChar}") ||
- IsGeneratedCode(info.FullName));
- }
-
-
- private static bool IsGeneratedCode(string filePath)
- {
- const int linesToCheck = 10;
- var lines = File.ReadLines(filePath).Take(linesToCheck);
- return lines.Any(line => line.Contains(""));
- }
-
- private static bool IsInGitRepository(string path)
- {
- while (!string.IsNullOrEmpty(path))
- {
- if (Directory.Exists(Path.Combine(path, ".git")))
- return true;
- path = Path.GetDirectoryName(path);
- }
-
- return false;
- }
-
- private static void LoadGitIgnore(string rootPath)
- {
- gitIgnorePatterns = new List();
- var gitIgnorePath = Path.Combine(rootPath, ".gitignore");
- if (File.Exists(gitIgnorePath))
- gitIgnorePatterns.AddRange(File.ReadAllLines(gitIgnorePath)
- .Where(line => !string.IsNullOrWhiteSpace(line) && !line.StartsWith('#')));
- }
-
- private static bool IsIgnoredByGitIgnore(string filePath, string rootPath)
- {
- var relativePath = Path.GetRelativePath(rootPath, filePath);
- return gitIgnorePatterns.Any(pattern => IsMatch(relativePath, pattern));
- }
-
- private static bool IsMatch(string path, string pattern)
- {
- pattern = pattern.Replace(".", "\\.").Replace("*", ".*").Replace("?", ".");
- return Regex.IsMatch(path, $"^{pattern}$", RegexOptions.IgnoreCase);
+ return _instance.Value.ShouldSkip(info, rootPath);
}
}
\ No newline at end of file
diff --git a/FileUtils.cs b/FileUtils.cs
index 3a9ad1f..c19af74 100644
--- a/FileUtils.cs
+++ b/FileUtils.cs
@@ -1,41 +1,21 @@
-namespace CodeContext;
+using CodeContext.Utils;
+namespace CodeContext;
+
+/// <summary>
+/// Legacy compatibility wrapper for FileUtilities.
+/// Use FileUtilities instead for new code.
+/// </summary>
+[Obsolete("Use FileUtilities in CodeContext.Utils namespace instead.")]
public static class FileUtils
{
+ /// <summary>
+ /// Determines if a file is binary based on its content.
+ /// </summary>
+ /// <param name="filePath">Path to the file to check.</param>
+ /// <returns>True if the file appears to be binary; otherwise, false.</returns>
public static bool IsBinaryFile(string filePath)
{
- const int chunkSize = 4096;
- const double binaryThreshold = 0.3;
- using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read);
-
- if (stream.Length == 0)
- return false;
-
- // Check for UTF-8 BOM
- if (HasUtf8Bom(stream))
- return false;
-
- return CheckBinaryContent(stream, chunkSize, binaryThreshold);
- }
-
- private static bool HasUtf8Bom(FileStream stream)
- {
- var bom = new byte[3];
- stream.Read(bom, 0, 3);
- stream.Position = 0;
- return bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF;
- }
-
- private static bool CheckBinaryContent(FileStream stream, int chunkSize, double threshold)
- {
- var buffer = new byte[chunkSize];
- var bytesRead = stream.Read(buffer, 0, chunkSize);
- var nonPrintableCount = buffer.Take(bytesRead).Count(IsBinaryByte);
- return (double)nonPrintableCount / bytesRead > threshold;
- }
-
- private static bool IsBinaryByte(byte b)
- {
- return b is (< 7 or > 14) and (< 32 or > 127);
+ return FileUtilities.IsBinaryFile(filePath);
}
}
\ No newline at end of file
diff --git a/Interfaces/IConsoleWriter.cs b/Interfaces/IConsoleWriter.cs
new file mode 100644
index 0000000..d373671
--- /dev/null
+++ b/Interfaces/IConsoleWriter.cs
@@ -0,0 +1,25 @@
+namespace CodeContext.Interfaces;
+
+/// <summary>
+/// Provides an abstraction for console input and output operations.
+/// </summary>
+public interface IConsoleWriter
+{
+ /// <summary>
+ /// Writes a line of text to the console.
+ /// </summary>
+ /// <param name="message">The message to write.</param>
+ void WriteLine(string message);
+
+ /// <summary>
+ /// Writes text to the console without a line break.
+ /// </summary>
+ /// <param name="message">The message to write.</param>
+ void Write(string message);
+
+ /// <summary>
+ /// Reads a line of input from the console.
+ /// </summary>
+ /// <returns>The input string; may be <c>null</c> (the return type is nullable).</returns>
+ string? ReadLine();
+}
diff --git a/Interfaces/IFileChecker.cs b/Interfaces/IFileChecker.cs
new file mode 100644
index 0000000..01714fa
--- /dev/null
+++ b/Interfaces/IFileChecker.cs
@@ -0,0 +1,15 @@
+namespace CodeContext.Interfaces;
+
+/// <summary>
+/// Provides functionality to determine if files or directories should be skipped during processing.
+/// </summary>
+public interface IFileChecker
+{
+ /// <summary>
+ /// Determines if a file or directory should be skipped during processing.
+ /// </summary>
+ /// <param name="info">The file or directory information.</param>
+ /// <param name="rootPath">The root path of the project being scanned.</param>
+ /// <returns>True if the file/directory should be skipped; otherwise, false.</returns>
+ bool ShouldSkip(FileSystemInfo info, string rootPath);
+}
diff --git a/MyAppsContext.cs b/MyAppsContext.cs
index d79cf25..6b6a40b 100644
--- a/MyAppsContext.cs
+++ b/MyAppsContext.cs
@@ -1,79 +1,56 @@
-using System.Text;
+using CodeContext.Configuration;
+using CodeContext.Services;
namespace CodeContext;
+
+/// <summary>
+/// Legacy compatibility wrapper for ProjectScanner.
+/// Use ProjectScanner directly for new code.
+/// </summary>
+[Obsolete("Use ProjectScanner instead for better testability and maintainability.")]
public class MyAppsContext
{
- public static string GitRepoRoot { get; private set; }
-
+ private static readonly Lazy<ProjectScanner> _instance = new(() => // single shared scanner, created on first access
+ {
+ var config = new FilterConfiguration();
+ var fileChecker = new FileFilterService(config);
+ var console = new ConsoleWriter();
+ return new ProjectScanner(fileChecker, console); // default filters plus the ConsoleWriter created above
+ });
+
+ /// <summary>
+ /// Gets the git repository root path reported by the shared scanner instance; may be <c>null</c>.
+ /// </summary>
+ public static string? GitRepoRoot => _instance.Value.GitRepoRoot;
+
+ /// <summary>
+ /// Gets user input with a prompt.
+ /// </summary>
+ /// <param name="prompt">The prompt to display.</param>
+ /// <returns>The user's input.</returns>
public static string GetUserInput(string prompt)
{
- Console.Write(prompt);
- return Console.ReadLine();
+ return _instance.Value.GetUserInput(prompt);
}
+ /// <summary>
+ /// Generates a hierarchical structure representation of the project directory.
+ /// </summary>
+ /// <param name="path">The directory path to scan.</param>
+ /// <param name="indent">Current indentation level (used for recursion).</param>
+ /// <returns>A string representation of the directory structure.</returns>
public static string GetProjectStructure(string path, int indent = 0)
{
- if (GitRepoRoot == null) GitRepoRoot = FindGitRepoRoot(path);
- if (indent == 0) Console.WriteLine("📁 Analyzing directory structure...");
-
- var entries = Directory.EnumerateFileSystemEntries(path)
- .OrderBy(e => e)
- .Where(e => GitRepoRoot == null || !FileChecker.ShouldSkip(new FileInfo(e), GitRepoRoot))
- .ToList();
-
- var sb = new StringBuilder();
-
- // Process all entries
- for (int i = 0; i < entries.Count; i++)
- {
- WriteProgress(i + 1, entries.Count);
- var entry = entries[i];
-
- if (Directory.Exists(entry))
- {
- var dir = new DirectoryInfo(entry);
- sb.AppendLine($"{new string(' ', indent * 2)}[{dir.Name}/]")
- .Append(GetProjectStructure(entry, indent + 1));
- }
- else
- {
- var file = new FileInfo(entry);
- sb.AppendLine($"{new string(' ', indent * 2)}[{file.Extension}] {file.Name}");
- }
- }
-
- return sb.ToString();
+ return _instance.Value.GetProjectStructure(path, indent);
}
+ /// <summary>
+ /// Retrieves the contents of all non-filtered files in the directory tree.
+ /// </summary>
+ /// <param name="path">The directory path to scan.</param>
+ /// <returns>A string containing all file contents with separators.</returns>
public static string GetFileContents(string path)
{
- if (GitRepoRoot == null) GitRepoRoot = FindGitRepoRoot(path);
- Console.WriteLine("\n📄 Processing files...");
-
- var files = Directory.EnumerateFiles(path, "*", SearchOption.AllDirectories)
- .Where(f => !FileChecker.ShouldSkip(new FileInfo(f), GitRepoRoot))
- .ToList();
-
- return string.Join("\n\n", files.Select((f, i) =>
- {
- WriteProgress(i + 1, files.Count);
- return $"{f}\n{new string('-', 100)}\n{File.ReadAllText(f)}";
- }));
- }
-
- private static string FindGitRepoRoot(string path)
- {
- if (string.IsNullOrEmpty(path) || !Directory.Exists(path))
- return null;
-
- return Directory.Exists(Path.Combine(path, ".git"))
- ? path
- : string.IsNullOrEmpty(path) ? null : FindGitRepoRoot(Path.GetDirectoryName(path));
- }
-
- private static void WriteProgress(int current, int total)
- {
- var percent = (int)((current / (double)total) * 100);
- Console.Write($"\r⏳ Progress: {percent}% ({current}/{total})");
+ return _instance.Value.GetFileContents(path);
}
}
\ No newline at end of file
diff --git a/Program.cs b/Program.cs
index ccb3102..b5862bc 100644
--- a/Program.cs
+++ b/Program.cs
@@ -1,4 +1,4 @@
-using System.Diagnostics;
+using System.Diagnostics;
using System.Text;
using System.Text.Json;
using CodeContext;
@@ -10,82 +10,143 @@
var config = LoadConfig();
var path = GetValidPath(args.FirstOrDefault() ?? config.DefaultInputPath);
- // 1. Get input folder name
- var inputFolderName = new DirectoryInfo(path).Name;
- if (string.IsNullOrEmpty(inputFolderName) || inputFolderName == ".") // Handle cases like "." or "C:\"
- {
- // For "." use current directory name, for root drives, use a generic name or drive letter
- inputFolderName = new DirectoryInfo(Environment.CurrentDirectory).Name;
- if (path.EndsWith(Path.DirectorySeparatorChar.ToString()) || path.EndsWith(Path.AltDirectorySeparatorChar.ToString()))
- {
- // If path was like "C:/", DirectoryInfo(path).Name might be "C:".
- // Let's try to get a more descriptive name if it's a root drive.
- var root = Path.GetPathRoot(Path.GetFullPath(path));
- if (!string.IsNullOrEmpty(root))
- {
- inputFolderName = root.Replace(Path.DirectorySeparatorChar.ToString(), "").Replace(Path.AltDirectorySeparatorChar.ToString(), "").Replace(":", "");
- if (string.IsNullOrEmpty(inputFolderName)) inputFolderName = "root";
- }
- }
- }
-
-
- // 2. Construct prefixed default file name
+ var inputFolderName = GetInputFolderName(path);
var prefixedDefaultFileName = $"{inputFolderName}_{config.DefaultOutputFileName}";
-
- // 3. Default output is INSIDE the input path folder with the prefixed name
var defaultFullOutputPath = Path.Combine(path, prefixedDefaultFileName);
-
- // 4. Get final output path (could be a file or directory specified by user, or the default)
var outputTarget = GetValidOutputPath(args.ElementAtOrDefault(1), defaultFullOutputPath);
var sw = Stopwatch.StartNew();
var content = BuildContent(path, config);
var stats = CalculateStats(path, content, sw.Elapsed);
- // 5. Pass prefixedDefaultFileName to WriteOutput
string actualOutputPath = WriteOutput(outputTarget, content, config.OutputFormat, prefixedDefaultFileName);
- Console.WriteLine($"\n✅ Output written to {actualOutputPath}"); // 6. Use actual output path
+ Console.WriteLine($"\n✅ Output written to {actualOutputPath}");
Console.WriteLine(stats);
}
-catch (Exception ex)
+catch (DirectoryNotFoundException ex)
{
- Console.WriteLine($"❌ Error: {ex.Message}");
+ Console.WriteLine($"❌ Directory Error: {ex.Message}");
Environment.Exit(1);
}
+catch (IOException ex)
+{
+ Console.WriteLine($"❌ I/O Error: {ex.Message}");
+ Environment.Exit(2);
+}
+catch (UnauthorizedAccessException ex)
+{
+ Console.WriteLine($"❌ Access Denied: {ex.Message}");
+ Environment.Exit(3);
+}
+catch (Exception ex)
+{
+ Console.WriteLine($"❌ Unexpected Error: {ex.Message}");
+ if (ex.InnerException != null)
+ {
+ Console.WriteLine($" Details: {ex.InnerException.Message}");
+ }
+ Environment.Exit(4);
+}
-static Config LoadConfig() =>
- JsonSerializer.Deserialize<Config>(
- File.Exists("config.json") ? File.ReadAllText("config.json") : "{}"
- ) ?? new();
+/// <summary>
+/// Loads configuration from config.json file if it exists, otherwise returns default configuration.
+/// </summary>
+/// <returns>The loaded or default configuration; defaults are also used when the JSON is malformed.</returns>
+static Config LoadConfig()
+{
+ try
+ {
+ var configJson = File.Exists("config.json") ? File.ReadAllText("config.json") : "{}";
+ return JsonSerializer.Deserialize<Config>(configJson) ?? new Config();
+ }
+ catch (JsonException ex)
+ {
+ Console.WriteLine($"⚠️ Warning: Invalid config.json format ({ex.Message}). Using defaults.");
+ return new Config();
+ }
+}
+/// <summary>
+/// Gets and validates the directory path to be indexed.
+/// </summary>
+/// <param name="defaultPath">The default path to use if user doesn't provide one.</param>
+/// <returns>The validated full path.</returns>
+/// <exception cref="DirectoryNotFoundException">Thrown when the specified directory doesn't exist.</exception>
static string GetValidPath(string defaultPath)
{
var path = MyAppsContext.GetUserInput($"Enter the path to index (default: {defaultPath}): ");
var finalPath = string.IsNullOrWhiteSpace(path) ? defaultPath : path;
- var fullPath = Path.GetFullPath(finalPath); // Resolve to full path for consistency
+ var fullPath = Path.GetFullPath(finalPath);
- return Directory.Exists(fullPath)
- ? fullPath
- : throw new DirectoryNotFoundException($"Invalid directory path: {fullPath}");
+ if (!Directory.Exists(fullPath))
+ {
+ throw new DirectoryNotFoundException($"Directory not found: {fullPath}");
+ }
+
+ return fullPath;
}
-// Modified to accept user's argument and the fully resolved default path
+/// <summary>
+/// Gets and validates the output path for the generated context file.
+/// </summary>
+/// <param name="outputArgFromUser">Optional output path from command-line arguments.</param>
+/// <param name="defaultFullOutputPathIfNoArgAndNoInput">Default output path if none provided.</param>
+/// <returns>The validated full output path.</returns>
static string GetValidOutputPath(string? outputArgFromUser, string defaultFullOutputPathIfNoArgAndNoInput)
{
- // If an argument is provided, use it directly.
if (!string.IsNullOrWhiteSpace(outputArgFromUser))
{
- return Path.GetFullPath(outputArgFromUser); // Resolve to full path
+ return Path.GetFullPath(outputArgFromUser);
}
- // Otherwise, prompt the user, showing the calculated default.
+
var userInput = MyAppsContext.GetUserInput($"Enter output file/directory (default: {defaultFullOutputPathIfNoArgAndNoInput}): ");
return string.IsNullOrWhiteSpace(userInput)
? defaultFullOutputPathIfNoArgAndNoInput
- : Path.GetFullPath(userInput); // Resolve to full path
+ : Path.GetFullPath(userInput);
}
+/// <summary>
+/// Extracts a clean folder name from the input path for output file naming.
+/// </summary>
+/// <param name="path">The input path.</param>
+/// <returns>A sanitized folder name.</returns>
+static string GetInputFolderName(string path)
+{
+ // Resolve first so "." and relative paths yield the real directory name.
+ var fullPath = Path.GetFullPath(path);
+
+ // GetFileName is empty only for a root ("/", "C:\"); a trailing separator on any
+ // other directory is trimmed beforehand so "/a/b/" still yields "b".
+ var inputFolderName = Path.GetFileName(Path.TrimEndingDirectorySeparator(fullPath));
+ if (!string.IsNullOrEmpty(inputFolderName))
+ {
+ return inputFolderName;
+ }
+
+ // A root has no folder name of its own, and the root text itself (e.g. "C:\") is
+ // rooted and contains separators, so it must never become a file-name prefix:
+ // Path.Combine would then discard the chosen output directory. Use the bare
+ // drive letter instead, or "root" when nothing is left (Unix "/").
+ var root = Path.GetPathRoot(fullPath) ?? string.Empty;
+ inputFolderName = root
+ .Replace(Path.DirectorySeparatorChar.ToString(), "")
+ .Replace(Path.AltDirectorySeparatorChar.ToString(), "")
+ .Replace(":", "");
+
+ if (string.IsNullOrEmpty(inputFolderName))
+ {
+ inputFolderName = "root";
+ }
+ return inputFolderName;
+}
+/// <summary>
+/// Builds the complete content output including structure and file contents based on configuration.
+/// </summary>
+/// <param name="path">The directory path to process.</param>
+/// <param name="config">The configuration specifying what to include.</param>
+/// <returns>The complete output content.</returns>
+/// Thrown when an error occurs during processing.
static string BuildContent(string path, Config config)
{
try
@@ -112,25 +173,51 @@ static string BuildContent(string path, Config config)
}
}
-static string CalculateStats(string path, string content, TimeSpan timeTaken) =>
- $"""
+/// <summary>
+/// Calculates and formats statistics about the processing operation.
+/// </summary>
+/// <param name="path">The directory that was processed; every file beneath it is counted recursively.</param>
+/// <param name="content">The generated content; its '\n' characters and its length are reported.</param>
+/// <param name="timeTaken">Time elapsed during processing.</param>
+/// <returns>Formatted statistics string, or a short "Unable to calculate statistics" line if an exception occurs.</returns>
+static string CalculateStats(string path, string content, TimeSpan timeTaken)
+{
+ try
+ {
+ var fileCount = Directory.GetFiles(path, "*", SearchOption.AllDirectories).Length;
+ var lineCount = content.Count(c => c == '\n');
+
+ return $"""
- 📊 Stats:
- 📁 Files processed: {Directory.GetFiles(path, "*", SearchOption.AllDirectories).Length}
- 📝 Total lines: {content.Count(c => c == '\n')}
- ⏱️ Time taken: {timeTaken.TotalSeconds:F2}s
- 💾 Output size: {content.Length} characters
- """;
+ 📊 Stats:
+ 📁 Files processed: {fileCount}
+ 📝 Total lines: {lineCount}
+ ⏱️ Time taken: {timeTaken.TotalSeconds:F2}s
+ 💾 Output size: {content.Length} characters
+ """;
+ }
+ catch (Exception)
+ {
+ return "\n📊 Stats: Unable to calculate statistics";
+ }
+}
-// Modified to accept the effective output filename and return the actual path written
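+/// <summary>
+/// Writes the generated content to the specified output location.
+/// </summary>
+/// <param name="outputTarget">Target path (file or directory).</param>
+/// <param name="content">Content to write.</param>
+/// <param name="format">Output format (text or json).</param>
+/// <param name="effectiveOutputFileName">Filename to use if outputTarget is a directory.</param>
+/// <returns>The actual path where the file was written.</returns>
+/// <exception cref="IOException">Thrown when an error occurs during file writing.</exception>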
static string WriteOutput(string outputTarget, string content, string format, string effectiveOutputFileName)
{
Console.WriteLine("\n💾 Writing output...");
- string resolvedFilePath = "";
+ string resolvedFilePath;
+
try
{
- // If outputTarget is an existing directory, combine it with the effectiveOutputFileName.
- // Otherwise, assume outputTarget is the full file path.
if (Directory.Exists(outputTarget))
{
resolvedFilePath = Path.Combine(outputTarget, effectiveOutputFileName);
@@ -138,8 +225,8 @@ static string WriteOutput(string outputTarget, string content, string format, st
else
{
resolvedFilePath = outputTarget;
- // Ensure the directory for the output file exists
var outputDirectory = Path.GetDirectoryName(resolvedFilePath);
+
if (!string.IsNullOrEmpty(outputDirectory) && !Directory.Exists(outputDirectory))
{
Directory.CreateDirectory(outputDirectory);
@@ -147,24 +234,35 @@ static string WriteOutput(string outputTarget, string content, string format, st
}
var formattedContent = format.ToLower() == "json"
- ? JsonSerializer.Serialize(new { content, timestamp = DateTime.Now })
+ ? JsonSerializer.Serialize(new { content, timestamp = DateTime.Now }, new JsonSerializerOptions { WriteIndented = true })
: content;
+
File.WriteAllText(resolvedFilePath, formattedContent);
- return resolvedFilePath; // Return the actual path
+ return resolvedFilePath;
+ }
+ catch (UnauthorizedAccessException ex)
+ {
+ throw new IOException($"Access denied writing to {outputTarget}", ex);
}
catch (Exception ex)
{
- // Try to provide a more specific path in the error if resolvedFilePath was determined
- string errorPath = string.IsNullOrEmpty(resolvedFilePath) ? outputTarget : resolvedFilePath;
- throw new IOException($"Error writing output to {errorPath}", ex);
+ throw new IOException($"Failed to write output to {outputTarget}", ex);
}
}
+/// <summary>
+/// Application configuration record.
+/// </summary>
+/// <remarks><c>DefaultInputPath</c>: default directory path to scan.
+/// <c>DefaultOutputFileName</c>: default output file name.
+/// <c>OutputFormat</c>: output format (text or json).
+/// <c>IncludeStructure</c>: whether to include directory structure in output.
+/// <c>IncludeContents</c>: whether to include file contents in output.</remarks>
record Config
{
public string DefaultInputPath { get; init; } = ".";
- public string DefaultOutputFileName { get; init; } = "context.txt"; // Base name
+ public string DefaultOutputFileName { get; init; } = "context.txt";
public string OutputFormat { get; init; } = "text";
public bool IncludeStructure { get; init; } = true;
public bool IncludeContents { get; init; } = true;
-}
\ No newline at end of file
+}
diff --git a/README.md b/README.md
index 7b4b79d..9734fb7 100644
--- a/README.md
+++ b/README.md
@@ -1,78 +1,166 @@
# CodeContext
-CodeContext is an app for Mac & Windows to provide code context to Language Learning Models (LLMs).
+CodeContext is a cross-platform CLI tool for Mac, Windows, and Linux that provides code context to Large Language Models (LLMs).
It scans project directories, generates a structured representation of the project, and extracts relevant file contents while intelligently filtering out unnecessary files and directories.

-Update: a more comprehensive tool is [code2prompt](https://github.com/mufeedvh/code2prompt).
-I found that CodeContext is more user friendly, faster, and automatically includes only user code (based on both extension and file contents), but you may have better luck.
+Update: A more comprehensive tool is [code2prompt](https://github.com/mufeedvh/code2prompt).
+I found that CodeContext is more user-friendly, faster, and automatically includes only user code (based on both extension and file contents), but you may have better luck with alternatives.
## Features
-- Generates a hierarchical project structure
-- Extracts contents of relevant files
-- Intelligent file and directory filtering
-- Git-aware: respects .gitignore rules
-- Handles binary files and large files
-- Excludes generated code
-- Customizable ignored extensions, directories, and files
+- **Hierarchical Project Structure**: Generates a clear tree view of your project
+- **Smart Content Extraction**: Extracts contents of relevant source files
+- **Intelligent Filtering**: Automatically filters out binaries, dependencies, build outputs, and more
+- **Git-Aware**: Respects .gitignore rules
+- **Binary File Detection**: Automatically detects and skips binary files
+- **Generated Code Detection**: Excludes auto-generated code
+- **Highly Customizable**: Configure ignored extensions, directories, and file size limits
+- **Multiple Output Formats**: Supports plain text and JSON output
+- **Well-Architected**: Clean separation of concerns with interfaces for testability
+
+## Architecture
+
+The project follows SOLID principles with a modular architecture:
+
+- **`Configuration/`**: Filter configuration settings
+- **`Interfaces/`**: Abstraction interfaces (IFileChecker, IConsoleWriter)
+- **`Services/`**: Core business logic (FileFilterService, ProjectScanner, GitIgnoreParser)
+- **`Utils/`**: Utility functions (FileUtilities)
+
+This design makes the codebase maintainable, testable, and extensible.
## Getting Started
### Prerequisites
-- .NET 6.0 or later
+- .NET 9.0 or later
#### macOS
-1. Install .NET SDK if you haven't already:
+Install .NET SDK if you haven't already:
+```bash
brew install --cask dotnet-sdk
+```
-2. Clone the repository:
-git clone https://github.com/yourusername/CodeContext.git
+#### Windows
-3. Navigate to the project directory:
-cd CodeContext
+Download and install the [.NET 9 SDK](https://dotnet.microsoft.com/download/dotnet/9.0)
-4. Build the project:
-dotnet build
+#### Linux
+
+Follow the [official .NET installation guide](https://learn.microsoft.com/en-us/dotnet/core/install/linux) for your distribution.
### Installation
1. Clone the repository:
-git clone https://github.com/DavidVeksler/CodeContext/CodeContext.git
-
-2. Navigate to the project directory:
+```bash
+git clone https://github.com/DavidVeksler/CodeContext.git
cd CodeContext
+```
-3. Build the project:
+2. Build the project:
+```bash
dotnet build
+```
+
+3. (Optional) Publish for your platform:
+```bash
+# Self-contained executable
+dotnet publish -c Release -r win-x64 --self-contained # Windows
+dotnet publish -c Release -r osx-x64 --self-contained # macOS
+dotnet publish -c Release -r linux-x64 --self-contained # Linux
+```
-1.
### Usage
Run the application with:
+```bash
dotnet run [path_to_index] [output_file]
+```
+Arguments:
- `path_to_index`: The directory to analyze (optional, will prompt if not provided)
-- `output_file`: The file to write the output (optional, defaults to `context.txt` in the parent directory of the indexed path)
+- `output_file`: The file to write the output (optional, defaults to `{foldername}_context.txt` in the indexed directory)
+
+If no arguments are provided, the application will prompt for input interactively.
+
+### Example
+
+```bash
+# Interactive mode
+dotnet run
+
+# With arguments
+dotnet run ./MyProject ./output/context.txt
-If no arguments are provided, the application will prompt for input.
+# Using published executable
+./CodeContext ./MyProject ./output/context.txt
+```
## Configuration
-Customize ignored files, directories, and extensions by modifying the `FileChecker` class:
+Create a `config.json` file in the application directory to customize settings:
-- `IgnoredExtensions`: File extensions to ignore
-- `IgnoredDirectories`: Directories to ignore
-- `IgnoredFiles`: Specific files to ignore
-- `MaxFileSizeBytes`: Maximum file size to process (default: 100KB)
+```json
+{
+ "DefaultInputPath": ".",
+ "DefaultOutputFileName": "context.txt",
+ "OutputFormat": "text",
+ "IncludeStructure": true,
+ "IncludeContents": true
+}
+```
+
+### Advanced Configuration
+
+Customize filtering behavior by modifying the `FilterConfiguration` class:
+
+- **`IgnoredExtensions`**: File extensions to ignore (e.g., `.exe`, `.dll`, `.png`)
+- **`IgnoredDirectories`**: Directories to ignore (e.g., `node_modules`, `bin`, `obj`)
+- **`IgnoredFiles`**: Specific files to ignore (e.g., `.gitignore`, `package-lock.json`)
+- **`MaxFileSizeBytes`**: Maximum file size to process (default: 100KB)
+- **`BinaryThreshold`**: Threshold for binary file detection (default: 0.3)
+
+## Output Formats
+
+### Text Format (default)
+Plain text output with file paths, separators, and content.
+
+### JSON Format
+Structured JSON with content and timestamp:
+```json
+{
+ "content": "...",
+ "timestamp": "2025-11-21T10:30:00"
+}
+```
+
+## Error Handling
+
+The application provides clear error messages with appropriate exit codes:
+- `1`: Directory not found
+- `2`: I/O error
+- `3`: Access denied
+- `4`: Unexpected error
## Contributing
Contributions are welcome! Please feel free to submit a Pull Request.
+### Development
+
+The codebase uses:
+- **C# 12** with modern language features
+- **Nullable reference types** for better null safety
+- **XML documentation comments** on all public APIs
+- **Dependency injection** patterns for testability
+
## License
-This project is licensed under the MIT License - see the [LICENSE.txt](LICENSE) file for details.
+This project is licensed under the MIT License - see the [LICENSE.txt](LICENSE.txt) file for details.
+
+## Acknowledgments
+
+Built with ❤️ for the developer community to make working with LLMs more efficient.
diff --git a/Services/ConsoleWriter.cs b/Services/ConsoleWriter.cs
new file mode 100644
index 0000000..cf663f7
--- /dev/null
+++ b/Services/ConsoleWriter.cs
@@ -0,0 +1,18 @@
+using CodeContext.Interfaces;
+
+namespace CodeContext.Services;
+
+/// <summary>
+/// Standard console implementation of <see cref="IConsoleWriter"/>; each member forwards to <see cref="Console"/>.
+/// </summary>
+public class ConsoleWriter : IConsoleWriter
+{
+ /// <inheritdoc />
+ public void WriteLine(string message) => Console.WriteLine(message);
+
+ /// <inheritdoc />
+ public void Write(string message) => Console.Write(message);
+
+ /// <inheritdoc />
+ public string? ReadLine() => Console.ReadLine();
+}
diff --git a/Services/FileFilterService.cs b/Services/FileFilterService.cs
new file mode 100644
index 0000000..76304c6
--- /dev/null
+++ b/Services/FileFilterService.cs
@@ -0,0 +1,175 @@
+using CodeContext.Configuration;
+using CodeContext.Interfaces;
+using CodeContext.Utils;
+
+namespace CodeContext.Services;
+
+/// <summary>
+/// Service for determining if files and directories should be filtered out during processing.
+/// </summary>
+public class FileFilterService : IFileChecker
+{
+    private readonly FilterConfiguration _config;
+    private readonly GitIgnoreParser _gitIgnoreParser;
+    private bool _gitIgnoreLoaded;
+
+    /// <summary>
+    /// Initializes a new instance of the FileFilterService class.
+    /// </summary>
+    /// <param name="config">The filter configuration to use.</param>
+    public FileFilterService(FilterConfiguration config)
+    {
+        _config = config ?? throw new ArgumentNullException(nameof(config));
+        _gitIgnoreParser = new GitIgnoreParser();
+    }
+
+    /// <inheritdoc />
+    /// <remarks>
+    /// Checks run in this order: ignored path segments, ignored file names, extensions,
+    /// file size, .gitignore patterns, binary content, and finally the generated-code marker.
+    /// </remarks>
+    public bool ShouldSkip(FileSystemInfo info, string rootPath)
+    {
+        ArgumentNullException.ThrowIfNull(info);
+
+        if (string.IsNullOrEmpty(rootPath))
+        {
+            throw new ArgumentException("Root path cannot be null or empty.", nameof(rootPath));
+        }
+
+        // Skip when any segment of the path relative to rootPath (the entry's own name included) is an ignored directory
+        var relativePath = Path.GetRelativePath(rootPath, info.FullName);
+        var pathParts = relativePath.Split(Path.DirectorySeparatorChar);
+
+        if (pathParts.Any(_config.IgnoredDirectories.Contains))
+        {
+            return true;
+        }
+
+        if (info.Attributes.HasFlag(FileAttributes.Directory))
+        {
+            return false; // We've already checked if it's an ignored directory
+        }
+
+        // Check for ignored files
+        if (_config.IgnoredFiles.Contains(info.Name))
+        {
+            return true;
+        }
+
+        // Check file extension
+        if (ShouldSkipByExtension(info.Name))
+        {
+            return true;
+        }
+
+        // Check file size
+        if (info is FileInfo fileInfo && fileInfo.Length > _config.MaxFileSizeBytes)
+        {
+            return true;
+        }
+
+        // Check gitignore patterns
+        if (ShouldSkipByGitIgnore(info.FullName, rootPath))
+        {
+            return true;
+        }
+
+        // Check if binary
+        if (FileUtilities.IsBinaryFile(info.FullName, _config.BinaryCheckChunkSize, _config.BinaryThreshold))
+        {
+            return true;
+        }
+
+        // Last check: it reads the first lines of the file
+        return IsGeneratedCode(info.FullName);
+    }
+
+    /// <summary>Returns true when the file's extension, or its compound extension such as <c>.min.css</c>, is ignored.</summary>
+    private bool ShouldSkipByExtension(string fileName)
+    {
+        var extension = Path.GetExtension(fileName);
+        if (_config.IgnoredExtensions.Contains(extension))
+        {
+            return true;
+        }
+
+        // Check for compound extensions like .min.css
+        var lastDotIndex = fileName.LastIndexOf('.');
+        if (lastDotIndex > 0)
+        {
+            var secondLastDotIndex = fileName.LastIndexOf('.', lastDotIndex - 1);
+            if (secondLastDotIndex >= 0)
+            {
+                var compoundExtension = fileName.Substring(secondLastDotIndex);
+                if (_config.IgnoredExtensions.Contains(compoundExtension))
+                {
+                    return true;
+                }
+            }
+        }
+
+        return false;
+    }
+
+    /// <summary>Returns true when the file matches a pattern in the .gitignore at the root of the enclosing git repository.</summary>
+    private bool ShouldSkipByGitIgnore(string filePath, string rootPath)
+    {
+        // Resolve the repository root once per call; outside a repository nothing is ignored.
+        var gitRoot = FindGitRepoRoot(rootPath);
+        if (gitRoot == null)
+        {
+            return false;
+        }
+
+        // Load the root .gitignore lazily, the first time a file reaches this check.
+        if (!_gitIgnoreLoaded)
+        {
+            _gitIgnoreParser.LoadFromFile(Path.Combine(gitRoot, ".gitignore"));
+            _gitIgnoreLoaded = true;
+        }
+
+        if (!_gitIgnoreParser.HasPatterns)
+        {
+            return false;
+        }
+
+        return _gitIgnoreParser.IsIgnored(Path.GetRelativePath(gitRoot, filePath));
+    }
+
+    /// <summary>Returns true when one of the first configured number of lines carries an auto-generated marker.</summary>
+    private bool IsGeneratedCode(string filePath)
+    {
+        try
+        {
+            // Prefix match covers both "<auto-generated>" and "<auto-generated />" header markers.
+            var lines = File.ReadLines(filePath).Take(_config.GeneratedCodeLinesToCheck);
+            return lines.Any(line => line.Contains("<auto-generated", StringComparison.OrdinalIgnoreCase));
+        }
+        catch (Exception)
+        {
+            // If we can't read the file, assume it's not generated code
+            return false;
+        }
+    }
+
+    /// <summary>
+    /// Walks up from <paramref name="path"/> (made absolute first, so relative
+    /// paths can reach their ancestors) until a directory containing <c>.git</c> is found.
+    /// </summary>
+    /// <returns>The repository root directory, or null when no ancestor contains <c>.git</c>.</returns>
+    private static string? FindGitRepoRoot(string path)
+    {
+        var currentPath = Path.GetFullPath(path);
+        while (!string.IsNullOrEmpty(currentPath))
+        {
+            if (Directory.Exists(Path.Combine(currentPath, ".git")))
+            {
+                return currentPath;
+            }
+            currentPath = Path.GetDirectoryName(currentPath);
+        }
+
+        return null;
+    }
+}
diff --git a/Services/GitIgnoreParser.cs b/Services/GitIgnoreParser.cs
new file mode 100644
index 0000000..a361d84
--- /dev/null
+++ b/Services/GitIgnoreParser.cs
@@ -0,0 +1,68 @@
+using System.Text.RegularExpressions;
+
+namespace CodeContext.Services;
+
+/// <summary>
+/// Handles parsing and matching of .gitignore patterns.
+/// </summary>
+public class GitIgnoreParser
+{
+    private readonly List<string> _patterns = new();
+    private readonly Dictionary<string, Regex> _regexCache = new();
+
+    /// <summary>
+    /// Replaces the loaded patterns with the non-blank, non-comment lines of a .gitignore file; does nothing if the file is missing.
+    /// </summary>
+    /// <param name="gitIgnorePath">Path to the .gitignore file.</param>
+    public void LoadFromFile(string gitIgnorePath)
+    {
+        if (!File.Exists(gitIgnorePath))
+        {
+            return;
+        }
+
+        _patterns.Clear();
+        _regexCache.Clear();
+
+        var lines = File.ReadAllLines(gitIgnorePath)
+            .Where(line => !string.IsNullOrWhiteSpace(line) && !line.StartsWith('#'));
+
+        _patterns.AddRange(lines);
+    }
+
+    /// <summary>
+    /// Checks if a relative path matches any loaded gitignore patterns (whole-path, case-insensitive match).
+    /// </summary>
+    /// <param name="relativePath">The relative path to check.</param>
+    /// <returns>True if the path should be ignored; otherwise, false.</returns>
+    public bool IsIgnored(string relativePath)
+    {
+        return _patterns.Any(pattern => IsMatch(relativePath, pattern));
+    }
+
+    /// <summary>
+    /// Checks if there are any loaded patterns.
+    /// </summary>
+    public bool HasPatterns => _patterns.Count > 0;
+
+    private bool IsMatch(string path, string pattern)
+    {
+        if (!_regexCache.TryGetValue(pattern, out var regex))
+        {
+            var regexPattern = ConvertGitIgnorePatternToRegex(pattern);
+            regex = new Regex($"^{regexPattern}$", RegexOptions.IgnoreCase | RegexOptions.Compiled);
+            _regexCache[pattern] = regex;
+        }
+
+        return regex.IsMatch(path);
+    }
+
+    private static string ConvertGitIgnorePatternToRegex(string pattern)
+    {
+        // Simple conversion ('.' literal, '*' any run, '?' any char) - could be enhanced for full gitignore spec
+        return pattern
+            .Replace(".", "\\.")
+            .Replace("*", ".*")
+            .Replace("?", ".");
+    }
+}
diff --git a/Services/ProjectScanner.cs b/Services/ProjectScanner.cs
new file mode 100644
index 0000000..8041162
--- /dev/null
+++ b/Services/ProjectScanner.cs
@@ -0,0 +1,166 @@
+using System.Text;
+using CodeContext.Interfaces;
+
+namespace CodeContext.Services;
+
+/// <summary>
+/// Service for scanning and analyzing project directories.
+/// </summary>
+public class ProjectScanner
+{
+    private readonly IFileChecker _fileChecker;
+    private readonly IConsoleWriter _console;
+    private string? _gitRepoRoot;
+
+    /// <summary>
+    /// Initializes a new instance of the ProjectScanner class.
+    /// </summary>
+    /// <param name="fileChecker">The file checker to use for filtering.</param>
+    /// <param name="console">The console writer for output.</param>
+    public ProjectScanner(IFileChecker fileChecker, IConsoleWriter console)
+    {
+        _fileChecker = fileChecker ?? throw new ArgumentNullException(nameof(fileChecker));
+        _console = console ?? throw new ArgumentNullException(nameof(console));
+    }
+
+    /// <summary>
+    /// Gets user input with a prompt.
+    /// </summary>
+    /// <param name="prompt">The prompt to display.</param>
+    /// <returns>The user's input, or an empty string when no line could be read.</returns>
+    public string GetUserInput(string prompt)
+    {
+        _console.Write(prompt);
+        return _console.ReadLine() ?? string.Empty;
+    }
+
+    /// <summary>
+    /// Generates a hierarchical structure representation of the project directory.
+    /// </summary>
+    /// <param name="path">The directory path to scan.</param>
+    /// <param name="indent">Current indentation level (used for recursion).</param>
+    /// <returns>A string representation of the directory structure.</returns>
+    public string GetProjectStructure(string path, int indent = 0)
+    {
+        if (string.IsNullOrEmpty(path))
+        {
+            throw new ArgumentException("Path cannot be null or empty.", nameof(path));
+        }
+
+        if (!Directory.Exists(path))
+        {
+            throw new DirectoryNotFoundException($"Directory not found: {path}");
+        }
+
+        _gitRepoRoot ??= FindGitRepoRoot(path);
+
+        if (indent == 0)
+        {
+            _console.WriteLine("📁 Analyzing directory structure...");
+        }
+
+        var rootPath = _gitRepoRoot ?? path;
+        var entries = Directory.EnumerateFileSystemEntries(path)
+            .OrderBy(e => e)
+            .Where(e => !_fileChecker.ShouldSkip(new FileInfo(e), rootPath))
+            .ToList();
+
+        var sb = new StringBuilder();
+
+        for (int i = 0; i < entries.Count; i++)
+        {
+            WriteProgress(i + 1, entries.Count);
+            var entry = entries[i];
+
+            if (Directory.Exists(entry))
+            {
+                var dir = new DirectoryInfo(entry);
+                sb.AppendLine($"{new string(' ', indent * 2)}[{dir.Name}/]");
+                sb.Append(GetProjectStructure(entry, indent + 1));
+            }
+            else
+            {
+                var file = new FileInfo(entry);
+                sb.AppendLine($"{new string(' ', indent * 2)}[{file.Extension}] {file.Name}");
+            }
+        }
+
+        return sb.ToString();
+    }
+
+    /// <summary>
+    /// Retrieves the contents of all non-filtered files in the directory tree.
+    /// </summary>
+    /// <param name="path">The directory path to scan.</param>
+    /// <returns>A string containing all file contents with separators; unreadable files are reported as warnings and left out.</returns>
+    public string GetFileContents(string path)
+    {
+        if (string.IsNullOrEmpty(path))
+        {
+            throw new ArgumentException("Path cannot be null or empty.", nameof(path));
+        }
+
+        if (!Directory.Exists(path))
+        {
+            throw new DirectoryNotFoundException($"Directory not found: {path}");
+        }
+
+        _gitRepoRoot ??= FindGitRepoRoot(path);
+        _console.WriteLine("\n📄 Processing files...");
+
+        var rootPath = _gitRepoRoot ?? path;
+        var files = Directory.EnumerateFiles(path, "*", SearchOption.AllDirectories)
+            .Where(f => !_fileChecker.ShouldSkip(new FileInfo(f), rootPath))
+            .ToList();
+
+        var results = new List<string>();
+        for (int i = 0; i < files.Count; i++)
+        {
+            WriteProgress(i + 1, files.Count);
+            var file = files[i];
+
+            try
+            {
+                var content = File.ReadAllText(file);
+                results.Add($"{file}\n{new string('-', 100)}\n{content}");
+            }
+            catch (Exception ex)
+            {
+                _console.WriteLine($"\n⚠️ Warning: Could not read file {file}: {ex.Message}");
+            }
+        }
+
+        return string.Join("\n\n", results);
+    }
+
+    /// <summary>
+    /// Gets the git repository root found by the first scan, or null when no scan has run or no repository was found.
+    /// </summary>
+    public string? GitRepoRoot => _gitRepoRoot;
+
+    private string? FindGitRepoRoot(string path)
+    {
+        if (string.IsNullOrEmpty(path) || !Directory.Exists(path))
+        {
+            return null;
+        }
+
+        var currentPath = path;
+        while (!string.IsNullOrEmpty(currentPath))
+        {
+            if (Directory.Exists(Path.Combine(currentPath, ".git")))
+            {
+                return currentPath;
+            }
+            currentPath = Path.GetDirectoryName(currentPath);
+        }
+
+        return null;
+    }
+
+    private void WriteProgress(int current, int total)
+    {
+        var percent = (int)((current / (double)total) * 100);
+        _console.Write($"\r⏳ Progress: {percent}% ({current}/{total})");
+    }
+}
diff --git a/Utils/FileUtilities.cs b/Utils/FileUtilities.cs
new file mode 100644
index 0000000..d4f0671
--- /dev/null
+++ b/Utils/FileUtilities.cs
@@ -0,0 +1,82 @@
+namespace CodeContext.Utils;
+
+/// <summary>
+/// Utility methods for file operations.
+/// </summary>
+public static class FileUtilities
+{
+ /// <summary>
+ /// Determines if a file is binary based on its content.
+ /// </summary>
+ /// <param name="filePath">Path to the file to check.</param>
+ /// <param name="chunkSize">Number of bytes to read for analysis.</param>
+ /// <param name="binaryThreshold">Threshold ratio (0.0-1.0) of non-printable bytes to consider a file binary.</param>
+ /// <returns>True if the file appears to be binary; false otherwise, including for missing, empty, UTF-8 BOM-prefixed or unreadable files.</returns>
+ public static bool IsBinaryFile(string filePath, int chunkSize = 4096, double binaryThreshold = 0.3)
+ {
+ if (string.IsNullOrEmpty(filePath))
+ {
+ throw new ArgumentException("File path cannot be null or empty.", nameof(filePath));
+ }
+
+ if (!File.Exists(filePath))
+ {
+ return false;
+ }
+
+ try
+ {
+ using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
+
+ if (stream.Length == 0)
+ {
+ return false;
+ }
+
+ // A UTF-8 BOM marks the file as text, so the byte-ratio check is skipped
+ if (HasUtf8Bom(stream))
+ {
+ return false;
+ }
+
+ return CheckBinaryContent(stream, chunkSize, binaryThreshold);
+ }
+ catch (Exception)
+ {
+ // If we can't read the file (permissions, etc.), assume it's not binary
+ return false;
+ }
+ }
+
+ private static bool HasUtf8Bom(FileStream stream)
+ {
+ if (stream.Length < 3)
+ {
+ return false;
+ }
+
+ var bom = new byte[3];
+ stream.Read(bom, 0, 3);
+ stream.Position = 0;
+ return bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF;
+ }
+
+ private static bool CheckBinaryContent(FileStream stream, int chunkSize, double threshold)
+ {
+ var buffer = new byte[chunkSize];
+ var bytesRead = stream.Read(buffer, 0, chunkSize);
+
+ if (bytesRead == 0)
+ {
+ return false;
+ }
+ // Binary when the share of bytes flagged by IsBinaryByte (0-6, 15-31, 128-255) exceeds the threshold.
+ var nonPrintableCount = buffer.Take(bytesRead).Count(IsBinaryByte);
+ return (double)nonPrintableCount / bytesRead > threshold;
+ }
+
+ private static bool IsBinaryByte(byte b)
+ {
+ return b is (< 7 or > 14) and (< 32 or > 127);
+ }
+}