diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b3c4db..a1fe4a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.13.0] - 2026-02-03 ### Added +- Added `file_snippet` scan configuration support in `scanoss.json` for engine tuning parameters (`min_snippet_hits`, `min_snippet_lines`, `honour_file_exts`, `ranking_enabled`, `ranking_threshold`, `skip_headers`, `skip_headers_limit`) +- Added CLI scan configuration options with resolution priority (file_snippet > CLI) +- Added `FileSnippet` class for scan configuration management and resolution -- Upcoming changes... ## [0.12.1] - 2026-01-08 ### Changed - Updated slf4j from 2.0.16 to 2.0.17 @@ -141,4 +144,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [0.10.1]: https://github.com/scanoss/scanoss.java/compare/v0.10.0...v0.10.1 [0.11.0]: https://github.com/scanoss/scanoss.java/compare/v0.10.1...v0.11.0 [0.12.0]: https://github.com/scanoss/scanoss.java/compare/v0.11.0...v0.12.0 -[0.12.1]: https://github.com/scanoss/scanoss.java/compare/v0.12.0...v0.12.1 \ No newline at end of file +[0.12.1]: https://github.com/scanoss/scanoss.java/compare/v0.12.0...v0.12.1 +[0.13.0]: https://github.com/scanoss/scanoss.java/compare/v0.12.1...v0.13.0 \ No newline at end of file diff --git a/pom.xml b/pom.xml index c646000..dccba64 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.scanoss scanoss - 0.12.1 + 0.13.0 jar scanoss.java https://github.com/scanoss/scanoss.java diff --git a/src/main/java/com/scanoss/Scanner.java b/src/main/java/com/scanoss/Scanner.java index 9f86e63..004ad6c 100644 --- a/src/main/java/com/scanoss/Scanner.java +++ b/src/main/java/com/scanoss/Scanner.java @@ -31,6 +31,7 @@ import com.scanoss.processor.*; import com.scanoss.rest.ScanApi; import com.scanoss.settings.Bom; +import com.scanoss.settings.FileSnippet; import com.scanoss.settings.ScanossSettings; import 
com.scanoss.utils.JsonUtils; import lombok.*; @@ -104,6 +105,8 @@ public class Scanner { private final ScanFileProcessor scanFileProcessor; private final WfpFileProcessor wfpFileProcessor; private final ScanossSettings settings; + private final FileSnippet cliFileSnippet; // CLI-provided scan config (lowest priority) + private final FileSnippet fileSnippet; // Resolved scan config (after priority merge) private final ScannerPostProcessor postProcessor; private final FilterConfig filterConfig; private Predicate fileFilter; @@ -116,7 +119,8 @@ private Scanner(Boolean skipSnippets, Boolean allExtensions, Boolean obfuscate, Integer snippetLimit, String customCert, Proxy proxy, Winnowing winnowing, ScanApi scanApi, ScanFileProcessor scanFileProcessor, WfpFileProcessor wfpFileProcessor, - ScanossSettings settings, ScannerPostProcessor postProcessor, FilterConfig filterConfig, + ScanossSettings settings, FileSnippet cliFileSnippet, FileSnippet fileSnippet, + ScannerPostProcessor postProcessor, FilterConfig filterConfig, Predicate fileFilter, Predicate folderFilter ) { @@ -137,20 +141,27 @@ private Scanner(Boolean skipSnippets, Boolean allExtensions, Boolean obfuscate, this.snippetLimit = snippetLimit; this.customCert = customCert; this.proxy = proxy; + this.settings = Objects.requireNonNullElseGet(settings, () -> ScanossSettings.builder().build()); + this.cliFileSnippet = cliFileSnippet; + // Resolve scan config: file_snippet (highest) > CLI (lowest) + this.fileSnippet = this.settings.getResolvedScanConfig( + Objects.requireNonNullElseGet(cliFileSnippet, () -> FileSnippet.builder().build())); this.winnowing = Objects.requireNonNullElseGet(winnowing, () -> Winnowing.builder().skipSnippets(skipSnippets).allExtensions(allExtensions).obfuscate(obfuscate) .hpsm(hpsm).snippetLimit(snippetLimit) + .skipHeaders(this.fileSnippet.getSkipHeaders() != null && this.fileSnippet.getSkipHeaders()) + .skipHeadersLimit(this.fileSnippet.getSkipHeadersLimit() != null ? 
this.fileSnippet.getSkipHeadersLimit() : 0) .build()); this.scanApi = Objects.requireNonNullElseGet(scanApi, () -> ScanApi.builder().url(url).apiKey(apiKey).timeout(timeout).retryLimit(retryLimit).flags(scanFlags) - .sbomType(sbomType).sbom(sbom).customCert(customCert).proxy(proxy).settings(settings) + .sbomType(sbomType).sbom(sbom).customCert(customCert).proxy(proxy).settings(this.settings) + .fileSnippet(this.fileSnippet) .build()); this.scanFileProcessor = Objects.requireNonNullElseGet(scanFileProcessor, () -> ScanFileProcessor.builder().winnowing(this.winnowing).scanApi(this.scanApi).build()); this.wfpFileProcessor = Objects.requireNonNullElseGet(wfpFileProcessor, () -> WfpFileProcessor.builder() .winnowing(this.winnowing) .build()); - this.settings = Objects.requireNonNullElseGet(settings, () -> ScanossSettings.builder().build()); this.postProcessor = Objects.requireNonNullElseGet(postProcessor, () -> ScannerPostProcessor.builder().build()); diff --git a/src/main/java/com/scanoss/Winnowing.java b/src/main/java/com/scanoss/Winnowing.java index 8ae3954..034bf24 100644 --- a/src/main/java/com/scanoss/Winnowing.java +++ b/src/main/java/com/scanoss/Winnowing.java @@ -80,6 +80,10 @@ public class Winnowing { @Builder.Default private int snippetLimit = MAX_LONG_LINE_CHARS; // Enable limiting of size of a single line of snippet generation @Builder.Default + private boolean skipHeaders = false; // Skip license headers, comments and imports at the beginning of files + @Builder.Default + private int skipHeadersLimit = 0; // Maximum number of header lines to skip (0 = auto-detect) + @Builder.Default private Map obfuscationMap = new ConcurrentHashMap<>(); /** @@ -168,6 +172,12 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c wfpBuilder.append(String.format("hpsm=%s\n", Hpsm.calcHpsm(contents))); } + int skipLines = 0; + if (this.skipHeaders) { + skipLines = detectHeaderLines(fileContents, this.skipHeadersLimit); + log.trace("Skipping {} 
header lines for snippet generation: {}", skipLines, filename); + } + String gram = ""; List window = new ArrayList<>(); char normalized; @@ -183,7 +193,7 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c } else { normalized = WinnowingUtils.normalize(c); } - if (normalized > 0) { + if (normalized > 0 && line > skipLines) { gram += normalized; if (gram.length() >= ScanossConstants.GRAM) { Long gramCRC32 = crc32c(gram);
@@ -312,6 +322,77 @@ private Boolean skipSnippets(@NonNull String filename, char[] contents) {
         return false;
     }
 
+    /**
+     * Detect the number of header lines at the beginning of a file.
+     * Header lines include license comment blocks, single-line comments,
+     * blank lines, and import/package statements. Counting stops at the first
+     * line that is none of these, or as soon as {@code maxLines} is reached.
+     * The empty remainder after a final newline is not counted as a line.
+     *
+     * @param contents file contents as char array
+     * @param maxLines maximum number of header lines to detect (0 or less = no limit)
+     * @return number of header lines detected
+     */
+    int detectHeaderLines(char[] contents, int maxLines) {
+        int headerLines = 0;
+        boolean inBlockComment = false;
+        int lineStart = 0;
+
+        for (int i = 0; i <= contents.length; i++) {
+            if (i < contents.length && contents[i] != '\n') {
+                continue; // still inside the current line
+            }
+            if (i == contents.length && lineStart == i) {
+                break; // nothing follows the final newline (or the file is empty): not a real line
+            }
+            String line = new String(contents, lineStart, i - lineStart).trim();
+
+            if (inBlockComment) {
+                // Lines up to and including the closing marker belong to the comment
+                inBlockComment = !line.contains("*/");
+            } else if (line.startsWith("/*")) {
+                // Only stay in comment mode when the block is not closed on this same line
+                inBlockComment = !line.contains("*/");
+            } else if (!isHeaderLine(line)) {
+                break; // Non-header line found
+            }
+            headerLines++;
+
+            if (maxLines > 0 && headerLines >= maxLines) {
+                break;
+            }
+            lineStart = i + 1;
+        }
+
+        return headerLines;
+    }
+
+    /**
+     * Check if a trimmed line outside a block comment still counts as header.
+     *
+     * @param line trimmed source line
+     * @return true for blank lines, single-line comments, lines starting with
+     *         {@code *} and import/package statements
+     */
+    private boolean isHeaderLine(String line) {
+        return line.isEmpty()
+                || line.startsWith("//") || line.startsWith("#!") || line.startsWith("# ")
+                || line.startsWith("*") // also matches "* ", so no separate check is needed
+                || isImportOrPackageLine(line);
+    }
+
+    /**
+     * Check if a line is an import or package declaration.
+     *
+     * @param line trimmed source line
+     * @return true if the line is an import/package/include statement
+     */
+    private boolean isImportOrPackageLine(String line) {
+        return line.startsWith("import ") || line.startsWith("package ") ||
+               line.startsWith("from ") || line.startsWith("#include ") ||
+               line.startsWith("using ") || line.startsWith("require ");
+    }
+
     /**
      * Try to detect if this is a text file or not
      *
diff --git a/src/main/java/com/scanoss/cli/ScanCommandLine.java b/src/main/java/com/scanoss/cli/ScanCommandLine.java index d3e6c53..e69e0e3 100644 --- a/src/main/java/com/scanoss/cli/ScanCommandLine.java +++ b/src/main/java/com/scanoss/cli/ScanCommandLine.java @@ -25,6 +25,7 @@ import com.scanoss.Scanner; import com.scanoss.exceptions.ScannerException; import com.scanoss.exceptions.WinnowingException; +import com.scanoss.settings.FileSnippet; import com.scanoss.settings.ScanossSettings; import com.scanoss.utils.JsonUtils; import com.scanoss.utils.ProxyUtils; @@ -105,6 +106,27 @@ class ScanCommandLine implements Runnable { @picocli.CommandLine.Option(names = {"-H", "--hpsm"}, description = "Use High Precision Snippet Matching algorithm") private boolean enableHpsm = false; + @picocli.CommandLine.Option(names = {"--min-snippet-hits"}, description = "Minimum snippet hits required (0 = unset, uses server config)") + private int minSnippetHits = 0; + + @picocli.CommandLine.Option(names = {"--min-snippet-lines"}, description = "Minimum snippet lines required (0 = unset, uses server config)") + private int minSnippetLines = 0; + + @picocli.CommandLine.Option(names = {"--honour-file-exts"}, description = "Honour file extensions (true|false|unset)", arity = "1") + private String honourFileExts = null; + + @picocli.CommandLine.Option(names = {"--ranking"}, description = "Enable/disable ranking (true|false|unset)", arity = "1") + private String ranking = null; + + @picocli.CommandLine.Option(names = {"--ranking-threshold"}, description = "Ranking threshold value (-1 
= unset, uses server config)") + private int rankingThreshold = -1; + + @picocli.CommandLine.Option(names = {"--skip-headers"}, description = "Skip license headers, comments and imports at the beginning of files (applies locally)") + private boolean skipHeaders = false; + + @picocli.CommandLine.Option(names = {"--skip-headers-limit"}, description = "Skip limit for license headers (0 = unset, applies locally)") + private int skipHeadersLimit = 0; + @picocli.CommandLine.Parameters(arity = "1", description = "file/folder to scan") private String fileFolder; @@ -160,13 +182,14 @@ public void run() { printMsg(err, String.format("Using flags %s", scanFlags)); } } + FileSnippet cliFileSnippet = buildCliScanConfig(); + printDebug(err, "CLI file snippet: " + cliFileSnippet.toString()); scanner = Scanner.builder().skipSnippets(skipSnippets).allFolders(allFolders).allExtensions(allExtensions) .hiddenFilesFolders(allHidden).numThreads(numThreads).url(apiUrl).apiKey(apiKey) .retryLimit(retryLimit).timeout(Duration.ofSeconds(timeoutLimit)).scanFlags(scanFlags) .snippetLimit(snippetLimit).customCert(caCertPem).proxy(proxy).hpsm(enableHpsm) - .settings(settings).obfuscate(obfuscate) + .settings(settings).obfuscate(obfuscate).cliFileSnippet(cliFileSnippet) .build(); - File f = new File(fileFolder); if (!f.exists()) { throw new RuntimeException(String.format("Error: File or folder does not exist: %s\n", fileFolder));
@@ -198,6 +221,59 @@ private String loadFileToString(@NonNull String filename) {
         }
     }
 
+    /**
+     * Build a {@link FileSnippet} from the scan tuning CLI arguments.
+     * <p>
+     * Numeric options are passed through unchanged; their defaults (0 and -1) are
+     * the values {@link FileSnippet} treats as unset. The tri-state options are
+     * converted with {@link #parseTriStateBoolean(String)}.
+     * </p>
+     *
+     * @return FileSnippet populated with CLI-provided values
+     * @throws IllegalArgumentException if a tri-state option holds an unsupported value
+     */
+    private FileSnippet buildCliScanConfig() {
+        return FileSnippet.builder()
+                .minSnippetHits(minSnippetHits)
+                .minSnippetLines(minSnippetLines)
+                .honourFileExts(parseTriStateBoolean(honourFileExts))
+                .rankingEnabled(parseTriStateBoolean(ranking))
+                .rankingThreshold(rankingThreshold)
+                .skipHeaders(skipHeaders)
+                .skipHeadersLimit(skipHeadersLimit)
+                .build();
+    }
+
+    /**
+     * Parse a tri-state boolean string value.
+     * <p>
+     * Unsupported text is rejected instead of being handed to
+     * {@link Boolean#parseBoolean(String)}, which would quietly turn a typo
+     * such as "ture" or "yes" into {@code false}.
+     * </p>
+     *
+     * @param value the string value ("true", "false", "unset", or null); case-insensitive
+     * @return Boolean.TRUE, Boolean.FALSE, or null for unset
+     * @throws IllegalArgumentException if value is none of the supported literals
+     */
+    private static Boolean parseTriStateBoolean(String value) {
+        if (value == null) {
+            return null;
+        }
+        String normalized = value.trim();
+        if (normalized.equalsIgnoreCase("unset")) {
+            return null;
+        }
+        if (normalized.equalsIgnoreCase("true")) {
+            return Boolean.TRUE;
+        }
+        if (normalized.equalsIgnoreCase("false")) {
+            return Boolean.FALSE;
+        }
+        throw new IllegalArgumentException(
+                String.format("Invalid value '%s': expected true, false or unset", value));
+    }
+
     /**
      * Scan the specified file and output the results
      *
@@ -245,7 +321,7 @@ private void scanFolder(String folder) { if (CommandLine.debug) { e.printStackTrace(err); } - throw new RuntimeException(String.format("Something went wrong while scanning %s.", folder)); + throw new RuntimeException(String.format("Something went wrong while scanning %s.", folder), e); } } } diff --git a/src/main/java/com/scanoss/rest/ScanApi.java b/src/main/java/com/scanoss/rest/ScanApi.java index 8526b36..3f2563f 100644 --- a/src/main/java/com/scanoss/rest/ScanApi.java +++ b/src/main/java/com/scanoss/rest/ScanApi.java @@ -25,6 +25,7 @@ import com.scanoss.dto.SbomLegacy; import com.scanoss.exceptions.ScanApiException; import com.scanoss.settings.Rule; +import com.scanoss.settings.FileSnippet; import com.scanoss.settings.ScanossSettings; import com.scanoss.utils.JsonUtils; import com.scanoss.utils.PackageDetails; @@ -36,6 +37,7 @@ import okhttp3.tls.HandshakeCertificates; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.io.InterruptedIOException; import java.net.Proxy; import java.time.Duration; 
@@ -72,12 +74,14 @@ public class ScanApi { private Proxy proxy; // Proxy configuration private String baseUrl; // SCANOSS base API URI (to used instead of url) private ScanossSettings settings; + private FileSnippet fileSnippet; // Resolved scan configuration parameters @SuppressWarnings("unused") private ScanApi(String scanType, Duration timeout, Integer retryLimit, String url, String apiKey, String flags, String sbomType, String sbom, OkHttpClient okHttpClient, Map headers, String customCert, - Proxy proxy, String baseUrl, ScanossSettings settings) { + Proxy proxy, String baseUrl, ScanossSettings settings, FileSnippet fileSnippet) { this.settings = settings; + this.fileSnippet = fileSnippet; this.scanType = scanType; this.timeout = timeout; this.retryLimit = retryLimit; @@ -178,6 +182,31 @@ public String scan(String wfp, String context, int scanID) throws ScanApiExcepti data.put("type", "identify"); } + // Add scan configuration parameters as base64-encoded JSON in scanoss-settings key + if (fileSnippet != null) { + Map settingsMap = new LinkedHashMap<>(); + if (fileSnippet.isMinSnippetHitsSet()) { + settingsMap.put("min_snippet_hits", fileSnippet.getMinSnippetHits()); + } + if (fileSnippet.isMinSnippetLinesSet()) { + settingsMap.put("min_snippet_lines", fileSnippet.getMinSnippetLines()); + } + if (fileSnippet.isHonourFileExtsSet()) { + settingsMap.put("honour_file_exts", fileSnippet.getHonourFileExts()); + } + if (fileSnippet.isRankingEnabledSet()) { + settingsMap.put("ranking_enabled", fileSnippet.getRankingEnabled()); + } + if (fileSnippet.isRankingThresholdSet()) { + settingsMap.put("ranking_threshold", fileSnippet.getRankingThreshold()); + } + if (!settingsMap.isEmpty()) { + String json = JsonUtils.toJson(settingsMap); + log.debug("scanoss settings:" + json); + String encoded = Base64.getEncoder().encodeToString(json.getBytes(StandardCharsets.UTF_8)); + headers.put("scanoss-settings", encoded); + } + } Request request; // Create multipart request try { diff 
--git a/src/main/java/com/scanoss/settings/FileSnippet.java b/src/main/java/com/scanoss/settings/FileSnippet.java
new file mode 100644
index 0000000..481328e
--- /dev/null
+++ b/src/main/java/com/scanoss/settings/FileSnippet.java
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (c) 2026, SCANOSS
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package com.scanoss.settings;
+
+import com.google.gson.annotations.SerializedName;
+import lombok.*;
+
+/**
+ * Scan configuration parameters for engine tuning.
+ * <p>
+ * Holds parameters that control how the SCANOSS scanning engine processes files.
+ * Used both for JSON deserialization from scanoss.json and for resolved CLI configuration.
+ * </p>
+ * Resolution priority (highest to lowest):
+ * <ol>
+ *   <li>settings.file_snippet section in scanoss.json</li>
+ *   <li>CLI arguments</li>
+ * </ol>
+ * <p>
+ * A value only takes part in resolution when it counts as set; the builder
+ * defaults declared on the fields below are all treated as unset.
+ * </p>
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class FileSnippet {
+
+    // 0 is the default; null and values of 0 or less count as unset
+    @Builder.Default
+    @SerializedName("min_snippet_hits")
+    private Integer minSnippetHits = 0;
+
+    // 0 is the default; null and values of 0 or less count as unset
+    @Builder.Default
+    @SerializedName("min_snippet_lines")
+    private Integer minSnippetLines = 0;
+
+    // Tri-state: null counts as unset, true and false are both explicit values
+    @SerializedName("honour_file_exts")
+    private Boolean honourFileExts;
+
+    // Tri-state: null counts as unset, true and false are both explicit values
+    @SerializedName("ranking_enabled")
+    private Boolean rankingEnabled;
+
+    // -1 is the default; null and negative values count as unset, 0 is a valid threshold
+    @Builder.Default
+    @SerializedName("ranking_threshold")
+    private Integer rankingThreshold = -1;
+
+    // Only a true value is propagated by applyNonDefault; false and null are ignored there
+    @Builder.Default
+    @SerializedName("skip_headers")
+    private Boolean skipHeaders = false;
+
+    // 0 is the default; null and values of 0 or less count as unset
+    @Builder.Default
+    @SerializedName("skip_headers_limit")
+    private Integer skipHeadersLimit = 0;
+
+    /** @return true when min_snippet_hits is non-null and greater than 0 */
+    public boolean isMinSnippetHitsSet() {
+        return minSnippetHits != null && minSnippetHits > 0;
+    }
+
+    /** @return true when min_snippet_lines is non-null and greater than 0 */
+    public boolean isMinSnippetLinesSet() {
+        return minSnippetLines != null && minSnippetLines > 0;
+    }
+
+    /** @return true when honour_file_exts is non-null (either true or false) */
+    public boolean isHonourFileExtsSet() {
+        return honourFileExts != null;
+    }
+
+    /** @return true when ranking_enabled is non-null (either true or false) */
+    public boolean isRankingEnabledSet() {
+        return rankingEnabled != null;
+    }
+
+    /** @return true when ranking_threshold is non-null and 0 or greater */
+    public boolean isRankingThresholdSet() {
+        return rankingThreshold != null && rankingThreshold >= 0;
+    }
+
+    /** @return true when skip_headers_limit is non-null and greater than 0 */
+    public boolean isSkipHeadersLimitSet() {
+        return skipHeadersLimit != null && skipHeadersLimit > 0;
+    }
+
+    /**
+     * Resolves scan configuration by merging two priority layers.
+     * Priority: fileSnippet (highest) > cli (lowest).
+     * <p>
+     * When a setting is "unset" at a given level, it is not applied, allowing
+     * lower-priority levels to provide the value.
+     * </p>
+     *
+     * @param cli CLI-provided config (lowest priority); null is skipped
+     * @param fileSnippet File snippet config from scanoss.json (highest priority); null is skipped
+     * @return Resolved FileSnippet with highest-priority non-unset values
+     */
+    public static FileSnippet resolve(FileSnippet cli, FileSnippet fileSnippet) {
+        FileSnippet resolved = FileSnippet.builder().build();
+
+        // The lowest-priority layer is applied first so the later layer can overwrite it
+        if (cli != null) {
+            applyNonDefault(cli, resolved);
+        }
+        if (fileSnippet != null) {
+            applyNonDefault(fileSnippet, resolved);
+        }
+
+        return resolved;
+    }
+
+    /**
+     * Copies every value that counts as set on {@code source} onto {@code target},
+     * leaving the target's other fields untouched.
+     *
+     * @param source config whose set values are copied
+     * @param target config that receives the values
+     */
+    private static void applyNonDefault(FileSnippet source, FileSnippet target) {
+        if (source.isMinSnippetHitsSet()) {
+            target.setMinSnippetHits(source.getMinSnippetHits());
+        }
+        if (source.isMinSnippetLinesSet()) {
+            target.setMinSnippetLines(source.getMinSnippetLines());
+        }
+        if (source.isHonourFileExtsSet()) {
+            target.setHonourFileExts(source.getHonourFileExts());
+        }
+        if (source.isRankingEnabledSet()) {
+            target.setRankingEnabled(source.getRankingEnabled());
+        }
+        if (source.isRankingThresholdSet()) {
+            target.setRankingThreshold(source.getRankingThreshold());
+        }
+        // skip_headers is only ever switched on here: a false value never overrides a true one
+        if (source.getSkipHeaders() != null && source.getSkipHeaders()) {
+            target.setSkipHeaders(true);
+        }
+        if (source.isSkipHeadersLimitSet()) {
+            target.setSkipHeadersLimit(source.getSkipHeadersLimit());
+        }
+    }
+}
diff --git a/src/main/java/com/scanoss/settings/ScanossSettings.java b/src/main/java/com/scanoss/settings/ScanossSettings.java index ff975b9..733dc04 100644 --- a/src/main/java/com/scanoss/settings/ScanossSettings.java +++ b/src/main/java/com/scanoss/settings/ScanossSettings.java @@ -23,6 +23,7 @@ package com.scanoss.settings; import com.google.gson.Gson; +import com.google.gson.annotations.SerializedName; import com.scanoss.dto.SbomLegacy; import lombok.*; import lombok.extern.slf4j.Slf4j; @@ -59,6 +60,9 @@ public class ScanossSettings { @AllArgsConstructor public static class Settings { private final @Builder.Default Skip skip = Skip.builder().build(); + + 
@SerializedName("file_snippet") + private FileSnippet fileSnippet; } @Data
@@ -147,5 +151,20 @@ public List getScanningIgnorePattern() {
         return this.settings.getSkip().getPatterns().getScanning();
     }
 
+    /**
+     * Resolves scan configuration by merging CLI arguments with settings from scanoss.json.
+     * Priority (highest to lowest):
+     * <ol>
+     *   <li>settings.file_snippet section in scanoss.json</li>
+     *   <li>CLI arguments (fallback)</li>
+     * </ol>
+     *
+     * @param cliConfig Configuration from CLI arguments
+     * @return Resolved FileSnippet with highest-priority non-unset values
+     */
+    public FileSnippet getResolvedScanConfig(FileSnippet cliConfig) {
+        FileSnippet fileSnippet = (settings != null) ? settings.getFileSnippet() : null;
+        return FileSnippet.resolve(cliConfig, fileSnippet);
+    }
 
 }
diff --git a/src/test/java/com/scanoss/TestSettings.java b/src/test/java/com/scanoss/TestSettings.java index fa71132..73cbe3a 100644 --- a/src/test/java/com/scanoss/TestSettings.java +++ b/src/test/java/com/scanoss/TestSettings.java @@ -22,6 +23,7 @@ */ package com.scanoss; +import com.scanoss.settings.FileSnippet; import com.scanoss.settings.ScanossSettings; import com.scanoss.utils.JsonUtils; import lombok.extern.slf4j.Slf4j; @@ -40,6 +41,7 @@ public class TestSettings { private Path existingSettingsPath; private Path nonExistentSettingsPath; + private Path scanConfigSettingsPath; @Before public void Setup() throws URISyntaxException { @@ -58,6 +60,11 @@ public void Setup() throws URISyntaxException { existingSettingsPath = Paths.get(resource.toURI()); nonExistentSettingsPath = Paths.get("non-existent-settings.json"); + var scanConfigResource = getClass().getClassLoader().getResource("scanoss-scan-config.json"); + if (scanConfigResource != null) { + scanConfigSettingsPath = Paths.get(scanConfigResource.toURI()); + } + // Verify the file actually exists if (!Files.exists(existingSettingsPath)) { throw new IllegalStateException(
@@ -145,4 +152,171 @@ public void testSkip() {
         log.info("Finished {} -->", methodName);
     }
 
+    @Test
+    public void testScanConfigFromJson() {
+        String methodName = new Object() {}.getClass().getEnclosingMethod().getName();
+        log.info("<-- Starting {}", methodName);
+
+        assertNotNull("Scan config settings path should exist", scanConfigSettingsPath);
+        ScanossSettings settings = ScanossSettings.createFromPath(scanConfigSettingsPath);
+        assertNotNull("Settings should not be null", settings);
+
+        // The file_snippet section must exist before any of its fields are read
+        FileSnippet fileSnippet = settings.getSettings().getFileSnippet();
+        assertNotNull("file_snippet should not be null", fileSnippet);
+
+        // Verify file_snippet scan config fields
+        assertEquals(Integer.valueOf(3), fileSnippet.getMinSnippetHits());
+        assertEquals(Integer.valueOf(10), fileSnippet.getMinSnippetLines());
+        assertEquals(Boolean.TRUE, fileSnippet.getHonourFileExts());
+        assertEquals(Boolean.TRUE, fileSnippet.getRankingEnabled());
+        assertEquals(Integer.valueOf(5), fileSnippet.getRankingThreshold());
+        assertEquals(Boolean.FALSE, fileSnippet.getSkipHeaders());
+        assertEquals(Integer.valueOf(0), fileSnippet.getSkipHeadersLimit());
+
+        log.info("Finished {} -->", methodName);
+    }
+
+    @Test
+    public void testScanConfigResolution() {
+        String methodName = new Object() {}.getClass().getEnclosingMethod().getName();
+        log.info("<-- Starting {}", methodName);
+
+        // CLI config (lowest priority)
+        FileSnippet cliConfig = FileSnippet.builder()
+                .minSnippetHits(2)
+                .minSnippetLines(5)
+                .rankingEnabled(false)
+                .rankingThreshold(3)
+                .build();
+
+        // File-snippet config (highest priority)
+        FileSnippet fileSnippetConfig = FileSnippet.builder()
+                .minSnippetHits(5)
+                .rankingThreshold(8)
+                .build();
+
+        FileSnippet resolved = FileSnippet.resolve(cliConfig, fileSnippetConfig);
+
+        // file_snippet has min_snippet_hits=5 (highest priority)
+        assertEquals(Integer.valueOf(5), resolved.getMinSnippetHits());
+        // CLI has ranking_enabled=false (file_snippet leaves it unset, so the CLI value survives)
+        assertEquals(Boolean.FALSE, resolved.getRankingEnabled());
+        // file_snippet has ranking_threshold=8 (highest priority) + 
assertEquals(Integer.valueOf(8), resolved.getRankingThreshold()); + // CLI has min_snippet_lines=5 (only CLI sets it) + assertEquals(Integer.valueOf(5), resolved.getMinSnippetLines()); + + log.info("Finished {} -->", methodName); + } + + @Test + public void testScanConfigResolutionFromSettingsFile() { + String methodName = new Object() {}.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + assertNotNull("Scan config settings path should exist", scanConfigSettingsPath); + ScanossSettings settings = ScanossSettings.createFromPath(scanConfigSettingsPath); + assertNotNull("Settings should not be null", settings); + + // CLI provides some values + FileSnippet cliConfig = FileSnippet.builder() + .minSnippetHits(2) + .minSnippetLines(9) + .rankingThreshold(4) + .build(); + + FileSnippet resolved = settings.getResolvedScanConfig(cliConfig); + + // file_snippet provides min_snippet_hits=3 (overrides cli: 2) + assertEquals(Integer.valueOf(3), resolved.getMinSnippetHits()); + // file_snippet provides ranking_threshold=5 (overrides cli: 4) + assertEquals(Integer.valueOf(5), resolved.getRankingThreshold()); + // file_snippet provides ranking_enabled=true (CLI doesn't set it) + assertEquals(Boolean.TRUE, resolved.getRankingEnabled()); + // file_snippet provides min_snippet_lines=10 (overrides cli: 9) + assertEquals(Integer.valueOf(10), resolved.getMinSnippetLines()); + // file_snippet provides honour_file_exts=true (CLI doesn't set it) + assertEquals(Boolean.TRUE, resolved.getHonourFileExts()); + + log.info("Finished {} -->", methodName); + } + + @Test + public void testScanConfigUnsetValues() { + String methodName = new Object() {}.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + // Default config - all values unset + FileSnippet defaultConfig = FileSnippet.builder().build(); + + assertFalse("min_snippet_hits should be unset", defaultConfig.isMinSnippetHitsSet()); + assertFalse("min_snippet_lines 
should be unset", defaultConfig.isMinSnippetLinesSet()); + assertFalse("honour_file_exts should be unset", defaultConfig.isHonourFileExtsSet()); + assertFalse("ranking_enabled should be unset", defaultConfig.isRankingEnabledSet()); + assertFalse("ranking_threshold should be unset", defaultConfig.isRankingThresholdSet()); + assertFalse("skip_headers_limit should be unset", defaultConfig.isSkipHeadersLimitSet()); + + // Config with values set + FileSnippet setConfig = FileSnippet.builder() + .minSnippetHits(3) + .minSnippetLines(10) + .honourFileExts(true) + .rankingEnabled(false) + .rankingThreshold(5) + .skipHeadersLimit(20) + .build(); + + assertTrue("min_snippet_hits should be set", setConfig.isMinSnippetHitsSet()); + assertTrue("min_snippet_lines should be set", setConfig.isMinSnippetLinesSet()); + assertTrue("honour_file_exts should be set", setConfig.isHonourFileExtsSet()); + assertTrue("ranking_enabled should be set", setConfig.isRankingEnabledSet()); + assertTrue("ranking_threshold should be set", setConfig.isRankingThresholdSet()); + assertTrue("skip_headers_limit should be set", setConfig.isSkipHeadersLimitSet()); + + log.info("Finished {} -->", methodName); + } + + @Test + public void testScanConfigResolutionWithNulls() { + String methodName = new Object() {}.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + // Test resolution with null layers + FileSnippet cliConfig = FileSnippet.builder() + .minSnippetHits(2) + .build(); + + FileSnippet resolved = FileSnippet.resolve(cliConfig, null); + assertEquals(Integer.valueOf(2), resolved.getMinSnippetHits()); + assertNull("ranking_enabled should remain unset", resolved.getRankingEnabled()); + + // Test with all nulls + FileSnippet allNullResolved = FileSnippet.resolve(null, null); + assertFalse("All values should be unset", allNullResolved.isMinSnippetHitsSet()); + assertNull("ranking_enabled should be null", allNullResolved.getRankingEnabled()); + + log.info("Finished {} 
-->", methodName); + } + + @Test + public void testExistingSettingsWithoutScanConfig() { + String methodName = new Object() {}.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + // Test that existing settings file without scan config fields still works + ScanossSettings settings = ScanossSettings.createFromPath(existingSettingsPath); + assertNotNull("Settings should not be null", settings); + + // Scan config fields should be null when not in JSON + assertNull("file snippet should be null", settings.getSettings().getFileSnippet()); + + + // Resolution should still work with defaults + FileSnippet cliConfig = FileSnippet.builder().minSnippetHits(2).build(); + FileSnippet resolved = settings.getResolvedScanConfig(cliConfig); + assertEquals(Integer.valueOf(2), resolved.getMinSnippetHits()); + + log.info("Finished {} -->", methodName); + } } diff --git a/src/test/java/com/scanoss/TestWinnowing.java b/src/test/java/com/scanoss/TestWinnowing.java index 449a324..58d0bd9 100644 --- a/src/test/java/com/scanoss/TestWinnowing.java +++ b/src/test/java/com/scanoss/TestWinnowing.java @@ -334,5 +334,91 @@ public void TestDeobfuscateFilePathInvalid() { log.info("Finished {} -->", methodName); } + + @Test + public void TestDetectHeaderLines() { + String methodName = new Object() { + }.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + Winnowing winnowing = Winnowing.builder().build(); + + // Test with a typical Java file header + String javaHeader = "// SPDX-License-Identifier: MIT\n" + + "/*\n" + + " * Copyright (c) 2024, SCANOSS\n" + + " */\n" + + "package com.scanoss;\n" + + "\n" + + "import java.util.List;\n" + + "import java.io.File;\n" + + "\n" + + "public class MyClass {\n" + + " int x = 1;\n" + + "}\n"; + char[] contents = javaHeader.toCharArray(); + int headerLines = winnowing.detectHeaderLines(contents, 0); + assertEquals("Should detect 9 header lines", 9, headerLines); + + // Test with limit + 
int limitedHeaderLines = winnowing.detectHeaderLines(contents, 5); + assertEquals("Should detect at most 5 header lines with limit", 5, limitedHeaderLines); + + // Test with no header + String noHeader = "public class MyClass {\n int x = 1;\n}\n"; + int noHeaderLines = winnowing.detectHeaderLines(noHeader.toCharArray(), 0); + assertEquals("Should detect 0 header lines", 0, noHeaderLines); + + // Test with block comment + String blockComment = "/*\n * License block\n * More license\n */\n\nclass Foo {}\n"; + int blockLines = winnowing.detectHeaderLines(blockComment.toCharArray(), 0); + assertEquals("Should detect 5 header lines (block comment + blank)", 5, blockLines); + + log.info("Finished {} -->", methodName); + } + + @Test + public void TestWinnowingSkipHeaders() { + String methodName = new Object() { + }.getClass().getEnclosingMethod().getName(); + log.info("<-- Starting {}", methodName); + + // Build content with a license header + code + String content = "// License header line 1\n" + + "// License header line 2\n" + + "\n" + + "import java.util.List;\n" + + "\n" + + "sample c code with lots of code that we should analyse\n" + + "And even more code to get connected.\n" + + "And we need to get this as long as possible, in order to trigger snippet matching.\n" + + "Here comes more code to help get this working.\n" + + "Please help get this across the line. 
We need all the help we can get.\n"; + + // Without skip headers - should generate snippet hashes + Winnowing winnowingNoSkip = Winnowing.builder().skipHeaders(false).build(); + String wfpNoSkip = winnowingNoSkip.wfpForContents("test.c", false, content.getBytes()); + assertNotNull(wfpNoSkip); + assertTrue("Should have snippets", snippetPat.matcher(wfpNoSkip).find()); + + // With skip headers - should still generate snippet hashes (just starting later) + Winnowing winnowingSkip = Winnowing.builder().skipHeaders(true).build(); + String wfpSkip = winnowingSkip.wfpForContents("test.c", false, content.getBytes()); + assertNotNull(wfpSkip); + + // The file= line should be the same (full file hash) + assertTrue("Both should start with file= line", wfpNoSkip.startsWith("file=")); + assertTrue("Both should start with file= line", wfpSkip.startsWith("file=")); + + // The file hash should be identical (skip_headers doesn't affect the file hash) + String noSkipFirstLine = wfpNoSkip.split("\n")[0]; + String skipFirstLine = wfpSkip.split("\n")[0]; + assertEquals("File hash should be the same regardless of skip_headers", noSkipFirstLine, skipFirstLine); + + log.info("WFP without skip: {}", wfpNoSkip); + log.info("WFP with skip: {}", wfpSkip); + + log.info("Finished {} -->", methodName); + } }