settingsMap = new LinkedHashMap<>();
+ if (fileSnippet.isMinSnippetHitsSet()) {
+ settingsMap.put("min_snippet_hits", fileSnippet.getMinSnippetHits());
+ }
+ if (fileSnippet.isMinSnippetLinesSet()) {
+ settingsMap.put("min_snippet_lines", fileSnippet.getMinSnippetLines());
+ }
+ if (fileSnippet.isHonourFileExtsSet()) {
+ settingsMap.put("honour_file_exts", fileSnippet.getHonourFileExts());
+ }
+ if (fileSnippet.isRankingEnabledSet()) {
+ settingsMap.put("ranking_enabled", fileSnippet.getRankingEnabled());
+ }
+ if (fileSnippet.isRankingThresholdSet()) {
+ settingsMap.put("ranking_threshold", fileSnippet.getRankingThreshold());
+ }
+ if (!settingsMap.isEmpty()) {
+ String json = JsonUtils.toJson(settingsMap);
+ log.debug("scanoss settings:" + json);
+ String encoded = Base64.getEncoder().encodeToString(json.getBytes(StandardCharsets.UTF_8));
+ headers.put("scanoss-settings", encoded);
+ }
+ }
Request request; // Create multipart request
try {
diff --git a/src/main/java/com/scanoss/settings/FileSnippet.java b/src/main/java/com/scanoss/settings/FileSnippet.java
new file mode 100644
index 0000000..481328e
--- /dev/null
+++ b/src/main/java/com/scanoss/settings/FileSnippet.java
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (c) 2026, SCANOSS
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package com.scanoss.settings;
+
+import com.google.gson.annotations.SerializedName;
+import lombok.*;
+
+/**
+ * Scan configuration parameters for engine tuning.
+ *
+ * Holds parameters that control how the SCANOSS scanning engine processes files.
+ * Used both for JSON deserialization from scanoss.json and for resolved CLI configuration.
+ *
+ * Fields use sentinel values ({@code null}, zero, a negative number or {@code false},
+ * depending on the field) to mean "unset", so that
+ * {@link #resolve(FileSnippet, FileSnippet)} can tell which layer supplied a value.
+ *
+ * Resolution priority (highest to lowest):
+ *
+ * - settings.file_snippet section in scanoss.json
+ * - CLI arguments
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class FileSnippet {
+
+    /** {@code min_snippet_hits}: unset when {@code null}, zero or negative (builder default 0). */
+    @Builder.Default
+    @SerializedName("min_snippet_hits")
+    private Integer minSnippetHits = 0;
+
+    /** {@code min_snippet_lines}: unset when {@code null}, zero or negative (builder default 0). */
+    @Builder.Default
+    @SerializedName("min_snippet_lines")
+    private Integer minSnippetLines = 0;
+
+    /** {@code honour_file_exts}: unset when {@code null}; both true and false count as set. */
+    @SerializedName("honour_file_exts")
+    private Boolean honourFileExts;
+
+    /** {@code ranking_enabled}: unset when {@code null}; both true and false count as set. */
+    @SerializedName("ranking_enabled")
+    private Boolean rankingEnabled;
+
+    /** {@code ranking_threshold}: unset when {@code null} or negative (builder default -1); zero is a set value. */
+    @Builder.Default
+    @SerializedName("ranking_threshold")
+    private Integer rankingThreshold = -1;
+
+    /**
+     * {@code skip_headers}: builder default is false. During resolution only an explicit
+     * {@code true} is propagated, so a higher-priority false cannot switch off a
+     * lower-priority true.
+     */
+    @Builder.Default
+    @SerializedName("skip_headers")
+    private Boolean skipHeaders = false;
+
+    /** {@code skip_headers_limit}: unset when {@code null}, zero or negative (builder default 0). */
+    @Builder.Default
+    @SerializedName("skip_headers_limit")
+    private Integer skipHeadersLimit = 0;
+
+    /**
+     * Reports whether {@code min_snippet_hits} holds a usable value.
+     *
+     * @return true when the value is non-null and greater than zero
+     */
+    public boolean isMinSnippetHitsSet() {
+        return minSnippetHits != null && minSnippetHits > 0;
+    }
+
+    /**
+     * Reports whether {@code min_snippet_lines} holds a usable value.
+     *
+     * @return true when the value is non-null and greater than zero
+     */
+    public boolean isMinSnippetLinesSet() {
+        return minSnippetLines != null && minSnippetLines > 0;
+    }
+
+    /**
+     * Reports whether {@code honour_file_exts} was supplied.
+     *
+     * @return true when the value is non-null
+     */
+    public boolean isHonourFileExtsSet() {
+        return honourFileExts != null;
+    }
+
+    /**
+     * Reports whether {@code ranking_enabled} was supplied.
+     *
+     * @return true when the value is non-null
+     */
+    public boolean isRankingEnabledSet() {
+        return rankingEnabled != null;
+    }
+
+    /**
+     * Reports whether {@code ranking_threshold} holds a usable value.
+     *
+     * @return true when the value is non-null and zero or greater
+     */
+    public boolean isRankingThresholdSet() {
+        return rankingThreshold != null && rankingThreshold >= 0;
+    }
+
+    /**
+     * Reports whether {@code skip_headers_limit} holds a usable value.
+     *
+     * @return true when the value is non-null and greater than zero
+     */
+    public boolean isSkipHeadersLimitSet() {
+        return skipHeadersLimit != null && skipHeadersLimit > 0;
+    }
+
+    /**
+     * Resolves scan configuration by merging two priority layers.
+     * Priority: fileSnippet (highest) > cli (lowest).
+     *
+     * Starts from a builder-default instance, applies every set value from {@code cli},
+     * then applies every set value from {@code fileSnippet} on top so that it wins on
+     * conflicts. When a setting is "unset" at a given level, it is not applied, allowing
+     * the lower-priority level to provide the value. Neither argument is modified.
+     *
+     * @param cli CLI-provided config (lowest priority); may be {@code null}
+     * @param fileSnippet File snippet config from scanoss.json (highest priority); may be {@code null}
+     * @return a new FileSnippet holding the highest-priority set value of each field,
+     *         or the builder default where no layer set it
+     */
+    public static FileSnippet resolve(FileSnippet cli, FileSnippet fileSnippet) {
+        FileSnippet resolved = FileSnippet.builder().build();
+
+        // Lowest priority first: each later layer overwrites what an earlier one set.
+        if (cli != null) {
+            applyNonDefault(cli, resolved);
+        }
+        if (fileSnippet != null) {
+            applyNonDefault(fileSnippet, resolved);
+        }
+
+        return resolved;
+    }
+
+    /**
+     * Copies every set value of {@code source} onto {@code target}; fields that are
+     * unset in {@code source} leave the corresponding field of {@code target} untouched.
+     *
+     * @param source layer whose set values are applied
+     * @param target instance being built up; mutated in place
+     */
+    private static void applyNonDefault(FileSnippet source, FileSnippet target) {
+        if (source.isMinSnippetHitsSet()) {
+            target.setMinSnippetHits(source.getMinSnippetHits());
+        }
+        if (source.isMinSnippetLinesSet()) {
+            target.setMinSnippetLines(source.getMinSnippetLines());
+        }
+        if (source.isHonourFileExtsSet()) {
+            target.setHonourFileExts(source.getHonourFileExts());
+        }
+        if (source.isRankingEnabledSet()) {
+            target.setRankingEnabled(source.getRankingEnabled());
+        }
+        if (source.isRankingThresholdSet()) {
+            target.setRankingThreshold(source.getRankingThreshold());
+        }
+        // Null-safe check without unboxing; false is indistinguishable from "unset" here.
+        if (Boolean.TRUE.equals(source.getSkipHeaders())) {
+            target.setSkipHeaders(true);
+        }
+        if (source.isSkipHeadersLimitSet()) {
+            target.setSkipHeadersLimit(source.getSkipHeadersLimit());
+        }
+    }
+}
diff --git a/src/main/java/com/scanoss/settings/ScanossSettings.java b/src/main/java/com/scanoss/settings/ScanossSettings.java
index ff975b9..733dc04 100644
--- a/src/main/java/com/scanoss/settings/ScanossSettings.java
+++ b/src/main/java/com/scanoss/settings/ScanossSettings.java
@@ -23,6 +23,7 @@
package com.scanoss.settings;
import com.google.gson.Gson;
+import com.google.gson.annotations.SerializedName;
import com.scanoss.dto.SbomLegacy;
import lombok.*;
import lombok.extern.slf4j.Slf4j;
@@ -59,6 +60,9 @@ public class ScanossSettings {
@AllArgsConstructor
public static class Settings {
private final @Builder.Default Skip skip = Skip.builder().build();
+
+ @SerializedName("file_snippet")
+ private FileSnippet fileSnippet;
}
@Data
@@ -147,5 +151,21 @@ public List getScanningIgnorePattern() {
return this.settings.getSkip().getPatterns().getScanning();
}
+ /**
+  * Resolves scan configuration by merging CLI arguments with settings from scanoss.json.
+  *
+  * Delegates to {@link FileSnippet#resolve(FileSnippet, FileSnippet)}, passing the
+  * file_snippet section of this instance, or {@code null} when there is no settings block.
+  * Priority (highest to lowest):
+  *
+  * - settings.file_snippet section in scanoss.json
+  * - CLI arguments (fallback)
+  *
+  * @param cliConfig Configuration from CLI arguments; may be {@code null}
+  * @return Resolved FileSnippet with highest-priority non-unset values
+  */
+ public FileSnippet getResolvedScanConfig(FileSnippet cliConfig) {
+     FileSnippet fileSnippet = (settings != null) ? settings.getFileSnippet() : null;
+     return FileSnippet.resolve(cliConfig, fileSnippet);
+ }
}
diff --git a/src/test/java/com/scanoss/TestSettings.java b/src/test/java/com/scanoss/TestSettings.java
index fa71132..73cbe3a 100644
--- a/src/test/java/com/scanoss/TestSettings.java
+++ b/src/test/java/com/scanoss/TestSettings.java
@@ -22,6 +22,7 @@
*/
package com.scanoss;
+import com.scanoss.settings.FileSnippet;
import com.scanoss.settings.ScanossSettings;
import com.scanoss.utils.JsonUtils;
import lombok.extern.slf4j.Slf4j;
@@ -40,6 +41,7 @@
public class TestSettings {
private Path existingSettingsPath;
private Path nonExistentSettingsPath;
+ private Path scanConfigSettingsPath;
@Before
public void Setup() throws URISyntaxException {
@@ -58,6 +60,11 @@ public void Setup() throws URISyntaxException {
existingSettingsPath = Paths.get(resource.toURI());
nonExistentSettingsPath = Paths.get("non-existent-settings.json");
+ var scanConfigResource = getClass().getClassLoader().getResource("scanoss-scan-config.json");
+ if (scanConfigResource != null) {
+ scanConfigSettingsPath = Paths.get(scanConfigResource.toURI());
+ }
+
// Verify the file actually exists
if (!Files.exists(existingSettingsPath)) {
throw new IllegalStateException(
@@ -145,4 +152,172 @@ public void testSkip() {
log.info("Finished {} -->", methodName);
}
+ /**
+  * Loads scanoss-scan-config.json and checks that every field of its file_snippet
+  * section was deserialized with the expected value.
+  */
+ @Test
+ public void testScanConfigFromJson() {
+     String methodName = new Object() {}.getClass().getEnclosingMethod().getName();
+     log.info("<-- Starting {}", methodName);
+
+     assertNotNull("Scan config settings path should exist", scanConfigSettingsPath);
+     ScanossSettings settings = ScanossSettings.createFromPath(scanConfigSettingsPath);
+     assertNotNull("Settings should not be null", settings);
+
+     // Guard before any field access so a missing section fails with this message, not an NPE
+     FileSnippet fileSnippet = settings.getSettings().getFileSnippet();
+     assertNotNull("file_snippet should not be null", fileSnippet);
+
+     // Verify each field of the file_snippet section
+     assertEquals(Integer.valueOf(3), fileSnippet.getMinSnippetHits());
+     assertEquals(Integer.valueOf(10), fileSnippet.getMinSnippetLines());
+     assertEquals(Boolean.TRUE, fileSnippet.getHonourFileExts());
+     assertEquals(Boolean.TRUE, fileSnippet.getRankingEnabled());
+     assertEquals(Integer.valueOf(5), fileSnippet.getRankingThreshold());
+     assertEquals(Boolean.FALSE, fileSnippet.getSkipHeaders());
+     assertEquals(Integer.valueOf(0), fileSnippet.getSkipHeadersLimit());
+
+     log.info("Finished {} -->", methodName);
+ }
+
+ /**
+  * Checks that resolve() prefers file_snippet values and falls back to the CLI value
+  * for every field the file_snippet layer leaves unset.
+  */
+ @Test
+ public void testScanConfigResolution() {
+     String methodName = new Object() {}.getClass().getEnclosingMethod().getName();
+     log.info("<-- Starting {}", methodName);
+
+     // CLI config (lowest priority)
+     FileSnippet cliConfig = FileSnippet.builder()
+             .minSnippetHits(2)
+             .minSnippetLines(5)
+             .rankingEnabled(false)
+             .rankingThreshold(3)
+             .build();
+
+     // File-snippet config (highest priority)
+     FileSnippet fileSnippetConfig = FileSnippet.builder()
+             .minSnippetHits(5)
+             .rankingThreshold(8)
+             .build();
+
+     FileSnippet resolved = FileSnippet.resolve(cliConfig, fileSnippetConfig);
+
+     // Set by both layers: file_snippet min_snippet_hits=5 beats CLI value 2
+     assertEquals(Integer.valueOf(5), resolved.getMinSnippetHits());
+     // Only the CLI sets ranking_enabled (false); file_snippet leaves it null, so the CLI value survives
+     assertEquals(Boolean.FALSE, resolved.getRankingEnabled());
+     // Set by both layers: file_snippet ranking_threshold=8 beats CLI value 3
+     assertEquals(Integer.valueOf(8), resolved.getRankingThreshold());
+     // Only the CLI sets min_snippet_lines (5); file_snippet keeps the unset default
+     assertEquals(Integer.valueOf(5), resolved.getMinSnippetLines());
+
+     log.info("Finished {} -->", methodName);
+ }
+
+ /**
+  * Resolves CLI values against the file_snippet section loaded from
+  * scanoss-scan-config.json and expects the file values to win on every field checked.
+  */
+ @Test
+ public void testScanConfigResolutionFromSettingsFile() {
+     final String testName = new Object() {}.getClass().getEnclosingMethod().getName();
+     log.info("<-- Starting {}", testName);
+
+     assertNotNull("Scan config settings path should exist", scanConfigSettingsPath);
+     final ScanossSettings loaded = ScanossSettings.createFromPath(scanConfigSettingsPath);
+     assertNotNull("Settings should not be null", loaded);
+
+     // Lowest-priority layer: three numeric values as they would arrive from the command line
+     final FileSnippet fromCli = FileSnippet.builder()
+             .rankingThreshold(4)
+             .minSnippetLines(9)
+             .minSnippetHits(2)
+             .build();
+     final FileSnippet effective = loaded.getResolvedScanConfig(fromCli);
+
+     // min_snippet_hits: expected 3 from the file, replacing the CLI value 2
+     assertEquals(Integer.valueOf(3), effective.getMinSnippetHits());
+     // ranking_threshold: expected 5 from the file, replacing the CLI value 4
+     assertEquals(Integer.valueOf(5), effective.getRankingThreshold());
+     // ranking_enabled: expected true from the file; the CLI layer never sets it
+     assertEquals(Boolean.TRUE, effective.getRankingEnabled());
+     // min_snippet_lines: expected 10 from the file, replacing the CLI value 9
+     assertEquals(Integer.valueOf(10), effective.getMinSnippetLines());
+     // honour_file_exts: expected true from the file; the CLI layer never sets it
+     assertEquals(Boolean.TRUE, effective.getHonourFileExts());
+
+     log.info("Finished {} -->", testName);
+ }
+
+ /**
+  * Exercises the isXxxSet() predicates: all must be false on a builder-default
+  * instance and true once each field is given an explicit value.
+  */
+ @Test
+ public void testScanConfigUnsetValues() {
+     final String testName = new Object() {}.getClass().getEnclosingMethod().getName();
+     log.info("<-- Starting {}", testName);
+
+     // Nothing supplied: every predicate is expected to report "unset"
+     final FileSnippet untouched = FileSnippet.builder().build();
+     assertFalse("min_snippet_hits should be unset", untouched.isMinSnippetHitsSet());
+     assertFalse("min_snippet_lines should be unset", untouched.isMinSnippetLinesSet());
+     assertFalse("honour_file_exts should be unset", untouched.isHonourFileExtsSet());
+     assertFalse("ranking_enabled should be unset", untouched.isRankingEnabledSet());
+     assertFalse("ranking_threshold should be unset", untouched.isRankingThresholdSet());
+     assertFalse("skip_headers_limit should be unset", untouched.isSkipHeadersLimitSet());
+
+     // Every checked field supplied; ranking_enabled=false must still count as set
+     final FileSnippet populated = FileSnippet.builder()
+             .skipHeadersLimit(20)
+             .rankingThreshold(5)
+             .rankingEnabled(false)
+             .honourFileExts(true)
+             .minSnippetLines(10)
+             .minSnippetHits(3)
+             .build();
+     assertTrue("min_snippet_hits should be set", populated.isMinSnippetHitsSet());
+     assertTrue("min_snippet_lines should be set", populated.isMinSnippetLinesSet());
+     assertTrue("honour_file_exts should be set", populated.isHonourFileExtsSet());
+     assertTrue("ranking_enabled should be set", populated.isRankingEnabledSet());
+     assertTrue("ranking_threshold should be set", populated.isRankingThresholdSet());
+     assertTrue("skip_headers_limit should be set", populated.isSkipHeadersLimitSet());
+
+     log.info("Finished {} -->", testName);
+ }
+
+ /**
+  * Confirms that resolve() tolerates a null layer on either or both sides.
+  */
+ @Test
+ public void testScanConfigResolutionWithNulls() {
+     final String testName = new Object() {}.getClass().getEnclosingMethod().getName();
+     log.info("<-- Starting {}", testName);
+
+     // Only the CLI layer is present: its value passes through, other fields stay unset
+     final FileSnippet cliOnly = FileSnippet.resolve(FileSnippet.builder().minSnippetHits(2).build(), null);
+     assertEquals(Integer.valueOf(2), cliOnly.getMinSnippetHits());
+     assertNull("ranking_enabled should remain unset", cliOnly.getRankingEnabled());
+
+     // Neither layer is present: the result is a builder-default instance
+     final FileSnippet noLayers = FileSnippet.resolve(null, null);
+     assertFalse("All values should be unset", noLayers.isMinSnippetHitsSet());
+     assertNull("ranking_enabled should be null", noLayers.getRankingEnabled());
+
+     log.info("Finished {} -->", testName);
+ }
+
+ /**
+  * Checks that a settings file with no file_snippet section still loads, and that
+  * resolution then returns the CLI value.
+  */
+ @Test
+ public void testExistingSettingsWithoutScanConfig() {
+     final String testName = new Object() {}.getClass().getEnclosingMethod().getName();
+     log.info("<-- Starting {}", testName);
+
+     // The pre-existing fixture carries no file_snippet section
+     final ScanossSettings legacy = ScanossSettings.createFromPath(existingSettingsPath);
+     assertNotNull("Settings should not be null", legacy);
+     assertNull("file snippet should be null", legacy.getSettings().getFileSnippet());
+
+     // With no file layer, the CLI value is the only candidate
+     final FileSnippet effective = legacy.getResolvedScanConfig(FileSnippet.builder().minSnippetHits(2).build());
+     assertEquals(Integer.valueOf(2), effective.getMinSnippetHits());
+
+     log.info("Finished {} -->", testName);
+ }
}
diff --git a/src/test/java/com/scanoss/TestWinnowing.java b/src/test/java/com/scanoss/TestWinnowing.java
index 449a324..58d0bd9 100644
--- a/src/test/java/com/scanoss/TestWinnowing.java
+++ b/src/test/java/com/scanoss/TestWinnowing.java
@@ -334,5 +334,91 @@ public void TestDeobfuscateFilePathInvalid() {
log.info("Finished {} -->", methodName);
}
+
+ /**
+  * Exercises detectHeaderLines() on four inputs: a full Java-style header, the same
+  * header with a limit of 5, a file with no header, and a lone block comment.
+  */
+ @Test
+ public void TestDetectHeaderLines() {
+     final String testName = new Object() {
+     }.getClass().getEnclosingMethod().getName();
+     log.info("<-- Starting {}", testName);
+
+     final Winnowing detector = Winnowing.builder().build();
+
+     // Licence line, block comment, package, imports and blank lines ahead of the class
+     final String fullHeaderSource = "// SPDX-License-Identifier: MIT\n" +
+             "/*\n" +
+             " * Copyright (c) 2024, SCANOSS\n" +
+             " */\n" +
+             "package com.scanoss;\n" +
+             "\n" +
+             "import java.util.List;\n" +
+             "import java.io.File;\n" +
+             "\n" +
+             "public class MyClass {\n" +
+             " int x = 1;\n" +
+             "}\n";
+     final char[] fullHeaderChars = fullHeaderSource.toCharArray();
+
+     // Second argument 0: the whole header is expected to be counted
+     assertEquals("Should detect 9 header lines", 9, detector.detectHeaderLines(fullHeaderChars, 0));
+
+     // Second argument 5: the count is expected to stop at 5
+     assertEquals("Should detect at most 5 header lines with limit", 5,
+             detector.detectHeaderLines(fullHeaderChars, 5));
+
+     // Code from the very first line
+     final char[] codeOnlyChars = "public class MyClass {\n int x = 1;\n}\n".toCharArray();
+     assertEquals("Should detect 0 header lines", 0, detector.detectHeaderLines(codeOnlyChars, 0));
+
+     // A four-line block comment followed by one blank line
+     final char[] blockCommentChars = "/*\n * License block\n * More license\n */\n\nclass Foo {}\n".toCharArray();
+     assertEquals("Should detect 5 header lines (block comment + blank)", 5,
+             detector.detectHeaderLines(blockCommentChars, 0));
+
+     log.info("Finished {} -->", testName);
+ }
+
+ /**
+  * Fingerprints the same content with skipHeaders off and on, and checks that the
+  * leading file= line is identical in both outputs.
+  */
+ @Test
+ public void TestWinnowingSkipHeaders() {
+     final String testName = new Object() {
+     }.getClass().getEnclosingMethod().getName();
+     log.info("<-- Starting {}", testName);
+
+     // Two comment lines, a blank, an import and a blank, followed by five lines of body text
+     final String source = "// License header line 1\n" +
+             "// License header line 2\n" +
+             "\n" +
+             "import java.util.List;\n" +
+             "\n" +
+             "sample c code with lots of code that we should analyse\n" +
+             "And even more code to get connected.\n" +
+             "And we need to get this as long as possible, in order to trigger snippet matching.\n" +
+             "Here comes more code to help get this working.\n" +
+             "Please help get this across the line. We need all the help we can get.\n";
+
+     // Baseline run with header skipping off: snippet lines must be present
+     final Winnowing plainWinnowing = Winnowing.builder().skipHeaders(false).build();
+     final String plainWfp = plainWinnowing.wfpForContents("test.c", false, source.getBytes());
+     assertNotNull(plainWfp);
+     assertTrue("Should have snippets", snippetPat.matcher(plainWfp).find());
+
+     // Same content with header skipping on; only non-null output is required here
+     final Winnowing skippingWinnowing = Winnowing.builder().skipHeaders(true).build();
+     final String skippingWfp = skippingWinnowing.wfpForContents("test.c", false, source.getBytes());
+     assertNotNull(skippingWfp);
+
+     // Each output must open with its file= line
+     assertTrue("Both should start with file= line", plainWfp.startsWith("file="));
+     assertTrue("Both should start with file= line", skippingWfp.startsWith("file="));
+
+     // The first line must match between the two runs
+     final String plainHeaderLine = plainWfp.split("\n")[0];
+     final String skippingHeaderLine = skippingWfp.split("\n")[0];
+     assertEquals("File hash should be the same regardless of skip_headers", plainHeaderLine, skippingHeaderLine);
+
+     log.info("WFP without skip: {}", plainWfp);
+     log.info("WFP with skip: {}", skippingWfp);
+
+     log.info("Finished {} -->", testName);
+ }
}