diff --git a/app/components/Workflow/DependencyGraph/CustomNodes/Code/CodeNode.js b/app/components/Workflow/DependencyGraph/CustomNodes/Code/CodeNode.js index e904903..4841511 100644 --- a/app/components/Workflow/DependencyGraph/CustomNodes/Code/CodeNode.js +++ b/app/components/Workflow/DependencyGraph/CustomNodes/Code/CodeNode.js @@ -18,6 +18,7 @@ const ICON_TYPES = { DART: `${ICON_PATH}dart.svg`, SQL: `${ICON_PATH}sql.svg`, GO: `${ICON_PATH}go.svg`, + SCALA: `${ICON_PATH}scala.svg`, C: `${ICON_PATH}c.svg`, }; @@ -47,6 +48,8 @@ function CodeNode({ node, renderType }) { iconUrl = ICON_TYPES.SQL; } else if (node.assetType === 'go') { iconUrl = ICON_TYPES.GO; + } else if (node.assetType === 'scala') { + iconUrl = ICON_TYPES.SCALA; } else if (node.assetType === 'c') { iconUrl = ICON_TYPES.C; } else if (node.assetType === 'dart') { diff --git a/app/components/Workflow/DependencyGraph/DependencyGraphEChart.js b/app/components/Workflow/DependencyGraph/DependencyGraphEChart.js index b48beab..a4e6b6e 100644 --- a/app/components/Workflow/DependencyGraph/DependencyGraphEChart.js +++ b/app/components/Workflow/DependencyGraph/DependencyGraphEChart.js @@ -25,6 +25,7 @@ const ICON_TYPES = { DART: `${ICON_PATH}dart.svg`, SQL: `${ICON_PATH}sql.svg`, GO: `${ICON_PATH}go.svg`, + SCALA: `${ICON_PATH}scala.svg`, C: `${ICON_PATH}c.svg`, }; @@ -55,6 +56,8 @@ function getIcon(node) { iconUrl = ICON_TYPES.SQL; } else if (node.value === 'go') { iconUrl = ICON_TYPES.GO; + } else if (node.value === 'scala') { + iconUrl = ICON_TYPES.SCALA; } else if (node.value === 'c') { iconUrl = ICON_TYPES.C; } else if (node.value === 'dart') { diff --git a/app/constants/assets-config.js b/app/constants/assets-config.js index e692866..3c57bc1 100644 --- a/app/constants/assets-config.js +++ b/app/constants/assets-config.js @@ -59,6 +59,11 @@ module.exports = { extensions: ['rs'], categories: ['code'], }, + { + name: 'Scala', + extensions: ['scala'], + categories: ['code'], + }, { name: 'Dart', extensions:
['dart'], diff --git a/app/images/scala.svg b/app/images/scala.svg new file mode 100644 index 0000000..efcfd1a --- /dev/null +++ b/app/images/scala.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/app/preload.js b/app/preload.js index ff09b64..232cd4c 100644 --- a/app/preload.js +++ b/app/preload.js @@ -16,6 +16,7 @@ import ProjectUtil from './utils/project'; import JavaHandler from './services/assets/handlers/java'; import RustHandler from './services/assets/handlers/rust'; import GoHandler from './services/assets/handlers/go'; +import ScalaHandler from './services/assets/handlers/scala'; import CppHandler from './services/assets/handlers/cpp'; import CHandler from './services/assets/handlers/c'; import Constants from './constants/constants'; @@ -63,6 +64,7 @@ contextBridge.exposeInMainWorld('workerElectronBridge', { new RustHandler(), new SQLHandler(), new GoHandler(), + new ScalaHandler(), new CppHandler(), new CHandler(), new DartHandler(), diff --git a/app/services/assets/handlers/scala.js b/app/services/assets/handlers/scala.js new file mode 100644 index 0000000..a8444a6 --- /dev/null +++ b/app/services/assets/handlers/scala.js @@ -0,0 +1,267 @@ +import BaseCodeHandler from './baseCode'; +import Constants from '../../../constants/constants'; + +const FILE_EXTENSION_LIST = ['scala']; + +export default class ScalaHandler extends BaseCodeHandler { + static id = 'StatWrap.ScalaHandler'; + + constructor() { + super(ScalaHandler.id, FILE_EXTENSION_LIST); + } + + id() { + return ScalaHandler.id; + } + + getLibraryId(packageName, importName) { + let id = ''; + if (packageName && importName) { + id = `${packageName}.${importName}`; + } else if (packageName) { + id = packageName; + } else if (importName) { + id = importName; + } else { + id = '(unknown)'; + } + return id; + } + + getInputs(uri, text) { + const inputs = []; + if (!text || text.trim() === '') { + return inputs; + } + + // For file read operations + const processedPaths = new Set(); + + // For 
nested reader operations like BufferedReader, InputStreamReader + const nestedReaderMatches = [ + ...text.matchAll(/new\s+(BufferedReader|InputStreamReader)\s*\(\s*new\s+(?:FileReader|FileInputStream)\s*\(\s*(?:new\s+File\s*\(\s*)?(['"]{1,}[\s\S]*?['"]{1,}|[a-zA-Z0-9_.]+)[\s\S]*?\)\s*\)/gim), + ]; + + for (let index = 0; index < nestedReaderMatches.length; index++) { + const match = nestedReaderMatches[index]; + const operation = match[1]; + const path = match[2].trim(); + if (!processedPaths.has(path)) { + inputs.push({ + id: `${operation} - ${path}`, + type: Constants.DependencyType.DATA, + path, + }); + processedPaths.add(path); + } + } + + // For direct file read operations + const fileReadMatches = [ + ...text.matchAll(/new\s+(FileInputStream|FileReader|Scanner)\s*\(\s*(?:new\s+File\s*\(\s*)?(['"]{1,}[\s\S]*?['"]{1,}|[a-zA-Z0-9_.]+)[\s\S]*?\)/gim), + ...text.matchAll(/Files\.(?:(read|readAllLines|readAllBytes|newBufferedReader|newInputStream))\s*\(\s*(?:Paths\.get\s*\(\s*)?\s*(['"]{1,}[\s\S]*?['"]{1,}|[a-zA-Z0-9_.]+)[\s\S]*?\)/gim), + ...text.matchAll(/((?:scala\.io\.)?Source\.fromFile)\s*\(\s*(?:new\s+File\s*\(\s*)?(['"]{1,}[\s\S]*?['"]{1,}|[a-zA-Z0-9_.]+)[\s\S]*?\)/gim), + ]; + + for (let index = 0; index < fileReadMatches.length; index++) { + const match = fileReadMatches[index]; + const operation = match[1] || 'Files.read'; + const path = match.length > 2 ? 
match[2].trim() : match[1].trim(); + + if (!processedPaths.has(path)) { + inputs.push({ + id: `${operation} - ${path}`, + type: Constants.DependencyType.DATA, + path, + }); + processedPaths.add(path); + } + } + + // For Spark Big Data IO operations + const sparkReadMatches = [ + ...text.matchAll(/\b(csv|parquet|json|text|textFile|orc|jdbc|load)\s*\(\s*(['"]{1,}[\s\S]*?['"]{1,}|[a-zA-Z0-9_.]+)[\s\S]*?\)/gim), + ]; + + for (let index = 0; index < sparkReadMatches.length; index++) { + const match = sparkReadMatches[index]; + const operation = match[1].trim(); + const path = match[2].trim(); + + if (!processedPaths.has(path)) { + inputs.push({ + id: `${operation} - ${path}`, + type: Constants.DependencyType.DATA, + path, + }); + processedPaths.add(path); + } + } + + // For database operations + const jdbcMatches = [ + ...text.matchAll(/DriverManager\.getConnection\s*\(\s*(['"]{1,}\s*?[\s\S]+?['"]{1,})[\s\S]*?\)/gim), + ]; + + for (let index = 0; index < jdbcMatches.length; index++) { + const match = jdbcMatches[index]; + const path = match[1].trim(); + inputs.push({ + id: `JDBC - ${path}`, + type: Constants.DependencyType.DATA, + path, + }); + } + + return inputs; + } + + getOutputs(uri, text) { + const outputs = []; + if (!text || text.trim() === '') { + return outputs; + } + + // For file write operations + const processedPaths = new Set(); + + // For nested writer operations like BufferedWriter, OutputStreamWriter + const nestedWriterMatches = [ + ...text.matchAll(/new\s+(BufferedWriter|OutputStreamWriter)\s*\(\s*new\s+(?:FileWriter|FileOutputStream)\s*\(\s*(?:new\s+File\s*\(\s*)?(['"]{1,}\s*?[\s\S]+?['"]{1,})[\s\S]*?\)\s*\)/gim), + ]; + + for (let index = 0; index < nestedWriterMatches.length; index++) { + const match = nestedWriterMatches[index]; + const operation = match[1]; + const path = match[2].trim(); + if (!processedPaths.has(path)) { + outputs.push({ + id: `${operation} - ${path}`, + type: Constants.DependencyType.DATA, + path, + }); + 
processedPaths.add(path); + } + } + + // For file write operations + const fileWriteMatches = [ + ...text.matchAll(/new\s+(FileOutputStream|FileWriter|PrintWriter)\s*\(\s*(?:new\s+File\s*\(\s*)?(['"]{1,}[\s\S]*?['"]{1,}|[a-zA-Z0-9_.]+)[\s\S]*?\)/gim), + ...text.matchAll(/(Files\.(?:write|writeString|newBufferedWriter|newOutputStream))\s*\(\s*(?:Paths\.get\s*\(\s*)?\s*(['"]{1,}[\s\S]*?['"]{1,}|[a-zA-Z0-9_.]+)[\s\S]*?\)/gim), + ]; + + for (let index = 0; index < fileWriteMatches.length; index++) { + const match = fileWriteMatches[index]; + const operation = match[1] || 'Files.write'; + const path = match.length > 2 ? match[2].trim() : match[1].trim(); + + if (!processedPaths.has(path)) { + outputs.push({ + id: `${operation} - ${path}`, + type: Constants.DependencyType.DATA, + path, + }); + processedPaths.add(path); + } + } + + // For spark / big data write operations + const sparkWriteMatches = [ + ...text.matchAll(/\b(csv|parquet|json|text|textFile|orc|jdbc|save|saveAsTable|saveAsTextFile)\s*\(\s*(['"]{1,}[\s\S]*?['"]{1,}|[a-zA-Z0-9_.]+)[\s\S]*?\)/gim), + ]; + + for (let index = 0; index < sparkWriteMatches.length; index++) { + const match = sparkWriteMatches[index]; + const operation = match[1].trim(); + const path = match[2].trim(); + + if (!processedPaths.has(path)) { + outputs.push({ + id: `${operation} - ${path}`, + type: Constants.DependencyType.DATA, + path, + }); + processedPaths.add(path); + } + } + + // For image write operations + const imageWriteMatches = [ + ...text.matchAll(/ImageIO\.write\s*\(\s*[\s\S]*?,\s*['"]{1,}[\s\S]+?['"]{1,}\s*,\s*(?:new\s+File\s*\(\s*)?(['"]{1,}\s*?[\s\S]+?['"]{1,})[\s\S]*?\)/gim), + ]; + + for (let index = 0; index < imageWriteMatches.length; index++) { + const match = imageWriteMatches[index]; + const path = match[1].trim(); + outputs.push({ + id: `ImageIO.write - ${path}`, + type: Constants.DependencyType.FIGURE, + path, + }); + } + + // For chart export operations + const chartExportMatches = [ +
...text.matchAll(/ChartUtilities\.saveChartAs(?:JPEG|PNG)\s*\(\s*(?:new\s+File\s*\(\s*)?(['"]{1,}[\s\S]+?['"]{1,})[\s\S]*?\)/gim), + ...text.matchAll(/ChartUtils\.saveChartAs(?:JPEG|PNG)\s*\(\s*(?:new\s+File\s*\(\s*)?(['"]{1,}[\s\S]+?['"]{1,})[\s\S]*?\)/gim), + ]; + + for (let index = 0; index < chartExportMatches.length; index++) { + const match = chartExportMatches[index]; + const path = match[1].trim(); + outputs.push({ + id: `Chart Export - ${path}`, + type: Constants.DependencyType.FIGURE, + path, + }); + } + + return outputs; + } + + getLibraries(uri, text) { + const libraries = []; + if (!text || text.trim() === '') { + return libraries; + } + + const packageMatches = [ + ...text.matchAll(/package\s+([\w.]+)/gm), + ]; + + let currentPackage = ''; + if (packageMatches.length > 0) { + currentPackage = packageMatches[0][1]; + } + + const importMatches = [ + ...text.matchAll(/import\s+(?:static\s+)?([^;\r\n]+)/gm), + ]; + + for (let index = 0; index < importMatches.length; index++) { + const match = importMatches[index]; + const fullImport = match[1].trim(); + + let packageName, className; + const lastDotIndex = fullImport.lastIndexOf('.'); + + if (lastDotIndex !== -1) { + packageName = fullImport.substring(0, lastDotIndex); + className = fullImport.substring(lastDotIndex + 1); + } else { + packageName = fullImport; + className = '*'; + } + + + libraries.push({ + id: this.getLibraryId(packageName, className), + module: packageName, + import: className, + alias: null, + }); + } + + return libraries; + } +} diff --git a/app/utils/workflow.js b/app/utils/workflow.js index de99d30..64fac93 100644 --- a/app/utils/workflow.js +++ b/app/utils/workflow.js @@ -8,6 +8,7 @@ import JavaHandler from '../services/assets/handlers/java'; import RustHandler from '../services/assets/handlers/rust'; import SQLHandler from '../services/assets/handlers/sql'; import GoHandler from '../services/assets/handlers/go'; +import ScalaHandler from '../services/assets/handlers/scala'; import 
CppHandler from '../services/assets/handlers/cpp'; import CHandler from '../services/assets/handlers/c'; import DartHandler from '../services/assets/handlers/dart'; @@ -66,6 +67,8 @@ export default class WorkflowUtil { assetType = 'sql'; } else if (AssetUtil.getHandlerMetadata(GoHandler.id, asset.metadata)) { assetType = 'go'; + } else if (AssetUtil.getHandlerMetadata(ScalaHandler.id, asset.metadata)) { + assetType = 'scala'; } else if (AssetUtil.getHandlerMetadata(CHandler.id, asset.metadata)) { assetType = 'c'; } else if (AssetUtil.getHandlerMetadata(DartHandler.id, asset.metadata)) { @@ -371,6 +374,7 @@ export default class WorkflowUtil { WorkflowUtil._getMetadataDependencies(asset, RustHandler.id, libraries, inputs, outputs); WorkflowUtil._getMetadataDependencies(asset, SQLHandler.id, libraries, inputs, outputs); WorkflowUtil._getMetadataDependencies(asset, GoHandler.id, libraries, inputs, outputs); + WorkflowUtil._getMetadataDependencies(asset, ScalaHandler.id, libraries, inputs, outputs); WorkflowUtil._getMetadataDependencies(asset, CppHandler.id, libraries, inputs, outputs); WorkflowUtil._getMetadataDependencies(asset, CHandler.id, libraries, inputs, outputs); @@ -440,6 +444,7 @@ export default class WorkflowUtil { WorkflowUtil._getMetadataDependencies(asset, RustHandler.id, libraries, [], []); WorkflowUtil._getMetadataDependencies(asset, SQLHandler.id, libraries, [], []); WorkflowUtil._getMetadataDependencies(asset, GoHandler.id, libraries, [], []); + WorkflowUtil._getMetadataDependencies(asset, ScalaHandler.id, libraries, [], []); WorkflowUtil._getMetadataDependencies(asset, CppHandler.id, libraries, [], []); WorkflowUtil._getMetadataDependencies(asset, CHandler.id, libraries, [], []); WorkflowUtil._getMetadataDependencies(asset, DartHandler.id, libraries, [], []); diff --git a/test/services/assets/handlers/scala.spec.js b/test/services/assets/handlers/scala.spec.js new file mode 100644 index 0000000..9c4a6d9 --- /dev/null +++ 
b/test/services/assets/handlers/scala.spec.js @@ -0,0 +1,230 @@ +import fs from 'fs'; +import ScalaHandler from '../../../../app/services/assets/handlers/scala'; +import Constants from '../../../../app/constants/constants'; + +jest.mock('fs'); + +describe('services', () => { + describe('ScalaHandler', () => { + afterEach(() => { + jest.restoreAllMocks(); + jest.clearAllMocks(); + }); + + describe('id', () => { + it('should return an id that matches the class name plus StatWrap pseudo-namespace', () => { + expect(new ScalaHandler().id()).toEqual(`StatWrap.${ScalaHandler.name}`); + }); + }); + + describe('includeFile', () => { + it('should include Scala files and exclude others', () => { + const handler = new ScalaHandler(); + // Valid files + expect(handler.includeFile('/path/to/Test.scala')).toBeTruthy(); + expect(handler.includeFile('/path/to/Main.SCALA')).toBeTruthy(); + + // Invalid files + expect(handler.includeFile('/path/to/Test.class')).toBeFalsy(); + expect(handler.includeFile('/path/to/app.jar')).toBeFalsy(); + expect(handler.includeFile(null)).toBeFalsy(); + expect(handler.includeFile('/path/to/Test.scala.bak')).toBeFalsy(); + }); + }); + + describe('getLibraries', () => { + it('should extract import statements', () => { + const libraries = new ScalaHandler().getLibraries( + 'test.uri', + 'import scala.collection.mutable.ListBuffer\nimport java.io.File' + ); + expect(libraries.length).toEqual(2); + expect(libraries[0]).toMatchObject({ + id: 'scala.collection.mutable.ListBuffer', + module: 'scala.collection.mutable', + import: 'ListBuffer', + alias: null, + }); + expect(libraries[1]).toMatchObject({ + id: 'java.io.File', + module: 'java.io', + import: 'File', + alias: null, + }); + }); + + it('should detect wildcard imports', () => { + const libraries = new ScalaHandler().getLibraries( + 'test.uri', + 'import scala.collection.mutable._' + ); + expect(libraries.length).toEqual(1); + expect(libraries[0]).toMatchObject({ + id: 'scala.collection.mutable._', + 
module: 'scala.collection.mutable', + import: '_', + alias: null, + }); + }); + + it('should detect import statements with semicolons', () => { + const libraries = new ScalaHandler().getLibraries( + 'test.uri', + 'import java.lang.Math.PI;' + ); + expect(libraries.length).toEqual(1); + expect(libraries[0]).toMatchObject({ + id: 'java.lang.Math.PI', + module: 'java.lang.Math', + import: 'PI', + alias: null, + }); + }); + }); + + describe('getInputs', () => { + it('should detect file read operations', () => { + const inputs = new ScalaHandler().getInputs( + 'test.uri', + 'val fis = new FileInputStream("input.txt")' + ); + expect(inputs.length).toEqual(1); + expect(inputs[0]).toMatchObject({ + id: 'FileInputStream - "input.txt"', + type: 'data', + path: '"input.txt"', + }); + }); + + it('should detect various file read classes', () => { + const inputs = new ScalaHandler().getInputs( + 'test.uri', + ` + val fis = new FileInputStream("input1.txt") + val fr = new FileReader("input2.txt") + val br = new BufferedReader(new FileReader("input3.txt")) + val scanner = new Scanner(new File("input4.txt")) + val data = Files.readAllBytes(Paths.get("input5.txt")) + val source = Source.fromFile("input6.txt") + val df = spark.read.csv("input7.csv") + val rdd = sc.textFile("input8.txt") + val dfChained = spark.read.option("header", "true").csv("input9.csv") + ` + ); + expect(inputs.length).toEqual(9); + expect(inputs[0].id).toContain('BufferedReader'); + expect(inputs[1].id).toContain('FileInputStream'); + expect(inputs[2].id).toContain('FileReader'); + expect(inputs[3].id).toContain('Scanner'); + expect(inputs[4].id).toContain('readAllBytes'); + expect(inputs[5].id).toContain('Source.fromFile'); + expect(inputs[6].id).toContain('csv'); + expect(inputs[7].id).toContain('textFile'); + expect(inputs[8].id).toContain('csv'); + }); + + it('should detect JDBC connections', () => { + const inputs = new ScalaHandler().getInputs( + 'test.uri', + 'val conn = 
DriverManager.getConnection("jdbc:mysql://localhost:3306/demodatabase")' + ); + expect(inputs.length).toEqual(1); + expect(inputs[0]).toMatchObject({ + id: 'JDBC - "jdbc:mysql://localhost:3306/demodatabase"', + type: 'data', + path: '"jdbc:mysql://localhost:3306/demodatabase"', + }); + }); + }); + + describe('getOutputs', () => { + it('should detect file write operations', () => { + const outputs = new ScalaHandler().getOutputs( + 'test.uri', + ` + val fos = new FileOutputStream("output1.txt") + val fw = new FileWriter("output2.txt") + val bw = new BufferedWriter(new FileWriter("output3.txt")) + val pw = new PrintWriter("output4.txt") + ` + ); + expect(outputs.length).toEqual(4); + expect(outputs[0].id).toContain('BufferedWriter'); + expect(outputs[1].id).toContain('FileOutputStream'); + expect(outputs[2].id).toContain('FileWriter'); + expect(outputs[3].id).toContain('PrintWriter'); + }); + + it('should detect various file write operations', () => { + const outputs = new ScalaHandler().getOutputs( + 'test.uri', + ` + val fos = new FileOutputStream("output1.txt") + val fw = new FileWriter("output2.txt") + val pw = new PrintWriter("output4.txt") + df.write.parquet("output5.parquet") + df.write.mode("overwrite").parquet("output6.parquet") + ` + ); + expect(outputs.length).toEqual(5); + expect(outputs[0].id).toContain('FileOutputStream'); + expect(outputs[1].id).toContain('FileWriter'); + expect(outputs[2].id).toContain('PrintWriter'); + expect(outputs[3].id).toContain('parquet'); + expect(outputs[4].id).toContain('parquet'); + }); + + it('should detect image write operations', () => { + const outputs = new ScalaHandler().getOutputs( + 'test.uri', + 'ImageIO.write(bufferedImage, "png", new File("chart.png"))' + ); + expect(outputs.length).toEqual(1); + expect(outputs[0]).toMatchObject({ + id: 'ImageIO.write - "chart.png"', + type: 'figure', + path: '"chart.png"', + }); + }); + + it('should detect chart export operations', () => { + const outputs = new 
ScalaHandler().getOutputs( + 'test.uri', + ` + ChartUtilities.saveChartAsPNG(new File("chart1.png"), chart, 500, 300) + ChartUtils.saveChartAsJPEG(new File("chart2.jpg"), chart, 500, 300) + ` + ); + expect(outputs.length).toEqual(2); + expect(outputs[0].id).toContain('chart1.png'); + expect(outputs[0].type).toEqual('figure'); + expect(outputs[1].id).toContain('chart2.jpg'); + expect(outputs[1].type).toEqual('figure'); + }); + }); + + describe('scan', () => { + it('should return metadata for a valid Scala file', () => { + fs.readFileSync.mockReturnValue('import scala.collection.mutable.ListBuffer\nclass Test {}'); + + const testAsset = { + uri: '/path/to/Test.scala', + type: 'file', + metadata: [], + }; + + const response = new ScalaHandler().scan(testAsset); + expect(response.metadata[0]).toMatchObject({ + id: 'StatWrap.ScalaHandler', + libraries: [ + { + id: 'scala.collection.mutable.ListBuffer', + module: 'scala.collection.mutable', + import: 'ListBuffer', + } + ] + }); + }); + }); + }); +});