# Acoda/utils.py (security-pride/Acoda, branch main)
from python_transformer import PythonTransformerBase, ParentAwareTransformer, GiveErrorSummary, InductionAnnotation, InsertUnsafeCode, FunctionClassRenamer, VariableRenamer, StringObfuscator, TryExceptWrapper, StopInference

from evaluator import safe_eval, deobfuscate_code as deobfuscate_code_by_llm, quantification_deobfuscation as quanti_deobf

from typing import List, Optional, Dict, Any
import logging
import random
import re
import os
import json
from datetime import datetime


logger = logging.getLogger(__name__)


def apply_give_error_summary(code: str, position: int = -1) -> str:
    """Run the ``GiveErrorSummary`` transformer over ``code``.

    Args:
        code: Source text to transform.
        position: Forwarded unchanged to
            ``GiveErrorSummary.insert_error_summary``.

    Returns:
        The transformer's result, or ``code`` itself when it contains
        nothing but whitespace.

    Raises:
        Exception: Whatever the transformer raises; the error is logged
            and then re-raised unchanged.
    """
    logger.debug(f"Applying GiveErrorSummary transformation, position: {position}, code length: {len(code)}")

    stripped = code.strip()
    if not stripped:
        # Nothing to transform: hand the input back untouched.
        logger.warning("Empty code provided to apply_give_error_summary")
        return code

    try:
        summarized = GiveErrorSummary().insert_error_summary(code, position)
        logger.info(f"GiveErrorSummary transformation successful, result length: {len(summarized)}")
    except Exception as exc:
        logger.error(f"Error in apply_give_error_summary: {exc}")
        raise
    return summarized

def apply_induction_annotation(code: str, position: int = -1, ratio: float = 0.5) -> str:
    """Run the ``InductionAnnotation`` transformer over ``code``.

    Args:
        code: Source text to transform.
        position: Accepted for interface compatibility; not passed to the
            transformer by this function.
        ratio: Only reported in the debug log; not passed to the
            transformer by this function.

    Returns:
        The transformer's result, or ``code`` itself when it contains
        nothing but whitespace.

    Raises:
        Exception: Whatever the transformer raises; the error is logged
            and then re-raised unchanged.
    """
    logger.debug(f"Applying InductionAnnotation transformation, ratio: {ratio}, code length: {len(code)}")

    stripped = code.strip()
    if not stripped:
        logger.warning("Empty code provided to apply_induction_annotation")
        return code

    try:
        # NOTE(review): position/ratio are not forwarded here -- confirm
        # whether insert_induction_annotations is meant to receive them.
        annotated = InductionAnnotation().insert_induction_annotations(code)
        logger.info(f"InductionAnnotation transformation successful, result length: {len(annotated)}")
    except Exception as exc:
        logger.error(f"Error in apply_induction_annotation: {exc}")
        raise
    return annotated

def apply_insert_unsafe_code(code: str) -> List[str]:
    """Produce one ``InsertUnsafeCode`` variant of ``code`` per available block.

    Each block reported by ``get_available_blocks()`` is inserted on its
    own (as a single-element list). A block whose insertion raises is
    logged and skipped, so the result may hold fewer entries than there
    are blocks.

    Args:
        code: Source text to transform.

    Returns:
        The list of successfully generated variants, or ``[code]`` when
        ``code`` contains nothing but whitespace.

    Raises:
        Exception: Errors raised outside the per-block insertion (for
            example while creating the transformer or listing its blocks)
            are logged and re-raised.
    """
    logger.debug(f"Applying InsertUnsafeCode transformation, code length: {len(code)}")

    stripped = code.strip()
    if not stripped:
        logger.warning("Empty code provided to apply_insert_unsafe_code")
        return [code]

    try:
        injector = InsertUnsafeCode()
        block_names = injector.get_available_blocks()
        logger.debug(f"Available unsafe code blocks: {block_names}")

        variants = []
        for block_name in block_names:
            # A failing block must not prevent the remaining ones from
            # being tried, hence the per-iteration handler.
            try:
                variants.append(injector.insert_unsafe_code(code, [block_name]))
                logger.debug(f"Successfully inserted unsafe code block '{block_name}'")
            except Exception as block_exc:
                logger.error(f"Error inserting unsafe code block '{block_name}': {block_exc}")

        logger.info(f"InsertUnsafeCode transformation completed, generated {len(variants)} variants")
        return variants
    except Exception as exc:
        logger.error(f"Error in apply_insert_unsafe_code: {exc}")
        raise

def apply_rename_function_class_name(code: str) -> str:
    """Run the ``FunctionClassRenamer`` transformer over ``code``.

    Args:
        code: Source text to transform.

    Returns:
        The result of ``FunctionClassRenamer.rename_function_class_name``,
        or ``code`` itself when it contains nothing but whitespace.

    Raises:
        Exception: Whatever the transformer raises; the error is logged
            and then re-raised unchanged.
    """
    logger.debug(f"Applying FunctionClassRenamer transformation, code length: {len(code)}")

    stripped = code.strip()
    if not stripped:
        logger.warning("Empty code provided to apply_rename_function_class_name")
        return code

    try:
        renamed = FunctionClassRenamer().rename_function_class_name(code)
        logger.info(f"FunctionClassRenamer transformation successful, result length: {len(renamed)}")
    except Exception as exc:
        logger.error(f"Error in apply_rename_function_class_name: {exc}")
        raise
    return renamed

def apply_rename_variable_name(code: str) -> str:
    """Run the ``VariableRenamer`` transformer over ``code``.

    Args:
        code: Source text to transform.

    Returns:
        The result of ``VariableRenamer.rename_variable_name``, or
        ``code`` itself when it contains nothing but whitespace.

    Raises:
        Exception: Whatever the transformer raises; the error is logged
            and then re-raised unchanged.
    """
    logger.debug(f"Applying VariableRenamer transformation, code length: {len(code)}")

    stripped = code.strip()
    if not stripped:
        logger.warning("Empty code provided to apply_rename_variable_name")
        return code

    try:
        renamed = VariableRenamer().rename_variable_name(code)
        logger.info(f"VariableRenamer transformation successful, result length: {len(renamed)}")
    except Exception as exc:
        logger.error(f"Error in apply_rename_variable_name: {exc}")
        raise
    return renamed

def apply_string_obfuscation(code: str) -> str:
    """Run the ``StringObfuscator`` transformer over ``code``.

    Args:
        code: Source text to transform.

    Returns:
        The result of ``StringObfuscator.obfuscate_strings``, or ``code``
        itself when it contains nothing but whitespace.

    Raises:
        Exception: Whatever the transformer raises; the error is logged
            and then re-raised unchanged.
    """
    logger.debug(f"Applying StringObfuscator transformation, code length: {len(code)}")

    stripped = code.strip()
    if not stripped:
        logger.warning("Empty code provided to apply_string_obfuscation")
        return code

    try:
        obfuscated = StringObfuscator().obfuscate_strings(code)
        logger.info(f"StringObfuscator transformation successful, result length: {len(obfuscated)}")
    except Exception as exc:
        logger.error(f"Error in apply_string_obfuscation: {exc}")
        raise
    return obfuscated

def apply_try_except_wrapper(code: str, entire_code: bool = False) -> str:
    """Run the ``TryExceptWrapper`` transformer over ``code``.

    Args:
        code: Source text to transform.
        entire_code: Accepted for interface compatibility; not passed to
            the transformer by this function.

    Returns:
        The result of ``TryExceptWrapper.wrap_with_try_except``, or
        ``code`` itself when it contains nothing but whitespace.

    Raises:
        Exception: Whatever the transformer raises; the error is logged
            and then re-raised unchanged.
    """
    logger.debug(f"Applying TryExceptWrapper transformation, code length: {len(code)}")

    stripped = code.strip()
    if not stripped:
        logger.warning("Empty code provided to apply_try_except_wrapper")
        return code

    try:
        # NOTE(review): entire_code is not forwarded -- confirm whether
        # wrap_with_try_except is meant to receive it.
        wrapped = TryExceptWrapper().wrap_with_try_except(code)
        logger.info(f"TryExceptWrapper transformation successful, result length: {len(wrapped)}")
    except Exception as exc:
        logger.error(f"Error in apply_try_except_wrapper: {exc}")
        raise
    return wrapped

def apply_stop_inference(code: str, randomize_position: bool = True, multiple_insertions: bool = True, max_insertions: int = 3) -> str:
    """Run the ``StopInference`` transformer over ``code``.

    Args:
        code: Source text to transform.
        randomize_position: Forwarded positionally to
            ``StopInference.stop_inference``.
        multiple_insertions: Forwarded positionally to
            ``StopInference.stop_inference``.
        max_insertions: Forwarded positionally to
            ``StopInference.stop_inference``.

    Returns:
        The transformer's result, or ``code`` itself when it contains
        nothing but whitespace.

    Raises:
        Exception: Whatever the transformer raises; the error is logged
            and then re-raised unchanged.
    """
    logger.debug(f"Applying StopInference transformation, code length: {len(code)}")

    stripped = code.strip()
    if not stripped:
        logger.warning("Empty code provided to apply_stop_inference")
        return code

    try:
        halted = StopInference().stop_inference(
            code,
            randomize_position,
            multiple_insertions,
            max_insertions,
        )
        logger.info(f"StopInference transformation successful, result length: {len(halted)}")
    except Exception as exc:
        logger.error(f"Error in apply_stop_inference: {exc}")
        raise
    return halted

def transform_code(sample, code: str, obfuscation_method: str, **kwargs) -> Optional[str]:
    """Obfuscate ``code`` with one named method and return it only if it validates.

    The chosen ``apply_*`` function is run up to three times. Each
    non-empty result is handed to ``safe_eval`` together with the
    sample's ``'input'`` and ``'output'`` values; the first result for
    which ``safe_eval`` returns a truthy value is returned. Exceptions
    raised during an attempt are logged and count as a failed attempt.

    Args:
        sample: Mapping read via ``.get`` for ``'input'``, ``'output'``,
            and (for the failure report only) ``'problem_id'`` and
            ``'submission_id'``.
        code: Source text to obfuscate.
        obfuscation_method: One of the keys of the dispatch table below.
        **kwargs: Optional per-method settings: ``entire_code`` for
            ``"try_except_wrapper"``; ``position`` for
            ``"give_error_summary"``; ``position`` and ``ratio`` for
            ``"induction_annotation"``. Other keys are ignored.

    Returns:
        The validated obfuscated code, or ``None`` when ``code`` is
        blank, the method name is unknown, or all three attempts fail.
    """
    stdin_text = sample.get('input', '')
    wanted_output = sample.get('output', '')

    logger.info(f"Starting code transformation with method: {obfuscation_method}")
    logger.debug(f"Input code length: {len(code)}")

    if not code.strip():
        logger.warning("Empty code provided to transform_code")
        return None

    # Dispatch table: method name -> wrapper function.
    dispatch = {
        "give_error_summary": apply_give_error_summary,
        "induction_annotation": apply_induction_annotation,
        "insert_unsafe_code": apply_insert_unsafe_code,
        "rename_function_class_name": apply_rename_function_class_name,
        "rename_variable_name": apply_rename_variable_name,
        "string_obfuscation": apply_string_obfuscation,
        "try_except_wrapper": apply_try_except_wrapper,
        "stop_inference": apply_stop_inference,
    }

    if obfuscation_method not in dispatch:
        logger.error(f"Unknown obfuscation method: {obfuscation_method}")
        return None

    transform = dispatch[obfuscation_method]

    # insert_unsafe_code is the only wrapper that yields a list of variants.
    yields_variants = obfuscation_method == "insert_unsafe_code"

    # Keyword options understood by the selected wrapper (if any).
    if obfuscation_method == "try_except_wrapper":
        options = {"entire_code": kwargs.get("entire_code", False)}
    elif obfuscation_method == "give_error_summary":
        options = {"position": kwargs.get("position", -1)}
    elif obfuscation_method == "induction_annotation":
        options = {
            "position": kwargs.get("position", -1),
            "ratio": kwargs.get("ratio", 0.5),
        }
    else:
        options = {}

    for attempt in (1, 2, 3):
        try:
            logger.debug(f"Attempt {attempt} for method {obfuscation_method}")

            produced = transform(code, **options)

            if yields_variants:
                if not produced:
                    logger.warning(f"Method {obfuscation_method} returned empty results on attempt {attempt}")
                    continue
                # Only one randomly chosen variant is validated per attempt.
                candidate = random.choice(produced)
            else:
                candidate = produced

            if not (candidate and candidate.strip()):
                logger.warning(f"Method {obfuscation_method} returned empty code on attempt {attempt}")
                continue

            logger.debug(f"Obfuscated code length: {len(candidate)}")

            logger.debug(f"Validating obfuscated code on attempt {attempt}")
            if safe_eval(candidate, stdin_text, wanted_output):
                logger.info(f"Obfuscation successful and validated on attempt {attempt}")
                return candidate

            logger.warning(f"Validation failed on attempt {attempt}")

        except Exception as exc:
            # Best effort: a crashing attempt is recorded and the next one runs.
            logger.error(f"Error during obfuscation attempt {attempt}: {exc}")

    # Every attempt failed: emit a full report to ease reproduction.
    failure_report = (
        f"All 3 attempts failed for method {obfuscation_method}",
        f"Problem_id: {sample.get('problem_id', 'unknown')}",
        f"Submission_id: {sample.get('submission_id','unknown')}",
        f"Failed code: {code}",
        f"Method: {obfuscation_method}",
        f"Input data: {stdin_text}",
        f"Expected output: {wanted_output}",
    )
    for report_line in failure_report:
        logger.error(report_line)

    return None


def deobfuscate_code(obfuscated_code: str, is_local: bool = True, model_name: str = "deepseek-coder:6.7B") -> Optional[str]:
    """Ask the evaluator's LLM helper to deobfuscate ``obfuscated_code``.

    Args:
        obfuscated_code: Code to deobfuscate.
        is_local: Forwarded to the evaluator helper as ``is_local``.
        model_name: Forwarded to the evaluator helper as ``model_name``.

    Returns:
        The helper's output when it is a non-blank string; ``None`` when
        the input is blank, the helper returns nothing useful, or the
        helper raises (the exception is logged, not propagated).
    """
    logger.info(f"Starting code deobfuscation using model: {model_name} (local: {is_local})")
    logger.debug(f"Obfuscated code length: {len(obfuscated_code)}")

    if not obfuscated_code.strip():
        logger.warning("Empty obfuscated code provided to deobfuscate_code")
        return None

    try:
        # The language is fixed to Python for this module.
        recovered = deobfuscate_code_by_llm(
            code=obfuscated_code,
            is_local=is_local,
            model_name=model_name,
            lang="Python",
        )

        if not recovered or not recovered.strip():
            logger.warning("Deobfuscation returned empty result")
            return None

        logger.info(f"Deobfuscation successful, result length: {len(recovered)}")
        return recovered

    except Exception as exc:
        logger.error(f"Error during deobfuscation: {exc}")
        return None

def quantification_deobfuscation(sample, obfuscated_code: str, deobfuscated_code: str, model_name: str = "") -> bool:
    """Delegate to the evaluator's ``quantification_deobfuscation``.

    All arguments are passed through unchanged (``model_name`` as a
    keyword argument) and the evaluator's result is returned as-is.
    """
    verdict = quanti_deobf(
        sample,
        obfuscated_code,
        deobfuscated_code,
        model_name=model_name,
    )
    return verdict


def verify_against_obfuscation(sample, obfuscated_code: str) -> bool:
    """Deobfuscate with the remote ``gpt-4o`` model and quantify the result.

    The obfuscated code is sent to the evaluator's LLM deobfuscation
    helper (non-local, model ``gpt-4o``, language ``Python``). The helper's
    output is logged and then passed, together with ``sample`` and the
    obfuscated code, to ``quantification_deobfuscation``.

    Args:
        sample: Passed through to ``quantification_deobfuscation``.
        obfuscated_code: Code to deobfuscate and evaluate.

    Returns:
        Whatever ``quantification_deobfuscation`` returns.
    """
    logger.info("Starting verification against obfuscation")

    judge_model = "gpt-4o"
    recovered = deobfuscate_code_by_llm(
        code=obfuscated_code,
        is_local=False,
        model_name=judge_model,
        lang="Python",
    )

    logger.info(f"Deobfuscated code with GPT:\n{recovered}")

    return quantification_deobfuscation(
        sample,
        obfuscated_code,
        recovered,
        model_name=judge_model,
    )

def save_best_individuals_to_results(best_individual, results_dir: str = "results"):
    """Append one individual as a JSON line to ``best_individual.jsonl``.

    The record holds ``problem_id`` and ``submission_id`` (read from
    ``best_individual.sample``, defaulting to ``'unknown'``) plus the
    individual's ``genes``, ``obfuscated_code``, ``adversarial_score``,
    ``quantification_scores`` and ``deobfuscated_code`` attributes.

    Args:
        best_individual: Object exposing the attributes listed above. A
            falsy value is logged and ignored.
        results_dir: Directory holding the output file; created if it
            does not exist.

    Returns:
        None.

    Raises:
        Exception: Any error raised while opening the file, building the
            record, or writing it is logged and re-raised.
    """
    if not best_individual:
        logger.warning("No individual provided to save")
        return

    # Make sure the output directory exists before opening the file.
    os.makedirs(results_dir, exist_ok=True)

    # Identify which sample this individual belongs to.
    sample_info = best_individual.sample
    problem_id = sample_info.get('problem_id', 'unknown')
    submission_id = sample_info.get('submission_id', 'unknown')

    # All individuals share one fixed file; records are appended to it.
    filepath = os.path.join(results_dir, "best_individual.jsonl")

    try:
        with open(filepath, 'a', encoding='utf-8') as out_file:
            record = {
                "problem_id": problem_id,
                "submission_id": submission_id,
                "genes": best_individual.genes,
                "obfuscated_code": best_individual.obfuscated_code,
                "adversarial_score": best_individual.adversarial_score,
                "quantification_scores": best_individual.quantification_scores,
                "deobfuscated_code": best_individual.deobfuscated_code,
            }
            serialized = json.dumps(record, ensure_ascii=False)
            out_file.write(serialized + '\n')

        logger.info(f"Saved best individual to {filepath}")
        logger.info(f"Problem ID: {problem_id}, Submission ID: {submission_id}")

    except Exception as exc:
        logger.error(f"Error saving best individual to {filepath}: {exc}")
        raise