"""Render diagnostic plots for example tokenizer-collision records.

For every record in ``collisions`` three PNG files are written to the current
working directory: a line diff of the first two raw variants, a bar chart of
longest-common-prefix (LCP) ratios, and a histogram of Levenshtein distances.
"""

from difflib import Differ

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Example collision data
# NOTE(review): in the first record the summary fields do not agree with the
# lists next to them (19 variants listed vs. num_raw_variants=21; 20 distances
# whose mean is 6.95 and max is 13 vs. the stated 8.74 and 23). The values are
# kept as given -- confirm against the real analysis output.
collisions = [
    {
        "colliding_token_sequence": [265, 393, 320],
        "num_raw_variants": 21,
        "raw_chunk_variants": [
            "\nif __name__ == '_",
            "\nif __n",
            "\nif __name__ == '__main",
            "\nif __name__ == '__main__'",
            "\nif __",
            "\nif _",
            "\nif __name__ ",
            "\nif __name__ =",
            "\nif __name__ == '__",
            "\nif __na",
            "\nif __name__",
            "\nif __name",
            "\nif __name__ == '__main__':",
            "\nif __name__ == '__main__':\n",
            "\nif __nam",
            "\nif __name_",
            "\nif __name__ == ",
            "\nif __name__ == '__ma",
            "\nif __name__ == '__main_",
        ],
        "levenshtein_analysis": {
            "distances": [11, 5, 8, 12, 13, 5, 4, 1, 10, 6, 8, 9, 10, 9, 7, 2, 3, 6, 7, 3],
            "average_distance": 8.74,
            "max_distance": 23,
            "min_distance": 1,
        },
    },
    {
        "colliding_token_sequence": [506, 354, 256],
        "num_raw_variants": 2,
        "raw_chunk_variants": [
            "数据",
            "数�",
        ],
        "levenshtein_analysis": {
            "distances": [0, 1],
            "average_distance": 0.5,
            "max_distance": 1,
            "min_distance": 0,
        },
    },
    {
        "colliding_token_sequence": [123, 456, 789],
        "num_raw_variants": 4,
        "raw_chunk_variants": [
            " } ",
            " }\r\n ",
            "!",
            " }\r\n ",
        ],
        "levenshtein_analysis": {
            "distances": [2, 1, 4, 7],
            "average_distance": 3.5,
            "max_distance": 7,
            "min_distance": 1,
        },
    },
]

# Colour per difflib.Differ line prefix: additions green, removals red,
# "?" intraline hints gray, unchanged lines blue.
_DIFF_COLORS = {"+": "green", "-": "red", "?": "gray", " ": "blue"}


# --- 1. Plot Text Diff (simplified) ---
def plot_text_diff(variants, title, save_path):
    """Save an image of the line diff between the first two variants.

    Args:
        variants: Sequence of strings; only ``variants[0]`` and
            ``variants[1]`` are compared.
        title: Title drawn above the diff.
        save_path: Destination passed to ``Figure.savefig``.

    Raises:
        IndexError: If ``variants`` holds fewer than two entries.
    """
    diff = list(Differ().compare(variants[0].splitlines(), variants[1].splitlines()))

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.set_title(title)
    # One data unit per diff line; the inverted y-axis puts the first diff
    # line at the top so the output reads top-to-bottom.
    ax.set_xlim(0, 1)
    ax.set_ylim(max(len(diff), 1), 0)
    for row, line in enumerate(diff):
        color = _DIFF_COLORS.get(line[:1], "black")
        # Differ's "?" hint lines end with a newline; strip it so each entry
        # renders as a single row.
        ax.text(0, row, line.rstrip("\n"), color=color, fontsize=12,
                family="monospace", va="top", ha="left")
    ax.axis("off")
    fig.savefig(save_path, bbox_inches="tight")
    plt.close(fig)


def _compute_lcp_ratios(variants):
    """Return, per variant, the fraction of it covered by the shared prefix.

    The shared prefix is the longest prefix common to *all* variants. An
    empty variant gets a ratio of 0.0.
    NOTE(review): "LCP ratio" is not defined anywhere in this file; this is
    one reasonable definition -- confirm it matches the intended metric.
    """
    prefix_len = 0
    # zip stops at the shortest variant, which bounds the common prefix.
    for chars in zip(*variants):
        if len(set(chars)) > 1:
            break
        prefix_len += 1
    return [prefix_len / len(variant) if variant else 0.0 for variant in variants]


# --- 2. Plot LCP Ratio for Variants ---
def plot_lcp_ratios(lcp_ratios, title, save_path):
    """Save a bar chart with one bar per entry of ``lcp_ratios``.

    Args:
        lcp_ratios: Sequence of numeric values, plotted against their index.
        title: Chart title.
        save_path: Destination passed to ``Figure.savefig``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.barplot(x=list(range(len(lcp_ratios))), y=lcp_ratios, color="skyblue", ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Variant Index')
    ax.set_ylabel('LCP Ratio')
    fig.savefig(save_path, bbox_inches="tight")
    plt.close(fig)


# --- 3. Levenshtein Distance Distribution ---
def plot_levenshtein_distances(distances, title, save_path):
    """Save a 10-bin histogram of ``distances``.

    Args:
        distances: Sequence of numeric distances.
        title: Chart title.
        save_path: Destination passed to ``Figure.savefig``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    # A density estimate needs some spread in the data; only request the KDE
    # overlay when the distances take more than one distinct value.
    show_kde = len(set(distances)) > 1
    sns.histplot(distances, bins=10, kde=show_kde, color="salmon", ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Levenshtein Distance')
    ax.set_ylabel('Frequency')
    fig.savefig(save_path, bbox_inches="tight")
    plt.close(fig)


# Generate all three plots for every collision case.
for case_number, collision in enumerate(collisions, start=1):
    case_variants = collision["raw_chunk_variants"]
    plot_text_diff(
        case_variants,
        f"Text Difference Visualization Case {case_number}",
        f"text_diff_case{case_number}.png",
    )
    # Ratios are derived from the variants themselves rather than from the
    # Levenshtein summary, so the chart shows what its axis label says.
    plot_lcp_ratios(
        _compute_lcp_ratios(case_variants),
        f"LCP Ratio of Variants Case {case_number}",
        f"lcp_case{case_number}.png",
    )
    plot_levenshtein_distances(
        collision["levenshtein_analysis"]["distances"],
        f"Levenshtein Distance Distribution Case {case_number}",
        f"levenshtein_case{case_number}.png",
    )

print("Plots generated successfully!")