# debug_labeling_visualizer.py
#
# Debug script: locates one file under the configured input root and prints,
# line by line, how each extracted function's lines are labeled.
import os
import sys
# Ensure src can be imported
sys.path.append(os.getcwd())
from src.data_process.dataset_builder import DatasetBuilder
from src.config import config
from src.core.constants import EXT_TO_LANG
def _find_file(search_root, file_name):
    """Return the path of the first file named *file_name* under *search_root*.

    Directories are visited in ``os.walk`` order; returns ``None`` when no
    directory contains a file with that exact name.
    """
    for dirpath, _dirnames, filenames in os.walk(search_root):
        if file_name in filenames:
            return os.path.join(dirpath, file_name)
    return None


def _next_nonblank_is_log(lines, start, is_log):
    """Return True if the first non-blank line at index >= *start* is a log line.

    Blank (whitespace-only) lines are skipped. The scan stops at the first
    non-blank line whether or not it is a log line, so it always terminates.
    Returns False when only blank lines (or nothing) remain.
    """
    for idx in range(start, len(lines)):
        candidate = lines[idx]
        if candidate.strip():
            return bool(is_log(candidate))
    return False


def debug_specific_file(target_file_name):
    """Find *target_file_name* under the input root and print its line labels.

    The file is searched for under ``config.project.input_root``. If it is not
    found, a message is printed and the function returns. Otherwise the file
    is read and parsed through the ``DatasetBuilder`` helpers, and for every
    extracted function each line is printed with one of these tags:

    * ``[SKIP]`` - the line is blank.
    * ``[LOG ]`` - ``builder.is_log_line`` reports the line as a log statement.
    * label ``1`` - the next non-blank line is a log statement.
    * label ``0`` - otherwise.

    The printed number counts only labeled (non-blank, non-log) lines; a
    skipped or log line shows the number the next labeled line will receive.

    Args:
        target_file_name: Bare file name (not a path) to look for.

    Returns:
        None. All output goes to stdout.
    """
    builder = DatasetBuilder()
    input_root = config.project.input_root

    target_path = _find_file(input_root, target_file_name)
    if target_path is None:
        print(f"File {target_file_name} not found in {input_root}")
        return

    print(f"=== Debugging Labeling Logic for: {target_file_name} ===")

    # Read and parse through the same builder components the dataset uses.
    content = builder.file_manager.read_file(target_path)
    tree = builder.parser.parse_code(content, target_path)
    functions = builder.parser.extract_functions(tree, target_path)

    # NOTE(review): lang is None when the extension is not in EXT_TO_LANG;
    # presumably is_log_line tolerates that - confirm.
    ext = os.path.splitext(target_path)[1]
    lang = EXT_TO_LANG.get(ext)

    def is_log(text):
        return builder.is_log_line(text, lang)

    for func in functions:
        print(f"\n--- Function: {func['name']} ---")
        lines = func['code'].split('\n')

        labeled_count = 0  # number of lines that have received a label so far
        for pos, line_content in enumerate(lines):
            number = f"{labeled_count + 1:03d}"

            # 1. Blank lines are skipped (per the original notes, this mirrors
            #    the DatasetBuilder logic).
            if not line_content.strip():
                print(f"{number} [SKIP] {line_content}")
                continue

            # 2. Existing log lines are not labeled themselves; they act as
            #    the label for the preceding line.
            if is_log(line_content):
                print(f"{number} [LOG ] {line_content} <-- This line is REMOVED from input")
                continue

            # 3. Label the current line by looking ahead to the next
            #    non-blank line.
            has_log_after = _next_nonblank_is_log(lines, pos + 1, is_log)
            label_str = "🔴 1 (INSERT HERE)" if has_log_after else "⚪ 0"
            print(f"{number} [{label_str}] {line_content}")
            labeled_count += 1
if __name__ == "__main__":
    # Usage: python debug_labeling_visualizer.py [FILE_NAME]
    # FILE_NAME should be a file you know has logs in your dataset. With no
    # argument the script falls back to the sample file below.
    default_target = "Axis2BackEndServerController.java"
    target_file = sys.argv[1] if len(sys.argv) > 1 else default_target
    debug_specific_file(target_file)