Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 29 additions & 13 deletions Analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@ def __init__(self, filename: str, parsed_data: Parser, total_lines: int, out_dir
"""
self.filename = filename
self.parsed_data = parsed_data

# ! Outdated branch pattern detection
# self.branchV1_detector = BranchV1(filename, total_lines, directory_name)
self.branchV2_detector = BranchV2(filename, parsed_data.arch.name, parsed_data.opt, directory_name, sensitivity=4)
self.branchV2_detector = BranchV2(filename, parsed_data.arch.name, parsed_data.opt, directory_name, directory_name, sensitivity=4)
self.constant_detector = ConstantCoding(filename, parsed_data.arch.name, parsed_data.opt, total_lines, directory_name, sensitivity=4)
self.loop_detector = LoopCheck(filename, parsed_data.arch.name, parsed_data.opt, total_lines, directory_name)
self.bypass_detector = Bypass(filename, parsed_data.arch.name, parsed_data.opt, total_lines, directory_name)
# TODO: Instantiate a list of detectors, then iterate on detectors for other functions
# Doing the above avoids the below condition on subsequent function calls
if parsed_data.arch.name == "x86":
self.bypass_detector = None
# if self.create_directory(console):
self.static_analysis()

Expand Down Expand Up @@ -54,7 +58,8 @@ def static_analysis(self) -> None:
self.branchV2_detector.analysis(line)
self.constant_detector.analysis(line)
self.loop_detector.analysis(line)
self.bypass_detector.analysis(line)
if self.bypass_detector:
self.bypass_detector.analysis(line)
elif type(line) == Location:
self.constant_detector.analysis(line)
self.loop_detector.analysis(line)
Expand All @@ -80,8 +85,9 @@ def print_analysis_results(self, console: Console) -> None:
console.print(f"[Pattern] [bright_yellow]LoopCheck[/bright_yellow]\n")
self.loop_detector.print_results(console)

console.print(f"[Pattern] [bright_yellow]Bypass[/bright_yellow]\n")
self.bypass_detector.print_results(console)
if self.bypass_detector:
console.print(f"[Pattern] [bright_yellow]Bypass[/bright_yellow]\n")
self.bypass_detector.print_results(console)

def save_and_print_analysis_results(self, console: Console) -> None:
"""
Expand All @@ -101,14 +107,18 @@ def save_and_print_analysis_results(self, console: Console) -> None:
self.loop_detector.save_and_print_results(console)
console.print(f"Saved")

console.print(f"Saving Bypass...")
self.bypass_detector.save_and_print_results(console)
console.print(f"Saved")
if self.bypass_detector:
console.print(f"Saving Bypass...")
self.bypass_detector.save_and_print_results(console)
console.print(f"Saved")

def print_total_vulnerable_lines(self, console: Console) -> None:
# total number of vulnerable lines
total_vulnerable_lines = (len(self.branchV2_detector.vulnerable_instructions) + len(self.constant_detector.vulnerable_instructions)
+ len(self.loop_detector.vulnerable_instructions) + len(self.bypass_detector.vulnerable_set))
+ len(self.loop_detector.vulnerable_instructions))

if self.bypass_detector:
total_vulnerable_lines += len(self.bypass_detector.vulnerable_set)
print(f"Total number of vulnerable lines: {total_vulnerable_lines}")

# total number of branch faults
Expand All @@ -124,9 +134,15 @@ def print_total_vulnerable_lines(self, console: Console) -> None:
console.print(f"\tTotal number of Loop Check vulnerabilities: {total_loop_faults}")

# total number of bypass faults
total_bypass_faults = len(self.bypass_detector.vulnerable_set)
console.print(f"\tTotal number of Bypass vulnerabilities: {total_bypass_faults}")
if self.bypass_detector:
total_bypass_faults = len(self.bypass_detector.vulnerable_set)
console.print(f"\tTotal number of Bypass vulnerabilities: {total_bypass_faults}")

def get_total_vulnerable_lines(self) -> int:
    """Return the number of vulnerable lines reported by all active detectors.

    The branch, constant-coding and loop detectors always contribute the
    length of their ``vulnerable_instructions`` collection. The bypass
    detector contributes the length of its ``vulnerable_set`` only when it
    exists; it is ``None`` when the detector is disabled.

    Returns:
        int: Sum of the vulnerable entries across the active detectors.
    """
    total_lines = (
        len(self.branchV2_detector.vulnerable_instructions)
        + len(self.constant_detector.vulnerable_instructions)
        + len(self.loop_detector.vulnerable_instructions)
    )

    # The bypass detector is optional; never dereference it unconditionally.
    if self.bypass_detector is not None:
        total_lines += len(self.bypass_detector.vulnerable_set)

    return total_lines
100 changes: 91 additions & 9 deletions Parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import re
from subprocess import check_output
from capstone import *
from rich.console import Console
from elftools.elf.elffile import ELFFile

from constants import optimization_levels
from constants import Architectures, BinaryModes, optimization_levels

class Register():
def __init__(self, name: str):
Expand Down Expand Up @@ -97,7 +100,10 @@ def set_arguments(self, value: str):
if indicator != -1:
# The string has two args
# The +2 here is because of the ', #' that will be at index "indicator"
args["offset"] = IntegerLiteral(int(value[value.index('#')+1:]))
try:
args["offset"] = IntegerLiteral(int(value[value.index('#')+1:]))
except ValueError:
args["offset"] = IntegerLiteral(int(value[value.index('#')+1:], 16))

return args

Expand Down Expand Up @@ -157,15 +163,66 @@ def __init__(self, file: str, console: Console):
self.total_lines: int = 0

self.arch = Architecture(line=None, instruction=None)
self.opt : str
self.opt : str = "O0"
self.is_binary = False

self.parseFile(console)

def parseFile(self, console: Console):
console.log(f"Reading file: {self.filename}")
with open(self.filename) as f:
lines: list[str] = f.readlines()
lines: list[str] = []

# Source file parsing
if self.__is_file_source(self.filename):
with open(self.filename, mode="r") as source_file:
lines = source_file.readlines()
else:
self.is_binary = True
with open(self.filename, mode="rb") as binary_file:
elf_file = ELFFile(binary_file)
text_section = elf_file.get_section_by_name(".text")
data_section = elf_file.get_section_by_name(".data")
rodata_section = elf_file.get_section_by_name(".rodata")
# Sections print for debugging
for section in elf_file.iter_sections():
print(hex(section["sh_addr"]), section.name)

symtab = elf_file.get_section_by_name(".symtab")
for i in range(5):
print("symbol #{} - {}".format(i, symtab.get_symbol(i).name))

main_offset = symtab.get_symbol_by_name("main")[0].entry["st_value"]
main_size = symtab.get_symbol_by_name("main")[0].entry["st_value"]

# Code
ops = text_section.data()
addr = text_section["sh_addr"]

# Global Vars
dops = data_section.data()
daddr = data_section["sh_addr"]

# Strings
rdops = rodata_section.data()
rdaddr = rodata_section["sh_addr"]

# Determine architecture and mode for Capstone
file_target_system = self.__determine_binary_architecture(elf_file)
self.arch.architecture_found(file_target_system.name.lower())
file_mode = None
if file_target_system == Architectures.ARM:
file_mode = CS_MODE_ARM
else:
file_mode = self.__determine_binary_mode(elf_file).value

md = Cs(file_target_system.value, file_mode)
# Disassemble and store lines
lines = []
for i in md.disasm(code=ops, offset=addr):
# lines.append("{} {}".format(i.mnemonic, i.op_str))
print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
console.log(f"[green]File read successfully![/green]\n")


console.log(f"Processing assembly data:")
self.isolateSections(lines)
Expand All @@ -175,7 +232,9 @@ def isolateSections(self, lines: list[str]):
program = []
line_number = 1

global attribute_1, attribute_2 # For determining optimization level
# For determining optimization level
attribute_1 = None
attribute_2 = None
for line in lines:
s = line.strip()
# Line is a location
Expand All @@ -195,10 +254,10 @@ def isolateSections(self, lines: list[str]):
elif s.startswith(".ident"):
break
# if line starts with .eabi_attribute 30, we get 1st attribute for optimization level
elif s.startswith(".eabi_attribute 30"):
elif s.startswith(".eabi_attribute 30") and not self.is_binary:
attribute_1 = self.get_eabi_attribute(s)
# if line starts with .eabi_attribute 23, we get 2nd attribute for optimization level
elif s.startswith(".eabi_attribute 23"):
elif s.startswith(".eabi_attribute 23") and not self.is_binary:
attribute_2 = self.get_eabi_attribute(s)
# Line is an instruction
else:
Expand Down Expand Up @@ -241,7 +300,11 @@ def parseArguments(self, line: str, line_number: int):

# Check if a number
if self.isNumber(arg):
arguments.append(IntegerLiteral(int(arg[1:] if arg.startswith('#') or arg.startswith('$') else arg)))
try:
arguments.append(IntegerLiteral(int(arg[1:] if arg.startswith('#') or arg.startswith('$') else arg)))
except ValueError:
arguments.append(IntegerLiteral(int(arg[1:] if arg.startswith('#') or arg.startswith('$') else arg, 16)))

# ! This notation can also be used in ARM for LDR
elif re.search(r"\.long|\.value", instruction) and self.isNumber(arg):
# in case its a global variable
Expand Down Expand Up @@ -282,3 +345,22 @@ def get_eabi_attribute(self, s: str):
return int(tag_match.group(2))
else:
return None

def __is_file_source(self, file_path: str) -> bool:
    """Decide whether a file is textual assembly source using the `file` command.

    Args:
        file_path (str): Path to the file being analyzed

    Returns:
        bool: True when `file` describes the content as ASCII text,
        False otherwise (the caller then treats the file as a binary).
    """
    # -b (brief) omits the file name from the output, so a path that happens
    # to contain "ASCII text" cannot influence the classification.
    file_output = check_output(["file", "-b", file_path]).decode()

    # `file` may append qualifiers right after "text" (for example
    # "ASCII text, with CRLF line terminators"), so only a word boundary is
    # required after it rather than a whitespace character.
    return re.search(r"ASCII\stext\b", file_output) is not None

def __determine_binary_mode(self, elf_file: ELFFile):
    """Translate the ELF class of *elf_file* into a ``BinaryModes`` member."""
    elf_class = elf_file.elfclass
    return BinaryModes.from_elf_class(elf_class)

def __determine_binary_architecture(self, elf_file: ELFFile):
    """Translate the ``e_machine`` header field of *elf_file* into an ``Architectures`` member."""
    # A missing field falls back to "" exactly as before.
    machine = elf_file.header.get("e_machine", "")
    return Architectures.from_elf_machine(machine)
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ The entrypoint to the program, `main.py`, serves as a central location to utiliz

Main parsing module. Intended to parse assembly code. It combs through the source code and creates objects depending on what it encounters. Once the source code is transformed into a list of objects, it can be more easily worked with to discover patterns. It uses Python’s type hints to be more transparent.

> If passed a compiled binary, the parser uses [capstone](https://www.capstone-engine.org/) in combination with [pyelftools](https://github.com/eliben/pyelftools) to disassemble and parse the binary.

`Locations` are spots in the code that can be referenced and jumped to. Example: .LC0 and main.

`IntegerLiterals` are integers. In 32-bit syntax, these are prefaced with a “#”
Expand Down
16 changes: 16 additions & 0 deletions constants/Architectures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from enum import Enum


class Architectures(Enum):
    """CPU architectures recognised when inspecting an ELF binary."""

    # NOTE(review): ARM = 0 and X86 = 3 presumably mirror Capstone's
    # CS_ARCH_ARM / CS_ARCH_X86 identifiers — confirm before changing them.
    ARM = 0
    X86 = 3
    UNKNOWN = 9

    @staticmethod
    def from_elf_machine(elf_machine: str):
        """Map an ELF ``e_machine`` identifier to an architecture.

        Args:
            elf_machine: Machine identifier string such as ``"EM_ARM"``,
                ``"EM_X86_64"`` or ``"EM_386"``.

        Returns:
            The matching ``Architectures`` member, or ``UNKNOWN`` when the
            identifier is not recognised or is not a string.
        """
        # An unrecognised machine may not arrive as a string; the substring
        # tests below would raise TypeError on it.
        if not isinstance(elf_machine, str):
            return Architectures.UNKNOWN
        if "ARM" in elf_machine:
            return Architectures.ARM
        # 32-bit x86 is identified as "EM_386", which has no "X86" substring.
        if "X86" in elf_machine or elf_machine == "EM_386":
            return Architectures.X86
        return Architectures.UNKNOWN
17 changes: 17 additions & 0 deletions constants/BinaryModes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from enum import Enum


class BinaryModes(Enum):
    """Word sizes a binary can be built for."""

    # NOTE: These values match their Capstone Modes (CS_MODE) counterparts
    B32 = 4
    B64 = 8
    UNKNOWN = 9

    @staticmethod
    def from_elf_class(elf_class: int):
        """Return the mode for an ELF class of 32 or 64, ``UNKNOWN`` for anything else."""
        known_widths = ((32, BinaryModes.B32), (64, BinaryModes.B64))
        for width, mode in known_widths:
            if elf_class == width:
                return mode
        return BinaryModes.UNKNOWN
4 changes: 3 additions & 1 deletion constants/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from .patterns import *
from .trivialValues import *
from .trivialValues import *
from .Architectures import *
from .BinaryModes import *
3 changes: 2 additions & 1 deletion constants/patterns.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
pattern_list = {
"x86": {
"branch": ['cmpl', ['jne', 'je', 'jnz', 'jz']], #cmp??
"branch": [['cmpl'], ['jne', 'je', 'jnz', 'jz'], []], #cmp??
"constant_coding": ['movl', 'movq', 'movw', '.value', ".long"],
"loop_check": ['cmpl', 'cmpl', 'j'], #cmpb, cmp??
"bypass": [[], []]
},
"arm": {
"branch": [['cmp', 'subs', 'rsbs'], ['beq', 'bne', 'bcs', 'bhs', 'bcc', 'blo', 'bmi', 'bpl', 'bvs', 'bvc', 'bhi', 'bls', 'bge',
Expand Down
5 changes: 5 additions & 0 deletions docs/developer_notes/architecture_and_compilers.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- [Architectures](#architectures)
- [Cross-compiling](#cross-compiling)
- [ARM](#arm)
- [RISC-V](#risc-v)
- [x86](#x86)
- [Tool Chain Conventions](#tool-chain-conventions)

Expand Down Expand Up @@ -44,6 +45,10 @@ In order to compile the dataset, scripts will be provided in order to have both
arm-none-eabi-gcc -S -o filename.s /path/to/filename.c
```

```bash
arm-none-eabi-gcc --specs=nosys.specs -o filename /path/to/filename.c
```

### RISC-V
The RISC-V GCC toolchain and its installation instructions can be found at this [link](https://github.com/riscv-collab/riscv-gnu-toolchain). Once you have installed the toolchain successfully, you can create Assembly binaries with the following command:
```bash
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
capstone
rich
setuptools
rich
pyelftools
Loading