Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ on:
- '**.md'

jobs:
build-and-test:
build-and-test-aarch64:
runs-on: ubuntu-22.04-arm
name: Build-and-Run
name: AArch64 Build-and-Run
steps:
- name: Checkout
uses: actions/checkout@v4
Expand All @@ -33,3 +33,23 @@ jobs:

- name: run integration test
run: docker run --rm -w /root/elfconv/build elfconv-image "ninja test_dependencies && ctest"

build-and-test-amd64:
runs-on: ubuntu-22.04
name: AMD64 Build-and-Run
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Pull base image or build locally
run: |
if ! docker pull ghcr.io/yomaytk/elfconv-base:amd64; then
echo "Base image not found, building locally..."
docker build -f Dockerfile.base -t ghcr.io/yomaytk/elfconv-base:amd64 .
fi

- name: build container image
run: docker build . --build-arg ECV_X86=1 -t elfconv-image

- name: run instruction tests
run: docker run --rm -w /root/elfconv/build elfconv-image "ninja test_dependencies && ctest --output-on-failure"
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,5 +133,6 @@ elfconv-v*
!release/outdir/index.html

# AI
CLAUDE.md
SKILLS.md
skills
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,7 @@ add_subdirectory(backend/remill)
# lifter build
add_subdirectory(lifter)

# elfconv integration tests
add_subdirectory(tests/elfconv)
# elfconv integration tests (currently AArch64 only)
if(CMAKE_ELFCONV_AARCH64_BUILD)
add_subdirectory(tests/elfconv)
endif()
2 changes: 1 addition & 1 deletion backend/remill/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ add_custom_target(semantics)
# Add the architecture-specific test subdirectory that matches the
# CMAKE_ELFCONV_<arch_name>_BUILD flag set at configure time. If neither the
# AArch64 nor the X86 flag is set, configuration is aborted.
if(CMAKE_ELFCONV_AARCH64_BUILD)
  add_subdirectory(tests/AArch64)
elseif(CMAKE_ELFCONV_X86_BUILD)
  message("X86 test directory should be executed (FIXME).")
  add_subdirectory(tests/X86)
else()
  # The mode keyword must be a standalone argument: with a trailing comma
  # ("FATAL_ERROR,") CMake treats it as ordinary message text, prints a plain
  # notice, and keeps configuring instead of stopping.
  message(FATAL_ERROR "CMAKE_ELFCONV_<arch_name>_BUILD must be 1. (remill/CMakeLists.txt: 3)")
endif()
Expand Down
73 changes: 16 additions & 57 deletions backend/remill/lib/Arch/X86/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1169,71 +1169,30 @@ bool X86Arch::ArchDecodeInstruction(uint64_t address, std::string_view inst_byte
}
}

// Push implicit operands.
auto push_operand = [&](Operand::Type type, Operand::Action action, xed_reg_enum_t reg) {
Operand op = {};
op.type = type;
op.action = action;
op.reg = RegOp(reg);
op.size = op.reg.size;
inst.operands.push_back(op);
};

switch (iform) {
case XED_IFORM_CALL_NEAR_RELBRd:
case XED_IFORM_RET_NEAR:
push_operand(Operand::kTypeRegister, Operand::kActionWrite, XED_REG_RIP);
push_operand(Operand::kTypeRegister, Operand::kActionRead, XED_REG_RSP);
push_operand(Operand::kTypeRegister, Operand::kActionWrite, XED_REG_RSP);
break;
case XED_IFORM_PUSH_GPRv_50:
case XED_IFORM_POP_GPRv_58:
push_operand(Operand::kTypeRegister, Operand::kActionRead, XED_REG_RSP);
push_operand(Operand::kTypeRegister, Operand::kActionWrite, XED_REG_RSP);
break;
case XED_IFORM_CDQ:
push_operand(Operand::kTypeRegister, Operand::kActionRead, XED_REG_EAX);
#if 64 == ADDRESS_SIZE_BITS
push_operand(Operand::kTypeRegister, Operand::kActionWrite, XED_REG_RDX);
#else
push_operand(Operand::kTypeRegister, Operand::kActionWrite, XED_REG_EDX);
#endif
break;
case XED_IFORM_CDQE:
push_operand(Operand::kTypeRegister, Operand::kActionRead, XED_REG_EAX);
push_operand(Operand::kTypeRegister, Operand::kActionWrite, XED_REG_RAX);
break;
case XED_IFORM_IDIV_MEMv:
case XED_IFORM_IDIV_GPRv:
push_operand(Operand::kTypeRegister, Operand::kActionRead, XED_REG_EAX);
push_operand(Operand::kTypeRegister, Operand::kActionRead, XED_REG_EDX);
push_operand(Operand::kTypeRegister, Operand::kActionWrite, XED_REG_RAX);
push_operand(Operand::kTypeRegister, Operand::kActionWrite, XED_REG_RDX);
break;
case XED_IFORM_DIV_GPRv:
push_operand(Operand::kTypeRegister, Operand::kActionRead, XED_REG_RAX);
push_operand(Operand::kTypeRegister, Operand::kActionRead, XED_REG_RDX);
push_operand(Operand::kTypeRegister, Operand::kActionWrite, XED_REG_RAX);
push_operand(Operand::kTypeRegister, Operand::kActionWrite, XED_REG_RDX);
break;
default: break;
// Control flow operands update the next program counter.
if (inst.IsControlFlow()) {
inst.operands.emplace_back();
auto &dst_ret_pc = inst.operands.back();
dst_ret_pc.type = Operand::kTypeRegister;
dst_ret_pc.action = Operand::kActionWrite;
dst_ret_pc.size = address_size;
dst_ret_pc.reg.name = "NEXT_PC";
dst_ret_pc.reg.size = address_size;
}

SetSemaFuncArgType(inst, iform);

if (inst.IsFunctionCall()) {
DecodeFallThroughPC(inst, xedd);

// The semantics will store the return address in `RETURN_PC`. This is to
// help synchronize program counters when lifting instructions on an ISA
// with delay slots.
// inst.operands.emplace_back();
// auto &dst_ret_pc = inst.operands.back();
// dst_ret_pc.type = Operand::kTypeRegister;
// dst_ret_pc.action = Operand::kActionWrite;
// dst_ret_pc.size = address_size;
// dst_ret_pc.reg.name = "RETURN_PC";
// dst_ret_pc.reg.size = address_size;
inst.operands.emplace_back();
auto &dst_ret_pc2 = inst.operands.back();
dst_ret_pc2.type = Operand::kTypeRegister;
dst_ret_pc2.action = Operand::kActionWrite;
dst_ret_pc2.size = address_size;
dst_ret_pc2.reg.name = "RETURN_PC";
dst_ret_pc2.reg.size = address_size;
}

if (UsesStopFailure(xedd)) {
Expand Down
24 changes: 12 additions & 12 deletions backend/remill/lib/Arch/X86/Runtime/Instructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,33 +204,33 @@ DEF_HELPER(SquareRoot32, float32_t src_float)->float32_t {

// #include "lib/Arch/X86/Semantics/AVX.cpp"
#include "lib/Arch/X86/Semantics/BINARY.cpp"
#include "lib/Arch/X86/Semantics/BITBYTE.cpp"
#include "lib/Arch/X86/Semantics/CALL_RET.cpp"
// #include "lib/Arch/X86/Semantics/BITBYTE.cpp"
// #include "lib/Arch/X86/Semantics/CALL_RET.cpp"
// #include "lib/Arch/X86/Semantics/CMOV.cpp"
#include "lib/Arch/X86/Semantics/COND_BR.cpp"
#include "lib/Arch/X86/Semantics/CONVERT.cpp"
// #include "lib/Arch/X86/Semantics/COND_BR.cpp"
// #include "lib/Arch/X86/Semantics/CONVERT.cpp"
#include "lib/Arch/X86/Semantics/DATAXFER.cpp"
// #include "lib/Arch/X86/Semantics/DECIMAL.cpp"
// #include "lib/Arch/X86/Semantics/FLAGOP.cpp"
// #include "lib/Arch/X86/Semantics/FMA.cpp"
// #include "lib/Arch/X86/Semantics/INTERRUPT.cpp"
// #include "lib/Arch/X86/Semantics/IO.cpp"
#include "lib/Arch/X86/Semantics/LOGICAL.cpp"
#include "lib/Arch/X86/Semantics/MISC.cpp"
// #include "lib/Arch/X86/Semantics/LOGICAL.cpp"
// #include "lib/Arch/X86/Semantics/MISC.cpp"
// #include "lib/Arch/X86/Semantics/MMX.cpp"
#include "lib/Arch/X86/Semantics/NOP.cpp"
#include "lib/Arch/X86/Semantics/POP.cpp"
// #include "lib/Arch/X86/Semantics/NOP.cpp"
// #include "lib/Arch/X86/Semantics/POP.cpp"
// #include "lib/Arch/X86/Semantics/PREFETCH.cpp"
#include "lib/Arch/X86/Semantics/PUSH.cpp"
// #include "lib/Arch/X86/Semantics/PUSH.cpp"
// #include "lib/Arch/X86/Semantics/ROTATE.cpp"
// #include "lib/Arch/X86/Semantics/RTM.cpp"
// #include "lib/Arch/X86/Semantics/SEMAPHORE.cpp"
#include "lib/Arch/X86/Semantics/SHIFT.cpp"
// #include "lib/Arch/X86/Semantics/SHIFT.cpp"
// #include "lib/Arch/X86/Semantics/SSE.cpp"
// #include "lib/Arch/X86/Semantics/STRINGOP.cpp"
#include "lib/Arch/X86/Semantics/SYSCALL.cpp"
// #include "lib/Arch/X86/Semantics/SYSCALL.cpp"
// #include "lib/Arch/X86/Semantics/SYSTEM.cpp"
#include "lib/Arch/X86/Semantics/UNCOND_BR.cpp"
// #include "lib/Arch/X86/Semantics/UNCOND_BR.cpp"
// #include "lib/Arch/X86/Semantics/X87.cpp"
// #include "lib/Arch/X86/Semantics/XOP.cpp"
// #include "lib/Arch/X86/Semantics/XSAVE.cpp"
Expand Down
84 changes: 60 additions & 24 deletions backend/remill/lib/BC/InstructionLifter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,30 +141,58 @@ LiftStatus InstructionLifter::LiftIntoBlock(Instruction &arch_inst, llvm::BasicB
if (arch_inst.lift_config.target_elf_arch == kArchAArch64LittleEndian) {
LiftAArch64EveryOperand(arch_inst, block, state_ptr, isel_func, bb_reg_info_node);
} else if (arch_inst.lift_config.target_elf_arch == kArchAMD64) {
LiftX86EveryOperand(arch_inst, block, state_ptr, isel_func, bb_reg_info_node);
}

// End an atomic block.
// (FIXME) In the current design, we don't consider the atomic instructions.
if (arch_inst.is_atomic_read_modify_write) {
// llvm::Value *temp_args[] = {ir.CreateLoad(impl->memory_ptr_type, mem_ptr_ref)};
// ir.CreateStore(ir.CreateCall(impl->intrinsics->atomic_end, temp_args), mem_ptr_ref);
}
// Standard remill lifting path for x86 (no VRO).
llvm::IRBuilder<> ir(block);

// Restore the true target of the delayed branch.
if (is_delayed) {
// Update PC and NEXT_PC in State before calling the semantic function.
const auto [pc_ref, pc_ref_type] =
LoadRegAddress(block, state_ptr, kPCVariableName);
const auto [next_pc_ref, next_pc_ref_type] =
LoadRegAddress(block, state_ptr, kNextPCVariableName);
const auto next_pc = ir.CreateLoad(impl->word_type, next_pc_ref);
ir.CreateStore(next_pc, pc_ref);
ir.CreateStore(
ir.CreateAdd(next_pc, llvm::ConstantInt::get(impl->word_type,
arch_inst.bytes.size())),
next_pc_ref);

// Build args: [RuntimeManager*, State*, ...operands...]
std::vector<llvm::Value *> args;
args.reserve(arch_inst.operands.size() + 2);

auto runtime_ptr = NthArgument(func, kRuntimePointerArgNum);
args.push_back(runtime_ptr);
args.push_back(state_ptr);

auto isel_func_type = isel_func->getFunctionType();
auto arg_num = 2U;

for (auto &op : arch_inst.operands) {
auto num_params = isel_func_type->getNumParams();
if (!(arg_num < num_params)) {
status = kLiftedMismatchedISEL;
printf("[Bug] kLiftedMismatchedISEL at 0x%08lx, inst: %s, arg_num: %u, num_params: %u\n",
arch_inst.pc, arch_inst.function.c_str(), arg_num, num_params);
break;
}

// This is the delayed update of the program counter.
// ir.CreateStore(next_pc, pc_ref);
auto arg = NthArgument(isel_func, arg_num);
auto arg_type = arg->getType();
auto operand = LiftOperand(arch_inst, block, state_ptr, arg, op);
arg_num += 1;
auto op_type = operand->getType();
CHECK_EQ(op_type, arg_type)
<< "Lifted operand " << op.Serialize() << " to " << arch_inst.function
<< " does not have the correct type. Expected "
<< LLVMThingToString(arg_type) << " but got "
<< LLVMThingToString(op_type) << ".";

args.push_back(operand);
}

// We don't know what the `NEXT_PC` is going to be because of the next
// instruction size is unknown (really, it's likely to be
// `arch->MaxInstructionSize()`), and for normal instructions, before they
// are lifted, we do the `PC = NEXT_PC + size`, so this is fine.
// ir.CreateStore(next_pc, next_pc_ref);
LOG(FATAL) << "Unexpected to enter the `is_delayed`.";
// llvm::Value *temp_args[] = {ir.CreateLoad(impl->memory_ptr_type, mem_ptr_ref)};
// ir.CreateStore(ir.CreateCall(impl->intrinsics->delay_slot_end, temp_args), mem_ptr_ref);
if (status == kLiftedInstruction) {
ir.CreateCall(isel_func, args);
}
}

/* append `debug_memory_value_change` function call */
Expand Down Expand Up @@ -626,6 +654,9 @@ llvm::Value *InstructionLifter::LiftImmediateOperand(Instruction &inst, llvm::Ba
} else if (arg->getType()->isDoubleTy()) {
auto double_val = *reinterpret_cast<double *>(&arch_op.imm.val);
return llvm::ConstantFP::get(arg->getType(), double_val);
} else if (arg->getType()->isPointerTy()) {
auto int_val = llvm::ConstantInt::get(impl->word_type, arch_op.imm.val, arch_op.imm.is_signed);
return llvm::ConstantExpr::getIntToPtr(int_val, arg->getType());
}

else {
Expand Down Expand Up @@ -856,15 +887,20 @@ llvm::Value *InstructionLifter::LiftOperand(Instruction &inst, llvm::BasicBlock

case Operand::kTypeImmediate: return LiftImmediateOperand(inst, block, arg, arch_op);

case Operand::kTypeAddress:
if (arg_type != impl->word_type) {
case Operand::kTypeAddress: {
if (arg_type != impl->word_type && !arg_type->isPointerTy()) {
LOG(FATAL) << "Expected that a memory operand should be represented by "
<< "machine word type. Argument type is " << LLVMThingToString(arg_type)
<< " and word type is " << LLVMThingToString(impl->word_type)
<< " in instruction at 0x" << std::hex << inst.pc;
}

return LiftAddressOperand(inst, block, state_ptr, arg, arch_op);
auto addr_val = LiftAddressOperand(inst, block, state_ptr, arg, arch_op);
if (arg_type->isPointerTy() && !addr_val->getType()->isPointerTy()) {
return new llvm::IntToPtrInst(addr_val, arg_type, "", block);
}
return addr_val;
}

case Operand::kTypeExpression:
case Operand::kTypeRegisterExpression:
Expand Down
2 changes: 1 addition & 1 deletion backend/remill/scripts/x86/print_save_state_asm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ ${CXX} \
-Wno-nested-anon-types -Wno-variadic-macros -Wno-extended-offsetof \
-Wno-invalid-offsetof \
-Wno-return-type-c-linkage \
-m64 -I${DIR} \
-m64 -I"${DIR}/include" \
-DADDRESS_SIZE_BITS=64 -DHAS_FEATURE_AVX=1 -DHAS_FEATURE_AVX512=1 \
$DIR/tests/X86/PrintSaveState.cpp

Expand Down
21 changes: 16 additions & 5 deletions backend/remill/tests/X86/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ cmake_minimum_required(VERSION 3.2)

function(COMPILE_X86_TESTS name address_size has_avx has_avx512)
set(X86_TEST_FLAGS
-I${CMAKE_SOURCE_DIR}
-DADDRESS_SIZE_BITS=${address_size}
-DHAS_FEATURE_AVX=${has_avx}
-DHAS_FEATURE_AVX512=${has_avx512}
Expand All @@ -35,6 +34,12 @@ function(COMPILE_X86_TESTS name address_size has_avx has_avx512)
PRIVATE ${X86_TEST_FLAGS} -DIN_TEST_GENERATOR
)

target_include_directories(lift-${name}-tests PRIVATE
"${CMAKE_SOURCE_DIR}/backend/remill/include"
"${CMAKE_SOURCE_DIR}/backend/remill"
"${CMAKE_SOURCE_DIR}"
)

file(GLOB X86_TEST_FILES
"${CMAKE_CURRENT_LIST_DIR}/*/*.S"
)
Expand Down Expand Up @@ -62,6 +67,11 @@ function(COMPILE_X86_TESTS name address_size has_avx has_avx512)
target_link_libraries(run-${name}-tests PUBLIC remill GTest::gtest)
target_compile_definitions(run-${name}-tests PUBLIC ${PROJECT_DEFINITIONS})

target_include_directories(run-${name}-tests PRIVATE
"${CMAKE_SOURCE_DIR}/backend/remill/include"
"${CMAKE_SOURCE_DIR}/backend/remill"
)

target_compile_options(run-${name}-tests
PRIVATE ${X86_TEST_FLAGS}
)
Expand All @@ -75,10 +85,11 @@ find_package(GTest CONFIG REQUIRED)

enable_testing()

if (NOT APPLE)
COMPILE_X86_TESTS(x86 32 0 0)
COMPILE_X86_TESTS(x86_avx 32 1 0)
endif()
# 32-bit x86 tests disabled for now
# if (NOT APPLE)
# COMPILE_X86_TESTS(x86 32 0 0)
# COMPILE_X86_TESTS(x86_avx 32 1 0)
# endif()

COMPILE_X86_TESTS(amd64 64 0 0)
COMPILE_X86_TESTS(amd64_avx 64 1 0)
Loading