From c29ae7a65c636b8d1fa37c6589278dcdee97658f Mon Sep 17 00:00:00 2001 From: Dennis Brentjes Date: Mon, 19 Sep 2016 17:38:23 +0200 Subject: Started working on a x86 emulator. --- CMakeLists.txt | 2 +- disasm/CMakeLists.txt | 1 - disasm/dumpobject.cpp | 2 +- disasm/extractfunction.cpp | 42 +++++++++++++---- disasm/leparseutil.cpp | 97 -------------------------------------- disasm/leparseutil.hpp | 14 ------ emulate/CMakeLists.txt | 18 ++++++++ emulate/cpustate.hpp | 77 ++++++++++++++++++++++++++++++ emulate/emulate.cpp | 51 ++++++++++++++++++++ emulate/emulator.cpp | 0 emulate/emulator.hpp | 113 +++++++++++++++++++++++++++++++++++++++++++++ le/CMakeLists.txt | 1 + le/le_parse_util.cpp | 99 +++++++++++++++++++++++++++++++++++++++ le/le_parse_util.hpp | 14 ++++++ 14 files changed, 408 insertions(+), 123 deletions(-) delete mode 100644 disasm/leparseutil.cpp delete mode 100644 disasm/leparseutil.hpp create mode 100644 emulate/CMakeLists.txt create mode 100644 emulate/cpustate.hpp create mode 100644 emulate/emulate.cpp create mode 100644 emulate/emulator.cpp create mode 100644 emulate/emulator.hpp create mode 100644 le/le_parse_util.cpp create mode 100644 le/le_parse_util.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 47043ab..eeac074 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,4 +12,4 @@ add_subdirectory(binparse) add_subdirectory(mz) add_subdirectory(le) add_subdirectory(disasm) - +add_subdirectory(emulate) diff --git a/disasm/CMakeLists.txt b/disasm/CMakeLists.txt index cd5113c..04f7372 100644 --- a/disasm/CMakeLists.txt +++ b/disasm/CMakeLists.txt @@ -1,7 +1,6 @@ add_executable(disasm disasm.cpp - leparseutil.hpp leparseutil.cpp dumpobject.hpp dumpobject.cpp extractfunction.hpp extractfunction.cpp ) diff --git a/disasm/dumpobject.cpp b/disasm/dumpobject.cpp index f71208c..4d2e2f3 100644 --- a/disasm/dumpobject.cpp +++ b/disasm/dumpobject.cpp @@ -1,5 +1,5 @@ #include "dumpobject.hpp" -#include "leparseutil.hpp" +#include "le_parse_util.hpp" #include diff --git a/disasm/extractfunction.cpp b/disasm/extractfunction.cpp index f2da04c..288d78f 100644 --- a/disasm/extractfunction.cpp +++ b/disasm/extractfunction.cpp @@ -1,6 +1,6 @@ #include "extractfunction.hpp" -#include "leparseutil.hpp" +#include "le_parse_util.hpp" #include "output.hpp" @@ -14,7 +14,7 @@ void extract_function(std::string file_path, binparse::Value32 object_id, binparse::Offset32 function_offset) { std::stringstream ss; - ss << "f_" << function_offset << ".a"; + ss << "f_" << function_offset << ".asm"; std::string o_file = ss.str(); if(boost::filesystem::exists(o_file)) { return; @@ -26,7 +26,6 @@ void extract_function(std::string file_path, binparse::Value32 object_id, binpar auto file = parse_file(ifs); - _DInst decinst; _DecodeType dt = Decode32Bits; unsigned int decodedInstructionsCount = 0; @@ -42,7 +41,11 @@ void extract_function(std::string file_path, binparse::Value32 object_id, binpar binparse::Offset32 furthestjmp; bool done = false; + + std::cout << "Scanning function: " << function_offset << std::endl; while(!done) { + _DInst decinst; + distorm_decompose64(&ci, &decinst, 1, &decodedInstructionsCount); if(decinst.flags == FLAG_NOT_DECODABLE) { @@ -52,7 +55,22 @@ void extract_function(std::string file_path, binparse::Value32 object_id, binpar switch(META_GET_FC(decinst.meta)) { case FC_CALL: { binparse::Offset32 target = binparse::Offset32(INSTRUCTION_GET_TARGET(&decinst)); - extract_function(file_path, object_id, target); + + _DecodedInst inst; + distorm_format64(&ci, &decinst, &inst); + + //std::cout << inst.mnemonic.p << " " << inst.operands.p << std::endl; + //std::cout << "target: " << target << std::endl; + + if(decinst.ops[0].type == O_PC || decinst.ops[0].type == O_PTR || decinst.ops[0].type == O_DISP) { + + //extract_function(file_path, object_id, target); + + } else if (decinst.ops[0].type == O_SMEM) { + std::cout << "unhandled case: " << inst.mnemonic.p << " " << inst.operands.p << std::endl; + } else { + std::cout << "unrecognized case: " << inst.mnemonic.p << " " << inst.operands.p << std::endl; + } break; } case FC_CMOV: { @@ -79,7 +97,7 @@ void extract_function(std::string file_path, binparse::Value32 object_id, binpar break; } case FC_SYS: { - //std::cout << "SYS?!?:\t"; + //std::cout << "syscall:\t"; break; } case FC_UNC_BRANCH: { @@ -96,6 +114,11 @@ void extract_function(std::string file_path, binparse::Value32 object_id, binpar } } + _DecodedInst inst; + distorm_format64(&ci, &decinst, &inst); + + std::cout << binparse::Offset32(ci.codeOffset) << " " << inst.mnemonic.p << " " << inst.operands.p << std::endl; + if(!done) { auto distance = ci.nextOffset - ci.codeOffset; @@ -104,8 +127,8 @@ void extract_function(std::string file_path, binparse::Value32 object_id, binpar ci.codeOffset += distance; } } - std::cout << "Function starts at: " << function_offset << std::endl; - std::cout << "Function ends at: " << binparse::Offset32(ci.nextOffset) << std::endl; + //std::cout << "Function starts at: " << function_offset << std::endl; + //std::cout << "Function ends at: " << binparse::Offset32(ci.nextOffset) << std::endl; auto end = ci.nextOffset; @@ -117,13 +140,14 @@ void extract_function(std::string file_path, binparse::Value32 object_id, binpar ci.features = DF_NONE; while(ci.nextOffset < end) { + _DInst decinst; distorm_decompose(&ci, &decinst, 1, &decodedInstructionsCount); _DecodedInst inst; distorm_format64(&ci, &decinst, &inst); - std::cout << std::hex << std::setw(8) << std::setfill('0') << inst.offset << ":\t" << inst.mnemonic.p << " " << inst.operands.p << std::endl; - output << inst.mnemonic.p << " " << inst.operands.p << std::endl; + //std::cout << std::hex << std::setw(8) << std::setfill('0') << inst.offset << ":\t" << inst.mnemonic.p << " " << inst.operands.p << std::endl; + output << binparse::Offset32(inst.offset) << " " << inst.mnemonic.p << " " << inst.operands.p << std::endl; auto distance = ci.nextOffset - ci.codeOffset; diff --git a/disasm/leparseutil.cpp b/disasm/leparseutil.cpp deleted file mode 100644 index f2af13c..0000000 --- a/disasm/leparseutil.cpp +++ /dev/null @@ -1,97 +0,0 @@ -#include "leparseutil.hpp" - -#include "parse.hpp" - -std::vector read_file_part(std::istream& is, std::streamsize offset, size_t length) { - is.clear(); - is.seekg(0, std::ios::beg); - - is.ignore(offset); - - std::vector vec; - vec.reserve(length); - - std::copy_n(std::istream_iterator(is), length, std::back_inserter(vec)); - return vec; -} - -std::vector read_object(std::istream& is, le::File file, binparse::Value32 object_id) { - auto object = file.object_table.entries[object_id]; - auto index = object.page_table_index; - - return read_file_part(is, file.le_header.data_page_offset, file.le_header.page_size * (object.nr_page_table_entries -(index -1))); -} - -le::File parse_file(std::istream& is) -{ - is.clear(); - is.seekg(0, std::ios::beg); - - auto file = le::parse_file(is); - - return file; -} - -void relocate(std::vector fixups, std::vector& binary, binparse::Value32 page_nr, le::File const& file) { - binparse::Offset32 page_offset; - for(auto&& object : file.object_table.entries) { - if(page_nr >= object.second.page_table_index && page_nr < object.second.page_table_index + object.second.nr_page_table_entries) { - page_offset = object.second.reloc_base_address; - page_offset += ((page_nr - 1) - object.second.page_table_index) * file.le_header.page_size; - break; - } - } - - for(auto&& fixup : fixups) { - auto internal = boost::get(fixup.data); - - auto source_offset = boost::get(fixup.source_offset_or_source_list_count); - - auto target_object = boost::get(internal.object_number); - - auto binary_offset = page_offset + (int32_t) source_offset; - - binparse::Offset32 dest_offset; - if(bit(4, fixup.target_flags)) { - dest_offset = boost::get(internal.target_offset); - } else { - dest_offset = binparse::Offset32(boost::get(internal.target_offset)); - } - dest_offset += file.object_table.entries.at(target_object).reloc_base_address; - for(int i = 0; i < 4; ++i) { - uint8_t& byte = *(binary.data() + binary_offset + i); - byte = ((dest_offset >> (i * 8)) & 255); - } - } -} - -std::vector load_binary(le::File file) -{ - size_t binary_size = 0; - - for(auto&& object : file.object_table.entries) { - auto furthest_object_point = object.second.reloc_base_address + (object.second.nr_page_table_entries * file.le_header.page_size); - if (furthest_object_point > binary_size) { - binary_size = furthest_object_point; - } - } - - std::vector binary(binary_size, 0x90); - - for(auto&& object : file.object_table.entries) { - auto index = object.second.page_table_index; - for(binparse::Value32 i = index ; i < index + object.second.nr_page_table_entries; ++i) { - auto page = file.pages.map[i]; - - binparse::Offset32 dest_offset = binparse::Offset32(object.second.reloc_base_address + ((i-1)-index) * file.le_header.page_size); - - std::copy(page.begin(), page.end(), binary.data() + dest_offset); - } - } - - for(auto&& relocation : file.fixup_record_table.entries) { - relocate(relocation.second, binary, relocation.first, file); - } - - return binary; -} diff --git a/disasm/leparseutil.hpp b/disasm/leparseutil.hpp deleted file mode 100644 index 77bcaf1..0000000 --- a/disasm/leparseutil.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include "le_file.hpp" - -#include - -le::File parse_file(std::istream& is); - -std::vector read_file_part(std::istream& is, std::streamsize offset, size_t length); - -std::vector read_object(std::istream& is, le::File file, binparse::Value32 object_id); - -std::vector relocate(le::File& file, std::vector page, size_t index); -std::vector load_binary(le::File file); \ No newline at end of file diff --git a/emulate/CMakeLists.txt b/emulate/CMakeLists.txt new file mode 100644 index 0000000..824d8c3 --- /dev/null +++ b/emulate/CMakeLists.txt @@ -0,0 +1,18 @@ + +add_executable(emulate + emulate.cpp + cpustate.hpp + emulator.hpp emulator.cpp +) + +find_package(Boost COMPONENTS filesystem program_options system REQUIRED) + +find_package(distorm3 REQUIRED CONFIG) + +target_link_libraries(emulate + PRIVATE Boost::program_options + PRIVATE Boost::system + PRIVATE Boost::filesystem + PRIVATE distorm3 + PRIVATE le +) \ No newline at end of file diff --git a/emulate/cpustate.hpp b/emulate/cpustate.hpp new file mode 100644 index 0000000..fb9c377 --- /dev/null +++ b/emulate/cpustate.hpp @@ -0,0 +1,77 @@ +#pragma once + +#include +#include +#include + +#define REGISTER1( NAME ) \ +private: \ + alignas(4) std::array NAME##_storage = {{0,0,0,0}}; \ +public: \ + uint32_t& e##NAME##x() { \ + return *reinterpret_cast(NAME##_storage.data()); \ + } \ + \ + uint16_t& NAME##x() { \ + return *reinterpret_cast(NAME##_storage.data()); \ + } \ + \ + uint8_t& NAME##h() { \ + return *reinterpret_cast(NAME##_storage.data()+1); \ + } \ + \ + uint8_t& NAME##l() { \ + return *reinterpret_cast(NAME##_storage.data()); \ + } \ + +#define REGISTER2( NAME ) \ +private: \ + alignas(4) std::array NAME##_storage = {{0,0,0,0}}; \ +public: \ + uint32_t& e##NAME() { \ + return *reinterpret_cast(NAME##_storage.data()); \ + } \ + \ + uint16_t& NAME() { \ + return *reinterpret_cast(NAME##_storage.data()); \ + } \ + +#define EFLAGS \ +private: \ + alignas(8) std::bitset<32> storage = 2; \ +public: \ + using ref = std::bitset<32>::reference; \ + ref cf() { return storage[0]; } \ + ref pf() { return storage[2]; } \ + ref af() { return storage[4]; } \ + ref zf() { return storage[6]; } \ + ref sf() { return storage[7]; } \ + ref tf() { return storage[8]; } \ + ref intf() { return storage[9]; } \ + ref df() { return storage[10]; } \ + ref of() { return storage[11]; } \ + /*TODO: iopl*/ \ + ref nt() { return storage[14]; } \ + ref rf() { return storage[16]; } \ + ref vm() { return storage[17]; } \ + ref ac() { return storage[18]; } \ + ref vif() { return storage[19]; } \ + ref vip() { return storage[20]; } \ + ref id() { return storage[21]; } \ + +struct CpuState { + REGISTER2(ip) + REGISTER1(a) + REGISTER1(c) + REGISTER1(d) + REGISTER1(b) + REGISTER2(sp) + REGISTER2(bp) + REGISTER2(si) + REGISTER2(di) + EFLAGS +}; + +#undef REGISTER1 +#undef REGISTER2 +#undef EFLAGS \ No newline at end of file diff --git a/emulate/emulate.cpp b/emulate/emulate.cpp new file mode 100644 index 0000000..4254dcf --- /dev/null +++ b/emulate/emulate.cpp @@ -0,0 +1,51 @@ + +#include "emulator.hpp" + +#include +#include +#include + +int main(int argc, char* argv[]) { + boost::program_options::options_description description; + description.add_options() + ("help,h", "produces this help message") + ("exe,e", boost::program_options::value(), "The LE executable to parse the header for.") + ; + + boost::program_options::variables_map vm; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, description), vm); + boost::program_options::notify(vm); + + if(vm.count("help")) { + std::cout << description << std::endl; + return 0; + } + + boost::filesystem::path file_path; + if(vm.count("exe")) { + std::string exe_file = vm["exe"].as(); + + if(boost::filesystem::exists(exe_file)) { + if(!boost::filesystem::is_directory(exe_file)) { + file_path = exe_file; + } else { + std::cerr << exe_file << " is a folder" << std::endl; + std::cerr << std::endl; + std::cerr << description << std::endl; + return -1; + } + } else { + std::cerr << "file: " << exe_file << " does not exist" << std::endl; + std::cerr << std::endl; + std::cerr << description << std::endl; + return -1; + } + } else { + std::cerr << "Option \"exe_file\" is required"; + std::cerr << std::endl; + std::cerr << description << std::endl; + return -1; + } + + emulate(file_path.string()); +} \ No newline at end of file diff --git a/emulate/emulator.cpp b/emulate/emulator.cpp new file mode 100644 index 0000000..e69de29 diff --git a/emulate/emulator.hpp b/emulate/emulator.hpp new file mode 100644 index 0000000..e894042 --- /dev/null +++ b/emulate/emulator.hpp @@ -0,0 +1,113 @@ +#pragma once + +#include "cpustate.hpp" + +#include "le_parse_util.hpp" + +#include +#include + +#include +#include + +class Emulator { +public: + CpuState cpu; + + std::map> opcode_handlers; + + + + void handle_I_JMP(_DInst inst) { + if(inst.ops[0].type == O_PC || inst.ops[0].type == O_PTR || inst.ops[0].type == O_DISP) { + binparse::Offset32 target = binparse::Offset32(INSTRUCTION_GET_TARGET(&inst)); + cpu.eip() = target; + } else if (inst.ops[0].type == O_SMEM) { + throw UnhandledInstruction(); + } else { + throw UnrecognizedInstruction(); + } + } + void handle_I_STI(_DInst) { + cpu.intf() = 1; + } + + void handle_I_AND(_DInst inst) { + std::cout << inst.ops[0].size << std::endl; + //get_op(0, inst) &= get_op(1, inst); + } + +public: + Emulator(binparse::Offset32 init_eip, binparse::Offset32 init_esp) + : cpu() + { + cpu.eip() = init_eip; + cpu.esp() = init_esp; + + #define REGISTER_HANDLER(OPCODE) opcode_handlers[OPCODE] = std::bind(&Emulator::handle_##OPCODE, this, std::placeholders::_1) + + REGISTER_HANDLER(I_JMP); + REGISTER_HANDLER(I_STI); + REGISTER_HANDLER(I_AND); + + #undef REGISTER_HANDLER + } + + struct UnhandledInstruction : public std::runtime_error { + UnhandledInstruction() + : std::runtime_error("Encountered unhandled instruction") + {} + }; + + struct UnrecognizedInstruction : public std::runtime_error { + UnrecognizedInstruction() + : std::runtime_error("Encountered unhandled instruction") + {} + }; + + void handle_instruction(_DInst inst) { + opcode_handlers.at(inst.opcode)(inst); + } + +}; + +void emulate(std::string file_path) { + std::ifstream file_stream(file_path, std::ios::binary); + file_stream.unsetf(std::ios::skipws); + auto file = le::parse_file(file_stream); + auto binary = load_binary(file); + + _CodeInfo ci; + _DecodeType dt = Decode32Bits; + + auto code_object = file.object_table.entries.at(file.le_header.EIP_object); + auto initial_eip = code_object.reloc_base_address + file.le_header.EIP; + + auto data_object = file.object_table.entries.at(file.le_header.ESP_object); + auto initial_esp = data_object.reloc_base_address + file.le_header.ESP; + + Emulator emulator(initial_eip, initial_esp); + + unsigned int decodedInstructionsCount; + + bool run = true; + while(run) { + ci.code = binary.data() + emulator.cpu.eip(); + ci.nextOffset = emulator.cpu.eip(); + ci.codeLen = binary.size() - emulator.cpu.eip(); + ci.codeOffset = emulator.cpu.eip(); + ci.dt = dt; + ci.features = DF_NONE; + + _DInst decinst; + distorm_decompose(&ci, &decinst, 1, &decodedInstructionsCount); + + emulator.cpu.eip() += decinst.size; + + _DecodedInst inst; + distorm_format64(&ci, &decinst, &inst); + std::cout << "CurrentInstruction: " << std::hex << std::setw(8) << std::setfill('0') << inst.offset << ":\t" << inst.mnemonic.p << " " << inst.operands.p << std::endl; + + emulator.handle_instruction(decinst); + } +} diff --git a/le/CMakeLists.txt b/le/CMakeLists.txt index d01ae7f..7f738fc 100644 --- a/le/CMakeLists.txt +++ b/le/CMakeLists.txt @@ -11,6 +11,7 @@ add_library(le STATIC le_fixup_page_table.hpp le_fixup_page_table.cpp le_fixup_record_table.hpp le_fixup_record_table.cpp le_pages.hpp le_pages.cpp + le_parse_util.hpp le_parse_util.cpp ) target_include_directories(le diff --git a/le/le_parse_util.cpp b/le/le_parse_util.cpp new file mode 100644 index 0000000..ab2d70d --- /dev/null +++ b/le/le_parse_util.cpp @@ -0,0 +1,99 @@ +#include "le_parse_util.hpp" + +#include "parse.hpp" + +std::vector read_file_part(std::istream& is, std::streamsize offset, size_t length) { + is.clear(); + is.seekg(0, std::ios::beg); + + is.ignore(offset); + + std::vector vec; + vec.reserve(length); + + std::copy_n(std::istream_iterator(is), length, std::back_inserter(vec)); + return vec; +} + +std::vector read_object(std::istream& is, le::File file, binparse::Value32 object_id) { + auto object = file.object_table.entries[object_id]; + auto index = object.page_table_index; + + return read_file_part(is, file.le_header.data_page_offset, file.le_header.page_size * (object.nr_page_table_entries -(index -1))); +} + +le::File parse_file(std::istream& is) +{ + is.clear(); + is.seekg(0, std::ios::beg); + + auto file = le::parse_file(is); + + return file; +} + +void relocate(std::vector fixups, std::vector& binary, binparse::Value32 page_nr, le::File const& file) { + binparse::Offset32 page_offset; + for(auto&& object : file.object_table.entries) { + if(page_nr >= object.second.page_table_index && page_nr < object.second.page_table_index + object.second.nr_page_table_entries) { + page_offset = object.second.reloc_base_address; + page_offset += ((page_nr - 1) - object.second.page_table_index) * file.le_header.page_size; + break; + } + } + + for(auto&& fixup : fixups) { + auto internal = boost::get(fixup.data); + + auto source_offset = boost::get(fixup.source_offset_or_source_list_count); + + auto target_object = boost::get(internal.object_number); + + auto binary_offset = page_offset + (int32_t) source_offset; + + binparse::Offset32 dest_offset; + if(bit(4, fixup.target_flags)) { + dest_offset = boost::get(internal.target_offset); + } else { + dest_offset = binparse::Offset32(boost::get(internal.target_offset)); + } + dest_offset += file.object_table.entries.at(target_object).reloc_base_address; + for(int i = 0; i < 4; ++i) { + uint8_t& byte = *(binary.data() + binary_offset + i); + byte = ((dest_offset >> (i * 8)) & 255); + } + } +} + +std::vector load_binary(le::File file) +{ + size_t binary_size = 0; + + for(auto&& object : file.object_table.entries) { + auto furthest_object_point = object.second.reloc_base_address + (object.second.nr_page_table_entries * file.le_header.page_size); + if (furthest_object_point > binary_size) { + binary_size = furthest_object_point; + } + } + + std::vector binary(binary_size, 0x90); + + for(auto&& entry : file.object_table.entries) { + auto&& object = entry.second; + auto index = object.page_table_index; + for(binparse::Value32 i = index ; i < index + object.nr_page_table_entries; ++i) { + auto page = file.pages.map[i]; + + binparse::Offset32 dest_offset = binparse::Offset32(object.reloc_base_address + (i-index) * file.le_header.page_size); + + std::copy(page.begin(), page.end(), binary.data() + dest_offset); + } + } + + for(auto&& entry : file.fixup_record_table.entries) { + auto&& relocation = entry.second; + relocate(relocation, binary, entry.first, file); + } + + return binary; +} diff --git a/le/le_parse_util.hpp b/le/le_parse_util.hpp new file mode 100644 index 0000000..77bcaf1 --- /dev/null +++ b/le/le_parse_util.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include "le_file.hpp" + +#include + +le::File parse_file(std::istream& is); + +std::vector read_file_part(std::istream& is, std::streamsize offset, size_t length); + +std::vector read_object(std::istream& is, le::File file, binparse::Value32 object_id); + +std::vector relocate(le::File& file, std::vector page, size_t index); +std::vector load_binary(le::File file); \ No newline at end of file -- cgit v1.2.3-70-g09d2