使用C/C++语言(或汇编语言)创建出PE文件后,源代码就被转换成了机器码。一名合格的
逆向分析人员必须能够解析这些机器码,并理解其工作原理。但机器码是用二进制(0与1)表示
的,我们很难读懂它。因此一般要把机器码转换为16进制代码,转换后可读性提高,但我们识读16进制代码时仍然会感到吃力。虽然Intel公司给出了指令解析手册,例如解析这个指令 68 A0B44000 PUSH 0040B4A0,在指令解析手册的操作码映射表中查找指令的第一个字节68。可以看到操作码68对应 PUSH d64 Iz,
其中Iz用来表示操作数的类型,大写字母I指寻址方法是立即数寻址,小写字母z在32位模式下表示的大小为DWORD,也就是4个字节。综合以上信息,操作码68对应的PUSH Iz指令中,Iz(操作数格式)表示大小为4个字节的立即数,所以继续读取68之后的4个字节(0040B4A0),整条指令最终解析为PUSH 0040B4A0。
但是这样的办法未免太麻烦,一个一个地肉眼去查实在太浪费时间。
所以,最后借助调试器内嵌的反汇编引擎将机器码转换为反汇编代码,识读这些反汇编代码就容易多了。
BeaEngine 是一个 C 库,用于解码 16 位、32 位和 64 位英特尔架构的指令。它支持标准指令集,以及 FPU、MMX、SSE、SSE2、SSE3、SSSE3、SSE4.1、SSE4.2、VMX、CLMUL、AES、MPX、AVX、AVX2、AVX512(VEX 和 EVEX 前缀)、CET、BMI1、BMI2、SGX、UINTR、KL、TDX 和 AMX 等扩展指令集。如果您想分析恶意代码或更一般的混淆代码,BeaEngine 会返回一个复杂的结构体来精确描述被分析的指令。
地址:https://github.com/BeaEngine/beaengine
首先从GitHub上下载,下载好以后直接解压到指定磁盘,然后配置VS项目,在【调试】->【属性】->【VC++目录】中将include和lib目录包含到项目中。
或者自己手动导入以下文件:
beaengine-5.3.0\headers\BeaEngine.h
beaengine-5.3.0\headers\Includes\basic_types.h
beaengine-5.3.0\headers\Includes\export.h
beaengine-5.3.0\dll_x64\BeaEngine.dll
beaengine-5.3.0\dll_x64\BeaEngine.lib
BeaEngine 反汇编特定字符串
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include "BeaEngine.h"
#pragma comment(lib, "BeaEngine.lib")
void DisassembleCode(char *start_offset, int size)
{
DISASM infos;
int len;
char *end_offset = (char*)start_offset + size;
(void)memset(&infos, 0, sizeof(DISASM));
infos.EIP = (UInt64)start_offset;
while (!infos.Error) {
infos.SecurityBlock = (int)end_offset - infos.EIP;
if (infos.SecurityBlock <= 0) break;
len = Disasm(&infos);
switch (infos.Error)
{
case OUT_OF_BLOCK:
(void)printf("disasm engine is not allowed to read more memory \n");
break;
case UNKNOWN_OPCODE:
(void)printf("%s\n", &infos.CompleteInstr);
infos.EIP += 1;
infos.Error = 0;
break;
default:
(void)printf("%s\n", &infos.CompleteInstr);
infos.EIP += len;
}
};
return;
}
/*
 * Demo entry point: disassembles two hard-coded machine-code samples with
 * DisassembleCode() and then waits for a key press.
 */
int main(int argc, char *argv[])
{
    /* Sample 1: machine code written as a string literal (11 code bytes). */
    char buffer[] = "\x55\x8b\xec\x81\xec\x24\x03\x00\x00\x6a\x17";

    /*
     * Sample 2: machine code written as a byte array. The trailing 0x00
     * completes the 4-byte displacement of the FF 15 instruction; the
     * original array stopped one byte short of a full instruction.
     */
    BYTE bTest[] = { 0x68, 0x37, 0x31, 0x40, 0x00,
                     0xFF, 0x15, 0x0C, 0x20, 0x40, 0x00 };

    (void)argc;
    (void)argv;

    /* sizeof - 1 excludes the literal's terminating NUL byte. */
    DisassembleCode(buffer, (int)sizeof(buffer) - 1);

    /*
     * The original passed `buffer` with a length of 14 here, reading past
     * the end of the 11-byte sample and leaving bTest unused.
     */
    DisassembleCode((char *)bTest, (int)sizeof(bTest));

    system("pause");
    return 0;
}
反汇编字节数组
/*
 * Disassemble a byte array and print each decoded instruction to stdout
 * using MASM syntax.
 *
 * ptr: first byte of the machine code; nothing happens if NULL.
 * len: number of bytes available at ptr; nothing is decoded when it is zero
 *      or negative.
 *
 * Bytes reported as UNKNOWN_OPCODE are skipped silently, one at a time.
 * Any other error ends the loop.
 */
void DisassembleCodeByte(BYTE *ptr,int len)
{
    DISASM Disasm_Info;
    /*
     * The original hard-coded the end of the buffer as ptr + 10 and then
     * reused `len` for the instruction length, so the caller's length was
     * ignored. The caller's length now bounds the decode.
     */
    int remaining = len;

    if (ptr == NULL) {
        return;
    }

    (void)memset(&Disasm_Info, 0, sizeof(DISASM));
    Disasm_Info.EIP = (UInt64)ptr;
    /*
     * NOTE(review): the original author's comment says 1 selects 32-bit
     * decoding and 0 selects 64-bit decoding; confirm against the BeaEngine
     * documentation for the version in use.
     */
    Disasm_Info.Archi = 1;
    Disasm_Info.Options = MasmSyntax; /* emit MASM-style syntax */

    while (!Disasm_Info.Error && remaining > 0) {
        int instr_len;
        int step = 0;

        Disasm_Info.SecurityBlock = (unsigned int)remaining;
        instr_len = Disasm(&Disasm_Info);

        switch (Disasm_Info.Error) {
        case OUT_OF_BLOCK:
            break;
        case UNKNOWN_OPCODE:
            step = 1;
            Disasm_Info.Error = 0; /* clear the error so the loop keeps going */
            break;
        default:
            printf("%s \n", Disasm_Info.CompleteInstr);
            step = instr_len;
            break;
        }

        /* Without forward progress the loop would never terminate. */
        if (step <= 0) {
            break;
        }
        Disasm_Info.EIP += step;
        remaining -= step;
    }
}
/*
 * Demo entry point: disassembles a hard-coded byte array with
 * DisassembleCodeByte() and then waits for a key press.
 */
int main(int argc, char *argv[])
{
    BYTE bTest[] = { 0x55, 0x8b, 0xec, 0x81, 0xec, 0x24, 0x03, 0x00, 0x00, 0x6a, 0x17 };

    (void)argc;
    (void)argv;

    /*
     * Pass the real array size. The original passed 10 for this 11-byte
     * array, which cut the final two-byte instruction in half.
     */
    DisassembleCodeByte(bTest, (int)sizeof(bTest));

    system("pause");
    return 0;
}
反汇编时,显示虚拟地址
void DisassembleCodeInstr(char *start_offset, char *end_offset, int virtual_address)
{
DISASM Disasm_Info;
int len;
(void)memset(&Disasm_Info, 0, sizeof(DISASM));
Disasm_Info.EIP = (UINT64)start_offset;
Disasm_Info.VirtualAddr = (UINT64)virtual_address;
Disasm_Info.Archi = 0;
Disasm_Info.Options = MasmSyntax;
while (!Disasm_Info.Error)
{
Disasm_Info.SecurityBlock = (UInt64)end_offset - Disasm_Info.EIP;
if (Disasm_Info.SecurityBlock <= 0)
break;
len = Disasm(&Disasm_Info);
switch (Disasm_Info.Error)
{
case OUT_OF_BLOCK:
break;
case UNKNOWN_OPCODE:
Disasm_Info.EIP += 1;
Disasm_Info.VirtualAddr += 1;
break;
default:
printf("%.16llx > %s\n", Disasm_Info.VirtualAddr,&Disasm_Info.CompleteInstr);
Disasm_Info.EIP += len;
Disasm_Info.VirtualAddr += len;
}
}
}
/*
 * Demo entry point: copies the first bytes of this program's own main()
 * into a heap buffer and disassembles the copy, displaying addresses
 * starting at 0x401000.
 */
int main(int argc, char *argv[])
{
    enum { SAMPLE_SIZE = 200 };
    void *pBuffer;

    (void)argc;
    (void)argv;

    pBuffer = malloc(SAMPLE_SIZE);
    if (pBuffer == NULL) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }

    /* Assumes at least SAMPLE_SIZE readable bytes follow main's entry point. */
    memcpy(pBuffer, (const void *)main, SAMPLE_SIZE);
    DisassembleCodeInstr((char *)pBuffer, (char *)pBuffer + SAMPLE_SIZE, 0x401000);

    free(pBuffer);
    system("pause");
    return 0;
}
检查EAX寄存器状态:如何只检索修改寄存器eax的指令?也就是说,当零号寄存器(REG0,即eax)被某条指令写入时,自动输出该指令所在的地址及其反汇编代码。
void DisassembleCodeInstr(char *start_offset, char *end_offset, int virtual_address)
{
DISASM Disasm_Info;
int len;
(void)memset(&Disasm_Info, 0, sizeof(DISASM));
Disasm_Info.EIP = (UINT64)start_offset;
Disasm_Info.VirtualAddr = (UINT64)virtual_address;
Disasm_Info.Archi = 0;
Disasm_Info.Options = MasmSyntax;
while (!Disasm_Info.Error)
{
Disasm_Info.SecurityBlock = (UInt64)end_offset - Disasm_Info.EIP;
if (Disasm_Info.SecurityBlock <= 0)
break;
len = Disasm(&Disasm_Info);
switch (Disasm_Info.Error)
{
case OUT_OF_BLOCK:
break;
case UNKNOWN_OPCODE:
Disasm_Info.EIP += 1;
Disasm_Info.VirtualAddr += 1;
break;
default:
if (
((Disasm_Info.Operand1.AccessMode == WRITE) && (Disasm_Info.Operand1.Registers.gpr & REG0)) ||
((Disasm_Info.Operand2.AccessMode == WRITE) && (Disasm_Info.Operand2.Registers.gpr & REG0)) ||
(Disasm_Info.Instruction.ImplicitModifiedRegs.gpr & REG0)
)
{
printf("%.16llx > %s \n", Disasm_Info.VirtualAddr, &Disasm_Info.CompleteInstr);
}
Disasm_Info.EIP += len;
Disasm_Info.VirtualAddr += len;
}
}
}
解码第三方可执行文件:
void DisassembleCodeRange(unsigned char *StartCodeSection, unsigned char *EndCodeSection, int(Virtual_Address))
{
DISASM Disasm_Info;
int len;
memset(&Disasm_Info, 0, sizeof(DISASM));
Disasm_Info.EIP = (UInt64)StartCodeSection;
Disasm_Info.VirtualAddr = (UInt64)Virtual_Address;
Disasm_Info.Archi = 0;
Disasm_Info.Options = MasmSyntax;
while (!Disasm_Info.Error)
{
Disasm_Info.SecurityBlock = (int)EndCodeSection - Disasm_Info.EIP;
len = Disasm(&Disasm_Info);
if (Disasm_Info.Error >= 0)
{
printf("%.16llx > %s \n", Disasm_Info.VirtualAddr, &Disasm_Info.CompleteInstr);
Disasm_Info.EIP += len;
Disasm_Info.VirtualAddr += len;
}
}
}
/*
 * Demo entry point: loads c://main.exe into memory and disassembles the
 * bytes at file offsets 1025..1098, displaying addresses starting at
 * 0x401000.
 *
 * Returns 0 on success and 1 if the file cannot be opened, measured, read in
 * full, or is too small to contain the requested range.
 */
int main(int argc, char *argv[])
{
    enum { RANGE_START = 1025, RANGE_END = 1099 };
    FILE *fp;
    long file_size;
    unsigned char *uBuffer;
    size_t bytes_read;

    (void)argc;
    (void)argv;

    /* Read-only access is enough; "rb+" would fail on write-protected files. */
    fp = fopen("c://main.exe", "rb");
    if (fp == NULL) {
        fprintf(stderr, "cannot open c://main.exe\n");
        return 1;
    }

    if (fseek(fp, 0, SEEK_END) != 0 || (file_size = ftell(fp)) < 0) {
        fprintf(stderr, "cannot determine file size\n");
        fclose(fp);
        return 1;
    }
    rewind(fp);

    /* The range handed to the disassembler must lie inside the file. */
    if (file_size < RANGE_END) {
        fprintf(stderr, "file is smaller than %d bytes\n", (int)RANGE_END);
        fclose(fp);
        return 1;
    }

    /*
     * No zero-fill is needed because the whole buffer is overwritten by
     * fread. The original memset used sizeof(uBuffer), which is the size of
     * the pointer and not of the allocation.
     */
    uBuffer = malloc((size_t)file_size);
    if (uBuffer == NULL) {
        fprintf(stderr, "out of memory\n");
        fclose(fp);
        return 1;
    }

    bytes_read = fread(uBuffer, 1, (size_t)file_size, fp);
    fclose(fp);
    if (bytes_read != (size_t)file_size) {
        fprintf(stderr, "short read\n");
        free(uBuffer);
        return 1;
    }

    /* Disassemble the machine code at file offsets 1025-1099. */
    DisassembleCodeRange(uBuffer + RANGE_START, uBuffer + RANGE_END, 0x401000);

    free(uBuffer);
    system("pause");
    return 0;
}