最近正在看四哥的黄皮书,看里面so文件解析这一章是用java实现的,为加深理解自己实现了一遍。
写这种文件解析和以前写网络 packet 解析的思路差不多:先搞清楚文件格式,然后把格式翻译成代码。下面是 ELF 文件头(ELF Header)的结构。ELF 文件中只有 ELF Header 的位置是固定的(位于文件开头)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
| typedef struct elf32_hdr{ unsigned char e_ident[EI_NIDENT]; Elf32_Half e_type; Elf32_Half e_machine; Elf32_Word e_version; Elf32_Addr e_entry; Elf32_Off e_phoff; Elf32_off e_shoff; Elf32_Word e_flags; Elf32_Half e_ehsize; Elf32_Half e_phentsize; Elf32_Half e_phnum; Elf32_Half e_shentsize; Elf32_Half e_shnum; Elf32_Half e_shstrndx; }Elf32_Ehdr;
|
elfhdr 是一个字典,以 key:value 的方式存储 header 的各个属性。每个属性从第几个字节开始、占几个字节都是固定的,剩下的就是学习 Python 的文件操作函数和 binascii 库函数。调试的话目前基本就用 print 输出,更高级的方法还不太清楚。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
def init_elf(filename, header=None):
    """Read the ELF32 file header of *filename* into a dictionary.

    The first 52 bytes of the file are read in one go and decoded with
    ``struct``.  The byte order is taken from ``e_ident[5]`` (EI_DATA):
    a value of 2 selects big-endian, anything else little-endian.  The
    field layout is always the ELF32 one.

    Args:
        filename: Path of the file to parse.
        header: Dictionary to fill in.  When omitted, the module-level
            ``elfhdr`` dictionary is updated in place.

    Returns:
        The populated dictionary.  ``'magic'`` holds the raw 16-byte
        ``e_ident`` array; every other key is the integer value of the
        ELF header field of the same name.

    Raises:
        SystemExit: After printing a message, when the file does not start
            with the ELF magic number.
        ValueError: When the file is too short to hold an ELF32 header.
    """
    # Function-local import so that this block does not depend on the
    # file's import section.
    import struct

    ident_size = 16
    header_size = 52
    field_names = (
        'e_type', 'e_machine', 'e_version', 'e_entry', 'e_phoff',
        'e_shoff', 'e_flags', 'e_ehsize', 'e_phentsize', 'e_phnum',
        'e_shentsize', 'e_shnum', 'e_shstrndx',
    )

    if header is None:
        header = elfhdr

    # "with" guarantees the handle is closed on every exit path.
    with open(filename, "rb") as f:
        raw = f.read(header_size)

    if raw[:4] != b'\x7fELF':
        print("the file is not elf!")
        raise SystemExit(0)
    if len(raw) < header_size:
        raise ValueError(
            "truncated ELF header: expected %d bytes, got %d"
            % (header_size, len(raw)))

    # bytearray yields integers when indexed on both Python 2 and Python 3.
    ident = bytearray(raw[:ident_size])
    byte_order = '>' if ident[5] == 2 else '<'

    # Two halves, five words, six halves: 36 bytes following e_ident.
    values = struct.unpack(byte_order + 'HHIIIIIHHHHHH',
                           raw[ident_size:header_size])

    header['magic'] = raw[:ident_size]
    for name, value in zip(field_names, values):
        header[name] = value
    return header
|
解析函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
def parse_elfhdr(header=None):
    """Print a human-readable summary of a parsed ELF header.

    Args:
        header: Dictionary with the raw 16-byte ``e_ident`` array under
            ``'magic'`` and the integer ELF header fields under their
            field names.  When omitted, the module-level ``elfhdr``
            dictionary is used.

    Raises:
        SystemExit: After printing a message, when the class byte of
            ``e_ident`` is neither 1 (ELF32) nor 2 (ELF64).
        ValueError: When ``'magic'`` holds fewer than 7 bytes.
    """
    if header is None:
        header = elfhdr

    # bytearray yields integers when indexed on both Python 2 and Python 3.
    ident = bytearray(header['magic'])
    if len(ident) < 7:
        raise ValueError("e_ident must contain at least 7 bytes")

    # Every identification byte is printed, including the last one.
    print("Magic: " + " ".join('%02x' % byte for byte in ident))

    ei_class = ident[4]
    if ei_class == 1:
        print("Class: ELF32")
    elif ei_class == 2:
        print("Class: ELF64")
    else:
        print("invalid el_class!")
        raise SystemExit(0)

    ei_data = ident[5]
    if ei_data == 1:
        print("Data: little endian")
    elif ei_data == 2:
        print("Data: big endian")
    else:
        # An unknown data encoding is reported but does not abort the dump.
        print("invalid el_data!")

    # The version byte is taken as a number, not re-parsed from hex text.
    print("Version: " + str(ident[6]) + " (current)")

    print("Type: " + str(header['e_type']))
    print("Machine " + str(header['e_machine']))
    print("Version " + str(header['e_version']))
    print("入口点地址 " + str(header['e_entry']))
    print("程序头起点 " + str(header['e_phoff']))
    print("Start of section headers: " + str(header['e_shoff']))
    print("标志: " + str(hex(header['e_flags'])))
    print("本头的大小: " + str(header['e_ehsize']) + "字节")
    # e_phentsize is the size of one program header entry.
    print("程序头大小: " + str(header['e_phentsize']) + "字节")
    print("Number of program headers: " + str(header['e_phnum']))
    print("节头大小: " + str(header['e_shentsize']) + "字节")
    print("节头数量: " + str(header['e_shnum']))
    print("字符串表索引节头: " + str(header['e_shstrndx']))
|
Program Header程序头信息:
ELF 文件中的程序头部是一个结构数组,每个结构描述一个段,或者系统准备程序执行所必需的其他信息。解析时仍然要关注 ELF Header 中的几个关键字段:e_phnum 是程序头的个数,e_phentsize 是程序头部表中每个表项的大小,e_phoff 是程序头部表在文件中的偏移量:
1 2 3 4 5 6 7 8 9 10
| typedef struct elf32_phdr{ Elf32_Word p_type; Elf32_Off p_offset; Elf32_Addr p_vaddr; Elf32_Addr p_paddr; Elf32_Word p_filesz; Elf32_Word p_memsz; Elf32_Word p_flags; Elf32_Word p_align; }Elf32_Phdr
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
def parse_program_headers(elf_file, header=None):
    """Read, print and return every ELF32 program header of *elf_file*.

    Entry ``i`` is read from ``e_phoff + i * e_phentsize``.  Each entry is
    decoded as eight consecutive 32-bit words.  The byte order follows
    ``e_ident[5]`` of ``header['magic']`` when that key is present (2 means
    big-endian) and defaults to little-endian otherwise.

    Args:
        elf_file: Path of the ELF file.
        header: Parsed ELF header providing ``'e_phoff'``,
            ``'e_phentsize'`` and ``'e_phnum'``.  When omitted, the
            module-level ``elfhdr`` dictionary is used.

    Returns:
        A list with one dictionary per program header.  Each value is the
        field rendered as an 8-digit lowercase hexadecimal string, most
        significant digit first.

    Raises:
        ValueError: When the file ends before a complete entry was read.
    """
    # Function-local import so that this block does not depend on the
    # file's import section.
    import struct

    field_names = (
        'p_type', 'p_offset', 'p_vaddr', 'p_paddr',
        'p_filesz', 'p_memsz', 'p_flags', 'p_align',
    )

    if header is None:
        header = elfhdr

    table_offset = header['e_phoff']
    entry_size = header['e_phentsize']
    entry_count = header['e_phnum']

    # bytearray yields integers when indexed on both Python 2 and Python 3.
    ident = bytearray(header.get('magic', b''))
    byte_order = '>' if len(ident) > 5 and ident[5] == 2 else '<'
    entry_struct = struct.Struct(byte_order + '8I')

    programs = []
    with open(elf_file, 'rb') as f:
        for index in range(entry_count):
            # Computed from the table start each time; never accumulated.
            f.seek(table_offset + index * entry_size, 0)
            raw = f.read(entry_struct.size)
            if len(raw) < entry_struct.size:
                raise ValueError(
                    "truncated program header %d in %r" % (index, elf_file))
            entry = {}
            for name, value in zip(field_names, entry_struct.unpack(raw)):
                entry[name] = '%08x' % value
            print(entry)
            programs.append(entry)
    return programs
|
Section Header解析实现和Program Header类似:
1 2 3 4 5 6 7 8 9 10 11 12
| typedef struct elf32_shdr{ Elf32_Word sh_name; Elf32_Word sh_type; Elf32_Word sh_flags; Elf32_Addr sh_addr; Elf32_Off sh_offset; Elf32_Word sh_size; Elf32_Word sh_link; Elf32_Word sh_info; Elf32_Word sh_addralign; Elf32_Word sh_entsize; }Elf32_Shdr
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
def parse_section_headers(elf_file, header=None):
    """Read, print and return every ELF32 section header of *elf_file*.

    Entry ``i`` is read from ``e_shoff + i * e_shentsize``.  Each entry is
    decoded as ten consecutive 32-bit words.  The byte order follows
    ``e_ident[5]`` of ``header['magic']`` when that key is present (2 means
    big-endian) and defaults to little-endian otherwise.

    Args:
        elf_file: Path of the ELF file.
        header: Parsed ELF header providing ``'e_shoff'``,
            ``'e_shentsize'`` and ``'e_shnum'``.  When omitted, the
            module-level ``elfhdr`` dictionary is used.

    Returns:
        A list with one dictionary per section header.  Each value is the
        field rendered as an 8-digit lowercase hexadecimal string, most
        significant digit first.

    Raises:
        ValueError: When the file ends before a complete entry was read.
    """
    # Function-local import so that this block does not depend on the
    # file's import section.
    import struct

    field_names = (
        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset',
        'sh_size', 'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize',
    )

    if header is None:
        header = elfhdr

    table_offset = header['e_shoff']
    entry_size = header['e_shentsize']
    entry_count = header['e_shnum']

    # bytearray yields integers when indexed on both Python 2 and Python 3.
    ident = bytearray(header.get('magic', b''))
    byte_order = '>' if len(ident) > 5 and ident[5] == 2 else '<'
    entry_struct = struct.Struct(byte_order + '10I')

    sections = []
    with open(elf_file, 'rb') as f:
        for index in range(entry_count):
            # Computed from the table start each time; never accumulated.
            f.seek(table_offset + index * entry_size, 0)
            raw = f.read(entry_struct.size)
            if len(raw) < entry_struct.size:
                raise ValueError(
                    "truncated section header %d in %r" % (index, elf_file))
            entry = {}
            for name, value in zip(field_names, entry_struct.unpack(raw)):
                entry[name] = '%08x' % value
            print(entry)
            sections.append(entry)
    return sections
|
Last updated:
这里可以写作者留言,标签和 hexo 中所有变量及辅助函数等均可调用,示例:
darktemple9.github.io/2018/02/14/ELF文件解析/