-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathelf.py
More file actions
executable file
·5145 lines (4691 loc) · 198 KB
/
Copy pathelf.py
File metadata and controls
executable file
·5145 lines (4691 loc) · 198 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
from ast import List
import binascii
from collections import defaultdict
from enum import IntEnum, IntFlag
import json
import optparse
import os
import io
import subprocess
import sys
import tempfile
import uuid
import zlib
# Local imports
import dwarf.context
import dwarf.options
from dwarf.ranges import AddressRange, AddressRangeList
from file_extract import FileExtract, FileEncode
import file_extract
# typedef uint32_t Elf32_Addr
# typedef uint32_t Elf32_Off
# typedef uint16_t Elf32_Half
# typedef uint32_t Elf32_Word
# typedef uint64_t Elf64_Addr
# typedef uint64_t Elf64_Off
# typedef uint16_t Elf64_Half
# typedef uint32_t Elf64_Word
# typedef uint64_t Elf64_Xword
# e_ident size and indices.
class EI(IntEnum):
    """Indices of the individual bytes inside the ELF header's e_ident."""

    MAG0 = 0  # File identification index.
    MAG1 = 1  # File identification index.
    MAG2 = 2  # File identification index.
    MAG3 = 3  # File identification index.
    CLASS = 4  # File class.
    DATA = 5  # Data encoding.
    VERSION = 6  # File version.
    OSABI = 7  # OS/ABI identification.
    ABIVERSION = 8  # ABI version.

    def __str__(self):
        """Return the member name with the conventional 'EI_' prefix."""
        return f'EI_{self.name}'
# The two e_ident values below are plain module-level ints; they are not
# members of the EI enum defined above.
EI_PAD = 9  # Start of padding bytes.
EI_NIDENT = 16  # Number of bytes in e_ident.
# File types
class ET(IntEnum):
    """ELF file types."""

    NONE = 0
    REL = 1
    EXEC = 2
    DYN = 3
    CORE = 4
    LOPROC = 0xff00
    HIPROC = 0xffff

    def __str__(self):
        """Return the member name with the conventional 'ET_' prefix."""
        return f'ET_{self.name}'
# Versioning
class EV(IntEnum):
    """ELF version values."""

    NONE = 0
    CURRENT = 1

    def __str__(self):
        """Return the member name with the conventional 'EV_' prefix."""
        return f'EV_{self.name}'
# Machine architectures
class EM(IntEnum):
    """Machine architecture values.

    Unlike the other enums in this file, the member names here already
    carry their 'EM_' prefix, so __str__ returns the name unchanged. This
    class defines no _missing_ hook, so constructing it from a value that
    is not listed below raises ValueError.
    """
    EM_NONE = 0  # No machine
    EM_M32 = 1  # AT&T WE 32100
    EM_SPARC = 2  # SPARC
    EM_386 = 3  # Intel 386
    EM_68K = 4  # Motorola 68000
    EM_88K = 5  # Motorola 88000
    EM_IAMCU = 6  # Intel MCU
    EM_860 = 7  # Intel 80860
    EM_MIPS = 8  # MIPS R3000
    EM_S370 = 9  # IBM System/370
    EM_MIPS_RS3_LE = 10  # MIPS RS3000 Little-endian
    EM_PARISC = 15  # Hewlett-Packard PA-RISC
    EM_VPP500 = 17  # Fujitsu VPP500
    EM_SPARC32PLUS = 18  # Enhanced instruction set SPARC
    EM_960 = 19  # Intel 80960
    EM_PPC = 20  # PowerPC
    EM_PPC64 = 21  # PowerPC64
    EM_S390 = 22  # IBM System/390
    EM_SPU = 23  # IBM SPU/SPC
    EM_V800 = 36  # NEC V800
    EM_FR20 = 37  # Fujitsu FR20
    EM_RH32 = 38  # TRW RH-32
    EM_RCE = 39  # Motorola RCE
    EM_ARM = 40  # ARM
    EM_ALPHA = 41  # DEC Alpha
    EM_SH = 42  # Hitachi SH
    EM_SPARCV9 = 43  # SPARC V9
    EM_TRICORE = 44  # Siemens TriCore
    EM_ARC = 45  # Argonaut RISC Core
    EM_H8_300 = 46  # Hitachi H8/300
    EM_H8_300H = 47  # Hitachi H8/300H
    EM_H8S = 48  # Hitachi H8S
    EM_H8_500 = 49  # Hitachi H8/500
    EM_IA_64 = 50  # Intel IA-64 processor architecture
    EM_MIPS_X = 51  # Stanford MIPS-X
    EM_COLDFIRE = 52  # Motorola ColdFire
    EM_68HC12 = 53  # Motorola M68HC12
    EM_MMA = 54  # Fujitsu MMA Multimedia Accelerator
    EM_PCP = 55  # Siemens PCP
    EM_NCPU = 56  # Sony nCPU embedded RISC processor
    EM_NDR1 = 57  # Denso NDR1 microprocessor
    EM_STARCORE = 58  # Motorola Star*Core processor
    EM_ME16 = 59  # Toyota ME16 processor
    EM_ST100 = 60  # STMicroelectronics ST100 processor
    EM_TINYJ = 61  # Advanced Logic Corp. TinyJ embedded processor family
    EM_X86_64 = 62  # AMD x86-64 architecture
    EM_PDSP = 63  # Sony DSP Processor
    EM_PDP10 = 64  # Digital Equipment Corp. PDP-10
    EM_PDP11 = 65  # Digital Equipment Corp. PDP-11
    EM_FX66 = 66  # Siemens FX66 microcontroller
    EM_ST9PLUS = 67  # STMicroelectronics ST9+ 8/16 bit microcontroller
    EM_ST7 = 68  # STMicroelectronics ST7 8-bit microcontroller
    EM_68HC16 = 69  # Motorola MC68HC16 Microcontroller
    EM_68HC11 = 70  # Motorola MC68HC11 Microcontroller
    EM_68HC08 = 71  # Motorola MC68HC08 Microcontroller
    EM_68HC05 = 72  # Motorola MC68HC05 Microcontroller
    EM_SVX = 73  # Silicon Graphics SVx
    EM_ST19 = 74  # STMicroelectronics ST19 8-bit microcontroller
    EM_VAX = 75  # Digital VAX
    EM_CRIS = 76  # Axis Communications 32-bit embedded processor
    EM_JAVELIN = 77  # Infineon Technologies 32-bit embedded processor
    EM_FIREPATH = 78  # Element 14 64-bit DSP Processor
    EM_ZSP = 79  # LSI Logic 16-bit DSP Processor
    EM_MMIX = 80  # Donald Knuth's educational 64-bit processor
    EM_HUANY = 81  # Harvard University machine-independent object files
    EM_PRISM = 82  # SiTera Prism
    EM_AVR = 83  # Atmel AVR 8-bit microcontroller
    EM_FR30 = 84  # Fujitsu FR30
    EM_D10V = 85  # Mitsubishi D10V
    EM_D30V = 86  # Mitsubishi D30V
    EM_V850 = 87  # NEC v850
    EM_M32R = 88  # Mitsubishi M32R
    EM_MN10300 = 89  # Matsushita MN10300
    EM_MN10200 = 90  # Matsushita MN10200
    EM_PJ = 91  # picoJava
    EM_OPENRISC = 92  # OpenRISC 32-bit embedded processor
    EM_ARC_COMPACT = 93  # ARC International ARCompact processor
    EM_XTENSA = 94  # Tensilica Xtensa Architecture
    EM_VIDEOCORE = 95  # Alphamosaic VideoCore processor
    EM_TMM_GPP = 96  # Thompson Multimedia General Purpose Processor
    EM_NS32K = 97  # National Semiconductor 32000 series
    EM_TPC = 98  # Tenor Network TPC processor
    EM_SNP1K = 99  # Trebia SNP 1000 processor
    EM_ST200 = 100  # STMicroelectronics (www.st.com) ST200
    EM_IP2K = 101  # Ubicom IP2xxx microcontroller family
    EM_MAX = 102  # MAX Processor
    EM_CR = 103  # National Semiconductor CompactRISC microprocessor
    EM_F2MC16 = 104  # Fujitsu F2MC16
    EM_MSP430 = 105  # Texas Instruments embedded microcontroller msp430
    EM_BLACKFIN = 106  # Analog Devices Blackfin (DSP) processor
    EM_SE_C33 = 107  # S1C33 Family of Seiko Epson processors
    EM_SEP = 108  # Sharp embedded microprocessor
    EM_ARCA = 109  # Arca RISC Microprocessor
    EM_UNICORE = 110  # Microprocessor series from PKU-Unity Ltd.
    EM_EXCESS = 111  # eXcess: 16/32/64-bit configurable embedded CPU
    EM_DXP = 112  # Icera Semiconductor Inc. Deep Execution Processor
    EM_ALTERA_NIOS2 = 113  # Altera Nios II soft-core processor
    EM_CRX = 114  # National Semiconductor CompactRISC CRX
    EM_XGATE = 115  # Motorola XGATE embedded processor
    EM_C166 = 116  # Infineon C16x/XC16x processor
    EM_M16C = 117  # Renesas M16C series microprocessors
    EM_DSPIC30F = 118  # Microchip Technology dsPIC30F Digital Signal Controller
    EM_CE = 119  # Freescale Communication Engine RISC core
    EM_M32C = 120  # Renesas M32C series microprocessors
    EM_TSK3000 = 131  # Altium TSK3000 core
    EM_RS08 = 132  # Freescale RS08 embedded processor
    EM_SHARC = 133  # Analog Devices SHARC family of 32-bit DSP processors
    EM_ECOG2 = 134  # Cyan Technology eCOG2 microprocessor
    EM_SCORE7 = 135  # Sunplus S+core7 RISC processor
    EM_DSP24 = 136  # New Japan Radio (NJR) 24-bit DSP Processor
    EM_VIDEOCORE3 = 137  # Broadcom VideoCore III processor
    EM_LATTICEMICO32 = 138  # RISC processor for Lattice FPGA architecture
    EM_SE_C17 = 139  # Seiko Epson C17 family
    EM_TI_C6000 = 140  # The Texas Instruments TMS320C6000 DSP family
    EM_TI_C2000 = 141  # The Texas Instruments TMS320C2000 DSP family
    EM_TI_C5500 = 142  # The Texas Instruments TMS320C55x DSP family
    EM_MMDSP_PLUS = 160  # STMicroelectronics 64bit VLIW Data Signal Processor
    EM_CYPRESS_M8C = 161  # Cypress M8C microprocessor
    EM_R32C = 162  # Renesas R32C series microprocessors
    EM_TRIMEDIA = 163  # NXP Semiconductors TriMedia architecture family
    EM_HEXAGON = 164  # Qualcomm Hexagon processor
    EM_8051 = 165  # Intel 8051 and variants
    EM_STXP7X = 166  # STMicroelectronics STxP7x RISC processors
    EM_NDS32 = 167  # Andes Technology compact code size embedded RISC
    EM_ECOG1 = 168  # Cyan Technology eCOG1X family
    # Same value as EM_ECOG1, so the enum treats this name as an alias of it.
    EM_ECOG1X = 168  # Cyan Technology eCOG1X family
    EM_MAXQ30 = 169  # Dallas Semiconductor MAXQ30 Core Micro-controllers
    EM_XIMO16 = 170  # New Japan Radio (NJR) 16-bit DSP Processor
    EM_MANIK = 171  # M2000 Reconfigurable RISC Microprocessor
    EM_CRAYNV2 = 172  # Cray Inc. NV2 vector architecture
    EM_RX = 173  # Renesas RX family
    EM_METAG = 174  # Imagination Technologies META processor architecture
    EM_MCST_ELBRUS = 175  # MCST Elbrus general purpose hardware architecture
    EM_ECOG16 = 176  # Cyan Technology eCOG16 family
    EM_CR16 = 177  # National Semiconductor CompactRISC CR16 16-bit microprocessor
    EM_ETPU = 178  # Freescale Extended Time Processing Unit
    EM_SLE9X = 179  # Infineon Technologies SLE9X core
    EM_L10M = 180  # Intel L10M
    EM_K10M = 181  # Intel K10M
    EM_AARCH64 = 183  # ARM AArch64
    EM_AVR32 = 185  # Atmel Corporation 32-bit microprocessor family
    EM_STM8 = 186  # STMicroeletronics STM8 8-bit microcontroller
    EM_TILE64 = 187  # Tilera TILE64 multicore architecture family
    EM_TILEPRO = 188  # Tilera TILEPro multicore architecture family
    EM_CUDA = 190  # NVIDIA CUDA architecture
    EM_TILEGX = 191  # Tilera TILE-Gx multicore architecture family
    EM_CLOUDSHIELD = 192  # CloudShield architecture family
    EM_COREA_1ST = 193  # KIPO-KAIST Core-A 1st generation processor family
    EM_COREA_2ND = 194  # KIPO-KAIST Core-A 2nd generation processor family
    EM_ARC_COMPACT2 = 195  # Synopsys ARCompact V2
    EM_OPEN8 = 196  # Open8 8-bit RISC soft processor core
    EM_RL78 = 197  # Renesas RL78 family
    EM_VIDEOCORE5 = 198  # Broadcom VideoCore V processor
    EM_78KOR = 199  # Renesas 78KOR family
    EM_56800EX = 200  # Freescale 56800EX Digital Signal Controller (DSC)
    EM_BA1 = 201  # Beyond BA1 CPU architecture
    EM_BA2 = 202  # Beyond BA2 CPU architecture
    EM_XCORE = 203  # XMOS xCORE processor family
    EM_MCHP_PIC = 204  # Microchip 8-bit PIC(r) family
    EM_INTEL205 = 205  # Reserved by Intel
    EM_INTEL206 = 206  # Reserved by Intel
    EM_INTEL207 = 207  # Reserved by Intel
    EM_INTEL208 = 208  # Reserved by Intel
    EM_INTEL209 = 209  # Reserved by Intel
    EM_KM32 = 210  # KM211 KM32 32-bit processor
    EM_KMX32 = 211  # KM211 KMX32 32-bit processor
    EM_KMX16 = 212  # KM211 KMX16 16-bit processor
    EM_KMX8 = 213  # KM211 KMX8 8-bit processor
    EM_KVARC = 214  # KM211 KVARC processor
    EM_CDP = 215  # Paneve CDP architecture family
    EM_COGE = 216  # Cognitive Smart Memory Processor
    EM_COOL = 217  # iCelero CoolEngine
    EM_NORC = 218  # Nanoradio Optimized RISC
    EM_CSR_KALIMBA = 219  # CSR Kalimba architecture family
    EM_AMDGPU = 224  # AMD GPU architecture
    EM_RISCV = 0x00f3  # RISCV

    def __str__(self):
        """Return the member name as-is (it already includes 'EM_')."""
        return self.name
# EI_CLASS - Object file classes.
class EC(IntEnum):
    """Object file classes stored at e_ident[EI_CLASS].

    Member names already carry the full 'ELFCLASS' spelling, so __str__
    returns the name unchanged.
    """
    ELFCLASSNONE = 0
    ELFCLASS32 = 1  # 32-bit object file
    ELFCLASS64 = 2  # 64-bit object file

    def __str__(self):
        """Return the member name as-is."""
        return self.name
# EI_DATA - Object file byte orderings.
class ED(IntEnum):
    """Object file byte orderings stored at e_ident[EI_DATA].

    Member names already carry the full 'ELFDATA' spelling, so __str__
    returns the name unchanged.
    """
    ELFDATANONE = 0  # Invalid data encoding.
    ELFDATA2LSB = 1  # Little-endian object file
    ELFDATA2MSB = 2  # Big-endian object file

    def __str__(self):
        """Return the member name as-is."""
        return self.name
# OS ABI identification.
class ELFOSABI(IntEnum):
    """OS ABI identification values.

    Values that are not listed below do not raise: they are turned into
    cached pseudo members named '_unknown_<hex>' so they can be displayed.
    """

    NONE = 0  # UNIX System V ABI
    HPUX = 1  # HP-UX operating system
    NETBSD = 2  # NetBSD
    GNU = 3  # GNU/Linux
    LINUX = 3  # Historical alias for ELFOSABI_GNU.
    HURD = 4  # GNU/Hurd
    SOLARIS = 6  # Solaris
    AIX = 7  # AIX
    IRIX = 8  # IRIX
    FREEBSD = 9  # FreeBSD
    TRU64 = 10  # TRU64 UNIX
    MODESTO = 11  # Novell Modesto
    OPENBSD = 12  # OpenBSD
    OPENVMS = 13  # OpenVMS
    NSK = 14  # Hewlett-Packard Non-Stop Kernel
    AROS = 15  # AROS
    FENIXOS = 16  # FenixOS
    CLOUDABI = 17  # Nuxi CloudABI
    C6000_ELFABI = 64  # Bare-metal TMS320C6000
    AMDGPU_HSA = 64  # AMD HSA runtime
    C6000_LINUX = 65  # Linux TMS320C6000
    ARM = 97  # ARM
    STANDALONE = 255  # Standalone (embedded) application

    def __str__(self):
        """Return the member name with the 'ELFOSABI_' prefix."""
        return f'ELFOSABI_{self.name}'

    # ELF files may use OS ABI values that are not listed above; those still
    # need to be representable so they can be displayed.
    @classmethod
    def _missing_(cls, value):
        """Enum lookup hook: build a pseudo member for unknown ints."""
        if not isinstance(value, int):
            # Returning None makes Enum.__new__ raise the ValueError.
            return None
        return cls.create_pseudo_member_(value)

    @classmethod
    def create_pseudo_member_(cls, value):
        """Return the cached member for ``value``, creating it if needed."""
        known = cls._value2member_map_
        try:
            return known[value]
        except KeyError:
            pass
        unknown = int.__new__(cls, value)
        unknown._name_ = '_unknown_%4.4x' % value
        unknown._value_ = value
        # setdefault keeps whichever member was registered first.
        return known.setdefault(value, unknown)
# Section header types
class SHT(IntEnum):
    """Section header types.

    Values that are not listed below do not raise: they are turned into
    cached pseudo members whose name is the value in hex, so unknown
    section types can still be displayed.

    When two names share a value, the enum keeps the first one defined as
    the canonical member and makes later ones aliases. Concrete section
    types are therefore defined before the LOOS/HIOS/LOUSER/HIUSER range
    markers, so a section of type 0x6fffffff prints as SHT_GNU_versym
    rather than SHT_HIOS.
    """

    NULL = 0  # No associated section (inactive entry).
    PROGBITS = 1  # Program-defined contents.
    SYMTAB = 2  # Symbol table.
    STRTAB = 3  # String table.
    RELA = 4  # Relocation entries; explicit addends.
    HASH = 5  # Symbol hash table.
    DYNAMIC = 6  # Information for dynamic linking.
    NOTE = 7  # Information about the file.
    NOBITS = 8  # Data occupies no space in the file.
    REL = 9  # Relocation entries; no explicit addends.
    SHLIB = 10  # Reserved.
    DYNSYM = 11  # Symbol table.
    INIT_ARRAY = 14  # Pointers to initialization functions.
    FINI_ARRAY = 15  # Pointers to termination functions.
    PREINIT_ARRAY = 16  # Pointers to pre-init functions.
    GROUP = 17  # Section group.
    SYMTAB_SHNDX = 18  # Indices for SHN_XINDEX entries.
    # RELR is the canonical name so that str() yields 'SHT_RELR'. The
    # original spelling is kept as an alias for existing SHT.SHT_RELR users.
    RELR = 19
    SHT_RELR = 19

    ANDROID_REL = 0x60000001
    ANDROID_RELA = 0x60000002
    GNU_ATTRIBUTES = 0x6ffffff5
    GNU_HASH = 0x6ffffff6
    GNU_verdef = 0x6ffffffd
    GNU_verneed = 0x6ffffffe
    GNU_versym = 0x6fffffff
    # Processor-specific types are left commented out: several of them
    # share the same numeric value across architectures.
    # ARM_EXIDX = 0x70000001
    # ARM_PREEMPTMAP = 0x70000002
    # ARM_ATTRIBUTES = 0x70000003
    # ARM_DEBUGOVERLAY = 0x70000004
    # ARM_OVERLAYSECTION = 0x70000005
    # HEX_ORDERED = 0x70000000
    # MIPS_REGINFO = 0x70000006
    # MIPS_OPTIONS = 0x7000000d
    # MIPS_DWARF = 0x7000001e
    # MIPS_ABIFLAGS = 0x7000002a
    LLVM_ODRTAB = 0x6fff4c00
    LLVM_LINKER_OPTIONS = 0x6fff4c01
    LLVM_ADDRSIG = 0x6fff4c03
    LLVM_DEPENDENT_LIBRARIES = 0x6fff4c04
    LLVM_SYMPART = 0x6fff4c05
    LLVM_PART_EHDR = 0x6fff4c06
    LLVM_PART_PHDR = 0x6fff4c07
    LLVM_BB_ADDR_MAP_V0 = 0x6fff4c08
    LLVM_CALL_GRAPH_PROFILE = 0x6fff4c09
    LLVM_BB_ADDR_MAP = 0x6fff4c0a
    LLVM_OFFLOADING = 0x6fff4c0b
    LLVM_LTO = 0x6fff4c0c
    ANDROID_RELR = 0x6fffff00
    # AARCH64_AUTH_RELR = 0x70000004
    # AARCH64_MEMTAG_GLOBALS_STATIC = 0x70000007
    # AARCH64_MEMTAG_GLOBALS_DYNAMIC = 0x70000008
    # X86_64_UNWIND = 0x70000001
    # MSP430_ATTRIBUTES = 0x70000003
    # RISCV_ATTRIBUTES = 0x70000003
    # CSKY_ATTRIBUTES = 0x70000001
    # HEXAGON_ATTRIBUTES = 0x70000003

    # Range markers. These come last so that a marker sharing its value
    # with a concrete type (HIOS == GNU_versym) becomes the alias.
    LOOS = 0x60000000
    HIOS = 0x6fffffff
    # LOPROC = 0x70000000
    # HIPROC = 0x7fffffff
    LOUSER = 0x80000000
    HIUSER = 0xffffffff

    def __str__(self):
        """Return the member name with the 'SHT_' prefix."""
        return 'SHT_' + self.name

    # ELF files may use SHT_XXXX values that are not listed above; those
    # still need to be representable so they can be displayed.
    @classmethod
    def _missing_(cls, value):
        """Enum lookup hook: build a pseudo member for unknown ints."""
        if isinstance(value, int):
            return cls.create_pseudo_member_(value)
        return None  # will raise the ValueError in Enum.__new__

    @classmethod
    def create_pseudo_member_(cls, value):
        """Return the cached member for ``value``, creating it if needed."""
        pseudo_member = cls._value2member_map_.get(value, None)
        if pseudo_member is None:
            new_member = int.__new__(cls, value)
            new_member._name_ = '%4.4x' % value
            new_member._value_ = value
            pseudo_member = cls._value2member_map_.setdefault(value, new_member)
        return pseudo_member
# Special Section Indexes
# These are plain module-level ints. Note that two pairs share a value:
# SHN_LOPROC equals SHN_LORESERVE and SHN_XINDEX equals SHN_HIRESERVE.
SHN_UNDEF = 0
SHN_LORESERVE = 0xff00
SHN_LOPROC = 0xff00
SHN_HIPROC = 0xff1f
SHN_ABS = 0xfff1
SHN_COMMON = 0xfff2
SHN_HIRESERVE = 0xffff
SHN_XINDEX = 0xffff
# The size (in bytes) of symbol table entries.
SYMENTRY_SIZE32 = 16  # 32-bit symbol entry size
SYMENTRY_SIZE64 = 24  # 64-bit symbol entry size.
# Symbol bindings.
class STB(IntEnum):
    """Symbol binding values."""

    LOCAL = 0  # Local symbol, not visible outside obj file containing def
    GLOBAL = 1  # Global symbol, visible to all object files being combined
    WEAK = 2  # Weak symbol, like global but lower-precedence
    GNU_UNIQUE = 10
    LOOS = 10  # Lowest operating system-specific binding type
    HIOS = 12  # Highest operating system-specific binding type
    LOPROC = 13  # Lowest processor-specific binding type
    HIPROC = 15  # Highest processor-specific binding type

    @classmethod
    def max_width(cls):
        """Return the fixed column width used when printing bindings."""
        # The width of the longest printed name.
        return len('STB_GNU_UNIQUE')

    def __str__(self):
        """Return the member name with the 'STB_' prefix."""
        return f'STB_{self.name}'
# Symbol types.
class STT(IntEnum):
    """Symbol type values."""

    NOTYPE = 0  # Symbol's type is not specified
    OBJECT = 1  # Symbol is a data object (variable, array, etc.)
    FUNC = 2  # Symbol is executable code (function, etc.)
    SECTION = 3  # Symbol refers to a section
    FILE = 4  # Local, absolute symbol that refers to a file
    COMMON = 5  # An uninitialized common block
    TLS = 6  # Thread local data object
    GNU_IFUNC = 10  # GNU indirect function
    LOOS = 10  # Lowest operating system-specific symbol type
    HIOS = 12  # Highest operating system-specific symbol type
    LOPROC = 13  # Lowest processor-specific symbol type
    HIPROC = 15  # Highest processor-specific symbol type

    @classmethod
    def max_width(cls):
        """Return the fixed column width used when printing symbol types."""
        # The width of the longest printed name.
        return len('STT_GNU_IFUNC')

    def __str__(self):
        """Return the member name with the 'STT_' prefix."""
        return f'STT_{self.name}'
class STV(IntEnum):
    """Symbol visibility values."""

    DEFAULT = 0  # Visibility is specified by binding type
    INTERNAL = 1  # Defined by processor supplements
    HIDDEN = 2  # Not visible to other components
    PROTECTED = 3  # Visible in other components but not preemptable

    def __str__(self):
        """Return the member name with the 'STV_' prefix."""
        return f'STV_{self.name}'
# Symbol number.
# Plain module-level int, not a member of any of the symbol enums above.
STN_UNDEF = 0
class PT(IntEnum):
    """Program header (segment) types.

    Values that are not listed below do not raise: they are turned into
    cached pseudo members named '_unknown_<hex>' so they can be displayed.
    """

    NULL = 0  # Unused segment.
    LOAD = 1  # Loadable segment.
    DYNAMIC = 2  # Dynamic linking information.
    INTERP = 3  # Interpreter pathname.
    NOTE = 4  # Auxiliary information.
    SHLIB = 5  # Reserved.
    PHDR = 6  # The program header table itself.
    TLS = 7  # The thread-local storage template.
    LOOS = 0x60000000  # Lowest operating system-specific pt entry type.
    HIOS = 0x6fffffff  # Highest operating system-specific pt entry type.
    LOPROC = 0x70000000  # Lowest processor-specific program hdr entry type.
    HIPROC = 0x7fffffff  # Highest processor-specific program hdr entry type.
    GNU_EH_FRAME = 0x6474e550
    GNU_PROPERTY = 0x6474e553
    GNU_STACK = 0x6474e551
    GNU_RELRO = 0x6474e552
    ARM_UNWIND = 0x70000001

    @classmethod
    def from_object(cls, value):
        """Build a PT member from an int, a name, or an existing member.

        A string may be given with or without the leading 'PT_'. Any other
        type raises ValueError; an unknown name raises KeyError.
        """
        if isinstance(value, int):
            return cls(value)
        if isinstance(value, str):
            member_name = value[3:] if value.startswith('PT_') else value
            return cls[member_name]
        if isinstance(value, cls):
            return value
        raise ValueError('Invalid value type: %s. Must be int, str, or %s instance.' % (type(value), cls.__name__))

    @classmethod
    def max_width(cls):
        """Return the fixed column width used when printing segment types."""
        # The width of the longest printed names.
        return len('PT_GNU_EH_FRAME')

    def __str__(self):
        """Return the member name with the 'PT_' prefix."""
        return f'PT_{self.name}'

    @classmethod
    def _missing_(cls, value):
        """Enum lookup hook: build a pseudo member for unknown ints."""
        if not isinstance(value, int):
            # Returning None makes Enum.__new__ raise the ValueError.
            return None
        return cls.create_pseudo_member_(value)

    @classmethod
    def create_pseudo_member_(cls, value):
        """Return the cached member for ``value``, creating it if needed."""
        known = cls._value2member_map_
        try:
            return known[value]
        except KeyError:
            pass
        unknown = int.__new__(cls, value)
        unknown._name_ = '_unknown_%4.4x' % value
        unknown._value_ = value
        # setdefault keeps whichever member was registered first.
        return known.setdefault(value, unknown)
class PF(IntEnum):
    """Individual program header permission bits."""

    X = 1  # Execute
    W = 2  # Write
    R = 4  # Read

    def __str__(self):
        """Return the member name with the 'PF_' prefix."""
        return f'PF_{self.name}'
# Note types for "LINUX" or "CORE" notes
class NT_LINUX(IntEnum):
    """Note types for "LINUX" or "CORE" notes.

    Values that are not listed below do not raise: they are turned into
    cached pseudo members whose name is the value in hex, so unknown note
    types can still be displayed.

    Members whose name would start with a digit are spelled with a leading
    underscore (_386_TLS). __str__ hides that underscore and from_object
    accepts both spellings, so str() output round-trips.
    """

    PRSTATUS = 1
    PRFPREG = 2
    PRPSINFO = 3
    TASKSTRUCT = 4
    AUXV = 6
    SIGINFO = 0x53494749
    FILE = 0x46494c45
    PRXFPREG = 0x46e62b7f
    PPC_VMX = 0x100
    PPC_SPE = 0x101
    PPC_VSX = 0x102
    PPC_TAR = 0x103
    PPC_PPR = 0x104
    PPC_DSCR = 0x105
    PPC_EBB = 0x106
    PPC_PMU = 0x107
    PPC_TM_CGPR = 0x108
    PPC_TM_CFPR = 0x109
    PPC_TM_CVMX = 0x10a
    PPC_TM_CVSX = 0x10b
    PPC_TM_SPR = 0x10c
    PPC_TM_CTAR = 0x10d
    PPC_TM_CPPR = 0x10e
    PPC_TM_CDSCR = 0x10f
    PPC_PKEY = 0x110
    PPC_DEXCR = 0x111
    PPC_HASHKEYR = 0x112
    _386_TLS = 0x200
    _386_IOPERM = 0x201
    X86_XSTATE = 0x202
    CET_STATE = 0x203  # Old binutils treats 0x203 as a CET state
    X86_SHSTK = 0x204
    X86_XSAVE_LAYOUT = 0x205
    S390_HIGH_GPRS = 0x300
    S390_TIMER = 0x301
    S390_TODCMP = 0x302
    S390_TODPREG = 0x303
    S390_CTRS = 0x304
    S390_PREFIX = 0x305
    S390_LAST_BREAK = 0x306
    S390_SYSTEM_CALL = 0x307
    S390_TDB = 0x308
    S390_VXRS_LOW = 0x309
    S390_VXRS_HIGH = 0x30a
    S390_GS_CB = 0x30b
    S390_GS_BC = 0x30c
    S390_RI_CB = 0x30d
    S390_PV_CPU_DATA = 0x30e
    ARM_VFP = 0x400
    ARM_TLS = 0x401
    ARM_HW_BREAK = 0x402
    ARM_HW_WATCH = 0x403
    ARM_SYSTEM_CALL = 0x404
    ARM_SVE = 0x405
    ARM_PAC_MASK = 0x406
    ARM_PACA_KEYS = 0x407
    ARM_PACG_KEYS = 0x408
    ARM_TAGGED_ADDR_CTRL = 0x409
    ARM_PAC_ENABLED_KEYS = 0x40a
    ARM_SSVE = 0x40b
    ARM_ZA = 0x40c
    ARM_ZT = 0x40d
    ARM_FPMR = 0x40e
    ARM_POE = 0x40f
    ARM_GCS = 0x410
    METAG_CBUF = 0x500
    METAG_RPIPE = 0x501
    METAG_TLS = 0x502
    ARC_V2 = 0x600
    VMCOREDD = 0x700
    MIPS_DSP = 0x800
    MIPS_FP_MODE = 0x801
    MIPS_MSA = 0x802
    RISCV_CSR = 0x900
    RISCV_VECTOR = 0x901
    RISCV_TAGGED_ADDR_CTRL = 0x902
    RISCV_USER_CFI = 0x903
    LOONGARCH_CPUCFG = 0xa00
    LOONGARCH_CSR = 0xa01
    LOONGARCH_LSX = 0xa02
    LOONGARCH_LASX = 0xa03
    LOONGARCH_LBT = 0xa04
    LOONGARCH_HW_BREAK = 0xa05
    LOONGARCH_HW_WATCH = 0xa06

    @classmethod
    def from_object(cls, value):
        """Build an NT_LINUX member from an int, a name, or a member.

        A string may be given with or without the leading 'NT_', and the
        printed spelling of underscore-prefixed members ('NT_386_TLS') is
        accepted as well. Any other type raises ValueError; an unknown
        name raises KeyError.
        """
        if isinstance(value, int):
            return cls(value)
        elif isinstance(value, str):
            if value.startswith('NT_'):
                value = value[3:]
            # __str__ prints _386_TLS as 'NT_386_TLS', which leaves
            # '386_TLS' here; map that back to the real member name.
            members = cls.__members__
            if value not in members and '_' + value in members:
                value = '_' + value
            return cls[value]
        elif isinstance(value, cls):
            return value
        raise ValueError('Invalid value type: %s. Must be int, str, or %s instance.' % (type(value), cls.__name__))

    def __str__(self):
        """Return the member name with an 'NT_' prefix.

        Names that already start with '_' get 'NT' only, so _386_TLS
        prints as 'NT_386_TLS' rather than 'NT__386_TLS'.
        """
        if self.name.startswith('_'):
            return 'NT' + self.name
        else:
            return 'NT_' + self.name

    # Core files may contain note types that are not listed above; those
    # still need to be representable so they can be displayed.
    @classmethod
    def _missing_(cls, value):
        """Enum lookup hook: build a pseudo member for unknown ints."""
        if isinstance(value, int):
            return cls.create_pseudo_member_(value)
        return None  # will raise the ValueError in Enum.__new__

    @classmethod
    def create_pseudo_member_(cls, value):
        """Return the cached member for ``value``, creating it if needed."""
        pseudo_member = cls._value2member_map_.get(value, None)
        if pseudo_member is None:
            new_member = int.__new__(cls, value)
            new_member._name_ = '%#8.8x' % value
            new_member._value_ = value
            pseudo_member = cls._value2member_map_.setdefault(value, new_member)
        return pseudo_member
# Note types for "GNU" notes
class NT_GNU(IntEnum):
    """Note types for "GNU" notes.

    Values that are not listed below do not raise: they are turned into
    cached pseudo members whose name is the value in hex, so unknown note
    types can still be displayed.
    """

    ABI_TAG = 1
    HWCAP = 2
    BUILD_ID = 3
    GOLD_VERSION = 4
    PROPERTY_TYPE_0 = 5

    @classmethod
    def from_object(cls, value):
        """Build an NT_GNU member from an int, a name, or a member.

        A string may be given with or without the leading 'NT_GNU_'. Any
        other type raises ValueError; an unknown name raises KeyError.
        """
        prefix = 'NT_GNU_'
        if isinstance(value, int):
            return cls(value)
        elif isinstance(value, str):
            if value.startswith(prefix):
                # Strip the whole prefix; removing only 'NT_' would leave
                # 'GNU_<name>', which is never a member name.
                value = value[len(prefix):]
            return cls[value]
        elif isinstance(value, cls):
            return value
        raise ValueError('Invalid value type: %s. Must be int, str, or %s instance.' % (type(value), cls.__name__))

    def __str__(self):
        """Return the member name with the 'NT_GNU_' prefix."""
        return 'NT_GNU_' + self.name

    # ELF files may contain GNU note types that are not listed above;
    # those still need to be representable so they can be displayed.
    @classmethod
    def _missing_(cls, value):
        """Enum lookup hook: build a pseudo member for unknown ints."""
        if isinstance(value, int):
            return cls.create_pseudo_member_(value)
        return None  # will raise the ValueError in Enum.__new__

    @classmethod
    def create_pseudo_member_(cls, value):
        """Return the cached member for ``value``, creating it if needed."""
        pseudo_member = cls._value2member_map_.get(value, None)
        if pseudo_member is None:
            new_member = int.__new__(cls, value)
            new_member._name_ = '%#8.8x' % value
            new_member._value_ = value
            pseudo_member = cls._value2member_map_.setdefault(value, new_member)
        return pseudo_member
# NT_AUXV defines
class AT(IntEnum):
    """Auxiliary vector (NT_AUXV) entry types.

    Values that are not listed below do not raise: they are turned into
    cached pseudo members whose name is the value in hex, so unknown
    entry types can still be displayed.
    """

    NULL = 0  # End of auxv.
    IGNORE = 1  # Ignore entry.
    EXECFD = 2  # File descriptor of program.
    PHDR = 3  # Program headers.
    PHENT = 4  # Size of program header.
    PHNUM = 5  # Number of program headers.
    PAGESZ = 6  # Page size.
    BASE = 7  # Interpreter base address.
    FLAGS = 8  # Flags.
    ENTRY = 9  # Program entry point.
    NOTELF = 10  # Set if program is not an ELF.
    UID = 11  # UID.
    EUID = 12  # Effective UID.
    GID = 13  # GID.
    EGID = 14  # Effective GID.
    CLKTCK = 17  # Clock frequency (e.g. times(2)).
    PLATFORM = 15  # String identifying platform.
    HWCAP = 16  # Machine dependent hints about processor capabilities.
    FPUCW = 18  # Used FPU control word.
    DCACHEBSIZE = 19  # Data cache block size.
    ICACHEBSIZE = 20  # Instruction cache block size.
    UCACHEBSIZE = 21  # Unified cache block size.
    IGNOREPPC = 22  # Entry should be ignored.
    SECURE = 23  # Boolean, was exec setuid-like?
    BASE_PLATFORM = 24  # String identifying real platforms.
    RANDOM = 25  # Address of 16 random bytes.
    HWCAP2 = 26  # Extension of AT_HWCAP.
    RSEQ_FEATURE_SIZE = 27  # rseq supported feature size.
    RSEQ_ALIGN = 28  # rseq allocation alignment.
    HWCAP3 = 29  # extension of AT_HWCAP.
    HWCAP4 = 30  # extension of AT_HWCAP.
    EXECFN = 31  # Filename of executable.
    SYSINFO = 32  # Pointer to the global system page used for sys calls
    SYSINFO_EHDR = 33
    L1I_CACHESHAPE = 34  # Shapes of the caches.
    L1D_CACHESHAPE = 35
    L2_CACHESHAPE = 36
    L3_CACHESHAPE = 37
    MINSIGSTKSZ = 51

    def __str__(self):
        """Return the member name with the 'AT_' prefix."""
        return 'AT_' + self.name

    @classmethod
    def from_object(cls, value):
        """Build an AT member from an int, a name, or an existing member.

        A string may be given with or without the leading 'AT_'. Any other
        type raises ValueError; an unknown name raises KeyError.
        """
        if isinstance(value, int):
            return cls(value)
        elif isinstance(value, str):
            if value.startswith('AT_'):
                value = value[3:]
            return cls[value]
        elif isinstance(value, cls):
            return value
        raise ValueError('Invalid value type: %s. Must be int, str, or %s instance.' % (type(value), cls.__name__))

    @classmethod
    def max_width(cls):
        """Return the width of the longest printed name of a listed member.

        The width is computed on demand instead of being cached in a
        private class attribute: before Python 3.11 a '__NAME = 0' line in
        an Enum body defines a member (here an alias of NULL) that cannot
        be reassigned later. The enum is small, so recomputing is cheap.
        """
        # Iterating the class yields only the listed, non-alias members;
        # the 3 accounts for the 'AT_' prefix added by __str__.
        return max(len(member.name) for member in cls) + 3

    # Core files may contain auxv types that are not listed above; those
    # still need to be representable so they can be displayed.
    @classmethod
    def _missing_(cls, value):
        """Enum lookup hook: build a pseudo member for unknown ints."""
        if isinstance(value, int):
            return cls.create_pseudo_member_(value)
        return None  # will raise the ValueError in Enum.__new__

    @classmethod
    def create_pseudo_member_(cls, value):
        """Return the cached member for ``value``, creating it if needed."""
        pseudo_member = cls._value2member_map_.get(value, None)
        if pseudo_member is None:
            new_member = int.__new__(cls, value)
            new_member._name_ = '%#8.8x' % value
            new_member._value_ = value
            pseudo_member = cls._value2member_map_.setdefault(value, new_member)
        return pseudo_member
class SHF(IntFlag):
    """Section header flag bits.

    Both repr() and str() produce the set flags joined with '|', each
    with an 'SHF_' prefix, or an empty string when the value is zero.
    """

    WRITE = 0x1  # Section data should be writable during execution.
    ALLOC = 0x2  # Section occupies memory during program execution.
    EXECINSTR = 0x4  # Section contains executable machine instructions.
    MERGE = 0x10  # The data in this section may be merged.
    STRINGS = 0x20  # The data in this section is null-terminated strings.
    INFO_LINK = 0x40  # A field in this section holds a section header index.
    LINK_ORDER = 0x80  # Adds special ordering requirements for link editors.
    OS_NONCONFORMING = 0x100  # This section requires special OS-specific
    # processing to avoid incorrect behavior.
    GROUP = 0x200  # This section is a member of a section group.
    TLS = 0x400  # This section holds Thread-Local Storage.
    COMPRESSED = 0x800  # Identifies a section containing compressed data.
    GNU_RETAIN = 0x200000

    def __repr__(self):
        """Return the '|'-joined names of the flags set in this value."""
        if self.value == 0:
            return ''
        set_names = []
        for flag in type(self):
            if flag.value & self.value:
                set_names.append('SHF_' + flag.name)
        return '|'.join(set_names)

    __str__ = __repr__
# Bit masks over the 32-bit section flags value. The three masks are
# disjoint and together cover all 32 bits: SHF_MASK is the low 20 bits,
# SHF_MASKOS the next 8 bits and SHF_MASKPROC the top 4 bits.
SHF_MASKOS = 0x0ff00000
SHF_MASKPROC = 0xf0000000
SHF_MASK = 0x000fffff
# ELF Compression Types (CompressedHeader.ch_type)
ELFCOMPRESS_ZLIB = 1
def offsetToAlign(align, value):
    """Return how much must be added to ``value`` to reach a multiple of
    ``align``; 0 when ``value`` is already a multiple."""
    remainder = value % align
    return align - remainder if remainder != 0 else 0
def sizeof_fmt(num):
    """Format ``num`` as a short size string using 1024-based units.

    The value is divided by 1024 until its magnitude drops below 1024 and
    is printed with one decimal followed by the unit letter ('B' through
    'Z'). Anything still larger after 'Z' is reported in 'Y'.
    """
    units = ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z')
    index = 0
    while index < len(units):
        if abs(num) < 1024.0:
            return "%3.1f%s" % (num, units[index])
        num /= 1024.0
        index += 1
    return "%.1f%s" % (num, 'Y')
def get_percentage(part, total):
    """Return ``part`` as a percentage of ``total``.

    Both arguments are converted to float. When ``total`` is zero there
    is nothing to take a share of, so 0.0 is returned instead of raising
    ZeroDivisionError.
    """
    total = float(total)
    if total == 0.0:
        return 0.0
    return (float(part) / total) * 100.0
def find_diff_offset(b1: bytes, b2: bytes) -> int | None:
"""Finds the offset of the first differing byte."""
for i, (byte1, byte2) in enumerate(zip(b1, b2)):
if byte1 != byte2:
return i
# If one is a prefix of the other, the diff is at the end of the shorter one
if len(b1) != len(b2):
return min(len(b1), len(b2))
return None
def object_to_bytes(obj) -> bytes:
    '''
    Turn ``obj`` into a bytes object.

    - bytes are returned unchanged.
    - A str is decoded as hex; when it is not valid hex, its UTF-8
      encoding is returned instead.
    - A list of hex strings is decoded item by item and concatenated.

    Any other type raises ValueError.
    '''
    if isinstance(obj, bytes):
        return obj
    if isinstance(obj, list):
        return b"".join(map(bytes.fromhex, obj))
    if isinstance(obj, str):
        try:
            decoded = bytes.fromhex(obj)
        except ValueError:
            decoded = obj.encode('utf-8')
        return decoded
    raise ValueError('Unsupported object type: %s' % type(obj))
class FunctionInfo:
'''A class that accumulates function info from a variety of sources'''
def __init__(self):
self.addr_to_entry = {}
def add_arm_thumb(self, addr, size=None, name=None, source=None):
'''If an address is encoded where bit zero means thumb, then we
can deduce the "isa" from bit zero'''
isa = None
if addr & 1:
isa = 'thumb'
else:
isa = 'arm'
addr = addr & 0xfffffffe
if name and name.startswith('$'):
isa_char = name[1]
if isa_char == 'a':
isa = 'arm'
elif isa_char == 't':
isa = 'thumb'
elif isa_char == 'd':
# Ignore data
return
else:
raise ValueError('unexpected $ char %s in %s' % (isa_char,
name))
self.add(addr, size=size, name=name, source=source, isa=isa)
def add(self, addr, size=0, name=None, source=None, isa=None):
    '''Record a function at ``addr``, merging into an existing entry.

    When no entry exists for ``addr`` a new FunctionInfo.Entry is stored.
    Otherwise the existing entry is updated in place:

    - isa: filled in if the entry had none; a conflicting value only
      prints a warning and the original isa is kept.
    - size: filled in if the entry's range had size 0. If the sizes
      differ and ``source`` already contributed to the entry, the smaller
      size wins; if the source is new, a warning is printed and the
      original size is kept.
    - name / source: appended if not already present.

    NOTE(review): this file's indentation was reconstructed; confirm that
    the size-mismatch ``else`` pairs with ``if source in e.sources``.
    '''
    if addr in self.addr_to_entry:
        e = self.addr_to_entry[addr]
        # Some sources might not have thumb bit set (ARM unwind)
        # so always add it if we find it
        if isa:
            if e.isa is None:
                e.isa = isa
            elif e.isa != isa:
                print('warning: isa mismatch for addr=%#x: %s (%s) != %s '
                      '(%s) for symbol %s (%s) keeping original isa %s' % (
                          addr, e.isa, e.sources[-1], isa, source,
                          str(e.names), name, e.isa))
        if size:
            e_size = e.range.size()
            if e_size == 0:
                # Entry had no size yet: adopt the one supplied now.
                e.range.set_size(size)
            elif e_size != size:
                # if it is from the same source, then trust the smaller
                # size. We have seen EH frame broken up into two
                # overlapping ranges in libart.so...
                if source in e.sources:
                    if size < e_size:
                        e.range.set_size(size)
                else:
                    print('warning: size mismatch for addr=%#x: %u (%s) !='
                          ' %u (%s) for symbol %s keeping original size %u'
                          % (addr, e_size, e.sources[-1], size, source,
                             str(e.names), e_size))
        if name is not None:
            if name not in e.names:
                e.names.append(name)
        if source is not None and source not in e.sources:
            e.sources.append(source)
    else:
        # First time this address is seen: create its entry.
        self.addr_to_entry[addr] = FunctionInfo.Entry(addr,
                                                      size=size,
                                                      name=name,
                                                      source=source,
                                                      isa=isa)
def dump(self, verbose, f=sys.stdout):
sorted_addrs = self.addr_to_entry.keys()
sorted_addrs.sort()
sorted_entries = []
for addr in sorted_addrs:
entry = self.addr_to_entry[addr]
sorted_entries.append(entry)
prev_entry = None
for entry in sorted_entries:
if prev_entry:
if prev_entry.range.intersects(entry.range):
if verbose:
f.write('warning: overlapping entries, first range '
'will be truncated to match second entry\'s '
'address:\n')
prev_entry.dump(f=f)
entry.dump(f=f)
prev_entry.range.hi = entry.range.lo
prev_entry = entry
for entry in sorted_entries:
entry.dump(f=f)
class Entry:
def __init__(self, addr, size=0, name=None, source=None, isa=None):
    '''Create an entry covering [addr, addr + size).

    ``name`` is recorded only when it is truthy, while ``source`` is
    always stored as the first element of ``sources`` (even when None).
    '''
    end_addr = addr + size
    self.range = AddressRange(addr, end_addr)
    self.names = [name] if name else []
    self.sources = [source]
    self.isa = isa
def dump(self, f=sys.stdout):