17
17
18
18
from __future__ import annotations
19
19
20
+ import hashlib
20
21
from collections import defaultdict
21
22
from collections .abc import Generator
22
23
from functools import lru_cache
23
24
from typing import TYPE_CHECKING
25
+ from pathlib import Path
24
26
25
27
# third-party imports
26
28
from bindiff import BindiffFile # type: ignore[import-untyped]
@@ -140,6 +142,43 @@ def compute_instruction_match(
140
142
yield from zip (primary_instr [k ], secondary_instr [k ])
141
143
142
144
145
+ def _compute_file_info (program : Program ) -> dict :
146
+ """
147
+ Compute a BinExport file information required for filling
148
+ .Bindiff database.
149
+
150
+ :param program: Binexport program
151
+ :return: dict of data
152
+ """
153
+ exec_path = Path (program .exec_path )
154
+ exp_path = Path (program .export_path )
155
+ hash = hashlib .sha256 (exec_path .read_bytes () if exec_path .exists () else exp_path .read_bytes ()).hexdigest ()
156
+
157
+ funs = {True : 0 , False : 0 }
158
+ bbs = {True : 0 , False : 0 }
159
+ edges = {True : 0 , False : 0 }
160
+ insts = {True : 0 , False : 0 }
161
+ for fun in program :
162
+ islib = fun .is_library ()
163
+ funs [islib ] += 1
164
+ bbs [islib ] += len (fun .flowgraph .nodes )
165
+ edges [islib ] += len (fun .flowgraph .edges )
166
+ insts [islib ] += sum (len (bb .instructions ) for bb in fun )
167
+
168
+ return {"export_name" : program .export_path ,
169
+ "hash" : hash ,
170
+ "executable_name" : program .exec_path ,
171
+ "functions" : funs [False ],
172
+ "libfunctions" : funs [True ],
173
+ "calls" : len (program .callgraph .edges ),
174
+ "basicblocks" : bbs [False ],
175
+ "libbasicblocks" : bbs [True ],
176
+ "edges" : edges [False ],
177
+ "libedges" : edges [True ],
178
+ "instructions" : insts [False ],
179
+ "libinstructions" : insts [True ]}
180
+
181
+
143
182
def export_to_bindiff (
144
183
filename : str , primary : Program , secondary : Program , mapping : Mapping
145
184
) -> None :
@@ -153,58 +192,50 @@ def export_to_bindiff(
153
192
"""
154
193
from qbindiff import __version__ # import the version here to avoid circular definition
155
194
156
- def count_items (program : Program ) -> tuple [int , int , int , int ]:
157
- fp , flib , bbs , inst = 0 , 0 , 0 , 0
158
- for f_addr , f in program .items ():
159
- fp += int (not (f .is_import ()))
160
- flib += int (f .is_import ())
161
- bbs += len (f )
162
- inst += sum (len (x ) for x in f )
163
- return fp , flib , bbs , inst
164
-
165
195
binfile = BindiffFile .create (
166
196
filename ,
167
- primary .export_path ,
168
- secondary .export_path ,
169
197
f"Qbindiff { __version__ } " ,
170
198
"" ,
171
199
mapping .normalized_similarity ,
172
200
0.0 ,
173
201
)
174
202
203
+ # Add the two files
204
+ infos_primary = _compute_file_info (primary )
205
+ binfile .add_file_matched (** infos_primary )
206
+
207
+ infos_secondary = _compute_file_info (secondary )
208
+ binfile .add_file_matched (** infos_secondary )
209
+
175
210
for m in mapping : # iterate all the matchs
176
211
with m .primary , m .secondary : # Do not unload basic blocks
177
212
# Add the function match
178
213
faddr1 , faddr2 = m .primary .addr , m .secondary .addr
179
214
215
+ # Add the function match here to provide the same_bb_count
216
+ funentry_id = binfile .add_function_match (
217
+ faddr1 ,
218
+ faddr2 ,
219
+ m .primary .name ,
220
+ m .secondary .name ,
221
+ float (m .similarity ),
222
+ float (m .confidence ),
223
+ 0 ,
224
+ )
225
+
180
226
# Compute the basic block match (bindiff style) and add it in database
181
227
same_bb_count = 0
182
228
bb_matches = compute_basic_block_match (m .primary , m .secondary )
183
229
for addr1 , addr2 in bb_matches :
184
230
bb1 , bb2 = m .primary [addr1 ], m .secondary [addr2 ]
185
231
same_bb_count += 1
186
- entry_id = binfile .add_basic_block_match (faddr1 , faddr2 , addr1 , addr2 )
232
+ bbentry_id = binfile .add_basic_block_match (funentry_id , addr1 , addr2 )
187
233
188
234
# Compute the instruction match (bindiff style) and add it in database
189
235
for instr_addr1 , instr_addr2 in compute_instruction_match (bb1 , bb2 ):
190
- binfile .add_instruction_match (entry_id , instr_addr1 , instr_addr2 )
191
-
192
- # Add the function match here to provide the same_bb_count
193
- binfile .add_function_match (
194
- faddr1 ,
195
- faddr2 ,
196
- m .primary .name ,
197
- m .secondary .name ,
198
- float (m .similarity ),
199
- float (m .confidence ),
200
- same_bb_count ,
201
- )
236
+ binfile .add_instruction_match (bbentry_id , instr_addr1 , instr_addr2 )
202
237
203
- # Update file infos about primary
204
- f , lib , bbs , insts = count_items (primary )
205
- binfile .update_file_infos (1 , f , lib , bbs , insts )
206
- # Update file infos about secondary
207
- f , lib , bbs , insts = count_items (secondary )
208
- binfile .update_file_infos (2 , f , lib , bbs , insts )
238
+ # Update a-posteriori identical basic blocks count
239
+ binfile .update_samebb_function_match (funentry_id , same_bb_count )
209
240
210
- # binfile.commit()
241
+ binfile .commit ()
0 commit comments