Skip to content

Commit 9398242

Browse files
committed
update .BinDiff file creation to comply with new API
1 parent 66af6ee commit 9398242

File tree

1 file changed

+62
-31
lines changed

1 file changed

+62
-31
lines changed

src/qbindiff/mapping/bindiff.py

Lines changed: 62 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717

1818
from __future__ import annotations
1919

20+
import hashlib
2021
from collections import defaultdict
2122
from collections.abc import Generator
2223
from functools import lru_cache
2324
from typing import TYPE_CHECKING
25+
from pathlib import Path
2426

2527
# third-party imports
2628
from bindiff import BindiffFile # type: ignore[import-untyped]
@@ -140,6 +142,43 @@ def compute_instruction_match(
140142
yield from zip(primary_instr[k], secondary_instr[k])
141143

142144

145+
def _compute_file_info(program: "Program") -> dict:
    """
    Compute the BinExport file information required for filling
    a .BinDiff database.

    The hash is the SHA-256 digest of the original executable when
    ``program.exec_path`` designates a regular file, otherwise the digest
    of the export file itself.

    :param program: BinExport program
    :return: dict holding the export and executable names, the hash, the
             number of calls and the function, basic block, edge and
             instruction counts (library functions are counted separately
             under the ``lib``-prefixed keys)
    :raises OSError: if the file selected for hashing cannot be read
    """
    exec_path = Path(program.exec_path)
    # is_file() rather than exists(): an empty exec_path becomes Path("."),
    # which exists but is a directory and thus cannot be hashed.
    hashed_path = exec_path if exec_path.is_file() else Path(program.export_path)

    digest = hashlib.sha256()
    with hashed_path.open("rb") as stream:
        # Hash chunk by chunk so that a large binary is never fully loaded in memory
        for chunk in iter(lambda: stream.read(1 << 20), b""):
            digest.update(chunk)

    # Each counter is indexed by the "is library" flag of the function
    funs = {True: 0, False: 0}
    bbs = {True: 0, False: 0}
    edges = {True: 0, False: 0}
    insts = {True: 0, False: 0}
    for fun in program:
        islib = fun.is_library()
        funs[islib] += 1
        bbs[islib] += len(fun.flowgraph.nodes)
        edges[islib] += len(fun.flowgraph.edges)
        insts[islib] += sum(len(bb.instructions) for bb in fun)

    return {
        "export_name": program.export_path,
        "hash": digest.hexdigest(),
        "executable_name": program.exec_path,
        "functions": funs[False],
        "libfunctions": funs[True],
        "calls": len(program.callgraph.edges),
        "basicblocks": bbs[False],
        "libbasicblocks": bbs[True],
        "edges": edges[False],
        "libedges": edges[True],
        "instructions": insts[False],
        "libinstructions": insts[True],
    }
180+
181+
143182
def export_to_bindiff(
144183
filename: str, primary: Program, secondary: Program, mapping: Mapping
145184
) -> None:
@@ -153,58 +192,50 @@ def export_to_bindiff(
153192
"""
154193
from qbindiff import __version__ # import the version here to avoid circular definition
155194

156-
def count_items(program: Program) -> tuple[int, int, int, int]:
157-
fp, flib, bbs, inst = 0, 0, 0, 0
158-
for f_addr, f in program.items():
159-
fp += int(not (f.is_import()))
160-
flib += int(f.is_import())
161-
bbs += len(f)
162-
inst += sum(len(x) for x in f)
163-
return fp, flib, bbs, inst
164-
165195
binfile = BindiffFile.create(
166196
filename,
167-
primary.export_path,
168-
secondary.export_path,
169197
f"Qbindiff {__version__}",
170198
"",
171199
mapping.normalized_similarity,
172200
0.0,
173201
)
174202

203+
# Add the two files
204+
infos_primary = _compute_file_info(primary)
205+
binfile.add_file_matched(**infos_primary)
206+
207+
infos_secondary = _compute_file_info(secondary)
208+
binfile.add_file_matched(**infos_secondary)
209+
175210
for m in mapping: # iterate all the matchs
176211
with m.primary, m.secondary: # Do not unload basic blocks
177212
# Add the function match
178213
faddr1, faddr2 = m.primary.addr, m.secondary.addr
179214

215+
# Add the function match first to obtain its entry id (same_bb_count is set to 0 and updated afterwards)
216+
funentry_id = binfile.add_function_match(
217+
faddr1,
218+
faddr2,
219+
m.primary.name,
220+
m.secondary.name,
221+
float(m.similarity),
222+
float(m.confidence),
223+
0,
224+
)
225+
180226
# Compute the basic block match (bindiff style) and add it in database
181227
same_bb_count = 0
182228
bb_matches = compute_basic_block_match(m.primary, m.secondary)
183229
for addr1, addr2 in bb_matches:
184230
bb1, bb2 = m.primary[addr1], m.secondary[addr2]
185231
same_bb_count += 1
186-
entry_id = binfile.add_basic_block_match(faddr1, faddr2, addr1, addr2)
232+
bbentry_id = binfile.add_basic_block_match(funentry_id, addr1, addr2)
187233

188234
# Compute the instruction match (bindiff style) and add it in database
189235
for instr_addr1, instr_addr2 in compute_instruction_match(bb1, bb2):
190-
binfile.add_instruction_match(entry_id, instr_addr1, instr_addr2)
191-
192-
# Add the function match here to provide the same_bb_count
193-
binfile.add_function_match(
194-
faddr1,
195-
faddr2,
196-
m.primary.name,
197-
m.secondary.name,
198-
float(m.similarity),
199-
float(m.confidence),
200-
same_bb_count,
201-
)
236+
binfile.add_instruction_match(bbentry_id, instr_addr1, instr_addr2)
202237

203-
# Update file infos about primary
204-
f, lib, bbs, insts = count_items(primary)
205-
binfile.update_file_infos(1, f, lib, bbs, insts)
206-
# Update file infos about secondary
207-
f, lib, bbs, insts = count_items(secondary)
208-
binfile.update_file_infos(2, f, lib, bbs, insts)
238+
# Update a-posteriori identical basic blocks count
239+
binfile.update_samebb_function_match(funentry_id, same_bb_count)
209240

210-
# binfile.commit()
241+
binfile.commit()

0 commit comments

Comments
 (0)