-
Notifications
You must be signed in to change notification settings - Fork 58
/
Copy pathformula_manager.ml
1439 lines (1341 loc) · 50.4 KB
/
formula_manager.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(*
Copyright (C) BitBlaze, 2009-2013, and copyright (C) 2010 Ensighta
Security Inc. All rights reserved.
*)
module V = Vine
module QE = Query_engine
open Exec_domain
open Exec_exceptions
open Exec_options
open Frag_simplify
open Frag_marshal
let reg_addr () = match !opt_arch with
| (X86|ARM) -> V.REG_32
| X64 -> V.REG_64
module VarWeak = Weak.Make(VarByInt)
(* Unlike Vine_util.list_unique, this preserves order (keeping the
first occurrence) which is important because the temps have to retain
a topological ordering. *)
let list_unique l =
let h = Hashtbl.create 10 in
let rec loop = function
| [] -> []
| e :: el ->
if Hashtbl.mem h e then
loop el
else
(Hashtbl.replace h e ();
e :: (loop el))
in
(loop l)
let list_where elt l =
let rec loop pos l =
match l with
| (a :: r) when a = elt -> pos
| (a :: r) -> loop (pos + 1) r
| [] -> failwith "Not present in list_where"
in
loop 0 l
let cf_eval e =
match Vine_opt.constant_fold (fun _ -> None) e with
| V.Constant(V.Int(_, _)) as c -> c
| e ->
Printf.printf "Left with %s\n" (V.exp_to_string e);
failwith "cf_eval failed in eval_expr"
let conjoin l =
match l with
| [] -> V.exp_true
| e :: el -> List.fold_left (fun a b -> V.BinOp(V.BITAND, a, b)) e el
let disjoin l =
match l with
| [] -> V.exp_false
| e :: el -> List.fold_left (fun a b -> V.BinOp(V.BITOR, a, b)) e el
let xorjoin l =
match l with
| [] -> V.exp_false
| e :: el -> List.fold_left (fun a b -> V.BinOp(V.XOR, a, b)) e el
let qe_decl_var =
function
| QE.InputVar(v) -> v
| QE.TempVar(v, e) -> v
| QE.TempArray(v, el) -> v
module FormulaManagerFunctor =
functor (D : Exec_domain.DOMAIN) ->
struct
(* This has to be outside the class because I want it to have
polymorphic type. *)
let if_expr_temp form_man var fn_t else_val else_fn =
let box = ref else_val in
form_man#if_expr_temp_unit var
(fun e ->
match e with
| Some (e) -> box := fn_t e
| None -> (else_fn var)
);
!box
(* This function captures a kind of structure used for a function
that would naturally be written as recursive over the structure of
expressions, but wants to recuse into the definitions of temporary
variables. It's important to memoize such a function over the
temporaries, since otherwise a given temporary could appear
exponentially many times in the traversal for a deep expression. We
also pass the size of the variable when it's reused, since that's
useful information that may not otherwise be visible nearby. It
might also be worth caching between calls to this function, though
that's less critical. As with if_expr_temp this has a polymorphic
type, though in the motivating examples the result of the traversal
was always an int.
*)
let map_expr_temp form_man e f combine =
let cache = V.VarHash.create 101 in
let rec recurse e =
let rec maybe_recurse e var wd =
try
V.VarHash.find cache var
with
Not_found ->
let box = ref None in
form_man#if_expr_temp_unit var
(function
| Some (e') ->
let res = f recurse e' in
V.VarHash.replace cache var res;
box := Some (combine wd res)
| None -> box := Some (f recurse e));
match !box with
| Some res -> res
| None -> failwith "Empty box invariant failure"
in
match e with
| V.Lval(V.Temp((_, _, V.REG_1) as var)) -> maybe_recurse e var 1
| V.Lval(V.Temp((_, _, V.REG_8) as var)) -> maybe_recurse e var 8
| V.Lval(V.Temp((_, _, V.REG_16) as var)) -> maybe_recurse e var 16
| V.Lval(V.Temp((_, _, V.REG_32) as var)) -> maybe_recurse e var 32
| V.Lval(V.Temp((_, _, V.REG_64) as var)) -> maybe_recurse e var 64
| _ -> f recurse e
in
recurse e
let to_symbolic v ty =
match ty with
| V.REG_1 -> D.to_symbolic_1 v
| V.REG_8 -> D.to_symbolic_8 v
| V.REG_16 -> D.to_symbolic_16 v
| V.REG_32 -> D.to_symbolic_32 v
| V.REG_64 -> D.to_symbolic_64 v
| _ -> failwith "Unexpected type in FM.to_symbolic"
class formula_manager = object(self)
val input_vars = Hashtbl.create 30
method private fresh_symbolic_var str ty =
try Hashtbl.find input_vars str with
Not_found ->
Hashtbl.replace input_vars str (V.newvar str ty);
Hashtbl.find input_vars str
method private fresh_symbolic_vexp str ty =
V.Lval(V.Temp(self#fresh_symbolic_var str ty))
method private fresh_symbolic str ty =
let v = D.from_symbolic (self#fresh_symbolic_vexp str ty) in
if !opt_use_tags then
Printf.printf "Symbolic %s is %Ld\n" str (D.get_tag v);
v
method fresh_symbolic_1 s = self#fresh_symbolic s V.REG_1
method fresh_symbolic_8 s = self#fresh_symbolic s V.REG_8
method fresh_symbolic_16 s = self#fresh_symbolic s V.REG_16
method fresh_symbolic_32 s = self#fresh_symbolic s V.REG_32
method fresh_symbolic_64 s = self#fresh_symbolic s V.REG_64
method get_input_vars = Hashtbl.fold (fun s v l -> v :: l) input_vars []
val region_base_vars = Hashtbl.create 30
method fresh_region_base s =
assert(not (Hashtbl.mem region_base_vars s));
let var = self#fresh_symbolic_var s (reg_addr ()) in
Hashtbl.replace region_base_vars s var;
D.from_symbolic (V.Lval(V.Temp(var)))
method known_region_base ((_,s,_):V.var) =
Hashtbl.mem region_base_vars s
val region_vars = Hashtbl.create 30
method declare_symbolic_region str =
if not (Hashtbl.mem region_vars str) then
Hashtbl.replace region_vars str
(V.newvar str (V.TMem(V.REG_32, V.Little)))
method private fresh_symbolic_mem ty str addr =
let v = try Hashtbl.find region_vars str with
Not_found ->
Hashtbl.replace region_vars str
(V.newvar str (V.TMem(V.REG_32, V.Little)));
Hashtbl.find region_vars str
in
D.from_symbolic
(V.Lval(V.Mem(v, V.Constant(V.Int(V.REG_32, addr)), ty)))
method fresh_symbolic_mem_1 = self#fresh_symbolic_mem V.REG_1
method fresh_symbolic_mem_8 = self#fresh_symbolic_mem V.REG_8
method fresh_symbolic_mem_16 = self#fresh_symbolic_mem V.REG_16
method fresh_symbolic_mem_32 = self#fresh_symbolic_mem V.REG_32
method fresh_symbolic_mem_64 = self#fresh_symbolic_mem V.REG_64
method input_dl =
(Hashtbl.fold (fun k v l -> v :: l) input_vars []) @
(Hashtbl.fold (fun k v l -> v :: l) region_vars [])
val seen_concolic = Hashtbl.create 30
val valuation = Hashtbl.create 30
method private make_concolic ty str v =
let var =
(if Hashtbl.mem seen_concolic (str, 0L, ty) then
let var = Hashtbl.find seen_concolic (str, 0L, ty) in
let old_val = Hashtbl.find valuation var in
if v <> old_val then
if !opt_trace_unexpected then
Printf.printf
"Value mismatch: %s was 0x%Lx and then later 0x%Lx\n"
str old_val v;
var
else
(let new_var = self#fresh_symbolic str ty in
Hashtbl.replace seen_concolic (str, 0L, ty) new_var;
new_var))
in
if !opt_trace_taint then
Printf.printf "Valuation %s = 0x%Lx:%s\n"
str v (V.type_to_string ty);
Hashtbl.replace valuation var v;
var
method make_concolic_8 s v = self#make_concolic V.REG_8 s(Int64.of_int v)
method make_concolic_16 s v = self#make_concolic V.REG_16 s(Int64.of_int v)
method make_concolic_32 s v = self#make_concolic V.REG_32 s v
method make_concolic_64 s v = self#make_concolic V.REG_64 s v
method fresh_region_base_concolic s v =
assert(not (Hashtbl.mem region_base_vars s));
let var = self#fresh_symbolic_var s (reg_addr ()) in
Hashtbl.replace region_base_vars s var;
ignore(self#make_concolic_32 s v);
D.from_symbolic (V.Lval(V.Temp(var)))
method make_concolic_mem_8 str addr v_int =
let v = Int64.of_int v_int in
let var =
(if Hashtbl.mem seen_concolic (str, addr, V.REG_8) then
let var = Hashtbl.find seen_concolic (str, addr, V.REG_8) in
let old_val = Hashtbl.find valuation var in
if v <> old_val then
if !opt_trace_unexpected then
Printf.printf
"Value mismatch: %s:0x%Lx was 0x%Lx and then later 0x%Lx\n"
str addr old_val v;
var
else
(let new_var = self#fresh_symbolic_mem V.REG_8 str addr in
Hashtbl.replace seen_concolic (str, addr, V.REG_8) new_var;
new_var))
in
if !opt_trace_taint then
Printf.printf "Byte valuation %s:0x%Lx = 0x%Lx\n"
str addr v;
Hashtbl.replace valuation var v;
(match !input_string_mem_prefix with
| None -> input_string_mem_prefix := Some (str ^ "_byte_")
| _ -> ());
max_input_string_length :=
max !max_input_string_length (1 + Int64.to_int addr);
var
method private mem_var region_str ty addr =
let ty_str = (match ty with
| V.REG_8 -> "byte"
| V.REG_16 -> "short"
| V.REG_32 -> "word"
| V.REG_64 -> "long"
| _ -> failwith "Bad size in mem_var")
in
let name = Printf.sprintf "%s_%s_0x%08Lx" region_str ty_str addr
in
self#fresh_symbolic_var name ty
val mutable mem_byte_vars = V.VarHash.create 30
method private mem_var_byte region_str addr =
let var = self#mem_var region_str V.REG_8 addr in
V.VarHash.replace mem_byte_vars var ();
var
method private mem_axioms_short region_str addr svar =
let bvar0 = self#mem_var_byte region_str addr and
bvar1 = self#mem_var_byte region_str (Int64.add addr 1L) in
[svar, D.to_symbolic_16
(D.assemble16 (D.from_symbolic (V.Lval(V.Temp(bvar0))))
(D.from_symbolic (V.Lval(V.Temp(bvar1)))))]
method private mem_axioms_word region_str addr wvar =
let svar0 = self#mem_var region_str V.REG_16 addr and
svar1 = self#mem_var region_str V.REG_16 (Int64.add addr 2L) in
[wvar, D.to_symbolic_32
(D.assemble32 (D.from_symbolic (V.Lval(V.Temp(svar0))))
(D.from_symbolic (V.Lval(V.Temp(svar1)))))]
@ (self#mem_axioms_short region_str addr svar0)
@ (self#mem_axioms_short region_str (Int64.add addr 2L) svar1)
method private mem_axioms_long region_str addr lvar =
let wvar0 = self#mem_var region_str V.REG_32 addr and
wvar1 = self#mem_var region_str V.REG_32 (Int64.add addr 4L) in
[lvar, D.to_symbolic_64
(D.assemble64 (D.from_symbolic (V.Lval(V.Temp(wvar0))))
(D.from_symbolic (V.Lval(V.Temp(wvar1)))))]
@ (self#mem_axioms_word region_str addr wvar0)
@ (self#mem_axioms_word region_str (Int64.add addr 4L) wvar1)
val mutable mem_axioms = V.VarHash.create 30
method private add_mem_axioms region_str ty addr =
let var = self#mem_var region_str ty addr in
if ty = V.REG_8 then
V.VarHash.replace mem_byte_vars var ()
else
let al = (match ty with
| V.REG_8 -> failwith "Unexpected REG_8"
| V.REG_16 -> self#mem_axioms_short region_str addr var
| V.REG_32 -> self#mem_axioms_word region_str addr var
| V.REG_64 -> self#mem_axioms_long region_str addr var
| _ -> failwith "Unexpected type in add_mem_axioms") in
List.iter
(fun (lhs, rhs) -> V.VarHash.replace mem_axioms lhs rhs)
al;
assert(V.VarHash.mem mem_axioms var);
(* State about "tables" of values, as used for loads from memory
when -table-limit has a positive value. A table is a list of
symbolic expressions that occurred sequentially in memory; its
length is always a power of two. (A common case is for all the
values to be concrete, like a character translation table.) The
way tables are translated for the SMT solver depends on the
-tables-as-arrays option. If it's off, each access to a table is
expanded into a tree of if-then-else choices on the bits of the
index. If -tables-as-arrays is set, a table is translated into an
SMT variable with an array type, and any access can be translated
symbolically. In this latter case, the table is represented by a
Vine variable with Array type. We keep track of all the unique
tables we've seen, and number them sequentially starting from 0.
We also use lists to keep track of information about tables, which
is reasonable because we generally don't create very many.
*)
(* The i-th entry in this list is the Vine variable that
represents the i-th table. It has a name like "table5", and its
type tells you the length of the table (currently always a power
of two) and the element type (REG_8/16/32/64). *)
val mutable table_vars = []
(* The i-th entry in this list gives the contents of the i-th table
as a list of Vine expressions. *)
val mutable tables_by_idx : V.exp list list = []
(* This hash table maps from the contents of a table to its number;
it's how we avoid creating the same table more than once. It's
roughly the inverse mapping to tables_by_idx, though the contents
are stored as domain elements because that is more convenient for
the fragment machine code that creates them. *)
val tables : (D.t list, int) Hashtbl.t = Hashtbl.create 101
val tables_cache_limit = 1000000L
(* The Vine expression memory-access syntax is used for two
slightly different purposes is FuzzBALL symbolic expressions:
representing indexing into tables, and representing accesses to
variable-granularity memory. You can tell you are in the former
case if the memory variable is in table_vars. Variable-granularity
memory is translated by the formula manager into scalar variables
of various granularities which are related to each other by
constraints called "mem_axioms" here. Table indexing is kept
distinct until the solver interface when it is translated into an
SMTLIB "select" array operator or the equivalent for other
solvers. This method only performs the latter rewriting.
The former rewriting should be handled by callers of this method. *)
method private rewrite_mem_to_scalar e =
match e with
| V.Lval(V.Mem((_,region_str,ty1),
V.Constant(V.Int((V.REG_32|V.REG_64), addr)), ty2))
-> (self#add_mem_axioms region_str ty2 addr;
V.Lval(V.Temp(self#mem_var region_str ty2 addr)))
| _ -> failwith ("Bad expression " ^ (V.exp_to_string e) ^
" in rewrite_mem_to_scalar")
method rewrite_for_solver e =
let rec loop e =
match e with
| V.BinOp(op, e1, e2) -> V.BinOp(op, (loop e1), (loop e2))
| V.FBinOp(op, rm, e1, e2) -> V.FBinOp(op, rm, (loop e1), (loop e2))
| V.UnOp(op, e1) -> V.UnOp(op, (loop e1))
| V.FUnOp(op, rm, e1) -> V.FUnOp(op, rm, (loop e1))
| V.Constant(_) -> e
| V.Lval(V.Temp(_)) -> e
| V.Lval(V.Mem(table_var, idx_e, elt_ty))
when List.mem table_var table_vars ->
V.Lval(V.Mem(table_var, (loop idx_e), elt_ty))
| V.Lval(V.Mem(_, _, _)) -> self#rewrite_mem_to_scalar e
| V.Name(_) -> e
| V.Cast(kind, ty, e1) -> V.Cast(kind, ty, (loop e1))
| V.FCast(kind, rm, ty, e1) -> V.FCast(kind, rm, ty, (loop e1))
| V.Unknown(_) -> e
| V.Let(V.Temp(v), e1, e2) ->
V.Let(V.Temp(v), (loop e1), (loop e2))
| V.Let(V.Mem(_,_,_), _, _) ->
failwith "Unexpected memory let in rewrite_for_solver"
| V.Ite(ce, te, fe) ->
V.Ite((loop ce), (loop te), (loop fe))
in
loop e
method get_mem_axioms =
let of_type ty ((n,s,ty'),e) = (ty = ty') in
let l = V.VarHash.fold
(fun lhs rhs l -> (lhs, rhs) :: l) mem_axioms [] in
let shorts = List.filter (of_type V.REG_16) l and
words = List.filter (of_type V.REG_32) l and
longs = List.filter (of_type V.REG_64) l in
shorts @ words @ longs
method get_mem_bytes =
V.VarHash.fold (fun v _ l -> v :: l) mem_byte_vars []
method private reset_mem_axioms =
V.VarHash.clear mem_byte_vars;
V.VarHash.clear mem_axioms
method private with_saved_mem_axioms f =
let old_mbv = mem_byte_vars and
old_max = mem_axioms in
mem_byte_vars <- V.VarHash.create 30;
mem_axioms <- V.VarHash.create 30;
let r = f () in
mem_byte_vars <- old_mbv;
mem_axioms <- old_max;
r
method private eval_var lv =
let d = D.from_symbolic (V.Lval lv) in
match lv with
| V.Mem(mem_var, V.Constant(V.Int(_, addr)), V.REG_8) ->
if not (Hashtbl.mem valuation d) then
Printf.printf "Unexpected symbolic byte %s\n"
(V.lval_to_string lv);
assert(Hashtbl.mem valuation d);
D.from_concrete_8 (Int64.to_int (Hashtbl.find valuation d))
| V.Mem(mem_var, V.Constant(V.Int(_, addr)), V.REG_16) ->
if Hashtbl.mem valuation d then
D.from_concrete_16 (Int64.to_int (Hashtbl.find valuation d))
else
D.assemble16
(self#eval_var
(V.Mem(mem_var, V.Constant(V.Int(V.REG_32, addr)),
V.REG_8)))
(self#eval_var
(V.Mem(mem_var,
V.Constant(V.Int(V.REG_32,
(Int64.add 1L addr))),
V.REG_8)))
| V.Mem(mem_var, V.Constant(V.Int(_, addr)), V.REG_32) ->
if Hashtbl.mem valuation d then
D.from_concrete_32 (Hashtbl.find valuation d)
else
D.assemble32
(self#eval_var
(V.Mem(mem_var, V.Constant(V.Int(V.REG_32, addr)),
V.REG_16)))
(self#eval_var
(V.Mem(mem_var, V.Constant(V.Int(V.REG_32,
(Int64.add 2L addr))),
V.REG_16)))
| V.Mem(mem_var, V.Constant(V.Int(_, addr)), V.REG_64) ->
if Hashtbl.mem valuation d then
D.from_concrete_64 (Hashtbl.find valuation d)
else
D.assemble64
(self#eval_var
(V.Mem(mem_var, V.Constant(V.Int(V.REG_32, addr)),
V.REG_32)))
(self#eval_var
(V.Mem(mem_var, V.Constant(V.Int(V.REG_32,
(Int64.add 4L addr))),
V.REG_32)))
| V.Temp(_, _, V.REG_8) ->
assert(Hashtbl.mem valuation d);
D.from_concrete_8 (Int64.to_int (Hashtbl.find valuation d))
| V.Temp(_, _, V.REG_16) ->
assert(Hashtbl.mem valuation d);
D.from_concrete_16 (Int64.to_int (Hashtbl.find valuation d))
| V.Temp(_, _, V.REG_32) ->
assert(Hashtbl.mem valuation d);
D.from_concrete_32 (Hashtbl.find valuation d)
| V.Temp(_, _, V.REG_64) ->
assert(Hashtbl.mem valuation d);
D.from_concrete_64 (Hashtbl.find valuation d)
| _ -> failwith "unexpected lval expr in eval_var"
(* subexpression cache *)
val subexpr_to_temp_var_info = Hashtbl.create 1001
val temp_var_num_to_subexpr = Hashtbl.create 1001
val mutable temp_var_num = 0
(* This table represents a subset of the subexpressions that are
available for use on this execution path. We retain the values and
numbering if the temporary variables across exexcution paths, but
to make formula simplification deterministic across re-execution
of path prefixes, we avoid using temporaries until their
corresponding sub-expressions are first encountered on the current
path. This hashtable is parallel to subexpr_to_temp_var_info but
only maps an encoded subexpression to unit if it is availble to
use. *)
val subexpr_to_temp_var_enabled = Hashtbl.create 1001
val temp_var_num_evaled = Hashtbl.create 1001
method eval_expr e =
let rec loop e =
match e with
| V.BinOp(op, e1, e2) -> cf_eval (V.BinOp(op, loop e1, loop e2))
| V.FBinOp(op, rm, e1, e2)
-> cf_eval (V.FBinOp(op, rm, loop e1, loop e2))
| V.UnOp(op, e1) -> cf_eval (V.UnOp(op, loop e1))
| V.FUnOp(op, rm, e1) -> cf_eval (V.FUnOp(op, rm, loop e1))
| V.Cast(op, ty, e1) -> cf_eval (V.Cast(op, ty, loop e1))
| V.FCast(op, rm, ty, e1) -> cf_eval (V.FCast(op, rm, ty, loop e1))
| V.Lval(V.Mem(_, _, ty) as lv) ->
let d = self#eval_var lv in
let v = match ty with
| V.REG_8 -> Int64.of_int (D.to_concrete_8 d)
| V.REG_16 -> Int64.of_int (D.to_concrete_16 d)
| V.REG_32 -> D.to_concrete_32 d
| V.REG_64 -> D.to_concrete_64 d
| _ -> failwith "Unexpected type in eval_expr"
in
V.Constant(V.Int(ty, v))
| V.Constant(V.Int(_, _)) -> e
| V.Lval(V.Temp(n,s,t))
when Hashtbl.mem temp_var_num_to_subexpr n ->
(try Hashtbl.find temp_var_num_evaled n
with
| Not_found ->
let (e_enc, _) = Hashtbl.find temp_var_num_to_subexpr n in
let e' = loop (decode_exp e_enc)
in
if !opt_trace_temps then
Printf.printf "%s evaluates to %s\n"
s (V.exp_to_string e');
Hashtbl.replace temp_var_num_evaled n e';
e')
| V.Lval(V.Temp(n,s,ty) as lv) ->
let d = self#eval_var lv in
let v = match ty with
| V.REG_8 -> Int64.of_int (D.to_concrete_8 d)
| V.REG_16 -> Int64.of_int (D.to_concrete_16 d)
| V.REG_32 -> D.to_concrete_32 d
| V.REG_64 -> D.to_concrete_64 d
| _ -> failwith "Unexpected type in eval_expr"
in
V.Constant(V.Int(ty, v))
| V.Ite(ce, te, fe) -> cf_eval (V.Ite(loop ce, loop te, loop fe))
| V.Let(_, _, _)
| V.Unknown(_)
| V.Name(_)
| V.Constant(V.Str(_))
->
Printf.printf "Can't evaluate %s\n" (V.exp_to_string e);
failwith "Unexpected expr in eval_expr"
in
match loop e with
| V.Constant(V.Int(_, i64)) -> i64
| e ->
Printf.printf "Left with %s\n" (V.exp_to_string e);
failwith "Constant invariant failed in eval_expr"
method concolic_eval_1 d =
Int64.to_int (self#eval_expr (D.to_symbolic_1 d))
method concolic_eval_8 d =
Int64.to_int (self#eval_expr (D.to_symbolic_8 d))
method concolic_eval_16 d =
Int64.to_int (self#eval_expr (D.to_symbolic_16 d))
method concolic_eval_32 d =
self#eval_expr (D.to_symbolic_32 d)
method concolic_eval_64 d =
self#eval_expr (D.to_symbolic_64 d)
method private eval_var_from_ce ce lv =
match lv with
| V.Temp(_, s, ty) ->
let v = try Query_engine.ce_lookup_nf ce s
with Not_found ->
0L
(* Printf.printf "Missing var %s in counterexample\n" s;
List.iter (fun (s,v) -> Printf.printf " %s = 0x%Lx\n" s v) ce;
failwith "eval_var_from_ce failed on missing value" *)
in
V.Constant(V.Int(ty, v))
| _ ->
Printf.printf "Bad lvalue: %s\n" (V.lval_to_string lv);
failwith "Unhandled lvalue type in eval_var_from_ce"
method eval_expr_from_ce ce e =
let memo = Hashtbl.create 100 in
let rec loop e =
match e with
| V.BinOp(op, e1, e2) -> cf_eval (V.BinOp(op, loop e1, loop e2))
| V.FBinOp(op, rm, e1, e2)
-> cf_eval (V.FBinOp(op, rm, loop e1, loop e2))
| V.UnOp(op, e1) -> cf_eval (V.UnOp(op, loop e1))
| V.FUnOp(op, rm, e1) -> cf_eval (V.FUnOp(op, rm, loop e1))
| V.Cast(op, ty, e1) -> cf_eval (V.Cast(op, ty, loop e1))
| V.FCast(op, rm, ty, e1) -> cf_eval (V.FCast(op, rm, ty, loop e1))
| V.Constant(V.Int(_, _)) -> e
| V.Lval(V.Temp(n,s,t))
when Hashtbl.mem temp_var_num_to_subexpr n ->
(try Hashtbl.find memo n
with
| Not_found ->
let (e_enc, _) = Hashtbl.find temp_var_num_to_subexpr n in
let e' = loop (decode_exp e_enc)
in
Hashtbl.replace memo n e';
e')
| V.Lval(V.Mem(table_var, idx_e, elt_ty))
when List.mem table_var table_vars ->
let idx = Int64.to_int (loop_to_i64 idx_e) and
table_num = list_where table_var table_vars in
let table = List.nth tables_by_idx table_num in
if idx >= List.length table then
(* Undefined, treat as 0 *)
(* Printf.printf "Out of range index %d in table %d\n"
idx table_num; *)
V.Constant(V.Int(elt_ty, 0L))
else
let elt_e = List.nth table idx in
loop elt_e
| V.Lval(V.Mem(_, _, _)) -> loop (self#rewrite_mem_to_scalar e)
| V.Lval(V.Temp(memvar))
when V.VarHash.mem mem_axioms memvar
->
loop (V.VarHash.find mem_axioms memvar)
| V.Lval(lv) -> self#eval_var_from_ce ce lv
| V.Ite(ce, te, fe) -> cf_eval (V.Ite(loop ce, loop te, loop fe))
| V.Let(_, _, _)
| V.Unknown(_)
| V.Name(_)
| V.Constant(V.Str(_))
->
Printf.printf "Can't evaluate %s\n" (V.exp_to_string e);
failwith "Unexpected expr in eval_expr_from_ce"
and loop_to_i64 e =
match loop e with
| V.Constant(V.Int(_, i64)) -> i64
| e ->
Printf.printf "Left with %s\n" (V.exp_to_string e);
failwith "Constant invariant failed in eval_expr"
in
loop_to_i64 e
val temp_var_num_has_loop_var = Hashtbl.create 1001
method has_loop_var d =
let rec loop e =
match e with
| V.BinOp(op, e1, e2) -> (loop e1) || (loop e2)
| V.FBinOp(op, _, e1, e2) -> (loop e1) || (loop e2)
| V.UnOp(op, e1) -> loop e1
| V.FUnOp(op, _, e1) -> loop e1
| V.Cast(op, ty, e1) -> loop e1
| V.FCast(op, _, ty, e1) -> loop e1
| V.Lval(V.Mem(_, _, _)) -> false
| V.Constant(V.Int(_, _)) -> false
| V.Lval(V.Temp(n,s,t))
when Hashtbl.mem temp_var_num_to_subexpr n ->
(try Hashtbl.find temp_var_num_has_loop_var n
with
| Not_found ->
let (e_enc, _) = Hashtbl.find temp_var_num_to_subexpr n in
let b = loop (decode_exp e_enc)
in
Hashtbl.replace temp_var_num_has_loop_var n b;
b)
| V.Lval(V.Temp(_,s,_)) ->
String.length s >= 3 && String.sub s 0 3 = "LTC"
| V.Ite(ce, te, fe) -> (loop ce) || (loop te) || (loop fe)
| V.Let(_, _, _)
| V.Unknown(_)
| V.Name(_)
| V.Constant(V.Str(_))
->
failwith "Unexpected expr in has_loop_var"
in
loop (D.to_symbolic_32 d)
(* Depending on the program's behavior, the set of temporary
variables representing sub-expressions can grow without bound
during program execution, so along with the decision tree it can
be a limiting factor for memory usage scalability. We've
experimented in the past with using weak references so that
ununsed temporaries can be garbage collected, and regenerated if
the expressions re-appear. But we didn't reach the point of it
working stably, so it's currently disabled. Switch use_weak to
true if you want to experiment with it. *)
val use_weak = false
val temp_vars_weak = VarWeak.create 1001
val temp_vars_unweak = Hashtbl.create 1001
method private lookup_temp_var (temp_num, var_num, ty) =
let var = (var_num, "t" ^ (string_of_int temp_num), ty) in
if use_weak then
VarWeak.find temp_vars_weak var
else
Hashtbl.find temp_vars_unweak var
method private make_temp_var e ty =
let cleanup_temp_var (n, s, t) =
let (e_enc, _) = Hashtbl.find temp_var_num_to_subexpr n in
Hashtbl.remove temp_var_num_to_subexpr n;
Hashtbl.remove subexpr_to_temp_var_info e_enc;
Hashtbl.remove subexpr_to_temp_var_enabled e_enc;
Frag_marshal.free_var (n,s,t)
in
let (e_enc, used_vars) = encode_exp e in
try
Hashtbl.replace subexpr_to_temp_var_enabled e_enc ();
self#lookup_temp_var
(Hashtbl.find subexpr_to_temp_var_info e_enc)
with Not_found ->
let temp_num = temp_var_num in
let s = "t" ^ (string_of_int temp_num) in
temp_var_num <- temp_var_num + 1;
let var = V.newvar s ty in
let (var_num,_,_) = var in
let var_info = (temp_num, var_num, ty) in
Gc.finalise cleanup_temp_var var;
Hashtbl.replace subexpr_to_temp_var_info e_enc var_info;
Hashtbl.replace temp_var_num_to_subexpr var_num
(e_enc, used_vars);
if use_weak then
VarWeak.add temp_vars_weak var
else
Hashtbl.add temp_vars_unweak var var;
if !opt_trace_temps_encoded then
Printf.printf "%s = %s\n" s (encode_printable_exp e);
if !opt_trace_temps then
Printf.printf "%s = %s\n" s (V.exp_to_string e);
var
(* Expand the definitions of all the temporaries that occur directly
in an expression, including occurrences of those temporaries inside
the definitions of other expanded temporaries. For instance if
t3 is defined in terms of t1 and t2, and "e" contains t3 and t2,
both the direct occurrence of t2 in e and the occurrence inside
t3 will be expanded, but t1 will not be expanded. The motivation
for this is to expand just enough to let simplification rules
consistently apply. *)
method private expand_temps_1level e =
let to_expand = V.VarHash.create 21 in
let rec collect e = match e with
| V.BinOp(_, e1, e2) -> collect e1; collect e2
| V.FBinOp(_, rm, e1, e2) -> collect e1; collect e2
| V.UnOp(_, e1) -> collect e1
| V.FUnOp(_, _, e1) -> collect e1
| V.Constant(_) -> ()
| V.Lval(V.Temp(var)) ->
if_expr_temp self var
(fun e' -> V.VarHash.replace to_expand var e')
()
(fun _ -> ())
| V.Lval(V.Mem(_, e1, _)) -> collect e1
| V.Name(_) -> ()
| V.Cast(_, _, e1) -> collect e1
| V.FCast(_, _, _, e1) -> collect e1
| V.Unknown(_) -> ()
| V.Let(_, e1, e2) -> collect e1; collect e2
| V.Ite(ce, te, fe) -> collect ce; collect te; collect fe
in
let rec replace e = match e with
| V.BinOp(op, e1, e2) -> V.BinOp(op, (replace e1), (replace e2))
| V.FBinOp(op, rm, e1, e2) ->
V.FBinOp(op, rm, (replace e1), (replace e2))
| V.UnOp(op, e1) -> V.UnOp(op, (replace e1))
| V.FUnOp(op, rm, e1) -> V.FUnOp(op, rm, (replace e1))
| V.Constant(_) -> e
| V.Lval(V.Temp(var)) ->
(try
let (e':Vine.exp) = V.VarHash.find to_expand var in
replace e'
with Not_found -> e)
| V.Lval(V.Mem(v, e1, ty)) -> V.Lval(V.Mem(v, (replace e1), ty))
| V.Name(_) -> e
| V.Cast(kind, ty, e1) -> V.Cast(kind, ty, (replace e1))
| V.FCast(kind, rm, ty, e1) -> V.FCast(kind, rm, ty, (replace e1))
| V.Unknown(_) -> e
| V.Let(V.Temp(v), e1, e2) ->
V.Let(V.Temp(v), (replace e1), (replace e2))
| V.Let(V.Mem(v, e1, ty), e2, e3) ->
V.Let(V.Mem(v, (replace e1), ty), (replace e2), (replace e3))
| V.Ite(ce, te, fe) ->
V.Ite((replace ce), (replace te), (replace fe))
in
collect e;
replace e
method private collapse_temps e =
let temp_avail_for e =
let (e_enc, _) = encode_exp e in
if Hashtbl.mem subexpr_to_temp_var_enabled e_enc then
(let v = self#lookup_temp_var
(Hashtbl.find subexpr_to_temp_var_info e_enc) in
Some (V.Lval(V.Temp(v))))
else
None
in
let rec loop e =
match temp_avail_for e with
| Some e2 -> e2
| None ->
let e' = match e with
| V.BinOp(op, e1, e2) -> V.BinOp(op, (loop e1), (loop e2))
| V.FBinOp(op, rm, e1, e2) -> V.FBinOp(op, rm, (loop e1),
(loop e2))
| V.UnOp(op, e1) -> V.UnOp(op, (loop e1))
| V.FUnOp(op, rm, e1) -> V.FUnOp(op, rm, (loop e1))
| V.Constant(_) -> e
| V.Lval(V.Temp(_)) -> e
| V.Lval(V.Mem(v, e1, ty)) -> V.Lval(V.Mem(v, (loop e1), ty))
| V.Name(_) -> e
| V.Cast(kind, ty, e1) -> V.Cast(kind, ty, (loop e1))
| V.FCast(kind, rm, ty, e1) -> V.FCast(kind, rm, ty, (loop e1))
| V.Unknown(_) -> e
| V.Let(V.Temp(v), e1, e2) ->
V.Let(V.Temp(v), (loop e1), (loop e2))
| V.Let(V.Mem(v, e1, ty), e2, e3) ->
V.Let(V.Mem(v, (loop e1), ty), (loop e2), (loop e3))
| V.Ite(ce, te, fe) ->
V.Ite((loop ce), (loop te), (loop fe))
in
match temp_avail_for e' with
| Some e3 -> e3
| None -> e'
in
loop e
(* Take various steps to simplify an expression that might include
temporaries. The simplification rules for expressions are
elsewhere in frag_simplify and vine_opt. The extra level of
complexity we perform here is temporarily expanding one level of t
variables to see if that enables any extra simplifications, before
recollapsing any temporaries that did not change. The goal of this
is to get tbe benefit of the simplification rules in more cases,
without the lower levels of simplification machinery needing to
know about the t variable mechanism.
The need for the second simplification after collapsing is
subtle; mostly there wouldn't be any simplifications that can
apply after re-collapsing that wouldn't have applied before
collapsing. But in addition to simplifications that make the
formulas strictly simpler, we also canonicalize them including
by sorting the arguments to commutative/associative
operations. Temporary variables would often sort differently
than the expression they represent, so without redoing the
simplification we could break the canonicalization and cause
other problems.
*)
method private simplify_exp e =
let e2 = self#expand_temps_1level e in
let e3 = Frag_simplify.simplify_fp e2 in
let e4 = self#collapse_temps e3 in
let e5 = Frag_simplify.simplify_fp e4 in
let e_final = e5 in
(* if e <> e_final then
Printf.printf "Simplifying %s -> %s\n"
(V.exp_to_string e) (V.exp_to_string e_final); *)
if !opt_sanity_checks && !opt_concolic_prob <> None then
(let pre_val = self#eval_expr e and
post_val = self#eval_expr e_final
in
if pre_val <> post_val then
let pre_str = V.exp_to_string e and
post_str = V.exp_to_string e_final
in
Printf.printf "Uh oh, %s (%Ld) 'simplified' to %s (%Ld)\n"
pre_str pre_val post_str post_val;
failwith "Incorrect simplification occurred");
e_final
method private simplify (v:D.t) ty =
D.inside_symbolic
(fun e ->
let e' = self#simplify_exp e in
(* We're supposed to simplify expressions as we build
them, so something is going wrong if they get way to big
at once: *)
(* assert(expr_size e' < 1000); *)
if expr_size e' < !opt_t_expr_size then
e'
else
V.Lval(V.Temp(self#make_temp_var e' ty))
) v
method simplify1 e = self#simplify e V.REG_1
method simplify8 e = self#simplify e V.REG_8
method simplify16 e = self#simplify e V.REG_16
method simplify32 e = self#simplify e V.REG_32
method simplify64 e = self#simplify e V.REG_64
method simplify_with_callback f (v:D.t) ty =
D.inside_symbolic
(fun e ->
let e2 = self#simplify_exp e in
match e2 with
| V.Constant(_) -> e2
| _ ->
(match (f e2 ty) with
| Some e3 -> e3
| None ->
(if expr_size e2 < !opt_t_expr_size then
e2
else
V.Lval(V.Temp(self#make_temp_var e2 ty))))
) v
method tempify_exp e ty =
let e2 = self#simplify_exp e in
match e2 with
| V.Constant(_) -> e2
| V.Lval(V.Temp(n,s,t))
when Hashtbl.mem temp_var_num_to_subexpr n ->
e2
| _ ->
V.Lval(V.Temp(self#make_temp_var e2 ty))
method private tempify (v:D.t) ty =
D.inside_symbolic (fun e -> self#tempify_exp e ty) v
method tempify1 e = self#tempify e V.REG_1
method tempify8 e = self#tempify e V.REG_8
method tempify16 e = self#tempify e V.REG_16
method tempify32 e = self#tempify e V.REG_32
method tempify64 e = self#tempify e V.REG_64
method tempify_with_callback f (v:D.t) ty =
D.inside_symbolic
(fun e ->
let e2 = self#simplify_exp e in
match e2 with
| V.Constant(_) -> e2
| _ ->
(match (f e2 ty) with
| Some e3 -> e3
| None ->
V.Lval(V.Temp(self#make_temp_var e2 ty)))
) v
method make_ite cond_v ty v_true v_false =
let cond_v' = self#tempify cond_v V.REG_1 and
v_true' = self#simplify v_true ty and
v_false' = self#simplify v_false ty
in
if v_true' = v_false' then
v_true'
else
let func =
match ty with
| V.REG_1 -> D.ite1
| V.REG_8 -> D.ite8
| V.REG_16 -> D.ite16
| V.REG_32 -> D.ite32
| V.REG_64 -> D.ite64
| _ -> failwith "Unexpected type in make_ite"
in
func cond_v' v_true' v_false'
method private lookup_tree e bits ty expr_list =
let rec nth_tail n l = match (n, l) with
| (0, l) -> l
| (_, []) -> failwith "List too short in nth_tail"
| (n, l) -> nth_tail (n-1) (List.tl l)
in
assert((List.length expr_list) >= (1 lsl bits));
if bits = 0 then
List.hd expr_list
else
let shift_amt = Int64.of_int (bits - 1) in
let cond_e = V.Cast(V.CAST_LOW, V.REG_1,
V.BinOp(V.RSHIFT, e,
V.Constant(V.Int(V.REG_8, shift_amt))))
in
let half_two = nth_tail (1 lsl (bits - 1)) expr_list in
self#make_ite (D.from_symbolic cond_e) ty
(self#lookup_tree e (bits - 1) ty half_two)
(self#lookup_tree e (bits - 1) ty expr_list)