-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautomaton.lisp
executable file
·1213 lines (1142 loc) · 40.6 KB
/
automaton.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;;; -*- mode: lisp -*-
;;;
;;; (c) copyright 2005 by Aleksandar Bakic (a_bakic@yahoo.com)
;;;
;;; Derived from dk.brics.automaton v1.8.1, (c) 2001-2005 by Anders Møller
;;; - Functionality not used by the regular expression engine and not tested
;;; has been omitted from this initial release.
;;; - Some comments have been copied verbatim from the original code.
(in-package :automaton)
(deftype minimization () '(member huffman brzozowski hopcroft))
(defvar *minimization* 'hopcroft)
(defvar *minimize-always* t)
;;; Class invariants:
;;; - An automaton is either represented explicitly (with state and
;;; transition objects) or with a singleton string in case the
;;; automaton accepts exactly one string.
;;; - Automata are always reduced (see areduce) and have no transitions
;;; to dead states (see remove-dead-transitions).
;;; - If an automaton is nondeterministic, then deterministic returns nil
;;; (but the converse is not required).
;;; Implicitly, all states and transitions of an automaton are reachable
;;; from its initial state.
;;; If the states or transitions are manipulated manually, the
;;; restore-invariant and (setf deterministic) methods should be used
;;; afterwards to restore certain representation invariants that are
;;; assumed by the built-in automata operations.
;;; If minimize-always is true, minimize will automatically be invoked
;;; after every operation that otherwise may produce a non-minimal automaton
;;; (usually, an intermediate result).
(defclass automaton ()
((minimization :initform *minimization* :accessor minimization
:type minimization)
(initial :initform (make-instance 'state) :accessor initial :type state)
(deterministic :initform t :accessor deterministic :type boolean)
(info :initform nil :accessor info)
(hash-code :initform 0 :accessor hash-code :type fixnum)
(singleton :initform nil :accessor singleton :type (or null string))
(minimize-always :initform *minimize-always* :accessor minimize-always
:type boolean)))
(defun restore-invariant (a)
(remove-dead-transitions a)
(setf (hash-code a) 0))
(declaim (special *state-ht*))
(defun automaton-equal (a1 a2) ; for testing, assumes minimization
(and (eq (minimization a1) (minimization a2))
(let ((*state-ht* (make-hash-table :test #'equal)))
(state-equal (initial a1) (initial a2)))
(eq (deterministic a1) (deterministic a2))
(eqv a1 a2 +equalp-key-situation+)
(eq (minimize-always a1) (minimize-always a2))))
(defclass int-pair () ; TODO: replace with a simple cons
((n1 :initarg :n1 :reader n1 :type fixnum)
(n2 :initarg :n2 :reader n2 :type fixnum)))
(defclass state-list-node ()
((q :initform nil :initarg :q :accessor q :type (or null state))
(succ :initform nil :accessor succ :type (or null state-list-node))
(pred :initform nil :accessor pred :type (or null state-list-node))
(sl :initform nil :initarg :sl :accessor sl :type (or null state-list))))
(defclass state-list ()
((size :initform 0 :accessor size :type fixnum)
(fst :initform nil :accessor fst :type (or null state-list-node))
(lst :initform nil :accessor lst :type (or null state-list-node))))
(defun check-minimize-always (a)
(if (minimize-always a)
(minimize a)
a))
(defun states (a)
"Returns a hash table containing the set of states reachable from
the initial state of A."
(expand-singleton a)
(let ((visited (make-hash-table))
(worklist nil))
(setf (gethash (initial a) visited) t)
(push (initial a) worklist)
(loop while worklist
for s = (pop worklist) do
(with-ht (tr nil) (transitions s)
(let ((s2 (to tr)))
(unless (gethash s2 visited)
(setf (gethash s2 visited) t)
(push s2 worklist)))))
visited))
(defun accepting-states (a)
"Returns a hash table containing the set of accepting states
reachable from the initial state of A."
(let ((accepting (make-hash-table)))
(loop for s being the hash-key of (states a)
when (accept s) do
(setf (gethash s accepting) t))
accepting))
(defun set-state-nums (states)
"Renumerates, by assigning consecutive numbers to the NUM slot of
states being the keys of STATES hash table, and finally returns
STATES."
(let ((i -1))
(loop for s being the hash-key of states do
(setf (num s) (incf i))))
states)
(defun totalize (a)
"Adds transitions to an explicit crash state, added to A, to ensure
that the transition function is total. Finally, returns A."
(let* ((s (make-instance 'state))
(tr (make-instance
'transition :minc +min-char-code+ :maxc +max-char-code+ :to s)))
(htadd (transitions s) tr)
(loop for p being the hash-key of (states a)
and maxi = +min-char-code+ do
(loop for tr in (sorted-transition-list p nil) do
(with-slots (minc maxc) tr
(when (> minc maxi)
(htadd (transitions p)
(make-instance
'transition :minc maxi :maxc (1- minc) :to s)))
(when (> (1+ maxc) maxi)
(setq maxi (1+ maxc)))))
(when (<= maxi +max-char-code+)
(htadd (transitions p)
(make-instance
'transition :minc maxi :maxc +max-char-code+ :to s))))
a))
(defun areduce (a)
"Reduces automaton A by combining overlapping and adjacent edge
intervals with the same destination. Finally, returns A."
(if (singleton a)
a
(let ((states (states a)))
(set-state-nums states)
(loop for s being the hash-key of states do
(let ((st (sorted-transition-list s t)))
(reset-transitions s)
(let ((p nil)
(min -1)
(max -1))
(loop for tr in st
if (eq p (to tr)) do
(with-slots (minc maxc) tr
(if (<= minc (1+ max))
(when (> maxc max)
(setq max maxc))
(progn
(when p
(htadd
(transitions s)
(make-instance
'transition :minc min :maxc max :to p)))
(setq min minc
max maxc))))
else do
(with-slots (minc maxc to) tr
(when p
(htadd (transitions s)
(make-instance
'transition :minc min :maxc max :to p)))
(setq p to
min minc
max maxc)))
(when p
(htadd (transitions s)
(make-instance
'transition :minc min :maxc max :to p))))))
a)))
(defun start-points (a)
"Returns a sorted vector of all interval start points (character
codes)."
(let ((pset (make-hash-table)))
(loop for s being the hash-key of (states a) do
(setf (gethash +min-char-code+ pset) t)
(with-ht (tr nil) (transitions s)
(with-slots (minc maxc) tr
(setf (gethash minc pset) t)
(when (< maxc +max-char-code+)
(setf (gethash (1+ maxc) pset) t)))))
(let ((pa (make-array (hash-table-count pset)
:element-type 'char-code-type)))
(loop for p being the hash-key of pset and n from 0 do
(setf (aref pa n) p)
finally (return (sort pa #'<))))))
(defun live-states2 (a states)
"Returns the set of live states of A that are in STATES hash
table. A state is live if an accepting state is reachable from it."
(let ((map (make-hash-table)))
(loop for s being the hash-key of states do
(setf (gethash s map) (make-hash-table)))
(loop for s being the hash-key of states do
(with-ht (tr nil) (transitions s)
(setf (gethash s (gethash (to tr) map)) t)))
(let* ((live (accepting-states a))
(worklist (loop for s being the hash-key of live collect s)))
(loop while worklist
for s = (pop worklist) do
(loop for p being the hash-key of (gethash s map)
unless (gethash p live) do
(setf (gethash p live) t)
(push p worklist)))
live)))
(defun remove-dead-transitions (a)
"Returns reduced A with transitions to dead states removed. A state
is dead if no accepting state is reachable from it."
(if (singleton a)
nil
(let* ((states (states a))
(live (live-states2 a states)))
(loop for s being the hash-key of states do
(let ((st (transitions s)))
(reset-transitions s)
(with-ht (tr nil) st
(when (gethash (to tr) live)
(htadd (transitions s) tr)))))
(areduce a))))
(defun sorted-transitions (states)
"Renumerates each state in STATES hash table, and returns a vector
of sorted vectors of transitions for each state, ordered by the NUM
slot."
(set-state-nums states)
(let ((transitions (make-array (hash-table-count states))))
(loop for s being the hash-key of states do
(setf (aref transitions (num s)) (sorted-transition-vector s nil)))
transitions))
(defun empty-automaton ()
"Returns a new determinsitic automaton with the empty language."
(let ((a (make-instance 'automaton))
(s (make-instance 'state)))
(setf (initial a) s
(deterministic a) t)
a))
(defun empty-string-automaton ()
"Returns a new deterministic automaton that accepts only the empty
string."
(let ((a (make-instance 'automaton)))
(setf (singleton a) ""
(deterministic a) t)
a))
(defun any-string-automaton ()
"Returns a new deterministic automaton that accepts any string."
(let ((a (make-instance 'automaton))
(s (make-instance 'state)))
(setf (initial a) s
(accept s) t
(deterministic a) t)
(htadd (transitions s)
(make-instance
'transition :minc +min-char-code+ :maxc +max-char-code+ :to s))
a))
(defun any-char-automaton ()
"Returns a new deterministic automaton that accepts any single
character."
(char-range-automaton +min-char-code+ +max-char-code+))
(defun char-automaton (c)
"Returns a new deterministic automaton that accepts a single
character whose code is C."
(char-range-automaton c c))
(defun char-range-automaton (cmin cmax)
"Returns a new deterministic automaton that accepts a single
character whose code is in closed interval [CMIN, CMAX]."
(let ((a (make-instance 'automaton))
(s1 (make-instance 'state))
(s2 (make-instance 'state)))
(setf (initial a) s1
(accept s2) t
(deterministic a) t)
(when (<= cmin cmax)
(htadd (transitions s1)
(make-instance 'transition :minc cmin :maxc cmax :to s2)))
a))
(defun char-set-automaton (str)
"Returns a new deterministic automaton that accepts a single
character in set STR."
(let ((a (make-instance 'automaton))
(s1 (make-instance 'state))
(s2 (make-instance 'state)))
(setf (initial a) s1
(accept s2) t
(deterministic a) t)
(loop with t-table = (transitions s1)
for c across str do
(htadd t-table (make-instance
'transition :minc (char-code c) :maxc (char-code c)
:to s2)))
(areduce a)))
(defun any-of-right-length-subautomaton (str n)
"Returns a new sub-automaton (root of a state graph) accepting
non-negative (decimal) integers of length of (subseq STR N)."
(let ((s (make-instance 'state)))
(if (= (length str) n)
(setf (accept s) t)
(htadd (transitions s)
(make-instance
'transition :minc (char-code #\0) :maxc (char-code #\9)
:to (any-of-right-length-subautomaton str (1+ n)))))
s))
(defun at-least-subautomaton (str n initials zeros)
"Returns a new sub-automaton (root of a state graph) accepting
non-negative (decimal) integers of value at least the one represented
by (subseq STR N), and of length of (subseq STR N)."
(let ((s (make-instance 'state)))
(if (= (length str) n)
(setf (accept s) t)
(let ((c (elt str n)))
(when zeros
(push s (car initials)))
(htadd (transitions s)
(make-instance
'transition :minc (char-code c) :maxc (char-code c)
:to (at-least-subautomaton
str (1+ n) initials (and zeros (char= c #\0)))))
(when (char< c #\9)
(htadd
(transitions s)
(make-instance
'transition :minc (1+ (char-code c)) :maxc (char-code #\9)
:to (any-of-right-length-subautomaton str (1+ n)))))))
s))
(defun at-most-subautomaton (str n)
"Returns a new sub-automaton (root of a state graph) accepting
non-negative (decimal) integers of value at most the one represented
by (subseq STR N), and of length of (subseq STR N)."
(let ((s (make-instance 'state)))
(if (= (length str) n)
(setf (accept s) t)
(let ((c (elt str n)))
(htadd (transitions s)
(make-instance
'transition :minc (char-code c) :maxc (char-code c)
:to (at-most-subautomaton str (1+ n))))
(when (char> c #\0)
(htadd (transitions s)
(make-instance
'transition :minc (char-code #\0) :maxc (1- (char-code c))
:to (any-of-right-length-subautomaton str (1+ n)))))))
s))
(defun between-subautomaton (str1 str2 n initials zeros)
"Returns a new sub-automaton (root of a state graph) accepting
non-negative (decimal) integers of value between the one represented
by (subseq STR1 N) and (subseq STR2 N), inclusive, and of length of
\(subseq STR1 N) = (subseq STR2 N)."
(let ((s (make-instance 'state)))
(if (= (length str1) n)
(setf (accept s) t)
(let ((c1 (elt str1 n))
(c2 (elt str2 n)))
(when zeros
(push s (car initials)))
(if (char= c1 c2)
(htadd (transitions s)
(make-instance
'transition :minc (char-code c1) :maxc (char-code c1)
:to (between-subautomaton str1 str2 (1+ n) initials
(and zeros (char= c1 #\0)))))
(progn
(htadd
(transitions s)
(make-instance
'transition :minc (char-code c1) :maxc (char-code c1)
:to (at-least-subautomaton str1 (1+ n) initials
(and zeros (char= c1 #\0)))))
(htadd
(transitions s)
(make-instance
'transition :minc (char-code c2) :maxc (char-code c2)
:to (at-most-subautomaton str2 (1+ n))))
(when (< (1+ (char-code c1)) (char-code c2))
(htadd
(transitions s)
(make-instance
'transition
:minc (1+ (char-code c1)) :maxc (1- (char-code c2))
:to (any-of-right-length-subautomaton str1 (1+ n)))))))))
s))
(defun interval-automaton (min max digits)
"Returns a new automaton that accepts strings representing
non-negative (decimal) integers in interval [MIN, MAX]. If DIGITS > 0,
uses the fixed number of digits (strings must be prefixed by 0s to
obtain the right length). Otherwise, the number of digits is not
fixed. If MIN > MAX or if the numbers in the interval cannot be
expressed with the given fixed number of digits, an error is
signaled."
(flet ((%num-digits (n) (if (= n 0) 1 (1+ (floor (log n 10))))))
(assert (and (<= 0 min max)
(or (<= digits 0) (<= (%num-digits max) digits)))
() "MIN and MAX not expressible with the same number of digits")
(let* ((a (make-instance 'automaton))
(d (if (> digits 0) digits (%num-digits max)))
(str1 (format nil "~V,'0D" d min))
(str2 (format nil "~V,'0D" d max))
(initials (cons nil nil)))
(setf (initial a)
(between-subautomaton str1 str2 0 initials (<= digits 0)))
(if (<= digits 0)
(let ((pairs nil))
(loop for p in (car initials)
unless (eq (initial a) p) do
(push (make-instance 'state-pair :s1 (initial a) :s2 p)
pairs))
(add-epsilons a pairs)
(htadd (transitions (initial a))
(make-instance
'transition :minc (char-code #\0) :maxc (char-code #\0)
:to (initial a)))
(setf (deterministic a) nil))
(setf (deterministic a) t))
(check-minimize-always a))))
(defun expand-singleton (a)
"Expands the singleton representation of A into the regular
representation, and returns A."
(with-slots ((st singleton)) a
(when st
(let ((p (make-instance 'state)))
(setf (initial a) p)
(loop for c across st do
(let ((q (make-instance 'state)))
(htadd (transitions p)
(make-instance
'transition :minc (char-code c) :maxc (char-code c)
:to q))
(setq p q)))
(setf (accept p) t
(deterministic a) t
st nil))))
a)
(defun string-automaton (str)
"Returns a new deterministic automaton that accepts the single given
string STR."
(let ((a (make-instance 'automaton)))
(setf (singleton a) str
(deterministic a) t)
a))
(defun aconcatenate (a1 a2)
"Returns a new automaton that accepts the concatenation of the
languages of A1 and A2. Complexity: linear in the number of states."
(if (and (singleton a1) (singleton a2))
(string-automaton (concatenate 'string (singleton a1) (singleton a2)))
(progn
(setf a1 (clone-expanded a1)
a2 (clone-expanded a2))
(loop for s being the hash-key of (accepting-states a1) do
(setf (accept s) nil)
(add-epsilon s (initial a2)))
(setf (deterministic a1) nil)
(check-minimize-always a1))))
(defun aconcatenate-many (l)
"Returns a new automaton that accepts the concatenation of the
languages of automata in list L, respecting the order. Complexity:
linear in the total number of states."
(if l
(let* ((a1 (clone-expanded (car l)))
(ac1 (accepting-states a1)))
(loop for a2 in (cdr l) do
(let* ((a2 (clone-expanded a2))
(ac2 (accepting-states a2)))
(loop for s being the hash-key of ac1 do
(setf (accept s) nil)
(add-epsilon s (initial a2))
(when (accept s)
(setf (gethash s ac2) t)))
(setq ac1 ac2)))
(setf (deterministic a1) nil)
(check-minimize-always a1))
(empty-string-automaton)))
(defun optional (a)
"Returns a new automaton that accepts the union of the empty string
and the language of A. Complexity: linear in the number of states."
(let ((a (clone-expanded a))
(s (make-instance 'state)))
(add-epsilon s (initial a))
(setf (accept s) t
(initial a) s
(deterministic a) nil)
(check-minimize-always a)))
(defun repeat (a)
"Returns a new automaton that accepts the Kleene star (zero or more
concatenated repetitions) of the language of A. Complexity: linear in
the number of states."
(let ((a (clone-expanded a))
(s (make-instance 'state)))
(setf (accept s) t)
(add-epsilon s (initial a))
(loop for p being the hash-key of (accepting-states a) do
(add-epsilon p s))
(setf (initial a) s
(deterministic a) nil)
(check-minimize-always a)))
(defun repeat-min (a min)
"Returns a new automaton that accepts MIN or more concatenated
repetitions of the language of A."
(let ((a2 (repeat a)))
(loop while (> min 0) do
(setq a2 (aconcatenate a a2)
min (1- min)))
a2))
(defun repeat-minmax (a min max)
"Returns a new automaton that accepts a number, from [MIN, MAX], of
concatenated repetitions of the language of A. If MIN > MAX, the empty
automaton is returned."
(expand-singleton a)
(when (> min max)
(return-from repeat-minmax (empty-automaton)))
(decf max min)
(let ((a2 (cond
((= min 0) (empty-string-automaton))
((= min 1) (clone a))
(t (loop with tmp = a
while (> (decf min) 0) do
(setq tmp (aconcatenate a tmp))
finally (return tmp))))))
(when (= max 0)
(return-from repeat-minmax a2))
(let ((a3 (clone a)))
(loop while (> (decf max) 0) do
(let ((a4 (clone a)))
(loop for p being the hash-key of (accepting-states a4) do
(add-epsilon p (initial a3)))
(setq a3 a4)))
(loop for p being the hash-key of (accepting-states a2) do
(add-epsilon p (initial a3)))
(setf (deterministic a2) nil)
(check-minimize-always a2))))
(defun acomplement (a)
"Returns a new deterministic"
(let ((a (clone-expanded a)))
(determinize a)
(totalize a)
(loop for p being the hash-key of (states a) do
(setf (accept p) (not (accept p))))
(remove-dead-transitions a)
(check-minimize-always a)))
(defun aintersection (a1 a2)
"Returns a new deterministic automaton that accepts the intersection
of the languages of A and A2. As a side-effect, both A1 and A2 are
determinized if not already deterministic. Complexity: quadratic in
the number of states (when deterministic)."
(if (and (singleton a1) (singleton a2))
(if (string= (singleton a1) (singleton a2))
(string-automaton (singleton a1))
(empty-automaton))
(progn
(determinize a1)
(determinize a2)
(let* ((trs1 (sorted-transitions (states a1)))
(trs2 (sorted-transitions (states a2)))
(a3 (make-instance 'automaton))
(worklist nil)
(newstates (make-generalized-hash-table +equalp-key-situation+))
(s (make-instance 'state))
(p (make-instance
'state-pair :s s :s1 (initial a1) :s2 (initial a2))))
(setf (initial a3) s)
(push p worklist)
(setf (htref newstates p) p)
(loop while worklist do
(setq p (pop worklist))
(setf (accept (s p)) (and (accept (s1 p)) (accept (s2 p))))
(let* ((t1 (aref trs1 (num (s1 p))))
(t2 (aref trs2 (num (s2 p))))
(t1l (length t1))
(t2l (length t2)))
(loop with n1 = 0 and n2 = 0
while (and (< n1 t1l) (< n2 t2l)) do
(cond
((< (maxc (aref t1 n1)) (minc (aref t2 n2)))
(incf n1))
((< (maxc (aref t2 n2)) (minc (aref t1 n1)))
(incf n2))
(t (let* ((q (make-instance 'state-pair
:s1 (to (aref t1 n1))
:s2 (to (aref t2 n2))))
(r (htref newstates q))
(min (max (minc (aref t1 n1))
(minc (aref t2 n2))))
(max (min (maxc (aref t1 n1))
(maxc (aref t2 n2)))))
(unless r
(setf (s q) (make-instance 'state))
(push q worklist)
(setf (htref newstates q) q)
(setq r q))
(htadd (transitions (s p))
(make-instance
'transition
:minc min :maxc max :to (s r)))
(if (< (maxc (aref t1 n1)) (maxc (aref t2 n2)))
(incf n1)
(incf n2))))))))
(setf (deterministic a3) t)
(remove-dead-transitions a3)
(check-minimize-always a3)))))
(defun aunion (a1 a2)
"Returns a new automaton that accepts the union of the languages of
A1 and A2. Complexity: linear in the number of states."
(when (and (singleton a1) (singleton a2)
(string= (singleton a1) (singleton a2)))
(return-from aunion (clone a1)))
(let ((a2 (clone-expanded a2))
(a3 (clone-expanded a1))
(s (make-instance 'state)))
(add-epsilon s (initial a2))
(add-epsilon s (initial a3))
(setf (initial a2) s
(deterministic a2) nil)
(check-minimize-always a2)))
(defun aunion-many (l)
"Returns a new automaton that accepts the union of the languages of
automata given in list L."
(let ((s (make-instance 'state))
(a (make-instance 'automaton)))
(loop for b in l do
(add-epsilon s (initial (clone-expanded b))))
(setf (initial a) s
(deterministic a) nil)
(check-minimize-always a)))
(defun determinize (a)
"Determinizes A and returns it."
(if (or (deterministic a) (singleton a))
a
(let ((initialset (make-instance 'state-set)))
(setf (gethash (initial a) (ht initialset)) t)
(determinize2 a initialset))))
(defun determinize2 (a initialset)
"Determinizes A using the set of initial states in INITIALSET
state-set."
(let ((points (start-points a))
(sets (make-generalized-hash-table +equalp-key-situation+))
(worklist nil)
(newstate (make-generalized-hash-table +equalp-key-situation+)))
(setf (htref sets initialset) initialset)
(push initialset worklist)
(setf (initial a) (make-instance 'state))
(setf (htref newstate initialset) (initial a))
(loop while worklist do
(let* ((s (pop worklist))
(r (htref newstate s)))
(loop for q being the hash-key of (ht s)
when (accept q) do
(setf (accept r) t)
(return))
(loop with len = (length points)
for c across points
and n from 0 do
(let ((p (make-instance 'state-set)))
(loop for q being the hash-key of (ht s) do
(with-ht (tr nil) (transitions q)
(when (<= (minc tr) c (maxc tr))
(setf (gethash (to tr) (ht p)) t))))
(unless (htpresent sets p)
(setf (htref sets p) p)
(push p worklist)
(setf (htref newstate p) (make-instance 'state)))
(let ((q (htref newstate p))
(min c)
(max (if (< (1+ n) len)
(1- (aref points (1+ n)))
+max-char-code+)))
(htadd (transitions r)
(make-instance
'transition :minc min :maxc max :to q)))))))
(setf (deterministic a) t)
(remove-dead-transitions a)))
(defun minimize (a)
"Minimizes, and determinizes if not already deterministic, A and
returns it."
(with-slots (singleton minimization hash-code) a
(unless singleton
(ecase minimization
(huffman (minimize-huffman a))
(brzozowski (minimize-brzozowski a))
(hopcroft (minimize-hopcroft a))))
(setf hash-code (+ (* 3 (num-of-states a)) (* 2 (num-of-transitions a))))
(when (= hash-code 0)
(setf hash-code 1)))
a)
(defun states-agree (trs mark n1 n2)
(let ((t1 (aref trs n1))
(t2 (aref trs n2)))
(loop with k1 = 0 and k2 = 0
and l1 = (length t1) and l2 = (length t2)
while (and (< k1 l1) (< k2 l2)) do
(cond
((< (maxc (aref t1 k1)) (minc (aref t2 k2)))
(incf k1))
((< (maxc (aref t2 k2)) (minc (aref t1 k1)))
(incf k2))
(t (let ((m1 (num (to (aref t1 k1))))
(m2 (num (to (aref t2 k2)))))
(when (> m1 m2)
(rotatef m1 m2))
(when (aref mark m1 m2)
(return nil))
(if (< (maxc (aref t1 k1)) (maxc (aref t2 k2)))
(incf k1)
(incf k2)))))
finally (return t))))
(defun add-triggers (trs triggers n1 n2)
(let ((t1 (aref trs n1))
(t2 (aref trs n2)))
(loop with k1 = 0 and k2 = 0
while (and (< k1 (length t1)) (< k2 (length t2))) do
(cond
((< (maxc (aref t1 k1)) (minc (aref t2 k2)))
(incf k1))
((< (maxc (aref t2 k2)) (minc (aref t1 k1)))
(incf k2))
(t (unless (eq (to (aref t1 k1)) (to (aref t2 k2)))
(let ((m1 (num (to (aref t1 k1))))
(m2 (num (to (aref t2 k2)))))
(when (> m1 m2)
(rotatef m1 m2))
(unless (aref triggers m1 m2)
(setf (aref triggers m1 m2) (make-hash-table)))
(setf (gethash (make-instance 'int-pair :n1 n1 :n2 n2)
(aref triggers m1 m2))
t)))
(if (< (maxc (aref t1 k1)) (maxc (aref t2 k2)))
(incf k1)
(incf k2)))))))
(defun mark-pair (mark triggers n1 n2)
(setf (aref mark n1 n2) t)
(when (aref triggers n1 n2)
(loop for p being the hash-key of (aref triggers n1 n2) do
(let ((m1 (n1 p))
(m2 (n2 p)))
(when (> m1 m2)
(rotatef m1 m2))
(unless (aref mark m1 m2)
(mark-pair mark triggers m1 m2))))))
(defun ht-set-to-vector (ht)
(loop with vec = (make-array (hash-table-count ht))
for k being the hash-key of ht
and i from 0 do
(setf (aref vec i) k)
finally (return vec)))
(defun minimize-huffman (a)
"Minimizes A using the standard textbook, Huffman's
algorithm. Complexity: O(N ^ 2), where N is the number of states."
(determinize a)
(totalize a)
(let* ((ss (states a))
(ss-cnt (hash-table-count ss))
(trs (make-array ss-cnt))
(states (ht-set-to-vector ss))
(mark (make-array `(,ss-cnt ,ss-cnt) :element-type 'boolean
:initial-element nil))
(triggers (make-array `(,ss-cnt ,ss-cnt) :initial-element nil))
(numclasses 0))
(loop for n1 below ss-cnt do
(setf (num (aref states n1)) n1
(aref trs n1) (sorted-transition-vector (aref states n1) nil))
(loop for n2 from (1+ n1) below ss-cnt
unless (eq (accept (aref states n1)) (accept (aref states n2))) do
(setf (aref mark n1 n2) t)))
(loop for n1 below ss-cnt do
(loop for n2 from (1+ n1) below ss-cnt
unless (aref mark n1 n2) do
(if (states-agree trs mark n1 n2)
(add-triggers trs triggers n1 n2)
(mark-pair mark triggers n1 n2))))
(loop for n below ss-cnt do
(setf (num (aref states n)) -1))
(loop for n1 below ss-cnt
when (= (num (aref states n1)) -1) do
(setf (num (aref states n1)) numclasses)
(loop for n2 from (1+ n1) below ss-cnt
unless (aref mark n1 n2) do
(setf (num (aref states n2)) numclasses))
(incf numclasses))
(let ((newstates (make-array numclasses)))
(loop for n below numclasses do
(setf (aref newstates n) (make-instance 'state)))
(loop for n below ss-cnt do
(setf (num (aref newstates (num (aref states n)))) n)
(when (eq (aref states n) (initial a))
(setf (initial a) (aref newstates (num (aref states n))))))
(loop for n below numclasses do
(let ((s (aref newstates n)))
(setf (accept s) (accept (aref states (num s))))
(with-ht (tr nil) (transitions (aref states (num s)))
(htadd (transitions s)
(make-instance
'transition :minc (minc tr) :maxc (maxc tr)
:to (aref newstates (num (to tr))))))))
(remove-dead-transitions a))))
(defun minimize-brzozowski (a)
"Minimizes A using Brzozowski's algorithm. Complexity: O(2 ^ N),
where N is the number of states, but works very well in practice (even
better than Hopcroft's)."
(if (singleton a)
nil
(progn
(determinize2 a (make-instance 'state-set :ht (areverse a)))
(determinize2 a (make-instance 'state-set :ht (areverse a))))))
(defun minimize-hopcroft (a)
"Minimizes A using Hopcroft's algorithm. Complexity: O(N log N),
regarded as one of the most generally efficient existing algorithms."
(determinize a)
(let ((trs (transitions (initial a))))
(when (= (cnt trs) 1)
(with-ht (tr nil) trs
(when (and (eq (to tr) (initial a))
(= (minc tr) +min-char-code+)
(= (maxc tr) +max-char-code+))
(return-from minimize-hopcroft)))))
(totalize a)
(let* ((ss (states a))
(ss-cnt (hash-table-count ss))
(states (ht-set-to-vector ss)))
(set-state-nums ss)
(let* ((sigma (start-points a))
(sigma-cnt (length sigma))
(rvrs (make-array `(,ss-cnt ,sigma-cnt) :initial-element nil))
(rvrs-ne (make-array `(,ss-cnt ,sigma-cnt) :element-type 'boolean
:initial-element nil))
(partition (make-array ss-cnt :initial-element nil))
(block (make-array ss-cnt :element-type 'fixnum))
(active (make-array `(,ss-cnt ,sigma-cnt)))
(active2 (make-array `(,ss-cnt ,sigma-cnt) :initial-element nil))
(pending nil)
(pending2 (make-array `(,sigma-cnt ,ss-cnt) :element-type 'boolean
:initial-element nil))
(split nil)
(split2 (make-array ss-cnt :element-type 'boolean
:initial-element nil))
(refine nil)
(refine2 (make-array ss-cnt :element-type 'boolean
:initial-element nil))
(splitblock (make-array ss-cnt :initial-element nil))
(k 2))
(loop for j below ss-cnt do
(loop for i below sigma-cnt do
(setf (aref active j i) (make-instance 'state-list))))
(loop for q below ss-cnt
for qq = (aref states q) do
(let ((j (if (accept qq) 0 1)))
(push qq (aref partition j))
(setf (aref block (num qq)) j)
(loop for i below sigma-cnt do
(let* ((aa (code-char (aref sigma i)))
(p (sstep qq aa)))
(push qq (aref rvrs (num p) i))
(setf (aref rvrs-ne (num p) i) t)))))
(loop for j from 0 to 1 do
(loop for i below sigma-cnt do
(loop for qq in (aref partition j)
when (aref rvrs-ne (num qq) i) do
(setf (aref active2 (num qq) i)
(slnadd (aref active j i) qq)))))
(loop for i below sigma-cnt
for i0 = (size (aref active 0 i))
and i1 = (size (aref active 1 i)) do
(let ((j (if (<= i0 i1) 0 1)))
(push (make-instance 'int-pair :n1 j :n2 i) pending)
(setf (aref pending2 i j) t)))
(loop while pending
for ip = (pop pending)
for p = (n1 ip) and i = (n2 ip) do
(setf (aref pending2 i p) nil)
(loop for m = (fst (aref active p i)) then (succ m)
while m do
(loop for s in (aref rvrs (num (q m)) i)
unless (aref split2 (num s)) do
(setf (aref split2 (num s)) t)
(push s split)
(let ((j (aref block (num s))))
(push s (aref splitblock j))
(unless (aref refine2 j)
(setf (aref refine2 j) t)
(push j refine)))))
(loop for j in refine do
(when (< (length (aref splitblock j))
(length (aref partition j)))
(loop for s in (aref splitblock j) do
(setf (aref partition j) (remove s (aref partition j)))
(push s (aref partition k))
(setf (aref block (num s)) k)
(loop for c below sigma-cnt
for sn = (aref active2 (num s) c)
when (and sn (eq (sl sn) (aref active j c))) do
(slnremove sn)
(setf (aref active2 (num s) c)
(slnadd (aref active k c) s))))
(loop for c below sigma-cnt
for ij = (size (aref active j c))
and ik = (size (aref active k c))
if (and (not (aref pending2 c j)) (< 0 ij) (<= ij ik)) do
(setf (aref pending2 c j) t)
(push (make-instance 'int-pair :n1 j :n2 c) pending)
else do
(setf (aref pending2 c k) t)
(push (make-instance 'int-pair :n1 k :n2 c) pending))
(incf k))
(loop for s in (aref splitblock j) do
(setf (aref split2 (num s)) nil))
(setf (aref refine2 j) nil)
(setf (aref splitblock j) nil))
(setq split nil)
(setq refine nil))
(let ((newstates (make-array k)))
(loop for n below k
for s = (make-instance 'state) do
(setf (aref newstates n) s)
(loop for q in (aref partition n) do
(when (eq q (initial a))
(setf (initial a) s))
(setf (accept s) (accept q)
(num s) (num q)
(num q) n)))
(loop for n below k
for s = (aref newstates n) do
(with-ht (tr nil) (transitions (aref states (num s)))
(setf (num s) n)
(htadd (transitions s)
(make-instance
'transition :minc (minc tr) :maxc (maxc tr)
:to (aref newstates (num (to tr)))))))
(remove-dead-transitions a)))))
(defun areverse (a)
"Reverses the language of non-singleton A. Returns a hash table of
new initial states."
(let ((m (make-hash-table))
(states (states a))
(astates (accepting-states a)))
(loop for r being the hash-key of states do
(setf (gethash r m)
(make-generalized-hash-table +equalp-key-situation+)
(accept r) nil))
(loop for r being the hash-key of states do
(with-ht (tr nil) (transitions r)
(htadd (gethash (to tr) m)
(make-instance
'transition :minc (minc tr) :maxc (maxc tr) :to r))))
(loop for r being the hash-key of states do
(setf (transitions r) (gethash r m)))
(setf (accept (initial a)) t
(initial a) (make-instance 'state))
(loop for r being the hash-key of astates do
(add-epsilon (initial a) r))
(setf (deterministic a) nil)
astates))
(defun add-epsilons (a pairs)
"Adds epsilon transitions to A and returns it. This is done by
adding extra character interval transitions that are equivalent to the
given set of epsilon transitions. PAIRS is a list of state-pair
objects representing pairs of source-destination states where the
epsilon transitions should be added."
(expand-singleton a)
(let ((forward (make-hash-table))
(back (make-hash-table)))
(loop for p in pairs do