-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMain.hs
341 lines (285 loc) · 10.8 KB
/
Main.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ScopedTypeVariables #-}
{- LIQUID "--fullcheck" -}
module Main where
import GHC.Generics (Generic)
import Data.Maybe
import Data.Bits ((.&.), (.|.))
import qualified Data.Bits as Bits
import Data.List (foldl')
import Data.Set (Set)
import qualified Data.Set as Set
(|>) :: a -> (a -> b) -> b
(|>) = flip ($)
infixl 0 |>
(.>) :: (a -> b) -> (b -> c) -> a -> c
(.>) = flip (.)
infixl 9 .>
-- | The type of generalized regular expressions over a given alphabet.
data Regex tok
= Ø
-- ^ The regular expression corresponding to the empty language.
| Ɛ
-- ^ The regular expression matching only the empty string.
| Tok !tok
-- ^ The regular expression matching exactly one token.
| Star !(Regex tok)
-- ^ Kleene's star operator.
| Comp !(Regex tok)
-- ^ The complement of a regular expression.
| (:.:) !(Regex tok) !(Regex tok)
-- ^ The concatenation of two regular expressions.
| (:|:) !(Regex tok) !(Regex tok)
-- ^ The union of two regular expressions.
| (:&:) !(Regex tok) !(Regex tok)
-- ^ The intersection of two regular expressions.
deriving (Eq, Functor, Generic)
{-@ autosize Regex @-}
-- | Returns true if the language of the given regular expression contains Ɛ,
-- and otherwise returns false.
isNullable :: Regex tok -> Bool
isNullable Ø = False
isNullable Ɛ = True
isNullable (Tok _) = False
isNullable (Comp r) = not (isNullable r)
isNullable (Star r) = True
isNullable (r :|: s) = isNullable r || isNullable s
isNullable (r :&: s) = isNullable r && isNullable s
isNullable (r :.: s) = isNullable r && isNullable s
-- | Returns Ɛ if the language of the given regular expression contains Ɛ, and
-- otherwise returns Ø.
nullable :: Regex tok -> Regex tok
nullable r = if isNullable r then Ɛ else Ø
-- | The Brzozowski derivative of a regular expression with respect to a token.
deriv :: (Eq tok) => tok -> Regex tok -> Regex tok
deriv a Ø = Ø
deriv a Ɛ = Ø
deriv a (Tok t) = if t == a then Ɛ else Ø
deriv a (Comp r) = Comp (deriv a r)
deriv a (Star r) = deriv a r :.: Star r
deriv a (r :|: s) = deriv a r :|: deriv a s
deriv a (r :&: s) = deriv a r :&: deriv a s
deriv a (r :.: s) = let d1 = deriv a r
d2 = deriv a s
in if isNullable r
then (d1 :.: s) :|: d2
else d1 :.: s
-- | The Brzozowski derivative of a regular expression with respect to a string.
derivative :: (Eq tok, Foldable t) => t tok -> Regex tok -> Regex tok
derivative tokens rx = foldr deriv rx tokens
{-@ measure matchDeriv :: Regex tok -> [tok] -> Bool @-}
-- | Match the given string against the given regular expression by taking the
-- Brzozowski derivative.
matchDeriv :: (Eq tok) => Regex tok -> [tok] -> Bool
matchDeriv rx str = isNullable (derivative str rx)
-- | A refinement type alias for Int.
type Natural = Int
{-@ type Natural = Nat @-}
-- | The type representing states in an NFA.
data NatSet = MkNatSet {-# UNPACK #-} !Natural !Integer
deriving (Eq, Ord, Generic)
-- | FIXME: doc
emptyNS :: NatSet
emptyNS = MkNatSet 0 0
-- | FIXME: doc
singletonNS :: Natural -> NatSet
singletonNS x = MkNatSet 1 (Bits.shiftL 1 x)
-- | FIXME: doc
intersectionNS :: NatSet -> NatSet -> NatSet
intersectionNS (MkNatSet m x) (MkNatSet n y) = MkNatSet (max m n) (x .&. y)
-- | FIXME: doc
(∩) :: NatSet -> NatSet -> NatSet
(∩) = intersectionNS
-- | FIXME: doc
unionNS :: NatSet -> NatSet -> NatSet
unionNS (MkNatSet m x) (MkNatSet n y) = MkNatSet (max m n) (x .|. y)
-- | FIXME: doc
(∪) :: NatSet -> NatSet -> NatSet
(∪) = unionNS
-- | FIXME: doc
shiftNS :: Natural -> NatSet -> NatSet
shiftNS k (MkNatSet n x) = MkNatSet (n + k) (Bits.shiftL x k)
-- | FIXME: doc
nullNS :: NatSet -> Bool
nullNS (MkNatSet _ x) = (x == 0)
{-@ assume sizeNS :: NatSet -> Natural @-} -- FIXME
-- | FIXME: doc
sizeNS :: NatSet -> Natural
sizeNS (MkNatSet _ x) = Bits.popCount x
-- | FIXME: doc
containsNS :: NatSet -> Natural -> Bool
containsNS (MkNatSet _ x) k = Bits.testBit x k
{-@ assume rangeNS :: NatSet -> Natural @-} -- FIXME
-- | FIXME: doc
rangeNS :: NatSet -> Natural
rangeNS (MkNatSet n _) = n
-- | FIXME: doc
foldlNS :: forall b. (b -> Natural -> b) -> b -> NatSet -> b
foldlNS f acc0 ns = go (rangeNS ns) acc0 (sizeNS ns) 0
where
{-@ go :: d:Nat -> b -> Nat -> Nat -> b / [d] @-}
go :: Natural -> b -> Natural -> Natural -> b
go d !acc n b | (n <= 0) || (d <= 0) = acc
| containsNS ns b = go (d - 1) (f acc b) (n - 1) (b + 1)
| otherwise = go (d - 1) acc n (b + 1)
-- | FIXME: doc
unionsNS :: (Foldable t) => t NatSet -> NatSet
unionsNS = foldr unionNS emptyNS
-- | The type representing an NFA.
--
-- This type and many of the functions using it are based on Gabriel Gonzalez's
-- <http://github.com/Gabriel439/slides/blob/master/regex/regex.md notes>.
data NFA tok
= MkNFA
{ nfaNumStates :: Natural
-- ^ The number of states.
, nfaStarting :: NatSet
-- ^ The set of starting states.
, nfaTransition :: tok -> Natural -> NatSet
-- ^ The transition function.
, nfaAccepting :: NatSet
-- ^ The set of accepting states.
}
deriving (Generic)
-- | Match a sequence of tokens against an NFA.
matchNFA :: NFA tok -> [tok] -> Bool
matchNFA (MkNFA n as f bs) ts = intersectionNS bs (foldl' step' as ts)
|> nullNS |> not
where
step' s0 i = foldlNS (\acc j -> unionNS acc (f i j)) emptyNS s0
-- | The NFA that accepts a single token if it satisfies a given predicate.
satisfyNFA :: (tok -> Bool) -> NFA tok
satisfyNFA predicate = MkNFA n as f bs
where
n = 2
as = singletonNS 0
bs = singletonNS 0
f t s = if (s == 0) && predicate t then singletonNS 1 else emptyNS
-- | The NFA that matches no strings.
empNFA :: NFA tok
empNFA = MkNFA 0 emptyNS (\_ _ -> emptyNS) emptyNS
-- | The NFA matching only an empty string.
epsNFA :: NFA tok
epsNFA = MkNFA 1 (singletonNS 0) (\_ _ -> emptyNS) (singletonNS 0)
-- | The union of two NFAs.
unionNFA :: NFA tok -> NFA tok -> NFA tok
unionNFA (MkNFA nL asL fL bsL) (MkNFA nR asR fR bsR) = MkNFA n as f bs
where
n = nL + nR
as = asL ∪ shiftNS nL asR
bs = bsL ∪ shiftNS nL bsR
f i s | s < nL = fL i s
| otherwise = shiftNS nL (fR i (s - nL))
-- | Sequential composition of NFAs.
sequenceNFA :: NFA tok -> NFA tok -> NFA tok
sequenceNFA (MkNFA nL asL fL bsL) (MkNFA nR asR fR bsR) = MkNFA n as f bs
where
n = nL + nR
as = if nullNS (asL ∩ bsL)
then asL
else asL ∪ shiftNS nL asR
bs = shiftNS nL bsR
f i s = let x = fL i s
in if s < nL
then if nullNS (x ∩ bsL)
then x
else x ∪ shiftNS nL asR
else shiftNS nL (fR i (s - nL))
{- measure dfaStates :: DFA st tok -> Set st @-}
{-@
data DFA st tok
= MkDFA
{ dfaStates :: Set st
, dfaStarting :: {q : st | Set_mem q dfaStates}
, dfaTransition :: tok
-> {qI : st | Set_mem qI dfaStates}
-> {qO : st | Set_mem qO dfaStates}
, dfaAccepting :: {q : st | Set_mem q dfaStates} -> Bool
}
@-}
data DFA st tok
= MkDFA
{ dfaStates :: Set st
-- ^ The set of states.
, dfaStarting :: st
-- ^ The starting state.
, dfaTransition :: tok -> st -> st
-- ^ The transition function.
, dfaAccepting :: st -> Bool
-- ^ The accepting states.
}
deriving (Generic)
{-@ autosize DFA @-}
{-@ type VState dfa = {q : _ | Prop (Set_mem q (dfaStates dfa))} @-}
-- | The "free" NFA corresponding to a given DFA.
fromDFAtoNFA :: (Ord st) => DFA st tok -> NFA tok
fromDFAtoNFA (MkDFA ss s f acc) = MkNFA (Set.size ss) s' f' acc'
where
s' = toNS s
f' = \t q -> toNS $ f t (toSt q)
acc' = unionsNS $ map toNS $ filter acc $ Set.toList ss
toSt n = Set.elemAt n ss
toNS q = singletonNS $ Set.findIndex q ss
-- | The powerset DFA corresponding to a given NFA.
fromNFAtoDFA :: NFA tok -> DFA NatSet tok
fromNFAtoDFA (MkNFA n as f bs) = MkDFA ss as f' (== bs) -- FIXME
where
f' tok st = fromNS st |> Set.map (f tok) |> unionsNS
ss :: Set NatSet
ss = [0 .. (n - 1)] |> powerset |> Set.map toNS
powerset :: (Ord a) => [a] -> Set (Set a)
powerset [] = Set.singleton Set.empty
powerset (x:xs) = let xs' = powerset xs
in Set.union xs' (Set.map (Set.insert x) xs')
fromNS :: NatSet -> Set Natural
fromNS = foldlNS (flip (:)) [] .> Set.fromList
toNS :: Set Natural -> NatSet
toNS = Set.map singletonNS .> unionsNS
-- | Minimizes the number of states in the given DFA.
minimizeDFA :: DFA st tok -> DFA st tok
-- FIXME: use a better DFA minimization algorithm
minimizeDFA (MkDFA ss s f acc) = MkDFA ss s f acc
-- | The complement of a given DFA.
complementDFA :: DFA st tok -> DFA st tok
complementDFA (MkDFA ss s f acc) = MkDFA ss s f (acc .> not)
-- | The intersection of two DFAs.
intersectionDFA :: (Ord st) => DFA st tok -> DFA st tok -> DFA (st, st) tok
intersectionDFA (MkDFA ssL sL fL accL) (MkDFA ssR sR fR accR) = MkDFA ss s f acc
where
ss = Set.fromList [(x, y) | x <- Set.toList ssL, y <- Set.toList ssR]
s = (sL, sR)
f = \t (a, b) -> (fL t a, fR t b)
acc = \(a, b) -> accL a && accR b
-- | The complement of a given NFA.
complementNFA :: NFA tok -> NFA tok
complementNFA = fromNFAtoDFA .> minimizeDFA .> complementDFA .> fromDFAtoNFA
-- | The intersection of two NFAs.
intersectionNFA :: NFA tok -> NFA tok -> NFA tok
intersectionNFA nL nR = let toDFA = fromNFAtoDFA .> minimizeDFA
(dL, dR) = (toDFA nL, toDFA nR)
in intersectionDFA dL dR |> minimizeDFA |> fromDFAtoNFA
-- | An implementation of Thompson's construction, which converts regular
-- expressions to nondeterministic finite automata.
thompson :: forall tok. (Eq tok) => Regex tok -> NFA tok
thompson Ø = empNFA
thompson Ɛ = epsNFA
thompson (Tok t) = satisfyNFA (== t)
thompson (Comp r) = complementNFA (thompson r)
thompson (Star r) = let MkNFA n as f bs = thompson r
f' i s = let x = f i s
in if nullNS (x ∩ bs) then x else x ∪ as
in MkNFA n as f' bs
thompson (r :|: s) = unionNFA (thompson r) (thompson s)
thompson (r :&: s) = intersectionNFA (thompson r) (thompson s)
thompson (r :.: s) = sequenceNFA (thompson r) (thompson s)
{-@ matchRegex
:: rx:(Regex tok)
-> str:[tok]
-> {v : Bool | ((Prop v) <=> (Prop (matchDeriv rx str)))}
@-}
matchRegex :: (Eq tok) => Regex tok -> [tok] -> Bool
matchRegex rx toks = matchNFA (thompson rx) toks
main :: IO ()
main = pure ()