-
Notifications
You must be signed in to change notification settings - Fork 704
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
40 changed files
with
1,498 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,266 @@ | ||
{-# LANGUAGE OverloadedStrings #-} | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
module Distribution.FieldGrammar.Described ( | ||
Described (..), | ||
describeDoc, | ||
-- * Regular expressions | ||
Regex (..), | ||
RTerm (..), | ||
reHsString, | ||
reEps, | ||
reChar, | ||
reChars, | ||
reDot, | ||
reComma, | ||
reSpacedComma, | ||
reMunchCS, | ||
reMunch1CS, | ||
-- * Character Sets | ||
csChar, | ||
csAlphaNum, | ||
csNotSpace, | ||
csNotSpaceOrComma, | ||
-- * Pretty-printing | ||
regexDoc, | ||
-- * Generation | ||
generate, | ||
) where | ||
|
||
import Data.Char (isControl) | ||
import Distribution.Compat.Prelude | ||
import Prelude () | ||
|
||
import Distribution.Parsec (Parsec) | ||
import Distribution.Pretty (Pretty) | ||
|
||
import qualified Distribution.Utils.AnsiCharSet as ACS | ||
import qualified Text.PrettyPrint as PP | ||
|
||
-- | Class describing the pretty/parsec format of a. | ||
class (Pretty a, Parsec a) => Described a where | ||
-- | A pretty document of "regex" describing the field format | ||
describe :: proxy a -> Regex RTerm | ||
|
||
-- | Pretty-print description. | ||
-- | ||
-- >>> describeDoc ([] :: [Bool]) | ||
-- \mathop{\mathord{"}\mathtt{True}\mathord{"}}\mid\mathop{\mathord{"}\mathtt{False}\mathord{"}} | ||
-- | ||
describeDoc :: Described a => proxy a -> PP.Doc | ||
describeDoc p = regexDoc (describe p) | ||
|
||
instance Described Bool where | ||
describe _ = REUnion ["True", "False"] | ||
|
||
instance Described a => Described (Identity a) where | ||
describe _ = describe ([] :: [a]) | ||
|
||
------------------------------------------------------------------------------- | ||
-- Regex | ||
------------------------------------------------------------------------------- | ||
|
||
-- | Regular expressions tuned for 'Described' use-case. | ||
data Regex a | ||
= REAppend [Regex a] -- ^ append @ab@ | ||
| REUnion [Regex a] -- ^ union @a|b@ | ||
| REMunch (Regex a) (Regex a) -- ^ star @a*@, with a separator | ||
| REMunch1 (Regex a) (Regex a) -- ^ plus @a+@, with a separator | ||
| REOpt (Regex a) -- ^ optional @r?@ | ||
| REString String -- ^ literal string @abcd@ | ||
| RECharSet ACS.AnsiCharSet -- ^ charset @[:alnum:]@ | ||
| RESpaces -- ^ zero-or-more spaces | ||
| RESpaces1 -- ^ one-or-more spaces | ||
| REVar a -- ^ variable | ||
| RELet String (Regex a) | ||
(Regex (Maybe a)) -- ^ named expression | ||
| RERec String (Regex (Maybe a)) -- ^ recursive expressions | ||
|
||
| RETodo -- ^ unspecified | ||
deriving (Eq, Ord, Show) | ||
|
||
-- | Terminals used by field grammars. | ||
-- | ||
-- TODO: remove | ||
data RTerm | ||
= RHaskellString | ||
| RUnqualName | ||
deriving (Eq, Ord, Show) | ||
|
||
reHsString :: Regex RTerm | ||
reHsString = REVar RHaskellString | ||
|
||
reEps :: Regex a | ||
reEps = REAppend [] | ||
|
||
reChar :: Char -> Regex a | ||
reChar = RECharSet . ACS.singleton | ||
|
||
reChars :: [Char] -> Regex a | ||
reChars = RECharSet . ACS.fromList | ||
|
||
reDot :: Regex a | ||
reDot = reChar '.' | ||
|
||
reComma :: Regex a | ||
reComma = reChar ',' | ||
|
||
reSpacedComma :: Regex a | ||
reSpacedComma = RESpaces <> reComma <> RESpaces | ||
|
||
reMunch1CS :: ACS.AnsiCharSet -> Regex a | ||
reMunch1CS = REMunch1 reEps . RECharSet | ||
|
||
reMunchCS :: ACS.AnsiCharSet -> Regex a | ||
reMunchCS = REMunch reEps . RECharSet | ||
|
||
instance IsString (Regex a) where | ||
fromString = REString | ||
|
||
instance Semigroup (Regex a) where | ||
x <> y = REAppend (unAppend x ++ unAppend y) where | ||
unAppend (REAppend rs) = rs | ||
unAppend r = [r] | ||
|
||
instance Monoid (Regex a) where | ||
mempty = REAppend [] | ||
mappend = (<>) | ||
|
||
------------------------------------------------------------------------------- | ||
-- Character sets | ||
------------------------------------------------------------------------------- | ||
|
||
csChar :: Char -> ACS.AnsiCharSet | ||
csChar = ACS.singleton | ||
|
||
csAlphaNum :: ACS.AnsiCharSet | ||
csAlphaNum = ACS.alphanum | ||
|
||
csNotSpace :: ACS.AnsiCharSet | ||
csNotSpace = ACS.filter (\c -> not (isControl c) && c /= ' ') ACS.full | ||
|
||
csNotSpaceOrComma :: ACS.AnsiCharSet | ||
csNotSpaceOrComma = ACS.filter (/= ',') csNotSpace | ||
|
||
------------------------------------------------------------------------------- | ||
-- Pretty-printing | ||
------------------------------------------------------------------------------- | ||
|
||
-- | | ||
-- | ||
-- >>> regexDoc $ REString "True" | ||
-- \mathop{\mathord{"}\mathtt{True}\mathord{"}} | ||
-- | ||
-- Note: we don't simplify regexps yet: | ||
-- | ||
-- >>> regexDoc $ REString "foo" <> REString "bar" | ||
-- \mathop{\mathord{"}\mathtt{foo}\mathord{"}}\mathop{\mathord{"}\mathtt{bar}\mathord{"}} | ||
-- | ||
regexDoc :: Regex RTerm -> PP.Doc | ||
regexDoc = go termDoc 0 where | ||
go :: (a -> PP.Doc) -> Int -> Regex a -> PP.Doc | ||
go f d (REAppend rs) = parensIf (d > 2) $ PP.hcat (map (go f 2) rs) | ||
go f d (REUnion rs) = parensIf (d > 1) $ PP.hcat (PP.punctuate (PP.text "\\mid") (map (go f 1) rs)) | ||
|
||
go f d (REMunch sep r) = parensIf (d > 3) $ | ||
PP.text "{" <<>> go f 3 r <<>> PP.text "}^\\ast_{" <<>> go f 0 sep <<>> PP.text "}" | ||
go f d (REMunch1 sep r) = parensIf (d > 3) $ | ||
PP.text "{" <<>> go f 3 r <<>> PP.text "}^+_{" <<>> go f 0 sep <<>> PP.text "}" | ||
go f d (REOpt r) = parensIf (d > 3) $ | ||
PP.text "{" <<>> go f 3 r <<>> PP.text "}^?" | ||
|
||
go _ _ (REString s) = PP.text "\\mathop{\\mathord{\"}\\mathtt{" <<>> PP.hcat (map charDoc s) <<>> PP.text "}\\mathord{\"}}" | ||
go _ _ (RECharSet cs) = charsetDoc cs | ||
|
||
go _ _ RESpaces = "\\circ" | ||
go _ _ RESpaces1 = "\\bullet" | ||
|
||
go f _ (REVar a) = f a | ||
go f d (RELet n _ r) = go (maybe (terminalDoc n) f) d r | ||
go _ _ (RERec n _) = terminalDoc n | ||
|
||
go _ _ RETodo = PP.text "\\mathsf{\\color{red}{TODO}}" | ||
|
||
parensIf :: Bool -> PP.Doc -> PP.Doc | ||
parensIf True d = PP.text "\\left(" <<>> d <<>> PP.text "\\right)" | ||
parensIf False d = d | ||
|
||
termDoc :: RTerm -> PP.Doc | ||
termDoc RHaskellString = terminalDoc "hs-string" | ||
termDoc RUnqualName = terminalDoc "unqual-name" | ||
|
||
terminalDoc :: String -> PP.Doc | ||
terminalDoc s = PP.text "\\mathop{\\mathit{" <<>> PP.hcat (map charDoc s) <<>> PP.text "}}" | ||
|
||
charDoc :: Char -> PP.Doc | ||
charDoc ' ' = PP.text " " | ||
charDoc '{' = PP.text "\\{" | ||
charDoc '}' = PP.text "\\}" | ||
charDoc c | ||
| isAlphaNum c = PP.char c | ||
| otherwise = PP.text ("\\text{" ++ c : "}") | ||
|
||
inquotes :: PP.Doc -> PP.Doc | ||
inquotes d = "\\mathop{\\mathord{\"}" <<>> d <<>> "\\mathord{\"}}" | ||
|
||
charsetDoc :: ACS.AnsiCharSet -> PP.Doc | ||
charsetDoc acs | ||
| acs == csAlphaNum = terminalDoc "alpha-num" | ||
| acs == csNotSpace = terminalDoc "not-space" | ||
| acs == csNotSpaceOrComma = terminalDoc "not-space-nor-comma" | ||
charsetDoc acs = case ACS.ranges acs of | ||
[] -> PP.brackets PP.empty | ||
[(x,y)] | x == y -> inquotes (charDoc x) | ||
rs -> PP.brackets $ PP.hcat $ map rangeDoc rs | ||
where | ||
rangeDoc :: (Char, Char) -> PP.Doc | ||
rangeDoc (x, y) | x == y = inquotes (charDoc x) | ||
| otherwise = inquotes (charDoc x) <<>> PP.char '-' <<>> inquotes (charDoc y) | ||
|
||
------------------------------------------------------------------------------- | ||
-- Generation | ||
------------------------------------------------------------------------------- | ||
|
||
-- | Generate an example string. | ||
generate | ||
:: (Monad m, Applicative m) | ||
=> (Int -> Int -> m Int) -- ^ generate integer in range | ||
-> (a -> m String) -- ^ generate variables | ||
-> Regex a -- ^ regex | ||
-> m String -- ^ an example string. | ||
generate rnd f (REAppend rs) = do | ||
xs <- traverse (generate rnd f) rs | ||
return (concat xs) | ||
generate rnd f (REUnion rs) = do | ||
n <- rnd 0 (length rs - 1) | ||
generate rnd f (rs !! n) | ||
generate rnd f (REMunch sep r) = do | ||
n <- rnd 0 5 | ||
xs <- traverse (generate rnd f) (intersperse sep (replicate n r)) | ||
return (concat xs) | ||
generate rnd f (REMunch1 sep r) = do | ||
n <- rnd 1 5 | ||
xs <- traverse (generate rnd f) (intersperse sep (replicate n r)) | ||
return (concat xs) | ||
generate rnd f (REOpt r) = do | ||
n <- rnd 0 2 | ||
case n of | ||
0 -> return "" | ||
_ -> generate rnd f r | ||
generate _ _ (REString str) = return str | ||
generate rnd _ (RECharSet cs) = return <$> generateCS rnd cs | ||
generate rnd _ RESpaces1 = (\n -> replicate n ' ') <$> rnd 1 3 | ||
generate rnd _ RESpaces = (\n -> replicate n ' ') <$> rnd 0 3 | ||
|
||
generate _ f (REVar x) = f x | ||
generate _ _ (RELet _ _ _) = error "generate let" | ||
generate _ _ (RERec _ _) = error "generate rec" | ||
generate _ _ RETodo = return "TODO" | ||
|
||
generateCS | ||
:: Monad m | ||
=> (Int -> Int -> m Int) -- ^ generate integer in range | ||
-> ACS.AnsiCharSet | ||
-> m Char | ||
generateCS rnd asc = do | ||
n <- rnd 0 (ACS.size asc - 1) | ||
return (ACS.toList asc !! n) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.