Skip to content

Commit

Permalink
Abstracted the parsing of XML files to a template scheme. Uses ByteSt…
Browse files Browse the repository at this point in the history
…rings internally and the Hexml parser. Added examples/Reading.hs to demo the interface.
  • Loading branch information
thisiswhereitype committed Apr 23, 2017
1 parent 809c6fd commit 30f9f45
Show file tree
Hide file tree
Showing 9 changed files with 102 additions and 101 deletions.
38 changes: 25 additions & 13 deletions Test.hs
Original file line number Diff line number Diff line change
@@ -1,21 +1,33 @@
{-# LANGUAGE OverloadedStrings #-}

module Main where

import qualified Pangraph.XMLTemplate as PT
import Data.ByteString.Char8 (pack)
import qualified Data.ByteString as BS
import qualified Pangraph as P
import qualified Text.XML.Hexml as H
import qualified Pangraph.GraphML.Parser as GraphML_P

import Data.Either
type Parser = GraphML_P.Template -> BS.ByteString -> Either BS.ByteString P.Pangraph

main :: IO ()
main = do
x <- readFile "examples/graphs/small.graphml"
let bs = head $ rights [H.parse $ PT.strToByteString x]
let (nodes, e) = PT.resolvePath bs n
let nodes' =map (\h -> PT.getAttPairs h e) nodes
putStr "#-\t"
putStrLn $ show nodes
putStr "#-\t"
putStrLn $ show nodes'
xs <- readFile paths
case runParser parser (pack xs) of
Left x -> error (show x)
Right y -> if y == graphs
then putStr "Test Passed"
else error $ "Test failed----\n" ++ show y
where
n::(PT.Path, PT.Element)
n =(PT.strToByteString "graphml graph node", PT.strToByteString "id")
paths = "examples/graphs/small.graphml"
parser = GraphML_P.parseGraph
graphs =
P.makePangraph
[P.makeNode [P.makeAtt ("id","n0")],
P.makeNode [P.makeAtt ("id","n1")],
P.makeNode [P.makeAtt ("id","n2")]]
[P.makeEdge
[P.makeAtt ("source","n0"),
P.makeAtt ("target","n2")]]

runParser :: Parser -> BS.ByteString ->Either BS.ByteString P.Pangraph
runParser p file = p (head GraphML_P.template) $ file
15 changes: 8 additions & 7 deletions examples/code/Reading.hs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
module Reading where

import System.IO
import Pangraph
import qualified Pangraph.GraphML.Parser as G
import qualified Pangraph.GraphML.Parser as GraphML_P
import Data.ByteString.Char8 (pack)

main::IO()
main=do
main:: IO ()
main = do
fileName <- getLine
file <- readFile fileName
let graph = G.parseAsString file
putStr $ show graph
let graph = GraphML_P.parseGraph (head GraphML_P.template) $ pack file
case graph of
Left x -> putStrLn (show x)
Right y -> putStrLn (show y)
11 changes: 0 additions & 11 deletions examples/code/VHDL.hs

This file was deleted.

12 changes: 0 additions & 12 deletions examples/code/Writing.hs

This file was deleted.

2 changes: 1 addition & 1 deletion examples/graphs/small.graphml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
<node id="n0"/>
<node id="n1"/>
<node id="n2"/>
<edge source="n0" target="n2"/>
<edge id="e1" source="n0" target="n2"/>
</graph>
</graphml>
37 changes: 20 additions & 17 deletions pangraph.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,26 @@ copyright: 2016 Joe Scott
category: Web
build-type: Simple
cabal-version: >=1.10
tested-with:
GHC==8.0.2
tested-with: GHC==8.0.2

library
hs-source-dirs: src
exposed-modules: Pangraph
other-modules: Pangraph.XMLTemplate
, Pangraph.Graph
build-depends: base >= 4.7 && < 5
, hexml
, bytestring
default-language: Haskell2010
hs-source-dirs: src
exposed-modules: Pangraph
, Pangraph.GraphML.Parser
other-modules: Pangraph.XMLTemplate
, Pangraph.Graph
build-depends: base >= 4.8 && < 5
, hexml
, bytestring
default-language: Haskell2010
GHC-options: -Wall -fwarn-tabs -O2

-- test-suite pangraph-test
-- type: exitcode-stdio-1.0
-- main-is: Test.hs
-- build-depends: base >= 4.8 && < 5
-- , pangraph
-- default-language: Haskell2010
-- GHC-options: -Wall -fwarn-tabs
test-suite pangraph-test
type: exitcode-stdio-1.0
main-is: Test.hs
build-depends: base >= 4.8 && < 5
, pangraph
, hexml
, bytestring
default-language: Haskell2010
GHC-options: -Wall -fwarn-tabs
20 changes: 10 additions & 10 deletions src/Pangraph.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Pangraph,
Node,
Edge,
Att,
attributes,
att,
nodes,
edges,
key,
Expand All @@ -16,21 +16,21 @@ makePangraph

import qualified Data.ByteString as BS

data Pangraph = Pangraph [Node] [Edge] deriving (Show)
data Node = Node [Att] deriving (Show)
data Edge = Edge [Att] deriving (Show)
data Att = Att (BS.ByteString, BS.ByteString) deriving (Show)
data Pangraph = Pangraph [Node] [Edge] deriving (Show, Eq)

This comment has been minimized.

Copy link
@snowleopard

snowleopard Apr 24, 2017

Deriving Show breaks the abstraction: if you change the implementation, the users will be able to see this. I suggest to implement Show manually via makePangraph (see Data.Set as an example).

data Node = Node [Att] deriving (Show, Eq)
data Edge = Edge [Att] deriving (Show, Eq)
data Att = Att (BS.ByteString, BS.ByteString) deriving (Show, Eq)

This comment has been minimized.

Copy link
@snowleopard

snowleopard Apr 24, 2017

I'm not a big fan of contractions, could we have Attribute instead?


type Identifier = BS.ByteString
type Field = BS.ByteString

class HasAtt a where
attributes:: a -> [Att]

This comment has been minimized.

Copy link
@snowleopard

snowleopard Apr 24, 2017

I think attributes was a better name.

This comment has been minimized.

Copy link
@snowleopard

snowleopard Apr 24, 2017

I think conceptually, attributes is a mapping from keys to values. So, perhaps this is how it should be implemented: Map Key Value, instead of essentially [(Key, Value)]. The latter is not very convenient to work with and may also be slower if there are many attributes.

If the number of attributes is small Map comes with substantial overhead compared to a list. If you don't want to commit to a specific implementation, you can simply expose the lookup function, i.e.:

-- Use newtype to hide the implementation
newtype Key   = Key BS.Bytestring
newtype Value = Value BS.Bytestring

newtype Attributes = Attributes { lookup :: Key -> Maybe Value }

This comment has been minimized.

Copy link
@snowleopard

snowleopard Apr 24, 2017

With this approach you can use either [(Key, Value)] or Map Key Value internally and the user won't be able to tell.

att:: a -> [Att]

instance HasAtt Node where
attributes (Node a) = a
att (Node a) = a
instance HasAtt Edge where
attributes (Edge a) = a
att (Edge a) = a

-- Pangraph type getters

Expand All @@ -48,8 +48,8 @@ value (Att a) = snd a

-- Pangraph type contructors

makeAtt:: Identifier -> Field -> Att
makeAtt i f = Att (i, f)
makeAtt:: (Identifier , Field) -> Att

This comment has been minimized.

Copy link
@snowleopard

snowleopard Apr 24, 2017

I think Key and Value are more clear/commonly used names compared to Identifier and Field.

makeAtt a = Att a

makeNode:: [Att] -> Node
makeNode as = Node as
Expand Down
19 changes: 19 additions & 0 deletions src/Pangraph/GraphML/Parser.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
module Pangraph.GraphML.Parser
( parseGraph
, template
, PT.Template
) where

import qualified Pangraph as P
import qualified Text.XML.Hexml as H
import qualified Data.ByteString as BS
import qualified Pangraph.XMLTemplate as PT

parseGraph:: PT.Template -> BS.ByteString -> Either BS.ByteString P.Pangraph
parseGraph t file =
case H.parse file of
Left x -> Left x
Right x -> Right $ PT.parseFromTemplate t x

template:: [PT.Template]
template = PT.graphMLTemplate
49 changes: 19 additions & 30 deletions src/Pangraph/XMLTemplate.hs
Original file line number Diff line number Diff line change
@@ -1,25 +1,17 @@
{-# LANGUAGE OverloadedStrings #-}

module Pangraph.XMLTemplate
-- ( Template
-- -- Name,
-- -- Keys,
-- -- Text,
-- -- root,
-- -- name,
-- -- keys,
-- -- text
-- )
where
( Template,
graphMLTemplate,
parseFromTemplate
) where

import Data.Maybe

import qualified Pangraph as P
import qualified Text.XML.Hexml as H

import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BC

-- import qualified Data.ByteString.Lazy as BS
-- import qualified Data.ByteString.Lazy.Char8 as BC

data Template = XML [Node] [Edge]
data Node = Node [(Path, Element)]
Expand All @@ -29,16 +21,13 @@ type Path = BS.ByteString
type Element = BS.ByteString
type HexmlNode = H.Node

strToByteString:: String -> BS.ByteString
strToByteString ws = BC.pack ws

graphMLTemplate:: Template
graphMLTemplate:: [Template]
graphMLTemplate=
XML
[Node [(strToByteString"graphml graph node",
strToByteString"id")]]
[Edge [(strToByteString"graphml graph edge",
strToByteString"id source target")]]
[XML
[Node [("graphml graph node", "id")]]
[Edge [("graphml graph edge", "source target")]]
]

parseFromTemplate:: Template -> HexmlNode -> P.Pangraph
parseFromTemplate (XML nt et) root=P.makePangraph (concat $ map (nodes root) nt) (concat $ map (edges root) et)
Expand All @@ -50,26 +39,26 @@ edges:: HexmlNode -> Edge -> [P.Edge]
edges n (Edge pe) = concat $ map (makeEntity n (P.makeEdge)) pe

makeEntity:: HexmlNode -> ([P.Att] -> a) -> (Path, Element) -> [a]
makeEntity root f a = do
let (nodes, e) = resolvePath root a
let attList = fmap (\h -> getAttPairs h e) nodes
return $ fmap f attList
makeEntity root f a = fmap f attList
where
(hexmlNodes, e) = resolvePath root a
attList = fmap (\h -> getAttPairs h e) hexmlNodes

resolvePath:: HexmlNode -> (Path, Element) -> ([HexmlNode], Element)
resolvePath h (p, e) =
let delim = BS.head $ strToByteString " "
let delim = BS.head " "
in (resolvePathRecursive h $ BS.split delim p, e)

resolvePathRecursive:: HexmlNode -> [Path] -> [HexmlNode]
resolvePathRecursive h [] = [h]
resolvePathRecursive h bs = concat $
fmap (\h -> resolvePathRecursive h (tail bs)) children
fmap (\c -> resolvePathRecursive c (tail bs)) children
where
children = H.childrenBy h $ head bs

getAttPairs:: HexmlNode -> Element -> [P.Att]
getAttPairs h e =fmap toAtt $ catMaybes $ fmap (\a ->H.attributeBy h a) $ BS.split delim e
where
delim = BS.head $ strToByteString " "
delim = BS.head " "
toAtt:: H.Attribute -> P.Att
toAtt a = P.makeAtt (H.attributeName a) (H.attributeValue a)
toAtt a = P.makeAtt (H.attributeName a, H.attributeValue a)

0 comments on commit 30f9f45

Please sign in to comment.