Skip to content

Commit

Permalink
Merge pull request #39 from zouroboros/feature/pajek-support
Browse files Browse the repository at this point in the history
Pajek parser, writer and test cases for pangraph.
  • Loading branch information
thisiswhereitype authored Feb 7, 2019
2 parents 029bb0c + 4de4cd8 commit a6c4701
Show file tree
Hide file tree
Showing 6 changed files with 409 additions and 9 deletions.
19 changes: 11 additions & 8 deletions pangraph.cabal
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
name: pangraph
version: 0.2.1
synopsis: A set of parsers for graph languages and conversions to
synopsis: A set of parsers for graph languages and conversions to
graph libaries.
description: A package allowing parsing of graph files into graph
library datatypes. With aim the cope with large networks
and provide translations between graph libraries. Like a
pandoc but for graphs. This is my first library so any
feedback and help is appreicated. For example use please
description: A package allowing parsing of graph files into graph
library datatypes. With aim the cope with large networks
and provide translations between graph libraries. Like a
pandoc but for graphs. This is my first library so any
feedback and help is appreicated. For example use please
see the homepage.
homepage: https://github.com/tuura/pangraph#readme
license: BSD3
Expand Down Expand Up @@ -38,6 +38,9 @@ library
, Pangraph.Gml.Ast
, Pangraph.Gml.Parser
, Pangraph.Gml.Writer
, Pangraph.Pajek.Ast
, Pangraph.Pajek.Parser
, Pangraph.Pajek.Writer
build-depends: base >= 4.8 && < 5
, algebraic-graphs == 0.2.*
, attoparsec == 0.13.*
Expand All @@ -60,11 +63,11 @@ test-suite pangraph-test
, Show
, TestPangraph
, Gml
, Pajek
build-depends: base >= 4.8 && < 5
, containers
, bytestring
, HUnit
, pangraph
, pangraph
default-language: Haskell2010
GHC-options: -Wall -fwarn-tabs -fbreak-on-exception

37 changes: 37 additions & 0 deletions src/Pangraph/Pajek/Ast.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
module Pangraph.Pajek.Ast where
{-|
Module : Pangraph.Pajek.Ast
Description : Abstract syntax tree for pajek (.net) files
-}
data Pajek a = Graph (Maybe a) [Vertex a] [Edge] deriving (Show, Eq, Ord)

type Vertex a = (Integer, Maybe a)
data Edge = Unweighted Bool Integer Integer
| Weighted Bool Integer Integer Double deriving (Show, Eq, Ord)

name :: Pajek a -> Maybe a
name (Graph n _ _) = n

vertices :: Pajek a -> [Vertex a]
vertices (Graph _ vs _) = vs

edges :: Pajek a -> [Edge]
edges (Graph _ _ ed) = ed

label :: Vertex a -> Maybe a
label (_, l) = l

vertexId :: Vertex a -> Integer
vertexId (i, _) = i

endpoints :: Edge -> (Integer, Integer)
endpoints (Unweighted _ n1 n2) = (n1, n2)
endpoints (Weighted _ n1 n2 _) = (n1, n2)

weight :: Edge -> Maybe Double
weight (Unweighted _ _ _) = Nothing
weight (Weighted _ _ _ w) = Just w

directed :: Edge -> Bool
directed (Unweighted d _ _) = d
directed (Weighted d _ _ _) = d
194 changes: 194 additions & 0 deletions src/Pangraph/Pajek/Parser.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
{-# LANGUAGE OverloadedStrings #-}
{-|
Module : Pangraph.Pajek.Parser
Description : Parse pajek (.net) files
Functions for parseing pajek files.
Roughly follows the specification at <https://gephi.org/users/supported-graph-formats/pajek-net-format/>
All input is assumed to be encoded in UTF8
-}
module Pangraph.Pajek.Parser (parse, parsePajek, pajekToPangraph) where

import Data.Attoparsec.Text hiding (parse)
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8)
import Control.Applicative ((<|>), (<*))
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.Text.Encoding as EC
import Data.Char (isSpace)
import Prelude hiding (takeWhile, id, lines, unlines, reverse)

import qualified Pangraph as P
import Pangraph.Pajek.Ast

-- | Parses the 'ByteString' into a 'Pangraph'.
-- Please note that this function treats directed and undirected edges in same
-- way. So if the panjek file contains an arc (undirected edge) between node 1
-- and node 2 the resulting pangraph will only contain an edge between 1 and 2.
parse :: B.ByteString -> Maybe P.Pangraph
parse contents = (parsePajek contents) >>= pajekToPangraph

-- | Converts a 'Pajek' into a 'Pangraph'
pajekToPangraph :: Pajek T.Text -> Maybe P.Pangraph
pajekToPangraph pajek = let
pVertices = map (vertexToPangraph) (vertices pajek)
pEdges = map (edgeToPangraph) (edges pajek)
in P.makePangraph pVertices pEdges

vertexToPangraph :: Vertex T.Text -> P.Vertex
vertexToPangraph (vid, label') =
P.makeVertex (BC.pack (show vid))
(maybe [] (\l -> [("label", (EC.encodeUtf8 l))]) label')

edgeToPangraph :: Edge -> P.Edge
edgeToPangraph (Unweighted _ start end) =
P.makeEdge (BC.pack (show start), BC.pack (show end)) []
edgeToPangraph (Weighted _ start end w) =
P.makeEdge (BC.pack (show start), BC.pack (show end))
[("weight", BC.pack (show w))]

-- | Parses the 'ByteString' into a 'Pajek' ast.
parsePajek :: B.ByteString -> Maybe (Pajek T.Text)
parsePajek contents = either (const Nothing) Just
(parseText (decodeUtf8 contents))

parseText :: T.Text -> Either String (Pajek T.Text)
parseText = parseOnly (pajekParser <* endOfInput) . removeComments

removeComments :: T.Text -> T.Text
removeComments text = T.unlines (filter (not . T.isPrefixOf "%") (T.lines text))

pajekParser :: Parser (Pajek T.Text)
pajekParser = do
gname <- option Nothing (fmap (Just) networkHeader)
_ <- verticesHeader
vs <- vertexList
skipSpace
edges' <- sepBy (choice [arcsAsPairs, arcsList, edgesAsPairs, edgesList])
skipSpace
_ <- manyTill (skipRest) endOfInput
return (Graph gname vs (concat edges'))

networkHeader :: Parser (T.Text)
networkHeader = do
star
_ <- asciiCI "network"
skipSpace
gname <- fmap unquoted line
endOfLine
return gname

verticesHeader :: Parser Integer
verticesHeader = do
star
_ <- asciiCI "vertices"
skipSpace
n <- decimal
skipRest
return n

vertexList :: Parser [Vertex T.Text]
vertexList = sepBy vertexEntry endOfLine

vertexEntry :: Parser (Vertex T.Text)
vertexEntry = do
skipSpace
vid <- decimal
skipSpace
label' <- option Nothing $ fmap (Just) maybeQuoted
_ <- line
return (vid, label')

arcsAsPairs :: Parser [Edge]
arcsAsPairs = do
star
_ <- asciiCI "arcs"
_ <- line
endOfLine
pairs <- sepBy (pair False) endOfLine
return pairs

edgesAsPairs :: Parser [Edge]
edgesAsPairs = do
star
_ <- asciiCI "edges"
skipRest
pairs <- sepBy (pair True) endOfLine
return pairs

edgesList :: Parser [Edge]
edgesList = do
star
_ <- asciiCI "edgeslist"
skipRest
adjacencyList True

arcsList :: Parser [Edge]
arcsList = do
star
_ <- asciiCI "arcslist"
skipRest
adjacencyList False

adjacencyList :: Bool -> Parser [Edge]
adjacencyList directed' = do
edges' <- sepBy edgeList endOfLine
let flatEdges = concatMap (\(s, tids) -> map (\t -> (s, t)) tids) edges'
return (map (uncurry (Unweighted directed')) flatEdges)

edgeList :: Parser (Integer, [Integer])
edgeList = do
_ <- option ' ' (char '-')
sId <- decimal
skipSpace1
tIds <- sepBy decimal skipSpace1
_ <- line
return (sId, tIds)

pair :: Bool -> Parser Edge
pair directed' = do
skipSpace
source <- decimal
_ <- skipSpace1
target <- decimal
edge <- option (Unweighted directed' source target)
(fmap (Weighted directed' source target) edgeWeight)
_ <- line
return edge

edgeWeight :: Parser Double
edgeWeight = do
_ <- skipSpace1
w <- double
return w

star :: Parser ()
star = skip (== '*')

line :: Parser T.Text
line = takeTill (isEndOfLine)

maybeQuoted :: Parser T.Text
maybeQuoted = quoted <|> (fmap unquoted (takeTill isSpace))

quoted :: Parser T.Text
quoted = do
_ <- satisfy (== '\"')
t <- takeTill (== '\"')
_ <- satisfy (== '\"')
return t

unquoted :: T.Text -> T.Text
unquoted = T.filter (/= '\"')

skipSpace1 :: Parser ()
skipSpace1 = do
_ <- takeWhile1 isHorizontalSpace
return ()

skipRest :: Parser ()
skipRest = do
_ <- line
endOfLine
77 changes: 77 additions & 0 deletions src/Pangraph/Pajek/Writer.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
{-# LANGUAGE OverloadedStrings #-}
{-|
Module : Pangraph.Pajek.Writer
Description : Write pajek (.net) files
Functions for writeing pajek files.
Roughly follows the specification <https://gephi.org/users/supported-graph-formats/pajek-net-format/>
-}
module Pangraph.Pajek.Writer (write, pangraphToPajek, writePajek) where

import Data.Maybe
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as BC
import Data.ByteString.Builder
import qualified Data.ByteString.Lazy as L
import Data.Monoid
import Prelude hiding (concat, (++))

import qualified Pangraph as P
import Pangraph.Pajek.Ast

-- | Writes a 'Pangraph' into a bytestring.
-- | Since there is no way of knowing weather a edge is directed or undirected
-- | in a 'Pangraph' all edges are written as directed edges.
write :: P.Pangraph -> ByteString
write = writePajek . pangraphToPajek

-- | Converts a 'Pangraph' into a 'Pajek' graph.
pangraphToPajek :: P.Pangraph -> Pajek ByteString
pangraphToPajek graph = let
pEdges = P.edgeList graph
pVertices = P.vertexList graph
in Graph Nothing (map vertexToPajek pVertices) (map edgeToPajek pEdges)

vertexToPajek :: P.Vertex -> Vertex ByteString
vertexToPajek vertex = let
label' = fmap snd $ listToMaybe $ filter ((== "label") . fst)
(P.vertexAttributes vertex)
vId = read $ BC.unpack $ P.vertexID vertex
in (vId, label')

edgeToPajek :: P.Edge -> Edge
edgeToPajek edge = let
weight' = fmap (read . BC.unpack . snd) $ listToMaybe $ filter ((== "weight") . fst)
(P.edgeAttributes edge)
(source', target') = P.edgeEndpoints edge
source = read $ BC.unpack source'
target = read $ BC.unpack target'
in maybe (Unweighted False source target) (\w -> Weighted False source target w) weight'

vertexLine :: Vertex ByteString -> Builder
vertexLine vertex = let
vId = show $ vertexId vertex
label' = maybe (stringUtf8 "")
(\s -> charUtf8 '\"' <> byteString s <> charUtf8 '\"') (label vertex)
in stringUtf8 vId <> charUtf8 ' ' <> label'

edgeLine :: Edge -> Builder
edgeLine edge = let
(source, target) = endpoints edge
weight' = maybe (stringUtf8 "")
(\w -> charUtf8 ' ' <> stringUtf8 (show w)) (weight edge)
in stringUtf8 (show source) <> charUtf8 ' ' <> stringUtf8 (show target)
<> weight'

pajekBuilder :: Pajek ByteString -> Builder
pajekBuilder pajek = do
let vertexCount = length (vertices pajek)
let edgeCount = length (edges pajek)
stringUtf8 "*Vertices " <> stringUtf8 (show vertexCount) <> charUtf8 '\n'
<> mconcat [vertexLine v <> charUtf8 '\n' | v <- vertices pajek]
<> "*Arcs " <> stringUtf8 (show edgeCount) <> charUtf8 '\n'
<> mconcat [edgeLine e <> charUtf8 '\n' | e <- edges pajek]

-- | Writes a 'Pajek' into a 'ByteString'
writePajek :: Pajek ByteString -> ByteString
writePajek pajek = L.toStrict (toLazyByteString (pajekBuilder pajek))
3 changes: 2 additions & 1 deletion test/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ import Containers
import TestPangraph
import FGL
import Gml
import Pajek

import Test.HUnit

main :: IO Counts
main = (runTestTT . TestList . concat) [containersTests, fglTests, graphmlTests, showTests, pangraphTests, gmlTests]
main = (runTestTT . TestList . concat) [containersTests, fglTests, graphmlTests, showTests, pangraphTests, gmlTests, pajekTests]
Loading

0 comments on commit a6c4701

Please sign in to comment.