-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #39 from zouroboros/feature/pajek-support
Pajek parser, writer and test cases for pangraph.
- Loading branch information
Showing
6 changed files
with
409 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
module Pangraph.Pajek.Ast where | ||
{-| | ||
Module : Pangraph.Pajek.Ast | ||
Description : Abstract syntax tree for pajek (.net) files | ||
-} | ||
data Pajek a = Graph (Maybe a) [Vertex a] [Edge] deriving (Show, Eq, Ord) | ||
|
||
type Vertex a = (Integer, Maybe a) | ||
data Edge = Unweighted Bool Integer Integer | ||
| Weighted Bool Integer Integer Double deriving (Show, Eq, Ord) | ||
|
||
name :: Pajek a -> Maybe a | ||
name (Graph n _ _) = n | ||
|
||
vertices :: Pajek a -> [Vertex a] | ||
vertices (Graph _ vs _) = vs | ||
|
||
edges :: Pajek a -> [Edge] | ||
edges (Graph _ _ ed) = ed | ||
|
||
label :: Vertex a -> Maybe a | ||
label (_, l) = l | ||
|
||
vertexId :: Vertex a -> Integer | ||
vertexId (i, _) = i | ||
|
||
endpoints :: Edge -> (Integer, Integer) | ||
endpoints (Unweighted _ n1 n2) = (n1, n2) | ||
endpoints (Weighted _ n1 n2 _) = (n1, n2) | ||
|
||
weight :: Edge -> Maybe Double | ||
weight (Unweighted _ _ _) = Nothing | ||
weight (Weighted _ _ _ w) = Just w | ||
|
||
directed :: Edge -> Bool | ||
directed (Unweighted d _ _) = d | ||
directed (Weighted d _ _ _) = d |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
{-# LANGUAGE OverloadedStrings #-} | ||
{-| | ||
Module : Pangraph.Pajek.Parser | ||
Description : Parse pajek (.net) files | ||
Functions for parseing pajek files. | ||
Roughly follows the specification at <https://gephi.org/users/supported-graph-formats/pajek-net-format/> | ||
All input is assumed to be encoded in UTF8 | ||
-} | ||
module Pangraph.Pajek.Parser (parse, parsePajek, pajekToPangraph) where | ||
|
||
import Data.Attoparsec.Text hiding (parse) | ||
import qualified Data.Text as T | ||
import Data.Text.Encoding (decodeUtf8) | ||
import Control.Applicative ((<|>), (<*)) | ||
import qualified Data.ByteString as B | ||
import qualified Data.ByteString.Char8 as BC | ||
import qualified Data.Text.Encoding as EC | ||
import Data.Char (isSpace) | ||
import Prelude hiding (takeWhile, id, lines, unlines, reverse) | ||
|
||
import qualified Pangraph as P | ||
import Pangraph.Pajek.Ast | ||
|
||
-- | Parses the 'ByteString' into a 'Pangraph'. | ||
-- Please note that this function treats directed and undirected edges in same | ||
-- way. So if the panjek file contains an arc (undirected edge) between node 1 | ||
-- and node 2 the resulting pangraph will only contain an edge between 1 and 2. | ||
parse :: B.ByteString -> Maybe P.Pangraph | ||
parse contents = (parsePajek contents) >>= pajekToPangraph | ||
|
||
-- | Converts a 'Pajek' into a 'Pangraph' | ||
pajekToPangraph :: Pajek T.Text -> Maybe P.Pangraph | ||
pajekToPangraph pajek = let | ||
pVertices = map (vertexToPangraph) (vertices pajek) | ||
pEdges = map (edgeToPangraph) (edges pajek) | ||
in P.makePangraph pVertices pEdges | ||
|
||
vertexToPangraph :: Vertex T.Text -> P.Vertex | ||
vertexToPangraph (vid, label') = | ||
P.makeVertex (BC.pack (show vid)) | ||
(maybe [] (\l -> [("label", (EC.encodeUtf8 l))]) label') | ||
|
||
edgeToPangraph :: Edge -> P.Edge | ||
edgeToPangraph (Unweighted _ start end) = | ||
P.makeEdge (BC.pack (show start), BC.pack (show end)) [] | ||
edgeToPangraph (Weighted _ start end w) = | ||
P.makeEdge (BC.pack (show start), BC.pack (show end)) | ||
[("weight", BC.pack (show w))] | ||
|
||
-- | Parses the 'ByteString' into a 'Pajek' ast. | ||
parsePajek :: B.ByteString -> Maybe (Pajek T.Text) | ||
parsePajek contents = either (const Nothing) Just | ||
(parseText (decodeUtf8 contents)) | ||
|
||
parseText :: T.Text -> Either String (Pajek T.Text) | ||
parseText = parseOnly (pajekParser <* endOfInput) . removeComments | ||
|
||
removeComments :: T.Text -> T.Text | ||
removeComments text = T.unlines (filter (not . T.isPrefixOf "%") (T.lines text)) | ||
|
||
pajekParser :: Parser (Pajek T.Text) | ||
pajekParser = do | ||
gname <- option Nothing (fmap (Just) networkHeader) | ||
_ <- verticesHeader | ||
vs <- vertexList | ||
skipSpace | ||
edges' <- sepBy (choice [arcsAsPairs, arcsList, edgesAsPairs, edgesList]) | ||
skipSpace | ||
_ <- manyTill (skipRest) endOfInput | ||
return (Graph gname vs (concat edges')) | ||
|
||
networkHeader :: Parser (T.Text) | ||
networkHeader = do | ||
star | ||
_ <- asciiCI "network" | ||
skipSpace | ||
gname <- fmap unquoted line | ||
endOfLine | ||
return gname | ||
|
||
verticesHeader :: Parser Integer | ||
verticesHeader = do | ||
star | ||
_ <- asciiCI "vertices" | ||
skipSpace | ||
n <- decimal | ||
skipRest | ||
return n | ||
|
||
vertexList :: Parser [Vertex T.Text] | ||
vertexList = sepBy vertexEntry endOfLine | ||
|
||
vertexEntry :: Parser (Vertex T.Text) | ||
vertexEntry = do | ||
skipSpace | ||
vid <- decimal | ||
skipSpace | ||
label' <- option Nothing $ fmap (Just) maybeQuoted | ||
_ <- line | ||
return (vid, label') | ||
|
||
arcsAsPairs :: Parser [Edge] | ||
arcsAsPairs = do | ||
star | ||
_ <- asciiCI "arcs" | ||
_ <- line | ||
endOfLine | ||
pairs <- sepBy (pair False) endOfLine | ||
return pairs | ||
|
||
edgesAsPairs :: Parser [Edge] | ||
edgesAsPairs = do | ||
star | ||
_ <- asciiCI "edges" | ||
skipRest | ||
pairs <- sepBy (pair True) endOfLine | ||
return pairs | ||
|
||
edgesList :: Parser [Edge] | ||
edgesList = do | ||
star | ||
_ <- asciiCI "edgeslist" | ||
skipRest | ||
adjacencyList True | ||
|
||
arcsList :: Parser [Edge] | ||
arcsList = do | ||
star | ||
_ <- asciiCI "arcslist" | ||
skipRest | ||
adjacencyList False | ||
|
||
adjacencyList :: Bool -> Parser [Edge] | ||
adjacencyList directed' = do | ||
edges' <- sepBy edgeList endOfLine | ||
let flatEdges = concatMap (\(s, tids) -> map (\t -> (s, t)) tids) edges' | ||
return (map (uncurry (Unweighted directed')) flatEdges) | ||
|
||
edgeList :: Parser (Integer, [Integer]) | ||
edgeList = do | ||
_ <- option ' ' (char '-') | ||
sId <- decimal | ||
skipSpace1 | ||
tIds <- sepBy decimal skipSpace1 | ||
_ <- line | ||
return (sId, tIds) | ||
|
||
pair :: Bool -> Parser Edge | ||
pair directed' = do | ||
skipSpace | ||
source <- decimal | ||
_ <- skipSpace1 | ||
target <- decimal | ||
edge <- option (Unweighted directed' source target) | ||
(fmap (Weighted directed' source target) edgeWeight) | ||
_ <- line | ||
return edge | ||
|
||
edgeWeight :: Parser Double | ||
edgeWeight = do | ||
_ <- skipSpace1 | ||
w <- double | ||
return w | ||
|
||
star :: Parser () | ||
star = skip (== '*') | ||
|
||
line :: Parser T.Text | ||
line = takeTill (isEndOfLine) | ||
|
||
maybeQuoted :: Parser T.Text | ||
maybeQuoted = quoted <|> (fmap unquoted (takeTill isSpace)) | ||
|
||
quoted :: Parser T.Text | ||
quoted = do | ||
_ <- satisfy (== '\"') | ||
t <- takeTill (== '\"') | ||
_ <- satisfy (== '\"') | ||
return t | ||
|
||
unquoted :: T.Text -> T.Text | ||
unquoted = T.filter (/= '\"') | ||
|
||
skipSpace1 :: Parser () | ||
skipSpace1 = do | ||
_ <- takeWhile1 isHorizontalSpace | ||
return () | ||
|
||
skipRest :: Parser () | ||
skipRest = do | ||
_ <- line | ||
endOfLine |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
{-# LANGUAGE OverloadedStrings #-} | ||
{-| | ||
Module : Pangraph.Pajek.Writer | ||
Description : Write pajek (.net) files | ||
Functions for writeing pajek files. | ||
Roughly follows the specification <https://gephi.org/users/supported-graph-formats/pajek-net-format/> | ||
-} | ||
module Pangraph.Pajek.Writer (write, pangraphToPajek, writePajek) where | ||
|
||
import Data.Maybe | ||
import Data.ByteString (ByteString) | ||
import qualified Data.ByteString.Char8 as BC | ||
import Data.ByteString.Builder | ||
import qualified Data.ByteString.Lazy as L | ||
import Data.Monoid | ||
import Prelude hiding (concat, (++)) | ||
|
||
import qualified Pangraph as P | ||
import Pangraph.Pajek.Ast | ||
|
||
-- | Writes a 'Pangraph' into a bytestring. | ||
-- | Since there is no way of knowing weather a edge is directed or undirected | ||
-- | in a 'Pangraph' all edges are written as directed edges. | ||
write :: P.Pangraph -> ByteString | ||
write = writePajek . pangraphToPajek | ||
|
||
-- | Converts a 'Pangraph' into a 'Pajek' graph. | ||
pangraphToPajek :: P.Pangraph -> Pajek ByteString | ||
pangraphToPajek graph = let | ||
pEdges = P.edgeList graph | ||
pVertices = P.vertexList graph | ||
in Graph Nothing (map vertexToPajek pVertices) (map edgeToPajek pEdges) | ||
|
||
vertexToPajek :: P.Vertex -> Vertex ByteString | ||
vertexToPajek vertex = let | ||
label' = fmap snd $ listToMaybe $ filter ((== "label") . fst) | ||
(P.vertexAttributes vertex) | ||
vId = read $ BC.unpack $ P.vertexID vertex | ||
in (vId, label') | ||
|
||
edgeToPajek :: P.Edge -> Edge | ||
edgeToPajek edge = let | ||
weight' = fmap (read . BC.unpack . snd) $ listToMaybe $ filter ((== "weight") . fst) | ||
(P.edgeAttributes edge) | ||
(source', target') = P.edgeEndpoints edge | ||
source = read $ BC.unpack source' | ||
target = read $ BC.unpack target' | ||
in maybe (Unweighted False source target) (\w -> Weighted False source target w) weight' | ||
|
||
vertexLine :: Vertex ByteString -> Builder | ||
vertexLine vertex = let | ||
vId = show $ vertexId vertex | ||
label' = maybe (stringUtf8 "") | ||
(\s -> charUtf8 '\"' <> byteString s <> charUtf8 '\"') (label vertex) | ||
in stringUtf8 vId <> charUtf8 ' ' <> label' | ||
|
||
edgeLine :: Edge -> Builder | ||
edgeLine edge = let | ||
(source, target) = endpoints edge | ||
weight' = maybe (stringUtf8 "") | ||
(\w -> charUtf8 ' ' <> stringUtf8 (show w)) (weight edge) | ||
in stringUtf8 (show source) <> charUtf8 ' ' <> stringUtf8 (show target) | ||
<> weight' | ||
|
||
pajekBuilder :: Pajek ByteString -> Builder | ||
pajekBuilder pajek = do | ||
let vertexCount = length (vertices pajek) | ||
let edgeCount = length (edges pajek) | ||
stringUtf8 "*Vertices " <> stringUtf8 (show vertexCount) <> charUtf8 '\n' | ||
<> mconcat [vertexLine v <> charUtf8 '\n' | v <- vertices pajek] | ||
<> "*Arcs " <> stringUtf8 (show edgeCount) <> charUtf8 '\n' | ||
<> mconcat [edgeLine e <> charUtf8 '\n' | e <- edges pajek] | ||
|
||
-- | Writes a 'Pajek' into a 'ByteString' | ||
writePajek :: Pajek ByteString -> ByteString | ||
writePajek pajek = L.toStrict (toLazyByteString (pajekBuilder pajek)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.