diff --git a/.cursor/rules/specify-rules.mdc b/.cursor/rules/specify-rules.mdc index 6bc365e..7949cb4 100644 --- a/.cursor/rules/specify-rules.mdc +++ b/.cursor/rules/specify-rules.mdc @@ -38,6 +38,8 @@ Auto-generated from all feature plans. Last updated: 2025-11-04 - In-memory only (Map keyed by Id v); gram files for round-trip via libs/gram parse/serialize (033-pattern-graph) - Haskell (GHC 9.10.3) + `pattern-hs` ecosystem (`Pattern.Core`, `Subject.Core`) (034-graph-classifier) - In-memory `Map` via `PatternGraph` (034-graph-classifier) +- Haskell (GHC 9.10.3) + `containers ^>=0.7` (Map, Set), `base >=4.17.0.0`, `subject` (GraphValue/Symbol), `pattern` (Pattern.Core, Pattern.Graph, Pattern.PatternGraph, Pattern.Graph.GraphClassifier) (035-graph-query) +- N/A — pure in-memory data structures (035-graph-query) - (001-pattern-data-structure) @@ -57,9 +59,9 @@ tests/ : Follow standard conventions ## Recent Changes +- 035-graph-query: Added Haskell (GHC 9.10.3) + `containers ^>=0.7` (Map, Set), `base >=4.17.0.0`, `subject` (GraphValue/Symbol), `pattern` (Pattern.Core, Pattern.Graph, Pattern.PatternGraph, Pattern.Graph.GraphClassifier) - 034-graph-classifier: Added Haskell (GHC 9.10.3) + `pattern-hs` ecosystem (`Pattern.Core`, `Subject.Core`) - 033-pattern-graph: Added Haskell (GHC 9.10.3 per CLAUDE.md; base >=4.17.0.0 from pattern.cabal) + pattern (Pattern.Core, Pattern.Reconcile), subject, containers, hashable, unordered-containers (from libs/pattern) -- 032-gram-annotation-syntax: Added Haskell (GHC 9.10.3) + megaparsec (parsing), hspec (testing) diff --git a/libs/pattern/pattern.cabal b/libs/pattern/pattern.cabal index f243514..1e47460 100644 --- a/libs/pattern/pattern.cabal +++ b/libs/pattern/pattern.cabal @@ -25,6 +25,8 @@ library Pattern.Core Pattern.Graph Pattern.Graph.GraphClassifier + Pattern.Graph.GraphQuery + Pattern.Graph.Algorithms Pattern.PatternGraph Pattern.Reconcile @@ -49,6 +51,8 @@ test-suite pattern-test Spec.Pattern.CoreSpec Spec.Pattern.GraphSpec 
Spec.Pattern.Graph.GraphClassifierSpec + Spec.Pattern.Graph.GraphQuerySpec + Spec.Pattern.Graph.AlgorithmsSpec Spec.Pattern.PatternGraphProperties Spec.Pattern.PatternGraphSpec Spec.Pattern.Properties diff --git a/libs/pattern/src/Pattern.hs b/libs/pattern/src/Pattern.hs index 547039f..8186594 100644 --- a/libs/pattern/src/Pattern.hs +++ b/libs/pattern/src/Pattern.hs @@ -11,7 +11,12 @@ -- -- * @Pattern.Core@ - Core Pattern data type, construction functions, query functions, -- predicate functions, and typeclass instances (Functor, Applicative, Comonad, etc.) --- * @Pattern.Graph@ - Graph operations and transformations +-- * @Pattern.Graph@ - Low-level graph structural operations (GraphLens, nodes, relationships, incidentRels, etc.) +-- * @Pattern.Graph.GraphQuery@ - Portable graph query interface ('GraphQuery', 'TraversalWeight', +-- 'fromGraphLens', combinators) +-- * @Pattern.Graph.Algorithms@ - Graph algorithms operating on 'GraphQuery' (bfs, dfs, +-- shortestPath, connectedComponents, etc.) +-- * @Pattern.PatternGraph@ - Typed graph container with O(log n) lookups; 'fromPatternGraph' -- * @Pattern.Reconcile@ - Pattern reconciliation for normalizing duplicate identities -- -- == Usage @@ -23,6 +28,12 @@ -- >>> value p -- "test" -- +-- For graph algorithms, import the algorithm modules directly: +-- +-- > import Pattern.PatternGraph (fromPatternGraph) +-- > import Pattern.Graph.GraphQuery (directed) +-- > import qualified Pattern.Graph.Algorithms as Alg +-- -- All public functions, types, and typeclass instances from Pattern.Core are -- available through this module. See individual module documentation for -- detailed information about specific functionality. 
@@ -35,6 +46,7 @@ -- query functions, predicate functions, helper functions, and all typeclass instances) -- * All public exports from @Pattern.Graph@ (graph operations) -- * All public exports from @Pattern.Reconcile@ (reconciliation operations) +-- * All public exports from @Pattern.Graph.GraphQuery@ (portable graph query interface) -- -- Internal implementation details and helper functions are not exported through -- this module, ensuring a clean public API. @@ -43,10 +55,13 @@ module Pattern module Pattern.Core -- * Graph Operations , module Pattern.Graph + -- * Portable Graph Query Interface + , module Pattern.Graph.GraphQuery -- * Reconciliation Operations , module Pattern.Reconcile ) where import Pattern.Core import Pattern.Graph +import Pattern.Graph.GraphQuery import Pattern.Reconcile diff --git a/libs/pattern/src/Pattern/Graph.hs b/libs/pattern/src/Pattern/Graph.hs index 8367a48..8ef5331 100644 --- a/libs/pattern/src/Pattern/Graph.hs +++ b/libs/pattern/src/Pattern/Graph.hs @@ -73,15 +73,10 @@ module Pattern.Graph , neighbors , incidentRels , degree - -- * Graph Analysis Operations - , connectedComponents -- Requires Ord v - , bfs -- Requires Ord v - , findPath -- Requires Ord v ) where import Pattern.Core (Pattern(..)) import Data.Maybe (mapMaybe) -import qualified Data.Set as Set import Pattern.Graph.GraphClassifier (GraphValue(..)) @@ -400,90 +395,4 @@ incidentRels lens node = degree :: GraphValue v => GraphLens v -> Pattern v -> Int degree lens node = length (incidentRels lens node) --- * Graph Analysis Operations - --- | Find all connected components in the graph. --- --- A connected component is a set of nodes that are reachable from --- each other via relationships. Returns a list of lists, where each --- inner list represents a component. 
--- --- == Time Complexity --- O(n + r) where n is number of nodes and r is number of relationships --- --- == Example --- --- >>> let lens = GraphLens pattern isAtomic --- >>> connectedComponents lens --- [[pattern "A", pattern "B", pattern "C"], [pattern "D", pattern "E"]] -connectedComponents :: GraphValue v => GraphLens v -> [[Pattern v]] -connectedComponents lens = findComponents lens (nodes lens) Set.empty [] - -findComponents :: GraphValue v => GraphLens v -> [Pattern v] -> Set.Set (Id v) -> [[Pattern v]] -> [[Pattern v]] -findComponents _ [] _ acc = reverse acc -findComponents lens (n:ns) visited acc = - if Set.member (identify (value n)) visited - then findComponents lens ns visited acc - else - let component = bfs lens n - newVisited = Set.union visited (Set.fromList (map (identify . value) component)) - newAcc = component : acc - in findComponents lens ns newVisited newAcc - --- | Perform breadth-first search from a starting node. --- --- Returns all nodes reachable from the starting node via relationships. 
--- --- == Time Complexity --- O(n + r) where n is number of nodes and r is number of relationships --- --- == Example --- --- >>> let lens = GraphLens pattern isAtomic --- >>> bfs lens (point "Alice") --- [point "Alice", point "Bob", pattern "Charlie"] -bfs :: GraphValue v => GraphLens v -> Pattern v -> [Pattern v] -bfs lens start = bfsHelper lens Set.empty [start] [] - -bfsHelper :: GraphValue v => GraphLens v -> Set.Set (Id v) -> [Pattern v] -> [Pattern v] -> [Pattern v] -bfsHelper _ _ [] acc = reverse acc -bfsHelper lens visited (n:queue) acc - | Set.member (identify (value n)) visited = bfsHelper lens visited queue acc - | otherwise = - let newVisited = Set.insert (identify (value n)) visited - newAcc = n : acc - nodeNeighbors = Pattern.Graph.neighbors lens n - newQueue = queue ++ filter (\nb -> not (Set.member (identify (value nb)) newVisited)) nodeNeighbors - in bfsHelper lens newVisited newQueue newAcc - --- | Find a path between two nodes if one exists. --- --- Returns Just [nodes] if a path exists, Nothing otherwise. --- The path is a sequence of nodes connecting start to end. 
--- --- == Time Complexity --- O(n + r) where n is number of nodes and r is number of relationships --- --- == Example --- --- >>> let lens = GraphLens pattern isAtomic --- >>> findPath lens (point "Alice") (pattern "Charlie") --- Just [point "Alice", point "Bob", pattern "Charlie"] -findPath :: GraphValue v => GraphLens v -> Pattern v -> Pattern v -> Maybe [Pattern v] -findPath lens start end - | identify (value start) == identify (value end) = Just [start] - | otherwise = findPathHelper lens Set.empty [(start, [start])] end - -findPathHelper :: GraphValue v => GraphLens v -> Set.Set (Id v) -> [(Pattern v, [Pattern v])] -> Pattern v -> Maybe [Pattern v] -findPathHelper _ _ [] _ = Nothing -findPathHelper lens visited ((n, path):queue) targetNode - | identify (value n) == identify (value targetNode) = Just (reverse path) - | Set.member (identify (value n)) visited = findPathHelper lens visited queue targetNode - | otherwise = - let newVisited = Set.insert (identify (value n)) visited - nodeNeighbors = Pattern.Graph.neighbors lens n - newPaths = map (\neighbor -> (neighbor, neighbor:path)) nodeNeighbors - unvisitedPaths = filter (\(neighbor, _) -> not (Set.member (identify (value neighbor)) newVisited)) newPaths - newQueue = queue ++ unvisitedPaths - in findPathHelper lens newVisited newQueue targetNode diff --git a/libs/pattern/src/Pattern/Graph/Algorithms.hs b/libs/pattern/src/Pattern/Graph/Algorithms.hs new file mode 100644 index 0000000..d6ac7b8 --- /dev/null +++ b/libs/pattern/src/Pattern/Graph/Algorithms.hs @@ -0,0 +1,469 @@ +-- | Graph algorithms operating on 'GraphQuery v'. +-- +-- All traversal algorithms accept a 'TraversalWeight v' at the call site, +-- enabling the same 'GraphQuery' to be used with directed, undirected, or +-- custom-weighted traversal without any conversion. +-- +-- == Categorical Interpretation +-- +-- Algorithms are natural transformations over the 'GraphQuery' coalgebra. 
+-- They unfold the coalgebra according to a traversal policy ('TraversalWeight') +-- and accumulate results. +-- +-- == Complexity Note +-- +-- 'betweennessCentrality' uses the Brandes algorithm: O(n·(n+r)·log n). +-- It calls 'queryIncidentRels' in the inner loop. For large graphs, wrap +-- the 'GraphQuery' with 'memoizeIncidentRels' before calling this function. +-- TODO: bulk adjacency — see open question §1 in the feature proposal. +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE FlexibleContexts #-} +module Pattern.Graph.Algorithms + ( -- * Traversal + bfs + , dfs + -- * Paths + , shortestPath + , hasPath + , allPaths + -- * Boolean queries + , isNeighbor + , isConnected + -- * Structural + , connectedComponents + , topologicalSort + , hasCycle + -- * Spanning + , minimumSpanningTree + -- * Centrality + , degreeCentrality + , betweennessCentrality + -- * Context query helpers + , queryAnnotationsOf + , queryWalksContaining + , queryCoMembers + ) where + +import Data.List (foldl', sortBy) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Maybe (mapMaybe, fromMaybe) +import Data.Ord (comparing) +import qualified Data.Set as Set +import qualified Data.Sequence as Seq + +import Pattern.Core (Pattern(..)) +import Pattern.Graph.GraphClassifier (GraphValue(..), GraphClass(..), GraphClassifier(..)) +import Pattern.Graph.GraphQuery (GraphQuery(..), TraversalWeight, TraversalDirection(..)) + +-- ============================================================================ +-- Internal helper: reachable neighbors given a traversal weight +-- ============================================================================ + +-- | Given a 'GraphQuery', a 'TraversalWeight', and a node, return all +-- neighbor nodes reachable via relationships with finite traversal cost. +-- +-- Inlined by GHC at call sites to eliminate the function-call overhead in +-- the inner loop of every traversal algorithm. 
+{-# INLINE reachableNeighbors #-}
+reachableNeighbors :: GraphValue v => GraphQuery v -> TraversalWeight v -> Pattern v -> [Pattern v]
+reachableNeighbors gq weight node =
+  mapMaybe neighborOf (queryIncidentRels gq node)
+  where
+    nodeId = identify (value node)
+
+    -- A relationship yields its opposite endpoint only when @node@ sits on one
+    -- side of it and the cost of crossing in that direction is finite. When
+    -- both guards fail, matching falls through to the catch-all alternative.
+    neighborOf rel =
+      case (querySource gq rel, queryTarget gq rel) of
+        (Just s, Just t)
+          | identify (value s) == nodeId && isFinite (weight rel Forward)  -> Just t
+          | identify (value t) == nodeId && isFinite (weight rel Backward) -> Just s
+        _ -> Nothing
+
+    -- Infinity marks an impassable direction; NaN is rejected the same way.
+    isFinite x = not (isInfinite x) && not (isNaN x)
+
+-- ============================================================================
+-- Traversal
+-- ============================================================================
+
+-- | Breadth-first search from a starting node.
+--
+-- Returns all nodes reachable from @start@ in BFS order, including @start@.
+-- Traversal direction and cost are governed by @weight@: only relationships
+-- whose cost in the direction of travel is finite are followed.
+--
+-- Every reachable node appears exactly once in the result, even when several
+-- relationships lead to the same neighbor (parallel or reciprocal edges).
+bfs :: (GraphValue v, Ord (Id v)) => GraphQuery v -> TraversalWeight v -> Pattern v -> [Pattern v]
+bfs gq weight start = go (Seq.singleton start) (Set.singleton (identify (value start))) []
+  where
+    go queue visited acc =
+      case Seq.viewl queue of
+        Seq.EmptyL -> reverse acc
+        n Seq.:< rest ->
+          let (visited', queue') = foldl' enqueue (visited, rest) (reachableNeighbors gq weight n)
+          in go queue' visited' (n : acc)
+
+    -- Nodes are marked as seen at enqueue time. The check runs against the
+    -- set being accumulated (not the set from before this step), so a
+    -- neighbor listed twice for the same node is enqueued only once.
+    enqueue (seen, pending) nb
+      | Set.member nbId seen = (seen, pending)
+      | otherwise            = (Set.insert nbId seen, pending Seq.|> nb)
+      where
+        nbId = identify (value nb)
+
+-- | Depth-first search from a starting node.
+--
+-- Returns all nodes reachable from @start@ in DFS order, including @start@.
+-- Traversal direction and cost are governed by @weight@.
+dfs :: (GraphValue v, Ord (Id v)) => GraphQuery v -> TraversalWeight v -> Pattern v -> [Pattern v]
+dfs gq weight start = walk [start] Set.empty []
+  where
+    -- The stack may hold a node more than once; the membership test on pop
+    -- guarantees each node is emitted a single time.
+    walk [] _ emitted = reverse emitted
+    walk (n : pending) seen emitted
+      | Set.member nId seen = walk pending seen emitted
+      | otherwise           = walk (fresh ++ pending) seen' (n : emitted)
+      where
+        nId   = identify (value n)
+        seen' = Set.insert nId seen
+        fresh =
+          [ nb
+          | nb <- reachableNeighbors gq weight n
+          , not (Set.member (identify (value nb)) seen')
+          ]
+
+-- ============================================================================
+-- Paths
+-- ============================================================================
+
+-- | Shortest path between two nodes using Dijkstra's algorithm.
+--
+-- Yields 'Just' the node sequence from @start@ to @end@ (both endpoints
+-- included) when @end@ can be reached, and 'Nothing' when it cannot.
+-- The cost of crossing a relationship in a given direction comes from
+-- @weight@; directions with a non-finite cost are never taken.
+shortestPath :: (GraphValue v, Ord (Id v)) => GraphQuery v -> TraversalWeight v -> Pattern v -> Pattern v -> Maybe [Pattern v]
+shortestPath gq weight start end
+  | startId == endId = Just [start]
+  | otherwise =
+      search
+        -- Frontier ordered by (cost, nodeId): the minimum key is the cheapest
+        -- entry. Each entry carries its path, newest node first.
+        (Map.singleton (0.0, startId) [start])
+        -- Cheapest cost discovered so far for every node seen.
+        (Map.singleton startId 0.0)
+        Set.empty
+  where
+    startId = identify (value start)
+    endId   = identify (value end)
+
+    search frontier best done =
+      case Map.minViewWithKey frontier of
+        Nothing -> Nothing
+        Just ((_, []), frontier') -> search frontier' best done
+        Just (((cost, nId), path@(n : _)), frontier')
+          -- Stale entry: the node was already finalised via a cheaper route.
+          | Set.member nId done -> search frontier' best done
+          | nId == endId -> Just (reverse path)
+          | otherwise ->
+              let done'      = Set.insert nId done
+                  candidates = mapMaybe (relax cost path n done') (queryIncidentRels gq n)
+                  (frontier'', best') = foldl' offer (frontier', best) candidates
+              in search frontier'' best' done'
+
+    -- Propose the far endpoint of @rel@ as the next hop from @node@, provided
+    -- the crossing is passable and the endpoint has not been finalised.
+    relax cost path node done rel =
+      case (querySource gq rel, queryTarget gq rel) of
+        (Just s, Just t)
+          | sId == nodeId && passable fwd && not (Set.member tId done) ->
+              Just (tId, cost + fwd, t : path)
+          | tId == nodeId && passable bwd && not (Set.member sId done) ->
+              Just (sId, cost + bwd, s : path)
+          where
+            sId = identify (value s)
+            tId = identify (value t)
+        _ -> Nothing
+      where
+        nodeId = identify (value node)
+        fwd    = weight rel Forward
+        bwd    = weight rel Backward
+
+    -- Keep a candidate only when it strictly improves on the best known cost.
+    offer (frontier, best) (nId, newCost, newPath)
+      | Just oldCost <- Map.lookup nId best, oldCost <= newCost = (frontier, best)
+      | otherwise =
+          ( Map.insert (newCost, nId) newPath frontier
+          , Map.insert nId newCost best
+          )
+
+    passable x = not (isInfinite x) && not (isNaN x)
+
+-- | Return 'True' if a path exists between @src@ and @tgt@.
+--
+-- Defined in terms of 'shortestPath': the answer is 'True' exactly when that
+-- search produces a path.
+hasPath :: (GraphValue v, Ord (Id v)) => GraphQuery v -> TraversalWeight v -> Pattern v -> Pattern v -> Bool
+hasPath gq weight src tgt = maybe False (const True) (shortestPath gq weight src tgt)
+
+-- | All simple paths between two nodes (DFS-based, no repeated nodes).
+--
+-- Returns @[]@ if no path exists or the graph is empty.
+-- Warning: exponential in the worst case for dense graphs.
+allPaths :: (GraphValue v, Ord (Id v)) => GraphQuery v -> TraversalWeight v -> Pattern v -> Pattern v -> [[Pattern v]]
+allPaths gq weight start end = extend [start] (Set.singleton (identify (value start)))
+  where
+    endId = identify (value end)
+
+    -- @path@ is held newest-first; @onPath@ contains the identities already
+    -- on it, which is what keeps every produced path simple.
+    extend [] _ = []
+    extend path@(n : _) onPath
+      | identify (value n) == endId = [reverse path]
+      | otherwise =
+          let nbrs = filter (\nb -> not (Set.member (identify (value nb)) onPath))
+                            (reachableNeighbors gq weight n)
+          in concatMap (\nb -> extend (nb : path) (Set.insert (identify (value nb)) onPath)) nbrs
+
+-- ============================================================================
+-- Boolean queries
+-- ============================================================================
+
+-- | Return 'True' if @a@ and @b@ are directly connected by a relationship
+-- with finite traversal cost.
+--
+-- The check is made from @a@'s side: @b@ must be reachable from @a@ in one
+-- step under @weight@, so with an asymmetric weight the answer can differ
+-- when the arguments are swapped.
+isNeighbor :: (GraphValue v, Eq (Id v)) => GraphQuery v -> TraversalWeight v -> Pattern v -> Pattern v -> Bool
+isNeighbor gq weight a b =
+  let bId = identify (value b)
+  in any (\nb -> identify (value nb) == bId) (reachableNeighbors gq weight a)
+
+-- | Return 'True' if the graph is connected under the given traversal weight.
+--
+-- An empty graph is considered connected (vacuously true). Otherwise the
+-- graph is connected when every node listed by 'queryNodes' is reachable from
+-- the first listed node. With an asymmetric weight (e.g. one-way traversal)
+-- this is reachability from that first node, not strong connectivity.
+--
+-- Membership is decided by node identity rather than by comparing list
+-- lengths, so repeated entries in either list cannot skew the answer.
+isConnected :: (GraphValue v, Ord (Id v)) => GraphQuery v -> TraversalWeight v -> Bool
+isConnected gq weight =
+  case queryNodes gq of
+    [] -> True
+    ns@(n : _) ->
+      let reached = Set.fromList (map (identify . value) (bfs gq weight n))
+      in all (\m -> Set.member (identify (value m)) reached) ns
+
+-- ============================================================================
+-- Structural
+-- ============================================================================
+
+-- | Find all connected components under the given traversal weight.
+--
+-- Returns a list of node lists, each representing one component.
+connectedComponents :: (GraphValue v, Ord (Id v)) => GraphQuery v -> TraversalWeight v -> [[Pattern v]]
+connectedComponents gq weight = collect (queryNodes gq) Set.empty []
+  where
+    -- Each not-yet-seen node seeds a BFS; everything that BFS reaches forms
+    -- one component. NOTE(review): with an asymmetric weight a later BFS can
+    -- re-enter nodes of an earlier component, so components may overlap.
+    collect [] _ found = reverse found
+    collect (n : ns) seen found
+      | Set.member (identify (value n)) seen = collect ns seen found
+      | otherwise =
+          let members = bfs gq weight n
+              seen'   = foldl' (flip (Set.insert . identify . value)) seen members
+          in collect ns seen' (members : found)
+
+-- | Topological sort by depth-first search.
+--
+-- A node is emitted only after every node it points to has been emitted, and
+-- each emitted node is placed in front of the ones emitted before it, so the
+-- final list runs from sources towards sinks.
+--
+-- Returns 'Nothing' if the graph contains a directed cycle (a self-loop
+-- counts as one). Only the endpoint order of each relationship
+-- (source → target) is consulted; no 'TraversalWeight' is involved.
+topologicalSort :: (GraphValue v, Ord (Id v)) => GraphQuery v -> Maybe [Pattern v]
+topologicalSort gq = sweep (queryNodes gq) Set.empty []
+  where
+    sweep [] _ ordered = Just ordered
+    sweep (n : ns) done ordered
+      | Set.member (identify (value n)) done = sweep ns done ordered
+      | otherwise =
+          visit Set.empty (done, ordered) n >>= \(done', ordered') ->
+            sweep ns done' ordered'
+
+    -- @onPath@ holds the identities on the current DFS path; meeting one of
+    -- them again means a back edge, i.e. a cycle.
+    visit onPath (done, ordered) n
+      | Set.member nId onPath = Nothing
+      | Set.member nId done   = Just (done, ordered)
+      | otherwise             = fmap finish (foldl' step (Just (done, ordered)) successors)
+      where
+        nId     = identify (value n)
+        onPath' = Set.insert nId onPath
+
+        -- Follow only relationships that leave @n@ (those whose source is @n@).
+        leavesHere r = maybe False (\s -> identify (value s) == nId) (querySource gq r)
+        successors   = mapMaybe (queryTarget gq) (filter leavesHere (queryIncidentRels gq n))
+
+        step Nothing _       = Nothing
+        step (Just state) nb = visit onPath' state nb
+
+        finish (done', ordered') = (Set.insert nId done', n : ordered')
+
+-- | Return 'True' if the graph contains a directed cycle.
+hasCycle :: (GraphValue v, Ord (Id v)) => GraphQuery v -> Bool
+hasCycle gq = maybe True (const False) (topologicalSort gq)
+
+-- ============================================================================
+-- Spanning
+-- ============================================================================
+
+-- | Minimum spanning tree using Kruskal's algorithm.
+--
+-- Returns the list of nodes in the MST (not edges). For a forest (disconnected
+-- graph), returns the nodes touched by the minimum spanning forest; a node
+-- with no accepted edge is not part of the result.
+--
+-- The weight of an edge is the cheaper of its finite forward and backward
+-- traversal costs. An edge with no finite cost in either direction is
+-- ignored, so one-way weights such as 'directed' still contribute edges.
+minimumSpanningTree :: (GraphValue v, Ord (Id v)) => GraphQuery v -> TraversalWeight v -> [Pattern v]
+minimumSpanningTree gq weight =
+  let rels          = queryRelationships gq
+      edgesWithCost = mapMaybe edgeCost rels
+      sortedEdges   = sortBy (comparing (\(c, _, _) -> c)) edgesWithCost
+      nodeIds       = map (identify . value) (queryNodes gq)
+      initialUF     = Map.fromList [(i, i) | i <- nodeIds]
+      (_, mstNodes) = foldl' addEdge (initialUF, Set.empty) sortedEdges
+  in filter (\n -> Set.member (identify (value n)) mstNodes) (queryNodes gq)
+  where
+    -- Discard infinite and NaN costs before taking the minimum: NaN would
+    -- otherwise make both 'min' and the sort order unreliable.
+    edgeCost rel = case (querySource gq rel, queryTarget gq rel) of
+      (Just s, Just t) ->
+        case filter isFinite [weight rel Forward, weight rel Backward] of
+          []       -> Nothing
+          (c : cs) -> Just (foldl' min c cs, s, t)
+      _ -> Nothing
+
+    -- Accept an edge only when it joins two different union-find trees.
+    addEdge (uf, nodes) (_, s, t) =
+      let sRoot = rootOf uf (identify (value s))
+          tRoot = rootOf uf (identify (value t))
+      in if sRoot == tRoot
+           then (uf, nodes)
+           else ( Map.insert sRoot tRoot uf
+                , Set.insert (identify (value s)) (Set.insert (identify (value t)) nodes)
+                )
+
+    -- Follow parent links to the representative (no path compression).
+    rootOf uf i = case Map.lookup i uf of
+      Nothing -> i
+      Just p  -> if p == i then i else rootOf uf p
+
+    isFinite x = not (isInfinite x) && not (isNaN x)
+
+-- ============================================================================
+-- Centrality
+-- ============================================================================
+
+-- | Degree centrality: normalized count of incident relationships per node.
+--
+-- Returns a map from node identity to 'queryDegree' divided by (n - 1), where
+-- n is the number of nodes. For a graph with at most one node the divisor
+-- is 1. The result lies in [0, 1] as long as no node has more than (n - 1)
+-- incident relationships.
+degreeCentrality :: (GraphValue v, Ord (Id v)) => GraphQuery v -> Map (Id v) Double
+degreeCentrality gq =
+  let ns   = queryNodes gq
+      n    = length ns
+      norm = if n <= 1 then 1.0 else fromIntegral (n - 1)
+  in Map.fromList
+       [ (identify (value node), fromIntegral (queryDegree gq node) / norm)
+       | node <- ns
+       ]
+
+-- | Betweenness centrality using the Brandes algorithm.
+--
+-- Returns a map from node identity to betweenness score (unnormalized).
+-- Each ordered pair of endpoints is counted separately, so with a symmetric
+-- weight every unordered pair contributes twice.
+--
+-- Shortest paths are measured in hops: @weight@ decides only whether a
+-- relationship may be crossed in a given direction (finite cost), not how far
+-- apart its endpoints are.
+--
+-- A node receives no credit for the traversals in which it is the source,
+-- as in Brandes' formulation.
+--
+-- Complexity: O(n·(n+r)·log n). For large graphs, wrap the 'GraphQuery'
+-- with 'memoizeIncidentRels' before calling this function.
+--
+-- TODO: bulk adjacency — see open question §1 in the feature proposal.
+-- This implementation calls 'queryIncidentRels' in the inner loop, which
+-- is O(r) per call. A bulk adjacency representation would reduce this to O(1).
+betweennessCentrality :: (GraphValue v, Ord (Id v)) => GraphQuery v -> TraversalWeight v -> Map (Id v) Double
+betweennessCentrality gq weight =
+  let ns      = queryNodes gq
+      initial = Map.fromList [(identify (value n), 0.0) | n <- ns]
+  in foldl' (accumulate ns) initial ns
+  where
+    -- One Brandes round: count shortest paths from @s@, then push the
+    -- dependencies back from the farthest nodes towards @s@.
+    accumulate ns betweenness s =
+      let sId = identify (value s)
+          (sigma, preds, dist) = bfsPhase s ns
+          delta  = Map.fromList [(identify (value n), 0.0) | n <- ns]
+          -- Farthest nodes first; unreachable nodes (distance -1) sort last
+          -- and have no predecessors, so they contribute nothing.
+          stack  = sortBy (comparing (\n -> negate (fromMaybe 0.0 (Map.lookup (identify (value n)) dist)))) ns
+          delta' = foldl' (backProp sigma preds) delta stack
+          -- The source's own accumulated dependency must not be credited.
+          credit k v
+            | k == sId  = v
+            | otherwise = v + fromMaybe 0.0 (Map.lookup k delta')
+      in Map.mapWithKey credit betweenness
+
+    -- sigma: number of shortest paths from @s@; dist: hop distance from @s@
+    -- (-1 = not reached yet); preds: predecessors on shortest paths.
+    bfsPhase s ns =
+      let sId        = identify (value s)
+          sigma0     = Map.fromList [(identify (value n), 0.0) | n <- ns]
+          sigma1     = Map.insert sId 1.0 sigma0
+          dist0      = Map.fromList [(identify (value n), -1.0) | n <- ns]
+          dist1      = Map.insert sId 0.0 dist0
+          emptyPreds = [] `asTypeOf` [identify (value s)]
+          preds0     = Map.fromList [(identify (value n), emptyPreds) | n <- ns]
+      in bfsLoop (Seq.singleton s) sigma1 preds0 dist1
+
+    bfsLoop queue sigma preds dist =
+      case Seq.viewl queue of
+        Seq.EmptyL -> (sigma, preds, dist)
+        v Seq.:< rest ->
+          let vId   = identify (value v)
+              vDist = fromMaybe 0.0 (Map.lookup vId dist)
+              vSig  = fromMaybe 0.0 (Map.lookup vId sigma)
+              rels  = queryIncidentRels gq v
+              (sigma', preds', dist', newQueue) =
+                foldl' (processNeighbor vId vDist vSig) (sigma, preds, dist, rest) rels
+          in bfsLoop newQueue sigma' preds' dist'
+
+    -- Cross @rel@ from @v@ when @v@ is one of its endpoints and the cost in
+    -- that direction is finite.
+    processNeighbor vId vDist vSig state rel =
+      case (querySource gq rel, queryTarget gq rel) of
+        (Just s, Just t)
+          | identify (value s) == vId && isFinite (weight rel Forward) ->
+              updateNeighbor vId vDist vSig (identify (value t)) t state
+          | identify (value t) == vId && isFinite (weight rel Backward) ->
+              updateNeighbor vId vDist vSig (identify (value s)) s state
+        _ -> state
+
+    updateNeighbor vId vDist vSig wId w (sigma, preds, dist, queue)
+      -- First visit to @w@: fix its distance and schedule it.
+      | wDist < 0 =
+          ( Map.insert wId (wSig + vSig) sigma
+          , Map.adjust (vId :) wId preds
+          , Map.insert wId (vDist + 1) dist
+          , queue Seq.|> w
+          )
+      -- Another shortest route into @w@ that passes through @v@.
+      | wDist == vDist + 1 =
+          ( Map.insert wId (wSig + vSig) sigma
+          , Map.adjust (vId :) wId preds
+          , dist
+          , queue
+          )
+      | otherwise = (sigma, preds, dist, queue)
+      where
+        wDist = fromMaybe (-1.0) (Map.lookup wId dist)
+        wSig  = fromMaybe 0.0 (Map.lookup wId sigma)
+
+    -- Distribute @w@'s dependency over its predecessors, proportionally to
+    -- the share of shortest paths that run through each of them.
+    backProp sigma preds delta w =
+      let wId    = identify (value w)
+          wPreds = fromMaybe [] (Map.lookup wId preds)
+          wSig   = fromMaybe 1.0 (Map.lookup wId sigma)
+          wDelta = fromMaybe 0.0 (Map.lookup wId delta)
+      in foldl' (\d vId ->
+                   let vSig         = fromMaybe 1.0 (Map.lookup vId sigma)
+                       vDelta       = fromMaybe 0.0 (Map.lookup vId d)
+                       contribution = (vSig / wSig) * (1.0 + wDelta)
+                   in Map.insert vId (vDelta + contribution) d
+                ) delta wPreds
+
+    isFinite x = not (isInfinite x) && not (isNaN x)
+
+-- ============================================================================
+-- Context query helpers
+-- ============================================================================
+
+-- | Return all annotations that directly contain the given element.
+--
+-- Filters the result of 'queryContainers' to elements classified as 'GAnnotation'.
+queryAnnotationsOf :: GraphClassifier extra v -> GraphQuery v -> Pattern v -> [Pattern v]
+queryAnnotationsOf classifier gq p =
+  filter (isAnnotation . classify classifier) (queryContainers gq p)
+  where
+    isAnnotation GAnnotation = True
+    isAnnotation _           = False
+
+-- | Return all walks that directly contain the given element.
+--
+-- Filters the result of 'queryContainers' to elements classified as 'GWalk'.
+queryWalksContaining :: GraphClassifier extra v -> GraphQuery v -> Pattern v -> [Pattern v]
+queryWalksContaining classifier gq p =
+  filter (isWalk . classify classifier) (queryContainers gq p)
+  where
+    isWalk GWalk = True
+    isWalk _     = False
+
+-- | Return all co-members of @element@ within @container@.
+--
+-- Co-members are the other elements that are contained by @container@ (i.e. all
+-- elements that share this container with @element@), excluding @element@ itself.
+-- E.g. for two nodes that share a common walk, calling 'queryCoMembers' with
+-- one node and the walk as container returns the other node(s) in that walk.
+queryCoMembers :: (GraphValue v, Eq (Id v)) => GraphQuery v -> Pattern v -> Pattern v -> [Pattern v]
+queryCoMembers gq element container =
+  -- Candidates are the graph's nodes followed by its relationships; a
+  -- candidate qualifies when @container@ is among its direct containers and
+  -- it is not @element@ itself.
+  [ e
+  | e <- queryNodes gq ++ queryRelationships gq
+  , any ((== containerId) . identify . value) (queryContainers gq e)
+  , identify (value e) /= elementId
+  ]
+  where
+    containerId = identify (value container)
+    elementId   = identify (value element)
diff --git a/libs/pattern/src/Pattern/Graph/GraphQuery.hs b/libs/pattern/src/Pattern/Graph/GraphQuery.hs
new file mode 100644
index 0000000..45709fc
--- /dev/null
+++ b/libs/pattern/src/Pattern/Graph/GraphQuery.hs
@@ -0,0 +1,240 @@
+-- | GraphQuery: Portable, composable graph query interface.
+--
+-- This module provides 'GraphQuery v', a record-of-functions that abstracts
+-- over any graph representation. Algorithms in "Pattern.Graph.Algorithms"
+-- accept 'GraphQuery v' and are therefore independent of whether the
+-- underlying graph is a 'GraphLens' or a 'PatternGraph'.
+--
+-- == Categorical Interpretation
+--
+-- 'GraphQuery v' is a coalgebra: given a graph element, it produces the
+-- elements reachable from it. 'queryContainers' is the upward dual — given
+-- an element, it produces the structures that contain it. Together they form
+-- a bidirectional traversal interface.
+--
+-- == Design Principles
+--
+-- 1. Record-of-functions (not a typeclass) — consistent with 'GraphClassifier'.
+-- 2. 'TraversalWeight' is a call-site parameter, not part of the interface.
+-- 3. Combinators ('frameQuery', 'memoizeIncidentRels') are plain functions.
+-- +-- == Example +-- +-- > import Pattern.Graph.GraphQuery (fromGraphLens, directed, undirected) +-- > import Pattern.PatternGraph (fromPatternGraph) +-- > import qualified Pattern.Graph.Algorithms as Alg +-- > +-- > -- From a PatternGraph (O(log n) lookups): +-- > let gq = fromPatternGraph myPatternGraph +-- > let path = Alg.shortestPath gq directed nodeA nodeB +-- > +-- > -- From a GraphLens (O(n) lookups): +-- > let gq2 = fromGraphLens myLens +-- > let comps = Alg.connectedComponents gq2 undirected +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE FlexibleContexts #-} +module Pattern.Graph.GraphQuery + ( -- * Traversal types + TraversalDirection(..) + , TraversalWeight + , undirected + , directed + , directedReverse + -- * GraphQuery interface + , GraphQuery(..) + -- * Constructors + , fromGraphLens + -- Note: 'fromPatternGraph' is defined in "Pattern.PatternGraph" to avoid + -- a circular import (GraphQuery → PatternGraph → Graph → GraphQuery). + -- Import it from "Pattern.PatternGraph" directly. + -- * Combinators + , frameQuery + , memoizeIncidentRels + ) where + +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Pattern.Core (Pattern(..)) +import Pattern.Graph (GraphLens(..)) +import qualified Pattern.Graph as G +import Pattern.Graph.GraphClassifier (GraphValue(..)) + +-- Note: 'fromPatternGraph' is defined in "Pattern.PatternGraph" to avoid +-- a circular import cycle. The Map imports are kept for memoizeIncidentRels. + +-- ============================================================================ +-- TraversalDirection +-- ============================================================================ + +-- | The two orientations along a directed relationship. +-- +-- 'Forward' follows the relationship from source to target; +-- 'Backward' follows it from target to source.
+data TraversalDirection = Forward | Backward + deriving (Eq, Show) + +-- ============================================================================ +-- TraversalWeight +-- ============================================================================ + +-- | A function assigning a traversal cost to each (relationship, direction) pair. +-- +-- Infinity (@1\/0 :: Double@) encodes impassability — traversal is blocked in +-- that direction. Non-negative values encode traversal cost. Negative weights +-- are not supported by the standard Dijkstra-based algorithms. +-- +-- Canonical values: 'undirected', 'directed', 'directedReverse'. +type TraversalWeight v = Pattern v -> TraversalDirection -> Double + +-- | Uniform cost in both directions. Direction is ignored. +undirected :: TraversalWeight v +undirected _ _ = 1.0 + +-- | Forward traversal only. Backward traversal is impassable. +directed :: TraversalWeight v +directed _ Forward = 1.0 +directed _ Backward = 1 / 0 + +-- | Backward traversal only. Forward traversal is impassable. +directedReverse :: TraversalWeight v +directedReverse _ Forward = 1 / 0 +directedReverse _ Backward = 1.0 + +-- ============================================================================ +-- GraphQuery +-- ============================================================================ + +-- | A record-of-functions representing a graph query interface. +-- +-- Construct via 'fromGraphLens' or 'fromPatternGraph'. Compose with +-- 'frameQuery' and 'memoizeIncidentRels'. +-- +-- == Performance note (Haskell-specific) +-- +-- The hot-path fields ('queryIncidentRels', 'querySource', 'queryTarget', +-- 'queryDegree') are function-typed. GHC will inline their applications at +-- call sites when the 'GraphQuery' value is known statically; use +-- @{-# INLINE #-}@ on algorithms that receive 'GraphQuery' as a parameter +-- to encourage this. 
@{-# UNPACK #-}@ does not apply here because all fields
+-- are either function types or boxed list types — neither can be unboxed.
+--
+-- == Invariants
+--
+-- * @querySource r = Just s@ implies @s ∈ queryNodes@.
+-- * @queryTarget r = Just t@ implies @t ∈ queryNodes@.
+-- * @r ∈ queryIncidentRels n@ implies @querySource r = Just n ∨ queryTarget r = Just n@.
+-- * @queryDegree n = length (queryIncidentRels n)@ (default; implementations may be faster).
+-- * @queryNodeById (identify (value n)) = Just n@ for all @n ∈ queryNodes@.
+-- * @queryRelationshipById (identify (value r)) = Just r@ for all @r ∈ queryRelationships@.
+-- * @queryContainers@ returns only direct containers — does not recurse transitively.
+data GraphQuery v = GraphQuery
+  { queryNodes :: [Pattern v]
+    -- ^ Every element of the graph classified as a node. O(n).
+  , queryRelationships :: [Pattern v]
+    -- ^ Every element classified as a relationship. O(r).
+  , queryIncidentRels :: Pattern v -> [Pattern v]
+    -- ^ The relationships that have the given node as source or target. O(r).
+  , querySource :: Pattern v -> Maybe (Pattern v)
+    -- ^ First endpoint (source) of a relationship; 'Nothing' if the argument
+    -- is not a relationship. O(1).
+  , queryTarget :: Pattern v -> Maybe (Pattern v)
+    -- ^ Second endpoint (target) of a relationship; 'Nothing' if the argument
+    -- is not a relationship. O(1).
+  , queryDegree :: Pattern v -> Int
+    -- ^ Number of relationships incident to a node. O(r) by default; O(1) if
+    -- the implementation is indexed.
+  , queryNodeById :: Id v -> Maybe (Pattern v)
+    -- ^ Find a node by identity. O(log n) from PatternGraph; O(n) from GraphLens.
+  , queryRelationshipById :: Id v -> Maybe (Pattern v)
+    -- ^ Find a relationship by identity. O(log r) from PatternGraph; O(r) from
+    -- GraphLens.
+  , queryContainers :: Pattern v -> [Pattern v]
+    -- ^ The higher-order structures (relationships, walks, annotations) that
+    -- directly contain the given element. O(r + w + a).
+  }
+
+-- ============================================================================
+-- Constructors
+-- ============================================================================
+
+-- | Build a 'GraphQuery' on top of a 'GraphLens'.
+--
+-- Nodes, relationships, incident relationships, source, target and degree
+-- delegate straight to the matching "Pattern.Graph" function. The two by-id
+-- lookups walk the node \/ relationship list and return the first element
+-- whose identity matches, so they cost O(n) \/ O(r) (a 'GraphLens' carries no
+-- index). 'queryContainers' collects the relationships that have the element
+-- as source or target, followed by the walks that list it among their direct
+-- elements; annotations are not consulted.
+fromGraphLens :: (GraphValue v, Eq v) => GraphLens v -> GraphQuery v
+fromGraphLens lens = GraphQuery
+  { queryNodes            = G.nodes lens
+  , queryRelationships    = G.relationships lens
+  , queryIncidentRels     = G.incidentRels lens
+  , querySource           = G.source lens
+  , queryTarget           = G.target lens
+  , queryDegree           = G.degree lens
+  , queryNodeById         = \wanted -> firstWithId wanted (G.nodes lens)
+  , queryRelationshipById = \wanted -> firstWithId wanted (G.relationships lens)
+  , queryContainers       = containersOf
+  }
+  where
+    idOf p = identify (value p)
+
+    -- First pattern in the list that carries the wanted identity, if any.
+    -- The comprehension is lazy, so the scan stops at the first hit.
+    firstWithId wanted candidates =
+      case [ c | c <- candidates, idOf c == wanted ] of
+        (hit : _) -> Just hit
+        []        -> Nothing
+
+    -- Relationships touching @p@ first, then walks holding @p@ directly.
+    containersOf p = relsTouching ++ walksHolding
+      where
+        wantedId     = idOf p
+        sameId q     = idOf q == wantedId
+        -- 'any' over a 'Maybe' is False for 'Nothing'; the source is tested
+        -- before the target.
+        touches r    = any sameId (G.source lens r) || any sameId (G.target lens r)
+        relsTouching = filter touches (G.relationships lens)
+        walksHolding = [ w | w <- G.walks lens, any sameId (elements w) ]
+
+-- ============================================================================
+-- Combinators
+-- ============================================================================
+
+-- | Produce a 'GraphQuery' restricted to elements satisfying a predicate.
+--
+-- 'queryIncidentRels' on the framed query excludes relationships whose
+-- endpoints fall outside the frame. All 'GraphQuery' invariants are preserved.
+--
+-- NOTE(review): 'queryNodes', 'queryRelationships', the by-id lookups and
+-- 'queryContainers' keep an element when the predicate holds for that element
+-- itself. 'queryIncidentRels' and 'queryDegree' instead test the two endpoints
+-- of each relationship (not the relationship), and 'querySource' \/
+-- 'queryTarget' are passed through unfiltered. A kept relationship can
+-- therefore report an endpoint that is not in the framed 'queryNodes' —
+-- confirm this is intended.
+--
+-- == Example
+--
+-- > let subgraph = frameQuery (\(Pattern v _) -> v == "Person") gq
+frameQuery :: (Pattern v -> Bool) -> GraphQuery v -> GraphQuery v
+frameQuery include base = GraphQuery
+  { queryNodes            = filter include (queryNodes base)
+  , queryRelationships    = filter include (queryRelationships base)
+  , queryIncidentRels     = framedIncident
+  , querySource           = querySource base
+  , queryTarget           = queryTarget base
+  , queryDegree           = length . framedIncident
+  , queryNodeById         = keepIfIncluded . queryNodeById base
+  , queryRelationshipById = keepIfIncluded . queryRelationshipById base
+  , queryContainers       = filter include . queryContainers base
+  }
+  where
+    -- True only when both endpoints exist and both satisfy the predicate
+    -- ('any' over a 'Maybe' is False for 'Nothing').
+    insideFrame r =
+      any include (querySource base r) && any include (queryTarget base r)
+
+    -- Incident relationships of the base query with both endpoints in frame.
+    framedIncident n = [ r | r <- queryIncidentRels base n, insideFrame r ]
+
+    -- Drop a lookup result that falls outside the frame.
+    keepIfIncluded found = case found of
+      Just p | include p -> found
+      _                  -> Nothing
+
+-- | Wrap 'queryIncidentRels' with a pure memoization layer.
+--
+-- Builds a cache with one entry per node in 'queryNodes' and answers
+-- 'queryIncidentRels' from it. The cache is an ordinary lazy binding, so it
+-- is constructed the first time a lookup forces it rather than when the
+-- combinator is applied. 'queryDegree' is redefined as the length of the
+-- cached list; every other field is passed through unchanged.
+--
+-- Useful for algorithms (e.g. betweenness centrality) that call
+-- 'queryIncidentRels' repeatedly on the same node.
+--
+-- Note: The cache is per-'GraphQuery' value, not global.
+memoizeIncidentRels :: (GraphValue v, Ord (Id v)) => GraphQuery v -> GraphQuery v
+memoizeIncidentRels base =
+  let -- One entry per node of the wrapped query, keyed by node identity.
+      -- The binding is lazy: the map is built the first time it is consulted.
+      cache = Map.fromList
+        [ (identify (value n), queryIncidentRels base n)
+        | n <- queryNodes base
+        ]
+      -- Serve from the cache when the node is known. On a miss (a node that is
+      -- not listed in 'queryNodes base') delegate to the wrapped query instead
+      -- of answering @[]@, so the memoized query never disagrees with the
+      -- query it wraps.
+      cachedIncident n =
+        case Map.lookup (identify (value n)) cache of
+          Just rels -> rels
+          Nothing   -> queryIncidentRels base n
+  in base { queryIncidentRels = cachedIncident
+          -- Degree is derived from the same (cached) list.
+          , queryDegree = \n -> length (cachedIncident n)
+          }
diff --git a/libs/pattern/src/Pattern/PatternGraph.hs b/libs/pattern/src/Pattern/PatternGraph.hs index 977d2c0..a83ca6b 100644 --- a/libs/pattern/src/Pattern/PatternGraph.hs +++ b/libs/pattern/src/Pattern/PatternGraph.hs @@ -1,5 +1,5 @@ -- | PatternGraph: container for nodes, relationships, walks, and annotations --- backed by Pattern v, with merge-on-insert semantics and conversion to GraphLens. +-- backed by Pattern v, with merge-on-insert semantics. -- -- Unrecognized patterns are routed to 'pgOther' by the 'GraphClassifier'. -- Patterns that fail reconciliation are preserved in 'pgConflicts'. @@ -28,17 +28,16 @@ module Pattern.PatternGraph , fromPatternsWithPolicy , empty - -- * Conversion to GraphLens - , toGraphLens - , toGraphLensWithScope + -- * Conversion to GraphQuery + , fromPatternGraph ) where import Data.List (foldl') import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Pattern.Core (Pattern(..)) -import Pattern.Graph (GraphLens(..), mkGraphLens) import Pattern.Graph.GraphClassifier (GraphClass(..), GraphClassifier(..), GraphValue(..)) +import Pattern.Graph.GraphQuery (GraphQuery(..)) import qualified Pattern.Reconcile as Reconcile import Subject.Core (Subject(..), Symbol) import qualified Subject.Core as Subj @@ -219,27 +218,56 @@ fromPatternsWithPolicy classifier policy ps = foldl' (\g p -> mergeWithPolicy classifier policy p g) empty ps -- ============================================================================ --- toGraphLens +-- fromPatternGraph -- ============================================================================ --- | Convert
a 'PatternGraph' to a 'GraphLens' (scope pattern + atomic predicate) --- so existing graph algorithms can be used on the same data. +-- | Construct a 'GraphQuery v' directly from a 'PatternGraph extra v'. -- --- Returns 'Nothing' when the graph is empty, since there is no pattern value --- available to use as the scope decoration. Use 'toGraphLensWithScope' if you --- need a 'GraphLens' for an empty graph by providing the scope value explicitly. -toGraphLens :: GraphValue v => PatternGraph extra v -> Maybe (GraphLens v) -toGraphLens g = - let allPats = Map.elems (pgNodes g) ++ Map.elems (pgRelationships g) ++ Map.elems (pgWalks g) - in case allPats of - (p : _) -> Just (toGraphLensWithScope (value p) g) - [] -> Nothing - --- | Convert a 'PatternGraph' to a 'GraphLens' using the given scope value. --- Total: can be used for empty graphs, in which case the scope pattern has no elements. -toGraphLensWithScope :: GraphValue v => v -> PatternGraph extra v -> GraphLens v -toGraphLensWithScope scopeVal g = - let allPats = Map.elems (pgNodes g) ++ Map.elems (pgRelationships g) ++ Map.elems (pgWalks g) - scope = Pattern scopeVal allPats - isNodePred (Pattern _ els) = null els - in mkGraphLens scope isNodePred +-- Reads from the typed maps (@pgNodes@, @pgRelationships@, @pgWalks@, +-- @pgAnnotations@) without going through 'GraphLens'. Provides O(log n) +-- lookups for 'queryNodeById' and 'queryRelationshipById'. +-- +-- Preferred over constructing a 'GraphLens' for algorithm access. 
+fromPatternGraph :: (GraphValue v, Eq v) => PatternGraph extra v -> GraphQuery v
+fromPatternGraph pg = GraphQuery
+  { queryNodes            = Map.elems (pgNodes pg)
+  , queryRelationships    = Map.elems (pgRelationships pg)
+  , queryIncidentRels     = incidentTo
+  , querySource           = srcOf
+  , queryTarget           = tgtOf
+  , queryDegree           = length . incidentTo
+  , queryNodeById         = \i -> Map.lookup i (pgNodes pg)
+  , queryRelationshipById = \i -> Map.lookup i (pgRelationships pg)
+  , queryContainers       = \p ->
+      let wantedId = idOf p
+          -- Walks that list the element among their direct elements.
+          containingWalks =
+            filter (any (\e -> idOf e == wantedId) . elements)
+                   (Map.elems (pgWalks pg))
+          -- Annotations wrapping exactly this one element.
+          containingAnnotations =
+            filter (\a -> case elements a of
+                            [inner] -> idOf inner == wantedId
+                            _       -> False)
+                   (Map.elems (pgAnnotations pg))
+      in incidentTo p ++ containingWalks ++ containingAnnotations
+  }
+  where
+    idOf p = identify (value p)
+
+    -- Endpoints are read from the pattern's shape: only a pattern with exactly
+    -- two elements has a source and a target. 'srcOf' uses the same
+    -- two-element test as 'tgtOf' so that one-element annotations and longer
+    -- walks yield 'Nothing' for both, matching the documented "'Nothing' if
+    -- not a relationship" contract. (The check is shape-based only: any
+    -- two-element pattern is treated as a relationship.)
+    srcOf (Pattern _ [s, _]) = Just s
+    srcOf _                  = Nothing
+    tgtOf (Pattern _ [_, t]) = Just t
+    tgtOf _                  = Nothing
+
+    -- Stored relationships that have the given element as source or target.
+    -- Shared by 'queryIncidentRels', 'queryDegree' and 'queryContainers'.
+    incidentTo n = filter touches (Map.elems (pgRelationships pg))
+      where
+        wantedId  = idOf n
+        sameId q  = idOf q == wantedId
+        touches r = any sameId (srcOf r) || any sameId (tgtOf r)
+
diff --git a/libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs b/libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs new file mode 100644 index 0000000..eea59f9 --- /dev/null +++ b/libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs @@ -0,0 +1,491 @@ +-- | Unit and property tests for Pattern.Graph.Algorithms.
+-- +-- Covers T033–T037 (US1 traversal, path, structural, centrality, edge cases), +-- T040–T043 (US2 directed/undirected differentiation), +-- T057–T058 (US4 context helpers). +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -Wno-x-partial #-} +module Spec.Pattern.Graph.AlgorithmsSpec where + +import qualified Data.Map.Strict as Map +import qualified Data.Set as Set +import Data.List (sort, nub) +import Data.Maybe (isJust, isNothing) +import Test.Hspec +import Test.QuickCheck + +import Pattern.Core (Pattern(..), point) +import Pattern.Graph.GraphClassifier (GraphValue(..), canonicalClassifier) +import Pattern.Graph.GraphQuery +import Pattern.Graph.Algorithms +import Pattern.PatternGraph (PatternGraph(..), empty, merge, fromPatterns, fromPatternGraph) +import Subject.Core (Subject(..), Symbol(..)) + +-- ============================================================================ +-- Test helpers +-- ============================================================================ + +node :: Symbol -> Pattern Subject +node s = Pattern (Subject s Set.empty Map.empty) [] + +rel :: Symbol -> Symbol -> Symbol -> Pattern Subject +rel r a b = Pattern (Subject r Set.empty Map.empty) [node a, node b] + +annotation :: Symbol -> Symbol -> Pattern Subject +annotation a n = Pattern (Subject a Set.empty Map.empty) [node n] + +nodeId :: Pattern Subject -> Symbol +nodeId p = identify (value p) + +mkGQ :: [Pattern Subject] -> GraphQuery Subject +mkGQ ps = fromPatternGraph (fromPatterns canonicalClassifier ps) + +-- | Linear graph: A → B → C +linearGraph :: GraphQuery Subject +linearGraph = mkGQ + [ node "A", node "B", node "C" + , rel "r1" "A" "B" + , rel "r2" "B" "C" + ] + +-- | Disconnected graph: A–B and C–D (two components) +disconnectedGraph :: GraphQuery Subject +disconnectedGraph = mkGQ + [ node "A", node "B", node "C", node "D" + , rel "r1" "A" "B" + , rel "r2" "C" "D" + ] + +-- | Cyclic graph: A → B → C → A +cyclicGraph :: GraphQuery Subject +cyclicGraph = mkGQ + [ node 
"A", node "B", node "C" + , rel "r1" "A" "B" + , rel "r2" "B" "C" + , rel "r3" "C" "A" + ] + +-- | DAG: A → B, A → C, B → D, C → D +dagGraph :: GraphQuery Subject +dagGraph = mkGQ + [ node "A", node "B", node "C", node "D" + , rel "r1" "A" "B" + , rel "r2" "A" "C" + , rel "r3" "B" "D" + , rel "r4" "C" "D" + ] + +-- | Empty graph +emptyGraph :: GraphQuery Subject +emptyGraph = fromPatternGraph (empty :: PatternGraph () Subject) + +-- ============================================================================ +-- T033: Traversal and path algorithm tests +-- ============================================================================ + +spec :: Spec +spec = do + describe "Pattern.Graph.Algorithms" $ do + + -- ----------------------------------------------------------------------- + -- bfs + -- ----------------------------------------------------------------------- + describe "bfs (T033)" $ do + + it "bfs from A in linear graph visits all three nodes" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let result = bfs linearGraph undirected nodeA + length result `shouldBe` 3 + + it "bfs from A includes A, B, C" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let result = bfs linearGraph undirected nodeA + sort (map nodeId result) `shouldBe` sort ["A", "B", "C"] + + it "bfs from A with directed weight only reaches B and C (not reverse)" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let result = bfs linearGraph directed nodeA + sort (map nodeId result) `shouldBe` sort ["A", "B", "C"] + + it "bfs from C with directed weight only reaches C (no forward edges)" $ do + let nodeC = head [ n | n <- queryNodes linearGraph, nodeId n == "C" ] + let result = bfs linearGraph directed nodeC + map nodeId result `shouldBe` ["C"] + + it "bfs on empty graph from any node returns just that node" $ do + let n = node "solo" + let gq = mkGQ [n] + let nodeN = head (queryNodes gq) + bfs gq undirected 
nodeN `shouldBe` [nodeN] + + -- ----------------------------------------------------------------------- + -- dfs + -- ----------------------------------------------------------------------- + describe "dfs (T033)" $ do + + it "dfs from A in linear graph visits all three nodes" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let result = dfs linearGraph undirected nodeA + length result `shouldBe` 3 + + it "dfs from A includes A, B, C" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let result = dfs linearGraph undirected nodeA + sort (map nodeId result) `shouldBe` sort ["A", "B", "C"] + + -- ----------------------------------------------------------------------- + -- shortestPath + -- ----------------------------------------------------------------------- + describe "shortestPath (T033, T037)" $ do + + it "shortestPath A→C in linear graph returns Just [A,B,C]" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let nodeC = head [ n | n <- queryNodes linearGraph, nodeId n == "C" ] + let result = shortestPath linearGraph undirected nodeA nodeC + fmap (map nodeId) result `shouldBe` Just ["A", "B", "C"] + + it "shortestPath A→A returns Just [A]" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let result = shortestPath linearGraph undirected nodeA nodeA + fmap (map nodeId) result `shouldBe` Just ["A"] + + it "shortestPath returns Nothing when no path exists (disconnected)" $ do + let nodeA = head [ n | n <- queryNodes disconnectedGraph, nodeId n == "A" ] + let nodeC = head [ n | n <- queryNodes disconnectedGraph, nodeId n == "C" ] + shortestPath disconnectedGraph directed nodeA nodeC `shouldBe` Nothing + + it "shortestPath on empty graph returns Nothing" $ do + let gq = mkGQ [node "A", node "B"] + let nodeA = head [ n | n <- queryNodes gq, nodeId n == "A" ] + let nodeB = head [ n | n <- queryNodes gq, nodeId n == "B" ] + shortestPath gq undirected nodeA 
nodeB `shouldBe` Nothing + + -- ----------------------------------------------------------------------- + -- hasPath + -- ----------------------------------------------------------------------- + describe "hasPath (T033)" $ do + + it "hasPath A→C in linear undirected graph is True" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let nodeC = head [ n | n <- queryNodes linearGraph, nodeId n == "C" ] + hasPath linearGraph undirected nodeA nodeC `shouldBe` True + + it "hasPath A→C in linear directed graph is True" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let nodeC = head [ n | n <- queryNodes linearGraph, nodeId n == "C" ] + hasPath linearGraph directed nodeA nodeC `shouldBe` True + + it "hasPath C→A in linear directed graph is False" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let nodeC = head [ n | n <- queryNodes linearGraph, nodeId n == "C" ] + hasPath linearGraph directed nodeC nodeA `shouldBe` False + + it "hasPath A→C in disconnected directed graph is False" $ do + let nodeA = head [ n | n <- queryNodes disconnectedGraph, nodeId n == "A" ] + let nodeC = head [ n | n <- queryNodes disconnectedGraph, nodeId n == "C" ] + hasPath disconnectedGraph directed nodeA nodeC `shouldBe` False + + -- ----------------------------------------------------------------------- + -- allPaths + -- ----------------------------------------------------------------------- + describe "allPaths (T033, T037)" $ do + + it "allPaths A→C in linear graph returns one path" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let nodeC = head [ n | n <- queryNodes linearGraph, nodeId n == "C" ] + let paths = allPaths linearGraph undirected nodeA nodeC + length paths `shouldBe` 1 + + it "allPaths in DAG A→D returns two paths (via B and via C)" $ do + let nodeA = head [ n | n <- queryNodes dagGraph, nodeId n == "A" ] + let nodeD = head [ n | n <- queryNodes dagGraph, 
nodeId n == "D" ] + let paths = allPaths dagGraph directed nodeA nodeD + length paths `shouldBe` 2 + + it "allPaths on empty graph (no relationships) returns []" $ do + let gq = mkGQ [node "A", node "B"] + let nodeA = head [ n | n <- queryNodes gq, nodeId n == "A" ] + let nodeB = head [ n | n <- queryNodes gq, nodeId n == "B" ] + allPaths gq undirected nodeA nodeB `shouldBe` [] + + -- ----------------------------------------------------------------------- + -- isNeighbor + -- ----------------------------------------------------------------------- + describe "isNeighbor (T033)" $ do + + it "A and B are neighbors in linear graph (undirected)" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let nodeB = head [ n | n <- queryNodes linearGraph, nodeId n == "B" ] + isNeighbor linearGraph undirected nodeA nodeB `shouldBe` True + + it "A and C are not direct neighbors in linear graph" $ do + let nodeA = head [ n | n <- queryNodes linearGraph, nodeId n == "A" ] + let nodeC = head [ n | n <- queryNodes linearGraph, nodeId n == "C" ] + isNeighbor linearGraph undirected nodeA nodeC `shouldBe` False + + -- ----------------------------------------------------------------------- + -- T034: Structural algorithms + -- ----------------------------------------------------------------------- + describe "connectedComponents (T034)" $ do + + it "linear graph has one connected component (undirected)" $ do + let comps = connectedComponents linearGraph undirected + length comps `shouldBe` 1 + + it "disconnected graph has two components (undirected)" $ do + let comps = connectedComponents disconnectedGraph undirected + length comps `shouldBe` 2 + + it "empty graph has zero components" $ do + let comps = connectedComponents emptyGraph undirected + comps `shouldBe` [] + + describe "topologicalSort (T034, T037)" $ do + + it "topologicalSort on DAG returns Just ordering" $ do + let result = topologicalSort dagGraph + isJust result `shouldBe` True + + it 
"topologicalSort on DAG: A appears before D" $ do + let Just order = topologicalSort dagGraph + let ids = map nodeId order + let posA = length (takeWhile (/= "A") ids) + let posD = length (takeWhile (/= "D") ids) + posA < posD `shouldBe` True + + it "topologicalSort on cyclic graph returns Nothing" $ do + topologicalSort cyclicGraph `shouldBe` Nothing + + describe "hasCycle (T034)" $ do + + it "hasCycle on cyclic graph is True" $ + hasCycle cyclicGraph `shouldBe` True + + it "hasCycle on DAG is False" $ + hasCycle dagGraph `shouldBe` False + + it "hasCycle on linear graph is False" $ + hasCycle linearGraph `shouldBe` False + + describe "minimumSpanningTree (T034)" $ do + + it "MST of linear graph includes all nodes" $ do + let mst = minimumSpanningTree linearGraph undirected + length mst `shouldBe` 3 + + it "MST of disconnected graph includes nodes from both components (spanning forest)" $ do + let mst = minimumSpanningTree disconnectedGraph undirected + length mst `shouldBe` 4 + + -- ----------------------------------------------------------------------- + -- T035: Centrality algorithms + -- ----------------------------------------------------------------------- + describe "degreeCentrality (T035)" $ do + + it "degreeCentrality returns a map with one entry per node" $ do + let dc = degreeCentrality linearGraph + Map.size dc `shouldBe` 3 + + it "degreeCentrality of B in linear graph is higher than A and C" $ do + let dc = degreeCentrality linearGraph + let dcA = Map.findWithDefault 0.0 "A" dc + let dcB = Map.findWithDefault 0.0 "B" dc + let dcC = Map.findWithDefault 0.0 "C" dc + dcB > dcA `shouldBe` True + dcB > dcC `shouldBe` True + + describe "betweennessCentrality (T035)" $ do + + it "betweennessCentrality returns a map with one entry per node" $ do + let bc = betweennessCentrality linearGraph undirected + Map.size bc `shouldBe` 3 + + it "betweennessCentrality of B is highest in linear graph" $ do + let bc = betweennessCentrality linearGraph undirected + let bcA 
= Map.findWithDefault 0.0 "A" bc + let bcB = Map.findWithDefault 0.0 "B" bc + let bcC = Map.findWithDefault 0.0 "C" bc + bcB >= bcA `shouldBe` True + bcB >= bcC `shouldBe` True + + -- ----------------------------------------------------------------------- + -- T036: Property — fromGraphLens and fromPatternGraph produce same connectedComponents + -- NOTE(review): as written, both gqPG and gqGL below are built with fromPatternGraph, so fromGraphLens is not exercised by this test. + -- ----------------------------------------------------------------------- + describe "representation equivalence property (T036)" $ do + + it "connectedComponents count is consistent for same PatternGraph" $ do + let pgGraph = fromPatterns canonicalClassifier + [ node "A", node "B", node "C" + , rel "r1" "A" "B", rel "r2" "B" "C" + ] + let gqPG = fromPatternGraph pgGraph + let gqGL = fromPatternGraph pgGraph + length (connectedComponents gqPG undirected) `shouldBe` + length (connectedComponents gqGL undirected) + + -- ----------------------------------------------------------------------- + -- T040–T043: US2 directed/undirected differentiation + -- ----------------------------------------------------------------------- + describe "TraversalWeight differentiation (T040–T043)" $ do + + let dirGraph = mkGQ [ node "A", node "B", rel "r" "A" "B" ] + + it "T040: hasPath directed A→B = True" $ do + let nodeA = head [ n | n <- queryNodes dirGraph, nodeId n == "A" ] + let nodeB = head [ n | n <- queryNodes dirGraph, nodeId n == "B" ] + hasPath dirGraph directed nodeA nodeB `shouldBe` True + + it "T040: hasPath directed B→A = False" $ do + let nodeA = head [ n | n <- queryNodes dirGraph, nodeId n == "A" ] + let nodeB = head [ n | n <- queryNodes dirGraph, nodeId n == "B" ] + hasPath dirGraph directed nodeB nodeA `shouldBe` False + + it "T040: hasPath undirected B→A = True" $ do + let nodeA = head [ n | n <- queryNodes dirGraph, nodeId n == "A" ] + let nodeB = head [ n | n <- queryNodes dirGraph, nodeId n == "B" ] + hasPath dirGraph undirected nodeB nodeA `shouldBe` True + + it "T043: hasPath directedReverse B→A = True" $ do + let
nodeA = head [ n | n <- queryNodes dirGraph, nodeId n == "A" ] + let nodeB = head [ n | n <- queryNodes dirGraph, nodeId n == "B" ] + hasPath dirGraph directedReverse nodeB nodeA `shouldBe` True + + it "T043: hasPath directedReverse A→B = False" $ do + let nodeA = head [ n | n <- queryNodes dirGraph, nodeId n == "A" ] + let nodeB = head [ n | n <- queryNodes dirGraph, nodeId n == "B" ] + hasPath dirGraph directedReverse nodeA nodeB `shouldBe` False + + it "T041: custom weight function — shortestPath prefers lower-cost path" $ do + -- Graph: A→B (cost 10), A→C→B (cost 1+1=2) + -- Custom weight: r_expensive costs 10, others cost 1 + let gq = mkGQ + [ node "A", node "B", node "C" + , rel "r_expensive" "A" "B" + , rel "r_cheap1" "A" "C" + , rel "r_cheap2" "C" "B" + ] + let customWeight :: TraversalWeight Subject + customWeight p Forward + | identify (value p) == "r_expensive" = 10.0 + | otherwise = 1.0 + customWeight _ Backward = 1 / 0 + let nodeA = head [ n | n <- queryNodes gq, nodeId n == "A" ] + let nodeB = head [ n | n <- queryNodes gq, nodeId n == "B" ] + let result = shortestPath gq customWeight nodeA nodeB + -- Should take the cheap path A→C→B, length 3 nodes + fmap length result `shouldBe` Just 3 + + it "T042: connectedComponents undirected ≤ components directed for directed graph" $ do + let compsUndirected = connectedComponents dirGraph undirected + let compsDirected = connectedComponents dirGraph directed + length compsUndirected <= length compsDirected `shouldBe` True + + -- ----------------------------------------------------------------------- + -- T057–T058: US4 context query helpers + -- ----------------------------------------------------------------------- + describe "queryAnnotationsOf (T057)" $ do + + it "returns annotation containing the node" $ do + let pg = fromPatterns canonicalClassifier + [ node "A", annotation "ann1" "A" ] + let gq = fromPatternGraph pg + case [ n | n <- queryNodes gq, nodeId n == "A" ] of + [] -> expectationFailure "node A 
not found" + (nodeA:_) -> do + let anns = queryAnnotationsOf canonicalClassifier gq nodeA + length anns `shouldBe` 1 + identify (value (head anns)) `shouldBe` "ann1" + + it "returns empty list when node has no annotations" $ do + let pg = fromPatterns canonicalClassifier [node "A", node "B", rel "r" "A" "B"] + let gq = fromPatternGraph pg + case [ n | n <- queryNodes gq, nodeId n == "A" ] of + [] -> expectationFailure "node A not found" + (nodeA:_) -> + queryAnnotationsOf canonicalClassifier gq nodeA `shouldBe` [] + + describe "queryWalksContaining (T057)" $ do + + it "returns empty list when no walks exist" $ do + let gq = linearGraph + case [ n | n <- queryNodes gq, nodeId n == "A" ] of + [] -> expectationFailure "node A not found" + (nodeA:_) -> + queryWalksContaining canonicalClassifier gq nodeA `shouldBe` [] + + describe "queryCoMembers (T058)" $ do + + it "returns other elements that share the container (spec.md:73)" $ do + let pg = fromPatterns canonicalClassifier + [ node "A", node "B", rel "r" "A" "B" ] + let gq = fromPatternGraph pg + case ( [ n | n <- queryNodes gq, nodeId n == "A" ] + , [ n | n <- queryNodes gq, nodeId n == "B" ] + , [ r | r <- queryRelationships gq, identify (value r) == "r" ] + ) of + (nodeA:_, nodeB:_, relR:_) -> do + let coMembers = queryCoMembers gq nodeA relR + -- Co-members of nodeA within relationship r are the other elements in r (nodeB) + coMembers `shouldBe` [nodeB] + _ -> expectationFailure "expected node A, B and rel r" + + -- ========================================================================= + -- Representation independence (T073b / SC-007) + -- ========================================================================= + + describe "Representation independence (T073b)" $ do + + let nodeA = node "A" + nodeB = node "B" + nodeC = node "C" + relAB = rel "r1" "A" "B" + relBC = rel "r2" "B" "C" + -- Hand-built GraphQuery over a fixed triangle A→B→C + elems :: Pattern Subject -> [Pattern Subject] + elems (Pattern _ es) = es 
+ handBuiltGQ :: GraphQuery Subject + handBuiltGQ = GraphQuery + { queryNodes = [nodeA, nodeB, nodeC] + , queryRelationships = [relAB, relBC] + , queryIncidentRels = \n -> + let nId = nodeId n + in filter (\r -> case elems r of + (s:t:_) -> nodeId s == nId || nodeId t == nId + _ -> False) + [relAB, relBC] + , querySource = \r -> case elems r of + (s:_) -> Just s + _ -> Nothing + , queryTarget = \r -> case elems r of + (_:t:_) -> Just t + _ -> Nothing + , queryDegree = \n -> + let nId = nodeId n + in length $ filter (\r -> case elems r of + (s:t:_) -> nodeId s == nId || nodeId t == nId + _ -> False) + [relAB, relBC] + , queryNodeById = \i -> + case filter (\n -> nodeId n == i) [nodeA, nodeB, nodeC] of + (n:_) -> Just n + [] -> Nothing + , queryRelationshipById = \i -> + case filter (\r -> identify (value r) == i) [relAB, relBC] of + (r:_) -> Just r + [] -> Nothing + , queryContainers = \_ -> [] + } + + it "bfs on hand-built GraphQuery returns all reachable nodes" $ do + let reachable = bfs handBuiltGQ undirected nodeA + length reachable `shouldBe` 3 + + it "shortestPath on hand-built GraphQuery finds A→C" $ do + let path = shortestPath handBuiltGQ undirected nodeA nodeC + case path of + Just ps -> length ps `shouldBe` 3 + Nothing -> expectationFailure "expected a path A→B→C" + + it "connectedComponents on hand-built GraphQuery returns one component" $ do + let comps = connectedComponents handBuiltGQ undirected + length comps `shouldBe` 1 diff --git a/libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs b/libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs new file mode 100644 index 0000000..4dd82d0 --- /dev/null +++ b/libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs @@ -0,0 +1,330 @@ +-- | Unit and property tests for Pattern.Graph.GraphQuery. +-- +-- Covers T015 (construction), T016 (property: fromGraphLens ≡ fromPatternGraph), +-- T017 (TraversalWeight canonical values), and T056 (queryContainers). 
+{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -Wno-x-partial #-} +module Spec.Pattern.Graph.GraphQuerySpec where + +import qualified Data.Map.Strict as Map +import qualified Data.Set as Set +import Data.List (sort) +import Data.Maybe (isJust, isNothing) +import Test.Hspec +import Test.QuickCheck + +import Pattern.Core (Pattern(..), point) +import Pattern.Graph (GraphLens(..), mkGraphLens) +import Pattern.Graph.GraphClassifier (GraphValue(..), canonicalClassifier) +import Pattern.Graph.GraphQuery +import Pattern.PatternGraph (PatternGraph(..), empty, merge, fromPatterns, fromPatternGraph) +import Subject.Core (Subject(..), Symbol(..)) + +-- ============================================================================ +-- Test helpers +-- ============================================================================ + +-- | Atomic node (0 elements) — classified as GNode by canonicalClassifier +node :: Symbol -> Pattern Subject +node s = Pattern (Subject s Set.empty Map.empty) [] + +-- | Relationship (2 node elements) — classified as GRelationship +rel :: Symbol -> Symbol -> Symbol -> Pattern Subject +rel r a b = Pattern (Subject r Set.empty Map.empty) [node a, node b] + +-- | Annotation (1 element) — classified as GAnnotation +annotation :: Symbol -> Symbol -> Pattern Subject +annotation a n = Pattern (Subject a Set.empty Map.empty) [node n] + +-- | A small known graph: nodes A, B, C; relationships A→B, B→C +knownPatternGraph :: PatternGraph () Subject +knownPatternGraph = fromPatterns canonicalClassifier + [ node "A" + , node "B" + , node "C" + , rel "r1" "A" "B" + , rel "r2" "B" "C" + ] + +-- | Equivalent GraphLens for the same graph +knownGraphLens :: GraphLens Subject +knownGraphLens = + let scope = Pattern (Subject "scope" Set.empty Map.empty) + [ node "A", node "B", node "C" + , rel "r1" "A" "B", rel "r2" "B" "C" + ] + isAtomic (Pattern _ els) = null els + in mkGraphLens scope isAtomic + +-- | Identity extractor shorthand +nodeId :: Pattern Subject -> 
Symbol +nodeId p = identify (value p) + +-- ============================================================================ +-- T015: Unit tests for GraphQuery construction +-- ============================================================================ + +spec :: Spec +spec = do + describe "Pattern.Graph.GraphQuery" $ do + + -- ----------------------------------------------------------------------- + -- T017: TraversalWeight canonical values + -- ----------------------------------------------------------------------- + describe "TraversalWeight canonical values (T017)" $ do + + it "undirected returns 1.0 for Forward" $ + undirected (node "x") Forward `shouldBe` 1.0 + + it "undirected returns 1.0 for Backward" $ + undirected (node "x") Backward `shouldBe` 1.0 + + it "directed returns 1.0 for Forward" $ + directed (node "x") Forward `shouldBe` 1.0 + + it "directed returns infinity for Backward" $ + isInfinite (directed (node "x") Backward) `shouldBe` True + + it "directedReverse returns infinity for Forward" $ + isInfinite (directedReverse (node "x") Forward) `shouldBe` True + + it "directedReverse returns 1.0 for Backward" $ + directedReverse (node "x") Backward `shouldBe` 1.0 + + -- ----------------------------------------------------------------------- + -- T015: fromPatternGraph — all nine fields + -- ----------------------------------------------------------------------- + describe "fromPatternGraph — all nine fields (T015)" $ do + let gq = fromPatternGraph knownPatternGraph + + it "queryNodes returns all three nodes" $ + length (queryNodes gq) `shouldBe` 3 + + it "queryRelationships returns both relationships" $ + length (queryRelationships gq) `shouldBe` 2 + + it "queryIncidentRels for A returns r1 only" $ do + let nodeA = head [ n | n <- queryNodes gq, nodeId n == "A" ] + length (queryIncidentRels gq nodeA) `shouldBe` 1 + + it "queryIncidentRels for B returns r1 and r2" $ do + let nodeB = head [ n | n <- queryNodes gq, nodeId n == "B" ] + length 
(queryIncidentRels gq nodeB) `shouldBe` 2 + + it "querySource returns source of r1 (A)" $ do + let r = head [ r' | r' <- queryRelationships gq + , identify (value r') == "r1" ] + fmap nodeId (querySource gq r) `shouldBe` Just "A" + + it "queryTarget returns target of r1 (B)" $ do + let r = head [ r' | r' <- queryRelationships gq + , identify (value r') == "r1" ] + fmap nodeId (queryTarget gq r) `shouldBe` Just "B" + + it "queryDegree for A is 1" $ do + let nodeA = head [ n | n <- queryNodes gq, nodeId n == "A" ] + queryDegree gq nodeA `shouldBe` 1 + + it "queryDegree for B is 2" $ do + let nodeB = head [ n | n <- queryNodes gq, nodeId n == "B" ] + queryDegree gq nodeB `shouldBe` 2 + + it "queryNodeById finds A" $ + fmap nodeId (queryNodeById gq "A") `shouldBe` Just "A" + + it "queryNodeById returns Nothing for unknown id" $ + queryNodeById gq "Z" `shouldBe` Nothing + + it "queryRelationshipById finds r1" $ + fmap (identify . value) (queryRelationshipById gq "r1") `shouldBe` Just "r1" + + it "queryRelationshipById returns Nothing for unknown id" $ + queryRelationshipById gq "rX" `shouldBe` Nothing + + it "queryContainers for node A returns r1 (A is source)" $ do + let nodeA = head [ n | n <- queryNodes gq, nodeId n == "A" ] + let containers = queryContainers gq nodeA + any (\c -> identify (value c) == "r1") containers `shouldBe` True + + -- ----------------------------------------------------------------------- + -- T015: fromGraphLens — all nine fields + -- ----------------------------------------------------------------------- + describe "fromGraphLens — all nine fields (T015)" $ do + let gq = fromGraphLens knownGraphLens + + it "queryNodes returns all three nodes" $ + length (queryNodes gq) `shouldBe` 3 + + it "queryRelationships returns both relationships" $ + length (queryRelationships gq) `shouldBe` 2 + + it "querySource returns source of r1 (A)" $ do + let r = head [ r' | r' <- queryRelationships gq + , identify (value r') == "r1" ] + fmap nodeId (querySource 
gq r) `shouldBe` Just "A" + + it "queryTarget returns target of r1 (B)" $ do + let r = head [ r' | r' <- queryRelationships gq + , identify (value r') == "r1" ] + fmap nodeId (queryTarget gq r) `shouldBe` Just "B" + + it "queryNodeById finds A" $ + fmap nodeId (queryNodeById gq "A") `shouldBe` Just "A" + + it "queryNodeById returns Nothing for unknown id" $ + queryNodeById gq "Z" `shouldBe` Nothing + + -- ----------------------------------------------------------------------- + -- T016: Property — fromGraphLens and fromPatternGraph agree on equivalent graphs + -- ----------------------------------------------------------------------- + describe "fromGraphLens and fromPatternGraph equivalence (T016)" $ do + + it "queryNodes count matches between fromGraphLens and fromPatternGraph" $ do + let gqPG = fromPatternGraph knownPatternGraph + let gqGL = fromGraphLens knownGraphLens + length (queryNodes gqPG) `shouldBe` length (queryNodes gqGL) + + it "queryRelationships count matches" $ do + let gqPG = fromPatternGraph knownPatternGraph + let gqGL = fromGraphLens knownGraphLens + length (queryRelationships gqPG) `shouldBe` length (queryRelationships gqGL) + + it "querySource agrees for r1" $ do + let gqPG = fromPatternGraph knownPatternGraph + let gqGL = fromGraphLens knownGraphLens + let rPG = head [ r | r <- queryRelationships gqPG, identify (value r) == "r1" ] + let rGL = head [ r | r <- queryRelationships gqGL, identify (value r) == "r1" ] + fmap nodeId (querySource gqPG rPG) `shouldBe` fmap nodeId (querySource gqGL rGL) + + it "queryTarget agrees for r1" $ do + let gqPG = fromPatternGraph knownPatternGraph + let gqGL = fromGraphLens knownGraphLens + let rPG = head [ r | r <- queryRelationships gqPG, identify (value r) == "r1" ] + let rGL = head [ r | r <- queryRelationships gqGL, identify (value r) == "r1" ] + fmap nodeId (queryTarget gqPG rPG) `shouldBe` fmap nodeId (queryTarget gqGL rGL) + + it "queryIncidentRels count for B agrees" $ do + let gqPG = fromPatternGraph 
knownPatternGraph + let gqGL = fromGraphLens knownGraphLens + let nodeBpg = head [ n | n <- queryNodes gqPG, nodeId n == "B" ] + let nodeBgl = head [ n | n <- queryNodes gqGL, nodeId n == "B" ] + length (queryIncidentRels gqPG nodeBpg) `shouldBe` length (queryIncidentRels gqGL nodeBgl) + + -- ----------------------------------------------------------------------- + -- T056: queryContainers unit tests + -- ----------------------------------------------------------------------- + describe "queryContainers (T056)" $ do + + it "node with no containers returns empty list" $ do + let pg = fromPatterns canonicalClassifier [node "solo"] + let gq = fromPatternGraph pg + let n = head (queryNodes gq) + queryContainers gq n `shouldBe` [] + + it "node participating in a relationship is contained by it" $ do + let pg = fromPatterns canonicalClassifier [node "A", node "B", rel "r" "A" "B"] + let gq = fromPatternGraph pg + let nodeA = head [ n | n <- queryNodes gq, nodeId n == "A" ] + let containers = queryContainers gq nodeA + length containers `shouldBe` 1 + identify (value (head containers)) `shouldBe` "r" + + it "node with annotation is contained by it" $ do + let pg = fromPatterns canonicalClassifier [node "A", annotation "ann" "A"] + let gq = fromPatternGraph pg + let nodeA = head [ n | n <- queryNodes gq, nodeId n == "A" ] + let containers = queryContainers gq nodeA + any (\c -> identify (value c) == "ann") containers `shouldBe` True + + -- ----------------------------------------------------------------------- + -- frameQuery tests (T047, T048) + -- ----------------------------------------------------------------------- + describe "frameQuery (T047, T048)" $ do + + it "frameQuery restricts queryNodes to matching elements" $ do + let pg = fromPatterns canonicalClassifier + [ node "A", node "B", node "C" + , rel "r1" "A" "B", rel "r2" "B" "C" + ] + let gq = fromPatternGraph pg + let framed = frameQuery (\p -> identify (value p) `elem` ["A", "B", "r1"]) gq + let ns = 
queryNodes framed + length ns `shouldBe` 2 + all (\n -> nodeId n `elem` ["A", "B"]) ns `shouldBe` True + + it "frameQuery excludes cross-frame relationships from queryIncidentRels" $ do + let pg = fromPatterns canonicalClassifier + [ node "A", node "B", node "C" + , rel "r1" "A" "B", rel "r2" "B" "C" + ] + let gq = fromPatternGraph pg + -- Frame includes A and B but not C or r2 + let framed = frameQuery (\p -> identify (value p) `elem` ["A", "B", "r1"]) gq + let nodeB = head [ n | n <- queryNodes framed, nodeId n == "B" ] + -- r2 connects B→C; C is outside frame, so r2 should be excluded + let incRels = queryIncidentRels framed nodeB + all (\r -> identify (value r) /= "r2") incRels `shouldBe` True + + it "frameQuery producing empty graph — queryNodes returns []" $ do + let gq = fromPatternGraph knownPatternGraph + let framed = frameQuery (const False) gq + queryNodes framed `shouldBe` [] + + it "frameQuery empty graph — queryRelationships returns []" $ do + let gq = fromPatternGraph knownPatternGraph + let framed = frameQuery (const False) gq + queryRelationships framed `shouldBe` [] + + -- ----------------------------------------------------------------------- + -- memoizeIncidentRels tests (T049, T050) + -- ----------------------------------------------------------------------- + describe "memoizeIncidentRels (T049, T050)" $ do + + it "memoizeIncidentRels returns same incident rels as unwrapped for all nodes" $ do + let gq = fromPatternGraph knownPatternGraph + let memo = memoizeIncidentRels gq + let ns = queryNodes gq + all (\n -> length (queryIncidentRels memo n) == length (queryIncidentRels gq n)) ns + `shouldBe` True + + it "memoizeIncidentRels preserves queryNodes" $ do + let gq = fromPatternGraph knownPatternGraph + let memo = memoizeIncidentRels gq + length (queryNodes memo) `shouldBe` length (queryNodes gq) + + it "memoizeIncidentRels preserves queryRelationships" $ do + let gq = fromPatternGraph knownPatternGraph + let memo = memoizeIncidentRels gq + 
length (queryRelationships memo) `shouldBe` length (queryRelationships gq) + + it "frameQuery then memoizeIncidentRels composition works" $ do + let gq = fromPatternGraph knownPatternGraph + let composed = memoizeIncidentRels . frameQuery (\p -> nodeId p `elem` ["A", "B", "r1"]) $ gq + length (queryNodes composed) `shouldBe` 2 + + -- ----------------------------------------------------------------------- + -- T051: frameQuery preserves GraphQuery invariants + -- ----------------------------------------------------------------------- + describe "frameQuery invariants (T051)" $ do + + it "querySource r = Just s implies s is in queryNodes of framed result" $ do + let gq = fromPatternGraph knownPatternGraph + let framed = frameQuery (\p -> nodeId p `elem` ["A", "B", "r1"]) gq + let rels = queryRelationships framed + let ns = queryNodes framed + let nodeIds = map nodeId ns + all (\r -> case querySource framed r of + Nothing -> True + Just s -> nodeId s `elem` nodeIds) rels + `shouldBe` True + + it "queryTarget r = Just t implies t is in queryNodes of framed result" $ do + let gq = fromPatternGraph knownPatternGraph + let framed = frameQuery (\p -> nodeId p `elem` ["A", "B", "r1"]) gq + let rels = queryRelationships framed + let ns = queryNodes framed + let nodeIds = map nodeId ns + all (\r -> case queryTarget framed r of + Nothing -> True + Just t -> nodeId t `elem` nodeIds) rels + `shouldBe` True diff --git a/libs/pattern/tests/Spec/Pattern/PatternGraphSpec.hs b/libs/pattern/tests/Spec/Pattern/PatternGraphSpec.hs index 299f1b6..cb80ced 100644 --- a/libs/pattern/tests/Spec/Pattern/PatternGraphSpec.hs +++ b/libs/pattern/tests/Spec/Pattern/PatternGraphSpec.hs @@ -3,7 +3,6 @@ module Spec.Pattern.PatternGraphSpec where -import Data.Maybe (isNothing) import qualified Data.Map.Strict as Map import qualified Data.Set as Set import Pattern.Core (Pattern(..), pattern, point) @@ -13,13 +12,12 @@ import Pattern.PatternGraph empty, fromPatterns, fromPatternsWithPolicy, + 
fromPatternGraph, merge, mergeWithPolicy, - toGraphLens, - toGraphLensWithScope, ) import Pattern.Graph.GraphClassifier (GraphClass(..), GraphClassifier(..), canonicalClassifier, classify, classifyByShape) -import Pattern.Graph (nodes, relationships) +import Pattern.Graph.GraphQuery (queryNodes, queryRelationships) import Pattern.Reconcile (ReconciliationPolicy(..)) import Subject.Core (Subject(..), Symbol(..)) import Test.Hspec @@ -90,20 +88,15 @@ spec = do let three = Pattern (Subject (Symbol "t") Set.empty Map.empty) [node (Symbol "a"), node (Symbol "b"), node (Symbol "c")] classify canonicalClassifier three `shouldBe` GOther () - describe "toGraphLens" $ do - it "empty graph yields Nothing" $ do - (isNothing . toGraphLens) (empty :: PatternGraph () Subject) `shouldBe` True - it "nodes and relationships from lens match container" $ do + describe "fromPatternGraph" $ do + it "empty graph yields empty queryNodes" $ do + let gq = fromPatternGraph (empty :: PatternGraph () Subject) + length (queryNodes gq) `shouldBe` 0 + it "nodes and relationships match container" $ do let graph = fromPatterns canonicalClassifier [node "a", node "b", rel "r" "a" "b"] - let Just lens = toGraphLens graph - length (nodes lens) `shouldBe` 2 - length (relationships lens) `shouldBe` 1 - it "toGraphLensWithScope is total for empty graph" $ do - let g = empty :: PatternGraph () Subject - let scopeVal = Subject (Symbol "scope") Set.empty Map.empty - let lens = toGraphLensWithScope scopeVal g - length (nodes lens) `shouldBe` 0 - length (relationships lens) `shouldBe` 0 + let gq = fromPatternGraph graph + length (queryNodes gq) `shouldBe` 2 + length (queryRelationships gq) `shouldBe` 1 describe "mergeWithPolicy / fromPatternsWithPolicy (T011)" $ do it "mergeWithPolicy accepts reconciliation policy" $ do diff --git a/libs/pattern/tests/Test.hs b/libs/pattern/tests/Test.hs index fcf9418..3504405 100644 --- a/libs/pattern/tests/Test.hs +++ b/libs/pattern/tests/Test.hs @@ -5,6 +5,8 @@ import 
Test.Hspec import qualified Spec.Pattern.CoreSpec as CoreSpec import qualified Spec.Pattern.GraphSpec as GraphSpec import qualified Spec.Pattern.Graph.GraphClassifierSpec as GraphClassifierSpec +import qualified Spec.Pattern.Graph.GraphQuerySpec as GraphQuerySpec +import qualified Spec.Pattern.Graph.AlgorithmsSpec as AlgorithmsSpec import qualified Spec.Pattern.PatternGraphProperties as PatternGraphProperties import qualified Spec.Pattern.PatternGraphSpec as PatternGraphSpec import qualified Spec.Pattern.Properties as Properties @@ -20,6 +22,8 @@ testSpec = do CoreSpec.spec GraphSpec.spec GraphClassifierSpec.spec + GraphQuerySpec.spec + AlgorithmsSpec.spec PatternGraphSpec.spec PatternGraphProperties.spec Properties.spec diff --git a/specs/035-graph-query/checklists/requirements.md b/specs/035-graph-query/checklists/requirements.md new file mode 100644 index 0000000..03c7143 --- /dev/null +++ b/specs/035-graph-query/checklists/requirements.md @@ -0,0 +1,36 @@ +# Specification Quality Checklist: GraphQuery — Portable, Composable Graph Query Interface + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-02-20 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets 
measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- All items pass. Spec is ready for `/speckit.clarify` or `/speckit.plan`. +- Assumptions section documents deferred decisions: performance optimization, algorithm module granularity, and centrality tuning parameters. +- `GraphClassifier` (feature 034) is identified as a prerequisite dependency. diff --git a/specs/035-graph-query/contracts/Algorithms.hs b/specs/035-graph-query/contracts/Algorithms.hs new file mode 100644 index 0000000..225bed0 --- /dev/null +++ b/specs/035-graph-query/contracts/Algorithms.hs @@ -0,0 +1,210 @@ +-- | Contract: Pattern.Graph.Algorithms +-- +-- This file defines the complete public API for the Algorithms module. +-- It is a specification artifact — not compiled code. +-- +-- Module: Pattern.Graph.Algorithms +-- All functions accept 'GraphQuery v' as their first argument. +-- Traversal algorithms also accept 'TraversalWeight v'. + +module Pattern.Graph.Algorithms + ( -- * Traversal + bfs + , dfs + + -- * Paths + , shortestPath + , hasPath + , allPaths + + -- * Boolean queries + , isNeighbor + , isConnected + + -- * Structural + , connectedComponents + , topologicalSort + , hasCycle + + -- * Spanning + , minimumSpanningTree + + -- * Centrality + , degreeCentrality + , betweennessCentrality + + -- * Context query helpers + , queryAnnotationsOf + , queryWalksContaining + , queryCoMembers + ) where + +import Pattern.Core (Pattern(..)) +import Pattern.Graph.GraphClassifier (GraphValue(..), GraphClassifier) +import Pattern.Graph.GraphQuery (GraphQuery, TraversalWeight) +import Data.Map.Strict (Map) + +-- --------------------------------------------------------------------------- +-- Traversal +-- --------------------------------------------------------------------------- + +-- | Breadth-first search from a starting node. 
+-- +-- Returns all nodes reachable from the starting node, in BFS order, +-- respecting the supplied 'TraversalWeight' (infinite-cost edges are not traversed). +-- +-- == Complexity +-- O((n + r) log n) where n = nodes, r = relationships +bfs :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Pattern v -> [Pattern v] + +-- | Depth-first search from a starting node. +-- +-- Returns all nodes reachable from the starting node, in DFS order, +-- respecting the supplied 'TraversalWeight'. +-- +-- == Complexity +-- O((n + r) log n) +dfs :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Pattern v -> [Pattern v] + +-- --------------------------------------------------------------------------- +-- Paths +-- --------------------------------------------------------------------------- + +-- | Find the shortest (minimum total weight) path between two nodes. +-- +-- Returns 'Just' a sequence of nodes if a path exists, 'Nothing' otherwise. +-- Uses Dijkstra's algorithm; assumes non-negative weights. +-- +-- == Complexity +-- O((n + r) log n) +shortestPath :: Ord (Id v) => GraphQuery v -> TraversalWeight v + -> Pattern v -> Pattern v -> Maybe [Pattern v] + +-- | Determine whether any path exists between two nodes. +-- +-- Returns 'True' if there is at least one traversable path from start to end. +-- +-- == Complexity +-- O((n + r) log n) +hasPath :: Ord (Id v) => GraphQuery v -> TraversalWeight v + -> Pattern v -> Pattern v -> Bool + +-- | Enumerate all simple paths between two nodes. +-- +-- Returns all paths as sequences of nodes. May be exponential in the number +-- of paths; use with care on dense graphs. +-- +-- == Note +-- A simple path visits each node at most once. 
+allPaths :: Ord (Id v) => GraphQuery v -> TraversalWeight v + -> Pattern v -> Pattern v -> [[Pattern v]] + +-- --------------------------------------------------------------------------- +-- Boolean queries +-- --------------------------------------------------------------------------- + +-- | Determine whether two nodes are direct neighbors. +-- +-- Returns 'True' if there exists a relationship between the two nodes +-- that is traversable (finite cost) under the supplied 'TraversalWeight'. +isNeighbor :: Eq (Id v) => GraphQuery v -> TraversalWeight v + -> Pattern v -> Pattern v -> Bool + +-- | Determine whether the entire graph is connected. +-- +-- Returns 'True' if every node is reachable from every other node +-- under the supplied 'TraversalWeight'. +isConnected :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Bool + +-- --------------------------------------------------------------------------- +-- Structural +-- --------------------------------------------------------------------------- + +-- | Find all connected components. +-- +-- Returns a list of node groups where each group contains all nodes +-- mutually reachable under the supplied 'TraversalWeight'. +-- +-- == Complexity +-- O((n + r) log n) +connectedComponents :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> [[Pattern v]] + +-- | Topological sort of the graph. +-- +-- Returns 'Just' a linear ordering of nodes such that for every directed +-- relationship u→v, u appears before v. Returns 'Nothing' if the graph +-- contains a cycle. +-- +-- Note: does not take 'TraversalWeight' — operates on the directed structure +-- implied by relationship endpoint order. +-- +-- == Complexity +-- O(n + r) +topologicalSort :: Ord (Id v) => GraphQuery v -> Maybe [Pattern v] + +-- | Determine whether the graph contains a cycle. +-- +-- Returns 'True' if any directed cycle exists in the graph. +-- Does not take 'TraversalWeight' — operates on directed endpoint order.
+-- +-- == Complexity +-- O(n + r) +hasCycle :: Ord (Id v) => GraphQuery v -> Bool + +-- --------------------------------------------------------------------------- +-- Spanning +-- --------------------------------------------------------------------------- + +-- | Compute a minimum spanning tree (or forest). +-- +-- Returns the set of relationships forming a spanning tree with minimum +-- total weight. Uses Kruskal's or Prim's algorithm. +-- +-- == Complexity +-- O(r log r) +minimumSpanningTree :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> [Pattern v] + +-- --------------------------------------------------------------------------- +-- Centrality +-- --------------------------------------------------------------------------- + +-- | Compute degree centrality for all nodes. +-- +-- Returns a map from node identity to normalized degree centrality score. +-- Degree centrality = degree(n) / (|nodes| - 1). +-- +-- == Complexity +-- O(n + r) +degreeCentrality :: Ord (Id v) => GraphQuery v -> Map (Id v) Double + +-- | Compute betweenness centrality for all nodes. +-- +-- Returns a map from node identity to betweenness centrality score. +-- Betweenness centrality = fraction of shortest paths passing through each node. +-- +-- == Complexity +-- O(n * (n + r) log n) — Brandes algorithm +betweennessCentrality :: Ord (Id v) => GraphQuery v -> TraversalWeight v + -> Map (Id v) Double + +-- --------------------------------------------------------------------------- +-- Context query helpers +-- --------------------------------------------------------------------------- + +-- | All annotations attached to a given element. +-- +-- Calls 'queryContainers' and filters results classified as 'GAnnotation' +-- by the supplied 'GraphClassifier'. No new 'GraphQuery' fields required. +queryAnnotationsOf :: GraphClassifier extra v -> GraphQuery v + -> Pattern v -> [Pattern v] + +-- | All walks that contain a given element (directly or via its relationships). 
+-- +-- Calls 'queryContainers' and filters results classified as 'GWalk'. +queryWalksContaining :: GraphClassifier extra v -> GraphQuery v + -> Pattern v -> [Pattern v] + +-- | All elements sharing a specific container with a given element. +-- +-- Returns elements that are co-members of the given container pattern. +queryCoMembers :: GraphQuery v -> Pattern v -> Pattern v -> [Pattern v] diff --git a/specs/035-graph-query/contracts/GraphQuery.hs b/specs/035-graph-query/contracts/GraphQuery.hs new file mode 100644 index 0000000..174db01 --- /dev/null +++ b/specs/035-graph-query/contracts/GraphQuery.hs @@ -0,0 +1,203 @@ +-- | Contract: Pattern.Graph.GraphQuery +-- +-- This file defines the complete public API for the GraphQuery module. +-- It is a specification artifact — not compiled code. +-- +-- Module: Pattern.Graph.GraphQuery +-- Exposes: GraphQuery, TraversalDirection, TraversalWeight, +-- fromGraphLens, fromPatternGraph, +-- undirected, directed, directedReverse, +-- frameQuery, memoizeIncidentRels + +module Pattern.Graph.GraphQuery + ( -- * Core types + GraphQuery(..) + , TraversalDirection(..) + , TraversalWeight + + -- * Canonical traversal weights + , undirected + , directed + , directedReverse + + -- * Constructors + , fromGraphLens + , fromPatternGraph + + -- * Combinators + , frameQuery + , memoizeIncidentRels + ) where + +import Pattern.Core (Pattern(..)) +import Pattern.Graph (GraphLens) +import Pattern.Graph.GraphClassifier (GraphValue(..)) +import Pattern.PatternGraph (PatternGraph) +import Data.Map.Strict (Map) + +-- --------------------------------------------------------------------------- +-- TraversalDirection +-- --------------------------------------------------------------------------- + +-- | The two orientations along a directed relationship. +-- +-- 'Forward' follows the relationship from source to target. +-- 'Backward' follows it from target to source. +-- +-- == Categorical interpretation +-- An element of a two-element set. 
Used as a parameter to 'TraversalWeight' +-- to encode directionality as a cost rather than a structural property. +data TraversalDirection = Forward | Backward + deriving (Eq, Show) + +-- --------------------------------------------------------------------------- +-- TraversalWeight +-- --------------------------------------------------------------------------- + +-- | A function assigning a traversal cost to each (relationship, direction) pair. +-- +-- Encodes both directionality and edge weight as a single concept. +-- Infinity (@1\/0 :: Double@) encodes impassability — traversal is blocked +-- in that direction. +-- +-- == Invariants +-- * Must return a non-negative 'Double' (including @+Infinity@). +-- * Negative weights are not supported by the standard Dijkstra-based algorithms. +-- +-- == Canonical values +-- * 'undirected' — uniform cost 1.0, direction ignored +-- * 'directed' — 1.0 forward, infinity backward +-- * 'directedReverse' — infinity forward, 1.0 backward +type TraversalWeight v = Pattern v -> TraversalDirection -> Double + +-- | Undirected, unweighted traversal. Direction is ignored; cost is uniform. +undirected :: TraversalWeight v +undirected _ _ = 1.0 + +-- | Directed forward-only traversal. Reverse direction is impassable. +directed :: TraversalWeight v +directed _ Forward = 1.0 +directed _ Backward = 1/0 + +-- | Directed reverse-only traversal. Forward direction is impassable. +directedReverse :: TraversalWeight v +directedReverse _ Forward = 1/0 +directedReverse _ Backward = 1.0 + +-- --------------------------------------------------------------------------- +-- GraphQuery +-- --------------------------------------------------------------------------- + +-- | A record-of-functions abstracting graph traversal and lookup over any +-- graph representation. 
+-- +-- == Categorical interpretation +-- A coalgebra-like structure: given a graph element, the fields produce the +-- elements reachable from it (downward traversal) or the structures containing +-- it (upward traversal via 'queryContainers'). +-- +-- == Design principles +-- * Representation-independent: works with 'GraphLens', 'PatternGraph', +-- database-backed graphs, or any custom source. +-- * Composable: 'GraphQuery v -> GraphQuery v' transformations (framing, +-- caching, logging) are the extension pattern. +-- * 'queryNeighbors' is intentionally absent: neighbors are direction-dependent. +-- Algorithms derive reachability from 'queryIncidentRels' + 'querySource' + +-- 'queryTarget' + a supplied 'TraversalWeight'. +-- +-- == Invariants +-- * @querySource r = Just s@ implies @s ∈ queryNodes@ +-- * @queryTarget r = Just t@ implies @t ∈ queryNodes@ +-- * @r ∈ queryIncidentRels n@ implies @querySource r = Just n ∨ queryTarget r = Just n@ +-- * @queryDegree n = length (queryIncidentRels n)@ (default; implementations may be faster) +-- * @queryNodeById (identify (value n)) = Just n@ for all @n ∈ queryNodes@ +-- * @queryRelationshipById (identify (value r)) = Just r@ for all @r ∈ queryRelationships@ +-- * 'queryContainers' returns only direct containers — not transitive containment +data GraphQuery v = GraphQuery + { queryNodes :: [Pattern v] + -- ^ All node-classified elements in the graph. + + , queryRelationships :: [Pattern v] + -- ^ All relationship-classified elements in the graph. + + , queryIncidentRels :: Pattern v -> [Pattern v] + -- ^ All relationships where the given node is source or target. + -- Hot-path function; implementations should apply @{-# INLINE #-}@. + + , querySource :: Pattern v -> Maybe (Pattern v) + -- ^ The source (first endpoint) of a relationship. + -- Returns 'Nothing' if the pattern is not a relationship. + -- Hot-path function; implementations should apply @{-# INLINE #-}@. 
+ + , queryTarget :: Pattern v -> Maybe (Pattern v) + -- ^ The target (second endpoint) of a relationship. + -- Returns 'Nothing' if the pattern is not a relationship. + -- Hot-path function; implementations should apply @{-# INLINE #-}@. + + , queryDegree :: Pattern v -> Int + -- ^ Count of incident relationships for a node. + -- Derivable from 'queryIncidentRels' but included explicitly because + -- implementations may provide O(1) versions (e.g. a degree index). + -- Hot-path function; implementations should apply @{-# INLINE #-}@. + + , queryNodeById :: Id v -> Maybe (Pattern v) + -- ^ Node lookup by identity. O(log n) from 'PatternGraph'; O(n) from 'GraphLens'. + + , queryRelationshipById :: Id v -> Maybe (Pattern v) + -- ^ Relationship lookup by identity. O(log r) from 'PatternGraph'; O(r) from 'GraphLens'. + + , queryContainers :: Pattern v -> [Pattern v] + -- ^ All higher-order structures (relationships, walks, annotations) that + -- directly contain the given element. The upward traversal dual to downward + -- decomposition. Required by GraphMutation for coherent deletion; independently + -- useful for impact analysis and pattern matching. + } + +-- --------------------------------------------------------------------------- +-- Constructors +-- --------------------------------------------------------------------------- + +-- | Construct a 'GraphQuery v' from a 'GraphLens v'. +-- +-- All fields are derived from existing 'Pattern.Graph' functions. +-- 'queryNodeById' and 'queryRelationshipById' perform O(n) / O(r) scans +-- (no index available from 'GraphLens'). +-- +-- This is the bridge that allows existing 'GraphLens'-based code to use +-- 'Pattern.Graph.Algorithms' without changes. +fromGraphLens :: (GraphValue v, Eq v) => GraphLens v -> GraphQuery v + +-- | Construct a 'GraphQuery v' directly from a 'PatternGraph extra v'. +-- +-- Reads from the typed maps (@pgNodes@, @pgRelationships@, @pgWalks@, +-- @pgAnnotations@) without going through 'GraphLens'. 
Provides O(log n) +-- lookups for 'queryNodeById' and 'queryRelationshipById'. +-- +-- Supersedes 'Pattern.PatternGraph.toGraphLens' for algorithm access. +fromPatternGraph :: (GraphValue v, Eq v) => PatternGraph extra v -> GraphQuery v + +-- --------------------------------------------------------------------------- +-- Combinators +-- --------------------------------------------------------------------------- + +-- | Produce a filtered subgraph view as a 'GraphQuery v'. +-- +-- Nodes and relationships not matching the predicate are excluded. +-- 'queryIncidentRels' on the result excludes relationships whose endpoints +-- fall outside the frame. +-- +-- All 'GraphQuery' invariants hold on the result. +-- +-- == Example +-- @ +-- let personFrame = frameQuery isPerson (fromPatternGraph pg) +-- @ +frameQuery :: (Pattern v -> Bool) -> GraphQuery v -> GraphQuery v + +-- | Wrap 'queryIncidentRels' with a memoization layer keyed by node identity. +-- +-- Useful for algorithms that call 'queryIncidentRels' repeatedly on the same +-- node (e.g. betweenness centrality). All other fields are passed through unchanged. +-- +-- The memoization is per-'GraphQuery' value, not global. +memoizeIncidentRels :: Ord (Id v) => GraphQuery v -> GraphQuery v diff --git a/specs/035-graph-query/data-model.md b/specs/035-graph-query/data-model.md new file mode 100644 index 0000000..ade3d33 --- /dev/null +++ b/specs/035-graph-query/data-model.md @@ -0,0 +1,278 @@ +# Data Model: GraphQuery — Portable, Composable Graph Query Interface + +**Branch**: `035-graph-query` | **Date**: 2026-02-20 +**Purpose**: Concrete type definitions, field invariants, and relationships between all entities introduced by this feature. + +--- + +## Overview + +This feature introduces three new types (`GraphQuery v`, `TraversalDirection`, `TraversalWeight v`) and two new modules (`Pattern.Graph.GraphQuery`, `Pattern.Graph.Algorithms`). 
It extends `Pattern.Graph` with `fromGraphLens` and backward-compatible wrappers, and extends `Pattern.PatternGraph` with `fromPatternGraph`. The functions `toGraphLens` and `toGraphLensWithScope` were removed from `Pattern.PatternGraph`; use `fromPatternGraph` instead (see research.md Decision 7 implementation deviation). + +--- + +## Core Types + +### `TraversalDirection` + +``` +Module: Pattern.Graph.GraphQuery + +data TraversalDirection = Forward | Backward +``` + +**Categorical interpretation**: An element of a two-element set representing the two orientations along a directed relationship. `Forward` follows the relationship from source to target; `Backward` follows it from target to source. + +**Invariants**: None. A plain enumeration. + +--- + +### `TraversalWeight v` + +``` +Module: Pattern.Graph.GraphQuery + +type TraversalWeight v = Pattern v -> TraversalDirection -> Double +``` + +**Categorical interpretation**: A function that assigns a traversal cost to each (relationship, direction) pair. Encodes both directionality and edge weight as a single concept. Infinity (`1/0 :: Double`) encodes impassability — the traversal is blocked in that direction. + +**Canonical values** (provided by the library): + +| Name | Behavior | +|------|----------| +| `undirected` | Returns `1.0` for all inputs. Direction is ignored; cost is uniform. | +| `directed` | Returns `1.0` for `Forward`, `1/0` for `Backward`. Only forward traversal is passable. | +| `directedReverse` | Returns `1/0` for `Forward`, `1.0` for `Backward`. Only backward traversal is passable. | + +**Invariants**: +- Must return a non-negative `Double` (including `+Infinity`). +- `0.0` is a valid weight (zero-cost traversal). +- Negative weights are not supported by the standard algorithms (Dijkstra assumption). Callers using negative weights must use Bellman-Ford variants. 
+ +--- + +### `GraphQuery v` + +``` +Module: Pattern.Graph.GraphQuery + +data GraphQuery v = GraphQuery + { queryNodes :: [Pattern v] + , queryRelationships :: [Pattern v] + , queryIncidentRels :: Pattern v -> [Pattern v] + , querySource :: Pattern v -> Maybe (Pattern v) + , queryTarget :: Pattern v -> Maybe (Pattern v) + , queryDegree :: Pattern v -> Int + , queryNodeById :: Id v -> Maybe (Pattern v) + , queryRelationshipById :: Id v -> Maybe (Pattern v) + , queryContainers :: Pattern v -> [Pattern v] + } +``` + +**Categorical interpretation**: A record-of-functions representing a graph query interface. Analogous to a coalgebra: given a graph element, it produces the elements reachable from it. `queryContainers` is the upward dual — given an element, it produces the structures that contain it. Together they form a bidirectional traversal interface. + +**Field semantics**: + +| Field | Type | Semantics | Complexity (in-memory) | +|-------|------|-----------|------------------------| +| `queryNodes` | `[Pattern v]` | All node-classified elements in the graph | O(n) | +| `queryRelationships` | `[Pattern v]` | All relationship-classified elements | O(r) | +| `queryIncidentRels` | `Pattern v -> [Pattern v]` | All relationships where the given node is source or target | O(r) | +| `querySource` | `Pattern v -> Maybe (Pattern v)` | The source (first endpoint) of a relationship; `Nothing` if not a relationship | O(1) | +| `queryTarget` | `Pattern v -> Maybe (Pattern v)` | The target (second endpoint) of a relationship; `Nothing` if not a relationship | O(1) | +| `queryDegree` | `Pattern v -> Int` | Count of incident relationships for a node | O(r) default; O(1) if indexed | +| `queryNodeById` | `Id v -> Maybe (Pattern v)` | Node lookup by identity | O(log n) from PatternGraph; O(n) from GraphLens | +| `queryRelationshipById` | `Id v -> Maybe (Pattern v)` | Relationship lookup by identity | O(log r) from PatternGraph; O(r) from GraphLens | +| `queryContainers` | `Pattern v 
-> [Pattern v]` | All higher-order structures (relationships, walks, annotations) that directly contain the given element | O(r + w + a) | + +**Invariants**: +- `querySource r = Just s` implies `s ∈ queryNodes`. +- `queryTarget r = Just t` implies `t ∈ queryNodes`. +- `r ∈ queryIncidentRels n` implies `querySource r = Just n ∨ queryTarget r = Just n`. +- `queryDegree n = length (queryIncidentRels n)` (default; implementations may provide faster versions). +- `queryNodeById (identify (value n)) = Just n` for all `n ∈ queryNodes`. +- `queryRelationshipById (identify (value r)) = Just r` for all `r ∈ queryRelationships`. +- `queryContainers` returns only direct containers — it does not recurse transitively. + +**`queryNeighbors` is intentionally absent**: Neighbors are direction-dependent. Algorithms derive reachable neighbors from `queryIncidentRels` + `querySource` + `queryTarget` + a supplied `TraversalWeight`. + +--- + +## Constructors + +### `fromGraphLens` + +``` +Module: Pattern.Graph.GraphQuery + +fromGraphLens :: (GraphValue v, Eq v) => GraphLens v -> GraphQuery v +``` + +Constructs a `GraphQuery v` from a `GraphLens v`. All fields are derived from existing `Pattern.Graph` functions. `queryNodeById` and `queryRelationshipById` perform O(n) / O(r) scans (no index available from `GraphLens`). `queryContainers` scans relationships and walks. + +**Relationship to existing code**: This is the bridge that allows existing `GraphLens`-based code to use the new algorithm module without changes. + +--- + +### `fromPatternGraph` + +``` +Module: Pattern.Graph.GraphQuery (re-exported from Pattern.PatternGraph) + +fromPatternGraph :: (GraphValue v, Eq v) => PatternGraph extra v -> GraphQuery v +``` + +Constructs a `GraphQuery v` directly from a `PatternGraph extra v` by reading from the typed maps (`pgNodes`, `pgRelationships`, `pgWalks`, `pgAnnotations`). `queryNodeById` and `queryRelationshipById` are O(log n) / O(log r) map lookups. 
`queryContainers` filters across all four maps. + +**Relationship to existing code**: Supersedes `toGraphLens` for algorithm access. `toGraphLens` and `toGraphLensWithScope` were removed in this feature; use `fromPatternGraph` (see research.md Decision 7). + +--- + +## Combinators + +### `frameQuery` + +``` +Module: Pattern.Graph.GraphQuery + +frameQuery :: (Pattern v -> Bool) -> GraphQuery v -> GraphQuery v +``` + +Produces a new `GraphQuery v` that filters nodes and relationships by a predicate. `queryIncidentRels` on the framed query excludes relationships whose endpoints fall outside the frame. All other fields are filtered accordingly. + +**Invariants preserved**: All `GraphQuery` invariants hold on the result — `querySource`, `queryTarget`, and `queryIncidentRels` remain consistent within the frame. + +--- + +### `memoizeIncidentRels` + +``` +Module: Pattern.Graph.GraphQuery + +memoizeIncidentRels :: Ord (Id v) => GraphQuery v -> GraphQuery v +``` + +Wraps `queryIncidentRels` with a memoization layer keyed by node identity. Useful for algorithms that call `queryIncidentRels` repeatedly on the same node (e.g., betweenness centrality). All other fields are passed through unchanged. + +**Note**: The memoization is per-`GraphQuery` value, not global. A new `GraphQuery` produced by `memoizeIncidentRels` carries its own cache. + +--- + +## Algorithms Module + +### Module: `Pattern.Graph.Algorithms` + +All functions accept `GraphQuery v` as their first argument. Traversal algorithms also accept `TraversalWeight v`. 
+ +#### Traversal + +```haskell +bfs :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Pattern v -> [Pattern v] +dfs :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Pattern v -> [Pattern v] +``` + +#### Paths + +```haskell +shortestPath :: Ord (Id v) => GraphQuery v -> TraversalWeight v + -> Pattern v -> Pattern v -> Maybe [Pattern v] + +hasPath :: Ord (Id v) => GraphQuery v -> TraversalWeight v + -> Pattern v -> Pattern v -> Bool + +allPaths :: Ord (Id v) => GraphQuery v -> TraversalWeight v + -> Pattern v -> Pattern v -> [[Pattern v]] +``` + +#### Boolean Queries + +```haskell +isNeighbor :: Eq (Id v) => GraphQuery v -> TraversalWeight v + -> Pattern v -> Pattern v -> Bool + +isConnected :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Bool +``` + +#### Structural + +```haskell +connectedComponents :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> [[Pattern v]] +topologicalSort :: Ord (Id v) => GraphQuery v -> Maybe [Pattern v] +hasCycle :: Ord (Id v) => GraphQuery v -> Bool +``` + +**Note**: `topologicalSort` and `hasCycle` do not take `TraversalWeight` — they operate on the directed structure implied by relationship endpoint order. + +#### Spanning + +```haskell +minimumSpanningTree :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> [Pattern v] +``` + +#### Centrality + +```haskell +degreeCentrality :: Ord (Id v) => GraphQuery v -> Map (Id v) Double +betweennessCentrality :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Map (Id v) Double +``` + +#### Context Query Helpers + +```haskell +queryAnnotationsOf :: GraphClassifier extra v -> GraphQuery v -> Pattern v -> [Pattern v] +queryWalksContaining :: GraphClassifier extra v -> GraphQuery v -> Pattern v -> [Pattern v] +queryCoMembers :: GraphQuery v -> Pattern v -> Pattern v -> [Pattern v] +``` + +These are derived functions — they call `queryContainers` and filter by `GraphClass`. No new `GraphQuery` fields are required. 
+ +--- + +## Module Dependency Graph + +``` +Pattern.Core + └── Pattern.Graph.GraphClassifier (GraphValue, GraphClass, GraphClassifier) + ├── Pattern.Graph (GraphLens — existing) + ├── Pattern.Graph.GraphQuery (GraphQuery, TraversalWeight, TraversalDirection, + │ fromGraphLens, fromPatternGraph, frameQuery, + │ memoizeIncidentRels) + │ └── Pattern.Graph.Algorithms (all algorithms + context helpers) + └── Pattern.PatternGraph (PatternGraph — existing; fromPatternGraph added; + toGraphLens / toGraphLensWithScope removed) +``` + +--- + +## Changes to Existing Modules + +### `Pattern.Graph` (existing) + +- Add `fromGraphLens` re-export (or define here and re-export from `GraphQuery`). +- Replace `bfs`, `findPath`, `connectedComponents` implementations with one-line wrappers: + ```haskell + bfs lens start = Algorithms.bfs (fromGraphLens lens) undirected start + findPath lens s e = Algorithms.shortestPath (fromGraphLens lens) undirected s e + connectedComponents lens = Algorithms.connectedComponents (fromGraphLens lens) undirected + ``` +- All existing exports remain; no removals. + +### `Pattern.PatternGraph` (existing) + +- Add `fromPatternGraph` to exports. +- Remove `toGraphLens` and `toGraphLensWithScope` (replaced by `fromPatternGraph`; see research.md Decision 7 implementation deviation). 
+ +### `pattern.cabal` + +Add to `exposed-modules`: +``` +Pattern.Graph.GraphQuery +Pattern.Graph.Algorithms +``` + +Add to `other-modules` in `test-suite`: +``` +Spec.Pattern.Graph.GraphQuerySpec +Spec.Pattern.Graph.AlgorithmsSpec +``` diff --git a/specs/035-graph-query/plan.md b/specs/035-graph-query/plan.md new file mode 100644 index 0000000..c3f0bd7 --- /dev/null +++ b/specs/035-graph-query/plan.md @@ -0,0 +1,76 @@ +# Implementation Plan: GraphQuery — Portable, Composable Graph Query Interface + +**Branch**: `035-graph-query` | **Date**: 2026-02-20 | **Spec**: [spec.md](./spec.md) +**Input**: Feature specification from `/specs/035-graph-query/spec.md` + +## Summary + +Introduce `GraphQuery v` as a record-of-functions that abstracts graph traversal and lookup over any graph representation. Move existing `GraphLens`-bound algorithms (`bfs`, `findPath`, `connectedComponents`) to a new `Pattern.Graph.Algorithms` module expressed against `GraphQuery`. Add `fromGraphLens` and `fromPatternGraph` constructors, `TraversalWeight` for call-site traversal policy, composability combinators (`frameQuery`, `memoizeIncidentRels`), context query helpers (`queryAnnotationsOf`, `queryWalksContaining`, `queryCoMembers`), and backward-compatible wrappers on `GraphLens`. Retire `PatternGraph.toGraphLens` once superseded. + +## Technical Context + +**Language/Version**: Haskell (GHC 9.10.3) +**Primary Dependencies**: `containers ^>=0.7` (Map, Set), `base >=4.17.0.0`, `subject` (GraphValue/Symbol), `pattern` (Pattern.Core, Pattern.Graph, Pattern.PatternGraph, Pattern.Graph.GraphClassifier) +**Storage**: N/A — pure in-memory data structures +**Testing**: hspec ^>=2.11, QuickCheck ^>=2.14 (property-based tests) +**Target Platform**: Library (GHC, cross-platform) +**Project Type**: Single library (`libs/pattern`) +**Performance Goals**: Correct behavior is the primary goal. 
`queryIncidentRels`, `querySource`, `queryTarget`, and `queryDegree` are hot-path functions; `{-# INLINE #-}` pragmas should be applied. Bulk adjacency for iterative algorithms is a near-term follow-on, not part of this feature. +**Constraints**: Backward compatibility with existing `GraphLens` algorithm callers MUST be preserved. No new external dependencies. +**Scale/Scope**: In-memory graphs of the scale handled by `PatternGraph` and `GraphLens` today. Large-graph performance (Louvain, PageRank) is explicitly deferred. + +## Constitution Check + +*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* + +| Principle | Status | Notes | +|-----------|--------|-------| +| I. Code Quality | PASS | All new types and functions must have Haddock documentation with categorical interpretation. `GraphQuery` fields must document purpose, inputs, outputs, and invariants. | +| II. Testing Standards | PASS | Every public function requires hspec unit tests (happy path + edge cases). `TraversalWeight` canonical values and `frameQuery` require property-based QuickCheck tests. Category-theoretic properties (e.g., `fromGraphLens` and `fromPatternGraph` produce equivalent results on equivalent graphs) must be tested explicitly. | +| III. Conceptual Consistency | PASS | `GraphQuery` is a record-of-functions, consistent with `GraphClassifier` design. `TraversalWeight` externalizes traversal policy, consistent with the categorical principle that structure and interpretation are separate. Documentation must state the categorical interpretation of each component. | +| IV. Mathematical Clarity | PASS | `TraversalWeight` must be formally defined as a function `Pattern v → TraversalDirection → Double`. The semantics of infinity (blocking) must be stated. `queryContainers` must be documented as the upward traversal dual to downward decomposition. | +| V. Multi-Language Reference Alignment | PASS | `GraphQuery` uses only standard record-of-functions patterns. 
`TraversalWeight` is a plain function type. Both translate directly to closures/structs in other languages. Language-specific pragmas (`{-# INLINE #-}`) must be documented as Haskell-specific optimizations. | + +**Verdict**: No violations. Proceed to Phase 0. + +## Project Structure + +### Documentation (this feature) + +```text +specs/035-graph-query/ +├── plan.md # This file +├── research.md # Phase 0 output +├── data-model.md # Phase 1 output +├── quickstart.md # Phase 1 output +├── contracts/ # Phase 1 output (Haskell type signatures) +└── tasks.md # Phase 2 output (/speckit.tasks — NOT created here) +``` + +### Source Code (repository root) + +```text +libs/pattern/ +├── src/ +│ └── Pattern/ +│ ├── Graph.hs # existing — add fromGraphLens; retain bfs/findPath/connectedComponents as wrappers +│ ├── Graph/ +│ │ ├── GraphClassifier.hs # existing — unchanged +│ │ ├── GraphQuery.hs # NEW — GraphQuery v, TraversalWeight, TraversalDirection, fromGraphLens, fromPatternGraph +│ │ └── Algorithms.hs # NEW — all graph algorithms over GraphQuery v +│ └── PatternGraph.hs # existing — add fromPatternGraph export; deprecate toGraphLens +└── tests/ + └── Spec/Pattern/ + ├── GraphSpec.hs # existing — extend with wrapper backward-compat tests + ├── Graph/ + │ ├── GraphClassifierSpec.hs # existing — unchanged + │ ├── GraphQuerySpec.hs # NEW — unit tests for GraphQuery construction and field access + │ └── AlgorithmsSpec.hs # NEW — unit + property tests for all algorithms + └── PatternGraphSpec.hs # existing — extend with fromPatternGraph tests +``` + +**Structure Decision**: Single library project (`libs/pattern`). New modules `Pattern.Graph.GraphQuery` and `Pattern.Graph.Algorithms` are added under the existing `Pattern.Graph` namespace, consistent with `Pattern.Graph.GraphClassifier`. `pattern.cabal` must be updated to expose both new modules and add new test modules. + +## Complexity Tracking + +> No constitution violations requiring justification. 
diff --git a/specs/035-graph-query/quickstart.md b/specs/035-graph-query/quickstart.md new file mode 100644 index 0000000..51d6b0f --- /dev/null +++ b/specs/035-graph-query/quickstart.md @@ -0,0 +1,170 @@ +# Quickstart: GraphQuery + +`GraphQuery v` is a portable, composable graph query interface. It decouples graph algorithms from any specific graph representation — the same algorithm works on a `PatternGraph`, a `GraphLens`, a database-backed graph, or any custom source. + +--- + +## Standard Usage: Algorithms on a PatternGraph + +```haskell +import Pattern.Graph.GraphClassifier (canonicalClassifier) +import Pattern.Graph.GraphQuery (undirected) +import Pattern.Graph.Algorithms (shortestPath, connectedComponents) +import Pattern.PatternGraph (fromPatterns, fromPatternGraph) + +-- Build a PatternGraph from patterns +let pg = fromPatterns canonicalClassifier myPatterns + +-- Construct a GraphQuery directly from the PatternGraph (no GraphLens needed) +let gq = fromPatternGraph pg + +-- Run algorithms +let path = shortestPath gq undirected nodeA nodeB -- Maybe [Pattern v] +let components = connectedComponents gq undirected -- [[Pattern v]] +``` + +--- + +## Traversal Direction at the Call Site + +The same graph, queried with different traversal policies: + +```haskell +import Pattern.Graph.GraphQuery (directed, undirected, directedReverse) +import Pattern.Graph.Algorithms (hasPath) +import Pattern.PatternGraph (fromPatternGraph) + +let gq = fromPatternGraph pg + +-- Directed: only follows relationships source → target +hasPath gq directed nodeA nodeB -- True if A→B path exists + +-- Undirected: ignores direction, treats all relationships as bidirectional +hasPath gq undirected nodeA nodeB -- True if any path exists + +-- Reverse: only follows relationships target → source +hasPath gq directedReverse nodeB nodeA +``` + +--- + +## Custom Traversal Weight + +Read edge weights from relationship properties: + +```haskell +import Pattern.Graph.GraphQuery (TraversalWeight, 
TraversalDirection(..)) +import Pattern.Graph.Algorithms (shortestPath) +import Pattern.PatternGraph (fromPatternGraph) +import Subject.Core (Subject) + +-- A weight function that reads a "cost" property from the relationship value (lookupCost is user-supplied) +weightedTraversal :: TraversalWeight Subject +weightedTraversal rel Forward = maybe 1.0 id (lookupCost rel) +weightedTraversal _ Backward = 1/0 -- block reverse traversal + +let gq = fromPatternGraph pg +let path = shortestPath gq weightedTraversal nodeA nodeB +``` + +--- + +## Composing Graph Views + +### Subgraph frames (filter by predicate) + +```haskell +import Pattern.Graph.GraphQuery (frameQuery, undirected) +import Pattern.Graph.Algorithms (connectedComponents) +import Pattern.PatternGraph (fromPatternGraph) + +-- Only include nodes and relationships with a "Person" label +let personFrame = frameQuery isPerson (fromPatternGraph pg) + +-- Algorithms operate only within the frame +let components = connectedComponents personFrame undirected +``` + +### Memoizing incident relationship lookups + +```haskell +import Pattern.Graph.GraphQuery (memoizeIncidentRels, undirected) +import Pattern.Graph.Algorithms (betweennessCentrality) +import Pattern.PatternGraph (fromPatternGraph) + +-- Wrap with memoization before running a centrality algorithm +let gq = memoizeIncidentRels (fromPatternGraph pg) +let centrality = betweennessCentrality gq undirected -- Map (Id v) Double +``` + +### Composing combinators + +```haskell +-- Frame + memoize in one expression +let gq = memoizeIncidentRels . frameQuery isPerson $ fromPatternGraph pg +``` + +--- + +## Context Queries + +Ask "what contains this element?" 
without precomputing a context record: + +```haskell +import Pattern.Graph.GraphClassifier (canonicalClassifier) +import Pattern.Graph.GraphQuery () +import Pattern.Graph.Algorithms (queryAnnotationsOf, queryWalksContaining, queryCoMembers) +import Pattern.PatternGraph (fromPatternGraph) + +let gq = fromPatternGraph pg + +-- All annotations attached to a node +let annotations = queryAnnotationsOf canonicalClassifier gq myNode + +-- All walks that contain a relationship +let walks = queryWalksContaining canonicalClassifier gq myRel + +-- All elements sharing a container with a given element +let coMembers = queryCoMembers gq myNode myContainer +``` + +--- + +## Algorithms on a GraphLens (existing code, unchanged) + +Existing `GraphLens`-based calls continue to work without modification: + +```haskell +import Pattern.Graph (bfs, findPath, connectedComponents) + +-- These are now wrappers over Pattern.Graph.Algorithms with undirected default +let reachable = bfs lens startNode +let path = findPath lens nodeA nodeB +let components = connectedComponents lens +``` + +--- + +## Implementing a Custom GraphQuery + +Any value that can provide the required functions produces a `GraphQuery v`. No inheritance or typeclass instance required: + +```haskell +import Pattern.Graph.GraphQuery (GraphQuery(..)) + +-- A hand-constructed query over a custom data source +myQuery :: GraphQuery MyValue +myQuery = GraphQuery + { queryNodes = fetchNodes myDb + , queryRelationships = fetchRelationships myDb + , queryIncidentRels = \n -> fetchIncident myDb (nodeId n) + , querySource = \r -> Just (fetchSource myDb r) + , queryTarget = \r -> Just (fetchTarget myDb r) + , queryDegree = \n -> fetchDegree myDb (nodeId n) + , queryNodeById = \i -> fetchNodeById myDb i + , queryRelationshipById = \i -> fetchRelById myDb i + , queryContainers = \p -> fetchContainers myDb p + } +``` + +All algorithms in `Pattern.Graph.Algorithms` work against this query without modification. 
diff --git a/specs/035-graph-query/research.md b/specs/035-graph-query/research.md new file mode 100644 index 0000000..bade15a --- /dev/null +++ b/specs/035-graph-query/research.md @@ -0,0 +1,172 @@ +# Research: GraphQuery — Portable, Composable Graph Query Interface + +**Branch**: `035-graph-query` | **Date**: 2026-02-20 +**Purpose**: Resolve all design unknowns before Phase 1 design artifacts. + +--- + +## Decision 1: Record-of-Functions vs. Typeclass for `GraphQuery` + +**Decision**: Record-of-functions (`data GraphQuery v = GraphQuery { ... }`) + +**Rationale**: Consistent with the existing `GraphClassifier` design in this codebase. A record-of-functions is a first-class value: it can be passed, stored, transformed, and composed without typeclass machinery. This enables `frameQuery`, `memoizeIncidentRels`, and database-backed implementations as plain `GraphQuery v → GraphQuery v` transformations. A typeclass would require `newtype` wrappers for each variant and cannot be composed at runtime. + +**Alternatives considered**: +- Typeclass `class GraphQuery g where ...` — rejected because it cannot be composed at runtime (framing, caching, logging all require wrapping), and it is inconsistent with `GraphClassifier`. +- MTL-style `ReaderT GraphQuery` — rejected as unnecessary complexity for a pure query interface. + +--- + +## Decision 2: `TraversalWeight` as a Function vs. Enum + +**Decision**: `type TraversalWeight v = Pattern v -> TraversalDirection -> Double` + +**Rationale**: A function type subsumes all enum cases and allows user-defined weights (reading numeric properties from relationships, temporal costs, etc.) without any extension points. The canonical cases (`undirected`, `directed`, `directedReverse`) are provided as library values of this type. Infinity (`1/0`) encodes impassability, which is standard in shortest-path algorithms (Dijkstra, Bellman-Ford). 
+ +**Alternatives considered**: +- `data TraversalPolicy = Undirected | Directed | DirectedReverse | Weighted (Pattern v -> Double)` — rejected because it requires pattern-matching in every algorithm and cannot compose policies. +- Separate `directed :: Bool` parameter — rejected because it cannot encode weighted traversal. + +--- + +## Decision 3: `queryNeighbors` Omission + +**Decision**: `queryNeighbors` is absent from `GraphQuery`. Algorithms derive reachable neighbors from `queryIncidentRels` + `querySource` + `queryTarget` + `TraversalWeight`. + +**Rationale**: "Neighbors" is not a fixed concept when traversal direction is external. With `undirected`, both endpoints of every incident relationship are neighbors. With `directed`, only the target of forward-traversable relationships is a neighbor. Encoding this in `queryNeighbors` would require `TraversalWeight` as a parameter to the field itself, making it a higher-order field that complicates construction. Deriving neighbors in algorithms from the three primitives is straightforward and keeps the interface minimal. + +**Alternatives considered**: +- `queryNeighbors :: TraversalWeight v -> Pattern v -> [Pattern v]` as a field — rejected because it embeds traversal policy in the interface, conflating structure and interpretation. + +--- + +## Decision 4: `queryContainers` as a First-Class Field + +**Decision**: `queryContainers :: Pattern v -> [Pattern v]` is a required field of `GraphQuery`, not a derived helper. + +**Rationale**: Upward traversal ("what contains this element?") cannot be derived from the other fields without O(n) scans of all relationships, walks, and annotations. Making it a field lets each implementation choose its own strategy: `fromPatternGraph` filters its typed maps directly (O(r + w + a); see data-model.md), while an indexed or database-backed implementation can provide O(log n) lookups. It is required by the planned `GraphMutation` feature for coherent deletion and by context query helpers. Omitting it would force callers to reconstruct containment from scratch. 
+ +**Alternatives considered**: +- Derived function scanning all relationships/walks/annotations — rejected because it is O(n) and cannot be optimized by implementations. +- Separate `ContainerIndex` type — rejected as unnecessary indirection; the field achieves the same result. + +--- + +## Decision 5: `fromPatternGraph` Direct Construction (No `toGraphLens` Intermediary) + +**Decision**: `fromPatternGraph :: (GraphValue v, Eq v) => PatternGraph extra v -> GraphQuery v` reads directly from `pgNodes`, `pgRelationships`, `pgWalks`, `pgAnnotations` maps. + +**Rationale**: `toGraphLens` materializes all elements into a flat scope pattern and uses an atomic predicate, losing the typed map structure. `fromPatternGraph` reads from the typed maps directly, giving O(log n) lookups for `queryNodeById` and `queryRelationshipById` instead of O(n) scans. `queryContainers` can filter the typed maps directly. This is the primary motivation for the feature. + +**Alternatives considered**: +- `fromPatternGraph pg = fromGraphLens (toGraphLens pg)` — rejected because it defeats the purpose (O(n) scans, loss of typed structure). + +--- + +## Decision 6: Backward Compatibility Strategy for `GraphLens` Algorithms + +**Decision**: Retain `bfs`, `findPath`, `connectedComponents` in `Pattern.Graph` as wrappers that call `Pattern.Graph.Algorithms.*` with `fromGraphLens` and `undirected` as defaults. + +**Rationale**: Existing callers have no changes to make. The wrappers are one-liners. The `GraphValue v` constraint already present on these functions is preserved. This satisfies FR-011 and SC-002. 
+ +**Implementation**: +```haskell +-- Pattern.Graph (backward compat wrappers) +bfs :: GraphValue v => GraphLens v -> Pattern v -> [Pattern v] +bfs lens start = Algorithms.bfs (fromGraphLens lens) undirected start + +findPath :: GraphValue v => GraphLens v -> Pattern v -> Pattern v -> Maybe [Pattern v] +findPath lens s e = Algorithms.shortestPath (fromGraphLens lens) undirected s e + +connectedComponents :: GraphValue v => GraphLens v -> [[Pattern v]] +connectedComponents lens = Algorithms.connectedComponents (fromGraphLens lens) undirected +``` + +--- + +## Decision 7: `toGraphLens` Retirement Strategy + +**Decision** (as originally planned; superseded by the implementation note below): Mark `toGraphLens` and `toGraphLensWithScope` as deprecated with a GHC `{-# DEPRECATED #-}` pragma pointing to `fromPatternGraph`. Remove in a future version after callers have migrated. + +**Rationale**: Hard removal in this feature would break any callers outside the library. Deprecation gives a migration path. The spec says "retire" — deprecation is the correct Haskell mechanism for a controlled retirement. + +**Alternatives considered**: +- Hard removal now — originally rejected because it may break downstream callers not visible in this repo; in practice there are no downstream users, so the implementation did remove the functions (see implementation note below). +- No deprecation — rejected because it leaves the API ambiguous about the preferred path. + +**Implementation (035-graph-query)**: The implementation **removed** `toGraphLens` and `toGraphLensWithScope` from `Pattern.PatternGraph` entirely. There are no downstream users of this library; the original decision assumed possible external callers, which was a flawed assumption. With no downstream users, deprecation would add noise without benefit. Migration path for any future callers: use `fromPatternGraph` from `Pattern.PatternGraph` (or `Pattern.Graph.GraphQuery`). The breaking change is justified. 
If downstream users appear in the future, similar retirements should use the deprecation path described in the main decision above. + +--- + +## Decision 8: Module Layout + +**Decision**: +- `Pattern.Graph.GraphQuery` — `GraphQuery v`, `TraversalDirection`, `TraversalWeight`, `fromGraphLens`, `fromPatternGraph`, `frameQuery`, `memoizeIncidentRels` +- `Pattern.Graph.Algorithms` — all graph algorithms (`bfs`, `dfs`, `shortestPath`, `hasPath`, `allPaths`, `isNeighbor`, `isConnected`, `connectedComponents`, `topologicalSort`, `hasCycle`, `minimumSpanningTree`, `degreeCentrality`, `betweennessCentrality`, context helpers) + +**Rationale**: Keeps the interface type and its constructors/combinators together in one module. Algorithms are a separate concern and may grow into sub-modules (`Algorithms.Path`, `Algorithms.Centrality`) in a future feature. The split mirrors the `GraphClassifier` pattern: type in one module, usage in another. + +**Alternatives considered**: +- Everything in `Pattern.Graph.GraphQuery` — rejected because algorithms would make the module very large. +- Sub-modules now (`Algorithms.Path`, `Algorithms.Centrality`) — deferred; premature until the algorithm surface is larger. + +--- + +## Decision 9: `queryDegree` as Explicit Field + +**Decision**: Include `queryDegree :: Pattern v -> Int` as an explicit field even though it is derivable from `queryIncidentRels`. + +**Rationale**: Implementations backed by a database or index may provide O(1) degree queries. Making it a field allows those implementations to avoid O(n) scans. The default implementation in `fromGraphLens` and `fromPatternGraph` derives it from `queryIncidentRels`, so there is no cost for in-memory implementations. + +--- + +## Decision 10: Context Query Helpers as Derived Functions + +**Decision**: `queryAnnotationsOf`, `queryWalksContaining`, and `queryCoMembers` are plain functions in `Pattern.Graph.Algorithms`, not fields of `GraphQuery`. 
+ +**Rationale**: They are derived from `queryContainers` + `GraphClassifier` filtering. No new interface fields are needed. Callers pay only for what they use. This is consistent with the proposal's "derived, not primitive" decision. + +--- + +## Decision 12: `endpoints` Helper in `frameQuery` (Proposal Example Clarification) + +**Decision**: The proposal's `frameQuery` example uses `filter (any include . endpoints)` to filter incident relationships. `endpoints` is **not** an existing exported function in `Pattern.Graph`. The correct implementation uses `querySource` and `queryTarget` from the `GraphQuery` being wrapped. + +**Correct `frameQuery` filter for `queryIncidentRels`**: +```haskell +queryIncidentRels = \n -> + filter (\r -> maybe False include (querySource base r) + && maybe False include (queryTarget base r)) + (queryIncidentRels base n) +``` + +This excludes any relationship where either endpoint falls outside the frame. It uses only `querySource` and `queryTarget` — both already fields of `GraphQuery` — with no need for a separate `endpoints` helper. + +**Rationale**: The proposal example was illustrative pseudocode. The actual implementation must use the `GraphQuery` interface consistently. Introducing a separate `endpoints` function would add surface area without benefit. + +**Impact on tasks**: T045 description is correct in referencing `querySource`/`queryTarget`. No task changes needed; this decision documents the implementation approach. + +--- + +## Decision 11: `TraversalWeight` Module Placement (Deviation from Proposal) + +**Decision**: `TraversalDirection`, `TraversalWeight`, and its canonical values (`undirected`, `directed`, `directedReverse`) are defined in `Pattern.Graph.GraphQuery`, not in a separate `Pattern.Graph.TraversalWeight` module. + +**Rationale**: The proposal's "What changes" table lists `Pattern.Graph.TraversalWeight` as a distinct new module. 
However, `TraversalWeight` is a type alias and three small values — insufficient to justify a standalone module. It is conceptually inseparable from `GraphQuery`: every `GraphQuery` consumer also needs `TraversalWeight`. Co-locating them reduces import boilerplate and is consistent with how `GraphClassifier` co-locates `GraphClass`, `GraphValue`, and `GraphClassifier` in one module. The proposal's module table is a design sketch; this plan supersedes it on module granularity. + +**Alternatives considered**: +- `Pattern.Graph.TraversalWeight` as a separate module (per proposal) — rejected because the module would be tiny (one type alias + three values) and every `GraphQuery` user would need both imports. +- Re-export from `Pattern.Graph` — possible in a future polish pass; not needed for this feature. + +**Impact on artifacts**: `pattern.cabal` exposes `Pattern.Graph.GraphQuery` (not `Pattern.Graph.TraversalWeight`). Contracts, tasks, and data-model all reflect this decision. + +--- + +## Open Questions Resolved + +| Question from Proposal | Resolution | +|------------------------|------------| +| `queryAdjacency` for bulk algorithms | Deferred to a follow-on feature (`Pattern.Graph.Algorithms.Bulk`). Not in scope for this feature. | +| `Pattern.Graph.Algorithms` module granularity | Single module for now; sub-modules deferred until algorithm surface grows. | +| Centrality algorithm tuning parameters | Exact parameters (convergence tolerance, damping factor) determined during implementation. Signatures are representative. | +| Record-of-functions performance vs. typeclass | `{-# INLINE #-}` pragmas on hot-path functions (`queryIncidentRels`, `querySource`, `queryTarget`, `queryDegree`). Benchmarking against typeclass deferred to a performance-focused follow-on. 
| diff --git a/specs/035-graph-query/spec.md b/specs/035-graph-query/spec.md new file mode 100644 index 0000000..eda1dd1 --- /dev/null +++ b/specs/035-graph-query/spec.md @@ -0,0 +1,144 @@ +# Feature Specification: GraphQuery — Portable, Composable Graph Query Interface + +**Feature Branch**: `035-graph-query` +**Created**: 2026-02-20 +**Status**: Draft +**Input**: User description: "GraphQuery — A Portable, Composable Graph Query Interface as described in @proposals/graph-query.md" + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Run Graph Algorithms on Any Representation (Priority: P1) + +A developer working with a `PatternGraph` wants to run a shortest-path or connected-components query without first converting the graph to a `GraphLens`. They construct a `GraphQuery` from their `PatternGraph` and call the algorithm directly, receiving results without any intermediate conversion or data loss. + +**Why this priority**: This is the core value of the feature — decoupling algorithms from representation. Without it, the feature delivers nothing. Every other story depends on this foundation. + +**Independent Test**: Can be fully tested by constructing a `GraphQuery` from a `PatternGraph`, calling `shortestPath` or `connectedComponents`, and verifying correct results — without ever touching `GraphLens`. + +**Acceptance Scenarios**: + +1. **Given** a `PatternGraph` with nodes and relationships, **When** `fromPatternGraph` is called, **Then** a `GraphQuery` is produced that correctly enumerates nodes, relationships, incident relationships, sources, targets, and degrees. +2. **Given** a `GraphQuery` derived from a `PatternGraph`, **When** `shortestPath` is called with two nodes, **Then** the correct path (or `Nothing`) is returned. +3. **Given** a `GraphQuery` derived from a `GraphLens`, **When** `connectedComponents` is called, **Then** results match those previously returned by the `GraphLens`-specific implementation. +4. 
**Given** a `GraphQuery` derived from either source, **When** `bfs` or `dfs` is called with a start node, **Then** all reachable nodes are returned in the correct order. + +--- + +### User Story 2 - Control Traversal Direction and Weight at the Call Site (Priority: P2) + +A developer running a path-finding algorithm wants to treat the same graph as directed in one call and undirected in another, without creating separate graph structures. They supply a `TraversalWeight` value (`directed`, `undirected`, or a custom function) as a parameter to each algorithm call. + +**Why this priority**: Traversal policy is the primary differentiator between many graph use cases. Without `TraversalWeight`, the interface is less expressive than what already exists on `GraphLens`. + +**Independent Test**: Can be fully tested by calling `hasPath` on the same `GraphQuery` with `directed` and `undirected` weights and verifying that results differ when the graph contains one-way relationships. + +**Acceptance Scenarios**: + +1. **Given** a graph with a directed relationship A→B, **When** `hasPath` is called with `directed` weight from A to B, **Then** the result is `True`. +2. **Given** the same graph, **When** `hasPath` is called with `directed` weight from B to A, **Then** the result is `False`. +3. **Given** the same graph, **When** `hasPath` is called with `undirected` weight from B to A, **Then** the result is `True`. +4. **Given** a custom `TraversalWeight` that reads a numeric property from a relationship, **When** `shortestPath` is called, **Then** the path with the lowest total weight is returned. + +--- + +### User Story 3 - Compose Graph Views Without New Types (Priority: P3) + +A developer wants to run algorithms over a filtered subgraph — for example, only nodes with a specific label, or only relationships within a time window. They use `frameQuery` to wrap an existing `GraphQuery` with a predicate, producing a new `GraphQuery` that algorithms treat as a complete graph. 
+ +**Why this priority**: Composability is what makes the interface extensible without new types or instances. It enables database-backed graphs, caching, and logging as well as subgraph views. + +**Independent Test**: Can be fully tested by applying `frameQuery` to a `GraphQuery`, calling `queryNodes` on the result, and verifying only matching nodes are returned; then running an algorithm and confirming it operates only within the frame. + +**Acceptance Scenarios**: + +1. **Given** a `GraphQuery` with mixed-label nodes, **When** `frameQuery` is applied with a label predicate, **Then** `queryNodes` on the result returns only matching nodes. +2. **Given** a framed `GraphQuery`, **When** `queryIncidentRels` is called for a node in the frame, **Then** only relationships whose endpoints are both within the frame are returned. +3. **Given** a `GraphQuery` wrapped with `memoizeIncidentRels`, **When** `queryIncidentRels` is called repeatedly for the same node, **Then** the underlying function is invoked only once. +4. **Given** composing `memoizeIncidentRels . frameQuery predicate $ fromPatternGraph pg`, **When** any algorithm is run, **Then** it operates correctly on the filtered, memoized view. + +--- + +### User Story 4 - Upward Context Traversal (Priority: P4) + +A developer needs to know what higher-order structures contain a given node or relationship — which walks include it, which annotations are attached to it, which containers reference it. They call `queryContainers` or the derived helpers (`queryAnnotationsOf`, `queryWalksContaining`, `queryCoMembers`) to answer these questions without precomputing a context record. + +**Why this priority**: Required by the planned GraphMutation feature for coherent deletion, and independently useful for impact analysis and pattern matching. Lower priority than core traversal because it is not needed for basic algorithm use. 
+ +**Independent Test**: Can be fully tested by constructing a graph with annotations and walks, calling `queryContainers` on a node, and verifying all containing structures are returned. + +**Acceptance Scenarios**: + +1. **Given** a node that participates in a walk and has an annotation, **When** `queryContainers` is called, **Then** both the walk and the annotation are returned. +2. **Given** a `GraphClassifier` and a `GraphQuery`, **When** `queryAnnotationsOf` is called for a node, **Then** only annotation-classified containers are returned. +3. **Given** two nodes that share a common walk, **When** `queryCoMembers` is called with one node and the walk as container, **Then** the other node is returned. + +--- + +### User Story 5 - Backward-Compatible GraphLens Algorithms (Priority: P5) + +A developer using the existing `bfs`, `findPath`, or `connectedComponents` functions on `GraphLens` makes no changes to their code after this feature is introduced. The functions continue to work as before, now implemented as wrappers over the new algorithm module. + +**Why this priority**: Preserving backward compatibility is a constraint, not a new capability. It must hold but is not a primary user goal. + +**Independent Test**: Can be fully tested by running the existing test suite for `GraphLens` algorithms without modification and verifying all tests pass. + +**Acceptance Scenarios**: + +1. **Given** existing code calling `bfs lens startNode`, **When** the feature is deployed, **Then** the call compiles and returns the same result as before. +2. **Given** existing code calling `connectedComponents lens`, **When** the feature is deployed, **Then** results are identical to the pre-feature implementation. + +--- + +### Edge Cases + +- What happens when `queryNodeById` or `queryRelationshipById` is called with an identifier that does not exist in the graph? The result must be `Nothing`, not an error. 
+- What happens when `shortestPath` is called between two nodes with no path (given the supplied `TraversalWeight`)? The result must be `Nothing`. +- What happens when `topologicalSort` is called on a graph with a cycle? The result must be `Nothing` (or an explicit failure value), not an infinite loop. +- What happens when `frameQuery` produces an empty graph (no nodes match the predicate)? Algorithms must terminate correctly and return empty results. +- What happens when `queryContainers` is called on an element that belongs to no containers? An empty list is returned. +- What happens when a custom `TraversalWeight` returns infinity for all relationships from a node? That node is treated as a dead end; traversal does not proceed through it. + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: The library MUST provide a `GraphQuery v` type that abstracts graph traversal and lookup as a record of functions, independent of any specific graph representation. +- **FR-002**: `GraphQuery v` MUST include fields for: enumerating all nodes, enumerating all relationships, finding incident relationships for a node, finding the source of a relationship, finding the target of a relationship, computing the degree of a node, looking up a node by identifier, looking up a relationship by identifier, and finding all containers of an element. +- **FR-003**: The library MUST provide a `TraversalWeight v` type that encodes both traversal direction and edge weight as a function from a relationship and direction to a numeric cost. +- **FR-004**: The library MUST provide canonical `TraversalWeight` values: `undirected` (uniform cost, direction ignored), `directed` (forward only), and `directedReverse` (backward only). +- **FR-005**: The library MUST provide `fromGraphLens` to construct a `GraphQuery v` from a `GraphLens v`. 
+- **FR-006**: The library MUST provide `fromPatternGraph` to construct a `GraphQuery v` directly from a `PatternGraph v`, without going through `GraphLens`. +- **FR-007**: The library MUST provide a `Pattern.Graph.Algorithms` module with graph algorithms expressed as functions over `GraphQuery v` and `TraversalWeight v`, including: `bfs`, `dfs`, `shortestPath`, `hasPath`, `allPaths`, `isNeighbor`, `isConnected`, `connectedComponents`, `minimumSpanningTree`, `degreeCentrality`, and `betweennessCentrality`. `topologicalSort` and `hasCycle` MUST also be provided but do NOT accept a `TraversalWeight` parameter — they operate on the directed structure implied by relationship endpoint order (source → target). +- **FR-008**: The library MUST provide `frameQuery` to construct a filtered subgraph view as a `GraphQuery v -> GraphQuery v` transformation. +- **FR-009**: The library MUST provide `memoizeIncidentRels` as a `GraphQuery v -> GraphQuery v` transformation that caches incident relationship lookups. +- **FR-010**: The library MUST provide context query helpers `queryAnnotationsOf`, `queryWalksContaining`, and `queryCoMembers` as derived functions built on `GraphQuery` primitives. +- **FR-011**: The existing `bfs`, `findPath`, and `connectedComponents` functions on `GraphLens` MUST be retained as backward-compatible wrappers that delegate to the new `Algorithms` module with `undirected` as the default `TraversalWeight`. +- **FR-012**: `PatternGraph.toGraphLens` MUST be retired; `fromPatternGraph` supersedes it for algorithm access. *Implemented as removal of `toGraphLens` and `toGraphLensWithScope` in this feature (breaking change); migration path is `fromPatternGraph`. See research.md Decision 7.* +- **FR-013**: All traversal algorithms MUST accept a `TraversalWeight v` parameter so that direction and weight are determined at the call site, not embedded in the graph structure. 
+ +### Key Entities + +- **`GraphQuery v`**: A record of traversal and lookup functions that abstracts over any graph representation. Parameterized by the graph value type `v`. Provides upward traversal via `queryContainers` as well as downward traversal via incident relationship and endpoint accessors. +- **`TraversalWeight v`**: A function from a relationship pattern and traversal direction to a numeric cost. Encodes both directionality and edge weight. Supplied by callers to traversal algorithms; not embedded in the graph structure. +- **`TraversalDirection`**: An enumeration (`Forward` | `Backward`) indicating which direction along a relationship is being considered during traversal. +- **Graph Algorithm**: A pure function in `Pattern.Graph.Algorithms` that accepts a `GraphQuery v` (and for traversal algorithms, a `TraversalWeight v`) and returns a result. Algorithms have no dependency on any specific graph representation. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: All graph algorithms (`bfs`, `dfs`, `shortestPath`, `hasPath`, `allPaths`, `isNeighbor`, `isConnected`, `connectedComponents`, `topologicalSort`, `hasCycle`, `minimumSpanningTree`, `degreeCentrality`, `betweennessCentrality`) produce correct results when given a `GraphQuery` derived from either `GraphLens` or `PatternGraph`. +- **SC-002**: All existing tests for `GraphLens`-based algorithms pass without modification after the feature is introduced. +- **SC-003**: A `GraphQuery` derived from `PatternGraph` produces the same algorithm results as a `GraphQuery` derived from an equivalent `GraphLens`, for all implemented algorithms. +- **SC-004**: Calling the same traversal algorithm with `directed` and `undirected` `TraversalWeight` values on a graph containing directed relationships produces demonstrably different results, confirming traversal policy is correctly applied. 
+- **SC-005**: `frameQuery` correctly restricts node and relationship enumeration so that algorithms operating on a framed view never access elements outside the frame. +- **SC-006**: `queryContainers` returns all and only the higher-order structures (walks, annotations, relationships) that directly contain a given element, with no false positives or false negatives. +- **SC-007**: The `GraphQuery` interface can be implemented for a custom graph source (e.g., a hand-constructed record) and all algorithms operate correctly against it, confirming representation independence. + +## Assumptions + +- `GraphValue v` (providing identity extraction via `identify`) is a prerequisite typeclass already available in the codebase; `GraphQuery` depends on it but does not extend it. +- `GraphClassifier` (from feature 034) is available and stable before this feature is implemented, as context query helpers depend on it for category filtering. +- Performance benchmarking of the record-of-functions approach against a typeclass-based equivalent is treated as an implementation concern, not a specification requirement. The specification requires correct behavior; performance optimization (`INLINE`, `UNPACK` pragmas, bulk adjacency) is deferred to the planning phase. +- The `Pattern.Graph.Algorithms` module is initially a single module. Subdivision into sub-modules (`Algorithms.Path`, `Algorithms.Centrality`, etc.) is deferred until the algorithm surface grows large enough to warrant it. +- Centrality algorithm signatures (betweenness, closeness, PageRank) are representative of the category. Exact tuning parameters (convergence tolerance, damping factor) are determined during implementation, not specified here. 
diff --git a/specs/035-graph-query/tasks.md b/specs/035-graph-query/tasks.md new file mode 100644 index 0000000..5e232c0 --- /dev/null +++ b/specs/035-graph-query/tasks.md @@ -0,0 +1,273 @@ +# Tasks: GraphQuery — Portable, Composable Graph Query Interface + +**Input**: Design documents from `/specs/035-graph-query/` +**Branch**: `035-graph-query` +**Prerequisites**: plan.md ✓, spec.md ✓, research.md ✓, data-model.md ✓, contracts/ ✓, quickstart.md ✓ + +**Organization**: Tasks are grouped by user story to enable independent implementation and testing of each story. + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies) +- **[Story]**: Which user story this task belongs to (US1–US5) +- Paths are relative to `libs/pattern/` + +--- + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Register new modules in cabal and create empty file stubs so all phases can compile incrementally. + +- [X] T001 Add `Pattern.Graph.GraphQuery` and `Pattern.Graph.Algorithms` to `exposed-modules` in `libs/pattern/pattern.cabal` +- [X] T002 Add `Spec.Pattern.Graph.GraphQuerySpec` and `Spec.Pattern.Graph.AlgorithmsSpec` to `other-modules` in the `pattern-test` stanza of `libs/pattern/pattern.cabal` +- [X] T003 [P] Create empty module stub `libs/pattern/src/Pattern/Graph/GraphQuery.hs` with module declaration and exports list (no implementations yet) +- [X] T004 [P] Create empty module stub `libs/pattern/src/Pattern/Graph/Algorithms.hs` with module declaration and exports list (no implementations yet) +- [X] T005 [P] Create empty test file `libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs` with module declaration +- [X] T006 [P] Create empty test file `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs` with module declaration +- [X] T007 Verify the project builds with stubs: run `cabal build pattern` from repo root + +**Checkpoint**: Project builds with new empty modules — ready for foundational work. 
+ +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Core types and constructors that every user story depends on. MUST be complete before any user story phase begins. + +**⚠️ CRITICAL**: No user story work can begin until this phase is complete. + +- [X] T008 Implement `TraversalDirection` data type (`Forward | Backward`) with `Eq`, `Show` instances in `libs/pattern/src/Pattern/Graph/GraphQuery.hs` +- [X] T009 Implement `TraversalWeight v` type alias and the three canonical values (`undirected`, `directed`, `directedReverse`) in `libs/pattern/src/Pattern/Graph/GraphQuery.hs` +- [X] T010 Implement `GraphQuery v` record type with all nine fields and full Haddock documentation (categorical interpretation, field semantics, invariants) in `libs/pattern/src/Pattern/Graph/GraphQuery.hs` +- [X] T011 Implement `fromGraphLens :: (GraphValue v, Eq v) => GraphLens v -> GraphQuery v` in `libs/pattern/src/Pattern/Graph/GraphQuery.hs` — derive all fields from existing `Pattern.Graph` functions; `queryNodeById`/`queryRelationshipById` use O(n) scans; `queryContainers` scans relationships and walks +- [X] T012 Implement `fromPatternGraph :: (GraphValue v, Eq v) => PatternGraph extra v -> GraphQuery v` in `libs/pattern/src/Pattern/Graph/GraphQuery.hs` — read directly from `pgNodes`, `pgRelationships`, `pgWalks`, `pgAnnotations` maps; O(log n) lookups for `queryNodeById`/`queryRelationshipById` +- [X] T013 Export `fromPatternGraph` from `libs/pattern/src/Pattern/PatternGraph.hs` — NOTE: circular import prevents re-export; `fromPatternGraph` is available directly from `Pattern.Graph.GraphQuery` +- [X] T014 Remove `toGraphLens` and `toGraphLensWithScope` from `libs/pattern/src/Pattern/PatternGraph.hs`; migration path is `fromPatternGraph` (from `Pattern.PatternGraph` or `Pattern.Graph.GraphQuery`). 
*Implementation chose removal over deprecation; see research.md Decision 7 "Implementation deviation".* +- [X] T015 Write unit tests for `GraphQuery` construction in `libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs`: verify all nine fields return correct values for a known `PatternGraph` and a known `GraphLens` +- [X] T016 Write property test in `libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs`: `fromGraphLens` and `fromPatternGraph` on equivalent graphs produce the same `queryNodes`, `queryRelationships`, `querySource`, `queryTarget` results +- [X] T017 Write unit tests for canonical `TraversalWeight` values in `libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs`: verify `undirected` returns 1.0 for both directions; `directed` returns 1.0 forward and infinity backward; `directedReverse` is the inverse +- [X] T018 Verify the project builds and foundational tests pass: `cabal test pattern-test` + +**Checkpoint**: Foundation ready — `GraphQuery`, `TraversalWeight`, `fromGraphLens`, `fromPatternGraph` all tested. User story phases can now begin. + +--- + +## Phase 3: User Story 1 — Run Graph Algorithms on Any Representation (Priority: P1) 🎯 MVP + +**Goal**: All graph algorithms work against `GraphQuery v` derived from either `GraphLens` or `PatternGraph`, without any intermediate conversion. + +**Independent Test**: Construct a `GraphQuery` from a `PatternGraph`, call `shortestPath` and `connectedComponents`, verify correct results — without ever touching `GraphLens`. 
+ +- [X] T019 [US1] Implement internal neighbor-derivation helper in `libs/pattern/src/Pattern/Graph/Algorithms.hs`: given a `GraphQuery v`, a `TraversalWeight v`, and a node, return all reachable neighbor nodes (filters infinite-cost edges) +- [X] T020 [US1] Implement `bfs :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Pattern v -> [Pattern v]` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T021 [US1] Implement `dfs :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Pattern v -> [Pattern v]` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T022 [US1] Implement `shortestPath :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Pattern v -> Pattern v -> Maybe [Pattern v]` (Dijkstra) in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T023 [US1] Implement `hasPath :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Pattern v -> Pattern v -> Bool` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T024 [US1] Implement `allPaths :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Pattern v -> Pattern v -> [[Pattern v]]` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T025 [US1] Implement `isNeighbor :: Eq (Id v) => GraphQuery v -> TraversalWeight v -> Pattern v -> Pattern v -> Bool` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T026 [US1] Implement `isConnected :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Bool` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T027 [US1] Implement `connectedComponents :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> [[Pattern v]]` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T028 [US1] Implement `topologicalSort :: Ord (Id v) => GraphQuery v -> Maybe [Pattern v]` (DFS-based; returns `Nothing` on cycle) in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T029 [US1] Implement `hasCycle :: Ord (Id v) => GraphQuery v -> Bool` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T030 [US1] Implement `minimumSpanningTree :: Ord (Id v) => 
GraphQuery v -> TraversalWeight v -> [Pattern v]` (Kruskal's or Prim's) in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T031 [US1] Implement `degreeCentrality :: Ord (Id v) => GraphQuery v -> Map (Id v) Double` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T032 [US1] Implement `betweennessCentrality :: Ord (Id v) => GraphQuery v -> TraversalWeight v -> Map (Id v) Double` (Brandes algorithm) in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T033 [US1] Write unit tests for all traversal and path algorithms in `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs`: test `bfs`, `dfs`, `shortestPath`, `hasPath`, `allPaths` on a known graph derived from `PatternGraph` (not `GraphLens`) +- [X] T034 [US1] Write unit tests for structural algorithms in `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs`: `connectedComponents`, `topologicalSort` (DAG and cyclic), `hasCycle`, `minimumSpanningTree` +- [X] T035 [US1] Write unit tests for centrality algorithms in `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs`: `degreeCentrality` and `betweennessCentrality` on a small known graph +- [X] T036 [US1] Write property test: `fromGraphLens` and `fromPatternGraph` on equivalent graphs produce identical `connectedComponents` results in `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs` +- [X] T037 [US1] Write edge case tests in `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs`: `shortestPath` returns `Nothing` when no path exists; `topologicalSort` returns `Nothing` on a cycle; `allPaths` on empty graph returns `[]` +- [X] T038 [US1] Add `{-# INLINE #-}` pragma to `reachableNeighbors` hot-path helper in `libs/pattern/src/Pattern/Graph/Algorithms.hs`; documented in `GraphQuery.hs` why `{-# UNPACK #-}` does not apply (all fields are function/boxed types) +- [X] T039 [US1] Run full test suite and verify all US1 tests pass: `cabal test pattern-test` + +**Checkpoint**: All graph algorithms work on `GraphQuery` from either source. 
US1 independently verified. + +--- + +## Phase 4: User Story 2 — Traversal Direction and Weight at the Call Site (Priority: P2) + +**Goal**: The same `GraphQuery` produces different traversal results when called with `directed` vs `undirected` `TraversalWeight`, and custom weight functions work correctly. + +**Independent Test**: Call `hasPath` on the same `GraphQuery` with `directed` and `undirected` weights on a graph with one-way relationships; verify results differ. + +- [X] T040 [US2] Write directed/undirected differentiation tests in `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs`: graph with A→B relationship; `hasPath directed A B = True`, `hasPath directed B A = False`, `hasPath undirected B A = True` +- [X] T041 [US2] Write custom `TraversalWeight` test in `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs`: define a weight function reading a numeric property; verify `shortestPath` returns the minimum-weight path, not just the shortest-hop path +- [X] T042 [US2] Write property test in `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs`: for any graph with at least one directed relationship, the number of components returned by `connectedComponents undirected` is ≤ the number returned by `connectedComponents directed` +- [X] T043 [US2] Write `directedReverse` test: `hasPath directedReverse B A = True` on the same A→B graph +- [X] T044 [US2] Run US2 tests: `cabal test pattern-test` — all TraversalWeight differentiation tests pass + +**Checkpoint**: Traversal policy is confirmed to be call-site controlled. US2 independently verified. + +--- + +## Phase 5: User Story 3 — Compose Graph Views Without New Types (Priority: P3) + +**Goal**: `frameQuery` and `memoizeIncidentRels` produce correct `GraphQuery` values that algorithms treat as complete graphs. + +**Independent Test**: Apply `frameQuery` with a predicate, call `queryNodes` on the result, verify only matching nodes are returned; run an algorithm and confirm it operates only within the frame. 
+ +- [X] T045 [US3] Implement `frameQuery :: (Pattern v -> Bool) -> GraphQuery v -> GraphQuery v` in `libs/pattern/src/Pattern/Graph/GraphQuery.hs` +- [X] T046 [US3] Implement `memoizeIncidentRels :: Ord (Id v) => GraphQuery v -> GraphQuery v` in `libs/pattern/src/Pattern/Graph/GraphQuery.hs` +- [X] T047 [US3] Write unit tests for `frameQuery` in `libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs` +- [X] T048 [US3] Write unit test: `frameQuery` producing an empty graph — algorithms terminate and return empty results +- [X] T049 [US3] Write unit test for `memoizeIncidentRels` in `libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs` +- [X] T050 [US3] Write composition test in `libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs` +- [X] T051 [US3] Write property test: `frameQuery` preserves all `GraphQuery` invariants +- [X] T052 [US3] Run US3 tests: `cabal test pattern-test` — all frameQuery and memoizeIncidentRels tests pass + +**Checkpoint**: Composability combinators work correctly. US3 independently verified. + +--- + +## Phase 6: User Story 4 — Upward Context Traversal (Priority: P4) + +**Goal**: `queryContainers` and the derived context helpers correctly identify all containing structures for any element. + +**Independent Test**: Construct a graph with annotations and walks, call `queryContainers` on a node, verify all containing structures are returned. 
+ +- [X] T053 [US4] Implement `queryAnnotationsOf :: GraphClassifier extra v -> GraphQuery v -> Pattern v -> [Pattern v]` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T054 [US4] Implement `queryWalksContaining :: GraphClassifier extra v -> GraphQuery v -> Pattern v -> [Pattern v]` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T055 [US4] Implement `queryCoMembers :: GraphQuery v -> Pattern v -> Pattern v -> [Pattern v]` in `libs/pattern/src/Pattern/Graph/Algorithms.hs` +- [X] T056 [US4] Write unit tests for `queryContainers` in `libs/pattern/tests/Spec/Pattern/Graph/GraphQuerySpec.hs` +- [X] T057 [US4] Write unit tests for `queryAnnotationsOf` and `queryWalksContaining` in `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs` +- [X] T058 [US4] Write unit test for `queryCoMembers` in `libs/pattern/tests/Spec/Pattern/Graph/AlgorithmsSpec.hs` +- [X] T059 [US4] Run US4 tests: `cabal test pattern-test` — all context query helper tests pass + +**Checkpoint**: Upward context traversal works correctly. US4 independently verified. + +--- + +## Phase 7: User Story 5 — Backward-Compatible GraphLens Algorithms (Priority: P5) + +**Goal**: Existing `bfs`, `findPath`, `connectedComponents` on `GraphLens` continue to work unchanged, now as wrappers over `Pattern.Graph.Algorithms`. + +**Independent Test**: Run the existing `GraphSpec` test suite without modification; all tests pass. + +- [X] T060 [US5] `bfs`, `findPath`, `connectedComponents` in `Pattern.Graph` retain their original implementations — wrapping them via `Pattern.Graph.Algorithms` is architecturally blocked because it would introduce a module cycle (`Graph` → `GraphQuery` → `PatternGraph` → `Graph`). `fromGraphLens` was moved to `Pattern.Graph.GraphQuery` (imports `Pattern.Graph`); `fromPatternGraph` was moved to `Pattern.PatternGraph` (imports `Pattern.Graph.GraphQuery`). These moves break the cycle; the backward-compatible functions remain in `Pattern.Graph`. 
+- [X] T061 [US5] (see T060 — wrapper approach blocked by module cycle; original implementation retained) +- [X] T062 [US5] (see T060 — wrapper approach blocked by module cycle; original implementation retained) +- [X] T063 [US5] Internal helpers (`bfsHelper`, `findPathHelper`, `findComponents`) retained as they back the public API +- [X] T064 [US5] Existing `Pattern.Graph` tests pass without modification — backward compatibility confirmed +- [X] T065 [US5] Full test suite passes: 786 examples, 0 failures + +**Checkpoint**: All five user stories complete. Full test suite passes. + +--- + +## Phase 8: Polish & Cross-Cutting Concerns + +**Purpose**: Documentation, Haddock coverage, cabal export hygiene, and quickstart validation. + +- [X] T066 [P] Haddock module-level documentation in `Pattern.Graph.GraphQuery.hs`: categorical interpretation, design principles, updated usage example +- [X] T067 [P] Haddock module-level documentation in `Pattern.Graph.Algorithms.hs`: all public functions documented with purpose, complexity, and examples +- [X] T068 [P] All public functions in `Pattern.Graph.GraphQuery` and `Pattern.Graph.Algorithms` have Haddock comments +- [X] T069 Updated `libs/pattern/src/Pattern.hs` to re-export `Pattern.Graph.GraphQuery`; `Pattern.Graph.Algorithms` not re-exported (algorithm names would conflict with `Pattern.Graph` names) +- [X] T070 `pattern.cabal` `exposed-modules` verified complete: `Pattern.Graph.GraphQuery`, `Pattern.Graph.Algorithms`, `Pattern.PatternGraph` all present +- [X] T071 Quickstart smoke test: `fromPatternGraph` moved to `Pattern.PatternGraph`; quickstart.md updated to reflect correct import paths; all examples verified against passing tests +- [X] T073b Representation-independence test written and passing: hand-built `GraphQuery` over A→B→C; `bfs`, `shortestPath`, `connectedComponents` all produce correct results (SC-007) +- [X] T072 `cabal build all` and `cabal test pattern-test` pass: 789 examples, 0 failures +- [ ] T073 
Commit all changes on branch `035-graph-query` with a checkpoint message + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Setup)**: No dependencies — start immediately +- **Phase 2 (Foundational)**: Depends on Phase 1 — BLOCKS all user story phases +- **Phase 3 (US1)**: Depends on Phase 2 — core algorithms; no other story dependency +- **Phase 4 (US2)**: Depends on Phase 2 — traversal weight tests extend Phase 3 algorithms; can run in parallel with Phase 3 once Phase 2 is done +- **Phase 5 (US3)**: Depends on Phase 2 — combinators; can run in parallel with Phase 3/4 +- **Phase 6 (US4)**: Depends on Phase 2 — context helpers; can run in parallel with Phase 3/4/5 +- **Phase 7 (US5)**: Depends on Phase 3 (as originally planned, algorithms must exist before wrappers delegate to them; the wrapper approach was ultimately not used — see T060) +- **Phase 8 (Polish)**: Depends on all user story phases + +### User Story Dependencies + +- **US1 (P1)**: After Phase 2 only — no story dependencies +- **US2 (P2)**: After Phase 2 only — tests extend US1 algorithms but US2 is independently testable +- **US3 (P3)**: After Phase 2 only — combinators are independent of algorithms +- **US4 (P4)**: After Phase 2 only — context helpers are independent of traversal algorithms +- **US5 (P5)**: After Phase 3 (US1) — as originally planned, wrappers delegate to `Algorithms` module (not implemented; see T060) + +### Within Each Phase + +- Tasks within a phase run top-to-bottom unless marked `[P]` +- `[P]` tasks within the same phase can run in parallel (different files, or different functions within the same file) +- Tests run after the implementation tasks they cover + +### Parallel Opportunities + +- T003, T004, T005, T006 (Phase 1) — all parallel, different files +- T008–T014 (Phase 2) — T008/T009 first, then T010; T011/T012 after T010; T013/T014 after T012 +- T020–T032 (Phase 3 implementations) — T019 first (helper), then T020–T032 are largely parallel (different functions in same file; coordinate on imports) +- T033–T037 (Phase 3 tests) — all parallel after implementations +- T040–T043 (Phase 4) — all parallel +- T045–T046 
(Phase 5 implementations) — parallel (different functions) +- T047–T051 (Phase 5 tests) — parallel after implementations +- T053–T055 (Phase 6 implementations) — parallel +- T056–T058 (Phase 6 tests) — parallel after implementations +- T066–T068 (Phase 8) — all parallel + +--- + +## Parallel Example: Phase 3 (US1) + +``` +# After T019 (neighbor helper), launch algorithm implementations in parallel: +Task T020: bfs in libs/pattern/src/Pattern/Graph/Algorithms.hs +Task T021: dfs in libs/pattern/src/Pattern/Graph/Algorithms.hs +Task T022: shortestPath in libs/pattern/src/Pattern/Graph/Algorithms.hs +Task T023: hasPath in libs/pattern/src/Pattern/Graph/Algorithms.hs +Task T028: topologicalSort in libs/pattern/src/Pattern/Graph/Algorithms.hs +Task T029: hasCycle in libs/pattern/src/Pattern/Graph/Algorithms.hs +Task T031: degreeCentrality in libs/pattern/src/Pattern/Graph/Algorithms.hs + +# After implementations, launch tests in parallel: +Task T033: traversal/path tests in AlgorithmsSpec.hs +Task T034: structural algorithm tests in AlgorithmsSpec.hs +Task T035: centrality tests in AlgorithmsSpec.hs +``` + +--- + +## Implementation Strategy + +### MVP First (User Story 1 Only) + +1. Complete Phase 1: Setup (~7 tasks) +2. Complete Phase 2: Foundational (~11 tasks, including tests) +3. Complete Phase 3: US1 — all algorithms (~21 tasks) +4. **STOP and VALIDATE**: `cabal test pattern-test` — all algorithms work on `PatternGraph` directly +5. Demo: construct `PatternGraph`, call `shortestPath`, `connectedComponents` — no `GraphLens` needed + +### Incremental Delivery + +1. Phase 1 + 2 → Foundation ready (types, constructors, tests) +2. Phase 3 → US1 complete: all algorithms work on any `GraphQuery` (MVP) +3. Phase 4 → US2 complete: traversal policy confirmed call-site controlled +4. Phase 5 → US3 complete: composability (framing, memoization) working +5. Phase 6 → US4 complete: upward context traversal working +6. Phase 7 → US5 complete: backward compatibility confirmed +7. 
Phase 8 → Polish, docs, final validation + +### Parallel Team Strategy + +With multiple developers after Phase 2 is complete: + +- Developer A: Phase 3 (US1 — algorithms) +- Developer B: Phase 5 (US3 — combinators, independent of algorithms) +- Developer C: Phase 6 (US4 — context helpers, independent of algorithms) +- Phase 4 (US2) and Phase 7 (US5) follow naturally after Phase 3 + +--- + +## Notes + +- `[P]` tasks operate on different files or different functions within a file — coordinate on imports +- Each user story phase is independently completable and testable via `cabal test pattern-test` +- T014 implemented as removal of `toGraphLens` and `toGraphLensWithScope` (breaking change); callers must migrate to `fromPatternGraph`. See research.md Decision 7. +- The `memoizeIncidentRels` implementation (T046) should use a pure approach (pre-build a `Map (Id v) [Pattern v]` from `queryNodes`) rather than `IORef` to stay in pure Haskell +- Constitution requires property-based tests (QuickCheck) for mathematical properties — T016, T036, T042, T051 are the key property tests +- Commit after each phase checkpoint (constitution: frequent checkpoint commits)