From 461283ba6e510645923746b640b73d09d6e4d776 Mon Sep 17 00:00:00 2001 From: fr33domlover Date: Wed, 20 Jul 2016 11:24:01 +0000 Subject: [PATCH] Simpler path selection in SQL transitive reduction The transitive reduction query works by removing every edge that isn't the only path between its nodes, i.e. every edge for which a longer path exists. The first step is to pick all the paths which include 2 or more edges. The initial code did that by appending in-edges to all paths, which results in unnecessary duplicates and an extra INNER JOIN. Now, instead, just pick all the paths whose length is more than 2 nodes (array_length(temp.path, 1) > 2). This is hopefully not just simpler, but also faster. --- .../Persist/Sql/Graph/TransitiveReduction.hs | 56 +++++++++---------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/src/Database/Persist/Sql/Graph/TransitiveReduction.hs b/src/Database/Persist/Sql/Graph/TransitiveReduction.hs index 2b26df8..da34a30 100644 --- a/src/Database/Persist/Sql/Graph/TransitiveReduction.hs +++ b/src/Database/Persist/Sql/Graph/TransitiveReduction.hs @@ -58,11 +58,12 @@ import Database.Persist.Local.Sql.Orphan.Common -- > -- > EXCEPT -- > --- > SELECT e.* --- > FROM edge AS pre --- > INNER JOIN temp ON pre.dest = temp.path[1] --- > INNER JOIN edge AS e ON e.source = pre.source AND e.dest = temp.id --- > WHERE NOT temp.cycle +-- > SELECT edge.* +-- > FROM edge INNER JOIN temp +-- > ON edge.source = temp.path[1] AND +-- > edge.dest = temp.id +-- > WHERE array_length(temp.path, 1) > 2 AND +-- > NOT temp.cycle trrSelect :: ( MonadIO m , PersistEntityGraph node edge @@ -81,8 +82,6 @@ trrSelect proxy = do tid = DBName "id" tpath = DBName "path" tcycle = DBName "cycle" - edgeP = DBName "pre" - edgeE = DBName "e" dbname = connEscapeName conn ecols = T.intercalate ", " $ entityColumnNames tEdge conn qecols name = @@ -121,14 +120,13 @@ trrSelect proxy = do , " SELECT ", ecols , " FROM ", dbname $ entityDB tEdge , " EXCEPT " - , " SELECT ", qecols edgeE - , " FROM ", dbname $ entityDB tEdge, " AS 
", dbname edgeP - , " INNER JOIN ", dbname temp - , " ON ", edgeP ^* fieldDB fwd, " = ", temp ^* tpath, "[1]" - , " INNER JOIN ", dbname $ entityDB tEdge, " AS ", dbname edgeE - , " ON ", edgeE ^* fieldDB bwd, " = ", edgeP ^* fieldDB bwd - , " AND ", edgeE ^* fieldDB fwd, " = ", temp ^* tid - , " WHERE NOT ", temp ^* tcycle + , " SELECT ", qecols $ entityDB tEdge + , " FROM ", entityDB tEdge <#> temp + , " ON " + , entityDB tEdge ^* fieldDB bwd, " = ", temp ^* tpath, "[1] AND " + , entityDB tEdge ^* fieldDB fwd, " = ", temp ^* tid + , " WHERE array_length(", temp ^* tpath, ", 1) > 2 AND NOT " + , temp ^* tcycle ] rawSql sql [] @@ -148,11 +146,12 @@ trrSelect proxy = do -- > ) -- > DELETE FROM edge -- > WHERE id IN ( --- > SELECT e.id --- > FROM edge AS pre --- > INNER JOIN temp ON pre.dest = temp.path[1] --- > INNER JOIN edge AS e ON e.source = pre.source AND e.dest = temp.id --- > WHERE NOT temp.cycle +-- > SELECT edge.id +-- > FROM edge INNER JOIN temp +-- > ON edge.source = temp.path[1] AND +-- > edge.dest = temp.id +-- > WHERE array_length(temp.path, 1) > 2 AND +-- > NOT temp.cycle -- > ) trrApply :: ( MonadIO m @@ -172,8 +171,6 @@ trrApply proxy = do tid = DBName "id" tpath = DBName "path" tcycle = DBName "cycle" - edgeP = DBName "pre" - edgeE = DBName "e" dbname = connEscapeName conn t ^* f = dbname t <> "." 
<> dbname f t <#> s = dbname t <> " INNER JOIN " <> dbname s @@ -205,14 +202,13 @@ trrApply proxy = do , sqlStep fwd bwd , " ) DELETE FROM ", dbname $ entityDB tEdge , " WHERE ", entityDB tEdge ^* fieldDB (entityId tEdge), " IN (" - , " SELECT ", edgeE ^* fieldDB (entityId tEdge) - , " FROM ", dbname $ entityDB tEdge, " AS ", dbname edgeP - , " INNER JOIN ", dbname temp - , " ON ", edgeP ^* fieldDB fwd, " = ", temp ^* tpath, "[1]" - , " INNER JOIN ", dbname $ entityDB tEdge, " AS ", dbname edgeE - , " ON ", edgeE ^* fieldDB bwd, " = ", edgeP ^* fieldDB bwd - , " AND ", edgeE ^* fieldDB fwd, " = ", temp ^* tid - , " WHERE NOT ", temp ^* tcycle + , " SELECT ", entityDB tEdge ^* fieldDB (entityId tEdge) + , " FROM ", entityDB tEdge <#> temp + , " ON " + , entityDB tEdge ^* fieldDB bwd, " = ", temp ^* tpath + , "[1] AND ", entityDB tEdge ^* fieldDB fwd, " = ", temp ^* tid + , " WHERE array_length(", temp ^* tpath, ", 1) > 2 AND NOT " + , temp ^* tcycle , " )" ] rawExecuteCount sql []