From 9ba6761459d88ecff6f3bdf09c49583d8b16ac99 Mon Sep 17 00:00:00 2001 From: fr33domlover Date: Mon, 16 May 2016 14:02:43 +0000 Subject: [PATCH] Darcs inventory parser: Add hash types and parse previous inventory --- src/Darcs/Local/PatchInfo/Parser.hs | 182 +++++++++++++++------------- src/Darcs/Local/PatchInfo/Types.hs | 71 +++++++++-- src/Vervis/Darcs.hs | 2 +- vervis.cabal | 2 + 4 files changed, 164 insertions(+), 93 deletions(-) diff --git a/src/Darcs/Local/PatchInfo/Parser.hs b/src/Darcs/Local/PatchInfo/Parser.hs index 3811664..41645b8 100644 --- a/src/Darcs/Local/PatchInfo/Parser.hs +++ b/src/Darcs/Local/PatchInfo/Parser.hs @@ -27,12 +27,12 @@ module Darcs.Local.PatchInfo.Parser ) where -import Prelude hiding (takeWhile) +import Prelude hiding (take, takeWhile) -import Control.Applicative (many) -import Control.Monad (replicateM, replicateM_) +import Control.Applicative (many, optional, liftA2) +import Control.Arrow (second) +import Control.Monad (replicateM_) import Crypto.Hash -import Crypto.Hash.Algorithms (SHA1 (SHA1)) import Data.Attoparsec.ByteString import Data.ByteArray (convert) import Data.ByteString (ByteString) @@ -43,7 +43,8 @@ import System.FilePath (()) import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as BC -import qualified Storage.Hashed.Hash as H +import qualified Data.ByteString.Base16 as B16 +import qualified Data.ByteString.Lex.Integral as BX import Control.Applicative.Local import Darcs.Local.PatchInfo.Types @@ -51,15 +52,22 @@ import Data.Attoparsec.ByteString.Local import Data.ByteString.Local (stripPrefix) import Data.Text.UTF8.Local (decodeStrict) -lf, space, star, dash, zero, nine, sqrOpen, sqrClose :: Word8 +lf :: Word8 lf = 10 +space :: Word8 space = 32 +star :: Word8 star = 42 +dash :: Word8 dash = 45 +zero :: Word8 zero = 48 +nine :: Word8 nine = 57 +sqrOpen :: Word8 sqrOpen = 91 -sqrClose = 93 +--sqrClose :: Word8 +--sqrClose = 93 digit :: Parser Word8 digit = satisfy $ \ w -> zero <= w && w <= nine @@ -87,67 +95,92 @@ patchTimeP = do month <- decimal2P day <- decimal2P - hour <- decimal2P - minute <- decimal2P - second <- decimal2P + hours <- decimal2P + minutes <- decimal2P + seconds <- decimal2P case fromGregorianValid year month day of Nothing -> fail "Invalid patch date" Just uday -> return UTCTime { utctDay = uday , utctDayTime = - secondsToDiffTime $ 3600 * hour + 60 * minute + second + secondsToDiffTime $ 3600 * hours + 60 * minutes + seconds } +line :: Parser ByteString +line = restOfLine + +restOfLine :: Parser ByteString +restOfLine = takeWhile (/= lf) + +eol :: Parser () +eol = skip (== lf) + skipLine :: Parser () -skipLine = do - skipWhile (/= lf) - skip (== lf) +skipLine = skipWhile (/= lf) skipRestOfLine :: Parser () skipRestOfLine = skipLine skipPatchP :: Parser () -skipPatchP = do +skipPatchP = -- title - skipLine + skipLine *> eol *> -- author, inverted, time - skipLine + skipLine *> eol *> -- ignore, description - skipMany $ do - skip (== space) - skipRestOfLine + (skipMany $ skip (== space) *> skipRestOfLine *> eol) *> -- end of info - string "] \n" + (string "] \n") *> -- hash - skipWhile (/= lf) + skipLine -pristineP :: Parser (Maybe Int, H.Hash) -pristineP = do - string "pristine:" - (,) Nothing . H.decodeBase16 <$> takeWhile (/= lf) +sha256P :: Parser ByteString +sha256P = do + bs <- take 64 + case second B.null $ B16.decode bs of + (h, True) -> return h + _ -> fail "SHA256 decoding from hex failed" + +sizeP :: Parser Int +sizeP = do + bs <- take 10 + case second B.null <$> BX.readDecimal bs of + Just (n, True) -> return n + _ -> fail "sizeP failed" + +sizeSha256P :: Parser (Int, ByteString) +sizeSha256P = liftA2 (,) sizeP (skip (== dash) *> sha256P) + +pristineP :: Parser PristineHash +pristineP = string "pristine:" *> (PristineHash <$> sha256P) + +prevInvP :: Parser InventoryHash +prevInvP = + string "Starting with inventory" *> eol *> + (uncurry InventoryHash <$> sizeSha256P) patchInfoRawP :: Parser PatchInfoRaw patchInfoRawP = do word8 sqrOpen title <- takeWhile1 (/= lf) - word8 lf + eol author <- takeWhile1 (/= star) word8 star inverted <- (/= star) <$> (satisfy $ \ c -> c == star || c == dash) (timeRaw, time) <- match patchTimeP - word8 lf + eol word8 space junkp <- string "Ignore-this: " junkc <- takeWhile1 (/= lf) - word8 lf + eol - lines <- many $ word8 space *> takeWhile (/= lf) <* word8 lf + lines <- many $ word8 space *> takeWhile (/= lf) <* eol string "] \nhash: " - hash <- takeWhile1 (/= lf) + hash <- sizeSha256P return PatchInfoRaw { pirAuthor = author @@ -185,7 +218,7 @@ refinePatchInfo pir = l -> Just $ BC.unlines l in PatchInfo { piAuthor = decodeStrict $ pirAuthor pir - , piHash = pirHash pir + , piHash = uncurry ContentHash $ pirHash pir , piTitle = decodeStrict title , piDescription = decodeStrict <$> description , piTag = tag @@ -194,71 +227,54 @@ refinePatchInfo pir = -- | Parse patch metadata and compute the metadata's hash, which can be used as -- a patch identifier for lookup and matching. -patchInfoP :: Parser (PatchInfo, ByteString) +patchInfoP :: Parser (PatchInfo, PatchHash) patchInfoP = do pir <- patchInfoRawP - return (refinePatchInfo pir, convert $ hashPatchInfo SHA1 pir) - --- NEXT current plan: --- --- (0) all parses are incremental!!! --- (1) use CountP below to determine number of patches --- (2) given pagination details determine offset and limit --- (3) parse the patches we want --- (4) reverse their order... --- (5) generate a whamlet widget from that --- (*) FOR NOW no pagination, just read the entire thing once... + return (refinePatchInfo pir, PatchHash $ convert $ hashPatchInfo SHA1 pir) patchInfosCountP :: Parser Int patchInfosCountP = do - skipWhile (/= lf) -- pristine hash - n <- length <$> (many $ word8 lf >> skipPatchP) - word8 lf + -- pristine hash + skipLine + -- previous inventory + optional $ + eol *> string "Starting" *> skipRestOfLine *> eol *> + skipLine + -- patch info + n <- length <$> (many $ eol *> skipPatchP) + eol return n patchInfosAllP :: Parser PatchSeq -patchInfosAllP = do - (psize, phash) <- pristineP - ps <- many $ word8 lf >> patchInfoP - word8 lf - return PatchSeq - { psPristineHash = phash - , psPristineSize = psize - , psPatches = ps - } +patchInfosAllP = PatchSeq + <$> pristineP + <*> optional (eol *> prevInvP) + <*> many (eol *> patchInfoP) + <* eol patchInfosOffsetP :: Int -> Parser PatchSeq -patchInfosOffsetP off = do - (psize, phash) <- pristineP - replicateM_ off $ word8 lf >> skipPatchP - ps <- many $ word8 lf >> patchInfoP - word8 lf - return PatchSeq - { psPristineHash = phash - , psPristineSize = psize - , psPatches = ps - } +patchInfosOffsetP off = PatchSeq + <$> pristineP + <*> optional (eol *> prevInvP) + <*> ( replicateM_ off (eol *> skipPatchP) *> + many (eol *> patchInfoP) + ) + <* eol + patchInfosLimitP :: Int -> Parser PatchSeq -patchInfosLimitP lim = do - (psize, phash) <- pristineP - ps <- atMost lim $ word8 lf >> patchInfoP - return PatchSeq - { psPristineHash = phash - , psPristineSize = psize - , psPatches = ps - } +patchInfosLimitP lim = PatchSeq + <$> pristineP + <*> optional (eol *> prevInvP) + <*> atMost lim (eol *> patchInfoP) patchInfosOffsetLimitP :: Int -> Int -> Parser PatchSeq -patchInfosOffsetLimitP off lim = do - (psize, phash) <- pristineP - replicateM_ off $ word8 lf >> skipPatchP - ps <- atMost lim $ word8 lf >> patchInfoP - return PatchSeq - { psPristineHash = phash - , psPristineSize = psize - , psPatches = ps - } +patchInfosOffsetLimitP off lim = PatchSeq + <$> pristineP + <*> optional (eol *> prevInvP) + <*> ( replicateM_ off (eol *> skipPatchP) *> + atMost lim (eol *> patchInfoP) + ) darcsDir :: FilePath darcsDir = "_darcs" diff --git a/src/Darcs/Local/PatchInfo/Types.hs b/src/Darcs/Local/PatchInfo/Types.hs index f3779ba..e9291f4 100644 --- a/src/Darcs/Local/PatchInfo/Types.hs +++ b/src/Darcs/Local/PatchInfo/Types.hs @@ -15,23 +15,54 @@ module Darcs.Local.PatchInfo.Types ( PatchInfoRaw (..) + , PatchHash (..) + , ContentHash (..) + , InventoryHash (..) + , PristineHash (..) , PatchInfo (..) , PatchSeq (..) ) where +-- TODO +-- +-- Apparently, after a while, some of the patches are moved from +-- hashed_inventory into the inventories/ dir. So the patch set contains more +-- than one group. This means I need to extend my parser to cover this case. +-- Sources for info about this thing: +-- +-- * Darcs source code +-- * Darcs wiki +-- * Local Darcs repos I have +-- +-- From Darcs source code: +-- +-- > The patches in a repository are stored in chunks broken up at \"clean\" +-- > tags. A tag is clean if the only patches before it in the current +-- > repository ordering are ones that the tag depends on (either directly +-- > or indirectly). Each chunk is stored in a separate inventory file on disk. +-- > +-- > A 'PatchSet' represents a repo's history as the list of patches since the +-- > last clean tag, and then a list of patch lists each delimited by clean tags. +-- > +-- > A 'Tagged' is a single chunk of a 'PatchSet'. It has a 'PatchInfo' +-- > representing a clean tag, the hash of the previous inventory (if it exists), +-- > and the list of patches since that previous inventory. +-- +-- Let's start with finding out the format of the binary inventories and +-- parsing them. + import Prelude import Data.ByteString (ByteString) import Data.Text (Text) import Data.Time.Clock (UTCTime) -import Storage.Hashed.Hash (Hash) -- | Patch metadata in raw form. This is intended for accurate hashing of the -- patch info. data PatchInfoRaw = PatchInfoRaw { pirAuthor :: ByteString - , pirHash :: ByteString + , pirHash :: (Int, ByteString) , pirTitle :: ByteString , pirDescription :: [ByteString] , pirJunkPrefix :: ByteString @@ -40,14 +71,36 @@ data PatchInfoRaw = PatchInfoRaw , pirInverted :: Bool } +-- | A SHA1 hash of the patch info (author, title, description including junk, +-- timestamp). The hash is in binary form, not hex, i.e. its size is always 20 +-- bytes. +newtype PatchHash = PatchHash { unPatchHash :: ByteString } + +-- | Content size and SHA256 hash of a patch's info and content. The hash is in +-- binary form, not hex, i.e. its size is always 32 bytes. +data ContentHash = ContentHash + { chSize :: Int + , chHash :: ByteString + } + +-- | Content size and SHA256 hash of an inventory (a patch set in a single +-- invetory file). The hash is in binary form, not hex, i.e. its size is always +-- 32 bytes. +data InventoryHash = InventoryHash + { ihSize :: Int + , ihHash :: ByteString + } + +-- | A SHA256 hash of the entire recorded state of the repo. The hash is in +-- binary form, not hex, i.e. its size is always 32 bytes. +newtype PristineHash = PristineHash { unPristineHash :: ByteString } + -- | Patch metadata read from the inventory file. data PatchInfo = PatchInfo { -- | Author name and email piAuthor :: Text - -- | Currently this is the patch hash in textual form. Possibly I'll - -- change to binary form when I find out the encoding scheme of the hash - -- string. - , piHash :: ByteString + -- | Patch content hash + , piHash :: ContentHash -- | Single message line , piTitle :: Text -- | Optional description, may contain several lines @@ -60,7 +113,7 @@ data PatchInfo = PatchInfo -- | The information from the hashed inventory file. data PatchSeq = PatchSeq - { psPristineHash :: Hash - , psPristineSize :: Maybe Int - , psPatches :: [(PatchInfo, ByteString)] + { psPristineHash :: PristineHash + , psPrevious :: Maybe InventoryHash + , psPatches :: [(PatchInfo, PatchHash)] } diff --git a/src/Vervis/Darcs.hs b/src/Vervis/Darcs.hs index e8ed775..9633007 100644 --- a/src/Vervis/Darcs.hs +++ b/src/Vervis/Darcs.hs @@ -132,7 +132,7 @@ readChangesView path off lim = fmap maybeRight $ runExceptT $ do let toLE pi h = LogEntry { leAuthor = T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi - , leHash = decodeStrict $ B16.encode h + , leHash = decodeStrict $ B16.encode $ unPatchHash h , leMessage = piTitle pi , leTime = intervalToEventTime $ diff --git a/vervis.cabal b/vervis.cabal index 59aa614..649c1bf 100644 --- a/vervis.cabal +++ b/vervis.cabal @@ -134,6 +134,8 @@ library , blaze-markup , byteable , bytestring + -- for Darcs.Local.PatchInfo.Parser + , bytestring-lexing , case-insensitive , classy-prelude , classy-prelude-conduit