From a0945bfd8717abc97f63a26d2ed0859114728fdd Mon Sep 17 00:00:00 2001 From: fr33domlover Date: Sun, 17 Apr 2016 17:55:23 +0000 Subject: [PATCH] Use Pandoc for document rendering, for now just Markdown --- src/Text/FilePath/Local.hs | 45 +++++++ src/Vervis/Handler/Repo.hs | 14 ++- src/Vervis/MediaType.hs | 121 +++++++++++++++++++ src/Vervis/Readme.hs | 9 +- src/Vervis/Render.hs | 237 ++++++++++++++++++++++++++++++++++--- vervis.cabal | 6 + 6 files changed, 411 insertions(+), 21 deletions(-) create mode 100644 src/Text/FilePath/Local.hs create mode 100644 src/Vervis/MediaType.hs diff --git a/src/Text/FilePath/Local.hs b/src/Text/FilePath/Local.hs new file mode 100644 index 0000000..ea15c8f --- /dev/null +++ b/src/Text/FilePath/Local.hs @@ -0,0 +1,45 @@ +{- This file is part of Vervis. + - + - Written in 2016 by fr33domlover . + - + - ♡ Copying is an act of love. Please copy, reuse and share. + - + - The author(s) have dedicated all copyright and related and neighboring + - rights to this software to the public domain worldwide. This software is + - distributed without any warranty. + - + - You should have received a copy of the CC0 Public Domain Dedication along + - with this software. If not, see + - . + -} + +-- | File path utilities for 'Data.Text.Text'. +module Text.FilePath.Local + ( -- * Types + FileName + , FileBaseName + , FileExtension + -- * Functions + , breakExt + ) +where + +import Prelude + +import Control.Arrow ((***)) +import Data.Text (Text) +import System.FilePath + +import qualified Data.Text as T + +type FileName = Text + +type FileBaseName = Text + +type FileExtension = Text + +breakExt :: FileName -> (FileBaseName, FileExtension) +breakExt name = + case id *** T.uncons $ T.break isExtSeparator name of + (_, Nothing) -> (name, T.empty) + (p, Just (_, r)) -> (p, r) diff --git a/src/Vervis/Handler/Repo.hs b/src/Vervis/Handler/Repo.hs index 79ed110..0889f2b 100644 --- a/src/Vervis/Handler/Repo.hs +++ b/src/Vervis/Handler/Repo.hs @@ -64,10 +64,12 @@ import qualified Data.Set as S (member) import qualified Data.Text.Lazy.Encoding as L (decodeUtf8With) import Data.ByteString.Char8.Local (takeLine) +import Text.FilePath.Local (breakExt) import Vervis.Form.Repo import Vervis.Foundation import Vervis.Git (timeAgo') import Vervis.Path +import Vervis.MediaType (chooseMediaType) import Vervis.Model import Vervis.Readme import Vervis.Render @@ -147,8 +149,8 @@ getRepoSource repository user repo ref dir = do Nothing -> return Nothing Just tree -> do let dir' = map (entName . encodeUtf8) dir - mRootOid <- resolveTreePath git tree dir' - target <- case mRootOid of + mTargetOid <- resolveTreePath git tree dir' + target <- case mTargetOid of Nothing -> return $ Right tree Just oid -> do obj <- getObject_ git (unObjId oid) True @@ -164,7 +166,7 @@ getRepoSource repository user repo ref dir = do let r = case mreadme of Nothing -> Nothing Just (t, b) -> - Just (t, renderReadme t b) + Just (t, renderReadme dir t b) return $ Right (v, r) return $ Just (branches, tags, view) else return Nothing @@ -177,7 +179,11 @@ getRepoSource repository user repo ref dir = do ) display <- case view of Left b -> return $ Left $ - renderSource (unpack $ last dir) (blobGetContent b) + let name = last dir + parent = init dir + (base, ext) = breakExt name + mediaType = chooseMediaType parent base ext () () + in renderSource mediaType (blobGetContent b) Right (v, mr) -> return $ Right (map mkrow v, mr) let parent = if null dir then [] else init dir dirs = zip parent (tail $ inits parent) diff --git a/src/Vervis/MediaType.hs b/src/Vervis/MediaType.hs new file mode 100644 index 0000000..1bf237f --- /dev/null +++ b/src/Vervis/MediaType.hs @@ -0,0 +1,121 @@ +{- This file is part of Vervis. + - + - Written in 2016 by fr33domlover . + - + - ♡ Copying is an act of love. Please copy, reuse and share. + - + - The author(s) have dedicated all copyright and related and neighboring + - rights to this software to the public domain worldwide. This software is + - distributed without any warranty. + - + - You should have received a copy of the CC0 Public Domain Dedication along + - with this software. If not, see + - . + -} + +-- | File content types and tools for detecting them. The focus is on content +-- that gets special treatment in Vervis, and not general MIME type modeling or +-- detection (although that could be done in the future). +module Vervis.MediaType + ( MediaType (..) + , FileName + , FileBaseName + , FileExtension + , WorkType + , SourceViewOptions + , chooseMediaType + ) +where + +import Prelude + +import Data.Text (Text) + +data MediaType + = PlainText + | XML + | JSON + | YAML + | HTML + | CSS + | Markdown + | CSource + | CHeader + | Haskell + | LiterateHaskell + | CabalPackageDescription + | PersistentTemplate + | YesodRouteTemplate + | Hamlet + | Cassius + deriving Show + +type FileName = Text + +type FileBaseName = Text + +type FileExtension = Text + +type WorkType = () + +type SourceViewOptions = () + +chooseMediaType + :: [FileName] + -> FileBaseName + -> FileExtension + -> WorkType -- project type + -> SourceViewOptions -- e.g. whether to see rendered pages or their sources + -> MediaType +chooseMediaType dir base ext wt opts = + case (dir, base, ext, wt) of + -- * Data interchange + (_, _, "xml" , _) -> PlainText + (_, _, "json" , _) -> PlainText + (_, _, "yml" , _) -> PlainText + (_, _, "yaml" , _) -> PlainText + -- * Documents + (_, _, "txt" , _) -> PlainText + (_, _, "md" , _) -> Markdown + (_, _, "mdwn" , _) -> Markdown + (_, _, "mkdn" , _) -> Markdown + (_, _, "markdown", _) -> Markdown + -- * Web page basics + (_, _, "html" , _) -> PlainText + (_, _, "xhtml" , _) -> PlainText + (_, _, "css" , _) -> PlainText + (_, _, "js" , _) -> PlainText + -- * Programming languages + -- ** C + (_, _, "c" , _) -> PlainText + (_, _, "h" , _) -> PlainText + -- ** C++ + (_, _, "cc" , _) -> PlainText + (_, _, "cpp" , _) -> PlainText + (_, _, "cxx" , _) -> PlainText + (_, _, "hh" , _) -> PlainText + (_, _, "hpp" , _) -> PlainText + -- ** Haskell + (_, _, "hs" , _) -> Haskell + (_, _, "lhs" , _) -> PlainText + (_, _, "cabal" , _) -> PlainText + (_, _, "hamlet" , _) -> PlainText + (_, _, "cassius" , _) -> PlainText + -- ** Java + (_, _, "java" , _) -> PlainText + -- ** Lisp + (_, _, "cl" , _) -> PlainText + (_, _, "el" , _) -> PlainText + -- ** Lua + (_, _, "lua" , _) -> PlainText + -- ** Perl + (_, _, "pl" , _) -> PlainText + -- ** PHP + (_, _, "php" , _) -> PlainText + -- ** Python + (_, _, "py" , _) -> PlainText + -- ** Ruby + (_, _, "rb" , _) -> PlainText + -- ** Scheme + (_, _, "scm" , _) -> PlainText + (_, _, _ , _) -> PlainText diff --git a/src/Vervis/Readme.hs b/src/Vervis/Readme.hs index 00402a7..6a59599 100644 --- a/src/Vervis/Readme.hs +++ b/src/Vervis/Readme.hs @@ -33,7 +33,9 @@ import Data.Text.Encoding.Error (strictDecode) import System.FilePath (isExtSeparator) import Vervis.Foundation (Widget) +import Vervis.MediaType (chooseMediaType) import Vervis.Render (renderSource) +import Text.FilePath.Local (breakExt) -- | Check if the given filename should be considered as README file. Assumes -- a flat filename which doesn't contain a directory part. @@ -59,5 +61,8 @@ findReadme git tree = go $ treeGetEnts tree else go es -- | Render README content into a widget for inclusion in a page. -renderReadme :: Text -> ByteString -> Widget -renderReadme name content = renderSource (unpack name) content +renderReadme :: [Text] -> Text -> ByteString -> Widget +renderReadme dir name content = + let (base, ext) = breakExt name + mediaType = chooseMediaType dir base ext () () + in renderSource mediaType content diff --git a/src/Vervis/Render.hs b/src/Vervis/Render.hs index 561732c..97d78f4 100644 --- a/src/Vervis/Render.hs +++ b/src/Vervis/Render.hs @@ -13,47 +13,253 @@ - . -} +{-# Language CPP #-} + -- | Tools for rendering repository file contents and other source files. +-- +-- There are several ways to render a file: +-- +-- (1) As a source file, plain text and with line numbers +-- (2) As a source file, syntax highlighted and with line numbers +-- (3) As a plain text document +-- (4) As a document rendered to HTML, e.g. Markdown is a popular format +-- (5) As a document rendered to a custom format, e.g. presentation +-- +-- The difference between 3 and 5 is line numbers and font (3 would use regular +-- text font, while 5 would use monospaced font). +-- +-- At the time of writing, not all rendering modes are implemented. The current +-- status, assuming I'm keeping it updated, is: +-- +-- (1) Partially implemented: No line numbers +-- (2) Implemented, using line numbers generated by @highlighter2@ formatter +-- (3) Not implemented +-- (4) Not implemented +-- (5) Not implmented module Vervis.Render - ( renderPlain - , renderHighlight - , renderSource + ( --renderPlain + --, renderHighlight + renderSource ) where import Prelude import Control.Monad.Logger (logDebug, logWarn) -import Data.ByteString.Lazy (ByteString, toStrict) +import Data.Foldable (for_) +import Data.Maybe (fromMaybe) import Data.Monoid ((<>)) -import Data.Text (pack) -import Data.Text.Encoding.Error (lenientDecode) -import Data.Text.Lazy.Encoding (decodeUtf8With) -import Formatting hiding (format) +--import Formatting hiding (format) +import Text.Blaze.Html (preEscapedToMarkup) +import Text.Blaze.Html.Renderer.Text (renderHtml) import Text.Highlighter (lexerFromFilename, runLexer, Lexer (lName)) import Text.Highlighter.Formatters.Html (format) +import Text.Highlighting.Kate.Styles (tango) +import Text.HTML.SanitizeXSS (sanitizeBalance) +import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Options +import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Writers.HTML import Yesod.Core.Widget (whamlet, toWidget) -import Vervis.Foundation (Widget) +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as T +import qualified Data.Text.Encoding.Error as TE +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TLE +import qualified Text.Highlighter.Lexers.Haskell as L.Haskell -renderPlain :: ByteString -> Widget +import Vervis.Foundation (Widget) +import Vervis.MediaType (MediaType (..)) + +renderPlain :: BL.ByteString -> Widget renderPlain content = [whamlet|
-            #{decodeUtf8With lenientDecode content}
+            #{TLE.decodeUtf8With TE.lenientDecode content}
     |]
 
-renderHighlight
-    :: FilePath -> ByteString -> Either (Maybe Lexer) (Lexer, Widget)
+{-renderHighlight
+    :: FilePath -> BL.ByteString -> Either (Maybe Lexer) (Lexer, Widget)
 renderHighlight name content =
     case lexerFromFilename name of
         Nothing    -> Left Nothing
         Just lexer ->
-            case runLexer lexer $ toStrict content of
+            case runLexer lexer $ BL.toStrict content of
                 Left err     -> Left $ Just lexer
                 Right tokens -> Right (lexer, toWidget $ format True tokens)
+-}
 
-renderSource :: FilePath -> ByteString -> Widget
+-- * File uploads and wiki attachments
+-- * Wiki pages
+-- * READMEs
+-- * Source files which happen to be documents, e.g. Markdown, manpages,
+--   OrgMode, LaTeX, and
+-- * Literate Haskell files
+--
+-- For now, let's ignore the first two. Which source files, README or other, do
+-- we want to offer to display as HTML rendering?
+--
+-- * [ ] native
+-- * [ ] json
+-- * [x] markdown
+-- * [ ] markdown_strict
+-- * [ ] markdown_phpextra
+-- * [ ] markdown_github
+-- * [ ] markdown_mmd
+-- * [ ] commonmark
+-- * [ ] rst
+-- * [ ] mediawiki
+-- * [ ] docbook
+-- * [ ] opml
+-- * [ ] org
+-- * [ ] textile
+-- * [ ] html
+-- * [ ] latex
+-- * [ ] haddock
+-- * [ ] twiki
+-- * [ ] docx
+-- * [ ] odt
+-- * [ ] t2t
+-- * [ ] epub
+--
+-- Any others not in this list, maybe using other libraries?
+--
+-- * [ ] asciidoc
+-- * [ ] groff manpage
+
+renderHighlight :: Lexer -> BL.ByteString -> Maybe Widget
+renderHighlight lexer content =
+    case runLexer lexer $ BL.toStrict content of
+        Left err     -> Nothing
+        Right tokens -> Just $ toWidget $ format True tokens
+
+renderCode :: Lexer -> BL.ByteString -> Widget
+renderCode lexer content =
+    fromMaybe (renderPlain content) $ renderHighlight lexer content
+
+readerOptions :: ReaderOptions
+readerOptions = def
+    { readerExtensions            = pandocExtensions
+    , readerSmart                 = True
+    , readerStandalone            = False
+    , readerParseRaw              = True
+    , readerColumns               = 80
+    , readerTabStop               = 4
+--  , readerOldDashes             = False
+--  , readerApplyMacros           = True
+--  , readerIndentedCodeClasses   = []
+--  , readerDefaultImageExtension = ""
+    , readerTrace                 =
+#if DEVELOPMENT
+        True
+#else
+        False
+#endif
+--  , readerTrackChanges          = AcceptChanges
+--  , readerFileScope             = False
+    }
+
+writerOptions :: WriterOptions
+writerOptions = def
+    { writerStandalone       = False
+--  , writerTemplate         = ""
+--  , writerVariables        = []
+    , writerTabStop          = 4
+    , writerTableOfContents  = True
+--  , writerSlideVariant     = NoSlides
+--  , writerIncremental      = False
+--  , writerHTMLMathMethod   = PlainMath
+--  , writerIgnoreNotes      = False
+--  , writerNumberSections   = False
+--  , writerNumberOffset     = [0,0,0,0,0,0]
+--  , writerSectionDivs      = False
+    , writerExtensions       = pandocExtensions
+--  , writerReferenceLinks   = False
+--  , writerDpi              = 96
+    , writerWrapText         = WrapAuto
+    , writerColumns          = 79
+    , writerEmailObfuscation = ReferenceObfuscation
+--  , writerIdentifierPrefix = ""
+--  , writerSourceURL        = Nothing
+--  , writerUserDataDir      = Nothing
+--  , writerCiteMethod       = Citeproc
+    , writerHtml5            = True
+--  , writerHtmlQTags        = False
+--  , writerBeamer           = False
+--  , writerSlideLevel       = Nothing
+--  , writerChapters         = False
+--  , writerListings         = False
+    , writerHighlight        = True
+    , writerHighlightStyle   = tango
+--  , writerSetextHeaders    = True
+--  , writerTeXLigatures     = True
+--  , writerEpubVersion      = Nothing
+--  , writerEpubMetadata     = ""
+--  , writerEpubStylesheet   = Nothing
+--  , writerEpubFonts        = []
+--  , writerEpubChapterLevel = 1
+--  , writerTOCDepth         = 3
+--  , writerReferenceODT     = Nothing
+--  , writerReferenceDocx    = Nothing
+--  , writerMediaBag         = mempty
+    , writerVerbose          =
+#if DEVELOPMENT
+        True
+#else
+        False
+#endif
+--  , writerLaTeXArgs        = []
+    }
+
+renderPandoc :: Pandoc -> Widget
+renderPandoc =
+    toWidget .
+    preEscapedToMarkup .
+    sanitizeBalance .
+    TL.toStrict .
+    renderHtml .
+    writeHtml writerOptions
+
+renderSource :: MediaType -> BL.ByteString -> Widget
+renderSource mt content =
+    let contentBL = content
+        contentTL = TLE.decodeUtf8With TE.lenientDecode contentBL
+        contentS = TL.unpack contentTL
+
+        mtName = T.pack $ show mt
+
+        failed e =
+            "Failed to parse " <> mtName <> "content: " <> T.pack (show e)
+
+        -- Plain text with line numbers
+        plain = renderPlain content
+        -- Syntax highlighted source code with line numbers
+        code l = renderCode l content
+        -- Rendered document from String source
+        docS r =
+            case r readerOptions contentS of
+                Left err  -> $logWarn (failed err) >> plain
+                Right doc -> renderPandoc doc
+        -- Rendered document from String source, with warnings
+        docSW r =
+            case r readerOptions contentS of
+                Left err           -> $logWarn (failed err) >> plain
+                Right (doc, warns) -> do
+                    for_ warns $ \ warn ->
+                        $logDebug $ mtName <> " reader warning: " <> T.pack warn
+                    renderPandoc doc
+    in  case mt of
+            -- * Documents
+            PlainText -> plain
+            Markdown  -> docSW readMarkdownWithWarnings
+            -- * Programming languages
+            -- ** Haskell
+            Haskell   -> code L.Haskell.lexer
+            -- * Misc
+            _         -> plain
+
+{-renderSource :: FilePath -> BL.ByteString -> Widget
 renderSource name content =
     let plain = renderPlain content
     in  case renderHighlight name content of
@@ -71,3 +277,4 @@ renderSource name content =
                 $logDebug $ sformat
                     ("Lexed " % string % " with " % string) name (lName lexer)
                 widget
+-}
diff --git a/vervis.cabal b/vervis.cabal
index 507a7d5..7500bb1 100644
--- a/vervis.cabal
+++ b/vervis.cabal
@@ -39,6 +39,7 @@ library
                        Data.Char.Local
                        Data.List.Local
                        Network.SSH.Local
+                       Text.FilePath.Local
                        Vervis.Application
                        Vervis.Field.Key
                        Vervis.Field.Person
@@ -52,6 +53,7 @@ library
                        Vervis.Git
                        Vervis.Import
                        Vervis.Import.NoFoundation
+                       Vervis.MediaType
                        Vervis.Model
                        Vervis.Readme
                        Vervis.Render
@@ -107,6 +109,7 @@ library
                      , formatting
                      , hashable
                      , highlighter2
+                     , highlighting-kate
                      , hit
                      , hit-graph >= 0.1
                      , hjsmin
@@ -115,6 +118,8 @@ library
                      , http-types
                      , monad-control
                      , monad-logger
+                     , pandoc
+                     , pandoc-types
                      , persistent
                      , persistent-postgresql
                      , persistent-template
@@ -130,6 +135,7 @@ library
                      , wai-extra
                      , wai-logger
                      , warp
+                     , xss-sanitize
                      , yaml
                      , yesod
                      , yesod-auth