{- This file is part of Vervis. - - Written in 2016, 2018, 2019, 2022 by fr33domlover <fr33domlover@riseup.net>. - - ♡ Copying is an act of love. Please copy, reuse and share. - - The author(s) have dedicated all copyright and related and neighboring - rights to this software to the public domain worldwide. This software is - distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along - with this software. If not, see - <http://creativecommons.org/publicdomain/zero/1.0/>. -} module Web.Text ( HTML () , PandocMarkdown () , Escaped () , renderHTML , markupHTML , renderPandocMarkdown , pandocMarkdownFromText , encodeEntities , decodeEntities ) where import Control.Exception import Data.Aeson import Data.Bifunctor import Data.Text (Text) import Database.Persist import Database.Persist.Sql import HTMLEntities.Decoder import Text.Blaze (preEscapedText) import Text.Blaze.Html (Html) import Text.Blaze.Html.Renderer.Text import Text.HTML.SanitizeXSS import Text.Pandoc.Class (runPure) import Text.Pandoc.Highlighting import Text.Pandoc.Options import Text.Pandoc.Readers.Markdown import Text.Pandoc.Writers.HTML import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as TLB import qualified HTMLEntities.Text as HET newtype HTML = HTML { unHTML :: Text } deriving (ToJSON, PersistField, PersistFieldSql) instance FromJSON HTML where parseJSON = fmap (HTML . sanitizeBalance) . parseJSON newtype PandocMarkdown = PandocMarkdown { _unPandocMarkdown :: Text } deriving (FromJSON, ToJSON, PersistField, PersistFieldSql) newtype Escaped = Escaped { unEscaped :: Text } deriving (ToJSON, PersistField, PersistFieldSql) escape :: Text -> Text escape = HET.text unescape :: Text -> Text unescape = TL.toStrict . TLB.toLazyText . htmlEncodedText instance FromJSON Escaped where parseJSON = withText "Escaped" $ \ t -> let decoded = unescape t in if escape decoded == t then return $ Escaped t else fail "HTML contains more than just HTML-escaped plain text" renderHTML :: Html -> HTML renderHTML = HTML . TL.toStrict . renderHtml markupHTML :: HTML -> Html markupHTML = preEscapedText . unHTML readerOptions :: ReaderOptions readerOptions = def { readerExtensions = pandocExtensions , readerStandalone = False , readerColumns = 80 , readerTabStop = 4 -- , readerIndentedCodeClasses = [] -- , readerAbbreviations = defaultAbbrevs -- , readerDefaultImageExtension = "" -- , readerTrackChanges = AcceptChanges -- , readerStripComments = False } writerOptions :: WriterOptions writerOptions = def { -- writerTemplate = Nothing -- , writerVariables = [] writerTabStop = 4 , writerTableOfContents = True -- , writerIncremental = False -- , writerHTMLMathMethod = PlainMath -- , writerNumberSections = False -- , writerNumberOffset = [0,0,0,0,0,0] -- , writerSectionDivs = False , writerExtensions = pandocExtensions -- , writerReferenceLinks = False -- , writerDpi = 96 , writerWrapText = WrapAuto , writerColumns = 79 , writerEmailObfuscation = ReferenceObfuscation -- , writerIdentifierPrefix = "" -- , writerCiteMethod = Citeproc -- , writerHtmlQTags = False -- , writerSlideLevel = Nothing -- , writerTopLevelDivision = TopLevelDefault -- , writerListings = False , writerHighlightStyle = Just tango -- , writerSetextHeaders = True -- , writerEpubSubdirectory = "EPUB" -- , writerEpubMetadata = Nothing -- , writerEpubFonts = [] -- , writerEpubChapterLevel = 1 -- , writerTOCDepth = 3 -- , writerReferenceDoc = Nothing -- , writerReferenceLocation = EndOfDocument -- , writerSyntaxMap = defaultSyntaxMap } renderPandocMarkdown :: PandocMarkdown -> Either Text HTML renderPandocMarkdown (PandocMarkdown input) = do doc <- runPure' $ readMarkdown readerOptions input HTML . sanitizeBalance . TL.toStrict . renderHtml <$> runPure' (writeHtml5 writerOptions doc) where runPure' = first (T.pack . displayException) . runPure pandocMarkdownFromText :: Text -> PandocMarkdown pandocMarkdownFromText = PandocMarkdown encodeEntities :: Text -> Escaped encodeEntities = Escaped . escape decodeEntities :: Escaped -> Text decodeEntities = unescape . unEscaped