1
0
Fork 0
mirror of https://code.sup39.dev/repos/Wqawg synced 2025-03-20 04:46:22 +09:00
vervis/src/Web/Text.hs
2022-09-23 05:20:39 +00:00

147 lines
4.6 KiB
Haskell

{- This file is part of Vervis.
-
- Written in 2016, 2018, 2019, 2022 by fr33domlover <fr33domlover@riseup.net>.
-
- ♡ Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
module Web.Text
( HTML ()
, PandocMarkdown ()
, Escaped ()
, renderHTML
, markupHTML
, renderPandocMarkdown
, pandocMarkdownFromText
, encodeEntities
, decodeEntities
)
where
import Control.Exception
import Data.Aeson
import Data.Bifunctor
import Data.Text (Text)
import Database.Persist
import Database.Persist.Sql
import HTMLEntities.Decoder
import Text.Blaze (preEscapedText)
import Text.Blaze.Html (Html)
import Text.Blaze.Html.Renderer.Text
import Text.HTML.SanitizeXSS
import Text.Pandoc.Class (runPure)
import Text.Pandoc.Highlighting
import Text.Pandoc.Options
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Writers.HTML
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TLB
import qualified HTMLEntities.Text as HET
newtype HTML = HTML { unHTML :: Text }
deriving (ToJSON, PersistField, PersistFieldSql)
instance FromJSON HTML where
parseJSON = fmap (HTML . sanitizeBalance) . parseJSON
newtype PandocMarkdown = PandocMarkdown { _unPandocMarkdown :: Text }
deriving (FromJSON, ToJSON, PersistField, PersistFieldSql)
newtype Escaped = Escaped { unEscaped :: Text }
deriving (ToJSON, PersistField, PersistFieldSql)
escape :: Text -> Text
escape = HET.text
unescape :: Text -> Text
unescape = TL.toStrict . TLB.toLazyText . htmlEncodedText
instance FromJSON Escaped where
parseJSON =
withText "Escaped" $ \ t ->
let decoded = unescape t
in if escape decoded == t
then return $ Escaped t
else fail "HTML contains more than just HTML-escaped plain text"
renderHTML :: Html -> HTML
renderHTML = HTML . TL.toStrict . renderHtml
markupHTML :: HTML -> Html
markupHTML = preEscapedText . unHTML
readerOptions :: ReaderOptions
readerOptions = def
{ readerExtensions = pandocExtensions
, readerStandalone = False
, readerColumns = 80
, readerTabStop = 4
-- , readerIndentedCodeClasses = []
-- , readerAbbreviations = defaultAbbrevs
-- , readerDefaultImageExtension = ""
-- , readerTrackChanges = AcceptChanges
-- , readerStripComments = False
}
writerOptions :: WriterOptions
writerOptions = def
{
-- writerTemplate = Nothing
-- , writerVariables = []
writerTabStop = 4
, writerTableOfContents = True
-- , writerIncremental = False
-- , writerHTMLMathMethod = PlainMath
-- , writerNumberSections = False
-- , writerNumberOffset = [0,0,0,0,0,0]
-- , writerSectionDivs = False
, writerExtensions = pandocExtensions
-- , writerReferenceLinks = False
-- , writerDpi = 96
, writerWrapText = WrapAuto
, writerColumns = 79
, writerEmailObfuscation = ReferenceObfuscation
-- , writerIdentifierPrefix = ""
-- , writerCiteMethod = Citeproc
-- , writerHtmlQTags = False
-- , writerSlideLevel = Nothing
-- , writerTopLevelDivision = TopLevelDefault
-- , writerListings = False
, writerHighlightStyle = Just tango
-- , writerSetextHeaders = True
-- , writerEpubSubdirectory = "EPUB"
-- , writerEpubMetadata = Nothing
-- , writerEpubFonts = []
-- , writerEpubChapterLevel = 1
-- , writerTOCDepth = 3
-- , writerReferenceDoc = Nothing
-- , writerReferenceLocation = EndOfDocument
-- , writerSyntaxMap = defaultSyntaxMap
}
renderPandocMarkdown :: PandocMarkdown -> Either Text HTML
renderPandocMarkdown (PandocMarkdown input) = do
doc <- runPure' $ readMarkdown readerOptions input
HTML . sanitizeBalance . TL.toStrict . renderHtml <$>
runPure' (writeHtml5 writerOptions doc)
where
runPure' = first (T.pack . displayException) . runPure
pandocMarkdownFromText :: Text -> PandocMarkdown
pandocMarkdownFromText = PandocMarkdown
encodeEntities :: Text -> Escaped
encodeEntities = Escaped . escape
decodeEntities :: Escaped -> Text
decodeEntities = unescape . unEscaped