mirror of
https://code.sup39.dev/repos/Wqawg
synced 2025-03-20 04:46:22 +09:00
147 lines
4.6 KiB
Haskell
147 lines
4.6 KiB
Haskell
{- This file is part of Vervis.
|
|
-
|
|
- Written in 2016, 2018, 2019, 2022 by fr33domlover <fr33domlover@riseup.net>.
|
|
-
|
|
- ♡ Copying is an act of love. Please copy, reuse and share.
|
|
-
|
|
- The author(s) have dedicated all copyright and related and neighboring
|
|
- rights to this software to the public domain worldwide. This software is
|
|
- distributed without any warranty.
|
|
-
|
|
- You should have received a copy of the CC0 Public Domain Dedication along
|
|
- with this software. If not, see
|
|
- <http://creativecommons.org/publicdomain/zero/1.0/>.
|
|
-}
|
|
|
|
module Web.Text
|
|
( HTML ()
|
|
, PandocMarkdown ()
|
|
, Escaped ()
|
|
, renderHTML
|
|
, markupHTML
|
|
, renderPandocMarkdown
|
|
, pandocMarkdownFromText
|
|
, encodeEntities
|
|
, decodeEntities
|
|
)
|
|
where
|
|
|
|
import Control.Exception
|
|
import Data.Aeson
|
|
import Data.Bifunctor
|
|
import Data.Text (Text)
|
|
import Database.Persist
|
|
import Database.Persist.Sql
|
|
import HTMLEntities.Decoder
|
|
import Text.Blaze (preEscapedText)
|
|
import Text.Blaze.Html (Html)
|
|
import Text.Blaze.Html.Renderer.Text
|
|
import Text.HTML.SanitizeXSS
|
|
import Text.Pandoc.Class (runPure)
|
|
import Text.Pandoc.Highlighting
|
|
import Text.Pandoc.Options
|
|
import Text.Pandoc.Readers.Markdown
|
|
import Text.Pandoc.Writers.HTML
|
|
|
|
import qualified Data.Text as T
|
|
import qualified Data.Text.Lazy as TL
|
|
import qualified Data.Text.Lazy.Builder as TLB
|
|
import qualified HTMLEntities.Text as HET
|
|
|
|
newtype HTML = HTML { unHTML :: Text }
|
|
deriving (ToJSON, PersistField, PersistFieldSql)
|
|
|
|
instance FromJSON HTML where
|
|
parseJSON = fmap (HTML . sanitizeBalance) . parseJSON
|
|
|
|
newtype PandocMarkdown = PandocMarkdown { _unPandocMarkdown :: Text }
|
|
deriving (FromJSON, ToJSON, PersistField, PersistFieldSql)
|
|
|
|
newtype Escaped = Escaped { unEscaped :: Text }
|
|
deriving (ToJSON, PersistField, PersistFieldSql)
|
|
|
|
escape :: Text -> Text
|
|
escape = HET.text
|
|
|
|
unescape :: Text -> Text
|
|
unescape = TL.toStrict . TLB.toLazyText . htmlEncodedText
|
|
|
|
instance FromJSON Escaped where
|
|
parseJSON =
|
|
withText "Escaped" $ \ t ->
|
|
let decoded = unescape t
|
|
in if escape decoded == t
|
|
then return $ Escaped t
|
|
else fail "HTML contains more than just HTML-escaped plain text"
|
|
|
|
renderHTML :: Html -> HTML
|
|
renderHTML = HTML . TL.toStrict . renderHtml
|
|
|
|
markupHTML :: HTML -> Html
|
|
markupHTML = preEscapedText . unHTML
|
|
|
|
readerOptions :: ReaderOptions
|
|
readerOptions = def
|
|
{ readerExtensions = pandocExtensions
|
|
, readerStandalone = False
|
|
, readerColumns = 80
|
|
, readerTabStop = 4
|
|
-- , readerIndentedCodeClasses = []
|
|
-- , readerAbbreviations = defaultAbbrevs
|
|
-- , readerDefaultImageExtension = ""
|
|
-- , readerTrackChanges = AcceptChanges
|
|
-- , readerStripComments = False
|
|
}
|
|
|
|
writerOptions :: WriterOptions
|
|
writerOptions = def
|
|
{
|
|
-- writerTemplate = Nothing
|
|
-- , writerVariables = []
|
|
writerTabStop = 4
|
|
, writerTableOfContents = True
|
|
-- , writerIncremental = False
|
|
-- , writerHTMLMathMethod = PlainMath
|
|
-- , writerNumberSections = False
|
|
-- , writerNumberOffset = [0,0,0,0,0,0]
|
|
-- , writerSectionDivs = False
|
|
, writerExtensions = pandocExtensions
|
|
-- , writerReferenceLinks = False
|
|
-- , writerDpi = 96
|
|
, writerWrapText = WrapAuto
|
|
, writerColumns = 79
|
|
, writerEmailObfuscation = ReferenceObfuscation
|
|
-- , writerIdentifierPrefix = ""
|
|
-- , writerCiteMethod = Citeproc
|
|
-- , writerHtmlQTags = False
|
|
-- , writerSlideLevel = Nothing
|
|
-- , writerTopLevelDivision = TopLevelDefault
|
|
-- , writerListings = False
|
|
, writerHighlightStyle = Just tango
|
|
-- , writerSetextHeaders = True
|
|
-- , writerEpubSubdirectory = "EPUB"
|
|
-- , writerEpubMetadata = Nothing
|
|
-- , writerEpubFonts = []
|
|
-- , writerEpubChapterLevel = 1
|
|
-- , writerTOCDepth = 3
|
|
-- , writerReferenceDoc = Nothing
|
|
-- , writerReferenceLocation = EndOfDocument
|
|
-- , writerSyntaxMap = defaultSyntaxMap
|
|
}
|
|
|
|
renderPandocMarkdown :: PandocMarkdown -> Either Text HTML
|
|
renderPandocMarkdown (PandocMarkdown input) = do
|
|
doc <- runPure' $ readMarkdown readerOptions input
|
|
HTML . sanitizeBalance . TL.toStrict . renderHtml <$>
|
|
runPure' (writeHtml5 writerOptions doc)
|
|
where
|
|
runPure' = first (T.pack . displayException) . runPure
|
|
|
|
pandocMarkdownFromText :: Text -> PandocMarkdown
|
|
pandocMarkdownFromText = PandocMarkdown
|
|
|
|
encodeEntities :: Text -> Escaped
|
|
encodeEntities = Escaped . escape
|
|
|
|
decodeEntities :: Escaped -> Text
|
|
decodeEntities = unescape . unEscaped
|