diff options
Diffstat (limited to 'src/Parser/Detail.hs')
-rw-r--r-- | src/Parser/Detail.hs | 40 |
1 files changed, 37 insertions, 3 deletions
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs index 3a91ac2..3f424e9 100644 --- a/src/Parser/Detail.hs +++ b/src/Parser/Detail.hs @@ -5,6 +5,11 @@ module Parser.Detail import Data.Text (Text) import qualified Data.Text as T +import Data.Map (Map) +import qualified Data.Map as M + +import Data.Maybe (catMaybes, fromMaybe) + import Text.HTML.TagSoup import Model.Detail @@ -14,9 +19,11 @@ import Parser.Utils parseDetail :: Text -> Detail parseDetail page = let tags = parseTags page - description = parseDescription tags - images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags - in Detail { description = description, images = images } + in Detail + { description = parseDescription tags + , images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags + , properties = parseProperties tags + } parseDescription :: [Tag Text] -> Maybe Text parseDescription tags = @@ -27,3 +34,30 @@ parseDescription tags = else let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag) in Just . T.strip . renderTags . replaceBr $ descriptionTags + +parseProperties :: [Tag Text] -> Map Text Text +parseProperties tags = + let mbUtagData = getTagTextAfter "<script>" . getTagsAfter "<body>" $ tags + in fromMaybe M.empty (fmap parseUtagData mbUtagData) + +parseUtagData :: Text -> Map Text Text +parseUtagData = + M.fromList + . catMaybes + . fmap parseUtag + . T.splitOn (T.pack ",") + . T.takeWhile (/= '}') + . T.drop 1 + . T.dropWhile (/= '{') + +parseUtag :: Text -> Maybe (Text, Text) +parseUtag utag = + case T.splitOn (T.pack ":") utag of + [x, y] -> Just (T.strip x, removeQuotes y) + _ -> Nothing + +removeQuotes :: Text -> Text +removeQuotes = + T.takeWhile (/= '\"') + . T.dropWhile (== '\"') + . T.strip |