diff options
author | Joris | 2015-08-30 21:17:27 +0200 |
---|---|---|
committer | Joris | 2015-08-30 21:17:27 +0200 |
commit | c4eb771fa09e3972106d80ada6b3c4a023b85249 (patch) | |
tree | 9214b64628e34089b73ff6b4bdcb0edbc668a51e /src/PerfumeParser.hs |
Fetch perfumes according to multiple materials
Diffstat (limited to 'src/PerfumeParser.hs')
-rw-r--r-- | src/PerfumeParser.hs | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/src/PerfumeParser.hs b/src/PerfumeParser.hs new file mode 100644 index 0000000..1b200d9 --- /dev/null +++ b/src/PerfumeParser.hs @@ -0,0 +1,31 @@ +{-# LANGUAGE OverloadedStrings #-} + +module PerfumeParser + ( parsePerfumes + ) where + +import Data.Text (Text) +import qualified Data.Text as T +import Text.HTML.TagSoup +import Data.List (find) + +import Model.URL +import Model.Perfume + +parsePerfumes :: Text -> [Perfume] +parsePerfumes page = getPerfumes . getSecondTagsInside "tbody" $ parseTags page + +getSecondTagsInside :: String -> [Tag Text] -> [Tag Text] +getSecondTagsInside selector = + takeWhile (~/= ("</" ++ selector ++ ">")) + . dropWhile (~/= ("<" ++ selector ++ ">")) + . drop 1 + . dropWhile (~/= ("<" ++ selector ++ ">")) + +getPerfumes :: [Tag Text] -> [Perfume] +getPerfumes (TagOpen "a" attributes : TagText name : xs) = + case find ((==) "href" . fst) attributes of + Just (_, url) -> Perfume name (T.concat [site, "/", url]) : getPerfumes xs + Nothing -> getPerfumes xs +getPerfumes (_:xs) = getPerfumes xs +getPerfumes [] = [] |