aboutsummaryrefslogtreecommitdiff
path: root/src/PerfumeParser.hs
diff options
context:
space:
mode:
authorJoris2015-08-30 21:17:27 +0200
committerJoris2015-08-30 21:17:27 +0200
commitc4eb771fa09e3972106d80ada6b3c4a023b85249 (patch)
tree9214b64628e34089b73ff6b4bdcb0edbc668a51e /src/PerfumeParser.hs
Fetch perfumes according to multiple materials
Diffstat (limited to 'src/PerfumeParser.hs')
-rw-r--r--src/PerfumeParser.hs31
1 files changed, 31 insertions, 0 deletions
diff --git a/src/PerfumeParser.hs b/src/PerfumeParser.hs
new file mode 100644
index 0000000..1b200d9
--- /dev/null
+++ b/src/PerfumeParser.hs
@@ -0,0 +1,31 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module PerfumeParser
+ ( parsePerfumes
+ ) where
+
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.HTML.TagSoup
+import Data.List (find)
+
+import Model.URL
+import Model.Perfume
+
+parsePerfumes :: Text -> [Perfume]
+parsePerfumes page = getPerfumes . getSecondTagsInside "tbody" $ parseTags page
+
+getSecondTagsInside :: String -> [Tag Text] -> [Tag Text]
+getSecondTagsInside selector =
+ takeWhile (~/= ("</" ++ selector ++ ">"))
+ . dropWhile (~/= ("<" ++ selector ++ ">"))
+ . drop 1
+ . dropWhile (~/= ("<" ++ selector ++ ">"))
+
+getPerfumes :: [Tag Text] -> [Perfume]
+getPerfumes (TagOpen "a" attributes : TagText name : xs) =
+ case find ((==) "href" . fst) attributes of
+ Just (_, url) -> Perfume name (T.concat [site, "/", url]) : getPerfumes xs
+ Nothing -> getPerfumes xs
+getPerfumes (_:xs) = getPerfumes xs
+getPerfumes [] = []