blob: 1b200d9b274643fba340ecdce913e3f700a0e693 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
{-# LANGUAGE OverloadedStrings #-}
module PerfumeParser
( parsePerfumes
) where
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Data.List (find)
import Model.URL
import Model.Perfume
parsePerfumes :: Text -> [Perfume]
parsePerfumes page = getPerfumes . getSecondTagsInside "tbody" $ parseTags page
getSecondTagsInside :: String -> [Tag Text] -> [Tag Text]
getSecondTagsInside selector =
takeWhile (~/= ("</" ++ selector ++ ">"))
. dropWhile (~/= ("<" ++ selector ++ ">"))
. drop 1
. dropWhile (~/= ("<" ++ selector ++ ">"))
getPerfumes :: [Tag Text] -> [Perfume]
getPerfumes (TagOpen "a" attributes : TagText name : xs) =
case find ((==) "href" . fst) attributes of
Just (_, url) -> Perfume name (T.concat [site, "/", url]) : getPerfumes xs
Nothing -> getPerfumes xs
getPerfumes (_:xs) = getPerfumes xs
getPerfumes [] = []
|