blob: 8940be7b1844162b0f436242195e754bdbc7f228 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
module Parser.Resume
( parse
) where
import Data.Maybe (catMaybes, isJust)
import Data.List (find)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Model.Resume (Resume(Resume))
import Parser.Utils
-- | Collect the listing summaries contained in a results page.
--
-- The parsed tag stream is narrowed to the span that starts at the first tag
-- accepted by @hasClass@ for @section@ / @tabsContent@ and stops just before
-- the first later tag accepted by @hasClass@ for @div@ / @information-immo@.
-- That span is cut into chunks that each begin at an @\<a\>@ tag, every chunk
-- is handed to 'parseResume', and chunks for which it returns 'Nothing' are
-- dropped.  A page without a starting tag yields an empty list.
--
-- NOTE(review): 'hasClass' is defined outside this block (presumably in
-- "Parser.Utils") and presumably matches on tag name and @class@ attribute --
-- confirm against its definition.
parse :: Text -> [Resume]
parse page = catMaybes (map parseResume listingChunks)
  where
    isListStart = hasClass (T.pack "section") (T.pack "tabsContent")
    isListEnd = hasClass (T.pack "div") (T.pack "information-immo")
    -- An empty span needs no special case: 'takeWhile' and 'partitions'
    -- both turn an empty list into an empty list.
    listingTags =
      takeWhile (not . isListEnd) (dropWhile (not . isListStart) (parseTags page))
    listingChunks = partitions (~== "<a>") listingTags
-- | Build a 'Resume' from the tags of a single listing chunk.
--
-- The result is 'Nothing' when either the title lookup
-- (@\<h2 class=item_title\>@) or the @href@ lookup on @\<a\>@ fails.  The
-- price lookup (@\<h3 class=item_price\>@) is allowed to fail: its 'Maybe'
-- result is handed to the 'Resume' constructor without being unwrapped.  The
-- last field is 'True' when at least one tag in the chunk matches
-- @\<span class=ispro\>@.
--
-- NOTE(review): @getTagTextAfter@ and @getTagAttribute@ are defined outside
-- this block (presumably in "Parser.Utils"); their exact matching rules are
-- not visible here.
parseResume :: [Tag Text] -> Maybe Resume
parseResume item = build <$> title <*> link
  where
    title = getTagTextAfter "<h2 class=item_title>" item
    link = getTagAttribute "<a>" (T.pack "href") item
    maybePrice = getTagTextAfter "<h3 class=item_price>" item
    fromPro = isJust (find (~== "<span class=ispro>") item)
    -- The extracted href is prefixed with the scheme before being stored.
    build name url =
      Resume name maybePrice (T.append (T.pack "https:") url) fromPro
|