module Lemmatchers.TagRecords where

import Data.Csv
import qualified Data.ByteString.Lazy as BS
import qualified Data.Vector as V
import qualified Data.Text as T
import Data.Text.Encoding
import Text.Read
import Text.ParserCombinators.ReadP
import Control.Monad

-- All possible tags ----------

data Tag
  = AJ | AV | CNJ | DET | DN | DP | EN | GN | IP | J | LN
  | MN | MOD | N | NA | NU | PN | PRP | QN | QP | REL | RN
  | SN | TN | V | WN | XP
  deriving (Tag -> Tag -> Bool
(Tag -> Tag -> Bool) -> (Tag -> Tag -> Bool) -> Eq Tag
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Tag -> Tag -> Bool
== :: Tag -> Tag -> Bool
$c/= :: Tag -> Tag -> Bool
/= :: Tag -> Tag -> Bool
Eq, Int -> Tag -> ShowS
[Tag] -> ShowS
Tag -> String
(Int -> Tag -> ShowS)
-> (Tag -> String) -> ([Tag] -> ShowS) -> Show Tag
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Tag -> ShowS
showsPrec :: Int -> Tag -> ShowS
$cshow :: Tag -> String
show :: Tag -> String
$cshowList :: [Tag] -> ShowS
showList :: [Tag] -> ShowS
Show, ReadPrec [Tag]
ReadPrec Tag
Int -> ReadS Tag
ReadS [Tag]
(Int -> ReadS Tag)
-> ReadS [Tag] -> ReadPrec Tag -> ReadPrec [Tag] -> Read Tag
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS Tag
readsPrec :: Int -> ReadS Tag
$creadList :: ReadS [Tag]
readList :: ReadS [Tag]
$creadPrec :: ReadPrec Tag
readPrec :: ReadPrec Tag
$creadListPrec :: ReadPrec [Tag]
readListPrec :: ReadPrec [Tag]
Read)

-- Lemma structure parsing ----

data Lemma = Lemma
  { Lemma -> String
lword         :: String
  , Lemma -> String
ltranslation  :: String
  , Lemma -> Tag
ltag          :: Tag
  } deriving (Lemma -> Lemma -> Bool
(Lemma -> Lemma -> Bool) -> (Lemma -> Lemma -> Bool) -> Eq Lemma
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Lemma -> Lemma -> Bool
== :: Lemma -> Lemma -> Bool
$c/= :: Lemma -> Lemma -> Bool
/= :: Lemma -> Lemma -> Bool
Eq, Int -> Lemma -> ShowS
[Lemma] -> ShowS
Lemma -> String
(Int -> Lemma -> ShowS)
-> (Lemma -> String) -> ([Lemma] -> ShowS) -> Show Lemma
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Lemma -> ShowS
showsPrec :: Int -> Lemma -> ShowS
$cshow :: Lemma -> String
show :: Lemma -> String
$cshowList :: [Lemma] -> ShowS
showList :: [Lemma] -> ShowS
Show)

instance Read Lemma where
  readsPrec :: Int -> ReadS Lemma
readsPrec Int
_ = ReadP Lemma -> ReadS Lemma
forall a. ReadP a -> ReadS a
readP_to_S ReadP Lemma
parseLemma

parseLemma :: ReadP Lemma
parseLemma :: ReadP Lemma
parseLemma = do
  String
w <- (Char -> Bool) -> ReadP String
munch1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'[')
  String
t <- ReadP Char -> ReadP Char -> ReadP String -> ReadP String
forall open close a.
ReadP open -> ReadP close -> ReadP a -> ReadP a
between (Char -> ReadP Char
char Char
'[') (Char -> ReadP Char
char Char
']') (ReadP String -> ReadP String) -> ReadP String -> ReadP String
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ReadP String
munch (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
']')
  (Just Tag
tag) <- String -> Maybe Tag
forall a. Read a => String -> Maybe a
readMaybe (String -> Maybe Tag) -> ReadP String -> ReadP (Maybe Tag)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> ReadP String
munch (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'A'..Char
'Z'])
  Lemma -> ReadP Lemma
forall a. a -> ReadP a
forall (m :: * -> *) a. Monad m => a -> m a
return (Lemma -> ReadP Lemma) -> Lemma -> ReadP Lemma
forall a b. (a -> b) -> a -> b
$ String -> String -> Tag -> Lemma
Lemma String
w String
t Tag
tag

instance FromField Lemma where
  parseField :: Field -> Parser Lemma
parseField = Parser Lemma
-> (Lemma -> Parser Lemma) -> Maybe Lemma -> Parser Lemma
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parser Lemma
forall a. Parser a
forall (m :: * -> *) a. MonadPlus m => m a
mzero Lemma -> Parser Lemma
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe Lemma -> Parser Lemma)
-> (Field -> Maybe Lemma) -> Field -> Parser Lemma
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe Lemma
forall a. Read a => String -> Maybe a
readMaybe (String -> Maybe Lemma)
-> (Field -> String) -> Field -> Maybe Lemma
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack (Text -> String) -> (Field -> Text) -> Field -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Field -> Text
decodeUtf8

-- Lemma stream parsing -------

newtype Lemmas = Lemmas
  { Lemmas -> [Lemma]
lemmas :: [Lemma]
  } deriving (Lemmas -> Lemmas -> Bool
(Lemmas -> Lemmas -> Bool)
-> (Lemmas -> Lemmas -> Bool) -> Eq Lemmas
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Lemmas -> Lemmas -> Bool
== :: Lemmas -> Lemmas -> Bool
$c/= :: Lemmas -> Lemmas -> Bool
/= :: Lemmas -> Lemmas -> Bool
Eq, Int -> Lemmas -> ShowS
[Lemmas] -> ShowS
Lemmas -> String
(Int -> Lemmas -> ShowS)
-> (Lemmas -> String) -> ([Lemmas] -> ShowS) -> Show Lemmas
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Lemmas -> ShowS
showsPrec :: Int -> Lemmas -> ShowS
$cshow :: Lemmas -> String
show :: Lemmas -> String
$cshowList :: [Lemmas] -> ShowS
showList :: [Lemmas] -> ShowS
Show)

instance Read Lemmas where
  readsPrec :: Int -> ReadS Lemmas
readsPrec Int
_ = ReadP Lemmas -> ReadS Lemmas
forall a. ReadP a -> ReadS a
readP_to_S ReadP Lemmas
parseLemmas

parseLemmas :: ReadP Lemmas
parseLemmas :: ReadP Lemmas
parseLemmas = do
  [Lemma]
ls <- ReadP Lemma -> ReadP Char -> ReadP [Lemma]
forall a sep. ReadP a -> ReadP sep -> ReadP [a]
sepBy ReadP Lemma
parseLemma (Char -> ReadP Char
char Char
' ')
  ReadP ()
eof
  Lemmas -> ReadP Lemmas
forall a. a -> ReadP a
forall (m :: * -> *) a. Monad m => a -> m a
return (Lemmas -> ReadP Lemmas) -> Lemmas -> ReadP Lemmas
forall a b. (a -> b) -> a -> b
$ [Lemma] -> Lemmas
Lemmas [Lemma]
ls

instance FromField Lemmas where
  parseField :: Field -> Parser Lemmas
parseField = Parser Lemmas
-> (Lemmas -> Parser Lemmas) -> Maybe Lemmas -> Parser Lemmas
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parser Lemmas
forall a. Parser a
forall (m :: * -> *) a. MonadPlus m => m a
mzero Lemmas -> Parser Lemmas
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe Lemmas -> Parser Lemmas)
-> (Field -> Maybe Lemmas) -> Field -> Parser Lemmas
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe Lemmas
forall a. Read a => String -> Maybe a
readMaybe (String -> Maybe Lemmas)
-> (Field -> String) -> Field -> Maybe Lemmas
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack (Text -> String) -> (Field -> Text) -> Field -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Field -> Text
decodeUtf8

-- CSV records ----------------

data LemmaRecord = LR
  { LemmaRecord -> String
idText      :: String
  , LemmaRecord -> String
designation :: String
  , LemmaRecord -> Lemmas
lemmata     :: Lemmas
  } deriving (LemmaRecord -> LemmaRecord -> Bool
(LemmaRecord -> LemmaRecord -> Bool)
-> (LemmaRecord -> LemmaRecord -> Bool) -> Eq LemmaRecord
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LemmaRecord -> LemmaRecord -> Bool
== :: LemmaRecord -> LemmaRecord -> Bool
$c/= :: LemmaRecord -> LemmaRecord -> Bool
/= :: LemmaRecord -> LemmaRecord -> Bool
Eq, Int -> LemmaRecord -> ShowS
[LemmaRecord] -> ShowS
LemmaRecord -> String
(Int -> LemmaRecord -> ShowS)
-> (LemmaRecord -> String)
-> ([LemmaRecord] -> ShowS)
-> Show LemmaRecord
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LemmaRecord -> ShowS
showsPrec :: Int -> LemmaRecord -> ShowS
$cshow :: LemmaRecord -> String
show :: LemmaRecord -> String
$cshowList :: [LemmaRecord] -> ShowS
showList :: [LemmaRecord] -> ShowS
Show)

instance FromNamedRecord LemmaRecord where
  parseNamedRecord :: NamedRecord -> Parser LemmaRecord
parseNamedRecord NamedRecord
r = String -> String -> Lemmas -> LemmaRecord
LR (String -> String -> Lemmas -> LemmaRecord)
-> Parser String -> Parser (String -> Lemmas -> LemmaRecord)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> NamedRecord
r NamedRecord -> Field -> Parser String
forall a. FromField a => NamedRecord -> Field -> Parser a
.: Field
"id_text"
                          Parser (String -> Lemmas -> LemmaRecord)
-> Parser String -> Parser (Lemmas -> LemmaRecord)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> NamedRecord
r NamedRecord -> Field -> Parser String
forall a. FromField a => NamedRecord -> Field -> Parser a
.: Field
"Designation"
                          Parser (Lemmas -> LemmaRecord)
-> Parser Lemmas -> Parser LemmaRecord
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> NamedRecord
r NamedRecord -> Field -> Parser Lemmas
forall a. FromField a => NamedRecord -> Field -> Parser a
.: Field
"lemma"

csvToLemmaRecords :: BS.ByteString -> [LemmaRecord]
csvToLemmaRecords :: ByteString -> [LemmaRecord]
csvToLemmaRecords = (String -> [LemmaRecord])
-> ((Header, Vector LemmaRecord) -> [LemmaRecord])
-> Either String (Header, Vector LemmaRecord)
-> [LemmaRecord]
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> [LemmaRecord]
forall a. String -> [a]
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (Vector LemmaRecord -> [LemmaRecord]
forall a. Vector a -> [a]
V.toList (Vector LemmaRecord -> [LemmaRecord])
-> ((Header, Vector LemmaRecord) -> Vector LemmaRecord)
-> (Header, Vector LemmaRecord)
-> [LemmaRecord]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Header, Vector LemmaRecord) -> Vector LemmaRecord
forall a b. (a, b) -> b
snd) (Either String (Header, Vector LemmaRecord) -> [LemmaRecord])
-> (ByteString -> Either String (Header, Vector LemmaRecord))
-> ByteString
-> [LemmaRecord]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Either String (Header, Vector LemmaRecord)
forall a.
FromNamedRecord a =>
ByteString -> Either String (Header, Vector a)
decodeByName

-- Short Stringification ------

class ShortShow a where
  shortShow :: a -> String
instance ShortShow Tag where shortShow :: Tag -> String
shortShow = Tag -> String
forall a. Show a => a -> String
show
instance ShortShow Lemma where
  shortShow :: Lemma -> String
shortShow Lemma
l =  Lemma -> String
lword Lemma
l
              String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"[" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Lemma -> String
ltranslation Lemma
l String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"]"
              String -> ShowS
forall a. [a] -> [a] -> [a]
++ Tag -> String
forall a. Show a => a -> String
show (Lemma -> Tag
ltag Lemma
l)
instance ShortShow Lemmas where
  shortShow :: Lemmas -> String
shortShow = [String] -> String
unwords ([String] -> String) -> (Lemmas -> [String]) -> Lemmas -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Lemma -> String) -> [Lemma] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Lemma -> String
forall a. ShortShow a => a -> String
shortShow ([Lemma] -> [String]) -> (Lemmas -> [Lemma]) -> Lemmas -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Lemmas -> [Lemma]
lemmas