{-# LANGUAGE ViewPatterns #-}
module NLP.Minimorph.English where
import Data.Monoid ((<>))
import Data.Char (isSpace, isUpper, toLower)
import Data.Text (Text)
import qualified Data.Text as T
import NLP.Minimorph.Util
commas :: Text -> [Text] -> Text
commas :: Text -> [Text] -> Text
commas _ [] = ""
commas _ [x :: Text
x] = Text
x
commas et :: Text
et xs :: [Text]
xs = Text -> [Text] -> Text
T.intercalate ", " ([Text] -> [Text]
forall a. [a] -> [a]
init [Text]
xs) Text -> Text -> Text
<+> Text
et Text -> Text -> Text
<+> [Text] -> Text
forall a. [a] -> a
last [Text]
xs
cardinal :: Int -> Text
cardinal :: Int -> Text
cardinal n :: Int
n = case Int
n of
0 -> "zero"
1 -> "one"
2 -> "two"
3 -> "three"
4 -> "four"
5 -> "five"
6 -> "six"
7 -> "seven"
8 -> "eight"
9 -> "nine"
10 -> "ten"
_ -> Int -> Text
forall a. Show a => a -> Text
tshow Int
n
ordinalNotSpelled :: Int -> Text
ordinalNotSpelled :: Int -> Text
ordinalNotSpelled k :: Int
k = case Int -> Int
forall a. Num a => a -> a
abs Int
k Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` 100 of
n :: Int
n | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 3 Bool -> Bool -> Bool
&& Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 21 -> Int
k Int -> Text -> Text
forall a. Show a => a -> Text -> Text
`suf` "th"
| Int
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` 10 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 1 -> Int
k Int -> Text -> Text
forall a. Show a => a -> Text -> Text
`suf` "st"
| Int
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` 10 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 2 -> Int
k Int -> Text -> Text
forall a. Show a => a -> Text -> Text
`suf` "nd"
| Int
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` 10 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 3 -> Int
k Int -> Text -> Text
forall a. Show a => a -> Text -> Text
`suf` "rd"
| Bool
otherwise -> Int
k Int -> Text -> Text
forall a. Show a => a -> Text -> Text
`suf` "th"
where
num :: a
num suf :: a -> Text -> Text
`suf` s :: Text
s = a -> Text
forall a. Show a => a -> Text
tshow a
num Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
s
ordinal :: Int -> Text
ordinal :: Int -> Text
ordinal n :: Int
n = case Int
n of
0 -> "zeroth"
1 -> "first"
2 -> "second"
3 -> "third"
4 -> "fourth"
5 -> "fifth"
6 -> "sixth"
7 -> "seventh"
8 -> "eighth"
9 -> "ninth"
10 -> "tenth"
k :: Int
k -> Int -> Text
ordinalNotSpelled Int
k
defaultNounPlural :: Text -> Text
defaultNounPlural :: Text -> Text
defaultNounPlural x :: Text
x
| "is" Text -> Text -> Bool
`T.isSuffixOf` Text
x = Text
thesis
| Text -> Bool
hasSibilantSuffix Text
x = Text
sibilant_o
| Text -> Bool
hasCoSuffix Text
x = Text
sibilant_o
| Text -> Bool
hasCySuffix Text
x = Text
y_final
| "ff" Text -> Text -> Bool
`T.isSuffixOf` Text
x = Text
ff_final
| "f" Text -> Text -> Bool
`T.isSuffixOf` Text
x = Text
f_final
| Bool
otherwise = Text
plain
where
plain :: Text
plain = Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "s"
sibilant_o :: Text
sibilant_o = Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "es"
y_final :: Text
y_final = Text -> Text
T.init Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "ies"
f_final :: Text
f_final = Text -> Text
T.init Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "ves"
ff_final :: Text
ff_final = Int -> Text -> Text
T.dropEnd 2 Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "ves"
thesis :: Text
thesis = Int -> Text -> Text
T.dropEnd 2 Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "es"
defaultVerbStuff :: Text -> (Text, Text)
defaultVerbStuff :: Text -> (Text, Text)
defaultVerbStuff x :: Text
x
| Text -> Bool
hasSibilantSuffix Text
x = (Text, Text)
sibilant_o
| Text -> Bool
hasCoSuffix Text
x = (Text, Text)
sibilant_o
| Text -> Bool
hasCySuffix Text
x = (Text, Text)
y_final
| "e" Text -> Text -> Bool
`T.isSuffixOf` Text
x = (Text, Text)
e_final
| Bool
otherwise = (Text, Text)
plain
where
plain :: (Text, Text)
plain = (Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "s" , Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "ed")
sibilant_o :: (Text, Text)
sibilant_o = (Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "es" , Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "ed")
e_final :: (Text, Text)
e_final = (Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "s" , Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "d")
y_final :: (Text, Text)
y_final = (Text -> Text
T.init Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "ies", Text -> Text
T.init Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "ied")
defaultPossesive :: Text -> Text
defaultPossesive :: Text -> Text
defaultPossesive t :: Text
t =
case Text -> Char
T.last Text
t of
's' -> Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "'"
'S' -> Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "'"
'\'' -> Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "s"
_ -> Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> "'s"
anNumerals :: [Text]
anNumerals :: [Text]
anNumerals = [ "11", "11th", "18", "18th" ]
indefiniteDet :: Text -> Text
indefiniteDet :: Text -> Text
indefiniteDet t :: Text
t = if Text -> Bool
wantsAn Text
t then "an" else "a"
wantsAn :: Text -> Bool
wantsAn :: Text -> Bool
wantsAn t_ :: Text
t_ =
if Text -> Bool
startsWithAcronym Text
t_
then Text -> Bool
acronymWantsAn Text
t_
else Bool
useAn0 Bool -> Bool -> Bool
|| Bool
useAn1
where
t :: Text
t = (Text, Text) -> Text
forall a b. (a, b) -> a
fst ((Text, Text) -> Text) -> (Text, Text) -> Text
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> (Text, Text)
T.break Char -> Bool
isSep (Text -> (Text, Text)) -> Text -> (Text, Text)
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower Text
t_
useAn0 :: Bool
useAn0 = Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text]
anNumerals
useAn1 :: Bool
useAn1 = case Text -> Maybe (Char, Text)
T.uncons Text
t of
Just (h :: Char
h, "") -> Char -> Bool
isLetterWithInitialVowelSound Char
h
Just ('8',_) -> Bool
True
Just ('u',_) -> Text -> Bool
hasVowel_U_Prefix Text
t
Just (h :: Char
h, _) -> Char -> Bool
isVowel Char
h Bool -> Bool -> Bool
`butNot` Text -> Bool
hasSemivowelPrefix Text
t
Nothing -> Bool
False
x :: Bool
x butNot :: Bool -> Bool -> Bool
`butNot` y :: Bool
y = Bool
x Bool -> Bool -> Bool
&& Bool -> Bool
not Bool
y
isSep :: Char -> Bool
isSep c :: Char
c = Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
|| Char
c Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ("-" :: String)
acronymWantsAn :: Text -> Bool
acronymWantsAn :: Text -> Bool
acronymWantsAn (Text -> Text
T.toLower -> Text
t) =
Bool
useAn0 Bool -> Bool -> Bool
|| Bool
useAn1
where
useAn0 :: Bool
useAn0 = Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text]
anNumerals
useAn1 :: Bool
useAn1 = case Text -> Maybe (Char, Text)
T.uncons Text
t of
Just ('8',_) -> Bool
True
Just (h :: Char
h,_) -> Char -> Bool
isLetterWithInitialVowelSound Char
h
Nothing -> Bool
False
looksLikeAcronym :: Text -> Bool
looksLikeAcronym :: Text -> Bool
looksLikeAcronym "" = Bool
False
looksLikeAcronym x :: Text
x = (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isUpper (if Text -> Int
T.length Text
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 1 then Int -> Text -> Text
T.drop 1 Text
x else Text
x)
startsWithAcronym :: Text -> Bool
startsWithAcronym :: Text -> Bool
startsWithAcronym =
Text -> Bool
looksLikeAcronym (Text -> Bool) -> (Text -> Text) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
firstWord
where
firstWord :: Text -> Text
firstWord = (Text, Text) -> Text
forall a b. (a, b) -> a
fst ((Text, Text) -> Text) -> (Text -> (Text, Text)) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> (Text, Text)
T.break Char -> Bool
isSep
isSep :: Char -> Bool
isSep c :: Char
c = Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
|| Char
c Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ("-" :: String)
hasSibilantSuffix :: Text -> Bool
hasSibilantSuffix :: Text -> Bool
hasSibilantSuffix x :: Text
x = (Text -> Bool) -> [Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Text -> Text -> Bool
`T.isSuffixOf` Text
x) ["x","s","ch","sh","z","j"]
hasSemivowelPrefix :: Text -> Bool
hasSemivowelPrefix :: Text -> Bool
hasSemivowelPrefix ls :: Text
ls = (Text -> Bool) -> [Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Text -> Text -> Bool
`T.isPrefixOf` Text
ls) ["y","w","eu","ewe"]
hasVowel_U_Prefix :: Text -> Bool
hasVowel_U_Prefix :: Text -> Bool
hasVowel_U_Prefix t :: Text
t =
case Text -> [Char]
T.unpack Text
t of
['u'] -> Bool
False
['u',_] -> Bool
True
('u':c :: Char
c:v :: Char
v:_) -> Bool -> Bool
not (Char -> Bool
isConsonant Char
c Bool -> Bool -> Bool
&& Char -> Bool
isVowel Char
v)
_ -> Bool
False
hasCySuffix :: Text -> Bool
hasCySuffix :: Text -> Bool
hasCySuffix (Text -> [Char]
T.unpack (Text -> [Char]) -> (Text -> Text) -> Text -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.takeEnd 2 -> [x :: Char
x, 'y']) = Char -> Bool
isConsonant Char
x
hasCySuffix _ = Bool
False
hasCoSuffix :: Text -> Bool
hasCoSuffix :: Text -> Bool
hasCoSuffix (Text -> [Char]
T.unpack (Text -> [Char]) -> (Text -> Text) -> Text -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.takeEnd 2 -> [x :: Char
x, 'o']) = Char -> Bool
isConsonant Char
x
hasCoSuffix _ = Bool
False
isVowel :: Char -> Bool
isVowel :: Char -> Bool
isVowel = (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ("aeiou" :: String)) (Char -> Bool) -> (Char -> Char) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
toLower
isLetterWithInitialVowelSound :: Char -> Bool
isLetterWithInitialVowelSound :: Char -> Bool
isLetterWithInitialVowelSound = (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ("aeiofhlmnrsx" :: String)) (Char -> Bool) -> (Char -> Char) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
toLower
isConsonant :: Char -> Bool
isConsonant :: Char -> Bool
isConsonant = Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isVowel