Skip to content

Commit

Permalink
refactor: Rework terminology
Browse files Browse the repository at this point in the history
Trigram: a sequence of 3 letters
Trigraph: a mapping of digrams to potential trigram completions
  • Loading branch information
sgillespie committed Aug 29, 2023
1 parent c2982e6 commit 7309f03
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 37 deletions.
4 changes: 2 additions & 2 deletions app/gen-trigraph.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module Main (main) where

import Data.Gibberish.GenTrigrams (mapTrigrams)
import Data.Gibberish.GenTrigraph (genTrigraph)

import Data.Aeson.Encode.Pretty (encodePretty)
import Data.ByteString.Lazy (ByteString ())
Expand Down Expand Up @@ -28,7 +28,7 @@ main = execParser opts >>= run
run :: Options -> IO ()
run Options {..} = writeOutput' . genTrigrams =<< readInputFile
where
genTrigrams = encodePretty . mapTrigrams . Text.lines
genTrigrams = encodePretty . genTrigraph . Text.lines
writeOutput' = writeOutput optOutput
readInputFile = Text.readFile optInputFile

Expand Down
File renamed without changes.
6 changes: 3 additions & 3 deletions gibberish.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ library
Data.Elocrypt,
Data.Elocrypt.Trigraph,
Data.Elocrypt.Utils,
Data.Gibberish.GenTrigrams,
Data.Gibberish.GenTrigraph,
Data.Gibberish.Types
build-depends:
MonadRandom,
Expand All @@ -81,7 +81,7 @@ executable gibber
hs-source-dirs: app
main-is: Main.hs

executable gibber-gen-trigram
executable gibber-gen-trigraph
import: common-options
build-depends:
base >= 4.14 && <5,
Expand Down Expand Up @@ -133,7 +133,7 @@ test-suite test
Test.Elocrypt.TrigraphTest,
Test.Elocrypt.UtilsTest,
Test.ElocryptTest,
Test.Gibberish.GenTrigramsTest,
Test.Gibberish.GenTrigraphTest,
Test.Gibberish.TypesTest
default-extensions:
TemplateHaskell
Expand Down
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
{-# LANGUAGE OverloadedLists #-}

module Data.Gibberish.GenTrigrams (mapTrigrams) where
module Data.Gibberish.GenTrigraph (genTrigraph) where

import Data.Gibberish.Types
import Data.Map.Strict (Map ())
import Data.Map.Strict qualified as Map
import Data.Text (Text ())
import Data.Text qualified as Text

mapTrigrams :: [Text] -> Map Digram Frequencies
mapTrigrams [] = Map.empty
mapTrigrams (x : xs) = Map.unionWith combine (mkTrigrams x) (mapTrigrams xs)
-- | Build a single 'Trigraph' from a list of words.
--
-- Each word is turned into its own digram-to-frequencies map by 'mkTrigraph';
-- the per-word maps are then merged from the right, summing the counts of any
-- unigram that appears under the same digram in more than one word.
genTrigraph :: [Text] -> Trigraph
genTrigraph wordList = Trigraph (foldr mergeWord Map.empty wordList)
  where
    -- Merge one word's map into the accumulated result.
    mergeWord word acc = Map.unionWith addFrequencies (mkTrigraph word) acc
    -- Two frequency tables for the same digram are combined by adding counts.
    addFrequencies (Frequencies lhs) (Frequencies rhs) =
      Frequencies (Map.unionWith (+) lhs rhs)

mkTrigrams :: Text -> Map Digram Frequencies
mkTrigrams word = foldr insert' Map.empty $ scanTrigrams word
-- | Generate a trigraph from a single word
mkTrigraph :: Text -> Map Digram Frequencies
mkTrigraph word = foldr insert' Map.empty $ scanTrigrams word
where
insert' (a, b, c) =
insert' (Trigram a b c) =
Map.insertWith combineFrequencies (Digram a b) (mkFrequencies c)
combineFrequencies (Frequencies m1) (Frequencies m2) = Frequencies (Map.unionWith (+) m1 m2)
mkFrequencies c = Frequencies $ Map.singleton (Unigram c) 1

scanTrigrams :: Text -> [(Char, Char, Char)]
scanTrigrams :: Text -> [Trigram]
scanTrigrams word = case Text.take 3 word of
[a, b, c] -> (a, b, c) : scanTrigrams (Text.tail word)
[a, b, c] -> Trigram a b c : scanTrigrams (Text.tail word)
_ -> []
25 changes: 20 additions & 5 deletions src/Data/Gibberish/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
module Data.Gibberish.Types
( Unigram (..),
Digram (..),
Trigram (..),
Frequency (..),
Frequencies (..),
Trigram (..),
Trigraph (..),
) where

import Control.DeepSeq (NFData)
Expand All @@ -17,23 +18,35 @@ import Data.Text (Text ())
import GHC.Generics (Generic ())
import TextShow (TextShow (..), fromString)

-- | A unigram is a single letter, wrapping one 'Char'. Its JSON instances
-- (including the JSON key instances) and 'NFData' are derived from the
-- wrapped 'Char'.
newtype Unigram = Unigram {unUnigram :: Char}
deriving stock (Eq, Ord, Show)
deriving newtype (FromJSON, FromJSONKey, NFData, ToJSON, ToJSONKey)

-- | A digram is a sequence of two letters, stored as its first and second
-- character.
data Digram = Digram Char Char
deriving stock (Eq, Generic, Show)
deriving anyclass (NFData)

-- | A trigram is a sequence of three letters, stored as its first, second
-- and third character.
data Trigram = Trigram Char Char Char
deriving stock (Eq, Generic, Show)
deriving anyclass (NFData)

-- | A frequency represents the number of times a given trigram occurs
-- in a language. It is a thin wrapper around 'Int'; its numeric, ordering
-- and JSON instances are derived from the underlying 'Int'.
--
-- NOTE(review): the accessor is spelled @unFequency@ (missing an \"r\").
-- It is part of the exported API, so confirm with callers before renaming.
newtype Frequency = Frequency {unFequency :: Int}
deriving stock (Eq, Show)
deriving newtype (FromJSON, ToJSON, NFData, Enum, Integral, Num, Ord, Real)

-- | Frequencies maps each unigram to its frequency. The JSON and 'NFData'
-- instances are those of the underlying 'Map'.
newtype Frequencies = Frequencies {unFrequencies :: Map Unigram Frequency}
deriving stock (Eq, Show)
deriving newtype (FromJSON, ToJSON, NFData)

newtype Trigram = Trigram {unTrigram :: Map Digram Frequencies}
-- | A trigraph is a mapping of all digrams to frequencies. That is, for a set of
-- digrams, it contains the frequencies of all possible trigram candidates.
newtype Trigraph = Trigraph {unTrigraph :: Map Digram Frequencies}
deriving stock (Eq, Show)
deriving newtype (FromJSON, ToJSON, NFData)

Expand All @@ -57,9 +70,11 @@ instance FromJSONKey Digram where

instance Ord Digram where
(Digram a1 b1) `compare` (Digram a2 b2) =
case a1 `compare` a2 of
EQ -> b1 `compare` b2
c -> c
(a1, b1) `compare` (a2, b2)

-- | Trigrams are ordered lexicographically: by first letter, then second,
-- then third.
instance Ord Trigram where
  compare (Trigram a1 b1 c1) (Trigram a2 b2 c2) =
    -- '<>' on 'Ordering' keeps the first non-'EQ' result, which is exactly
    -- a letter-by-letter lexicographic comparison.
    compare a1 a2 <> compare b1 b2 <> compare c1 c2

parseDigram :: Text -> Parser Digram
parseDigram = (uncurry Digram <$>) . fromText
Expand Down
10 changes: 5 additions & 5 deletions test/Golden.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module Main (main) where

import Data.Gibberish.GenTrigrams (mapTrigrams)
import Data.Gibberish.GenTrigraph (genTrigraph)

import Data.Aeson.Encode.Pretty (encodePretty)
import Data.ByteString.Lazy (ByteString ())
Expand All @@ -15,8 +15,8 @@ import Prelude hiding (writeFile)
-- | Path of the @data/dicts@ directory underneath the directory returned by
-- 'getDataDir'.
dictsDir :: IO FilePath
dictsDir = fmap (\base -> base </> "data" </> "dicts") getDataDir

trigramsDir :: IO FilePath
trigramsDir = (</> "data" </> "trigrams") <$> getDataDir
-- | Path of the @data/trigraphs@ directory underneath the directory returned
-- by 'getDataDir'.
trigraphsDir :: IO FilePath
trigraphsDir = fmap (\base -> base </> "data" </> "trigraphs") getDataDir

-- | Build the test tree in 'IO' and hand it to 'defaultMain'.
main :: IO ()
main = tests >>= defaultMain
Expand All @@ -28,10 +28,10 @@ tests = testGroup "Golden Tests" <$> tests'

createTest :: FilePath -> IO TestTree
createTest dict = do
goldenFile <- (</> replaceExtension (takeBaseName dict) "json") <$> trigramsDir
goldenFile <- (</> replaceExtension (takeBaseName dict) "json") <$> trigraphsDir
pure $ goldenVsString (takeBaseName dict) goldenFile (runTest dict)

runTest :: FilePath -> IO ByteString
runTest f = genTrigrams <$> Text.readFile f
where
genTrigrams = encodePretty . mapTrigrams . Text.lines
genTrigrams = encodePretty . genTrigraph . Text.lines
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{-# LANGUAGE OverloadedLists #-}

module Test.Gibberish.GenTrigramsTest (tests) where
module Test.Gibberish.GenTrigraphTest (tests) where

import Data.Gibberish.GenTrigrams
import Data.Gibberish.GenTrigraph
import Data.Gibberish.Types
import Test.Gibberish.Gen qualified as Gen

Expand Down Expand Up @@ -35,7 +35,7 @@ prop_len_trigrams = property $ do
cover 10 "no duplicates" $ not (hasDuplicates trigrams')

assert $
length (mapTrigrams [word]) <= max 0 (Text.length word - 2)
length (unTrigraph $ genTrigraph [word]) <= max 0 (Text.length word - 2)

prop_len_frequencies :: Property
prop_len_frequencies = property $ do
Expand All @@ -49,14 +49,15 @@ prop_len_frequencies = property $ do
sum
. concatMap (Map.elems . unFrequencies)
. Map.elems
. mapTrigrams
. unTrigraph
. genTrigraph
$ [word]
length wordTrigrams' === fromIntegral totalTrigrams

prop_trigrams_all :: Property
prop_trigrams_all = property $ do
words' <- forAll $ Gen.list (Range.linear 0 10) Gen.word
let wordTrigrams = mapTrigrams words'
let wordTrigrams = unTrigraph $ genTrigraph words'
trigrams' = map (List.sort . trigrams) words'

cover 10 "with duplicates" $ List.any hasDuplicates trigrams'
Expand All @@ -67,16 +68,16 @@ prop_trigrams_all = property $ do
-- | Flatten a list of lists, sort the result and drop repeated elements.
concatNub :: Ord a => [[a]] -> [a]
concatNub groups = List.nub (List.sort (List.concat groups))

trigrams :: Text -> [(Char, Char, Char)]
trigrams :: Text -> [Trigram]
trigrams ts = case Text.take 3 ts of
[a, b, c] -> (a, b, c) : trigrams (Text.tail ts)
[a, b, c] -> Trigram a b c : trigrams (Text.tail ts)
_ -> []

allTrigrams :: Map Digram Frequencies -> [(Char, Char, Char)]
allTrigrams :: Map Digram Frequencies -> [Trigram]
allTrigrams tris = concatMap (uncurry mapFrequencies) $ Map.toList tris
where
mapFrequencies (Digram c1 c2) (Frequencies freqs) =
map (\(Unigram c3) -> (c1, c2, c3)) $ Map.keys freqs
map (\(Unigram c3) -> Trigram c1 c2 c3) $ Map.keys freqs

-- | 'True' when at least one element occurs more than once, i.e. when
-- removing repeats with 'List.nub' changes the list.
hasDuplicates :: Ord a => [a] -> Bool
hasDuplicates xs = List.nub xs /= xs
4 changes: 2 additions & 2 deletions test/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module Main (main) where
import Test.Elocrypt.TrigraphTest qualified as TrigraphTest
import Test.Elocrypt.UtilsTest qualified as UtilsTest
import Test.ElocryptTest qualified as PasswordTest
import Test.Gibberish.GenTrigramsTest qualified as GenTrigramsTest
import Test.Gibberish.GenTrigraphTest qualified as GenTrigraphTest
import Test.Gibberish.TypesTest qualified as TypesTest

import Test.Tasty
Expand All @@ -19,5 +19,5 @@ tests =
TrigraphTest.tests,
UtilsTest.tests,
TypesTest.tests,
GenTrigramsTest.tests
GenTrigraphTest.tests
]
4 changes: 2 additions & 2 deletions testlib/Test/Gibberish/Gen.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ import Hedgehog
import Hedgehog.Gen qualified as Gen
import Hedgehog.Range qualified as Range

trigram :: Gen Trigram
trigram :: Gen Trigraph
trigram = do
Trigram <$> Gen.map (Range.linear 0 250) kv
Trigraph <$> Gen.map (Range.linear 0 250) kv
where
kv = (,) <$> digram <*> frequencies

Expand Down

0 comments on commit 7309f03

Please sign in to comment.