Skip to content

Commit

Permalink
feat: Add a module for generating trigrams from a dictionary
Browse files Browse the repository at this point in the history
  • Loading branch information
sgillespie committed Aug 15, 2023
1 parent 74cbada commit a7504b8
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 8 deletions.
14 changes: 8 additions & 6 deletions gibberish.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ extra-source-files:
test/Test/Elocrypt/*.hs

common common-options
build-depends:
base >=4.14 && <5,
containers,
text,
text-show
ghc-options:
-Wall
-Wcompat
Expand Down Expand Up @@ -51,15 +56,12 @@ library
Data.Elocrypt,
Data.Elocrypt.Trigraph,
Data.Elocrypt.Utils,
Data.Gibberish.GenTrigrams,
Data.Gibberish.Types
build-depends:
base >=4.14 && <5,
MonadRandom,
aeson,
containers,
random,
text,
text-show
random
hs-source-dirs: src

executable gibber
Expand Down Expand Up @@ -91,7 +93,6 @@ test-suite test
MonadRandom,
proctest,
QuickCheck,
containers,
hedgehog,
random,
tasty,
Expand All @@ -106,6 +107,7 @@ test-suite test
Test.Elocrypt.TrigraphTest,
Test.Elocrypt.UtilsTest,
Test.ElocryptTest,
Test.Gibberish.GenTrigramsTest,
Test.Gibberish.TypesTest
default-extensions:
TemplateHaskell
Expand Down
28 changes: 28 additions & 0 deletions src/Data/Gibberish/GenTrigrams.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{-# LANGUAGE OverloadedLists #-}
module Data.Gibberish.GenTrigrams
(mapTrigrams) where

import Data.Gibberish.Types
import Data.Map (Map ())
import Data.Map qualified as Map
import Data.Text (Text ())
import Data.Text qualified as Text

-- | Build a trigram frequency table from a list of words.
--
-- Every key is a 'Digram' (two adjacent characters of some word); its value
-- records, for each character that followed that digram, how many times it
-- did so across all of the given words. An empty word list yields an empty
-- map.
mapTrigrams :: [Text] -> Map Digram Frequencies
mapTrigrams = Map.unionsWith combine . map mkTrigrams
  where
    -- When two per-word tables share a digram, add their per-character
    -- counts. Addition is commutative and associative, so the order in which
    -- the library fold merges the tables does not affect the result.
    combine (Frequencies f1) (Frequencies f2) =
      Frequencies $ Map.unionWith (+) f1 f2

-- | Trigram table for a single word.
--
-- Each trigram @(a, b, c)@ found in the word contributes a count of one for
-- 'Unigram' @c@ under the key 'Digram' @a b@; repeated trigrams accumulate.
mkTrigrams :: Text -> Map Digram Frequencies
mkTrigrams word =
  Map.fromListWith
    merge
    [ (Digram a b, Frequencies (Map.singleton (Unigram c) 1))
    | (a, b, c) <- scanTrigrams word
    ]
  where
    -- Entries that land on the same digram have their counts summed.
    merge (Frequencies new) (Frequencies old) =
      Frequencies (Map.unionWith (+) new old)

-- | List every run of three consecutive characters in a word, left to right.
--
-- Words shorter than three characters produce an empty list.
scanTrigrams :: Text -> [(Char, Char, Char)]
scanTrigrams word = zip3 chars (drop 1 chars) (drop 2 chars)
  where
    -- Zipping the character list against itself shifted by one and by two
    -- stops at the shortest list, so no partial function is needed.
    chars = Text.unpack word
2 changes: 1 addition & 1 deletion src/Data/Gibberish/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ data Digram = Digram Char Char

newtype Frequency = Frequency {unFequency :: Int}
deriving stock (Eq, Show)
deriving newtype (Num, FromJSON, ToJSON)
deriving newtype (FromJSON, ToJSON, Enum, Integral, Num, Ord, Real)

newtype Frequencies = Frequencies {unFrequencies :: Map Unigram Frequency}
deriving stock (Eq, Show)
Expand Down
82 changes: 82 additions & 0 deletions test/Test/Gibberish/GenTrigramsTest.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{-# LANGUAGE OverloadedLists #-}

module Test.Gibberish.GenTrigramsTest (tests) where

import Data.Gibberish.GenTrigrams
import Data.Gibberish.Types
import Test.Gibberish.Gen qualified as Gen

import Data.List qualified as List
import Data.Map (Map ())
import Data.Map qualified as Map
import Data.Text (Text ())
import Data.Text qualified as Text
import Hedgehog
import Hedgehog.Gen qualified as Gen hiding (word)
import Hedgehog.Range qualified as Range
import Test.Tasty (TestTree (), testGroup)
import Test.Tasty.Hedgehog (testPropertyNamed)

-- | All property tests for "Data.Gibberish.GenTrigrams".
tests :: TestTree
tests = testGroup "Test.Gibberish.GenTrigrams" properties
  where
    properties =
      [ testPropertyNamed "length trigrams" "prop_len_trigrams" prop_len_trigrams
      , testPropertyNamed "length frequencies" "prop_len_frequencies" prop_len_frequencies
      , testPropertyNamed "contains all trigrams" "prop_trigrams_all" prop_trigrams_all
      ]

-- | The table built from one word never has more digram keys than the word
-- has trigram positions (its length minus two, floored at zero).
prop_len_trigrams :: Property
prop_len_trigrams = property $ do
  word <- forAll Gen.word
  let sortedTrigrams = List.sort (trigrams word)
      repeated = hasDuplicates sortedTrigrams
      upperBound = max 0 (Text.length word - 2)

  -- Require both shapes of input to show up in at least 10% of cases.
  cover 10 "with duplicates" repeated
  cover 10 "no duplicates" (not repeated)

  assert (length (mapTrigrams [word]) <= upperBound)

-- | Summing every count stored in the table for one word gives exactly the
-- number of trigrams in that word.
prop_len_frequencies :: Property
prop_len_frequencies = property $ do
  word <- forAll Gen.word
  let sortedTrigrams = List.sort (trigrams word)
      repeated = hasDuplicates sortedTrigrams

  -- Require both shapes of input to show up in at least 10% of cases.
  cover 10 "with duplicates" repeated
  cover 10 "no duplicates" (not repeated)

  let countedTrigrams =
        sum
          [ count
          | Frequencies perChar <- Map.elems (mapTrigrams [word])
          , count <- Map.elems perChar
          ]
  length sortedTrigrams === fromIntegral countedTrigrams

-- | The set of trigrams recoverable from the table equals the set of
-- distinct trigrams found in the input words.
prop_trigrams_all :: Property
prop_trigrams_all = property $ do
  words' <- forAll $ Gen.list (Range.linear 0 10) Gen.word
  let perWord = map (List.sort . trigrams) words'
      expected = distinctSorted perWord
      actual = List.sort . allTrigrams $ mapTrigrams words'

  cover 10 "with duplicates" (List.any hasDuplicates perWord)
  cover 10 "no duplicates" (List.any (not . hasDuplicates) perWord)

  expected === actual
  where
    -- Flatten, sort, then drop repeated entries.
    distinctSorted :: Ord a => [[a]] -> [a]
    distinctSorted groups = List.nub (List.sort (List.concat groups))

-- | Reference listing of every three-character window of a text, in order.
--
-- Suffixes with fewer than three characters fail the pattern and are skipped.
trigrams :: Text -> [(Char, Char, Char)]
trigrams ts =
  [ (a, b, c)
  | (a : b : c : _) <- map Text.unpack (Text.tails ts)
  ]

-- | Flatten a trigram table back into @(first, second, third)@ triples, one
-- per digram key and following character recorded under it. Counts are
-- ignored, so each stored trigram appears once.
allTrigrams :: Map Digram Frequencies -> [(Char, Char, Char)]
allTrigrams table =
  [ (c1, c2, c3)
  | (Digram c1 c2, Frequencies followers) <- Map.toList table
  , Unigram c3 <- Map.keys followers
  ]

-- | 'True' when some element occurs more than once in the list.
--
-- Sorting places equal elements next to each other, so comparing each
-- element with its successor is enough; this makes use of the 'Ord'
-- constraint and avoids the quadratic cost of 'List.nub'.
hasDuplicates :: Ord a => [a] -> Bool
hasDuplicates ls = or (zipWith (==) sorted (drop 1 sorted))
  where
    sorted = List.sort ls
19 changes: 19 additions & 0 deletions test/Test/Gibberish/TypesTest.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
module Test.Gibberish.TypesTest (tests) where

import Test.Gibberish.Gen qualified as Gen

import Data.Aeson (fromJSON, toJSON)
import Hedgehog
import Test.Tasty (TestTree (), testGroup)
import Test.Tasty.Hedgehog (testPropertyNamed)

-- | All property tests for "Data.Gibberish.Types".
tests :: TestTree
tests = testGroup "Test.Gibberish.Types" properties
  where
    properties =
      [ testPropertyNamed "toJSON roundtrip" "prop_toJSON_roundtrip" prop_toJSON_roundtrip
      ]

-- | Encoding a generated trigram with 'toJSON' and decoding it with
-- 'fromJSON' gives back the original value.
prop_toJSON_roundtrip :: Property
prop_toJSON_roundtrip =
  property $
    forAll Gen.trigram >>= \trigram ->
      tripping trigram toJSON fromJSON
4 changes: 3 additions & 1 deletion test/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module Main (main) where
import Test.Elocrypt.TrigraphTest qualified as TrigraphTest
import Test.Elocrypt.UtilsTest qualified as UtilsTest
import Test.ElocryptTest qualified as PasswordTest
import Test.Gibberish.GenTrigramsTest qualified as GenTrigramsTest
import Test.Gibberish.TypesTest qualified as TypesTest

import Test.Tasty
Expand All @@ -17,5 +18,6 @@ tests =
[ PasswordTest.tests,
TrigraphTest.tests,
UtilsTest.tests,
TypesTest.tests
TypesTest.tests,
GenTrigramsTest.tests
]
5 changes: 5 additions & 0 deletions testlib/Test/Gibberish/Gen.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ module Test.Gibberish.Gen
unigram,
frequencies,
frequency,
word,
) where

import Data.Gibberish.Types

import Data.Text (Text ())
import Hedgehog
import Hedgehog.Gen qualified as Gen
import Hedgehog.Range qualified as Range
Expand All @@ -33,3 +35,6 @@ frequencies =

-- | Generate a 'Frequency' wrapping an 'Int' drawn from the range 0 to
-- 'maxBound'.
frequency :: Gen Frequency
frequency = fmap Frequency (Gen.int countRange)
  where
    countRange = Range.linear 0 maxBound

-- | Generate a word of 3 to 30 characters, each drawn from @\'a\'@ to @\'e\'@.
word :: Gen Text
word = Gen.text lengthRange letter
  where
    lengthRange = Range.linear 3 30
    -- NOTE(review): the five-letter alphabet presumably makes repeated
    -- trigrams likely enough for the tests' coverage checks — confirm.
    letter = Gen.enum 'a' 'e'

0 comments on commit a7504b8

Please sign in to comment.