Skip to content

Commit

Permalink
allow parsing unicode relational operators
Browse files Browse the repository at this point in the history
  • Loading branch information
msakai committed Mar 23, 2024
1 parent 8e21fd8 commit f1816b7
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 8 deletions.
4 changes: 3 additions & 1 deletion pseudo-boolean.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ library
deepseq >=1.4.4.0,
hashable >=1.2.7.0 && <1.5.0.0,
void,
OptDir >= 0.1.0
OptDir >= 0.1.0,
utf8-string >=1.0.1.1 && <1.1,
text >=1.2.3.1 && <3.0
hs-source-dirs: src
default-language: Haskell2010

Expand Down
6 changes: 6 additions & 0 deletions src/Data/PseudoBoolean/Attoparsec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import Data.Attoparsec.ByteString.Char8 hiding (isDigit)
import qualified Data.Attoparsec.ByteString.Lazy as L
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy as BSLazy
import qualified Data.ByteString.UTF8 as UTF8
import Data.Char
import Data.Maybe
import Data.PseudoBoolean.Types
Expand Down Expand Up @@ -164,7 +165,12 @@ relational_operator = msum
, string ">" >> return Gt
, string "<=" >> return Le
, string "<" >> return Lt
, u8string "" >> return NEq
, u8string "" >> return Ge
, u8string "" >> return Le
]
where
u8string = string . UTF8.fromString

-- <variablename>::= "x" <unsigned_integer>
variablename :: Parser Var
Expand Down
17 changes: 13 additions & 4 deletions src/Data/PseudoBoolean/Megaparsec.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{-# LANGUAGE BangPatterns, FlexibleContexts, TypeFamilies, ConstraintKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -Wall #-}
-----------------------------------------------------------------------------
Expand Down Expand Up @@ -42,8 +43,10 @@ module Data.PseudoBoolean.Megaparsec
import Prelude hiding (sum)
import Control.Monad
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.UTF8 as UTF8
import Data.Maybe
import Data.Proxy
import Data.String
import Data.Word
import Data.Void
Expand Down Expand Up @@ -173,15 +176,21 @@ objective_type :: C e s m => m OptDir
objective_type = (try (string "min:") >> return OptMin) <|> (string "max:" >> return OptMax)

-- <relational_operator>::= ">=" | "="
relational_operator :: C e s m => m Op
relational_operator :: forall e s m. C e s m => m Op
relational_operator = msum $ map try
[ string "=" >> return Eq
, string "!=" >> return NEq
, string ">=" >> return Ge
, string ">" >> return Gt
, string "<=" >> return Le
, string "<" >> return Lt
, u8string "" >> return NEq
, u8string "" >> return Ge
, u8string "" >> return Le
]
where
-- XXX: We cannot assume Tokens s ~ ByteString
u8string = string . tokensToChunk (Proxy :: Proxy s) . BL.unpack . UTF8.fromString

-- <variablename>::= "x" <unsigned_integer>
variablename :: C e s m => m Var
Expand Down Expand Up @@ -231,7 +240,7 @@ type ParseError = MP.ParseErrorBundle BL.ByteString Void

-- | Parse a OPB format string containing pseudo boolean problem.
parseOPBString :: String -> String -> Either ParseError Formula
parseOPBString info s = parse (formula <* eof) info (BL.pack s)
parseOPBString info s = parse (formula <* eof) info (UTF8.fromString s)

-- | Parse a OPB format lazy bytestring containing pseudo boolean problem.
parseOPBByteString :: String -> ByteString -> Either ParseError Formula
Expand Down Expand Up @@ -294,7 +303,7 @@ softconstraint = do

-- | Parse a WBO format string containing weighted boolean optimization problem.
parseWBOString :: String -> String -> Either ParseError SoftFormula
parseWBOString info s = parse (softformula <* eof) info (BL.pack s)
parseWBOString info s = parse (softformula <* eof) info (UTF8.fromString s)

-- | Parse a WBO format lazy bytestring containing pseudo boolean problem.
parseWBOByteString :: String -> ByteString -> Either ParseError SoftFormula
Expand Down
14 changes: 11 additions & 3 deletions src/Data/PseudoBoolean/Parsec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ module Data.PseudoBoolean.Parsec
import Prelude hiding (sum)
import Control.Monad
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as BL
import Data.Maybe
import qualified Data.Text.Lazy.Encoding as TL
import Text.Parsec
import qualified Text.Parsec.ByteString.Lazy as ParsecBS
import Data.PseudoBoolean.Types
import Data.PseudoBoolean.Internal.TextUtil

Expand Down Expand Up @@ -163,6 +164,9 @@ relational_operator = msum $ map try
, string ">" >> return Gt
, string "<=" >> return Le
, string "<" >> return Lt
, string "" >> return NEq
, string "" >> return Ge
, string "" >> return Le
]

-- <variablename>::= "x" <unsigned_integer>
Expand Down Expand Up @@ -222,7 +226,9 @@ parseOPBByteString = parse (formula <* eof)

-- | Parse a OPB file containing pseudo boolean problem.
parseOPBFile :: FilePath -> IO (Either ParseError Formula)
parseOPBFile = ParsecBS.parseFromFile (formula <* eof)
parseOPBFile fname = do
input <- BL.readFile fname
return $ runP (formula <* eof) () fname (TL.decodeUtf8 input)


-- <softformula>::= <sequence_of_comments> <softheader> <sequence_of_comments_or_constraints>
Expand Down Expand Up @@ -284,4 +290,6 @@ parseWBOByteString = parse (softformula <* eof)

-- | Parse a WBO file containing weighted boolean optimization problem.
parseWBOFile :: FilePath -> IO (Either ParseError SoftFormula)
parseWBOFile = ParsecBS.parseFromFile (softformula <* eof)
parseWBOFile fname = do
input <- BL.readFile fname
return $ runP (softformula <* eof) () fname (TL.decodeUtf8 input)
24 changes: 24 additions & 0 deletions test/TestPBFile.hs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,19 @@ case_invalid_lhs_empty_sum_wbo = checkWBOFile "test/samples/invalid-lhs-empty-su

case_general_testlin_max_file = checkOPBFile "test/samples/general/testlin-max.pb"
case_general_relational_operator_file = checkOPBFile "test/samples/general/test-relational-operator.pb"
case_general_relational_operator_unicode_file = checkOPBFile "test/samples/general/test-relational-operator-unicode.pb"
case_general_relational_operator = checkOPBString "general relational operator" exampleGeneralRelationalOperator
case_general_relational_operator_unicode = checkOPBString "general relational operator unicode" exampleGeneralRelationalOperatorUnicode

case_general_relational_operator_unicode_equivalence = do
Right expected <- parseOPBFile "test/samples/general/test-relational-operator.pb"
Right opbP <- parseOPBFile "test/samples/general/test-relational-operator-unicode.pb"
expected @?= opbP
Right opbM <- M.parseOPBFile "test/samples/general/test-relational-operator-unicode.pb"
expected @?= opbM
Right opbA <- A.parseOPBFile "test/samples/general/test-relational-operator-unicode.pb"
expected @?= opbA


case_trailing_junk = do
isError (parseOPBString "" trailingJunk) @?= True
Expand Down Expand Up @@ -313,6 +325,18 @@ exampleGeneralRelationalOperator = unlines $
, "-1 x6 -2 x7 != -3;"
]

exampleGeneralRelationalOperatorUnicode :: String
exampleGeneralRelationalOperatorUnicode = unlines $
[ "* #variable= 7 #constraint= 6"
, "max: 1 x1 ;"
, "+1 x1 +2 x2 ≥ 1;"
, "-1 x2 -2 x3 ≤ -1;"
, "+1 x3 +2 x4 > 1;"
, "-1 x4 -2 x5 < 0;"
, "+1 x5 +2 x6 = 3;"
, "-1 x6 -2 x7 ≠ -3;"
]

------------------------------------------------------------------------
-- Utilities

Expand Down
8 changes: 8 additions & 0 deletions test/samples/general/test-relational-operator-unicode.pb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
* #variable= 7 #constraint= 6
max: 1 x1 ;
+1 x1 +2 x2 ≥ 1;
-1 x2 -2 x3 ≤ -1;
+1 x3 +2 x4 > 1;
-1 x4 -2 x5 < 0;
+1 x5 +2 x6 = 3;
-1 x6 -2 x7 ≠ -3;

0 comments on commit f1816b7

Please sign in to comment.