Skip to content

Commit f1816b7

Browse files
committed
allow parsing unicode relational operators
1 parent 8e21fd8 commit f1816b7

File tree

6 files changed

+65
-8
lines changed

6 files changed

+65
-8
lines changed

pseudo-boolean.cabal

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ library
6767
deepseq >=1.4.4.0,
6868
hashable >=1.2.7.0 && <1.5.0.0,
6969
void,
70-
OptDir >= 0.1.0
70+
OptDir >= 0.1.0,
71+
utf8-string >=1.0.1.1 && <1.1,
72+
text >=1.2.3.1 && <3.0
7173
hs-source-dirs: src
7274
default-language: Haskell2010
7375

src/Data/PseudoBoolean/Attoparsec.hs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import Data.Attoparsec.ByteString.Char8 hiding (isDigit)
3939
import qualified Data.Attoparsec.ByteString.Lazy as L
4040
import qualified Data.ByteString.Char8 as BS
4141
import qualified Data.ByteString.Lazy as BSLazy
42+
import qualified Data.ByteString.UTF8 as UTF8
4243
import Data.Char
4344
import Data.Maybe
4445
import Data.PseudoBoolean.Types
@@ -164,7 +165,12 @@ relational_operator = msum
164165
, string ">" >> return Gt
165166
, string "<=" >> return Le
166167
, string "<" >> return Lt
168+
, u8string "≠" >> return NEq
169+
, u8string "≥" >> return Ge
170+
, u8string "≤" >> return Le
167171
]
172+
where
173+
u8string = string . UTF8.fromString
168174

169175
-- <variablename>::= "x" <unsigned_integer>
170176
variablename :: Parser Var

src/Data/PseudoBoolean/Megaparsec.hs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{-# LANGUAGE BangPatterns, FlexibleContexts, TypeFamilies, ConstraintKinds #-}
22
{-# LANGUAGE OverloadedStrings #-}
3+
{-# LANGUAGE ScopedTypeVariables #-}
34
{-# LANGUAGE TypeOperators #-}
45
{-# OPTIONS_GHC -Wall #-}
56
-----------------------------------------------------------------------------
@@ -42,8 +43,10 @@ module Data.PseudoBoolean.Megaparsec
4243
import Prelude hiding (sum)
4344
import Control.Monad
4445
import Data.ByteString.Lazy (ByteString)
45-
import qualified Data.ByteString.Lazy.Char8 as BL
46+
import qualified Data.ByteString.Lazy as BL
47+
import qualified Data.ByteString.Lazy.UTF8 as UTF8
4648
import Data.Maybe
49+
import Data.Proxy
4750
import Data.String
4851
import Data.Word
4952
import Data.Void
@@ -173,15 +176,21 @@ objective_type :: C e s m => m OptDir
173176
objective_type = (try (string "min:") >> return OptMin) <|> (string "max:" >> return OptMax)
174177

175178
-- <relational_operator>::= ">=" | "="
176-
relational_operator :: C e s m => m Op
179+
relational_operator :: forall e s m. C e s m => m Op
177180
relational_operator = msum $ map try
178181
[ string "=" >> return Eq
179182
, string "!=" >> return NEq
180183
, string ">=" >> return Ge
181184
, string ">" >> return Gt
182185
, string "<=" >> return Le
183186
, string "<" >> return Lt
187+
, u8string "≠" >> return NEq
188+
, u8string "≥" >> return Ge
189+
, u8string "≤" >> return Le
184190
]
191+
where
192+
-- XXX: We cannot assume Tokens s ~ ByteString
193+
u8string = string . tokensToChunk (Proxy :: Proxy s) . BL.unpack . UTF8.fromString
185194

186195
-- <variablename>::= "x" <unsigned_integer>
187196
variablename :: C e s m => m Var
@@ -231,7 +240,7 @@ type ParseError = MP.ParseErrorBundle BL.ByteString Void
231240

232241
-- | Parse a OPB format string containing pseudo boolean problem.
233242
parseOPBString :: String -> String -> Either ParseError Formula
234-
parseOPBString info s = parse (formula <* eof) info (BL.pack s)
243+
parseOPBString info s = parse (formula <* eof) info (UTF8.fromString s)
235244

236245
-- | Parse a OPB format lazy bytestring containing pseudo boolean problem.
237246
parseOPBByteString :: String -> ByteString -> Either ParseError Formula
@@ -294,7 +303,7 @@ softconstraint = do
294303

295304
-- | Parse a WBO format string containing weighted boolean optimization problem.
296305
parseWBOString :: String -> String -> Either ParseError SoftFormula
297-
parseWBOString info s = parse (softformula <* eof) info (BL.pack s)
306+
parseWBOString info s = parse (softformula <* eof) info (UTF8.fromString s)
298307

299308
-- | Parse a WBO format lazy bytestring containing pseudo boolean problem.
300309
parseWBOByteString :: String -> ByteString -> Either ParseError SoftFormula

src/Data/PseudoBoolean/Parsec.hs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ module Data.PseudoBoolean.Parsec
3737
import Prelude hiding (sum)
3838
import Control.Monad
3939
import Data.ByteString.Lazy (ByteString)
40+
import qualified Data.ByteString.Lazy as BL
4041
import Data.Maybe
42+
import qualified Data.Text.Lazy.Encoding as TL
4143
import Text.Parsec
42-
import qualified Text.Parsec.ByteString.Lazy as ParsecBS
4344
import Data.PseudoBoolean.Types
4445
import Data.PseudoBoolean.Internal.TextUtil
4546

@@ -163,6 +164,9 @@ relational_operator = msum $ map try
163164
, string ">" >> return Gt
164165
, string "<=" >> return Le
165166
, string "<" >> return Lt
167+
, string "≠" >> return NEq
168+
, string "≥" >> return Ge
169+
, string "≤" >> return Le
166170
]
167171

168172
-- <variablename>::= "x" <unsigned_integer>
@@ -222,7 +226,9 @@ parseOPBByteString = parse (formula <* eof)
222226

223227
-- | Parse a OPB file containing pseudo boolean problem.
224228
parseOPBFile :: FilePath -> IO (Either ParseError Formula)
225-
parseOPBFile = ParsecBS.parseFromFile (formula <* eof)
229+
parseOPBFile fname = do
230+
input <- BL.readFile fname
231+
return $ runP (formula <* eof) () fname (TL.decodeUtf8 input)
226232

227233

228234
-- <softformula>::= <sequence_of_comments> <softheader> <sequence_of_comments_or_constraints>
@@ -284,4 +290,6 @@ parseWBOByteString = parse (softformula <* eof)
284290

285291
-- | Parse a WBO file containing weighted boolean optimization problem.
286292
parseWBOFile :: FilePath -> IO (Either ParseError SoftFormula)
287-
parseWBOFile = ParsecBS.parseFromFile (softformula <* eof)
293+
parseWBOFile fname = do
294+
input <- BL.readFile fname
295+
return $ runP (softformula <* eof) () fname (TL.decodeUtf8 input)

test/TestPBFile.hs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,19 @@ case_invalid_lhs_empty_sum_wbo = checkWBOFile "test/samples/invalid-lhs-empty-su
4949

5050
case_general_testlin_max_file = checkOPBFile "test/samples/general/testlin-max.pb"
5151
case_general_relational_operator_file = checkOPBFile "test/samples/general/test-relational-operator.pb"
52+
case_general_relational_operator_unicode_file = checkOPBFile "test/samples/general/test-relational-operator-unicode.pb"
5253
case_general_relational_operator = checkOPBString "general relational operator" exampleGeneralRelationalOperator
54+
case_general_relational_operator_unicode = checkOPBString "general relational operator unicode" exampleGeneralRelationalOperatorUnicode
55+
56+
case_general_relational_operator_unicode_equivalence = do
57+
Right expected <- parseOPBFile "test/samples/general/test-relational-operator.pb"
58+
Right opbP <- parseOPBFile "test/samples/general/test-relational-operator-unicode.pb"
59+
expected @?= opbP
60+
Right opbM <- M.parseOPBFile "test/samples/general/test-relational-operator-unicode.pb"
61+
expected @?= opbM
62+
Right opbA <- A.parseOPBFile "test/samples/general/test-relational-operator-unicode.pb"
63+
expected @?= opbA
64+
5365

5466
case_trailing_junk = do
5567
isError (parseOPBString "" trailingJunk) @?= True
@@ -313,6 +325,18 @@ exampleGeneralRelationalOperator = unlines $
313325
, "-1 x6 -2 x7 != -3;"
314326
]
315327

328+
exampleGeneralRelationalOperatorUnicode :: String
329+
exampleGeneralRelationalOperatorUnicode = unlines $
330+
[ "* #variable= 7 #constraint= 6"
331+
, "max: 1 x1 ;"
332+
, "+1 x1 +2 x2 ≥ 1;"
333+
, "-1 x2 -2 x3 ≤ -1;"
334+
, "+1 x3 +2 x4 > 1;"
335+
, "-1 x4 -2 x5 < 0;"
336+
, "+1 x5 +2 x6 = 3;"
337+
, "-1 x6 -2 x7 ≠ -3;"
338+
]
339+
316340
------------------------------------------------------------------------
317341
-- Utilities
318342

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
* #variable= 7 #constraint= 6
2+
max: 1 x1 ;
3+
+1 x1 +2 x2 ≥ 1;
4+
-1 x2 -2 x3 ≤ -1;
5+
+1 x3 +2 x4 > 1;
6+
-1 x4 -2 x5 < 0;
7+
+1 x5 +2 x6 = 3;
8+
-1 x6 -2 x7 ≠ -3;

0 commit comments

Comments
 (0)