-
Notifications
You must be signed in to change notification settings - Fork 0
/
Main.hs
299 lines (279 loc) · 10.5 KB
/
Main.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
{-|
Module : Main
Description : A quick-and-dirty GUI for L2-UD.
Stability : experimental
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
module Main where
import Text.Read (readMaybe)
import Data.Text.Lazy.Encoding (encodeUtf8, decodeUtf8)
import Data.Maybe
import Data.List.Utils
import Text.PrettyPrint (render)
import UDStandard
import UDTrees
import UDPatterns
import UDVisualizations
import Utils.UDConcepts
import Utils.Output
import Align
import Match hiding (matchingSubtrees)
import Errors
import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as T
import qualified Data.Map as M
import qualified Data.List as L
import Data.Char
import Data.Either
import Web.Scotty
import qualified Web.Scotty as S
import Network.Wai.Middleware.RequestLogger
import Network.Wai.Middleware.Static
import Network.Wai.Parse (fileName, fileContent, defaultParseRequestBodyOptions)
import Network.HTTP.Types.Status (mkStatus)
import System.Directory
import System.IO.Temp
import Data.Aeson (ToJSON)
import GHC.Generics
import Debug.Trace
-- | Output format requested by the client for the search results.
-- The constructor names are parsed from the @mode@ form parameter via the
-- derived 'Read' instance, so they must not be renamed.
data Mode
  = TextMode   -- ^ sentences as highlighted text
  | CoNNLUMode -- ^ matched subtrees printed as CoNLL-U
  | TreeMode   -- ^ matched subtrees rendered as SVG fragments
  deriving (Eq, Read, Show, Enum)
-- | Response body of the check_* API endpoints.
data ParseStatus = Status
  { status :: Text
    -- ^ either "valid" or "invalid"
  , msg :: Text
    -- ^ explanation of the failure; usually empty when valid
  , parsesOrErrors :: Maybe [String]
    -- ^ the shown parse results when valid, the error messages when a
    -- CoNLL-U upload is invalid, 'Nothing' when there is nothing to report
  }
  deriving (Generic, Show)
-- | Response body of the search_treebanks endpoint.
data AlignmentResult = Result
  { t1 :: [String]
    -- ^ rendered matches from treebank 1, one entry per match
  , t2 :: [String]
    -- ^ rendered matches from treebank 2 (empty for a single treebank)
  , t1file :: Maybe String
    -- ^ path of the temporary file holding the treebank 1 column
  , t2file :: Maybe String
    -- ^ path of the temporary file holding the treebank 2 column
  , t1t2file :: Maybe String
    -- ^ path of the temporary TSV file holding both columns side by side
  }
  deriving (Generic, Show)
-- Both response types can be serialized to JSON. The instance bodies are
-- empty, so the class defaults (driven by the derived Generic instances)
-- determine the encoding.
instance ToJSON ParseStatus
instance ToJSON AlignmentResult
-- Directory for temporary files, given as a relative path.
-- NOTE: main removes this directory (if it exists) and re-creates it on
-- startup, so nothing stored in it survives a server restart.
tmpPath :: String
tmpPath = "tmp"
-- | Compile-time switch for the 'debug' helper; debugging output is off.
debugOn :: Bool
debugOn = False

-- | Debug printer for the ActionM monad: when 'debugOn' is set, prints the
-- label followed by "::" and then the value on the next line; otherwise
-- does nothing.
debug :: String -> String -> ActionM ()
debug label value
  | debugOn = liftIO $ putStrLn $ label ++ "::\n" ++ value
  | otherwise = return ()
-- | Handler for the landing page: logs the redirect to stdout and sends the
-- client to the stund web interface served from the static directory.
handleRoot :: ActionM ()
handleRoot =
  liftIO (putStrLn "Redirecting") >> redirect "static/stund.html"
-- | Check the validity of the query expression given in the @query@
-- parameter. Answers with a 'ParseStatus': "invalid" when no pattern could
-- be parsed, otherwise "valid" together with the shown patterns.
checkQuery :: ActionM ()
checkQuery =
  do
    queryTxt <- queryParam "query"
    -- fieldVals is defined in UDConcepts
    case parseQuery fieldVals queryTxt of
      [] -> json (Status "invalid" "could not parse query" Nothing)
      parsed -> json (Status "valid" "" (Just $ map show parsed))
-- | Check the validity of the replacement expression given in the
-- @replacement@ parameter. Answers with a 'ParseStatus': "valid" plus the
-- shown replacement when it can be read as a UDReplacement, "invalid"
-- otherwise.
checkReplacement :: ActionM ()
checkReplacement =
  do
    replacementTxt <- queryParam "replacement"
    case (readMaybe replacementTxt :: Maybe UDReplacement) of
      Nothing -> json (Status "invalid" "could not parse replacement" Nothing)
      Just parsed -> json (Status "valid" "" (Just [show parsed]))
-- | Check the validity of the uploaded CoNLL-U file(s).
-- Every uploaded file is decoded as UTF-8 and parsed with prsUDText, whose
-- Left results are treated here as successful parses and whose Right
-- results are treated as lists of error messages. The upload is "valid"
-- only when no file produced errors.
checkConll :: ActionM ()
checkConll =
  do
    uploads <- files
    let parsed =
          [ prsUDText (T.unpack (decodeUtf8 (fileContent info)))
          | (_, info) <- uploads
          ]
    case partitionEithers parsed of
      (sentences, []) ->
        json (Status "valid" "" (Just $ map show sentences))
      (_, problems) ->
        json (Status "invalid" "input is not in valid CoNLL-U format" $ Just $ concat problems)
-- | Placeholder for parsing an uploaded plain text file to CoNLL-U (the
-- original note mentions the UDPipe API). Currently it only answers with a
-- fixed "not implemented" message.
parsePlaintext :: ActionM ()
parsePlaintext = text "Not implemented yet"
-- | Search the treebank(s) using the query and replacement parameters.
--
-- Form input:
--
--   * uploaded file @treebank1@ (required) and @treebank2@ (optional; a
--     missing or empty second treebank means a single-treebank search)
--   * @mode@: one of the 'Mode' constructor names
--   * @diff@: optional Bool; anything missing or unreadable counts as False
--   * @query@ / @replacement@: pattern and replacement; an empty query
--     matches the roots, an empty replacement changes nothing
--   * @t1file@ / @t2file@ / @t1t2file@: optional paths of existing temporary
--     files to overwrite instead of creating new ones
--
-- Answers with an 'AlignmentResult' as JSON. Invalid input (unknown mode,
-- missing first treebank, unparseable query or replacement) is answered
-- with status 400 and a plain-text reason instead of crashing the handler.
searchTreebanks :: ActionM ()
searchTreebanks =
  do
    -- Get a map of all uploaded files from form field name to file info
    formFiles <- M.fromList <$> files
    -- Raw mode and diff flag; both are validated below
    modeTxt <- formParam "mode"
    diffTxt <- formParamMaybe "diff"
    outPath1 <- maybeTmpFile <$> formParamMaybe "t1file"
    liftIO $ putStrLn $ show outPath1
    outPath2 <- maybeTmpFile <$> formParamMaybe "t2file"
    outPathBoth <- maybeTmpFile <$> formParamMaybe "t1t2file"
    -- Get pattern and replacement
    queryTxt <- formParam "query"
    replacementTxt <- formParam "replacement"
    -- By default do not highlight "diff"
    let diff = fromMaybe False (diffTxt >>= readMaybe)
    let patterns =
          if null queryTxt
            then [(DEPREL_ "root", DEPREL_ "root")]
            else parseQuery fieldVals queryTxt
    let mreplacement =
          if null replacementTxt
            then Just $ CHANGES []
            else readMaybe replacementTxt
    case (readMaybe modeTxt, M.lookup "treebank1" formFiles, mreplacement, patterns) of
      (Nothing, _, _, _) -> badRequest "invalid mode"
      (_, Nothing, _, _) -> badRequest "missing file: treebank1"
      (_, _, Nothing, _) -> badRequest "could not parse replacement"
      (_, _, _, []) -> badRequest "could not parse query"
      (Just mode, Just t1Info, Just replacement, firstPattern : _) ->
        do
          -- Get text for both files
          let t1Text = decodeUtf8 $ fileContent t1Info
          let t2Text =
                maybe T.empty (decodeUtf8 . fileContent) (M.lookup "treebank2" formFiles)
          let parallel = not $ T.null t2Text
          -- Convert to sentences; a treebank that fails to parse contributes
          -- no sentences (fromLeft [] below)
          let t1Sents = prsUDText $ T.unpack t1Text
          -- If the treebank 2 is empty, fill with dummy sentences
          -- (better than using the treebank 1 again, because alignment
          -- complexity will be negligible if the trees are empty)
          let t2Sents =
                if parallel
                  then prsUDText $ T.unpack t2Text
                  else Left $ repeat (tree2sentence dummyUDTree)
          -- Align sentences
          let treebank = fromLeft [] t1Sents `zip` fromLeft [] t2Sents
          let alignments = map align treebank
          -- true bilingual matches
          let bimatches = treebank `zip` map (match patterns) alignments
          -- all matches (add treebank 1-only with dummy alignments)
          let matches =
                map
                  (\bms@((s1, s2), ms) ->
                    if isMonolingual firstPattern
                      then
                        ( (s1, s2)
                        , ms ++ zip
                            (filter
                              (\t -> t `notElem` map fst ms)
                              (matchingSubtrees (fst firstPattern) (sentence2tree s1)))
                            (repeat dummyUDTree)
                        )
                      else bms)
                  bimatches
          -- drop sentence pairs without matches and apply the replacement
          let matches' =
                [ (s, map (applyReplacement replacement) ms)
                | (s, ms) <- matches
                , not $ null ms
                ]
          -- render one side of a match according to the requested mode
          let showMatch s m m' =
                case mode of
                  TextMode -> highlin s (tree2sentence m) HTML
                  CoNNLUMode -> prt m' ++ "\n"
                  TreeMode -> sentence2svgFragment m'
          let (t1Col, t2Col) =
                unzip $ concatMap
                  (\((s1, s2), ms) ->
                    map
                      (\(m1, m2) ->
                        let m1' =
                              if m1 == dummyUDTree
                                then tree2sentence m1
                                else tree2sentence (subtree2tree m1)
                            m2' = tree2sentence (subtree2tree m2)
                            mark content =
                              if diff && m1 /= m2 && isJust outPath2
                                then "<mark>" ++ content ++ "</mark>"
                                else content
                        in (mark (showMatch s1 m1 m1'), mark (showMatch s2 m2 m2')))
                      ms)
                  matches'
          -- markup-free file content for one column
          let plain col =
                case mode of
                  TextMode -> rmMarkup $ mkUpper $ unlines col
                  _ -> rmMarkup $ unlines col
          t1t2Tmpfile <- liftIO $ writeMaybeTempFile outPathBoth "t1-t2-.tsv" $ unlines $
            zipWith (\c1 c2 -> c1 ++ "\t" ++ c2) (map rmMarkup t1Col) (map rmMarkup t2Col)
          t1Tmpfile <- liftIO $ writeMaybeTempFile outPath1 "t1-.htm" $ plain t1Col
          t2Tmpfile <- liftIO $ writeMaybeTempFile outPath2 "t2-.htm" $ plain t2Col
          json $
            if parallel
              then Result -- parallel treebank
                { t1 = t1Col
                , t2 = t2Col
                , t1file = Just t1Tmpfile
                , t2file = Just t2Tmpfile
                , t1t2file = Just t1t2Tmpfile
                }
              else Result -- single treebank
                { t1 = t1Col
                , t2 = []
                , t1file = Just t1Tmpfile
                , t2file = Nothing
                , t1t2file = Nothing
                }
  where
    -- answer with status 400 and a plain-text reason
    badRequest reason =
      do
        S.status $ mkStatus 400 "Bad Request"
        text reason
    -- remove markup tags <b> and <mark>
    rmMarkup s = replace "</mark>" "" $ replace "<mark>" "" $ replace "</b>" "" $ replace "<b>" "" s
    -- replace <b>text</b> markup by uppercase TEXT
    mkUpper s
      | "<b>" `L.isPrefixOf` s = mkUpper' $ drop 3 s
      | otherwise =
          case s of
            [] -> ""
            c : rest -> c : mkUpper rest
      where
        mkUpper' bold
          | "</b>" `L.isPrefixOf` bold = mkUpper $ drop 4 bold
          | otherwise =
              case bold of
                -- unterminated <b>: stop at end of input instead of crashing
                [] -> ""
                c : rest -> toUpper c : mkUpper' rest
-- | Write the content to the given file when one is supplied, otherwise to a
-- fresh temporary file created in 'tmpPath' from the given name template.
-- Returns the path of the file that was written.
writeMaybeTempFile :: Maybe FilePath -> String -> String -> IO FilePath
writeMaybeTempFile maybeFile tmpFilePattern content =
  case maybeFile of
    Just target -> writeFile target content >> return target
    Nothing -> writeTempFile tmpPath tmpFilePattern content
-- | Validate a client-supplied temporary file name.
-- Returns the name unchanged when it denotes a file located directly inside
-- 'tmpPath', and 'Nothing' when it is absent, empty or anything else.
--
-- A plain prefix check is not enough here: names such as @tmp/../Main.hs@ or
-- @tmpfoo/x@ also start with 'tmpPath', and the accepted name is later
-- written to. So the name must be 'tmpPath', one path separator, and a
-- single file name component that is neither @.@ nor @..@.
maybeTmpFile :: Maybe String -> Maybe FilePath
maybeTmpFile candidate =
  case candidate >>= L.stripPrefix tmpPath of
    Just (sep : name)
      | isPathSep sep
      , not (null name)
      , not (any isPathSep name)
      , name `notElem` [".", ".."] -> candidate
    _ -> Nothing
  where
    -- accept both separator styles so generated names work on any platform
    isPathSep c = c == '/' || c == '\\'
-- | Apply the replacement to both members of a matched pair, keeping only
-- the first component of what replacementsWithUDPattern returns for each.
applyReplacement r (e1, e2) = (rewrite e1, rewrite e2)
  where
    rewrite e = fst $ replacementsWithUDPattern r e
-- | Download a temporary file named by the @filename@ query parameter.
-- The file is sent as UTF-8 plain text. Only files located directly inside
-- 'tmpPath' are served; every other name is answered with status 403.
--
-- A plain prefix check would also accept names such as @tmp/../Main.hs@ or
-- @tmpfoo/x@ and thereby expose files outside the temporary directory, so
-- the name must be 'tmpPath', one path separator, and a single file name
-- component that is neither @.@ nor @..@.
downloadTmpFile :: ActionM ()
downloadTmpFile =
  do
    requested <- queryParam "filename"
    if isDirectTmpChild requested
      then do
        setHeader "Content-Type" "text/plain; charset=utf-8"
        file requested
      else S.status $ mkStatus 403 "Access denied"
  where
    isDirectTmpChild path =
      case L.stripPrefix tmpPath path of
        Just (sep : name) ->
          isPathSep sep
            && not (null name)
            && not (any isPathSep name)
            && name `notElem` [".", ".."]
        _ -> False
    -- accept both separator styles so generated names work on any platform
    isPathSep c = c == '/' || c == '\\'
-- | Entry point of the server: resets the temporary directory, then serves
-- the web interface and the API on port 3000.
main :: IO ()
main = resetTmpDir >> scotty 3000 app
  where
    -- Remove a temporary directory left over from an earlier run, if there
    -- is one, and create a fresh, empty one
    resetTmpDir =
      do
        stale <- doesDirectoryExist tmpPath
        if stale then removeDirectoryRecursive tmpPath else return ()
        createDirectory tmpPath
    -- Middleware and routes of the web application
    app =
      do
        -- Logs requests
        middleware logStdoutDev
        -- Handles static files
        middleware static
        get "/" handleRoot
        get "/index.html" handleRoot
        get "/check_query" checkQuery
        get "/check_replacement" checkReplacement
        post "/check_conll" checkConll
        post "/parse_plaintext" parsePlaintext
        post "/search_treebanks" searchTreebanks
        get "/tmp_file/" downloadTmpFile