-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtableConverter.hs
77 lines (63 loc) · 2.92 KB
/
tableConverter.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
module Main where
import Data.Char
import Data.Int
import Data.List
import Data.Array
import qualified Data.ByteString as L
import qualified Data.ByteString.Char8 as C
import Text.Regex.TDFA
-- | Convert the cleaned CallDetail dump: split it on CRLF, rewrite the
-- timestamps of every line with 'parseLine2', terminate each line with
-- ";\n" and write the concatenated result to the ".dated.sql" file.
--
-- The 'putStrLn' calls are progress markers only. The 'let' bindings are
-- lazy, so the markers do not prove that the corresponding step has run;
-- the work is forced when the result is written.
main :: IO ()
main = do
  -- Alternative input: "../data/dbo.AgentActivityLog.sql.clean.unix"
  contents <- L.readFile "../data/dbo.CallDetail.clean"
  putStrLn "before splitCrlf"
  let rawLines = splitCrlf [] contents
  putStrLn "after splitCrlf"
  let datedLines = map parseLine2 rawLines
  putStrLn "after map parseLine2"
  let terminatedLines = map (`L.append` lineEnd) datedLines
  putStrLn "after map snoc"
  let result = L.concat terminatedLines
  putStrLn "after concat"
  -- Alternative output: "../data/dbo.AgentActivityLog.sql.clean.unix.dated"
  L.writeFile "../data/dbo.CallDetail.clean.dated.sql" result
  where
    -- Every output line is closed with a semicolon followed by a line feed.
    lineEnd = C.pack ";\n"
--parseFile inFile outFile =
-- hIn = mkFileHandle "../data/dbo.CallDetail.clean" ReadMode
-- | Split a buffer on CRLF ("\r\n") separators.
--
-- The pieces are returned after the already collected chunks in @acc@
-- (callers normally pass @[]@). The separators themselves are dropped.
-- The text after the last separator is always emitted, so an input that
-- ends with CRLF yields a trailing empty chunk.
--
-- The list is produced by consing each piece in front of the remaining
-- split instead of repeatedly appending to the accumulator, which keeps
-- the whole split linear in the number of lines.
splitCrlf :: [L.ByteString] -> L.ByteString -> [L.ByteString]
splitCrlf acc line = acc ++ pieces line
  where
    pieces rest
      | pos == -1 = [rest]
      -- Skip the two separator bytes before continuing with the remainder.
      | otherwise = L.take pos rest : pieces (L.drop (pos + 2) rest)
      where
        pos = searchCrlf rest
-- | Look for a CRLF separator in the buffer by delegating to 'searchCrlf_'
-- with a starting counter of 0. Callers treat a result of -1 as
-- "no separator found".
searchCrlf :: L.ByteString -> Int
searchCrlf = searchCrlf_ 0
-- | Find the first CRLF ("\r\n") pair in @line@.
--
-- @cpt@ is the number of bytes already skipped in front of @line@; it is
-- added to the position found so that the result is an offset into the
-- buffer the search originally started from. Returns -1 when there is no
-- CRLF pair.
--
-- A carriage return that is not followed by a line feed (including one
-- that is the very last byte) is skipped and the search continues after it.
searchCrlf_ :: Int -> L.ByteString -> Int
searchCrlf_ cpt line =
  case L.elemIndex cr line of
    Nothing -> -1
    Just pos
      -- Check the bounds first: the CR may be the final byte of the buffer.
      | pos + 1 < L.length line && L.index line (pos + 1) == lf -> cpt + pos
      -- Lone CR: resume strictly after it so the search always progresses.
      | otherwise -> searchCrlf_ (cpt + pos + 1) (L.drop (pos + 1) line)
  where
    cr = fromIntegral (ord '\r')
    lf = fromIntegral (ord '\n')
-- | Rewrite every quoted timestamp of the form @'20110629 05:00:25:000'@
-- found in the line, using 'replacePatternInLine' for each match.
--
-- Matches are processed from the last one to the first: each rewrite
-- changes the length of the line, so going backwards keeps the
-- (offset, length) pairs of the matches still to be handled valid.
parseLine2 :: L.ByteString -> L.ByteString
parseLine2 line = foldl' rewriteAt line (reverse matchSpans)
  where
    -- Quote, 8 digits, space, three "NN:" groups, 3 digits, quote.
    timestampPattern = "'[0-9]{8} ([0-9]{2}:){3}[0-9]{3}'"
    -- (offset, length) of every timestamp occurrence, in order of appearance.
    matchSpans = getAllMatches (line =~ timestampPattern :: AllMatches [] (Int, Int))
    -- Only the rewritten line is of interest; the echoed span is discarded.
    rewriteAt current hit = fst (replacePatternInLine current hit)
-- | Rewrite the quoted timestamp that starts at offset @off_@ of the line:
-- @'20110629 05:00:25:000'@ becomes @'2011-06-29 05:00:25'@, i.e. dashes
-- are inserted into the date and the trailing @:000@ milliseconds are
-- removed. @off_@ is the position of the opening quote.
--
-- The (offset, length) pair is returned unchanged next to the new line so
-- that the function can be used directly with 'mapAccumL'; the length is
-- not used for the rewrite itself.
replacePatternInLine :: L.ByteString -> (Int, Int) -> (L.ByteString, (Int, Int))
replacePatternInLine line (off_, len_) = (rewritten, (off_, len_))
  where
    dash = C.singleton '-'
    -- @count@ bytes of the line starting at absolute position @begin@.
    chunk begin count = L.take count (L.drop begin line)
    -- Everything up to and including the four year digits.
    upToYear = L.take (off_ + 5) line
    month = chunk (off_ + 5) 2
    -- Day, space and the HH:MM:SS part (11 bytes).
    dayAndTime = chunk (off_ + 7) 11
    -- Resume at the closing quote, skipping the ":mmm" milliseconds.
    afterMillis = L.drop (off_ + 22) line
    rewritten = L.intercalate dash [upToYear, month, dayAndTime] `L.append` afterMillis