-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtestObfuscationSentiment.py
50 lines (35 loc) · 1.82 KB
/
testObfuscationSentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import numpy as np
import pandas as pd
from typing import List
import multiprocessing as mp
from pypantera.src.AbstractTextObfuscationDPMechanism import AbstractTextObfuscationDPMechanism
from pypantera.src.utils.helper import createLogger, createParser, selectMechanism, saveResults
if __name__ == '__main__':
    # Script entry point: read a CSV with 'id', 'text' and 'sentiment' columns,
    # run every selected obfuscation mechanism over the texts in parallel, and
    # hand the results (plus the untouched sentiment column) to saveResults.
    # The __main__ guard also keeps multiprocessing workers from re-running
    # this body when they import the main module (spawn start method).

    # create a logger
    logger: object = createLogger()

    # define the arguments parser and parse the command line
    parser = createParser()
    args: object = parser.parse_args()

    # log the arguments
    logger.info(f"Task to perform: {args.task}")
    logger.info(f"Embeddings file path: {args.embPath}")
    logger.info(f"Input file path: {args.inputPath}")
    logger.info(f"Mechanism to use: {args.mechanism}")
    logger.info(f"Epsilon values to use: {args.epsilons}")

    # initialize the mechanisms
    # NOTE(review): presumably one mechanism instance per epsilon value, as the
    # log message further down suggests - confirm against selectMechanism.
    mechanisms: List[AbstractTextObfuscationDPMechanism] = selectMechanism(args, logger)
    logger.info('Starting the obfuscation process...')

    # load the input; the sentiment column is set aside (it is not passed to
    # the mechanisms) and only the id/text columns are obfuscated
    data: pd.DataFrame = pd.read_csv(args.inputPath, sep=',')
    sentiment = data['sentiment']
    data = data[['id', 'text']]

    # one task per mechanism: (self, data, numberOfObfuscations) for obfuscateText
    tasks = [(mech, data, args.numberOfObfuscations) for mech in mechanisms]

    # obfuscate the texts using multiprocessing; never start more workers than
    # there are tasks, and never fewer than one (Pool(0) raises ValueError)
    num_workers: int = max(1, min(mp.cpu_count(), len(tasks)))
    with mp.Pool(num_workers) as pool:
        results: List[pd.DataFrame] = pool.starmap(
            AbstractTextObfuscationDPMechanism.obfuscateText,
            tasks,
        )
    logger.info(f"Obfuscation finished! {len(results)} results obtained (one for each epsilon).")

    # results is a list of pandas dataframes, one per mechanism, in the same
    # order as `mechanisms` (starmap preserves input order)
    logger.info('Saving the obfuscated queries to a csv file...')

    # save the results; the sentiment column is passed along as well
    # (presumably so labels are stored next to the obfuscated texts - verify
    # against saveResults)
    saveResults(results, mechanisms, args, logger, sentiment)
    logger.info("Program terminated successfully!")