-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_ours_restaurant.R
53 lines (45 loc) · 2.26 KB
/
run_ours_restaurant.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Parallel backend: load furrr and run futures in 4 background R sessions.
library(furrr)
plan(multisession(workers=4))
# Modelling package and general-purpose data tooling.
library(exchanger)
library(tidyverse)
# Project-local helpers.
# NOTE(review): presumably util.R defines transform_dist_fn() and run_ours.R
# defines run_ours(), both of which are called further down — confirm.
source("util.R")
source("run_ours.R")
# Load the restaurant data set.
# `chdir = TRUE` makes source() switch into datasets/ only while the loader
# runs and restores the previous working directory on exit, so a failure
# inside the loader no longer leaves the session stranded in datasets/.
# NOTE(review): the loader presumably defines `records`, `n_records` and
# `true_membership`, which are used below — confirm.
source(file.path("datasets", "load_restaurant.R"), chdir = TRUE)
# Negative-binomial hyperparameters obtained by moment matching:
# for a target mean m = n_records - 1 and variance v = n_records^2,
#   prob = m / v   and   size = m^2 / (v - m).
# NOTE(review): presumably ShiftedNegBinomRV shifts the support by 1, which
# would make the prior mean equal to n_records — confirm.
snbinom_var <- n_records^2
snbinom_prob <- (n_records - 1) / snbinom_var
snbinom_size <- (n_records - 1)^2 / (snbinom_var - n_records + 1)
# Experiment grid: each entry pairs a short label (used in the experiment
# name) with the clustering prior handed to exchanger().
expt_configs <- list(
  list(
    name = "coupon",
    clust_prior = GeneralizedCouponRP(
      ShiftedNegBinomRV(snbinom_size, snbinom_prob),
      GammaRV(1, 1 / 100)
    )
  ),
  list(
    name = "py",
    clust_prior = PitmanYorRP(
      GammaRV(1, 1 / 100),
      BetaRV(1, 1)
    )
  ),
  list(
    name = "ewens",
    clust_prior = EwensRP(GammaRV(1, 1 / 100))
  ),
  list(
    # Coupon prior with both parameters fixed: nrow(records) and Inf.
    name = "blinkcoupon",
    clust_prior = GeneralizedCouponRP(nrow(records), Inf)
  )
)
# Fit one model per entry of `expt_configs`, each inside its own future.
# `e` is a single config: `e$name` labels the run, `e$clust_prior` is the
# clustering prior passed to exchanger().
future_map(expt_configs, function(e) {
  # Prior on the distortion probability, shared by all four attributes.
  distort_prior <- BetaRV(1, 4)

  # Distance for free-text fields: split both inputs on whitespace and
  # compare the resulting token lists with a fuzzy token-set comparator.
  dist_1 <- function(x, y) {
    x <- strsplit(x, "\\s+")
    y <- strsplit(y, "\\s+")
    FuzzyTokenSet(Abbreviation(), deletion = 0.3, insertion = 0.5)(x, y)
  }

  # Factories rather than shared objects, so every attribute still receives
  # its own freshly constructed specification, as in the spelled-out version.
  text_attribute <- function() {
    Attribute(
      transform_dist_fn(dist_1, 3.0, scaling_factor = 10.0),
      distort_prob_prior = distort_prior,
      distort_dist_prior = DirichletProcess(GammaRV(2, 1e-4)),
      entity_dist_prior = DirichletRV(1.0)
    )
  }
  categorical_attribute <- function() {
    CategoricalAttribute(
      distort_prob_prior = distort_prior,
      distort_dist_prior = DirichletProcess(GammaRV(2, 1e-4)),
      entity_dist_prior = DirichletRV(1.0)
    )
  }

  # Free-text columns use the token distance; the others are categorical.
  attr_params <- c(
    "name" = text_attribute(),
    "addr" = text_attribute(),
    "city" = categorical_attribute(),
    "type" = categorical_attribute()
  )

  model <- exchanger(records, attr_params, e$clust_prior)

  # Unique run label: prior name plus the current date/time with spaces and
  # colons replaced by underscores.
  expt_name <- paste0("restaurant_ours_", e$name, "_", gsub("[ :]", "_", date()))

  run_ours(
    expt_name, model, true_membership,
    n_samples = 10000, burnin_interval = 100000
  )
}, .options = furrr_options(
  packages = c("comparator", "exchanger", "clevr"),
  # Give every future its own parallel-safe RNG stream; without this, random
  # draws made inside the workers are flagged as unreliable. Call set.seed()
  # before this point to make the streams reproducible across runs.
  seed = TRUE
))