Delegated Private Matching for Compute (DPMC) (#116)
Summary:
## Types of changes

- [ ] Bug fix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Docs change / refactoring / dependency upgrade

## Motivation and Context / Related issue

1) Extends multi-key Private-ID to Delegated Private Matching for Compute (DPMC), which supports multiple partners with the help of a helper/delegate server.

* The company has a CSV with identifiers (e.g., `etc/example/dpmc/Ex0_company.csv`).
* Each partner has a CSV with identifiers (e.g., `etc/example/dpmc/Ex0_partner_1.csv`) and a CSV with features (e.g., `etc/example/dpmc/Ex0_partner_1_features.csv`).

2) Extends DPMC to Delegated Private Matching for Compute with Secure Shuffling (DSPMC), which supports multiple partners with the help of two helpers/delegate servers.

* The company has a CSV with identifiers (e.g., `etc/example/dspmc/Ex0_company.csv`).
* Each partner has a CSV with identifiers (e.g., `etc/example/dspmc/Ex0_partner_1.csv`) and a CSV with features (e.g., `etc/example/dspmc/Ex0_partner_1_features.csv`); a sketch of these file layouts follows the list.
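
The example files' contents are not shown in this diff; a hypothetical sketch of the layouts, inferred from the datagen changes below (rows of identifiers, plus one small random integer per partner row as a feature), might look like:

```
# Ex0_partner_1.csv — identifier columns per row (values hypothetical)
user1@example.com,device-id-1
user2@example.com,device-id-2

# Ex0_partner_1_features.csv — one integer feature per row, row-aligned with the identifier file
42
7
```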

## How Has This Been Tested (if it applies)

Tests are located in `etc/example/dpmc` and `etc/example/dspmc`.

## Checklist

- [x] The documentation is up-to-date with the changes I made.
- [x] I have read the **CONTRIBUTING** document and completed the CLA (see **CONTRIBUTING**).
- [x] All tests passed, and additional code has been covered with new tests.

Pull Request resolved: #116

Reviewed By: shubho

Differential Revision: D50680360

Pulled By: prasadbuddhavarapu

fbshipit-source-id: 3f82635e61ee86c8041864d3bbb3f293df3996d8
jimouris authored and facebook-github-bot committed Nov 14, 2023
1 parent b27743a commit 26b08dd
Showing 94 changed files with 8,743 additions and 111 deletions.
1 change: 1 addition & 0 deletions .gitignore
```diff
@@ -72,6 +72,7 @@ gens
 *.der
 *.srl
 *.seq
+*.cnf

 #latex artifacts
 *.aux
```
10 changes: 8 additions & 2 deletions Dockerfile
```diff
@@ -27,8 +27,14 @@ cp bin/release/pjc-client exec && \
 cp bin/release/pjc-server exec && \
 cp bin/release/datagen exec && \
 cp bin/release/private-id-multi-key-server exec && \
-cp bin/release/private-id-multi-key-client exec
-
+cp bin/release/private-id-multi-key-client exec && \
+cp bin/release/dpmc-company-server exec && \
+cp bin/release/dpmc-helper exec && \
+cp bin/release/dpmc-partner-server exec && \
+cp bin/release/dspmc-company-server exec && \
+cp bin/release/dspmc-helper-server exec && \
+cp bin/release/dspmc-partner-server exec && \
+cp bin/release/dspmc-shuffler exec

 # thin container with binaries
 # base image is taken from here https://hub.docker.com/_/debian/
```
403 changes: 311 additions & 92 deletions README.md

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions common/Cargo.toml
```diff
@@ -14,7 +14,7 @@ path = "datagen/datagen.rs"
 [dependencies]
 log = "0.4"
 env_logger = "0.7.1"
-rayon = "1.3.0"
+rayon = "1.8.0"
 clap = "2.33.0"
 csv = "1.1.1"
 rand = { version = "0.8", features = ["small_rng"] }
@@ -23,10 +23,10 @@ hex = "0.3.0"
 serde = {version = "1.0.104", features = ["derive"] }
 num = "0.2.1"
 wasm-timer = "0.2.5"
-aws-config = "0.54.1"
-aws-credential-types = "0.54.1"
-aws-sdk-s3 = "0.24.0"
-aws-smithy-http = "0.54.0"
+aws-config = "0.56.1"
+aws-credential-types = "0.56.1"
+aws-sdk-s3 = "0.34.0"
+aws-smithy-http = "0.56.0"
 lazy_static = "1.4.0"
 regex = "1.5.4"
 tempfile = "3.2.0"
```
36 changes: 33 additions & 3 deletions common/datagen/datagen.rs
```diff
@@ -19,7 +19,7 @@ pub mod gen {
         pub player_a: Vec<String>,
         pub player_a_values: Option<Vec<u32>>,
         pub player_b: Vec<String>,
-        pub player_b_values: Option<Vec<u32>>,
+        pub player_b_values: Option<Vec<String>>,
     }

     pub fn random_data(
@@
         player_b.extend_from_slice(&intersection);
         player_b.shuffle(&mut rng);

+        let player_b_features = (0..(player_a_size + intersection_size))
+            .map(|_| random_u8().to_string())
+            .collect::<Vec<String>>();
+
         Data {
             player_a,
             player_b,
             player_a_values: None,
-            player_b_values: None,
+            player_b_values: Some(player_b_features),
         }
     }

@@ -76,6 +80,12 @@
         s
     }

+    fn random_u8() -> u8 {
+        let mut r = thread_rng();
+        let s: u8 = r.gen();
+        s
+    }
+
     pub fn write_slice_to_file(source: &[String], cols: usize, path: &str) -> Result<(), String> {
         use indicatif::ProgressBar;

@@ -132,6 +142,14 @@ fn main() {
                 .takes_value(true)
                 .default_value("0"),
         )
+        .arg(
+            Arg::with_name("features")
+                .short("f")
+                .long("features")
+                .value_name("FEATURES")
+                .help("number of features")
+                .takes_value(false),
+        )
         .get_matches();

     let size = matches
@@ -145,16 +163,23 @@
         .unwrap()
         .parse::<usize>()
         .expect("size param");
+
+    let gen_features = matches.is_present("features");
     let dir = matches.value_of("dir").unwrap_or("./");

     let fn_a = format!("{}/input_{}_size_{}_cols_{}.csv", dir, "a", size, cols);
     let fn_b = format!("{}/input_{}_size_{}_cols_{}.csv", dir, "b", size, cols);
+    let fn_b_features = format!(
+        "{}/input_{}_size_{}_cols_{}_features.csv",
+        dir, "b", size, cols
+    );

     info!("Generating output of size {}", size);
     info!("Player a output: {}", fn_a);
     info!("Player b output: {}", fn_b);
+    info!("Player b features: {}", fn_b_features);

-    let intrsct = size / 2 as usize;
+    let intrsct = size / 2_usize;
     let size_player = size - intrsct;
     let data = gen::random_data(size_player, size_player, intrsct);
     info!("Data generation done, writing to files");
@@
     gen::write_slice_to_file(&data.player_b, cols, &fn_b).unwrap();
     info!("File {} finished", fn_b);

+    if gen_features {
+        gen::write_slice_to_file(&data.player_b_values.unwrap(), 0, &fn_b_features).unwrap();
+        info!("File {} finished", fn_b_features);
+    }
+
     info!("Bye!");
 }
```
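
As a quick illustration of the new data shape, a hypothetical test sketch (assuming the argument order `(player_a_size, player_b_size, intersection_size)` implied by the `main()` call above):

```rust
#[test]
fn random_data_generates_features() {
    // Both players get 50 unique rows plus 50 shared rows; player B also
    // gets one random u8 feature (serialized as a string) per row.
    let data = gen::random_data(50, 50, 50);
    assert_eq!(data.player_b.len(), 100);
    assert!(data.player_a_values.is_none());
    assert_eq!(data.player_b_values.unwrap().len(), 100);
}
```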
26 changes: 26 additions & 0 deletions common/src/files.rs
```diff
@@ -88,6 +88,32 @@
         .collect::<Vec<Vec<String>>>()
 }

+/// Reads CSV file into vector of rows,
+/// where each row is represented as a vector of u64.
+/// Panics on any field that cannot be parsed as u64.
+pub fn read_csv_as_u64<T>(filename: T) -> Vec<Vec<u64>>
+where
+    T: AsRef<Path>,
+{
+    let mut reader = csv::ReaderBuilder::new()
+        .delimiter(b',')
+        .flexible(false)
+        .has_headers(false)
+        .from_path(filename)
+        .expect("Failure reading CSV file");
+
+    let it = reader.records();
+    it.map(|x| {
+        x.unwrap()
+            .iter()
+            .map(|z| {
+                u64::from_str(z.trim()).unwrap_or_else(|_| panic!("Cannot format {} as u64", z))
+            })
+            .collect::<Vec<u64>>()
+    })
+    .collect::<Vec<Vec<u64>>>()
+}
+
 /// Reads CSV file into vector of rows,
 /// where each row is parsed first as a key, and then integer-like values
 pub fn read_csv_as_keyed_nums<T>(filename: T, has_headers: bool) -> Vec<KeyedNums<u64>>
```
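
A minimal usage sketch of the new helper (assuming the `common::files` module path; the input path is one of the example files from the summary):

```rust
use common::files::read_csv_as_u64;

// Each CSV row becomes one Vec<u64>; non-numeric fields cause a panic.
let features: Vec<Vec<u64>> =
    read_csv_as_u64("etc/example/dpmc/Ex0_partner_1_features.csv");
println!("read {} feature rows", features.len());
```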
2 changes: 1 addition & 1 deletion crypto/Cargo.toml
```diff
@@ -19,7 +19,7 @@ rand = "0.8"
 rand_core = "0.5.1"
 curve25519-dalek = "3.2"
 Cupcake = { git = "https://github.com/facebookresearch/Cupcake"}
-rayon = "1.3.0"
+rayon = "1.8.0"
 serde = {version = "1.0.104", features = ["derive"] }
 bincode = "1.2.1"
 num-bigint = { version = "0.4", features = ["rand"] }
```
1 change: 1 addition & 0 deletions crypto/src/prelude.rs
```diff
@@ -8,6 +8,7 @@ pub use curve25519_dalek::ristretto::RistrettoPoint;
 pub use curve25519_dalek::scalar;
 pub use curve25519_dalek::scalar::Scalar;
 pub use curve25519_dalek::traits::Identity;
+pub use curve25519_dalek::traits::IsIdentity;

 pub use crate::spoint::ByteBuffer;
```
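
For context on the new re-export (a sketch, not code from this diff): `IsIdentity` is the curve25519-dalek trait for testing whether a point equals the group identity, which protocol code can use to reject degenerate points:

```rust
use curve25519_dalek::ristretto::RistrettoPoint;
use curve25519_dalek::traits::{Identity, IsIdentity};

fn main() {
    // The identity point reports true; any honestly blinded,
    // nonzero point reports false.
    let p = RistrettoPoint::identity();
    assert!(p.is_identity());
}
```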