scrambledb/
join.rs

1//! # Pseudonym Conversion
2use hacspec_lib::Randomness;
3use oprf::coprf::coprf_setup::BlindingPublicKey;
4
5use crate::{
6    data_transformations::{blind_pseudonymized_datum, convert_blinded_datum},
7    data_types::{BlindedPseudonymizedData, PseudonymizedData},
8    error::Error,
9    setup::{ConverterContext, StoreContext, StoreEncryptionKey},
10    table::Table,
11    SECPAR_BYTES,
12};
13
14/// ## Blinding Pseudonymous Tables
15///
16/// Prepare a table of pseudonymous data values for join conversion by applying
17/// the blinding operation on each entry and shuffling the result.
18///
19/// Inputs:
20/// - `store_context`: The data store's pseudonymization context
21/// - `ek_receiver`: The receiver's public encryption key
22/// - `bpk_receiver`: The receiver's public blinding key
23/// - `pseudonymized_table`: A table of pseudonymous data values
24/// - `randomness`: Random bytes
25///
26/// Outputs:
27/// A table of blinded pseudonymous data values.
28pub fn blind_pseudonymous_table(
29    store_context: &StoreContext,
30    bpk_receiver: BlindingPublicKey,
31    ek_receiver: &StoreEncryptionKey,
32    pseudonymized_table: Table<PseudonymizedData>,
33    randomness: &mut Randomness,
34) -> Result<Table<BlindedPseudonymizedData>, Error> {
35    let mut blinded_data = pseudonymized_table
36        .data()
37        .iter()
38        .map(|entry| {
39            blind_pseudonymized_datum(store_context, &bpk_receiver, ek_receiver, entry, randomness)
40        })
41        .collect::<Result<Vec<BlindedPseudonymizedData>, Error>>()?;
42
43    blinded_data.sort();
44    Ok(Table::new(
45        pseudonymized_table.identifier().into(),
46        blinded_data,
47    ))
48}
49
/// Builds a join identifier from `identifier` by appending the suffix
/// `-Join` to it.
///
/// The input string is consumed and extended in place, e.g. `"Lake"`
/// becomes `"Lake-Join"`.
pub fn join_identifier(identifier: String) -> String {
    identifier + "-Join"
}
56
57/// ## Oblivious Conversion
58///
59/// Obliviously convert a table of blinded pseudonymous data values to fresh
60/// join-pseudonyms by applying the pseudonym conversion transformation to
61/// each entry and shuffling the result.
62///
63/// Inputs:
64/// - `converter_context`: The Converter's coPRF conversion context
65/// - `bpk_receiver`: The receiver's public blinding key
66/// - `ek_receiver`: The receiver's public encryption key
67/// - `table`: A table of blinded pseudonymous data values
68/// - `randomness`: Random bytes
69///
70/// Outputs:
71/// A table of consistently join-pseudonymized data values.
72pub fn convert_blinded_table(
73    converter_context: &ConverterContext,
74    bpk_receiver: BlindingPublicKey,
75    ek_receiver: &StoreEncryptionKey,
76    table: Table<BlindedPseudonymizedData>,
77    randomness: &mut Randomness,
78) -> Result<Table<BlindedPseudonymizedData>, Error> {
79    let conversion_target = randomness.bytes(SECPAR_BYTES)?.to_owned();
80    let mut converted_data = table
81        .data()
82        .iter()
83        .map(|entry| {
84            convert_blinded_datum(
85                &converter_context.coprf_context,
86                &bpk_receiver,
87                ek_receiver,
88                &conversion_target,
89                entry,
90                randomness,
91            )
92        })
93        .collect::<Result<Vec<BlindedPseudonymizedData>, Error>>()?;
94
95    converted_data.sort();
96
97    Ok(Table::new(table.identifier().into(), converted_data))
98}
99
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use crate::{setup::StoreContext, test_util::generate_plain_table};

    use super::*;

    /// Runs the full pipeline — blind, pseudonymize and finalize a plain
    /// table into the lake, then blind, convert and finalize a selection of
    /// it for a processor — and checks that the join pseudonyms differ from
    /// the lake pseudonyms and that every joined data value occurs in the
    /// plain table.
    #[test]
    fn test_join_full() {
        use rand::prelude::*;

        let mut rng = rand::thread_rng();
        // Keep the randomness pool on the heap: a 1 MB array would take up a
        // large part of the test thread's stack and would then be copied
        // again to build the `Vec`.
        let mut randomness = vec![0u8; 1_000_000];
        rng.fill_bytes(&mut randomness);
        let mut randomness = Randomness::new(randomness);

        let converter_context = ConverterContext::setup(&mut randomness).unwrap();
        let lake_context = StoreContext::setup(&mut randomness).unwrap();

        // == Generate Plain Table ==
        let plain_table = generate_plain_table();

        let (lake_ek, lake_bpk) = lake_context.public_keys();

        // == Blind Table for Pseudonymization ==
        let blind_table = crate::split::blind_orthonymous_table(
            &lake_ek,
            lake_bpk,
            plain_table.clone(),
            &mut randomness,
        )
        .unwrap();

        // == Pseudonymize Blinded Table ==
        let converted_tables = crate::split::pseudonymize_blinded_table(
            &converter_context,
            lake_bpk,
            &lake_ek,
            blind_table,
            &mut randomness,
        )
        .unwrap();

        // == Unblinded Pseudonymized Table ==
        let lake_tables =
            crate::finalize::finalize_blinded_table(&lake_context, converted_tables).unwrap();

        // Remember the lake pseudonyms to test them against the join
        // pseudonyms later on.
        let lake_pseudonyms: HashSet<_> = lake_tables
            .data()
            .iter()
            .map(|entry| entry.handle.clone())
            .collect();

        // Select the entries of two attributes from the lake for the join.
        let join_table = Table::new(
            "Join".into(),
            lake_tables
                .data()
                .iter()
                .filter(|entry| {
                    entry.data_value.attribute_name == "Address"
                        || entry.data_value.attribute_name == "Favorite Color"
                })
                .cloned()
                .collect(),
        );

        let processor_context = StoreContext::setup(&mut randomness).unwrap();

        let (ek_processor, bpk_processor) = processor_context.public_keys();

        // == Blind Pseudonymized Table for Join ==
        let blind_tables = crate::join::blind_pseudonymous_table(
            &lake_context,
            bpk_processor,
            &ek_processor,
            join_table,
            &mut randomness,
        )
        .unwrap();

        // == Convert to Join Pseudonyms ==
        let converted_join_tables = crate::join::convert_blinded_table(
            &converter_context,
            bpk_processor,
            &ek_processor,
            blind_tables,
            &mut randomness,
        )
        .unwrap();

        // == Unblinded Join Table ==
        let joined_tables =
            crate::finalize::finalize_blinded_table(&processor_context, converted_join_tables)
                .unwrap();

        for entry in joined_tables.data() {
            // Every join pseudonym must be fresh compared to the lake
            // pseudonyms. Join pseudonyms are only compared against the lake
            // set, not against each other, so a handle may repeat across
            // joined entries. `assert!` (not `debug_assert!`) keeps the
            // check active in builds without debug assertions.
            assert!(
                !lake_pseudonyms.contains(&entry.handle),
                "Generated pseudonyms are not unique."
            );

            // The joined data value must occur somewhere in the plain table.
            // The closure parameter is deliberately named differently from
            // the loop variable so the comparison is not against itself.
            assert!(
                plain_table
                    .data()
                    .iter()
                    .any(|plain| plain.data_value == entry.data_value),
                "Data was not preserved during join."
            );
        }
    }
}
217}