scrambledb/
split.rs

1//! # Pseudonymization
2use hacspec_lib::Randomness;
3use oprf::coprf::coprf_setup::BlindingPublicKey;
4
5use crate::{
6    data_transformations::{blind_identifiable_datum, pseudonymize_blinded_datum},
7    data_types::{BlindedIdentifiableData, BlindedPseudonymizedData, IdentifiableData},
8    error::Error,
9    setup::{ConverterContext, StoreEncryptionKey},
10    table::Table,
11};
12
13/// ## Blinding Orthonymous Tables
14///
15/// Prepare a table of orthonymous data values for pseudonymization by applying
16/// the blinding operation on each entry and shuffling the result.
17///
18/// Inputs:
19/// - `ek_receiver`: The receiver's public encryption key
20/// - `bpk_receiver`: The receiver's public blinding key
21/// - `table`: A table of identifiable data values
22/// - `randomness`: Random bytes
23///
24/// Outputs:
25/// A table of blinded identifiable data.
26pub fn blind_orthonymous_table(
27    ek_receiver: &StoreEncryptionKey,
28    bpk_receiver: BlindingPublicKey,
29    table: Table<IdentifiableData>,
30    randomness: &mut Randomness,
31) -> Result<Table<BlindedIdentifiableData>, Error> {
32    let mut blinded_table_entries = table
33        .data()
34        .iter()
35        .map(|entry| blind_identifiable_datum(&bpk_receiver, ek_receiver, entry, randomness))
36        .collect::<Result<Vec<BlindedIdentifiableData>, Error>>()?;
37
38    blinded_table_entries.sort();
39
40    Ok(Table::new(table.identifier().into(), blinded_table_entries))
41}
42
43/// ## Oblivious Pseudonymization
44///
45/// Obliviously pseudonymize a table of blinded orthonymous data values by
46/// applying the oblivious pseudonymization operation on each entry and
47/// shuffling the result.
48///
49/// Inputs:
50/// - `converter_context`: The Converter's coPRF evaluation context
51/// - `ek_receiver`: The receiver's public encryption key
52/// - `bpk_receiver`: The receiver's public blinding key
53/// - `blinded_table`: A table of blinded identifiable data values
54/// - `randomness`: Random bytes
55///
56/// Outputs:
57/// A table of blinded pseudonymized data.
58pub fn pseudonymize_blinded_table(
59    converter_context: &ConverterContext,
60    bpk_receiver: BlindingPublicKey,
61    ek_receiver: &StoreEncryptionKey,
62    blinded_table: Table<BlindedIdentifiableData>,
63    randomness: &mut Randomness,
64) -> Result<Table<BlindedPseudonymizedData>, Error> {
65    let mut blinded_pseudonymized_entries = blinded_table
66        .data()
67        .iter()
68        .map(|entry| {
69            pseudonymize_blinded_datum(
70                &converter_context.coprf_context,
71                &bpk_receiver,
72                ek_receiver,
73                entry,
74                randomness,
75            )
76        })
77        .collect::<Result<Vec<BlindedPseudonymizedData>, Error>>()?;
78    blinded_pseudonymized_entries.sort();
79
80    Ok(Table::new(
81        blinded_table.identifier().into(),
82        blinded_pseudonymized_entries,
83    ))
84}
85
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use crate::{setup::StoreContext, test_util::generate_plain_table};

    use super::*;

    /// Runs the full split flow (blind -> obliviously pseudonymize ->
    /// finalize) on a generated plain table and checks that every resulting
    /// data value occurs in the plain table and that all handles are unique.
    #[test]
    fn test_split_full() {
        use rand::prelude::*;

        // Keep the 1 MB randomness pool on the heap rather than in a stack
        // array that is then copied into a `Vec`.
        let mut rng = rand::thread_rng();
        let mut random_bytes = vec![0u8; 1_000_000];
        rng.fill_bytes(random_bytes.as_mut_slice());
        let mut randomness = Randomness::new(random_bytes);

        let converter_context = ConverterContext::setup(&mut randomness).unwrap();
        let lake_context = StoreContext::setup(&mut randomness).unwrap();

        // == Generate Plain Table ==
        let plain_table = generate_plain_table();

        let (lake_ek, lake_bpk) = lake_context.public_keys();

        // == Blind Table for Pseudonymization ==
        let blind_table = crate::split::blind_orthonymous_table(
            &lake_ek,
            lake_bpk,
            plain_table.clone(),
            &mut randomness,
        )
        .unwrap();

        // == Blind Pseudonymized Table ==
        let converted_tables = crate::split::pseudonymize_blinded_table(
            &converter_context,
            lake_bpk,
            &lake_ek,
            blind_table,
            &mut randomness,
        )
        .unwrap();

        // == Unblinded Pseudonymized Table ==
        let lake_tables =
            crate::finalize::finalize_blinded_table(&lake_context, converted_tables).unwrap();

        let mut pseudonym_set = HashSet::new();
        for pseudonymized_data in lake_tables.data() {
            // Use `assert!` rather than `debug_assert!`: the latter is
            // compiled out when debug assertions are disabled (e.g.
            // `cargo test --release`), which would make this test vacuous.

            // test that data is preserved
            assert!(
                plain_table
                    .data()
                    .iter()
                    .any(|entry| entry.data_value == pseudonymized_data.data_value),
                "Data was not preserved during pseudonymization."
            );

            // test if all pseudonyms are unique
            assert!(
                pseudonym_set.insert(pseudonymized_data.handle.clone()),
                "Generated pseudonyms are not unique."
            );
        }
    }
}