scrambledb/
data_transformations.rs

1//! This module defines ScrambleDB transformations at the level of individual
2//! pieces of data as defined in [`data_types`](crate::data_types).
3//!
4//! These transformations are:
5//! - blinding identifiable and pseudonymous data
6//! - pseudonymizing blinded identifiable data
7//! - converting blinded pseudonymous data
8//! - finalizing blinded pseudonymous data
9
10use hacspec_lib::Randomness;
11use oprf::coprf::{
12    coprf_online::{blind, blind_convert, blind_evaluate, prepare_blind_convert},
13    coprf_setup::{derive_key, BlindingPublicKey, CoPRFEvaluatorContext},
14};
15
16use crate::{
17    data_types::*,
18    error::Error,
19    setup::{StoreContext, StoreEncryptionKey},
20};
21
22pub(crate) mod double_hpke;
23
/// CoPRF context string for domain separation of initial pseudonymization.
25const PSEUDONYMIZATION_CONTEXT: &[u8] = b"CoPRF-Context-Pseudonymization";
26
27/// Blind an identifiable datum as a first step in initial pseudonym
28/// generation.
29///
30/// Inputs:
31/// - `bpk`: Receiver's blinding public key
32/// - `ek`: Receiver's public encryption key
33/// - `datum`: Identifiable data
34/// - `randomness`: Random bytes
35///
36/// Output:
37/// [Blinded data](crate::data_types::BlindedIdentifiableData) such that the
38/// datum's handle is blinded for CoPRF evaluation and the datum's value is
39/// level-1 encrypted.
40pub fn blind_identifiable_datum(
41    bpk: &BlindingPublicKey,
42    ek: &StoreEncryptionKey,
43    datum: &IdentifiableData,
44    randomness: &mut Randomness,
45) -> Result<BlindedIdentifiableData, Error> {
46    // Blind orthonym towards receiver.
47    let blinded_handle = BlindedIdentifiableHandle(blind(
48        *bpk,
49        datum.handle.as_bytes(),
50        PSEUDONYMIZATION_CONTEXT.to_vec(),
51        randomness,
52    )?);
53
54    // Encrypt data value towards receiver.
55    let encrypted_data_value = encrypt_data_value(&datum.data_value, ek, randomness)?;
56
57    Ok(BlindedIdentifiableData {
58        blinded_handle,
59        encrypted_data_value,
60    })
61}
62
63/// Encrypt a data value towards a data store.
64///
65/// Inputs:
66/// - `data`: The data value to encrypt.
67/// - `ek`: The receiver's public encryption key.
68/// - `randomness`: Random bytes
69///
70/// Output:
71/// A new [EncryptedDataValue], the encryption of `data`.
72fn encrypt_data_value(
73    data: &DataValue,
74    ek: &StoreEncryptionKey,
75    randomness: &mut Randomness,
76) -> Result<EncryptedDataValue, Error> {
77    let encrypted_data_value = double_hpke::hpke_seal_level_1(data, &ek.0, randomness)?;
78    Ok(encrypted_data_value)
79}
80
81/// Blind a pseudonymous datum as a first step in pseudonym
82/// conversion.
83///
84/// Inputs:
85/// - `store_context`: The data store's long term private state including the pseudonym
86///   hardening keys
87/// - `bpk`: Receiver's blinding public key
88/// - `ek`: Receiver's public encryption key
89/// - `datum`: Pseudonymized data
90/// - `randomness`: Random bytes
91///
92/// Output:
93/// [Blinded pseudonymized data](BlindedPseudonymizedData) such that the
94/// datum's handle is blinded for CoPRF conversion and the datum's value is
95/// level-1 encrypted.
96pub fn blind_pseudonymized_datum(
97    store_context: &StoreContext,
98    bpk: &BlindingPublicKey,
99    ek: &StoreEncryptionKey,
100    datum: &PseudonymizedData,
101    randomness: &mut Randomness,
102) -> Result<BlindedPseudonymizedData, Error> {
103    // Blind recovered raw pseudonym towards receiver.
104    let blinded_handle = BlindedPseudonymizedHandle(prepare_blind_convert(
105        *bpk,
106        store_context.recover_raw_pseudonym(datum.handle)?,
107        randomness,
108    )?);
109
110    // Encrypt data value towards receiver.
111    let encrypted_data_value = encrypt_data_value(&datum.data_value, ek, randomness)?;
112
113    Ok(BlindedPseudonymizedData {
114        blinded_handle,
115        encrypted_data_value,
116    })
117}
118
119/// Obliviously pseudonymmize a blinded identifiable datum.
120///
121/// Inputs:
122/// - `coprf_context`: The converter's CoPRF evaluation context
123/// - `bpk`: The receiver's blinding public key
124/// - `ek`: The receiver's public encryption key
125/// - `datum`: A blinded datum output by [`blind_identifiable_datum`]
126/// - `randomness`: Random bytes
127///
128/// Output:
129/// [Blinded pseudonymized data](BlindedPseudonymizedData) such that the
130///  datum's blinded handle has been obliviously evaluated to a pseudonym and
131///  the datum's value has been level-2 encrypted towards the receiver.
132pub fn pseudonymize_blinded_datum(
133    coprf_context: &CoPRFEvaluatorContext,
134    bpk: &BlindingPublicKey,
135    ek: &StoreEncryptionKey,
136    datum: &BlindedIdentifiableData,
137    randomness: &mut Randomness,
138) -> Result<BlindedPseudonymizedData, Error> {
139    let key = derive_key(
140        coprf_context,
141        datum.encrypted_data_value.attribute_name.as_bytes(),
142    )?;
143
144    // Obliviously generate raw pseudonym.
145    let blinded_handle = BlindedPseudonymizedHandle(blind_evaluate(
146        key,
147        *bpk,
148        datum.blinded_handle.0,
149        randomness,
150    )?);
151
152    // Rerandomize encrypted data value towards receiver.
153    let encrypted_data_value = rerandomize_encryption(&datum.encrypted_data_value, ek, randomness)?;
154
155    Ok(BlindedPseudonymizedData {
156        blinded_handle,
157        encrypted_data_value,
158    })
159}
160
161/// Rerandomize the encryption of an encrypted data value.
162///
163/// Inputs:
164/// - `data`: The encrypted data value.
165/// - `ek`: The receiver's public encryption key.
166/// - `randomness`: Random bytes
167///
168/// Output:
169/// A new, rerandomized [EncryptedDataValue].
170fn rerandomize_encryption(
171    data: &EncryptedDataValue,
172    ek: &StoreEncryptionKey,
173    randomness: &mut Randomness,
174) -> Result<EncryptedDataValue, Error> {
175    double_hpke::hpke_seal_level_2(data, &ek.0, randomness)
176}
177
178/// Obliviously convert a blinded pseudonymous datum to a given target pseudonym key.
179///
180/// Inputs:
181/// - `coprf_context`: The Converters CoPRF evaluation context
182/// - `bpk`: The receiver's blinding public key
183/// - `ek`: The receiver's public encryption key
184/// - `conversion_target`: Target pseudonym key identifier
185/// - `randomness`: Random bytes
186///
187/// Output:
188/// [Blinded pseudonymized data](BlindedPseudonymizedData)such that the
189/// datum's pseudonymous handle is converted to the target pseudonym key and
190/// the datum's value is level-2 encrypted towards the receiver.
191pub fn convert_blinded_datum(
192    coprf_context: &CoPRFEvaluatorContext,
193    bpk: &BlindingPublicKey,
194    ek: &StoreEncryptionKey,
195    conversion_target: &[u8],
196    datum: &BlindedPseudonymizedData,
197    randomness: &mut Randomness,
198) -> Result<BlindedPseudonymizedData, Error> {
199    // Re-derive original pseudonymization key.
200    let key_from = derive_key(
201        coprf_context,
202        datum.encrypted_data_value.attribute_name.as_bytes(),
203    )?;
204
205    // Derive target key.
206    let key_to = derive_key(coprf_context, conversion_target)?;
207
208    // Obliviously convert pseudonym.
209    let blinded_handle = BlindedPseudonymizedHandle(blind_convert(
210        *bpk,
211        key_from,
212        key_to,
213        datum.blinded_handle.0,
214        randomness,
215    )?);
216
217    // Rerandomize encrypted data value towards receiver.
218    let encrypted_data_value = rerandomize_encryption(&datum.encrypted_data_value, ek, randomness)?;
219
220    Ok(BlindedPseudonymizedData {
221        blinded_handle,
222        encrypted_data_value,
223    })
224}
225
226/// Finalize a blinded pseudonymous datum for storage or analysis.
227///
228/// Inputs:
229/// - `store_context`: The data store's long term private state including the
230///   receiver's coPRF unblinding key, private decryption key, as well as
231///   pseudonym hardening key
232/// - `datum`: blinded pseudonymous datum output by [`convert_blinded_datum`] or
233///   [`pseudonymize_blinded_datum`]
234///
235/// Output:
236/// [Pseudonymized data](PseudonymizedData) such that the datum's pseudonymous
237/// handle has been unblinded and hardened and the datum's value has been
238/// decrypted.
239pub fn finalize_blinded_datum(
240    store_context: &StoreContext,
241    datum: &BlindedPseudonymizedData,
242) -> Result<PseudonymizedData, Error> {
243    // Finalize pseudonym for storage.
244    let handle = store_context.finalize_pseudonym(datum.blinded_handle)?;
245
246    // Decrypt data value for storage.
247    let data_value = decrypt_data_value(&datum.encrypted_data_value, store_context)?;
248
249    Ok(PseudonymizedData { handle, data_value })
250}
251
252/// Decrypt an encrypted data value.
253///
254/// Inputs:
255/// - `data`: The value to decrypt.
256/// - `store_context`: The data store's long term private state, including in particular its decryption key.
257///
258/// Output:
259/// The decrypted [DataValue] or an [Error] on decryption failure.
260fn decrypt_data_value(
261    data: &EncryptedDataValue,
262    store_context: &StoreContext,
263) -> Result<DataValue, Error> {
264    double_hpke::hpke_open_level_2(data, &store_context.dk.0)
265}