230 lines
6.2 KiB
C++
230 lines
6.2 KiB
C++
// Copyright 2014 Google Inc. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include <stdio.h>
|
|
#include <time.h> // time
|
|
|
|
#include <cassert> // assert
|
|
#include <cstdlib> // strtol, strtof
|
|
#include <iostream>
|
|
#include <vector>
|
|
|
|
#include "encoder.h"
|
|
#include "libc_rand_impl.h"
|
|
#include "unix_kernel_rand_impl.h"
|
|
#include "openssl_hash_impl.h"
|
|
|
|
// Like atoi, but with basic (not exhaustive) error checking.
|
|
bool StringToInt(const char* s, int* result) {
|
|
bool ok = true;
|
|
char* end; // mutated by strtol
|
|
|
|
*result = strtol(s, &end, 10); // base 10
|
|
// If strol didn't consume any characters, it failed.
|
|
if (end == s) {
|
|
ok = false;
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
// Like atof, but with basic (not exhaustive) error checking.
|
|
bool StringToFloat(const char* s, float* result) {
|
|
bool ok = true;
|
|
char* end; // mutated by strtof
|
|
|
|
*result = strtof(s, &end);
|
|
// If strof didn't consume any characters, it failed.
|
|
if (end == s) {
|
|
ok = false;
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
// Copy a report into a string, which can go in a protobuf.
|
|
void BitsToString(rappor::Bits b, std::string* output, int num_bytes) {
|
|
output->assign(num_bytes, '\0');
|
|
for (int i = 0; i < num_bytes; ++i) {
|
|
// "little endian" string
|
|
(*output)[i] = b & 0xFF; // last byte
|
|
b >>= 8;
|
|
}
|
|
}
|
|
|
|
// Print a report, with the most significant bit first.
|
|
void PrintBitString(const std::string& s) {
|
|
for (int i = s.size() - 1; i >= 0; --i) {
|
|
unsigned char byte = s[i];
|
|
for (int j = 7; j >= 0; --j) {
|
|
bool bit = byte & (1 << j);
|
|
std::cout << (bit ? "1" : "0");
|
|
}
|
|
}
|
|
}
|
|
|
|
int main(int argc, char** argv) {
|
|
if (argc != 7) {
|
|
rappor::log(
|
|
"Usage: rappor_encode <num bits> <num hashes> <num cohorts> p q f");
|
|
exit(1);
|
|
}
|
|
|
|
int num_bits, num_hashes, num_cohorts;
|
|
float prob_p, prob_q, prob_f;
|
|
|
|
bool ok1 = StringToInt(argv[1], &num_bits);
|
|
bool ok2 = StringToInt(argv[2], &num_hashes);
|
|
bool ok3 = StringToInt(argv[3], &num_cohorts);
|
|
|
|
bool ok4 = StringToFloat(argv[4], &prob_p);
|
|
bool ok5 = StringToFloat(argv[5], &prob_q);
|
|
bool ok6 = StringToFloat(argv[6], &prob_f);
|
|
|
|
if (!ok1) {
|
|
rappor::log("Invalid number of bits: '%s'", argv[1]);
|
|
exit(1);
|
|
}
|
|
if (!ok2) {
|
|
rappor::log("Invalid number of hashes: '%s'", argv[2]);
|
|
exit(1);
|
|
}
|
|
if (!ok3) {
|
|
rappor::log("Invalid number of cohorts: '%s'", argv[3]);
|
|
exit(1);
|
|
}
|
|
if (!ok4) {
|
|
rappor::log("Invalid float p: '%s'", argv[4]);
|
|
exit(1);
|
|
}
|
|
if (!ok5) {
|
|
rappor::log("Invalid float q: '%s'", argv[5]);
|
|
exit(1);
|
|
}
|
|
if (!ok6) {
|
|
rappor::log("Invalid float f: '%s'", argv[6]);
|
|
exit(1);
|
|
}
|
|
|
|
rappor::Params params(num_bits, num_hashes, num_cohorts, prob_f, prob_p,
|
|
prob_q);
|
|
|
|
//rappor::log("k: %d, h: %d, m: %d", params.num_bits(), params.num_hashes(), params.num_cohorts());
|
|
//rappor::log("f: %f, p: %f, q: %f", prob_f, prob_p, prob_q);
|
|
|
|
int num_bytes = params.num_bits() / 8;
|
|
|
|
// TODO: Add a flag for
|
|
// - -r libc / kernel
|
|
// - -c openssl / nacl crpto
|
|
|
|
rappor::IrrRandInterface* irr_rand;
|
|
if (false) {
|
|
FILE* fp = fopen("/dev/urandom", "r");
|
|
irr_rand = new rappor::UnixKernelRand(fp);
|
|
} else {
|
|
int seed = time(NULL);
|
|
srand(seed); // seed with nanoseconds
|
|
irr_rand = new rappor::LibcRand();
|
|
}
|
|
|
|
std::string line;
|
|
|
|
// CSV header
|
|
std::cout << "client,cohort,bloom,prr,irr\n";
|
|
|
|
// Consume header line
|
|
std::getline(std::cin, line);
|
|
if (line != "client,cohort,value") {
|
|
rappor::log("Expected CSV header 'client,cohort,value'");
|
|
return 1;
|
|
}
|
|
|
|
while (true) {
|
|
std::getline(std::cin, line); // no trailing newline
|
|
// rappor::log("Got line %s", line.c_str());
|
|
|
|
if (line.empty()) {
|
|
break; // EOF
|
|
}
|
|
|
|
size_t comma1_pos = line.find(',');
|
|
if (comma1_pos == std::string::npos) {
|
|
rappor::log("Expected , in line '%s'", line.c_str());
|
|
return 1;
|
|
}
|
|
size_t comma2_pos = line.find(',', comma1_pos + 1);
|
|
if (comma2_pos == std::string::npos) {
|
|
rappor::log("Expected second , in line '%s'", line.c_str());
|
|
return 1;
|
|
}
|
|
|
|
// The C++ API substr(pos, length) not (pos, end)
|
|
|
|
// everything before comma
|
|
std::string client_str = line.substr(0, comma1_pos);
|
|
// everything between first and second comma.
|
|
// TODO(andychu): Remove unused second column.
|
|
std::string unused = line.substr(comma1_pos + 1, comma2_pos-comma1_pos);
|
|
// everything after
|
|
std::string value = line.substr(comma2_pos + 1);
|
|
|
|
rappor::Deps deps(rappor::Md5, client_str /*client_secret*/,
|
|
rappor::HmacSha256, *irr_rand);
|
|
|
|
// For now, construct a new encoder every time. We could construct one for
|
|
// each client. We are simulating many clients reporting the same metric,
|
|
// so the encoder ID is constant.
|
|
rappor::Encoder e("metric-name", params, deps);
|
|
|
|
// rappor::log("CLIENT %s VALUE %s COHORT %d", client_str.c_str(),
|
|
// value.c_str(), cohort);
|
|
|
|
rappor::Bits bloom;
|
|
rappor::Bits prr;
|
|
rappor::Bits irr;
|
|
bool ok = e._EncodeStringInternal(value, &bloom, &prr, &irr);
|
|
|
|
// NOTE: Are there really encoding errors?
|
|
if (!ok) {
|
|
rappor::log("Error encoding string %s", line.c_str());
|
|
break;
|
|
}
|
|
|
|
std::string bloom_str;
|
|
BitsToString(bloom, &bloom_str, num_bytes);
|
|
|
|
std::string prr_str;
|
|
BitsToString(prr, &prr_str, num_bytes);
|
|
|
|
std::string irr_str;
|
|
BitsToString(irr, &irr_str, num_bytes);
|
|
|
|
// Output CSV row.
|
|
|
|
std::cout << client_str;
|
|
std::cout << ',';
|
|
std::cout << e.cohort(); // cohort the encoder assigned
|
|
std::cout << ',';
|
|
PrintBitString(bloom_str);
|
|
std::cout << ',';
|
|
PrintBitString(prr_str);
|
|
std::cout << ',';
|
|
PrintBitString(irr_str);
|
|
|
|
std::cout << "\n";
|
|
}
|
|
|
|
// Cleanup
|
|
delete irr_rand;
|
|
}
|