1 #ifndef SRC_SKETCH_GRAPH_SKETCH_H_
2 #define SRC_SKETCH_GRAPH_SKETCH_H_
4 #include "node_sketch.h"
29 const std::vector<int>* nodes_id = NULL) {
31 should_calc_z_value_ =
false;
32 nodes_ads_.resize(max_node_id);
33 reserve_size_ = K_ * log2(max_node_id);
34 prunning_thresholds_.resize(max_node_id);
35 CreateNodesDistribution(max_node_id, NULL, nodes_id);
38 int GetK()
const {
return K_; }
42 should_calc_z_value_ = should_calc;
50 if (all_distance_sketch.has_configuration()) {
51 K_ = all_distance_sketch.configuration().k();
53 if (all_distance_sketch.has_configuration()) {
54 should_calc_z_value_ = all_distance_sketch.configuration().should_calc_z_values();
57 int num_nodes = all_distance_sketch.node_thresholds_size();
58 prunning_thresholds_.resize(num_nodes);
59 nodes_random_id_.resize(num_nodes);
60 if (LoadThresholdsAndRandomId(all_distance_sketch) ==
false) {
61 LOG_M(NOTICE,
"Unable to load threshold and random ID");
64 reserve_size_ = K_ * log2(prunning_thresholds_.size());
65 if (LoadNodesSketch(all_distance_sketch) ==
false) {
66 LOG_M(NOTICE,
"Unable to load nodes NodeSketch");
70 CalculateAllDistanceNeighborhood();
76 all_distance_sketch->mutable_configuration()->set_k(K_);
78 SaveThresholdAndRandomId(all_distance_sketch);
80 SaveNodesSketch(all_distance_sketch);
86 int num_nodes = all_distance_sketch.node_thresholds_size();
87 prunning_thresholds_.resize(num_nodes);
88 nodes_random_id_.resize(num_nodes);
90 for (
int i=0; i < num_nodes; i++) {
91 if (all_distance_sketch.node_thresholds(i).has_node_id() ==
false ||
92 all_distance_sketch.node_thresholds(i).has_threshold() ==
false ||
93 all_distance_sketch.node_thresholds(i).has_random_id() ==
false){
94 LOG_M(NOTICE,
"Load from bad file!!!, missing data " <<
95 " node id == " << all_distance_sketch.node_thresholds(i).has_node_id() <<
96 " threshold == " << all_distance_sketch.node_thresholds(i).has_threshold() <<
97 " random id == " << all_distance_sketch.node_thresholds(i).has_random_id());
100 node_id = all_distance_sketch.node_thresholds(i).node_id();
101 if (node_id >= prunning_thresholds_.size() ||
102 node_id >= nodes_random_id_.size()) {
103 prunning_thresholds_.resize(node_id);
104 nodes_random_id_.resize(node_id);
106 prunning_thresholds_[node_id].SetDistance(
107 all_distance_sketch.node_thresholds(i).threshold());
108 nodes_random_id_[node_id] =
109 all_distance_sketch.node_thresholds(i).random_id();
111 ExtractSortedVersionFromDist();
115 bool LoadNodesSketch(
const AllDistanceSketchGpb& all_distance_sketch) {
116 for (
int i=0; i < all_distance_sketch.nodes_sketches_size(); i++) {
117 if (all_distance_sketch.nodes_sketches(i).has_node_id() ==
false ||
118 all_distance_sketch.nodes_sketches(i).has_node_random_id() ==
false) {
120 "Loading nodes NodeSketch, Load from bad file!!! missing "
123 << all_distance_sketch.nodes_sketches(i).has_node_id()
125 << all_distance_sketch.nodes_sketches(i)
126 .has_node_random_id());
130 int node_id = all_distance_sketch.nodes_sketches(i).node_id();
131 double node_random_id = all_distance_sketch.nodes_sketches(i).node_random_id();
132 if (node_id >= nodes_ads_.size()) {
133 nodes_ads_.resize(node_id + 1);
136 nodes_ads_[node_id].InitNodeSketch(K_, node_id, node_random_id,
137 &prunning_thresholds_,
139 should_calc_z_value_);
143 k < all_distance_sketch.nodes_sketches(i).node_in_sketch_size();
145 const NodeSummaryDetailsGpb& node_in_sketch = \
146 all_distance_sketch.nodes_sketches(i).node_in_sketch(k);
147 if (node_in_sketch.has_node_id() ==
false ||
148 node_in_sketch.has_distance() ==
false) {
151 "Loading nodes NodeSketch, Load from bad file!!! missing "
153 <<
" node id == " << node_in_sketch.has_node_id()
154 <<
" distance == " << node_in_sketch.has_distance());
157 int sketch_node_id = node_in_sketch.node_id();
158 graph::EdgeWeight distance = node_in_sketch.distance();
160 nodes_ads_[node_id].nodes_id_distance_.push_back(
161 node_in_sketch_details);
165 j < all_distance_sketch.nodes_sketches(i).z_value_size(); ++j) {
166 const ZValuesGpb& z_value_in_sketch = \
167 all_distance_sketch.nodes_sketches(i).z_value(j);
168 graph::EdgeWeight distance = z_value_in_sketch.distance();
169 int node_id = z_value_in_sketch.node_id();
173 nodes_ads_[node_id].set_z_values(&z);
175 SetDisributionToNodes();
179 void SaveThresholdAndRandomId(AllDistanceSketchGpb* all_distance_sketch) {
180 if (nodes_random_id_.size() != prunning_thresholds_.size()) {
181 LOG_M(NOTICE,
" PROBLEM threshold and random ids are not equal ");
184 for (
int i = 0; i < prunning_thresholds_.size(); i++) {
185 NodeThresholdGpb *node_threshold = all_distance_sketch->add_node_thresholds();
186 node_threshold->set_node_id(i);
187 node_threshold->set_threshold(prunning_thresholds_[i].GetDistance());
188 node_threshold->set_random_id(nodes_random_id_[i]);
192 void SaveNodesSketch(AllDistanceSketchGpb* all_distance_sketch) {
193 for (
int i = 0; i < nodes_random_id_.size(); i++) {
194 if (nodes_ads_[i].IsInit() ==
false) {
198 SingleNodeSketchGpb* single_node_sketch = all_distance_sketch->add_nodes_sketches();
199 single_node_sketch->set_node_id(i);
200 single_node_sketch->set_node_random_id(nodes_random_id_[i]);
203 &(nodes_ads_[i].nodes_id_distance_);
204 for (
int j=0; j < nodes_in_sketch->size(); j++) {
205 NodeSummaryDetailsGpb * node_details = single_node_sketch->add_node_in_sketch();
206 node_details->set_node_id((*nodes_in_sketch)[j].GetNId());
207 node_details->set_distance((*nodes_in_sketch)[j].GetDistance());
209 for (
const auto zvalue : nodes_ads_[i].GetZValues() ) {
210 ZValuesGpb* z_value = single_node_sketch->add_z_value();
211 z_value->set_distance(zvalue.first);
212 z_value->set_node_id(zvalue.second.GetNId());
218 bool ShouldPrune(graph::EdgeWeight distance,
int node_id) {
219 if (distance > prunning_thresholds_[node_id].GetDistance()) {
222 if (should_calc_z_value_) {
223 return (distance == prunning_thresholds_[node_id].GetDistance() &&
224 nodes_ads_[node_id].HasZValue(distance));
226 return distance >= prunning_thresholds_[node_id].GetDistance();
229 void CalculateInsertProb(
int node_id, std::vector<NodeProb> * insert_prob) {
230 nodes_ads_[node_id].CalculateInsertProb(insert_prob);
233 void SetPrunningThresholds() {
234 for (
unsigned int i = 0; i < nodes_ads_.size(); i++) {
235 nodes_ads_[i].SetPrunningThresholds(&prunning_thresholds_);
239 NodeSketch* UTGetNodeSketch(
int node_id) {
return &nodes_ads_[node_id]; }
242 NodeSketch* GetNodeSketch(
const NodeIdRandomIdData& node_details) {
243 if (nodes_ads_.size() <= (
unsigned int)node_details.GetNId()) {
246 if (nodes_ads_[node_details.GetNId()].IsInit() ==
false) {
247 nodes_ads_[node_details.GetNId()].InitNodeSketch(
248 K_, node_details.GetNId(), node_details.GetRandomId(),
249 &prunning_thresholds_, reserve_size_, should_calc_z_value_);
251 return &nodes_ads_[node_details.GetNId()];
255 void Printthresholds() {
256 for (
unsigned int i = 0; i < prunning_thresholds_.size(); i++) {
257 std::cout <<
"i=" << i
258 <<
" thresholds=" << prunning_thresholds_[i].GetDistance()
263 int InsertCandidatesNodes() {
265 for (
unsigned int i = 0; i < nodes_ads_.size(); i++) {
266 numCleaned += nodes_ads_[i].InsertCandidatesNodes();
271 int InsertCandidatesNodes(
unsigned int start,
unsigned int end) {
273 for (
unsigned int i = start; (i < nodes_ads_.size() && i < end); i++) {
274 numCleaned += nodes_ads_[i].InsertCandidatesNodes();
279 void CalculateAllDistanceNeighborhood() {
280 for (
unsigned int i = 0; i < nodes_ads_.size(); i++) {
281 nodes_ads_[i].CalculateAllDistanceNeighborhood();
285 const std::vector<PrunningThreshold>* GetThresholds()
const {
286 return &prunning_thresholds_;
289 const NodesSketch& GetNodesSketch()
const {
return nodes_ads_; }
292 if (GetK() != other.GetK()) {
293 LOG_M(NOTICE,
"K is different!" <<
294 "lhs = " << GetK() <<
295 "rhs = " << other.GetK());
299 LOG_M(NOTICE,
"Distribution are not euqal!");
303 if ( (*GetThresholds()) != (*other.GetThresholds())) {
304 LOG_M(NOTICE,
"thresholds are not equal!");
308 if ((*GetNodesDistribution()) != (*GetNodesDistribution())) {
309 LOG_M(NOTICE,
"Distribution sorted are not equal!");
313 if (GetNodesSketch() != other.GetNodesSketch()) {
314 LOG_M(NOTICE,
"Nodes NodeSketch are not equal!");
320 void CreateNodesDistribution(
unsigned int max_node_id,
321 UniformRankCalculator* calculator = NULL,
322 const std::vector<int>* nodes_id = NULL) {
323 UniformRankCalculator c;
324 c.InitUniformRankCalculator();
325 if (calculator == NULL ) {
328 typedef boost::minstd_rand base_generator_type;
329 base_generator_type generator(42u);
330 boost::uniform_real<> uni_dist(0, 1);
331 boost::variate_generator<base_generator_type&, boost::uniform_real<> > uni(generator, uni_dist);
332 nodes_random_id_.resize(max_node_id);
333 int num_nodes = nodes_id == NULL ? max_node_id : nodes_id->size();
334 for (
unsigned int i = 0 ; i < num_nodes; i++) {
335 int node_id = nodes_id == NULL ? i : (*nodes_id)[i];
336 nodes_random_id_[node_id] =
337 calculator->CalculateNodeRank(node_id);
340 ExtractSortedVersionFromDist();
341 SetDisributionToNodes();
354 const std::vector<NodeDistanceIdRandomIdData>* GetNodesDistribution()
356 return &nodes_random_id_sorted_increasing_;
360 NodeSketch* GetNodeSketch(NodeDistanceIdRandomIdData node_details) {
361 return GetNodeSketch(node_details.GetDetails());
367 return &nodes_random_id_;
372 if ((
unsigned int)node_id > nodes_random_id_.size()) {
375 return nodes_random_id_[node_id];
381 nodes_random_id_.clear();
382 nodes_random_id_.resize(nodes_random_id->size());
383 std::copy(nodes_random_id->begin(), nodes_random_id->end(),
384 nodes_random_id_.begin());
385 ExtractSortedVersionFromDist();
386 SetDisributionToNodes();
389 NodesSketchItr Begin() {
390 return nodes_ads_.begin();
393 NodesSketchItr End() {
394 return nodes_ads_.end();
398 void SetDisributionToNodes() {
399 for (
int i=0; i < nodes_ads_.size(); i++) {
400 nodes_ads_[i].SetDisribution(&nodes_random_id_);
403 void ExtractSortedVersionFromDist() {
404 nodes_random_id_sorted_increasing_.clear();
405 for (
unsigned int i = 0; i < nodes_random_id_.size(); i++) {
406 NodeDistanceIdRandomIdData b(0, i, nodes_random_id_[i]);
407 nodes_random_id_sorted_increasing_.push_back(b);
410 std::sort(nodes_random_id_sorted_increasing_.begin(),
411 nodes_random_id_sorted_increasing_.end(),
412 compare_node_randomid_decreasing());
415 bool should_calc_z_value_;
416 unsigned int reserve_size_;
418 NodesSketch nodes_ads_;
419 std::vector<PrunningThreshold> prunning_thresholds_;
420 std::vector<RandomId> nodes_random_id_;
421 std::vector<NodeDistanceIdRandomIdData> nodes_random_id_sorted_increasing_;
429 #endif // SRC_SKETCH_GRAPH_SKETCH_H_
void InitGraphSketch(unsigned int K, int max_node_id, const std::vector< int > *nodes_id=NULL)
Init the class.
Definition: graph_sketch.h:28
Single node sketch.
Definition: node_sketch.h:204
NodesSketch::iterator NodesSketchItr
Definition: graph_sketch.h:14
std::vector< NodeIdDistanceData > NodeIdDistanceVector
Container for node id and distance.
Definition: node_sketch.h:193
void set_should_calc_zvalues(bool should_calc)
if true when calculating the sketch we will also calculate the insertion probabilities of each node...
Definition: graph_sketch.h:41
void SetNodesDistribution(const std::vector< RandomId > *nodes_random_id)
sets the random id for each node. The default distribution is unifrom [0,1]
Definition: graph_sketch.h:380
const std::vector< RandomId > * GetNodesDistributionLean() const
returns the random id that was given to each node
Definition: graph_sketch.h:366
RandomId GetNodeRandomId(const int &node_id)
returns specific node random id
Definition: graph_sketch.h:371
std::vector< NodeSketch > NodesSketch
Definition: graph_sketch.h:11
Data structure for the graph sketch.
Definition: graph_sketch.h:17
void SaveGraphSketchToGpb(AllDistanceSketchGpb *all_distance_sketch)
Save the graph sketch to a Gpb class.
Definition: graph_sketch.h:75
double RandomId
Type for random ids.
Definition: node_sketch.h:14
bool LoadGraphSketchFromGpb(const AllDistanceSketchGpb &all_distance_sketch)
Load the graph sketch from a Gpb class.
Definition: graph_sketch.h:48
Container class to store node id and distance.
Definition: node_sketch.h:26