1 #ifndef SRC_SKETCH_NODE_SKETCH_H_
2 #define SRC_SKETCH_NODE_SKETCH_H_
6 #include "rank_calculator.h"
8 #include "../proto/all_distance_sketch.pb.h"
17 typedef proto::AllDistanceSketchGpb AllDistanceSketchGpb;
18 typedef proto::AllDistanceSketchGpb::NodeThresholdGpb NodeThresholdGpb;
19 typedef proto::SingleNodeSketchGpb SingleNodeSketchGpb;
20 typedef proto::SingleNodeSketchGpb::NodeSummaryDetailsGpb NodeSummaryDetailsGpb;
21 typedef proto::SingleNodeSketchGpb::ZValuesGpb ZValuesGpb;
30 : node_id_(node_id), distance_(distance) {}
32 inline int GetNId()
const {
return node_id_; }
34 inline graph::EdgeWeight GetDistance()
const {
return distance_; }
37 return (GetNId() == other.GetNId()) &&
38 (GetDistance() == other.GetDistance());
41 friend std::ostream& operator<<(std::ostream& os,
43 os <<
" NodeId=" << node_details.GetNId()
44 <<
" Distance=" << node_details.GetDistance();
50 graph::EdgeWeight distance_;
53 class NodeIdRandomIdData {
55 NodeIdRandomIdData() {}
56 NodeIdRandomIdData(
int node_id, RandomId random_id)
57 : node_id_(node_id), random_id_(random_id) {}
58 inline int GetNId()
const {
return node_id_; }
60 inline RandomId GetRandomId()
const {
return random_id_; }
67 class NodeDistanceIdRandomIdData {
69 NodeDistanceIdRandomIdData() {}
70 NodeDistanceIdRandomIdData(NodeIdRandomIdData aDetails,
71 graph::EdgeWeight distance)
72 : details(aDetails), distance_(distance) {}
74 NodeDistanceIdRandomIdData(graph::EdgeWeight distance,
int node_id,
76 : details(node_id, random_id), distance_(distance) {}
78 inline graph::EdgeWeight GetDistance()
const {
return distance_; }
80 inline int GetNId()
const {
return details.GetNId(); }
82 inline RandomId GetRandomId()
const {
return details.GetRandomId(); }
84 inline const NodeIdRandomIdData& GetDetails() {
return details; }
86 bool operator==(
const NodeDistanceIdRandomIdData& other)
const {
87 return (GetNId() == other.GetNId()) &&
88 (GetRandomId() == other.GetRandomId()) &&
89 (GetDistance() == other.GetDistance());
92 friend std::ostream& operator<<(std::ostream& os,
93 const NodeDistanceIdRandomIdData& node_details) {
94 os <<
" NodeId=" << node_details.GetNId()
95 <<
" Distance=" << node_details.GetDistance()
96 <<
" HashId=" << node_details.GetRandomId();
101 NodeIdRandomIdData details;
102 graph::EdgeWeight distance_;
105 struct compare_node_randomid_decreasing {
106 bool operator()(
const NodeDistanceIdRandomIdData& n1,
107 const NodeDistanceIdRandomIdData& n2)
const {
108 return n1.GetRandomId() < n2.GetRandomId();
112 struct compare_node_distance_increasing {
113 bool operator()(
const NodeIdDistanceData& n1,
114 const NodeIdDistanceData& n2)
const {
115 return n1.GetDistance() > n2.GetDistance();
119 struct compare_node_distance_decreasing {
120 bool operator()(
const NodeIdDistanceData& n1,
121 const NodeIdDistanceData& n2)
const {
122 return n1.GetDistance() < n2.GetDistance();
126 class Neighbourhood {
129 Neighbourhood(
int distance,
int size) : distance_(distance), size_(size) {}
131 int GetSize()
const {
return size_; }
133 int GetDistance()
const {
return distance_; }
135 bool operator==(
const Neighbourhood& other)
const {
136 return (GetSize() == other.GetSize()) &&
137 (GetDistance() == other.GetDistance());
146 struct compare_neighbourhood_distance {
147 bool operator()(
const Neighbourhood& n1,
const Neighbourhood& n2)
const {
148 return n1.GetDistance() < n2.GetDistance();
152 struct compare_neighbourhood_size {
153 bool operator()(
const Neighbourhood& n1,
const Neighbourhood& n2)
const {
154 return n1.GetSize() < n2.GetSize();
158 class PrunningThreshold {
161 : is_distance_equal_to_following_node_distance_in_ads(false),
164 graph::EdgeWeight GetDistance()
const {
return distance_; }
166 void SetDistance(graph::EdgeWeight distance) { distance_ = distance; }
168 bool GetIsEqualToNext()
const {
169 return is_distance_equal_to_following_node_distance_in_ads;
172 void SetIsEqualToNext(
bool aIsEqual) {
173 is_distance_equal_to_following_node_distance_in_ads = aIsEqual;
175 bool operator==(
const PrunningThreshold& other)
const {
176 return GetDistance() == other.GetDistance();
180 bool is_distance_equal_to_following_node_distance_in_ads;
181 graph::EdgeWeight distance_;
184 typedef struct NodeProb_t {
189 typedef std::vector<Neighbourhood> NeighbourhoodVector;
190 typedef std::unordered_map<graph::EdgeWeight, NodeIdDistanceData> ZValues;
197 typedef std::vector<NodeDistanceIdRandomIdData>
198 NodeDistanceIdRandomIdDataVector;
199 typedef std::vector<NodeDistanceIdRandomIdData>::iterator
200 NodeDistanceIdRandomIdDataVectorItr;
211 int K,
int node_id, RandomId random_id,
212 std::vector<PrunningThreshold>* prunning_thresholds = NULL,
213 unsigned int reserve_size = 1,
214 bool should_calc_z_value =
false) {
217 random_id_ = random_id;
219 nodes_id_distance_.clear();
220 prunning_thresholds_ = prunning_thresholds;
221 should_calc_z_value_ = should_calc_z_value;
224 bool IsInit()
const {
return was_init_; }
226 bool AddToCandidates(NodeDistanceIdRandomIdData node_details) {
227 candidate_nodes_.push_back(node_details);
231 int InsertCandidatesNodes() {
232 compare_node_randomid_decreasing obj;
233 sort(candidate_nodes_.begin(), candidate_nodes_.end(), obj);
235 for (
unsigned int i = 0; i < candidate_nodes_.size(); i++) {
236 Add(candidate_nodes_[i]);
238 int numInserted = candidate_nodes_.size();
239 candidate_nodes_.clear();
243 bool Add(NodeDistanceIdRandomIdData node_details) {
245 node_details.GetDistance());
246 return Add(node_id_distance);
251 NodeIdDistanceVector::reverse_iterator it_k =
252 nodes_id_distance_.rbegin() + (K_ - 1);
254 if (node_details.GetDistance() > it_k->GetDistance()) {
255 (*is_zvalue) =
false;
256 (*should_insert) =
false;
259 if (node_details.GetDistance() == it_k->GetDistance()) {
260 if (z_values_.find(node_details.GetDistance()) != z_values_.end()) {
261 (*should_insert) =
false;
262 (*is_zvalue) =
false;
264 (*should_insert) =
false;
269 (*should_insert) =
true;
270 (*is_zvalue) =
false;
277 if (nodes_id_distance_.size() < K_) {
278 LOG_M(DEBUG4,
" Adding to NodeSketch since size < k, "
279 <<
" size=" << nodes_id_distance_.size()
282 compare_node_distance_increasing obj;
283 NodeIdDistanceVectorItr up =
284 std::upper_bound(nodes_id_distance_.begin(),
285 nodes_id_distance_.end(), node_details, obj);
286 nodes_id_distance_.insert(up, node_details);
293 ShouldInsert(node_details, &should_insert, &is_z_value);
297 std::make_pair(node_details.GetDistance(), node_details));
301 if (!should_insert) {
308 compare_node_distance_decreasing obj;
310 NodeIdDistanceVector::reverse_iterator up = std::upper_bound(
311 nodes_id_distance_.rbegin(), nodes_id_distance_.rbegin() + K_,
313 unsigned int position_upper = up - nodes_id_distance_.rbegin();
315 LOG_M(DEBUG4,
" Size of array "
316 << nodes_id_distance_.size()
317 <<
" upper position=" << position_upper
318 <<
" up=" << *(up) <<
" First element "
319 << nodes_id_distance_[0] <<
" Last element "
320 << nodes_id_distance_[nodes_id_distance_.size() - 1]);
331 LOG_M(DEBUG5,
"Before inserting");
333 if (position_upper <= K_) {
334 unsigned int position_relative_to_begin =
335 nodes_id_distance_.rend() - up;
336 NodeIdDistanceVectorItr it;
337 if (position_relative_to_begin == nodes_id_distance_.size()) {
338 it = nodes_id_distance_.end();
340 it = nodes_id_distance_.begin() + position_relative_to_begin;
342 nodes_id_distance_.insert(it, node_details);
343 LOG_M(DEBUG4,
" Inserting to NodeSketch at location "
344 << position_relative_to_begin);
345 if (prunning_thresholds_ != NULL) {
346 (*prunning_thresholds_)[node_id_].SetDistance(
347 (nodes_id_distance_.rbegin() + (K_ - 1))->GetDistance());
348 if (nodes_id_distance_.size() >= K_) {
349 bool is_distance_equal_to_following_node_distance_in_ads =
350 ((nodes_id_distance_.rbegin() + (K_ - 1))->GetDistance() ==
351 (nodes_id_distance_.rbegin() + (K_))->GetDistance());
352 (*prunning_thresholds_)[node_id_].SetIsEqualToNext(
353 is_distance_equal_to_following_node_distance_in_ads);
363 void Get(graph::EdgeWeight distance,
364 NodeIdDistanceVector* nodes_id_distance_vector) {
365 nodes_id_distance_vector->clear();
366 for (NodeIdDistanceVector::reverse_iterator iter =
367 nodes_id_distance_.rbegin();
368 iter != nodes_id_distance_.rend(); iter++) {
369 if (iter->GetDistance() <= distance) {
370 LOG_M(DEBUG5,
"Inserting to vector node " << *iter);
371 nodes_id_distance_vector->push_back(*(iter));
376 void GetAllDistances(NodeIdDistanceVector* nodes_id_distance_vector) {
377 nodes_id_distance_vector->clear();
378 *nodes_id_distance_vector = nodes_id_distance_;
382 int GetSketchSize() {
return nodes_id_distance_.size(); }
388 double GetDistanceCoverNeighborhood(
int neighborhood_size) {
389 compare_neighbourhood_size obj;
390 Neighbourhood entry(0, neighborhood_size);
391 NeighbourhoodVector::iterator up = std::upper_bound(
392 neighbourhoods_.begin(), neighbourhoods_.end(), entry, obj);
394 if (neighbourhoods_.size() == 0) {
395 LOG_M(DEBUG3,
"neighbourhoods vector is empty, returning 0");
398 if (up == neighbourhoods_.begin()) {
399 LOG_M(DEBUG3,
"Match first element");
400 if (up->GetSize() == neighbourhoods_.begin()->GetSize()) {
401 return up->GetDistance();
405 if (up == neighbourhoods_.end()) {
406 LOG_M(DEBUG3,
"Match last element");
407 return neighbourhoods_.back().GetDistance() + 1;
409 LOG_M(DEBUG3,
" Distance= " << up->GetDistance() <<
410 " Size=" << up->GetSize() <<
411 " Wanted size=" << neighborhood_size);
412 return up->GetDistance();
417 int GetNeighborhoodDistanceIndex(graph::EdgeWeight distance) {
418 compare_neighbourhood_distance obj;
419 Neighbourhood entry(distance, 0);
420 NeighbourhoodVector::iterator up = std::upper_bound(
421 neighbourhoods_.begin(), neighbourhoods_.end(), entry, obj);
422 if (neighbourhoods_.size() == 0) {
425 if (up == neighbourhoods_.begin()) {
426 if (up->GetDistance() == neighbourhoods_.begin()->GetDistance()) {
432 if (up == neighbourhoods_.end()) {
433 return neighbourhoods_.size() - 1;
435 if (up->GetDistance() > distance) {
438 return up - neighbourhoods_.begin();
442 double GetNeighborhoodDistanceByIndex(
int index) {
446 if (index >= neighbourhoods_.size()) {
447 return neighbourhoods_.back().GetSize();
449 return (neighbourhoods_.begin() + index)->GetSize();
455 int GetSizeNeighborhoodUpToDistance(graph::EdgeWeight distance) {
456 compare_neighbourhood_distance obj;
457 Neighbourhood entry(distance, 0);
458 NeighbourhoodVector::iterator up = std::upper_bound(
459 neighbourhoods_.begin(), neighbourhoods_.end(), entry, obj);
461 if (up == neighbourhoods_.begin()) {
462 LOG_M(DEBUG3,
"Distance is in the begin of the vector" <<
463 " Distance= " << up->GetDistance() <<
464 " Size=" << up->GetSize());
465 if (up->GetDistance() == neighbourhoods_.begin()->GetDistance()) {
466 return up->GetSize();
472 if (up == neighbourhoods_.end()) {
473 LOG_M(DEBUG3,
"Distance is in the end of the vector" <<
474 " Size=" << neighbourhoods_.back().GetSize());
475 return neighbourhoods_.back().GetSize();
477 if (up->GetDistance() > distance) {
480 LOG_M(DEBUG3,
"Distance is in the midddle of the vector" <<
481 " Distance= " << up->GetDistance() <<
482 " Size=" << up->GetSize());
483 return up->GetSize();
488 void CalculateAllDistanceNeighborhood() {
489 neighbourhoods_.clear();
490 if (nodes_id_distance_.size() == 0) {
494 std::vector<NodeDistanceIdRandomIdData> neighborhoodVector;
495 NodeIdDistanceVector::reverse_iterator it;
496 neighbourhoods_.clear();
497 unsigned int currentDistance = 0;
498 compare_node_randomid_decreasing obj;
499 for (it = nodes_id_distance_.rbegin(); it != nodes_id_distance_.rend();
501 LOG_M(DEBUG3,
"Iterting " << *it <<
" Current distance=" << currentDistance);
502 if (it->GetDistance() != currentDistance) {
503 LOG_M(DEBUG3,
" Changing distance to " << it->GetDistance());
506 if (neighborhoodVector.size() < K_) {
507 if (neighborhoodVector.size() == 0) {
510 size = neighborhoodVector.size() - 1;
513 RandomId omega = neighborhoodVector[K_ - 1].GetRandomId();
514 size = (K_ - 1) / omega;
516 LOG_M(DEBUG3,
" Size of neighborhood " << size);
517 Neighbourhood entry(currentDistance, size);
518 neighbourhoods_.push_back(entry);
519 currentDistance = it->GetDistance();
522 NodeDistanceIdRandomIdData t(it->GetDistance(), it->GetNId(),
523 (*node_distribution_)[it->GetNId()]);
524 std::vector<NodeDistanceIdRandomIdData>::iterator up =
525 std::upper_bound(neighborhoodVector.begin(),
526 neighborhoodVector.end(), t, obj);
527 neighborhoodVector.insert(up, t);
532 if (neighborhoodVector.size() < K_) {
533 if (neighborhoodVector.size() == 0) {
536 size = neighborhoodVector.size() - 1;
540 RandomId omega = neighborhoodVector[K_ - 1].GetRandomId();
541 size = (K_ - 1) / omega;
543 Neighbourhood entry(nodes_id_distance_.begin()->GetDistance(), size);
544 neighbourhoods_.push_back(entry);
547 NeighbourhoodVector* UTGetNeighbourhoodVector() {
return &neighbourhoods_; }
549 NodeIdDistanceVector* UTGetNodeAdsVector() {
return &nodes_id_distance_; }
550 void SetPrunningThresholds(std::vector<PrunningThreshold>* athresholds) {
551 prunning_thresholds_ = athresholds;
555 const NodeIdDistanceVector* GetNodeAdsVector()
const {
556 return &nodes_id_distance_;
559 int GetK()
const {
return K_; }
561 int GetNId()
const {
return node_id_; }
563 RandomId GetRandomId()
const {
return random_id_; }
567 const NeighbourhoodVector& GetNeighbourHoodVector()
const {
568 return neighbourhoods_;
571 const std::vector<NodeDistanceIdRandomIdData>& GetCandidates()
const {
572 return candidate_nodes_;
575 bool operator==(
const NodeSketch& other)
const {
576 if (IsInit() ==
false && other.IsInit() ==
false) {
579 if (GetK() != other.GetK()) {
580 LOG_M(NOTICE,
" K is not euqal!" <<
581 " lhs K = " << GetK() <<
582 " rhs K = " << other.GetK());
586 if (GetNId() != other.GetNId()) {
587 LOG_M(NOTICE,
" Node id is not equal!");
591 if (GetRandomId() != other.GetRandomId()) {
592 LOG_M(NOTICE,
" RandomId is not equal!");
594 if (*GetNodeAdsVector() != (*other.GetNodeAdsVector())) {
595 LOG_M(NOTICE,
" Node Sketches are not equal!");
598 if (GetNeighbourHoodVector() != other.GetNeighbourHoodVector()) {
599 LOG_M(NOTICE,
" NeighbourHood Vector are not equal!");
602 if (GetCandidates() != other.GetCandidates()) {
603 LOG_M(NOTICE,
" Candidates Vector are not equal!");
610 const ZValues& GetZValues() {
return z_values_; }
612 bool HasZValue(graph::EdgeWeight distance) {
613 return z_values_.find(distance) != z_values_.end();
616 void set_z_values(ZValues* z) {
620 void CalculateInsertProb(std::vector<NodeProb>* insert_prob) {
622 insert_prob->clear();
623 if (nodes_id_distance_.size() < K_) {
624 for (
int i = 0; i < nodes_id_distance_.size(); i++) {
625 NodeProb node_thresh_prob;
626 node_thresh_prob.prob = 1;
627 node_thresh_prob.node_id = nodes_id_distance_[i].GetNId();
628 insert_prob->push_back(node_thresh_prob);
633 std::vector<NodeDistanceIdRandomIdData> moving_threshold;
634 graph::EdgeWeight distance_covered = -1;
635 compare_node_randomid_decreasing comparator;
636 for (NodeIdDistanceVector::reverse_iterator r_it =
637 nodes_id_distance_.rbegin();
638 r_it < nodes_id_distance_.rend(); r_it++) {
640 "id:" << r_it->GetNId() <<
" distane:" << r_it->GetDistance()
641 <<
" random id: " << (*node_distribution_)[r_it->GetNId()]);
643 if (distance_covered != r_it->GetDistance()) {
644 for (NodeIdDistanceVector::reverse_iterator runner_it = r_it;
645 (runner_it->GetDistance() == r_it->GetDistance()) &&
646 runner_it != nodes_id_distance_.rend();
648 NodeDistanceIdRandomIdData node_details(
649 runner_it->GetDistance(), runner_it->GetNId(),
650 (*node_distribution_)[runner_it->GetNId()]);
652 moving_threshold.insert(std::upper_bound(moving_threshold.begin(),
653 moving_threshold.end(),
659 ZValues::iterator z_it = z_values_.find(r_it->GetDistance());
660 if (z_it != z_values_.end()) {
661 NodeDistanceIdRandomIdData node_details(
662 z_it->second.GetDistance(), z_it->second.GetNId(),
663 (*node_distribution_)[z_it->second.GetNId()]);
665 moving_threshold.insert(std::upper_bound(moving_threshold.begin(),
666 moving_threshold.end(),
672 distance_covered = r_it->GetDistance();
674 NodeProb node_thresh_prob;
675 node_thresh_prob.node_id = r_it->GetNId();
676 if (moving_threshold.size() <= K_) {
677 LOG_M(DEBUG3,
"node id:" << r_it->GetNId() <<
" prob: 1");
678 node_thresh_prob.prob = 1;
680 std::vector<NodeDistanceIdRandomIdData>::iterator it_k_plus_1 =
681 moving_threshold.begin() + K_;
682 if (node_thresh_prob.node_id == it_k_plus_1->GetNId()) {
685 LOG_M(DEBUG3,
" node id:" << r_it->GetNId() <<
" Skip Z value");
688 node_thresh_prob.prob = it_k_plus_1->GetRandomId();
690 insert_prob->push_back(node_thresh_prob);
692 std::reverse(insert_prob->begin(), insert_prob->end());
695 void SetDisribution(std::vector<RandomId>* node_distribution) {
696 node_distribution_ = node_distribution;
700 NodeIdDistanceVectorItr Begin() {
701 return nodes_id_distance_.begin();
704 NodeIdDistanceVectorItr End() {
705 return nodes_id_distance_.end();
711 bool should_calc_z_value_;
714 std::vector<RandomId>* node_distribution_;
716 NodeIdDistanceVector nodes_id_distance_;
717 std::vector<NodeDistanceIdRandomIdData> candidate_nodes_;
718 std::vector<PrunningThreshold>* prunning_thresholds_;
719 NeighbourhoodVector neighbourhoods_;
726 #endif // SRC_SKETCH_NODE_SKETCH_H_
Single node sketch.
Definition: node_sketch.h:204
std::vector< NodeIdDistanceData >::iterator NodeIdDistanceVectorItr
Definition: node_sketch.h:196
std::vector< NodeIdDistanceData > NodeIdDistanceVector
Container for node id and distance.
Definition: node_sketch.h:193
const float UNREACHABLE
Definition: common.h:62
Data structure for the graph sketch.
Definition: graph_sketch.h:17
double RandomId
Type for random ids.
Definition: node_sketch.h:14
Container class to store node id and distance.
Definition: node_sketch.h:26