All Distance Sketch  0.1
All distance sketch based algorithms
t_skim.h
Go to the documentation of this file.
1 #ifndef SRC_ALGORITHMS_T_SKIM_H_
2 #define SRC_ALGORITHMS_T_SKIM_H_
3 
4 
5 #include "./sketch_calculation.h"
6 #include "../sketch/graph_sketch.h"
7 #include "../graph/graph.h"
8 #include "./reverse_rank.h"
9 #include "../proto/cover.pb.h"
14 namespace all_distance_sketch {
15 typedef proto::SeedInfoGpb SeedInfoGpb;
16 typedef proto::CoverGpb CoverGpb;
17 
18 
26 typedef struct SeedCover_t {
27  int seed;
28  int index;
29  std::vector<int> covered_nodes;
30 } SeedCover;
31 
34 class Cover {
35  public:
36  typedef std::unordered_map< int, SeedCover >::iterator Iterator;
37 
38  Cover() {
39  Clear();
40  }
41 
42  inline void Clear() {
43  cover.clear();
44  is_covered.clear();
45  }
46 
47  void LoadCoverFromGpb(const CoverGpb& cover) {
48  for (int i=0; i < cover.seeds_size(); i++) {
49  int seed_id = cover.seeds(i).seed_node_id();
50  AddSeed(seed_id);
51  for (int j=0; j < cover.seeds(i).node_ids_size(); j++) {
52  int covered_node_id = cover.seeds(i).node_ids(j);
53  AddNodeToSeed(seed_id, covered_node_id);
54  }
55  }
56  }
57  void SaveGraphSketchToGpb(CoverGpb* cover) {
58  for (Iterator it=Begin(); it != End(); it++) {
59  SeedInfoGpb* seed = cover->add_seeds();
60  int seed_id = it->second.seed;
61  seed->set_seed_node_id(seed_id);
62  for (int i=0; i < it->second.covered_nodes.size(); i++) {
63  int node_id = it->second.covered_nodes[i];
64  seed->add_node_ids(node_id);
65  }
66  }
67  }
68  inline void AddSeed(int seed) {
69  // is_covered[seed] = true;
70  cover[seed] = SeedCover();
71  cover[seed].index = cover.size();
72  LOG_M(DEBUG5, "seed=" << seed);
73  cover[seed].seed = seed;
74  }
75  inline void SetSeedEstimate(int seed, double estimate_cover_size) {
76  estimated_cover[seed] = estimate_cover_size;
77  }
78  inline double GetSeedEstimate(int seed) {
79  return estimated_cover[seed];
80  }
81  inline void AddNodeToSeed(int seed, int node_id) {
82  LOG_M(DEBUG5, "seed=" << seed << " node_id=" << node_id);
83  is_covered[node_id] = true;
84  if (cover.count(seed) == 0) {
85  AddSeed(seed);
86  }
87  cover[seed].covered_nodes.push_back(node_id);
88  }
89  inline bool IsCovered(int node_id) {
90  return (is_covered.count(node_id) != 0);
91  }
92  inline unsigned int Size() {
93  return cover.size();
94  }
95  inline const SeedCover& GetSeedCover(int seed) {
96  if (cover.count(seed) == 0) {
97  return empty_cover;
98  }
99  return cover[seed];
100  }
101  inline Iterator Begin() {
102  return cover.begin();
103  }
104  inline Iterator End() {
105  return cover.end();
106  }
107 
108  private:
109  SeedCover empty_cover;
110  std::unordered_map< int, SeedCover > cover;
111  std::unordered_map< int, bool > is_covered;
112  std::unordered_map< int, double> estimated_cover;
113 };
114 
117 template<class Z>
118 class TSkimDijkstraCallBacksDistancePrune {
119  public:
120  void InitTSkimDijkstraCallBacksDistancePrune(double distance_stop) {
121  num_nodes_visited_ = 0;
122  visited_nodes_including_self_.clear();
123  distance_stop_ = distance_stop;
124  }
125 
126  void SetWantedNodes(const std::unordered_map<int, bool>& nodes) {
127  wanted_nodes_ = nodes;
128  }
129 
130  inline void Started(int source_node_id, graph::Graph<Z>* graph) {
131  LOG_M(DEBUG4, "Source node id=" << source_node_id);
132  source_node_id_ = source_node_id;
133  }
134 
135  inline void NodePopedFromHeap(int poped_node_id, const NodeIdDistanceData& heap_value) {
136  LOG_M(DEBUG4, "poped node=" << poped_node_id);
137  if (wanted_nodes_.size() != 0 && wanted_nodes_.count(poped_node_id) == 0) {
138  return;
139  }
140  LOG_M(DEBUG3, " poped node=" << poped_node_id <<
141  " Distance=" << heap_value.GetDistance() <<
142  " Distance to stop=" << distance_stop_);
143  if (heap_value.GetDistance() < distance_stop_) {
144  visited_nodes_including_self_.push_back(poped_node_id);
145  ++num_nodes_visited_;
146  }
147  return;
148  }
149 
150  inline bool ShouldPrune(int visited_node_id, graph::EdgeWeight distance_from_source_to_visited_node) {
151  return distance_from_source_to_visited_node >= distance_stop_;
152  }
153 
154  inline bool ShouldStop() { return false; }
155 
156  const std::vector<int>& get_visited_nodes() {
157  return visited_nodes_including_self_;
158  }
159  inline void RelaxedPath(int node_id) { }
160  std::unordered_map<int, bool> wanted_nodes_;
161  int source_node_id_;
162  std::vector<int> visited_nodes_including_self_;
163  int num_nodes_visited_;
164  double distance_stop_;
165 };
169 template<class Z>
170 class TSkimDijkstraCallBacks {
171  public:
172  void InitTSkimDijkstraCallBacks(int T) {
173  T_ = T;
174  num_nodes_visited_ = 0;
175  visited_nodes_including_self_.clear();
176  }
177 
178  void SetWantedNodes(const std::unordered_map<int, bool>& nodes) {
179  wanted_nodes_ = nodes;
180  }
181 
182  inline void Started(int source_node_id, graph::Graph<Z>* graph) {
183  LOG_M(DEBUG4, "Source node id=" << source_node_id);
184  source_node_id_ = source_node_id;
185  }
186 
187  inline void NodePopedFromHeap(int poped_node_id, const NodeIdDistanceData& heap_value) {
188  LOG_M(DEBUG4, "poped node=" << poped_node_id);
189  if (wanted_nodes_.size() != 0 && wanted_nodes_.count(poped_node_id) == 0) {
190  return;
191  }
192  if (num_nodes_visited_ < T_) {
193  visited_nodes_including_self_.push_back(poped_node_id);
194  }
195  ++num_nodes_visited_;
196  return;
197  }
198 
199  inline bool ShouldPrune(int visited_node_id, graph::EdgeWeight distance_from_source_to_visited_node) { return false; }
200 
201  inline bool ShouldStop() {
202  return num_nodes_visited_ >= T_;
203  }
204 
205  const std::vector<int>& get_visited_nodes() {
206  return visited_nodes_including_self_;
207  }
208  inline void RelaxedPath(int node_id) { }
209  std::unordered_map<int, bool> wanted_nodes_;
210  int source_node_id_;
211  int T_;
212  std::vector<int> visited_nodes_including_self_;
213  int num_nodes_visited_;
214 };
215 
216 
217 
218 int random_gen_func (int i) { return 42 % i;}
219 
220 bool cmp(const std::pair<int, int> &p1, const std::pair<int, int> &p2) {
221  return p1.second > p2.second;
222 }
223 
224 struct cmp_pair {
225  bool operator()(const std::pair<int, int> &p1, const std::pair<int, int> &p2) const {
226  if (p1.second > p2.second) {
227  return true;
228  }
229  if (p1.second < p2.second) {
230  return false;
231  }
232  if (p1.first > p2.first) {
233  return true;
234  }
235  if (p1.first < p2.first) {
236  return false;
237  }
238  return false;
239  }
240 };
241 
242 template <class Z>
243 class TSkimBase {
244  public:
245  virtual ~TSkimBase() {};
246 
247  void InitTSkimBase(int T,
248  int min_influence_for_seed,
249  Cover * cover,
250  graph::Graph<Z>* graph) {
251  T_ = T;
252  min_influence_for_seed_ = min_influence_for_seed;
253  cover_ = cover;
254  graph_ = graph;
255  node_influence_.clear();
256  reachable_nodes_.clear();
257  }
258 
259  void set_rankers_nodes(const std::vector<int>& rankers_nodes) {
260  rankers_nodes_ = rankers_nodes;
261  }
262 
263  void set_rankees_nodes(const std::vector<int>& rankees_nodes) {
264  for (int i=0; i < rankees_nodes.size(); i++) {
265  rankees_nodes_[rankees_nodes[i]] = true;
266  }
267  }
268 
269  void set_wanted_cover_nodes(const std::vector<int>& wanted_cover_nodes) {
270  for (int i=0; i < wanted_cover_nodes.size(); i++) {
271  wanted_nodes_[wanted_cover_nodes[i]] = true;
272  }
273  }
274 
275  void CalculateRankeeNodes() {
276  ExtractNodesFromGraph(&rankees_nodes_);
277  }
278 
279  void CalculateRankerNodes() {
280  ExtractNodesFromGraph(&rankers_nodes_);
281  }
282 
283  void CalculateWantedNodes() {
284  ExtractNodesFromGraph(&wanted_nodes_);
285  }
286 
287  void CalculateRandomNodePermutation(std::vector<int>* nodes) {
288  for (int i=0; i < nodes->size(); i++) {
289  boost::random::mt19937 gen;
290  boost::random::uniform_int_distribution<> dis(i, nodes->size() - 1);
291  int swap_index = dis(gen);
292  int a = (*nodes)[i];
293  int b = (*nodes)[swap_index];
294  (*nodes)[i] = b;
295  (*nodes)[swap_index] = a;
296  }
297  }
298 
299  void ExtractNodesFromGraph(std::unordered_map<int, bool>* candidates) {
300  if (candidates->size() == 0) {
301  for (auto node_itr = graph_->BegNI(); /*node_itr.HasMore()*/ node_itr != graph_->EndNI() ; node_itr++ ){
302  (*candidates)[node_itr.GetId()] = true;
303  }
304  }
305  }
306 
307  void ExtractNodesFromGraph(std::vector<int>* candidates) {
308  if (candidates->size() == 0) {
309  for (auto node_itr = graph_->BegNI(); /*node_itr.HasMore()*/ node_itr != graph_->EndNI() ; node_itr++ ){
310  candidates->push_back(node_itr.GetId());
311  }
312  }
313  }
314 
315  void PreRunInit() {
316  CalculateRankerNodes();
317  CalculateRankeeNodes();
318  CalculateWantedNodes();
319  CalculateRandomNodePermutation(&rankers_nodes_);
320  }
321 
322  int UpdateCover(int seed, std::unordered_map<int, int>* influence_change, const std::vector<int>& covered_nodes) {
323  int num_covered_nodes = 0;
324  for (int j=0; j < covered_nodes.size(); j++) {
325  int covered_node = covered_nodes[j];
326  if (cover_->IsCovered(covered_node)) {
327  continue;
328  }
329  LOG_M(DEBUG3, "Seed=" << seed << "Covered node= " << covered_node);
330  ++num_covered_nodes;
331  cover_->AddNodeToSeed(seed, covered_node);
332  node_influence_.erase(covered_node);
333  }
334  for (int j=0; j < covered_nodes.size(); j++) {
335  int covered_node = covered_nodes[j];
336  if (reachable_nodes_.count(covered_node) == 0) {
337  continue;
338  }
339  for (int i=0; i < reachable_nodes_[covered_node].size(); i++) {
340  int reachable_node_from_covered_node = reachable_nodes_[covered_node][i];
341  if (cover_->IsCovered(reachable_node_from_covered_node)) {
342  // Covered by a different node
343  continue;
344  }
345  LOG_M(DEBUG3, "Node " << reachable_node_from_covered_node << " reachable from " << covered_node);
346  if (influence_change->count(reachable_node_from_covered_node) == 0) {
347  (*influence_change)[reachable_node_from_covered_node] = node_influence_[reachable_node_from_covered_node];
348  }
349  node_influence_[reachable_node_from_covered_node] = node_influence_[reachable_node_from_covered_node] - 1;
350  }
351  reachable_nodes_.erase(covered_node);
352  }
353  LOG_M(DEBUG3, "Seed=" << seed << " num actual covered nodes= " << num_covered_nodes);
354  return num_covered_nodes;
355  }
356 
357  virtual int AddSeed(int seed, std::unordered_map<int, int>* influence_change) = 0;
358 
359  /*
360  * Invariant: Once a node is covered he will not have any influnce.
361  * We will erase his influence and not update it any more.
362  */
363  virtual void UpdateInfluceAndSeedSet(int source_node,
364  int visited_node,
365  std::vector<int>* seed_set) {
366  if (node_influence_.count(visited_node) == 0) {
367  node_influence_[visited_node] = 0;
368  }
369  ++(node_influence_)[visited_node];
370  if (node_influence_[visited_node] >= min_influence_for_seed_) {
371  LOG_M(DEBUG3, "adding seed " << visited_node << " influence=" << node_influence_[visited_node]);
372  seed_set->push_back(visited_node);
373  }
374  if (reachable_nodes_.count(source_node) == 0) {
375  reachable_nodes_[source_node] = std::vector<int>();
376  }
377  reachable_nodes_[source_node].push_back(visited_node);
378  }
379 
380  virtual const std::vector<int>& CalculateVisitedNodes(int source) = 0;
381 
382  int Run(bool ShouldRunPreRunInit = true) {
383  if (ShouldRunPreRunInit) {
384  PreRunInit();
385  }
386  int num_covered_nodes = 0;
387  // Itrating the permutation of nodes
388  int num_passed = 0;
389  for (int i=0; i < rankers_nodes_.size(); i++) {
390  int source_node_id = rankers_nodes_[i];
391  ++num_passed;
392  if (cover_->IsCovered(source_node_id)) {
393  continue;
394  }
395  LOG_M(DEBUG3, "Iterating permutation, current node = " << source_node_id);
396  std::vector<int> current_iteration_seed_set;
397  const std::vector<int>& visited_nodes = CalculateVisitedNodes(source_node_id);
398  for (int i=0; i < visited_nodes.size(); i++) {
399  int visited_node = visited_nodes[i];
400  LOG_M(DEBUG3, "visited node = " << visited_node);
401  if (cover_->IsCovered(visited_node)) {
402  continue;
403  }
404  UpdateInfluceAndSeedSet(source_node_id, visited_node, &current_iteration_seed_set);
405  }
406 
407  for (int i=0; i <current_iteration_seed_set.size(); i++) {
408  int seed = current_iteration_seed_set[i];
409  if (cover_->IsCovered(source_node_id) || node_influence_[seed] < min_influence_for_seed_) {
410  continue;
411  }
412  std::unordered_map<int, int> influence_change;
413  LOG_M(DEBUG5, "Adding Seed node =" << seed );
414  double estimate_seed_cover_size = ( min_influence_for_seed_ * (static_cast<double>(rankers_nodes_.size()) / static_cast<double>(num_passed)) );
415  cover_->SetSeedEstimate(seed, estimate_seed_cover_size);
416  num_covered_nodes += AddSeed(seed, &influence_change);
417  }
418  }
419 
420  LOG_M(DEBUG3, " num covered nodes =" << num_covered_nodes <<
421  " num nodes left =" << node_influence_.size());
422  std::set< std::pair<int, int> , cmp_pair> heap;
423  for (auto it = node_influence_.begin(); it != node_influence_.end(); it++) {
424  LOG_M(DEBUG3, "Node influence raw node =" << it->first << " Influence=" << it->second);
425  heap.insert(std::make_pair(it->first, it->second));
426  }
427  int last_cover_size = INT_MAX;
428  int last_influence = INT_MAX;
429  while (!heap.empty()) {
430  auto node_details = (*heap.begin());
431  heap.erase(heap.begin());
432  int seed = node_details.first;
433  int seed_influence = node_details.second;
434  if (seed_influence > last_influence) {
435  LOG_M(NOTICE, "Assumption failed num influence increased" <<
436  "last influence=" << last_influence <<
437  "current influence=" << seed_influence);
438  assert(seed_influence <= last_influence);
439  }
440  last_influence = seed_influence;
441  if (cover_->IsCovered(seed)) {
442  assert(node_influence_.count(seed) == 0);
443  continue;
444  }
445  std::unordered_map<int, int> influence_change;
446  int num_covered_nodes_by_seed = AddSeed(seed, &influence_change);
447  if (this->wanted_nodes_.size() == this->graph_->GetNumNodes()) {
448  if (num_covered_nodes_by_seed != seed_influence) {
449  LOG_M(NOTICE, "Assumption failed num covered nodes not equal to Influence" <<
450  " num_covered_nodes=" << num_covered_nodes <<
451  " seed_influence="<< seed_influence <<
452  " seed=" << seed);
453  }
454  assert(num_covered_nodes_by_seed == seed_influence);
455  }
456  if (num_covered_nodes_by_seed > last_cover_size &&
457  this->wanted_nodes_.size() == this->graph_->GetNumNodes()) {
458  LOG_M(NOTICE, "Assumption failed num covered nodes increased" <<
459  " last=" << last_cover_size <<
460  " current="<< num_covered_nodes_by_seed <<
461  " seed=" << seed <<
462  " seed influence=" << seed_influence);
463  assert(num_covered_nodes_by_seed <= last_cover_size);
464  }
465  last_cover_size = num_covered_nodes_by_seed;
466  LOG_M(DEBUG3, "Adding Seed node =" << seed << " Influence=" << seed_influence << "covered nodes=" << num_covered_nodes_by_seed);
467  num_covered_nodes += num_covered_nodes_by_seed;
468  cover_->SetSeedEstimate(seed, num_covered_nodes_by_seed);
469  for (std::unordered_map<int, int>::iterator change_it = influence_change.begin();
470  change_it != influence_change.end();
471  change_it++) {
472  heap.erase(std::make_pair(change_it->first, change_it->second));
473  heap.insert( std::make_pair(change_it->first, node_influence_[change_it->first]));
474  }
475  }
476  return 0;
477  }
478 
479 protected:
480  int T_;
481  int min_influence_for_seed_;
482  Cover * cover_;
483  graph::Graph<Z>* graph_;
484  std::vector<int> rankers_nodes_;
485  std::unordered_map<int, bool> rankees_nodes_;
486  std::unordered_map<int, bool> wanted_nodes_;
487 
488  std::unordered_map<int, int> node_influence_;
489  std::unordered_map<int, std::vector<int> > reachable_nodes_;
490 };
491 
492 } // all_distance_sketch
493 #endif // SRC_ALGORITHMS_T_SKIM_H_
std::vector< int > covered_nodes
Definition: t_skim.h:29
Definition: common.h:53
Single seed information.
Definition: t_skim.h:26
Contains all reverse rank algorithms.
int seed
Definition: t_skim.h:27
Cover extracted by the influence maximization algorithms.
Definition: t_skim.h:34
int index
Definition: t_skim.h:28
Container class to store node id and distance.
Definition: node_sketch.h:26
Sketch calculation algorithms.
struct all_distance_sketch::SeedCover_t SeedCover
Single seed information.