1 #ifndef SRC_ALGORITHMS_T_SKIM_H_
2 #define SRC_ALGORITHMS_T_SKIM_H_
6 #include "../sketch/graph_sketch.h"
7 #include "../graph/graph.h"
9 #include "../proto/cover.pb.h"
15 typedef proto::SeedInfoGpb SeedInfoGpb;
16 typedef proto::CoverGpb CoverGpb;
36 typedef std::unordered_map< int, SeedCover >::iterator Iterator;
47 void LoadCoverFromGpb(
const CoverGpb& cover) {
48 for (
int i=0; i < cover.seeds_size(); i++) {
49 int seed_id = cover.seeds(i).seed_node_id();
51 for (
int j=0; j < cover.seeds(i).node_ids_size(); j++) {
52 int covered_node_id = cover.seeds(i).node_ids(j);
53 AddNodeToSeed(seed_id, covered_node_id);
57 void SaveGraphSketchToGpb(CoverGpb* cover) {
58 for (Iterator it=Begin(); it != End(); it++) {
59 SeedInfoGpb* seed = cover->add_seeds();
60 int seed_id = it->second.seed;
61 seed->set_seed_node_id(seed_id);
62 for (
int i=0; i < it->second.covered_nodes.size(); i++) {
63 int node_id = it->second.covered_nodes[i];
64 seed->add_node_ids(node_id);
68 inline void AddSeed(
int seed) {
71 cover[seed].index = cover.size();
72 LOG_M(DEBUG5,
"seed=" << seed);
73 cover[seed].seed = seed;
75 inline void SetSeedEstimate(
int seed,
double estimate_cover_size) {
76 estimated_cover[seed] = estimate_cover_size;
78 inline double GetSeedEstimate(
int seed) {
79 return estimated_cover[seed];
81 inline void AddNodeToSeed(
int seed,
int node_id) {
82 LOG_M(DEBUG5,
"seed=" << seed <<
" node_id=" << node_id);
83 is_covered[node_id] =
true;
84 if (cover.count(seed) == 0) {
87 cover[seed].covered_nodes.push_back(node_id);
89 inline bool IsCovered(
int node_id) {
90 return (is_covered.count(node_id) != 0);
92 inline unsigned int Size() {
95 inline const SeedCover& GetSeedCover(
int seed) {
96 if (cover.count(seed) == 0) {
101 inline Iterator Begin() {
102 return cover.begin();
104 inline Iterator End() {
110 std::unordered_map< int, SeedCover > cover;
111 std::unordered_map< int, bool > is_covered;
112 std::unordered_map< int, double> estimated_cover;
118 class TSkimDijkstraCallBacksDistancePrune {
120 void InitTSkimDijkstraCallBacksDistancePrune(
double distance_stop) {
121 num_nodes_visited_ = 0;
122 visited_nodes_including_self_.clear();
123 distance_stop_ = distance_stop;
126 void SetWantedNodes(
const std::unordered_map<int, bool>& nodes) {
127 wanted_nodes_ = nodes;
131 LOG_M(DEBUG4,
"Source node id=" << source_node_id);
132 source_node_id_ = source_node_id;
135 inline void NodePopedFromHeap(
int poped_node_id,
const NodeIdDistanceData& heap_value) {
136 LOG_M(DEBUG4,
"poped node=" << poped_node_id);
137 if (wanted_nodes_.size() != 0 && wanted_nodes_.count(poped_node_id) == 0) {
140 LOG_M(DEBUG3,
" poped node=" << poped_node_id <<
141 " Distance=" << heap_value.GetDistance() <<
142 " Distance to stop=" << distance_stop_);
143 if (heap_value.GetDistance() < distance_stop_) {
144 visited_nodes_including_self_.push_back(poped_node_id);
145 ++num_nodes_visited_;
150 inline bool ShouldPrune(
int visited_node_id, graph::EdgeWeight distance_from_source_to_visited_node) {
151 return distance_from_source_to_visited_node >= distance_stop_;
154 inline bool ShouldStop() {
return false; }
156 const std::vector<int>& get_visited_nodes() {
157 return visited_nodes_including_self_;
159 inline void RelaxedPath(
int node_id) { }
160 std::unordered_map<int, bool> wanted_nodes_;
162 std::vector<int> visited_nodes_including_self_;
163 int num_nodes_visited_;
164 double distance_stop_;
170 class TSkimDijkstraCallBacks {
172 void InitTSkimDijkstraCallBacks(
int T) {
174 num_nodes_visited_ = 0;
175 visited_nodes_including_self_.clear();
178 void SetWantedNodes(
const std::unordered_map<int, bool>& nodes) {
179 wanted_nodes_ = nodes;
183 LOG_M(DEBUG4,
"Source node id=" << source_node_id);
184 source_node_id_ = source_node_id;
187 inline void NodePopedFromHeap(
int poped_node_id,
const NodeIdDistanceData& heap_value) {
188 LOG_M(DEBUG4,
"poped node=" << poped_node_id);
189 if (wanted_nodes_.size() != 0 && wanted_nodes_.count(poped_node_id) == 0) {
192 if (num_nodes_visited_ < T_) {
193 visited_nodes_including_self_.push_back(poped_node_id);
195 ++num_nodes_visited_;
199 inline bool ShouldPrune(
int visited_node_id, graph::EdgeWeight distance_from_source_to_visited_node) {
return false; }
201 inline bool ShouldStop() {
202 return num_nodes_visited_ >= T_;
205 const std::vector<int>& get_visited_nodes() {
206 return visited_nodes_including_self_;
208 inline void RelaxedPath(
int node_id) { }
209 std::unordered_map<int, bool> wanted_nodes_;
212 std::vector<int> visited_nodes_including_self_;
213 int num_nodes_visited_;
218 int random_gen_func (
int i) {
return 42 % i;}
220 bool cmp(
const std::pair<int, int> &p1,
const std::pair<int, int> &p2) {
221 return p1.second > p2.second;
225 bool operator()(
const std::pair<int, int> &p1,
const std::pair<int, int> &p2)
const {
226 if (p1.second > p2.second) {
229 if (p1.second < p2.second) {
232 if (p1.first > p2.first) {
235 if (p1.first < p2.first) {
245 virtual ~TSkimBase() {};
247 void InitTSkimBase(
int T,
248 int min_influence_for_seed,
252 min_influence_for_seed_ = min_influence_for_seed;
255 node_influence_.clear();
256 reachable_nodes_.clear();
259 void set_rankers_nodes(
const std::vector<int>& rankers_nodes) {
260 rankers_nodes_ = rankers_nodes;
263 void set_rankees_nodes(
const std::vector<int>& rankees_nodes) {
264 for (
int i=0; i < rankees_nodes.size(); i++) {
265 rankees_nodes_[rankees_nodes[i]] =
true;
269 void set_wanted_cover_nodes(
const std::vector<int>& wanted_cover_nodes) {
270 for (
int i=0; i < wanted_cover_nodes.size(); i++) {
271 wanted_nodes_[wanted_cover_nodes[i]] =
true;
275 void CalculateRankeeNodes() {
276 ExtractNodesFromGraph(&rankees_nodes_);
279 void CalculateRankerNodes() {
280 ExtractNodesFromGraph(&rankers_nodes_);
283 void CalculateWantedNodes() {
284 ExtractNodesFromGraph(&wanted_nodes_);
287 void CalculateRandomNodePermutation(std::vector<int>* nodes) {
288 for (
int i=0; i < nodes->size(); i++) {
289 boost::random::mt19937 gen;
290 boost::random::uniform_int_distribution<> dis(i, nodes->size() - 1);
291 int swap_index = dis(gen);
293 int b = (*nodes)[swap_index];
295 (*nodes)[swap_index] = a;
299 void ExtractNodesFromGraph(std::unordered_map<int, bool>* candidates) {
300 if (candidates->size() == 0) {
301 for (
auto node_itr = graph_->BegNI(); node_itr != graph_->EndNI() ; node_itr++ ){
302 (*candidates)[node_itr.GetId()] =
true;
307 void ExtractNodesFromGraph(std::vector<int>* candidates) {
308 if (candidates->size() == 0) {
309 for (
auto node_itr = graph_->BegNI(); node_itr != graph_->EndNI() ; node_itr++ ){
310 candidates->push_back(node_itr.GetId());
316 CalculateRankerNodes();
317 CalculateRankeeNodes();
318 CalculateWantedNodes();
319 CalculateRandomNodePermutation(&rankers_nodes_);
322 int UpdateCover(
int seed, std::unordered_map<int, int>* influence_change,
const std::vector<int>& covered_nodes) {
323 int num_covered_nodes = 0;
324 for (
int j=0; j < covered_nodes.size(); j++) {
325 int covered_node = covered_nodes[j];
326 if (cover_->IsCovered(covered_node)) {
329 LOG_M(DEBUG3,
"Seed=" << seed <<
"Covered node= " << covered_node);
331 cover_->AddNodeToSeed(seed, covered_node);
332 node_influence_.erase(covered_node);
334 for (
int j=0; j < covered_nodes.size(); j++) {
335 int covered_node = covered_nodes[j];
336 if (reachable_nodes_.count(covered_node) == 0) {
339 for (
int i=0; i < reachable_nodes_[covered_node].size(); i++) {
340 int reachable_node_from_covered_node = reachable_nodes_[covered_node][i];
341 if (cover_->IsCovered(reachable_node_from_covered_node)) {
345 LOG_M(DEBUG3,
"Node " << reachable_node_from_covered_node <<
" reachable from " << covered_node);
346 if (influence_change->count(reachable_node_from_covered_node) == 0) {
347 (*influence_change)[reachable_node_from_covered_node] = node_influence_[reachable_node_from_covered_node];
349 node_influence_[reachable_node_from_covered_node] = node_influence_[reachable_node_from_covered_node] - 1;
351 reachable_nodes_.erase(covered_node);
353 LOG_M(DEBUG3,
"Seed=" << seed <<
" num actual covered nodes= " << num_covered_nodes);
354 return num_covered_nodes;
357 virtual int AddSeed(
int seed, std::unordered_map<int, int>* influence_change) = 0;
363 virtual void UpdateInfluceAndSeedSet(
int source_node,
365 std::vector<int>* seed_set) {
366 if (node_influence_.count(visited_node) == 0) {
367 node_influence_[visited_node] = 0;
369 ++(node_influence_)[visited_node];
370 if (node_influence_[visited_node] >= min_influence_for_seed_) {
371 LOG_M(DEBUG3,
"adding seed " << visited_node <<
" influence=" << node_influence_[visited_node]);
372 seed_set->push_back(visited_node);
374 if (reachable_nodes_.count(source_node) == 0) {
375 reachable_nodes_[source_node] = std::vector<int>();
377 reachable_nodes_[source_node].push_back(visited_node);
380 virtual const std::vector<int>& CalculateVisitedNodes(
int source) = 0;
382 int Run(
bool ShouldRunPreRunInit =
true) {
383 if (ShouldRunPreRunInit) {
386 int num_covered_nodes = 0;
389 for (
int i=0; i < rankers_nodes_.size(); i++) {
390 int source_node_id = rankers_nodes_[i];
392 if (cover_->IsCovered(source_node_id)) {
395 LOG_M(DEBUG3,
"Iterating permutation, current node = " << source_node_id);
396 std::vector<int> current_iteration_seed_set;
397 const std::vector<int>& visited_nodes = CalculateVisitedNodes(source_node_id);
398 for (
int i=0; i < visited_nodes.size(); i++) {
399 int visited_node = visited_nodes[i];
400 LOG_M(DEBUG3,
"visited node = " << visited_node);
401 if (cover_->IsCovered(visited_node)) {
404 UpdateInfluceAndSeedSet(source_node_id, visited_node, ¤t_iteration_seed_set);
407 for (
int i=0; i <current_iteration_seed_set.size(); i++) {
408 int seed = current_iteration_seed_set[i];
409 if (cover_->IsCovered(source_node_id) || node_influence_[seed] < min_influence_for_seed_) {
412 std::unordered_map<int, int> influence_change;
413 LOG_M(DEBUG5,
"Adding Seed node =" << seed );
414 double estimate_seed_cover_size = ( min_influence_for_seed_ * (
static_cast<double>(rankers_nodes_.size()) / static_cast<double>(num_passed)) );
415 cover_->SetSeedEstimate(seed, estimate_seed_cover_size);
416 num_covered_nodes += AddSeed(seed, &influence_change);
420 LOG_M(DEBUG3,
" num covered nodes =" << num_covered_nodes <<
421 " num nodes left =" << node_influence_.size());
422 std::set< std::pair<int, int> , cmp_pair> heap;
423 for (
auto it = node_influence_.begin(); it != node_influence_.end(); it++) {
424 LOG_M(DEBUG3,
"Node influence raw node =" << it->first <<
" Influence=" << it->second);
425 heap.insert(std::make_pair(it->first, it->second));
427 int last_cover_size = INT_MAX;
428 int last_influence = INT_MAX;
429 while (!heap.empty()) {
430 auto node_details = (*heap.begin());
431 heap.erase(heap.begin());
432 int seed = node_details.first;
433 int seed_influence = node_details.second;
434 if (seed_influence > last_influence) {
435 LOG_M(NOTICE,
"Assumption failed num influence increased" <<
436 "last influence=" << last_influence <<
437 "current influence=" << seed_influence);
438 assert(seed_influence <= last_influence);
440 last_influence = seed_influence;
441 if (cover_->IsCovered(seed)) {
442 assert(node_influence_.count(seed) == 0);
445 std::unordered_map<int, int> influence_change;
446 int num_covered_nodes_by_seed = AddSeed(seed, &influence_change);
447 if (this->wanted_nodes_.size() == this->graph_->GetNumNodes()) {
448 if (num_covered_nodes_by_seed != seed_influence) {
449 LOG_M(NOTICE,
"Assumption failed num covered nodes not equal to Influence" <<
450 " num_covered_nodes=" << num_covered_nodes <<
451 " seed_influence="<< seed_influence <<
454 assert(num_covered_nodes_by_seed == seed_influence);
456 if (num_covered_nodes_by_seed > last_cover_size &&
457 this->wanted_nodes_.size() == this->graph_->GetNumNodes()) {
458 LOG_M(NOTICE,
"Assumption failed num covered nodes increased" <<
459 " last=" << last_cover_size <<
460 " current="<< num_covered_nodes_by_seed <<
462 " seed influence=" << seed_influence);
463 assert(num_covered_nodes_by_seed <= last_cover_size);
465 last_cover_size = num_covered_nodes_by_seed;
466 LOG_M(DEBUG3,
"Adding Seed node =" << seed <<
" Influence=" << seed_influence <<
"covered nodes=" << num_covered_nodes_by_seed);
467 num_covered_nodes += num_covered_nodes_by_seed;
468 cover_->SetSeedEstimate(seed, num_covered_nodes_by_seed);
469 for (std::unordered_map<int, int>::iterator change_it = influence_change.begin();
470 change_it != influence_change.end();
472 heap.erase(std::make_pair(change_it->first, change_it->second));
473 heap.insert( std::make_pair(change_it->first, node_influence_[change_it->first]));
481 int min_influence_for_seed_;
484 std::vector<int> rankers_nodes_;
485 std::unordered_map<int, bool> rankees_nodes_;
486 std::unordered_map<int, bool> wanted_nodes_;
488 std::unordered_map<int, int> node_influence_;
489 std::unordered_map<int, std::vector<int> > reachable_nodes_;
493 #endif // SRC_ALGORITHMS_T_SKIM_H_
std::vector< int > covered_nodes
Definition: t_skim.h:29
Single seed information.
Definition: t_skim.h:26
Contains all reverse rank algorithms.
int seed
Definition: t_skim.h:27
Cover extracted by the influence maximization algorithms.
Definition: t_skim.h:34
int index
Definition: t_skim.h:28
Container class to store node id and distance.
Definition: node_sketch.h:26
Sketch calculation algorithms.
struct all_distance_sketch::SeedCover_t SeedCover
Single seed information.