🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■ ■
    isp_data_pollution.py
    skipped 335 lines
    def link_count(self):
        """Return the total number of links held across all domains, as an int."""
        # Add up the size of every per-domain collection in self.domain_links.
        total = sum(len(self.domain_links[domain]) for domain in self.domain_links)
        return int(total)
    338 338   
     339 + def domain_entropy(self):
     340 + result = 0.
     341 + domain_count = np.array([(dmn, len(self.domain_links[dmn])) for dmn in self.domain_links])
     342 + p = np.array([np.float(c) for d, c in domain_count])
     343 + count_total = p.sum()
     344 + if count_total > 0:
     345 + p = p / p.sum()
     346 + result = self.entropy(p)
     347 + return result
     348 + 
     349 + def entropy(self,p):
     350 + return -np.fromiter((self.xlgx(x) for x in p.flatten()),dtype=p.dtype).sum()
     351 + 
     352 + def xlgx(self,x):
     353 + x = max(0.,min(1.,x))
     354 + y = 0.
     355 + if not (x == 0. or x == 1.):
     356 + y = x*np.log2(x)
     357 + return y
     358 + 
    339 359   def seed_links(self):
    340 360   # bias with non-random seed links
    341 361   self.bias_links()
    skipped 234 lines
    576 596   except Exception as e:
    577 597   if self.debug: print('.current_url exception:\n{}'.format(e))
    578 598   if self.debug:
    579  - print("'{}': {:d} links added, {:d} total".format(current_url,k,self.link_count()))
     599 + print("'{}': {:d} links added, {:d} total, {:d} bits domain entropy".format(current_url,k,self.link_count(),int(np.round(self.domain_entropy()))))
    580 600   elif self.verbose:
    581 601   self.print_progress(k,current_url)
    582 602   
    skipped 78 lines
Please wait...
Page is in error, reload to recover