get-stats-non-obfs4-email.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. #!/usr/bin/env python3
  2. import csv
  3. import numpy
  4. import os
  5. import sys
  6. # Starting day: February Nth
  7. #N = 1
  8. N = 20
  9. # See note in readme
  10. JAN_31 = 2459245
  11. # 2021 February Nth as Julian date
  12. FIRST_DAY = JAN_31 + N
  13. TOTAL_BRIDGES = 1890
  14. OBFS4_EMAIL_BRIDGES = 93
  15. email_bridges = set()
  16. non_obfs4_email_bridges = set()
  17. other_bridges = set()
  18. with open ("data/obfs4-email-bridges", 'r') as f:
  19. for line in f:
  20. if line != "":
  21. email_bridges.add(line.strip())
  22. with open ("data/non-obfs4-email-bridges", 'r') as f:
  23. for line in f:
  24. if line != "":
  25. non_obfs4_email_bridges.add(line.strip())
  26. with open ("data/all-bridges", 'r') as f:
  27. for line in f:
  28. if line != "":
  29. bridge = line.strip()
  30. if not bridge in email_bridges and not bridge in non_obfs4_email_bridges:
  31. other_bridges.add(bridge)
  32. def max_counts (bridges):
  33. other_bridge_data = []
  34. other_bridge_max = []
  35. other_bridge_fpr = []
  36. for fingerprint in bridges:
  37. # We're going to get all the data for each bridge
  38. bridge_data = dict()
  39. begun = False
  40. max_count = 0
  41. filename = f"data/bridge_data_cleaned/{fingerprint.upper()}"
  42. if os.path.isfile(filename) and os.path.getsize(filename) > 0:
  43. with open(filename, 'r') as csvfile:
  44. data = csv.reader(csvfile, delimiter=',')
  45. for line in data:
  46. # Ignore 0 values until we see a non-zero value
  47. if not begun:
  48. if line[1] != "0":
  49. begun = True
  50. if begun:
  51. date = int(line[0][:line[0].find(' ')])
  52. if date > FIRST_DAY:
  53. break
  54. val = int(line[1])
  55. bridge_data[date] = val
  56. max_count = max(max_count, val)
  57. if begun:
  58. other_bridge_data.append(bridge_data)
  59. other_bridge_max.append(max_count)
  60. other_bridge_fpr.append(fingerprint)
  61. max_overall_count = 0
  62. for count in other_bridge_max:
  63. if count > max_overall_count:
  64. max_overall_count = count
  65. if max_overall_count == 0:
  66. print ("No bridges received any connections.")
  67. return
  68. # We want to count how many bridges have at least 8*i connections
  69. at_least_count = [0] * (max_overall_count // 8)
  70. # Note: We ignore index 0
  71. for count in other_bridge_max:
  72. for i in range(1, count//8):
  73. at_least_count[i] += 1
  74. last_value = at_least_count[1]
  75. for i in range(1, len(at_least_count)):
  76. count = at_least_count[i]
  77. if i == len(at_least_count) - 1:
  78. print (f"Bridges with at least {i*8} connections: {count}")
  79. elif i == 1:
  80. continue
  81. elif count != last_value:
  82. # print last number
  83. print (f"Bridges with at least {(i-1)*8} connections: {at_least_count[i-1]}")
  84. last_value = count
  85. non_obfs4_email_bridges = list(non_obfs4_email_bridges)
  86. other_bridges = list(other_bridges)
  87. print ("Max counts for bridges that were distributed by email but did not support obfs4:")
  88. max_counts (non_obfs4_email_bridges)
  89. print ("\nMax counts for bridges that were not distributed by email:")
  90. max_counts (other_bridges)