
Merge remote-tracking branch 'karsten/mmdb-convert'

Nick Mathewson, 11 years ago
commit 9d0af78e3c
4 changed files with 446 additions and 375 deletions
  1. src/config/README.geoip      +0 -90
  2. src/config/deanonymind.py    +0 -205
  3. src/config/geoip-manual      +0 -80
  4. src/config/mmdb-convert.py   +446 -0

src/config/README.geoip (+0, -90)

@@ -1,90 +0,0 @@
-README.geoip -- information on the IP-to-country-code file shipped with tor
-===========================================================================
-
-The IP-to-country-code file in src/config/geoip is based on MaxMind's
-GeoLite Country database with the following modifications:
-
- - Those "A1" ("Anonymous Proxy") entries lying inbetween two entries with
-   the same country code are automatically changed to that country code.
-   These changes can be overriden by specifying a different country code
-   in src/config/geoip-manual.
-
- - Other "A1" entries are replaced with country codes specified in
-   src/config/geoip-manual, or are left as is if there is no corresponding
-   entry in that file.  Even non-"A1" entries can be modified by adding a
-   replacement entry to src/config/geoip-manual.  Handle with care.
-
-
-1. Updating the geoip file from a MaxMind database file
--------------------------------------------------------
-
-Download the most recent MaxMind GeoLite Country database:
-http://geolite.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip
-
-Run `python deanonymind.py` in the local directory.  Review the output to
-learn about applied automatic/manual changes and watch out for any
-warnings.
-
-Possibly edit geoip-manual to make more/fewer/different manual changes and
-re-run `python deanonymind.py`.
-
-When done, prepend the new geoip file with a comment like this:
-
-  # Last updated based on $DATE Maxmind GeoLite Country
-  # See README.geoip for details on the conversion.
-
-
-2. Verifying automatic and manual changes using diff
-----------------------------------------------------
-
-To unzip the original MaxMind file and look at the automatic changes, run:
-
-  unzip GeoIPCountryCSV.zip
-  diff -U1 GeoIPCountryWhois.csv AutomaticGeoIPCountryWhois.csv
-
-To look at subsequent manual changes, run:
-
-  diff -U1 AutomaticGeoIPCountryWhois.csv ManualGeoIPCountryWhois.csv
-
-To manually generate the geoip file and compare it to the automatically
-created one, run:
-
-  cut -d, -f3-5 < ManualGeoIPCountryWhois.csv | sed 's/"//g' > mygeoip
-  diff -U1 geoip mygeoip
-
-
-3. Verifying automatic and manual changes using blockfinder
------------------------------------------------------------
-
-Blockfinder is a powerful tool to handle multiple IP-to-country data
-sources.  Blockfinder has a function to specify a country code and compare
-conflicting country code assignments in different data sources.
-
-We can use blockfinder to compare A1 entries in the original MaxMind file
-with the same or overlapping blocks in the file generated above and in the
-RIR delegation files:
-
-  git clone https://github.com/ioerror/blockfinder
-  cd blockfinder/
-  python blockfinder -i
-  python blockfinder -r ../GeoIPCountryWhois.csv
-  python blockfinder -r ../ManualGeoIPCountryWhois.csv
-  python blockfinder -p A1 > A1-comparison.txt
-
-The output marks conflicts between assignments using either '*' in case of
-two different opinions or '#' for three or more different opinions about
-the country code for a given block.
-
-The '*' conflicts are most likely harmless, because there will always be
-at least two opinions with the original MaxMind file saying A1 and the
-other two sources saying something more meaningful.
-
-However, watch out for '#' conflicts.  In these cases, the original
-MaxMind file ("A1"), the updated MaxMind file (hopefully the correct
-country code), and the RIR delegation files (some other country code) all
-disagree.
-
-There are perfectly valid cases where the updated MaxMind file and the RIR
-delegation files don't agree.  But each of those cases must be verified
-manually.
-

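The automatic rule described in README.geoip above is easy to restate in
code.  Below is a minimal sketch (an editorial illustration with made-up
rows, not part of the removed files) of the adjacency test: an A1 block is
relabeled only when it exactly fills the gap between two ranges that share
a country code.

  # Rows in GeoLite Country CSV order:
  # (start_str, end_str, start_num, end_num, country_code)
  prev_row = ("1.0.0.0", "1.0.0.255", 16777216, 16777471, "US")
  a1_row   = ("1.0.1.0", "1.0.1.255", 16777472, 16777727, "A1")
  next_row = ("1.0.2.0", "1.0.2.255", 16777728, 16777983, "US")

  def can_merge(prev_row, a1_row, next_row):
      # The A1 range must start right after the previous range ends,
      # end right before the next range starts, and both neighbors
      # must agree on the country code.
      return (prev_row[3] + 1 == a1_row[2] and
              a1_row[3] + 1 == next_row[2] and
              prev_row[4] == next_row[4])

  assert can_merge(prev_row, a1_row, next_row)  # A1 is relabeled "US"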
src/config/deanonymind.py (+0, -205)

@@ -1,205 +0,0 @@
-#!/usr/bin/env python
-import optparse
-import os
-import sys
-import zipfile
-
-"""
-Take a MaxMind GeoLite Country database as input and replace A1 entries
-with the country code and name of the preceding entry iff the preceding
-(subsequent) entry ends (starts) directly before (after) the A1 entry and
-both preceding and subsequent entries contain the same country code.
-
-Then apply manual changes, either replacing A1 entries that could not be
-replaced automatically or overriding previously made automatic changes.
-"""
-
-def main():
-    options = parse_options()
-    assignments = read_file(options.in_maxmind)
-    assignments = apply_automatic_changes(assignments)
-    write_file(options.out_automatic, assignments)
-    manual_assignments = read_file(options.in_manual, must_exist=False)
-    assignments = apply_manual_changes(assignments, manual_assignments)
-    write_file(options.out_manual, assignments)
-    write_file(options.out_geoip, assignments, long_format=False)
-
-def parse_options():
-    parser = optparse.OptionParser()
-    parser.add_option('-i', action='store', dest='in_maxmind',
-            default='GeoIPCountryCSV.zip', metavar='FILE',
-            help='use the specified MaxMind GeoLite Country .zip or .csv '
-                 'file as input [default: %default]')
-    parser.add_option('-g', action='store', dest='in_manual',
-            default='geoip-manual', metavar='FILE',
-            help='use the specified .csv file for manual changes or to '
-                 'override automatic changes [default: %default]')
-    parser.add_option('-a', action='store', dest='out_automatic',
-            default="AutomaticGeoIPCountryWhois.csv", metavar='FILE',
-            help='write full input file plus automatic changes to the '
-                 'specified .csv file [default: %default]')
-    parser.add_option('-m', action='store', dest='out_manual',
-            default='ManualGeoIPCountryWhois.csv', metavar='FILE',
-            help='write full input file plus automatic and manual '
-                 'changes to the specified .csv file [default: %default]')
-    parser.add_option('-o', action='store', dest='out_geoip',
-            default='geoip', metavar='FILE',
-            help='write automatic and manual changes to the specified '
-                 'file in the short geoip format that can be shipped '
-                 'with tor [default: %default]')
-    (options, args) = parser.parse_args()
-    return options
-
-def read_file(path, must_exist=True):
-    if not os.path.exists(path):
-        if must_exist:
-            print 'File %s does not exist.  Exiting.' % (path, )
-            sys.exit(1)
-        else:
-            return
-    if path.endswith('.zip'):
-        zip_file = zipfile.ZipFile(path)
-        csv_content = zip_file.read('GeoIPCountryWhois.csv')
-        zip_file.close()
-    else:
-        csv_file = open(path)
-        csv_content = csv_file.read()
-        csv_file.close()
-    assignments = []
-    for line in csv_content.split('\n'):
-        stripped_line = line.strip()
-        if len(stripped_line) > 0 and not stripped_line.startswith('#'):
-            assignments.append(stripped_line)
-    return assignments
-
-def apply_automatic_changes(assignments):
-    print '\nApplying automatic changes...'
-    result_lines = []
-    prev_line = None
-    a1_lines = []
-    for line in assignments:
-        if '"A1"' in line:
-            a1_lines.append(line)
-        else:
-            if len(a1_lines) > 0:
-                new_a1_lines = process_a1_lines(prev_line, a1_lines, line)
-                for new_a1_line in new_a1_lines:
-                    result_lines.append(new_a1_line)
-                a1_lines = []
-            result_lines.append(line)
-            prev_line = line
-    if len(a1_lines) > 0:
-        new_a1_lines = process_a1_lines(prev_line, a1_lines, None)
-        for new_a1_line in new_a1_lines:
-            result_lines.append(new_a1_line)
-    return result_lines
-
-def process_a1_lines(prev_line, a1_lines, next_line):
-    if not prev_line or not next_line:
-        return a1_lines   # Can't merge first or last line in file.
-    if len(a1_lines) > 1:
-        return a1_lines   # Can't merge more than 1 line at once.
-    a1_line = a1_lines[0].strip()
-    prev_entry = parse_line(prev_line)
-    a1_entry = parse_line(a1_line)
-    next_entry = parse_line(next_line)
-    touches_prev_entry = int(prev_entry['end_num']) + 1 == \
-            int(a1_entry['start_num'])
-    touches_next_entry = int(a1_entry['end_num']) + 1 == \
-            int(next_entry['start_num'])
-    same_country_code = prev_entry['country_code'] == \
-            next_entry['country_code']
-    if touches_prev_entry and touches_next_entry and same_country_code:
-        new_line = format_line_with_other_country(a1_entry, prev_entry)
-        print '-%s\n+%s' % (a1_line, new_line, )
-        return [new_line]
-    else:
-        return a1_lines
-
-def parse_line(line):
-    if not line:
-        return None
-    keys = ['start_str', 'end_str', 'start_num', 'end_num',
-            'country_code', 'country_name']
-    stripped_line = line.replace('"', '').strip()
-    parts = stripped_line.split(',')
-    entry = dict((k, v) for k, v in zip(keys, parts))
-    return entry
-
-def format_line_with_other_country(original_entry, other_entry):
-    return '"%s","%s","%s","%s","%s","%s"' % (original_entry['start_str'],
-            original_entry['end_str'], original_entry['start_num'],
-            original_entry['end_num'], other_entry['country_code'],
-            other_entry['country_name'], )
-
-def apply_manual_changes(assignments, manual_assignments):
-    if not manual_assignments:
-        return assignments
-    print '\nApplying manual changes...'
-    manual_dict = {}
-    for line in manual_assignments:
-        start_num = parse_line(line)['start_num']
-        if start_num in manual_dict:
-            print ('Warning: duplicate start number in manual '
-                   'assignments:\n  %s\n  %s\nDiscarding first entry.' %
-                   (manual_dict[start_num], line, ))
-        manual_dict[start_num] = line
-    result = []
-    for line in assignments:
-        entry = parse_line(line)
-        start_num = entry['start_num']
-        if start_num in manual_dict:
-            manual_line = manual_dict[start_num]
-            manual_entry = parse_line(manual_line)
-            if entry['start_str'] == manual_entry['start_str'] and \
-                    entry['end_str'] == manual_entry['end_str'] and \
-                    entry['end_num'] == manual_entry['end_num']:
-                if len(manual_entry['country_code']) != 2:
-                    print '-%s' % (line, )  # only remove, don't replace
-                    del manual_dict[start_num]
-                elif entry['country_code'] != \
-                        manual_entry['country_code']:
-                    new_line = format_line_with_other_country(entry,
-                            manual_entry)
-                    print '-%s\n+%s' % (line, new_line, )
-                    result.append(new_line)
-                    del manual_dict[start_num]
-                else:
-                    print ('Warning: not applying ineffective manual '
-                           'change:\n  %s\n  %s' % (line, manual_line, ))
-                    result.append(line)
-            else:
-                print ('Warning: not applying manual change that is only '
-                       'a partial match:\n  %s\n  %s' %
-                       (line, manual_line, ))
-                result.append(line)
-        elif 'country_code' in entry and \
-                entry['country_code'] == 'A1':
-            print ('Warning: no manual replacement for A1 entry:\n  %s'
-                % (line, ))
-            result.append(line)
-        else:
-            result.append(line)
-    if len(manual_dict) > 0:
-        print 'Warning: could not apply all manual assignments:'
-        for line in manual_dict.values():
-            print '  %s' % (line, )
-    return result
-
-def write_file(path, assignments, long_format=True):
-    if long_format:
-        output_lines = assignments
-    else:
-        output_lines = []
-        for long_line in assignments:
-            entry = parse_line(long_line)
-            short_line = "%s,%s,%s" % (entry['start_num'],
-                    entry['end_num'], entry['country_code'], )
-            output_lines.append(short_line)
-    out_file = open(path, 'w')
-    out_file.write('\n'.join(output_lines))
-    out_file.close()
-
-if __name__ == '__main__':
-    main()
-

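For reference, the removed script ran entirely on its option defaults, so a
typical invocation (with GeoIPCountryCSV.zip in the working directory)
needed no arguments:

  python deanonymind.py

This produced AutomaticGeoIPCountryWhois.csv, ManualGeoIPCountryWhois.csv,
and the short-format geoip file; per write_file with long_format=False,
each geoip line carries only the start number, end number, and country
code, for example:

  16777216,16777471,US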
src/config/geoip-manual (+0, -80)

@@ -1,80 +0,0 @@
-# This file contains manual overrides of A1 entries (and possibly others)
-# in MaxMind's GeoLite Country database.  Use deanonymind.py in the same
-# directory to process this file when producing a new geoip file.  See
-# README.geoip in the same directory for details.
-
-# GB, because RIR delegation files say exactly this range
-# 46.16.32.0-46.16.39.255 is GB, even though neither previous nor next
-# MaxMind range is GB.  Both previous and next MaxMind ranges match RIR
-# delegation files, too.  -KL 2013-03-07
-"46.16.32.0","46.16.39.255","772808704","772810751","GB","United Kingdom"
-
-# CH, because previous MaxMind entry 46.19.141.0-46.19.142.255 is CH, and
-# RIR delegation files say 46.19.136.0-46.19.143.255 is CH.
-# -KL 2012-11-27
-"46.19.143.0","46.19.143.255","773033728","773033983","CH","Switzerland"
-
-# GB, because next MaxMind entry 46.166.129.0-46.166.134.255 is GB, and
-# RIR delegation files say 46.166.128.0-46.166.191.255 is GB.
-# -KL 2012-11-27
-"46.166.128.0","46.166.128.255","782663680","782663935","GB","United Kingdom"
-
-# US, because previous MaxMind entry 70.159.21.51-70.232.244.255 is US,
-# because next MaxMind entry 70.232.245.58-70.232.245.59 is A2 ("Satellite
-# Provider"), which is country information about as useless as A1, and
-# because RIR delegation files say 70.224.0.0-70.239.255.255 is US.
-# -KL 2012-11-27
-"70.232.245.0","70.232.245.57","1189672192","1189672249","US","United States"
-
-# US, because next MaxMind entry 70.232.246.0-70.240.141.255 is US,
-# because previous MaxMind entry 70.232.245.58-70.232.245.59 is A2
-# ("Satellite Provider"), which is country information about as useless
-# as A1, and because RIR delegation files say 70.224.0.0-70.239.255.255
-# is US.  -KL 2012-11-27
-"70.232.245.60","70.232.245.255","1189672252","1189672447","US","United States"
-
-# GB, despite neither previous (GE) nor next (LV) MaxMind entry being GB,
-# but because RIR delegation files agree with both previous and next
-# MaxMind entry and say GB for 91.228.0.0-91.228.3.255.  -KL 2012-11-27
-"91.228.0.0","91.228.3.255","1541668864","1541669887","GB","United Kingdom"
-
-# NL, because next MaxMind entry 176.56.173.0-176.56.173.63 is NL, and RIR
-# delegation files say 176.56.160.0-176.56.191.255 is NL.  -KL 2013-05-13
-"176.56.172.0","176.56.172.255","2956504064","2956504319","NL","Netherlands"
-
-# NL, despite neither previous (RU) nor next (GB) MaxMind entry being NL,
-# but because RIR delegation files say entire range
-# 176.56.160.0-176.56.191.255 is NL.  -KL 2013-05-13
-"176.56.174.0","176.56.174.255","2956504576","2956504831","NL","Netherlands"
-
-# GB, because RIR delegation files say exactly this range
-# 185.25.84.0-185.25.87.255 is GB, even though neither previous nor next
-# MaxMind range is GB.  Both previous and next MaxMind ranges match RIR
-# delegation files, too.  -KL 2013-05-13
-"185.25.84.0","185.25.87.255","3105444864","3105445887","GB","United Kingdom"
-
-# US, because next MaxMind entry 199.101.193.0-199.101.195.255 is US, and,
-# together with next entries, matches RIR delegation file entry
-# 199.101.192.0-199.101.199.255 which is US.  -KL 2013-05-13
-"199.101.192.0","199.101.192.255","3345334272","3345334527","US","United States"
-
-# US, because ARIN says 199.255.208.0-199.255.215.255 is US.
-# Changed entry start from 199.255.213.0 to 199.255.208.0 on 2013-08-12.
-# Split up into 199.255.208.0-199.255.209.127 and
-# 199.255.210.0-199.255.215.255 on 2013-10-11. -KL 2013-10-11
-"199.255.208.0","199.255.209.127","3355430912","3355431295","US","United States"
-"199.255.210.0","199.255.215.255","3355431424","3355432959","US","United States"
-
-# EU, despite neither previous (RU) nor next (SE) MaxMind entry being EU,
-# but because RIR delegation files agree with previous MaxMind entry and
-# say EU for 217.15.160.0-217.15.175.255.  -KL 2013-05-13
-"217.15.160.0","217.15.164.255","3641679872","3641681151","EU","Europe"
-
-# FR, because previous MaxMind entry 217.15.166.0-217.15.166.255 is FR,
-# and RIR delegation files contain a block 217.15.160.0-217.15.175.255
-# which, however, is EU, not FR.  But merging with next MaxMind entry
-# 217.15.176.0-217.15.191.255 which is KZ and which fully matches what
-# the RIR delegation files say seems unlikely to be correct.
-# -KL 2012-11-27
-"217.15.167.0","217.15.175.255","3641681664","3641683967","FR","France"
-

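The decimal columns in these overrides are simply the big-endian integer
forms of the dotted quads, so new entries are easy to sanity-check.  A
small helper (an editorial sketch using only the Python standard library)
that verifies the first entry above:

  import socket
  import struct

  def ip_to_num(ip):
      # Integer form used in columns 3 and 4 of geoip-manual entries.
      return struct.unpack("!L", socket.inet_aton(ip))[0]

  assert ip_to_num("46.16.32.0") == 772808704
  assert ip_to_num("46.16.39.255") == 772810751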
src/config/mmdb-convert.py (+446, -0)

@@ -0,0 +1,446 @@
+#!/usr/bin/python3
+
+#   This software has been dedicated to the public domain under the CC0
+#   public domain dedication.
+#
+#   To the extent possible under law, the person who associated CC0
+#   with mmdb-convert.py has waived all copyright and related or
+#   neighboring rights to mmdb-convert.py.
+#
+#   You should have received a copy of the CC0 legalcode along with this
+#   work in doc/cc0.txt.  If not, see
+#      <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+#  Nick Mathewson is responsible for this kludge, but takes no
+#  responsibility for it.
+
+"""This kludge is meant to
+   parse mmdb files in sufficient detail to dump out the old format
+   that Tor expects.  It's also meant to be pure-python.
+
+   When given a simplicity/speed tradeoff, it opts for simplicity.
+
+   You will not understand the code without understanding the MaxMind-DB
+   file format.  It is specified at:
+   https://github.com/maxmind/MaxMind-DB/blob/master/MaxMind-DB-spec.md.
+
+   This isn't so much tested.  When it breaks, you get to keep both
+   pieces.
+"""
+
+import struct
+import bisect
+import socket
+import binascii
+import sys
+import time
+
+METADATA_MARKER = b'\xab\xcd\xefMaxMind.com'
+
+# Here's some python2/python3 junk.  Better solutions wanted.
+try:
+    ord(b"1"[0])
+except TypeError:
+    def byte_to_int(b):
+        "convert a single element of a bytestring to an integer."
+        return b
+else:
+    byte_to_int = ord
+
+# Here's some more python2/python3 junk.  Better solutions wanted.
+try:
+    str(b"a", "utf8")
+except TypeError:
+    bytesToStr = str
+else:
+    def bytesToStr(b):
+        "convert a bytestring in utf8 to a string."
+        return str(b, 'utf8')
+
+def to_int(s):
+    "Parse a big-endian integer from bytestring s."
+    result = 0
+    for c in s:
+        result *= 256
+        result += byte_to_int(c)
+    return result
+
+def to_int24(s):
+    "Parse a pair of big-endian 24-bit integers from bytestring s."
+    a, b, c = struct.unpack("!HHH", s)
+    return ((a << 8) + (b >> 8)), (((b & 0xff) << 16) + c)
+
+def to_int32(s):
+    "Parse a pair of big-endian 32-bit integers from bytestring s."
+    a, b = struct.unpack("!LL", s)
+    return a, b
+
+def to_int28(s):
+    "Parse a pair of big-endian 28-bit integers from bytestring s."
+    a, b = struct.unpack("!LL", s + b'\x00')
+    return (((a & 0xf0) << 20) + (a >> 8)), ((a & 0x0f) << 24) + (b >> 8)
+
+class Tree(object):
+    "Holds a node in the tree"
+    def __init__(self, left, right):
+        self.left = left
+        self.right = right
+
+def resolve_tree(tree, data):
+    """Fill in the left_item and right_item fields for all values in the tree
+       so that they point to another Tree, or to a Datum, or to None."""
+    d = Datum(None, None, None, None)
+    def resolve_item(item):
+        "Helper: resolve a single index."
+        if item < len(tree):
+            return tree[item]
+        elif item == len(tree):
+            return None
+        else:
+            d.pos = (item - len(tree) - 16)
+            p = bisect.bisect_left(data, d)
+            assert data[p].pos == d.pos
+            return data[p]
+
+    for t in tree:
+        t.left_item = resolve_item(t.left)
+        t.right_item = resolve_item(t.right)
+
+def parse_search_tree(s, record_size):
+    """Given a bytestring and a record size in bits, parse the tree.
+       Return a list of nodes."""
+    record_bytes = (record_size*2) // 8
+    nodes = []
+    p = 0
+    try:
+        to_leftright = { 24: to_int24,
+                         28: to_int28,
+                         32: to_int32 }[ record_size ]
+    except KeyError:
+        raise NotImplementedError("Unsupported record size in bits: %d" %
+                                  record_size)
+    while p < len(s):
+        left, right = to_leftright(s[p:p+record_bytes])
+        p += record_bytes
+
+        nodes.append(Tree(left, right))
+
+    return nodes
+
+class Datum(object):
+    """Holds a single entry from the Data section"""
+    def __init__(self, pos, kind, ln, data):
+        self.pos = pos    # Position of this record within data section
+        self.kind = kind  # Type of this record; one of TP_*.
+        self.ln = ln      # Length field, which might be overloaded.
+        self.data = data  # Raw bytes data.
+        self.children = None # Used for arrays and maps.
+
+    def __repr__(self):
+        return "Datum(%r,%r,%r,%r)" % (self.pos, self.kind, self.ln, self.data)
+
+    # Comparison functions used for bsearch
+    def __lt__(self, other):
+        return self.pos < other.pos
+
+    def __gt__(self, other):
+        return self.pos > other.pos
+
+    def __eq__(self, other):
+        return self.pos == other.pos
+
+    def build_maps(self):
+        """If this is a map or array, fill in its 'map' field if it's a map,
+           and the 'map' field of all its children."""
+
+        if not hasattr(self, 'nChildren'):
+            return
+
+        if self.kind == TP_ARRAY:
+            del self.nChildren
+            for c in self.children:
+                c.build_maps()
+
+        elif self.kind == TP_MAP:
+            del self.nChildren
+            self.map = {}
+            for i in range(0, len(self.children), 2):
+                k = self.children[i].deref()
+                v = self.children[i+1].deref()
+                v.build_maps()
+                if k.kind != TP_UTF8:
+                    raise ValueError("Bad dictionary key type %d" % k.kind)
+                self.map[bytesToStr(k.data)] = v
+
+    def int_val(self):
+        """If this is an integer type, return its value"""
+        assert self.kind in (TP_UINT16, TP_UINT32, TP_UINT64,
+                             TP_UINT128, TP_SINT32)
+        i = to_int(self.data)
+        if self.kind == TP_SINT32:
+            if i & 0x80000000:
+                i = i - 0x100000000
+        return i
+
+    def deref(self):
+        """If this value is a pointer, return its pointed-to-value.  Chase
+           through multiple layers of pointers if need be.  If this isn't
+           a pointer, return it."""
+        n = 0
+        s = self
+        while s.kind == TP_PTR:
+            s = s.ptr
+            n += 1
+            assert n < 100
+        return s
+
+def resolve_pointers(data):
+    """Fill in the ptr field of every pointer in data."""
+    search = Datum(None, None, None, None)
+    for d in data:
+        if d.kind == TP_PTR:
+            search.pos = d.ln
+            p = bisect.bisect_left(data, search)
+            assert data[p].pos == d.ln
+            d.ptr = data[p]
+
+TP_PTR = 1
+TP_UTF8 = 2
+TP_DBL = 3
+TP_BYTES = 4
+TP_UINT16 = 5
+TP_UINT32 = 6
+TP_MAP = 7
+TP_SINT32 = 8
+TP_UINT64 = 9
+TP_UINT128 = 10
+TP_ARRAY = 11
+TP_DCACHE = 12
+TP_END = 13
+TP_BOOL = 14
+TP_FLOAT = 15
+
+def get_type_and_len(s):
+    """Data parsing helper: decode the type value and much-overloaded 'length'
+       field for the value starting at s.  Return a 3-tuple of type, length,
+       and number of bytes used to encode type-plus-length."""
+    c = byte_to_int(s[0])
+    tp = c >> 5
+    skip = 1
+    if tp == 0:
+        tp = byte_to_int(s[1])+7
+        skip = 2
+    ln = c & 31
+
+    # I'm sure I don't know what they were thinking here...
+    if tp == TP_PTR:
+        len_len = (ln >> 3) + 1
+        if len_len < 4:
+            ln &= 7
+            ln <<= len_len * 8
+        else:
+            ln = 0
+        ln += to_int(s[skip:skip+len_len])
+        ln += (0, 0, 2048, 526336, 0)[len_len]
+        skip += len_len
+    elif ln >= 29:
+        len_len = ln - 28
+        ln = to_int(s[skip:skip+len_len])
+        ln += (0, 29, 285, 65821)[len_len]
+        skip += len_len
+
+    return tp, ln, skip
+
+# Set of types for which 'length' doesn't mean length.
+IGNORE_LEN_TYPES = set([
+    TP_MAP,    # Length is number of key-value pairs that follow.
+    TP_ARRAY,  # Length is number of members that follow.
+    TP_PTR,    # Length is index to pointed-to data element.
+    TP_BOOL,   # Length is 0 or 1.
+    TP_DCACHE, # Length is number of members that follow.
+])
+
+def parse_data_section(s):
+    """Given a data section encoded in a bytestring, return a list of
+       Datum items."""
+
+    # Stack of possibly nested containers.  We use the 'nChildren' member of
+    # the last one to tell how many more items nest directly inside.
+    stack = []
+
+    # List of all items, including nested ones.
+    data = []
+
+    # Byte index within the data section.
+    pos = 0
+
+    while s:
+        tp, ln, skip = get_type_and_len(s)
+        if tp in IGNORE_LEN_TYPES:
+            real_len = 0
+        else:
+            real_len = ln
+
+        d = Datum(pos, tp, ln, s[skip:skip+real_len])
+        data.append(d)
+        pos += skip+real_len
+        s = s[skip+real_len:]
+
+        if stack:
+            stack[-1].children.append(d)
+            stack[-1].nChildren -= 1
+            if stack[-1].nChildren == 0:
+                del stack[-1]
+
+        if d.kind == TP_ARRAY:
+            d.nChildren = d.ln
+            d.children = []
+            stack.append(d)
+        elif d.kind == TP_MAP:
+            d.nChildren = d.ln * 2
+            d.children = []
+            stack.append(d)
+
+    return data
+
+def parse_mm_file(s):
+    """Parse a MaxMind-DB file."""
+    try:
+        metadata_ptr = s.rindex(METADATA_MARKER)
+    except ValueError:
+        raise ValueError("No metadata!")
+
+    metadata = parse_data_section(s[metadata_ptr+len(METADATA_MARKER):])
+
+    if metadata[0].kind != TP_MAP:
+        raise ValueError("Bad map")
+
+    metadata[0].build_maps()
+    mm = metadata[0].map
+
+    tree_size = (((mm['record_size'].int_val() * 2) // 8 ) *
+                 mm['node_count'].int_val())
+
+    if s[tree_size:tree_size+16] != b'\x00'*16:
+        raise ValueError("Missing section separator!")
+
+    tree = parse_search_tree(s[:tree_size], mm['record_size'].int_val())
+
+    data = parse_data_section(s[tree_size+16:metadata_ptr])
+
+    resolve_pointers(data)
+    resolve_tree(tree, data)
+
+    for d in data:
+        d.build_maps()
+
+    return metadata, tree, data
+
+def format_datum(datum):
+    """Given a Datum at a leaf of the tree, return the string that we should
+       write as its value.
+    """
+    try:
+        return bytesToStr(datum.map['country'].map['iso_code'].data)
+    except KeyError:
+        pass
+    return None
+
+IPV4_PREFIX = "0"*96
+
+def dump_item_ipv4(entries, prefix, val):
+    """Dump the information for an IPv4 address to entries, where 'prefix'
+       is a string holding a binary prefix for the address, and 'val' is the
+       value to dump.  If the prefix is not an IPv4 address (it does not start
+       with 96 bits of 0), then print nothing.
+    """
+    if not prefix.startswith(IPV4_PREFIX):
+        return
+    prefix = prefix[96:]
+    v = int(prefix, 2)
+    shift = 32 - len(prefix)
+    lo = v << shift
+    hi = ((v+1) << shift) - 1
+    entries.append((lo, hi, val))
+
+def fmt_item_ipv4(entry):
+    """Format an IPv4 range with lo and hi addresses in decimal form."""
+    return "%d,%d,%s\n"%(entry[0], entry[1], entry[2])
+
+def fmt_ipv6_addr(v):
+    """Given a 128-bit integer representing an ipv6 address, return a
+       string for that ipv6 address."""
+    return socket.inet_ntop(socket.AF_INET6, binascii.unhexlify("%032x"%v))
+
+def fmt_item_ipv6(entry):
+    """Format an IPv6 range with lo and hi addresses in hex form."""
+    return "%s,%s,%s\n"%(fmt_ipv6_addr(entry[0]),
+                         fmt_ipv6_addr(entry[1]),
+                         entry[2])
+
+IPV4_MAPPED_IPV6_PREFIX = "0"*80 + "1"*16
+IPV6_6TO4_PREFIX = "0010000000000010"
+TEREDO_IPV6_PREFIX = "0010000000000001" + "0"*16
+
+def dump_item_ipv6(entries, prefix, val):
+    """Dump the information for an IPv6 address prefix to entries, where
+       'prefix' is a string holding a binary prefix for the address,
+       and 'val' is the value to dump.  If the prefix is an IPv4 address
+       (starts with 96 bits of 0), is an IPv4-mapped IPv6 address
+       (::ffff:0:0/96), is in the 6to4 mapping subnet (2002::/16), or is
+       in the Teredo subnet (2001::/32), then print nothing.
+    """
+    if prefix.startswith(IPV4_PREFIX) or \
+       prefix.startswith(IPV4_MAPPED_IPV6_PREFIX) or \
+       prefix.startswith(IPV6_6TO4_PREFIX) or \
+       prefix.startswith(TEREDO_IPV6_PREFIX):
+        return
+    v = int(prefix, 2)
+    shift = 128 - len(prefix)
+    lo = v << shift
+    hi = ((v+1) << shift) - 1
+    entries.append((lo, hi, val))
+
+def dump_tree(entries, node, dump_item, prefix=""):
+    """Walk the tree rooted at 'node', and call dump_item on the
+       format_datum output of every leaf of the tree."""
+
+    if isinstance(node, Tree):
+        dump_tree(entries, node.left_item, dump_item, prefix+"0")
+        dump_tree(entries, node.right_item, dump_item, prefix+"1")
+    elif isinstance(node, Datum):
+        assert node.kind == TP_MAP
+        code = format_datum(node)
+        if code:
+            dump_item(entries, prefix, code)
+    else:
+        assert node is None
+
+def write_geoip_file(filename, metadata, the_tree, dump_item, fmt_item):
+    """Write the entries in the_tree to filename."""
+    entries = []
+    dump_tree(entries, the_tree[0], dump_item)
+    fobj = open(filename, 'w')
+
+    build_epoch = metadata[0].map['build_epoch'].int_val()
+    fobj.write("# Last updated based on %s Maxmind GeoLite2 Country\n"%
+               time.strftime('%B %-d %Y', time.gmtime(build_epoch)))
+
+    unwritten = None
+    for entry in entries:
+        if not unwritten:
+            unwritten = entry
+        elif unwritten[1] + 1 == entry[0] and unwritten[2] == entry[2]:
+            unwritten = (unwritten[0], entry[1], unwritten[2])
+        else:
+            fobj.write(fmt_item(unwritten))
+            unwritten = entry
+    if unwritten:
+        fobj.write(fmt_item(unwritten))
+    fobj.close()
+
+content = open(sys.argv[1], 'rb').read()
+metadata, the_tree, _ = parse_mm_file(content)
+
+write_geoip_file('geoip', metadata, the_tree, dump_item_ipv4, fmt_item_ipv4)
+write_geoip_file('geoip6', metadata, the_tree, dump_item_ipv6, fmt_item_ipv6)
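
The new converter takes the path of a MaxMind-DB file as its only argument
and writes geoip and geoip6 to the current directory.  A typical run (the
database filename here is only an example) looks like:

  python3 mmdb-convert.py GeoLite2-Country.mmdb

Per fmt_item_ipv4 and fmt_item_ipv6 above, geoip lines hold decimal range
bounds (e.g. 16777216,16777471,US) while geoip6 lines hold IPv6 addresses
(e.g. 2001:db8::,2001:db8:ffff:ffff:ffff:ffff:ffff:ffff,US), with adjacent
same-country ranges coalesced in write_geoip_file.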