class BaselineExperiment(Experiment):
    """Baseline experiment whose HTTP step is a headless-Chrome capture.

    For every URL of every input file this runs TCP connect (when that
    primitive could be imported), the HTTP step, TLS fingerprinting, DNS
    lookups and traceroutes, and stores one result dict per input file in
    ``self.results``.

    Input files are either plain URL lists or CSV files. In CSV input the
    first column is the URL and the remaining columns are kept as metadata.
    """
    name = "baseline_with_browser"
    # country-specific, world baseline
    # this can be overridden by the main thread
    input_files = ['country.csv', 'world.csv']
    tls_for_all = True

    def __init__(self, input_files):
        """Store the inputs and read optional settings from ``self.params``.

        :param input_files: mapping of file name to file contents (``run``
                            iterates it with ``.items()``).
        """
        self.input_files = input_files
        self.results = []
        self.exclude_nameservers = []
        self.traceroute_methods = []

        # NOTE(review): self.params is never assigned in this class;
        # presumably Experiment or its loader provides it -- confirm.
        if self.params is not None:
            # process parameters
            if "traceroute_methods" in self.params:
                self.traceroute_methods = self.params['traceroute_methods']
            if "exclude_nameservers" in self.params:
                self.exclude_nameservers = self.params['exclude_nameservers']
            if "tls_for_all" in self.params:
                self.tls_for_all = self.params['tls_for_all']

        if os.geteuid() != 0:
            logging.info("Centinel is not running as root, "
                         "traceroute will be limited to UDP.")
            # only change to udp if method list was not empty before
            if self.traceroute_methods:
                self.traceroute_methods = ["udp"]

    def run(self):
        """Run ``run_file`` on every input file and collect the results.

        A KeyboardInterrupt stops the current file only; whatever was
        gathered for it so far is still appended to ``self.results``.
        """
        for input_file in self.input_files.items():
            logging.info("Testing input file %s..." % (input_file[0]))
            # Initialize the results for this input file.
            # This can be anything from file name to version
            # to any useful information.
            result = {"file_name": input_file[0]}

            try:
                self.run_file(input_file, result)
            except KeyboardInterrupt:
                logging.warn("Experiment interrupted, storing partial results...")

            self.results.append(result)

    def run_file(self, input_file, result):
        """Parse one input file and run every measurement on its URLs.

        :param input_file: ``(file_name, file_contents)`` pair;
                           ``file_contents`` is handed to ``csv.reader``.
        :param result: dict that is filled in place with the measurement
                       results, URL/file metadata and the total run time.
        """
        _, file_contents = input_file

        run_start_time = time.time()

        tcp_connect_inputs = []
        http_inputs = []
        tls_inputs = []
        dns_inputs = []
        traceroute_inputs = []
        url_metadata_results = {}
        file_metadata = {}
        file_comments = []
        index_row = None

        # first parse the input and create data structures
        #
        # First few lines are expected to be comments in key: value
        # format. The first line after that could be our column header
        # row, starting with "url", and the rest are data rows.
        # This is a sample input file we're trying to parse:
        #
        # # comment: Global List,,,,,
        # # date: 03-17-2015,,,,,
        # # version: 1,,,,,
        # # description: This is the global list. Last updated in 2012.,,,,
        # url,country,category,description,rationale,provider
        # http://8thstreetlatinas.com,glo,PORN,,,PRIV
        # http://abpr2.railfan.net,glo,MISC,Pictures of trains,,PRIV
        csvreader = csv.reader(file_contents, delimiter=',', quotechar='"')
        for row in csvreader:
            # Blank lines come back as [] and rows may start with an empty
            # cell; indexing row[0][0] on those raised IndexError and
            # aborted the whole file.
            if not row or not row[0].strip():
                continue

            # parse file comments, if it looks like "key : value",
            # parse it as a key-value pair. otherwise, just
            # store it as a raw comment.
            if row[0][0] == '#':
                comment = row[0][1:].strip()
                if ':' in comment:
                    key, value = comment.split(':', 1)
                    file_metadata[key.strip()] = value.strip()
                else:
                    file_comments.append(comment)
                continue

            # detect the header row and store it
            # it is usually the first row and starts with "url,"
            if row[0].strip().lower() == "url":
                index_row = row
                continue

            # non-empty thanks to the guard at the top of the loop
            url = row[0].strip().encode('utf-8')

            meta = row[1:]
            http_ssl = False
            ssl_port = 443
            port = 80

            # parse the URL to extract netlocation, HTTP path, domain name,
            # and HTTP method (SSL or plain)
            try:
                urlparse_object = urlparse.urlparse(url)
                http_netloc = urlparse_object.netloc

                # if netloc is not urlparse-able, add // to the start
                # of URL
                if http_netloc == '':
                    urlparse_object = urlparse.urlparse('//%s' % url)
                    http_netloc = urlparse_object.netloc

                netloc_parts = http_netloc.split(':')
                domain_name = netloc_parts[0]

                http_path = urlparse_object.path
                if http_path == '':
                    http_path = '/'

                # we assume scheme is either empty, or "http", or "https"
                # other schemes (e.g. "ftp") are out of the scope of this
                # measurement
                if urlparse_object.scheme == "https":
                    http_ssl = True

                if len(netloc_parts) == 2:
                    # Keep both ports as ints. The SSL port used to be
                    # stored as a string, so (host, "8443") and the default
                    # (host, 443) had different types in the TCP inputs.
                    explicit_port = int(netloc_parts[1])
                    if http_ssl:
                        ssl_port = explicit_port
                    port = explicit_port

            except Exception as exp:
                logging.exception("%s: failed to parse URL: %s" % (url, exp))
                http_netloc = url
                http_ssl = False
                port = 80
                ssl_port = 443
                http_path = '/'
                domain_name = url

            # TCP connect
            if http_ssl:
                if (domain_name, ssl_port) not in tcp_connect_inputs:
                    tcp_connect_inputs.append((domain_name, ssl_port))
            else:
                if (domain_name, port) not in tcp_connect_inputs:
                    tcp_connect_inputs.append((domain_name, port))

            # HTTP GET
            http_inputs.append({"host": http_netloc,
                                "path": http_path,
                                "ssl": http_ssl,
                                "url": url})

            # TLS certificate
            # this will only work if the URL starts with https://, or
            # if tls_for_all config parameter is set
            if self.tls_for_all or http_ssl:
                key = "%s:%s" % (domain_name, ssl_port)
                if key not in tls_inputs:
                    tls_inputs.append(key)

            # DNS Lookup
            if domain_name not in dns_inputs:
                dns_inputs.append(domain_name)

            # Traceroute
            if domain_name not in traceroute_inputs:
                traceroute_inputs.append(domain_name)

            # Meta-data
            url_metadata_results[url] = meta

        # the actual tests are run concurrently here

        if tcp_connect is not None:
            shuffle(tcp_connect_inputs)
            start = time.time()
            logging.info("Running TCP connect tests...")
            result["tcp_connect"] = {}
            tcp_connect.tcp_connect_batch(tcp_connect_inputs,
                                          results=result["tcp_connect"])
            elapsed = time.time() - start
            logging.info("Running TCP requests took "
                         "%d seconds for %d hosts and ports." % (elapsed,
                                                                 len(tcp_connect_inputs)))

        shuffle(http_inputs)
        start = time.time()
        logging.info("Running HTTP GET requests...")
        result["http"] = {}

        try:
            http.get_requests_batch(http_inputs, results=result["http"])
        # backward-compatibility with versions that don't support this
        except TypeError:
            result["http"] = http.get_requests_batch(http_inputs)

        elapsed = time.time() - start
        logging.info("HTTP GET requests took "
                     "%d seconds for %d URLs." % (elapsed,
                                                  len(http_inputs)))
        shuffle(tls_inputs)
        start = time.time()
        logging.info("Running TLS certificate requests...")
        result["tls"] = {}

        try:
            tls.get_fingerprint_batch(tls_inputs, results=result["tls"])
        # backward-compatibility with versions that don't support this
        except TypeError:
            result["tls"] = tls.get_fingerprint_batch(tls_inputs)

        elapsed = time.time() - start
        logging.info("TLS certificate requests took "
                     "%d seconds for %d domains." % (elapsed,
                                                     len(tls_inputs)))
        shuffle(dns_inputs)
        start = time.time()
        logging.info("Running DNS requests...")
        result["dns"] = {}
        if len(self.exclude_nameservers) > 0:
            logging.info("Excluding nameservers: %s" % ", ".join(self.exclude_nameservers))

            try:
                dnslib.lookup_domains(dns_inputs, results=result["dns"],
                                      exclude_nameservers=self.exclude_nameservers)
            # backward-compatibility with versions that don't support this
            except TypeError:
                result["dns"] = dnslib.lookup_domains(dns_inputs,
                                                      exclude_nameservers=self.exclude_nameservers)
        else:
            try:
                dnslib.lookup_domains(dns_inputs, results=result["dns"])
            # backward-compatibility with versions that don't support this
            except TypeError:
                result["dns"] = dnslib.lookup_domains(dns_inputs)

        elapsed = time.time() - start
        logging.info("DNS requests took "
                     "%d seconds for %d domains." % (elapsed,
                                                     len(dns_inputs)))

        for method in self.traceroute_methods:
            shuffle(traceroute_inputs)
            start = time.time()
            logging.info("Running %s traceroutes..." % (method.upper()))
            result["traceroute.%s" % method] = {}

            try:
                traceroute.traceroute_batch(traceroute_inputs,
                                            results=result["traceroute.%s" % method],
                                            method=method)
            # backward-compatibility with versions that don't support this
            except TypeError:
                result["traceroute.%s" % method] = traceroute.traceroute_batch(traceroute_inputs, method)

            elapsed = time.time() - start
            logging.info("Traceroutes took %d seconds for %d "
                         "domains." % (elapsed, len(traceroute_inputs)))

        # if we have an index row, we should turn URL metadata
        # into dictionaries
        if index_row is not None:
            indexed_url_metadata = {}
            for url, meta in url_metadata_results.items():
                indexed_meta = {}
                try:
                    for i in range(1, len(index_row)):
                        indexed_meta[index_row[i]] = meta[i - 1]
                except IndexError:
                    # the row has fewer columns than the header; keep the
                    # columns that were present
                    pass
                indexed_url_metadata[url] = indexed_meta
            url_metadata_results = indexed_url_metadata

        result["url_metadata"] = url_metadata_results
        result["file_metadata"] = file_metadata
        result["file_comments"] = file_comments

        run_finish_time = time.time()
        elapsed = run_finish_time - run_start_time
        result["total_time"] = elapsed
        logging.info("Testing took a total of %d seconds." % elapsed)
def get_requests_batch(input_list, results=None):
    """Capture every entry of ``input_list`` and collect the captures by URL.

    :param input_list: iterable of dicts, each with a ``'url'`` key.
    :param results: optional dict that is filled in place; a new dict is
                    created for the call when it is omitted.
    :return: the dict that was filled, keyed by URL.
    """
    # A ``{}`` default is created once and shared by every call that omits
    # ``results``, so captures leaked from one batch into the next.
    # Compare with None rather than truthiness: callers pass an empty dict
    # and rely on it being filled in place.
    if results is None:
        results = {}
    for row in input_list:
        results[row['url']] = get_request(row)
    return results


def get_request(http_input):
    """Capture one URL with a headless-Chrome ``Client``.

    :param http_input: dict with a ``'url'`` key.
    :return: dict with ``'har'`` (the parsed JSON of the file at
             ``capture['har']``) and ``'screenshot'`` (the file at
             ``capture['screenshot']`` as a base64 PNG data URI).
    """
    url = http_input['url']
    # The message used a %s placeholder with str.format(), so the URL was
    # never interpolated; hand it to logging as a lazy argument instead.
    logging.debug("Sending HTTP GET request for %s.", url)
    client = Client()
    capture = client.capture(url)
    with open(capture['har'], 'r') as f:
        har = json.load(f, encoding='utf-8')
    with open(capture['screenshot'], 'rb') as f:
        screenshot = 'data:image/png;base64,' + base64.b64encode(f.read())
    return {'har': har, 'screenshot': screenshot}
{"response": response, - "request": request} + "request": request, + "timings": conn.timings} return result @@ -113,6 +118,7 @@ def get_request(netloc, path="/", headers=None, ssl=False, first_response_information = {"redirect_count": 0, "redirect_loop": False, "full_url": url, + "timings": first_response['timings'], "response": first_response["response"], "request": first_response["request"]} http_results = first_response_information @@ -154,6 +160,7 @@ def get_request(netloc, path="/", headers=None, ssl=False, if is_redirecting: http_results["redirects"] = {} first_response_information = {"full_url": url, + "timings": first_response['timings'], "response": first_response["response"], "request": first_response["request"]} http_results["redirects"][0] = first_response_information @@ -220,6 +227,7 @@ def get_request(netloc, path="/", headers=None, ssl=False, http_results["full_url"] = redirect_url redirect_information = {"full_url": redirect_url, + "timings": first_response['timings'], "response": redirect_http_result["response"], "request": redirect_http_result["request"]} http_results["redirects"][redirect_number] = redirect_information @@ -230,6 +238,7 @@ def get_request(netloc, path="/", headers=None, ssl=False, first_response_information = {"redirect_count": 0, "redirect_loop": False, "full_url": url, + "timings": first_response['timings'], "response": first_response["response"], "request": first_response["request"]} http_results = first_response_information diff --git a/centinel/primitives/http_helper.py b/centinel/primitives/http_helper.py index e31e790..4add191 100644 --- a/centinel/primitives/http_helper.py +++ b/centinel/primitives/http_helper.py @@ -13,6 +13,14 @@ def __init__(self, host='127.0.0.1', port=None, timeout=10): self.host = host self.port = port self.timeout = timeout + # Follows the HAR timing spec + self.timings = { + "send": 0, + "wait": 0, + "receive": 0, + "ssl": -1 + } + def header_function(self, header_line): # HTTP standard specifies 
that headers are encoded in iso-8859-1. @@ -81,6 +89,28 @@ def request(self, path="/", header=None, ssl=False, timeout=None): self.status = c.getinfo(pycurl.RESPONSE_CODE) + # It's not exactly clear from the docs + # (https://curl.haxx.se/libcurl/c/curl_easy_getinfo.html#TIMES) how to + # map those times to HAR timings, but here's my best guess. + ROUND_TO = 3 + self.timings['dns'] = round(c.getinfo(pycurl.NAMELOOKUP_TIME) * 1000, ROUND_TO) + self.timings['wait'] = round((c.getinfo(pycurl.STARTTRANSFER_TIME) - + c.getinfo(pycurl.PRETRANSFER_TIME)) * 1000, ROUND_TO) + self.timings['receive'] = round((c.getinfo(pycurl.TOTAL_TIME) - + c.getinfo(pycurl.STARTTRANSFER_TIME)) * 1000, ROUND_TO) + if ssl: + self.timings['ssl'] = round((c.getinfo(pycurl.APPCONNECT_TIME) - + c.getinfo(pycurl.CONNECT_TIME)) * 1000, ROUND_TO) + self.timings['connect'] = round((c.getinfo(pycurl.APPCONNECT_TIME) - + c.getinfo(pycurl.NAMELOOKUP_TIME)) * 1000, ROUND_TO) + self.timings['send'] = round((c.getinfo(pycurl.PRETRANSFER_TIME) - + c.getinfo(pycurl.APPCONNECT_TIME)) * 1000, ROUND_TO) + else: + self.timings['connect'] = round((c.getinfo(pycurl.CONNECT_TIME) - + c.getinfo(pycurl.NAMELOOKUP_TIME)) * 1000, ROUND_TO) + self.timings['send'] = round((c.getinfo(pycurl.PRETRANSFER_TIME) - + c.getinfo(pycurl.CONNECT_TIME)) * 1000, ROUND_TO) + c.close() encoding = None diff --git a/centinel/vpn/cli.py b/centinel/vpn/cli.py index 80a1f3c..df11c74 100755 --- a/centinel/vpn/cli.py +++ b/centinel/vpn/cli.py @@ -12,6 +12,7 @@ import signal import dns.resolver import json +from contextlib import contextmanager import centinel.backend import centinel.client @@ -21,11 +22,11 @@ import centinel.vpn.ipvanish as ipvanish import centinel.vpn.purevpn as purevpn import centinel.vpn.vpngate as vpngate +import centinel.vpn.nordvpn as nordvpn PID_FILE = "/tmp/centinel.lock" - -def parse_args(): +def arg_parser(): parser = argparse.ArgumentParser() parser.add_argument('--auth-file', '-u', dest='auth_file', 
def parse_args():
    """Parse the command line with the parser built by ``arg_parser()``.

    :return: whatever ``parse_args()`` of that parser returns.
    """
    parser = arg_parser()
    return parser.parse_args()
vpn_dir = return_abs_path(directory, "vpns") +def get_vpn_config_files(directory, vm_num, vm_index, shuffle_lists, reduce_vp): conf_dir = return_abs_path(directory, "configs") - home_dir = return_abs_path(directory, "home") - if auth_file is not None: - auth_file = return_abs_path(directory, auth_file) - if crt_file is not None: - crt_file = return_abs_path(directory, crt_file) - if tls_auth is not None: - tls_auth = return_abs_path(directory, tls_auth) conf_list = sorted(os.listdir(conf_dir)) - # determine VPN provider - vpn_provider = None - if "hma" in directory: - vpn_provider = "hma" - elif "ipvanish" in directory: - vpn_provider = "ipvanish" - elif "purevpn" in directory: - vpn_provider = "purevpn" - elif "vpngate" in directory: - vpn_provider = "vpngate" - if vpn_provider: - logging.info("Detected VPN provider is %s" % vpn_provider) - else: - logging.warning("Cannot determine VPN provider!") - # reduce size of list if reduce_vp is true if reduce_vp: logging.info("Reducing list size. 
@contextmanager
def vpn_connection(timeout=60, **kwargs):
    """Context manager that starts an OpenVPN object and stops it on exit.

    :param timeout: passed to ``openvpn.OpenVPN`` as ``timeout``.
    :param kwargs: passed through to ``openvpn.OpenVPN`` unchanged.
    :return: yields the ``OpenVPN`` object after ``start()`` was called.
    """
    tunnel = openvpn.OpenVPN(timeout=timeout, **kwargs)
    try:
        tunnel.start()
        yield tunnel
    finally:
        # runs on normal exit and when start() or the with-body raises
        tunnel.stop()


def vpn_config_file_to_ip(filename):
    """Return ``filename`` without its last extension.

    :param filename: name of a VPN config file.
    :return: the part of ``filename`` before the final extension.
    """
    stem, _extension = os.path.splitext(filename)
    return stem


def determine_provider(directory):
    """Guess the VPN provider from a substring of ``directory``.

    :param directory: path that is searched for a known provider name.
    :return: the first provider name found in ``directory``, or None.
    """
    # order matters: the first name contained in the path wins
    known_providers = ("hma", "ipvanish", "purevpn", "vpngate", "nordvpn")
    vpn_provider = next(
        (candidate for candidate in known_providers if candidate in directory),
        None)
    if vpn_provider is None:
        logging.warning("Cannot determine VPN provider!")
    else:
        logging.info("Detected VPN provider is %s" % vpn_provider)
    return vpn_provider
to run the experiments for each VPN") + logging.warn("Excluding vantage points from: %s" % exclude_list) + + # iterate over each VPN + vpn_dir = return_abs_path(directory, "vpns") + conf_dir = return_abs_path(directory, "configs") + home_dir = return_abs_path(directory, "home") + if auth_file is not None: + auth_file = return_abs_path(directory, auth_file) + if crt_file is not None: + crt_file = return_abs_path(directory, crt_file) + if tls_auth is not None: + tls_auth = return_abs_path(directory, tls_auth) + conf_list = sorted(os.listdir(conf_dir)) + + vpn_provider = determine_provider(directory) + + conf_list = get_vpn_config_files(directory, vm_num, + vm_index, shuffle_lists, reduce_vp) + number = 1 total = len(conf_list) @@ -240,7 +272,7 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, # [ip-address].ovpn, we can extract IP address from filename # and use it to geolocate and fetch experiments before connecting # to VPN. - vpn_address, extension = os.path.splitext(filename) + vpn_address = vpn_config_file_to_ip(filename) country = None try: meta = centinel.backend.get_meta(config.params, @@ -296,28 +328,26 @@ def scan_vpns(directory, auth_file, crt_file, tls_auth, key_direction, logging.info("%s: Starting VPN." % filename) - vpn = openvpn.OpenVPN(timeout=60, auth_file=auth_file, config_file=vpn_config, - crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) + with vpn_connection(auth_file=auth_file, config_file=vpn_config, + crt_file=crt_file, tls_auth=tls_auth, key_direction=key_direction) as vpn: + if not vpn.started: + logging.error("%s: Failed to start VPN!" % filename) + vpn.stop() + time.sleep(5) + continue - vpn.start() - if not vpn.started: - logging.error("%s: Failed to start VPN!" % filename) - vpn.stop() - time.sleep(5) - continue + logging.info("%s: Running Centinel." 
def unzip(source_filename, dest_dir):
    """Extract every member of the zip archive into ``dest_dir``."""
    with zipfile.ZipFile(source_filename) as zf:
        zf.extractall(dest_dir)


def _remote_address(config_path):
    """Return the address of the first ``remote`` directive, or ''."""
    # the with-block closes the file; the original left the handle open
    with open(config_path) as config:
        for line in config:
            fields = line.split()
            # Match the directive name exactly: a prefix test also hits
            # "remote-random" and "remote-cert-tls". The length check
            # avoids an IndexError on a bare "remote" line.
            if len(fields) >= 2 and fields[0] == 'remote':
                return fields[1]
    return ""


def create_config_files(directory):
    """
    Initialize directory ready for vpn walker

    Downloads the NordVPN config archive, copies every ``ovpn_tcp`` config
    that has a ``remote`` address to ``<directory>/<address>.ovpn`` and
    writes ``<directory>/servers.txt`` with one ``address|country`` line per
    copied config. The country is the first two characters of the original
    file name.

    :param directory: the path where you want this to happen
    :return:
    """
    # Some constant strings
    config_zip_url = "https://downloads.nordcdn.com/configs/archives/servers/ovpn.zip"

    if not os.path.exists(directory):
        os.makedirs(directory)

    logging.info("Starting to download NordVPN config file zip")
    url_opener = urllib.URLopener()
    zip_path = os.path.join(directory, '../configs.zip')
    unzip_path = os.path.join(directory, '../unzipped')
    if not os.path.exists(unzip_path):
        os.makedirs(unzip_path)

    url_opener.retrieve(config_zip_url, zip_path)
    logging.info("Extracting zip file")
    unzip(zip_path, unzip_path)

    # remove zip file
    os.remove(zip_path)

    # move all config files to /vpns
    server_country = {}
    configs_path = os.path.join(unzip_path, 'ovpn_tcp')
    for filename in os.listdir(configs_path):
        if not filename.endswith('.ovpn'):
            continue
        country = filename[0:2]
        file_path = os.path.join(configs_path, filename)

        # get ip address for this vpn
        ip = _remote_address(file_path)
        if ip:
            new_path = os.path.join(directory, ip + '.ovpn')
            shutil.copyfile(file_path, new_path)
            server_country[ip] = country
        else:
            logging.warning("Unable to resolve hostname and remove %s" % filename)
            os.remove(file_path)

    with open(os.path.join(directory, 'servers.txt'), 'w') as f:
        for ip in server_country:
            f.write('|'.join([ip, server_country[ip]]) + '\n')

    # remove extracted folder
    shutil.rmtree(unzip_path)
not started") - for line in self.notifications.split('\n'): + logging.warn('OpenVPN not started') + log_lines = self.notifications.split('\n') + for line in log_lines: logging.warn("OpenVPN output:\t\t%s" % line) + raise VPNConnectionError("OpenVPN not started", log_lines) def stop(self, timeout=None): """ @@ -97,7 +106,15 @@ def stop(self, timeout=None): """ if not timeout: timeout = self.timeout - os.killpg(os.getpgid(self.process.pid), signal.SIGTERM) + + process_group_id = os.getpgid(self.process.pid) + try: + os.killpg(process_group_id, signal.SIGTERM) + except OSError: + # Because sometimes we have to sudo to send the signal + cmd = ['sudo', 'kill', '-' + str(process_group_id)] + process = subprocess.call(cmd) + self.thread.join(timeout) if self.stopped: logging.info("OpenVPN stopped") diff --git a/setup.py b/setup.py index 82f749c..00a0cba 100644 --- a/setup.py +++ b/setup.py @@ -21,12 +21,15 @@ "requests >= 2.9.1", "trparse >= 0.2.1", "pycurl >= 7.19.5", + "pytz >= 2017.3", "urllib3 >= 1.9.1", "dnspython >= 1.12.0", "BeautifulSoup >= 3.2.1", "httplib2 >= 0.9.2", "bs4 >= 0.0.1", - "geopy >= 1.11.0"], + "numpy", + "geopy >= 1.11.0", + "headlesschrome >= 0.2.0"], include_package_data=True, entry_points={ 'console_scripts': ['centinel=centinel.cli:run',