From c98494fd4b7cd2238661856b8a056419eb988d5c Mon Sep 17 00:00:00 2001 From: Lynn Bendixsen Date: Tue, 26 Nov 2019 10:45:10 -0700 Subject: [PATCH 1/2] Added significant speed increase for smaller runs (select 25/30) Signed-off-by: Lynn Bendixsen --- text/5001-node-selection-algorithm/select.py | 86 ++++++++++++++------ 1 file changed, 63 insertions(+), 23 deletions(-) diff --git a/text/5001-node-selection-algorithm/select.py b/text/5001-node-selection-algorithm/select.py index 8fb980847..4aee87b77 100644 --- a/text/5001-node-selection-algorithm/select.py +++ b/text/5001-node-selection-algorithm/select.py @@ -6,7 +6,8 @@ different rows for stewards, and different numbers for N and M, should also be supported. ''' -import csv, os, sys, re, unittest +import csv, os, sys, re, unittest, datetime +from itertools import combinations max_f_for_steward_list = -1 f_from_data_file = 0 @@ -53,7 +54,7 @@ def parse_headers(rows, skip_f=False): rule = rules[rule_idx] row = rows[row_idx] row_idx += 1 - #print('Testing rule %d against row %d (%s)' % (rule_idx, row_idx, ','.join(row))) + print('Testing rule %d against row %d (%s)' % (rule_idx, row_idx, ','.join(row))) if apply_rule(row, vars, rule[0], rule[1], rule[2]): rule_idx += 1 continue @@ -76,6 +77,7 @@ def parse_stewards(rows, row_idx): faults = [] while row_idx < len(rows): row = rows[row_idx] + print(row) if is_steward_row(row): stewards.append(row[0]) mttrs.append(float(row[1])) @@ -132,11 +134,17 @@ def max_f_for_steward_count(n): return max(int((n - 1) / 3), 0) def load_data(fname, requested_f=max_f_for_steward_list): + print("In 'Load Data'") rows = load_clean_csv(fname) + print(rows) file_f, scenarios, liks, row_idx = parse_headers(rows) + print(file_f, scenarios, liks, row_idx) stewards, mttrs, faults = parse_stewards(rows, row_idx) + print(stewards, mttrs, faults) # Check validity of the f value we've been given. max_f = max_f_for_steward_count(len(stewards)) + if max_f > 8: + max_f=8 if file_f > 0 and (requested_f == f_from_data_file): requested_f = file_f elif requested_f == max_f_for_steward_list: @@ -209,9 +217,26 @@ def keep_if_better(self, candidate): class ComboAnalysis: '''Encapsulate info about a single combination of stewards.''' - def __init__(self, combo, stewards): + def __init__(self, faults, combo, stewards, mttrs): self.combo = sorted(combo) - self.steward_indexes = [stewards.index(s) for s in combo] + self.steward_indexes = {} # row index + self.combo_faults = [0,0,0,0,0,0,0,0,0,0,0,0] + for i in combo: # for every row index... + stewards_index = stewards.index(i) # row + self.steward_indexes[stewards_index] = mttrs[stewards_index] + self.combo_faults[0] += faults[stewards_index][0] + self.combo_faults[1] += faults[stewards_index][1] + self.combo_faults[2] += faults[stewards_index][2] + self.combo_faults[3] += faults[stewards_index][3] + self.combo_faults[4] += faults[stewards_index][4] + self.combo_faults[5] += faults[stewards_index][5] + self.combo_faults[6] += faults[stewards_index][6] + self.combo_faults[7] += faults[stewards_index][7] + self.combo_faults[8] += faults[stewards_index][8] + self.combo_faults[9] += faults[stewards_index][9] + self.combo_faults[10] += faults[stewards_index][10] + self.combo_faults[11] += faults[stewards_index][11] + #print(self.combo_faults) self.results = [] self._total = None def __getattr__(self, item): @@ -227,24 +252,24 @@ def __str__(self): class ScenarioResult: '''Encapsulate info about one combination of stewards in one scenario.''' - def __init__(self, scenario, scenarios, liks, faults, combo_indexes, f, mttrs): - self.name = scenario - self.idx = scenarios.index(scenario) - self.likelihood = liks[self.idx] + def __init__(self, index, liks, combo_indexes, combo_summed_faults, f, mttrs): + #self.name = scenario + #self.idx = scenarios.index(scenario) + self.likelihood = liks[index] self.combo_indexes = combo_indexes self.f = f - self.fault_count = 0 - relevant_mttrs = [] - profile = [] - for ci in combo_indexes: - relevant_mttrs.append(mttrs[ci]) - faults_for_member = faults[ci] - n = faults_for_member[self.idx] - profile.append(n) + #self.fault_count = 0 + #relevant_mttrs = [] + #profile = [] + """ for ci in combo_indexes.keys(): + #relevant_mttrs.append(mttrs[ci]) + n = faults[ci][index] + #profile.append(n) if n: - self.fault_count += 1 - self.profile = profile - self.failure_distance = self.f - self.fault_count + self.fault_count += 1 """ + #self.profile = profile + relevant_mttrs = list(combo_indexes.values()) + self.failure_distance = self.f - combo_summed_faults if self.failure_distance < 0: relevant_mttrs.sort() # The MTTR of the scenario is the time it will take for the i-th node to @@ -265,13 +290,25 @@ def __lt__(self, other): def analyze(f, scenarios, liks, stewards, mttrs, faults, bestN, quiet=False): m = (3 * f) + 1 + print ("m= %s",m) n = len(stewards) + print ("n= %s",n) total_combinations = factorial(n) / (factorial(m) * factorial(n - m)) if not quiet: print('Analyzing %d total %d-steward combinations (n=%d, f=%d).' % (total_combinations, m, n, f)) best = BestN(lambda x: x.combined_score, bestN) - for combo in unique_combinations(stewards, m): + combo_num=0 + print(datetime.datetime.now()) + #for combo in unique_combinations(stewards, m): + for combo in combinations(stewards, m): + combo_num = combo_num + 1 + if (combo_num % 10000) == 0: + print("10K") + print(datetime.datetime.now()) analyze_combo(combo, best, scenarios, liks, stewards, mttrs, faults, f) + if (combo_num % 100000000) == 0: + print(datetime.datetime.now()) + print(best) return best def analyze_combo(combo, best, scenarios, liks, stewards, mttrs, faults, f): @@ -280,11 +317,14 @@ def analyze_combo(combo, best, scenarios, liks, stewards, mttrs, faults, f): downtime than one of the current "top N" combinations, put this one into the "top N" list. ''' - ca = ComboAnalysis(combo, stewards) - for scenario in scenarios: - sr = ScenarioResult(scenario, scenarios, liks, faults, ca.steward_indexes, f, mttrs) + ca = ComboAnalysis(faults, combo, stewards, mttrs) + for index in range(len(scenarios)): + #print(ca.combo_summed_faults) + summed_faults = ca.combo_faults[index] + sr = ScenarioResult(index, liks, ca.steward_indexes, summed_faults, f, mttrs) ca.results.append(sr) best.keep_if_better(ca) + del ca def unique_combinations(items, n): if n == 0: From a815e6b2ad23449175d4b668d95714462eee4b0e Mon Sep 17 00:00:00 2001 From: Adam Burdett Date: Mon, 2 Dec 2019 10:57:34 -0700 Subject: [PATCH 2/2] alot faster code with room for more speed up Signed-off-by: Adam Burdett --- text/5001-node-selection-algorithm/select.py | 247 ++++++------------- 1 file changed, 70 insertions(+), 177 deletions(-) diff --git a/text/5001-node-selection-algorithm/select.py b/text/5001-node-selection-algorithm/select.py index 4aee87b77..82248eefb 100644 --- a/text/5001-node-selection-algorithm/select.py +++ b/text/5001-node-selection-algorithm/select.py @@ -7,11 +7,15 @@ ''' import csv, os, sys, re, unittest, datetime -from itertools import combinations +from itertools import combinations, islice, chain +#from concurrent.futures import ProcessPoolExecutor +#import numpy as np +import collections +from multiprocessing import Pool max_f_for_steward_list = -1 f_from_data_file = 0 - +config_best = 10 def load_clean_csv(fname): # Read file and parse into rows and cells. @@ -170,27 +174,30 @@ class BestN: worst item and keep replacing it with something better -- then sort once at the end. ''' - def __init__(self, quantifier, max=default_best_N): + def __init__(self, max=default_best_N): ''' Define a quantifier func that will be used to get numeric values for items in the list. By default, the quantifier should return numbers that are bigger if better; to sort ascending, make the quantifier return such a sequence times -1. Set max size of list.''' - assert quantifier + #assert quantifier assert type(max) is type(3) assert max > 0 assert max <= 10000 self._items = [] - self.quantifier = quantifier + #self.quantifier = lambda x: x.combined_score, config_best self._sorted = False self.max = max self.worst_idx = invalid_worst_idx self.worst_score = worst_best_score + + def _quantifier(self, x): + return x.combined_score, config_best def _find_worst(self): self.worst_idx = invalid_worst_idx self.worst_score = worst_best_score for i in range(len(self._items)): - n = self.quantifier(self._items[i]) + n = self._quantifier(self._items[i]) if (self.worst_idx == invalid_worst_idx) or (n < self.worst_score): self.worst_idx = i self.worst_score = n @@ -209,11 +216,14 @@ def keep_if_better(self, candidate): else: if self.worst_idx == invalid_worst_idx: self._find_worst() - score = self.quantifier(candidate) + score = self._quantifier(candidate) if score > self.worst_score: self._items[self.worst_idx] = candidate self.worst_idx = invalid_worst_idx # Need to recalculate self._sorted = False + def get_items(self): + return self._items + class ComboAnalysis: '''Encapsulate info about a single combination of stewards.''' @@ -221,27 +231,18 @@ def __init__(self, faults, combo, stewards, mttrs): self.combo = sorted(combo) self.steward_indexes = {} # row index self.combo_faults = [0,0,0,0,0,0,0,0,0,0,0,0] - for i in combo: # for every row index... + for i in combo: # needs to be optamized, maybe use panda.... stewards_index = stewards.index(i) # row - self.steward_indexes[stewards_index] = mttrs[stewards_index] - self.combo_faults[0] += faults[stewards_index][0] - self.combo_faults[1] += faults[stewards_index][1] - self.combo_faults[2] += faults[stewards_index][2] - self.combo_faults[3] += faults[stewards_index][3] - self.combo_faults[4] += faults[stewards_index][4] - self.combo_faults[5] += faults[stewards_index][5] - self.combo_faults[6] += faults[stewards_index][6] - self.combo_faults[7] += faults[stewards_index][7] - self.combo_faults[8] += faults[stewards_index][8] - self.combo_faults[9] += faults[stewards_index][9] - self.combo_faults[10] += faults[stewards_index][10] - self.combo_faults[11] += faults[stewards_index][11] + indexes = mttrs[stewards_index] + self.steward_indexes[stewards_index] = indexes + fault = faults[stewards_index] + self.combo_faults = [x+y for x, y in zip(self.combo_faults, fault)] #print(self.combo_faults) self.results = [] self._total = None def __getattr__(self, item): if item == 'combined_score': - if self._total is None: + if not self._total: self._total = sum([r.score for r in self.results]) return self._total raise AttributeError(item) @@ -268,14 +269,17 @@ def __init__(self, index, liks, combo_indexes, combo_summed_faults, f, mttrs): if n: self.fault_count += 1 """ #self.profile = profile - relevant_mttrs = list(combo_indexes.values()) + combo_values = combo_indexes.values() + relevant_mttrs = list( combo_values ) self.failure_distance = self.f - combo_summed_faults if self.failure_distance < 0: relevant_mttrs.sort() # The MTTR of the scenario is the time it will take for the i-th node to # repair its fault, where i is the number of the node that finally # gets the whole network back into consensus. - self.mttr = relevant_mttrs[-(self.failure_distance + 1)] + index = self.failure_distance + 1 + index = -index + self.mttr = relevant_mttrs[index] self.importance = self.likelihood * self.mttr else: # We don't have any repair time if we never lost consensus. @@ -288,30 +292,8 @@ def __init__(self, index, liks, combo_indexes, combo_summed_faults, f, mttrs): def __lt__(self, other): return self.score < other.score -def analyze(f, scenarios, liks, stewards, mttrs, faults, bestN, quiet=False): - m = (3 * f) + 1 - print ("m= %s",m) - n = len(stewards) - print ("n= %s",n) - total_combinations = factorial(n) / (factorial(m) * factorial(n - m)) - if not quiet: - print('Analyzing %d total %d-steward combinations (n=%d, f=%d).' % (total_combinations, m, n, f)) - best = BestN(lambda x: x.combined_score, bestN) - combo_num=0 - print(datetime.datetime.now()) - #for combo in unique_combinations(stewards, m): - for combo in combinations(stewards, m): - combo_num = combo_num + 1 - if (combo_num % 10000) == 0: - print("10K") - print(datetime.datetime.now()) - analyze_combo(combo, best, scenarios, liks, stewards, mttrs, faults, f) - if (combo_num % 100000000) == 0: - print(datetime.datetime.now()) - print(best) - return best - -def analyze_combo(combo, best, scenarios, liks, stewards, mttrs, faults, f): + +def analyze_combo(combo, scenarios, liks, stewards, mttrs, faults, f): ''' Given a single combination, evaluate its downtime across all scenarios. If it has less downtime than one of the current "top N" combinations, put this one into the "top N" @@ -323,16 +305,7 @@ def analyze_combo(combo, best, scenarios, liks, stewards, mttrs, faults, f): summed_faults = ca.combo_faults[index] sr = ScenarioResult(index, liks, ca.steward_indexes, summed_faults, f, mttrs) ca.results.append(sr) - best.keep_if_better(ca) - del ca - -def unique_combinations(items, n): - if n == 0: - yield [] - else: - for i in range(len(items)): - for cc in unique_combinations(items[i + 1:], n - 1): - yield [items[i]] + cc + return ca def report(best, f): m = (3 * f) + 1 @@ -343,124 +316,25 @@ def report(best, f): combo = best.items[i] print('%d: %s' % (i + 1, combo)) -def select(fname, suggested_f, bestN): - f, scenarios, liks, stewards, mttrs, faults = load_data(fname, suggested_f) - best = analyze(f, scenarios, liks, stewards, mttrs, faults, bestN) - report(best, f) - -class Tests(unittest.TestCase): - - def test_ScenarioResult(self): - stewards = 'A,B,C,D,E,F'.split(',') - scenarios = 'a,b,c'.split(',') - liks = [.5, .4, .3] - mttrs = [6,7,8,9,10,11] - combo_indexes = [0,2,3,4] - faults = [[0,1,1],[1,0,0],[0,0,0],[1,1,1],[0,1,0],[1,0,1]] - sr = ScenarioResult('b', scenarios, liks, faults, combo_indexes, 1, mttrs) - self.assertEquals(sr.name, 'b') - self.assertEquals(sr.idx, 1) - self.assertAlmostEquals(sr.likelihood, .4) - self.assertEquals(sr.mttr, 8) - self.assertAlmostEquals(sr.score, -6.4) - - def test_analyze(self): - stewards = 'A,B,C,D,E,F'.split(',') - scenarios = 'a,b,c'.split(',') - liks = [.5, .4, .3] - mttrs = [6,7,8,9,10,11] - faults = [[0,1,1],[1,0,0],[0,0,0],[1,1,1],[0,1,0],[1,0,1]] - best = analyze(1,scenarios,liks,stewards,mttrs,faults,3, quiet=True) - self.assertTrue(best.items[0].combined_score > best.items[1].combined_score) - self.assertTrue(best.items[1].combined_score > best.items[2].combined_score) - - def test_unique_combinations(self): - fruit = 'apple,banana,orange,pear'.split(',') - combos = '\n'.join(sorted(['+'.join(x) for x in unique_combinations(fruit, 2)])) - self.assertEquals(combos, 'apple+banana\napple+orange\napple+pear\nbanana+orange\nbanana+pear\norange+pear') - - def test_is_empty_row(self): - self.assertTrue(is_empty_row((None,None,None))) - self.assertTrue(is_empty_row(('','',''))) - self.assertFalse(is_empty_row((1,'',None))) - - def test_convert_float(self): - self.assertAlmostEquals(convert_float('1'), 1.0) - self.assertAlmostEquals(convert_float(1), 1.0) - self.assertAlmostEquals(convert_float('0.01'), 0.01) - self.assertAlmostEquals(convert_float('10%'), 0.1) - - def test_factorial(self): - self.assertEquals(factorial(1), 1) - self.assertEquals(factorial(2), 2) - self.assertEquals(factorial(3), 6) - self.assertEquals(factorial(4), 24) - - def test_has_string(self): - self.assertTrue(has_string(' abc ')) - self.assertTrue(has_string('X')) - self.assertFalse(has_string(' 123')) - - def test_has_num(self): - self.assertFalse(has_num(' abc ')) - self.assertFalse(has_num('X')) - self.assertTrue(has_num(' 123')) - self.assertTrue(has_num('-2')) - self.assertTrue(has_num('1%')) - - def test_is_steward_row(self): - self.assertTrue(is_steward_row(('x', '25', 1, '0', '0', '1'))) - self.assertFalse(is_steward_row(('x', '25', 5, '0', '0', '1'))) - self.assertFalse(is_steward_row(('x', '25', '1', '0', '0.1', '1'))) - - def test_max_f_for_steward_count(self): - self.assertEquals(max_f_for_steward_count(0), 0) - self.assertEquals(max_f_for_steward_count(3), 0) - self.assertEquals(max_f_for_steward_count(4), 1) - self.assertEquals(max_f_for_steward_count(5), 1) - self.assertEquals(max_f_for_steward_count(7), 2) - - def test_parse_headers(self): - rows = [x.split(',') for x in ''',,scheduled maintenance coincidence,botched upgrade,foo -likelihood per year (from MTBF),,1%,85%,17% -Steward,MTTR,fault?,fault?,fault? -Bank A,5,1,1,0 -Tech Firm B,13,1,0,1'''.replace('\r', '').split('\n')] - f, scenarios, liks, row_idx = parse_headers(rows) - self.assertEquals(row_idx, 3) - self.assertEquals(liks, [0.01,0.85,0.17]) - self.assertEquals(f, 0) - - def test_load_data_good(self): - fname = os.path.join(os.path.dirname(__file__), 'sample-data.csv') - f, scenarios, liks, stewards, mttrs, faults = load_data(fname, f_from_data_file) - self.assertEquals(f, 1) - self.assertEquals(scenarios[0], 'scheduled maintenance coincidence') - self.assertEquals(scenarios[11], 'major natural disaster, US East Coast') - self.assertEquals(liks, [0.01, 0.85, 0.0001, 0.05, 0.01, 0.8, 0.03, 0.5, 0.02, 0.03, 0.1, 0.0001]) - self.assertEquals(stewards, ['Bank A', 'Tech Firm B', 'University C', 'Law Firm D', 'NGO E', 'Government F', 'Consortium G', 'Tech Firm H', 'Biotech Firm J']) - self.assertEquals(mttrs, [5.0, 13.0, 11.0, 6.0, 6.0, 9.0, 12.0, 8.0, 7.0]) - self.assertEquals(faults[0], [1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0]) - self.assertEquals(faults[8], [1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0]) - - def test_not_enough_stewards(self): - fname = os.path.join(os.path.dirname(__file__), 'sample-data.csv') - self.assertRaises(Exception, load_data, fname, 5) - - def test_BestN(self): - # Keep a list of the 3 best integers. Always give it back in sorted form. - b = BestN(lambda x: x, 3) - def try_this(*args): - b = BestN(lambda x: x, 3) - for arg in args: - b.keep_if_better(arg) - return b - b = try_this(3,10,4,11,9) - self.assertEquals(b.items, [11,10,9]) - self.assertEquals(b.worst_idx, 2) - self.assertEquals(b.worst_score, 9) - b = try_this(-10,-9,-3.14,4.0,-1) - self.assertEquals(b.items, [4,-1,-3.14]) +def batch(iterable, n=1): # https://stackoverflow.com/questions/8290397/how-to-split-an-iterable-in-constant-size-chunks + l = len(iterable) + for ndx in range(0, l, n): + yield iterable[ndx:min(ndx + n, l)] + +""" def grouper(n, iterable): + "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx" + args = [iter(iterable)] * n + return zip_longest(*args) """ + +def grouper_it(n, iterable): # https://pastebin.com/YkKFvm8b + it = iter(iterable) + while True: + chunk_it = islice(it, n) + try: + first_el = next(chunk_it) + except StopIteration: + return + yield chain((first_el,), chunk_it) if __name__ == '__main__': if len(sys.argv) >= 2 and sys.argv[1] == 'test': @@ -473,4 +347,23 @@ def try_this(*args): parser.add_argument('--f', '-f', help='Number of faulted nodes to allow before consensus is lost. -1=max allowed by steward list (default); 0=as in data file', type=int, default=max_f_for_steward_list) parser.add_argument('--best', help='Specify how many of the best steward combinations to show.', type=int, default=10) args = parser.parse_args() - select(args.fname, args.f, args.best) + + f, scenarios, liks, stewards, mttrs, faults = load_data(args.fname, args.f) + # analyze + m = (3 * f) + 1 + print ("m= %s",m) + n = len(stewards) + print ("n= %s",n) + total_combinations = factorial(n) / (factorial(m) * factorial(n - m)) + print('Analyzing %d total %d-steward combinations (n=%d, f=%d).' % (total_combinations, m, n, f)) + + def calculate_combination(combo): + return analyze_combo(combo, scenarios, liks, stewards, mttrs, faults, f) + + with Pool() as pool: + best = BestN() + combos = grouper_it(800, combinations(stewards, m)) + for cs in combos: + combos_with_score = pool.map_async( calculate_combination, cs) + [best.keep_if_better(combo) for combo in combos_with_score.get() ] + report(best, f)