From d2ae806c207c190ff8705d81481c2d4c3d197363 Mon Sep 17 00:00:00 2001 From: "Carlos G. Oliver" Date: Mon, 18 Mar 2019 17:34:10 -0400 Subject: [PATCH] python 3 compatible --- src/algorithms.py | 4 +- src/algorithms_distances.py | 38 ++--- src/graph.py | 8 +- src/main.py | 154 +++++++++--------- src/struc2vec.py | 314 +++++++++++++++++------------------- src/utils.py | 9 +- 6 files changed, 261 insertions(+), 266 deletions(-) diff --git a/src/algorithms.py b/src/algorithms.py index 37da7b0..7cf8e94 100644 --- a/src/algorithms.py +++ b/src/algorithms.py @@ -22,7 +22,7 @@ def generate_parameters_random_walk(workers): logging.info('Executing layer {}...'.format(layer)) weights = restoreVariableFromDisk('distances_nets_weights-layer-'+str(layer)) - for k,list_weights in weights.iteritems(): + for k,list_weights in weights.items(): if(layer not in sum_weights): sum_weights[layer] = 0 if(layer not in amount_edges): @@ -51,7 +51,7 @@ def generate_parameters_random_walk(workers): amount_neighbours[layer] = {} - for k,list_weights in weights.iteritems(): + for k,list_weights in weights.items(): cont_neighbours = 0 for w in list_weights: if(w > average_weight[layer]): diff --git a/src/algorithms_distances.py b/src/algorithms_distances.py index 67db2e1..a71c95e 100644 --- a/src/algorithms_distances.py +++ b/src/algorithms_distances.py @@ -63,7 +63,7 @@ def getCompactDegreeLists(g, root, maxDegree,calcUntilLayer): if(timeToDepthIncrease == 0): list_d = [] - for degree,freq in l.iteritems(): + for degree,freq in l.items(): list_d.append((degree,freq)) list_d.sort(key=lambda x: x[0]) listas[depth] = np.array(list_d,dtype=np.int32) @@ -164,19 +164,19 @@ def preprocess_degreeLists(): dList = {} dFrequency = {} - for v,layers in degreeList.iteritems(): + for v,layers in degreeList.items(): dFrequency[v] = {} - for layer,degreeListLayer in layers.iteritems(): + for layer,degreeListLayer in layers.items(): dFrequency[v][layer] = {} for degree in degreeListLayer: if(degree not in dFrequency[v][layer]): dFrequency[v][layer][degree] = 0 dFrequency[v][layer][degree] += 1 - for v,layers in dFrequency.iteritems(): + for v,layers in dFrequency.items(): dList[v] = {} - for layer,frequencyList in layers.iteritems(): + for layer,frequencyList in layers.items(): list_d = [] - for degree,freq in frequencyList.iteritems(): + for degree,freq in frequencyList.items(): list_d.append((degree,freq)) list_d.sort(key=lambda x: x[0]) dList[v][layer] = np.array(list_d,dtype='float') @@ -294,7 +294,7 @@ def calc_distances(part, compactDegree = False): else: dist_func = cost - for v1,nbs in vertices.iteritems(): + for v1,nbs in vertices.items(): lists_v1 = degreeList[v1] for v2 in nbs: @@ -363,7 +363,7 @@ def selectVertices(layer,fractionCalcDists): vertices_selected = deque() - for vertices,layers in distances.iteritems(): + for vertices,layers in distances.items(): if(previousLayer not in layers): continue if(layers[previousLayer] <= threshold): @@ -380,7 +380,7 @@ def preprocess_consolides_distances(distances, startLayer = 1): logging.info('Consolidating distances...') - for vertices,layers in distances.iteritems(): + for vertices,layers in distances.items(): keys_layers = sorted(layers.keys()) startLayer = min(len(keys_layers),startLayer) for layer in range(0,startLayer): @@ -399,7 +399,7 @@ def exec_bfs_compact(G,workers,calcUntilLayer): degreeList = {} t0 = time() - vertices = G.keys() + vertices = list(G.keys()) parts = workers chunks = partition(vertices,parts) @@ -437,7 +437,7 @@ def exec_bfs(G,workers,calcUntilLayer): degreeList = {} t0 = time() - vertices = G.keys() + vertices = list(G.keys()) parts = workers chunks = partition(vertices,parts) @@ -445,6 +445,7 @@ def exec_bfs(G,workers,calcUntilLayer): part = 1 for c in chunks: + print(G, c, calcUntilLayer) job = executor.submit(getDegreeListsVertices,G,c,calcUntilLayer) futures[job] = part part += 1 @@ -459,6 +460,7 @@ def exec_bfs(G,workers,calcUntilLayer): t1 = time() logging.info('Execution time - BFS: {}m'.format((t1-t0)/60)) + print("BFS DONE") return @@ -471,8 +473,8 @@ def generate_distances_network_part1(workers): logging.info('Executing part {}...'.format(part)) distances = restoreVariableFromDisk('distances-'+str(part)) - for vertices,layers in distances.iteritems(): - for layer,distance in layers.iteritems(): + for vertices,layers in distances.items(): + for layer,distance in layers.items(): vx = vertices[0] vy = vertices[1] if(layer not in weights_distances): @@ -481,7 +483,7 @@ def generate_distances_network_part1(workers): logging.info('Part {} executed.'.format(part)) - for layer,values in weights_distances.iteritems(): + for layer,values in weights_distances.items(): saveVariableOnDisk(values,'weights_distances-layer-'+str(layer)) return @@ -493,8 +495,8 @@ def generate_distances_network_part2(workers): logging.info('Executing part {}...'.format(part)) distances = restoreVariableFromDisk('distances-'+str(part)) - for vertices,layers in distances.iteritems(): - for layer,distance in layers.iteritems(): + for vertices,layers in distances.items(): + for layer,distance in layers.items(): vx = vertices[0] vy = vertices[1] if(layer not in graphs): @@ -507,7 +509,7 @@ def generate_distances_network_part2(workers): graphs[layer][vy].append(vx) logging.info('Part {} executed.'.format(part)) - for layer,values in graphs.iteritems(): + for layer,values in graphs.items(): saveVariableOnDisk(values,'graphs-layer-'+str(layer)) return @@ -524,7 +526,7 @@ def generate_distances_network_part3(): alias_method_q = {} weights = {} - for v,neighbors in graphs.iteritems(): + for v,neighbors in graphs.items(): e_list = deque() sum_w = 0.0 diff --git a/src/graph.py b/src/graph.py index d488275..e103366 100644 --- a/src/graph.py +++ b/src/graph.py @@ -38,7 +38,7 @@ def nodes(self): return self.keys() def adjacency_iter(self): - return self.iteritems() + return self.items() def subgraph(self, nodes={}): subgraph = Graph() @@ -124,12 +124,12 @@ def number_of_nodes(self): def gToDict(self): d = {} - for k,v in self.iteritems(): + for k,v in self.items(): d[k] = v return d def printAdjList(self): - for key,value in self.iteritems(): + for key,value in self.items(): print (key,":",value) @@ -280,7 +280,7 @@ def from_adjlist_unchecked(adjlist): def from_dict(d): G = Graph() - for k,v in d.iteritems(): + for k,v in d.items(): G[k] = v return G diff --git a/src/main.py b/src/main.py index 9c37f04..a782141 100644 --- a/src/main.py +++ b/src/main.py @@ -13,117 +13,117 @@ logging.basicConfig(filename='struc2vec.log',filemode='w',level=logging.DEBUG,format='%(asctime)s %(message)s') def parse_args(): - ''' - Parses the struc2vec arguments. - ''' - parser = argparse.ArgumentParser(description="Run struc2vec.") + ''' + Parses the struc2vec arguments. + ''' + parser = argparse.ArgumentParser(description="Run struc2vec.") - parser.add_argument('--input', nargs='?', default='graph/karate.edgelist', - help='Input graph path') + parser.add_argument('--input', nargs='?', default='graph/karate.edgelist', + help='Input graph path') - parser.add_argument('--output', nargs='?', default='emb/karate.emb', - help='Embeddings path') + parser.add_argument('--output', nargs='?', default='emb/karate.emb', + help='Embeddings path') - parser.add_argument('--dimensions', type=int, default=128, - help='Number of dimensions. Default is 128.') + parser.add_argument('--dimensions', type=int, default=128, + help='Number of dimensions. Default is 128.') - parser.add_argument('--walk-length', type=int, default=80, - help='Length of walk per source. Default is 80.') + parser.add_argument('--walk-length', type=int, default=80, + help='Length of walk per source. Default is 80.') - parser.add_argument('--num-walks', type=int, default=10, - help='Number of walks per source. Default is 10.') + parser.add_argument('--num-walks', type=int, default=10, + help='Number of walks per source. Default is 10.') - parser.add_argument('--window-size', type=int, default=10, - help='Context size for optimization. Default is 10.') + parser.add_argument('--window-size', type=int, default=10, + help='Context size for optimization. Default is 10.') - parser.add_argument('--until-layer', type=int, default=None, - help='Calculation until the layer.') + parser.add_argument('--until-layer', type=int, default=None, + help='Calculation until the layer.') - parser.add_argument('--iter', default=5, type=int, + parser.add_argument('--iter', default=5, type=int, help='Number of epochs in SGD') - parser.add_argument('--workers', type=int, default=4, - help='Number of parallel workers. Default is 8.') + parser.add_argument('--workers', type=int, default=4, + help='Number of parallel workers. Default is 8.') - parser.add_argument('--weighted', dest='weighted', action='store_true', - help='Boolean specifying (un)weighted. Default is unweighted.') - parser.add_argument('--unweighted', dest='unweighted', action='store_false') - parser.set_defaults(weighted=False) + parser.add_argument('--weighted', dest='weighted', action='store_true', + help='Boolean specifying (un)weighted. Default is unweighted.') + parser.add_argument('--unweighted', dest='unweighted', action='store_false') + parser.set_defaults(weighted=False) - parser.add_argument('--directed', dest='directed', action='store_true', - help='Graph is (un)directed. Default is undirected.') - parser.add_argument('--undirected', dest='undirected', action='store_false') - parser.set_defaults(directed=False) + parser.add_argument('--directed', dest='directed', action='store_true', + help='Graph is (un)directed. Default is undirected.') + parser.add_argument('--undirected', dest='undirected', action='store_false') + parser.set_defaults(directed=False) - parser.add_argument('--OPT1', default=False, type=bool, + parser.add_argument('--OPT1', default=False, type=bool, help='optimization 1') - parser.add_argument('--OPT2', default=False, type=bool, + parser.add_argument('--OPT2', default=False, type=bool, help='optimization 2') - parser.add_argument('--OPT3', default=False, type=bool, - help='optimization 3') - return parser.parse_args() + parser.add_argument('--OPT3', default=False, type=bool, + help='optimization 3') + return parser.parse_args() def read_graph(): - ''' - Reads the input network. - ''' - logging.info(" - Loading graph...") - G = graph.load_edgelist(args.input,undirected=True) - logging.info(" - Graph loaded.") - return G + ''' + Reads the input network. + ''' + logging.info(" - Loading graph...") + G = graph.load_edgelist(args.input,undirected=True) + logging.info(" - Graph loaded.") + return G def learn_embeddings(): - ''' - Learn embeddings by optimizing the Skipgram objective using SGD. - ''' - logging.info("Initializing creation of the representations...") - walks = LineSentence('random_walks.txt') - model = Word2Vec(walks, size=args.dimensions, window=args.window_size, min_count=0, hs=1, sg=1, workers=args.workers, iter=args.iter) - model.wv.save_word2vec_format(args.output) - logging.info("Representations created.") - - return + ''' + Learn embeddings by optimizing the Skipgram objective using SGD. + ''' + logging.info("Initializing creation of the representations...") + walks = LineSentence('random_walks.txt') + model = Word2Vec(walks, size=args.dimensions, window=args.window_size, min_count=0, hs=1, sg=1, workers=args.workers, iter=args.iter) + model.wv.save_word2vec_format(args.output) + logging.info("Representations created.") + + return def exec_struc2vec(args): - ''' - Pipeline for representational learning for all nodes in a graph. - ''' - if(args.OPT3): - until_layer = args.until_layer - else: - until_layer = None + ''' + Pipeline for representational learning for all nodes in a graph. + ''' + if(args.OPT3): + until_layer = args.until_layer + else: + until_layer = None - G = read_graph() - G = struc2vec.Graph(G, args.directed, args.workers, untilLayer = until_layer) + G = read_graph() + G = struc2vec.Graph(G, args.directed, args.workers, untilLayer = until_layer) - if(args.OPT1): - G.preprocess_neighbors_with_bfs_compact() - else: - G.preprocess_neighbors_with_bfs() + if(args.OPT1): + G.preprocess_neighbors_with_bfs_compact() + else: + G.preprocess_neighbors_with_bfs() - if(args.OPT2): - G.create_vectors() - G.calc_distances(compactDegree = args.OPT1) - else: - G.calc_distances_all_vertices(compactDegree = args.OPT1) + if(args.OPT2): + G.create_vectors() + G.calc_distances(compactDegree = args.OPT1) + else: + G.calc_distances_all_vertices(compactDegree = args.OPT1) - G.create_distances_network() - G.preprocess_parameters_random_walk() + G.create_distances_network() + G.preprocess_parameters_random_walk() - G.simulate_walks(args.num_walks, args.walk_length) + G.simulate_walks(args.num_walks, args.walk_length) - return G + return G def main(args): - G = exec_struc2vec(args) + G = exec_struc2vec(args) - learn_embeddings() + learn_embeddings() if __name__ == "__main__": - args = parse_args() - main(args) + args = parse_args() + main(args) diff --git a/src/struc2vec.py b/src/struc2vec.py index 05887d6..48061ad 100644 --- a/src/struc2vec.py +++ b/src/struc2vec.py @@ -14,232 +14,222 @@ class Graph(): - def __init__(self, g, is_directed, workers, untilLayer = None): + def __init__(self, g, is_directed, workers, untilLayer = None): - logging.info(" - Converting graph to dict...") - self.G = g.gToDict() - logging.info("Graph converted.") + logging.info(" - Converting graph to dict...") + self.G = g.gToDict() + logging.info("Graph converted.") - self.num_vertices = g.number_of_nodes() - self.num_edges = g.number_of_edges() - self.is_directed = is_directed - self.workers = workers - self.calcUntilLayer = untilLayer - logging.info('Graph - Number of vertices: {}'.format(self.num_vertices)) - logging.info('Graph - Number of edges: {}'.format(self.num_edges)) + self.num_vertices = g.number_of_nodes() + self.num_edges = g.number_of_edges() + self.is_directed = is_directed + self.workers = workers + self.calcUntilLayer = untilLayer + logging.info('Graph - Number of vertices: {}'.format(self.num_vertices)) + logging.info('Graph - Number of edges: {}'.format(self.num_edges)) - def preprocess_neighbors_with_bfs(self): + def preprocess_neighbors_with_bfs(self): - with ProcessPoolExecutor(max_workers=self.workers) as executor: - job = executor.submit(exec_bfs,self.G,self.workers,self.calcUntilLayer) - - job.result() + with ProcessPoolExecutor(max_workers=self.workers) as executor: + job = executor.submit(exec_bfs,self.G,self.workers,self.calcUntilLayer) + + job.result() - return + return - def preprocess_neighbors_with_bfs_compact(self): + def preprocess_neighbors_with_bfs_compact(self): - with ProcessPoolExecutor(max_workers=self.workers) as executor: - job = executor.submit(exec_bfs_compact,self.G,self.workers,self.calcUntilLayer) - - job.result() + with ProcessPoolExecutor(max_workers=self.workers) as executor: + job = executor.submit(exec_bfs_compact,self.G,self.workers,self.calcUntilLayer) + + job.result() - return + return - def preprocess_degree_lists(self): + def preprocess_degree_lists(self): - with ProcessPoolExecutor(max_workers=self.workers) as executor: - job = executor.submit(preprocess_degreeLists) - - job.result() + with ProcessPoolExecutor(max_workers=self.workers) as executor: + job = executor.submit(preprocess_degreeLists) + + job.result() - return + return - def create_vectors(self): - logging.info("Creating degree vectors...") - degrees = {} - degrees_sorted = set() - G = self.G - for v in G.keys(): - degree = len(G[v]) - degrees_sorted.add(degree) - if(degree not in degrees): - degrees[degree] = {} - degrees[degree]['vertices'] = deque() - degrees[degree]['vertices'].append(v) - degrees_sorted = np.array(list(degrees_sorted),dtype='int') - degrees_sorted = np.sort(degrees_sorted) + def create_vectors(self): + logging.info("Creating degree vectors...") + degrees = {} + degrees_sorted = set() + G = self.G + for v in G.keys(): + degree = len(G[v]) + degrees_sorted.add(degree) + if(degree not in degrees): + degrees[degree] = {} + degrees[degree]['vertices'] = deque() + degrees[degree]['vertices'].append(v) + degrees_sorted = np.array(list(degrees_sorted),dtype='int') + degrees_sorted = np.sort(degrees_sorted) - l = len(degrees_sorted) - for index, degree in enumerate(degrees_sorted): - if(index > 0): - degrees[degree]['before'] = degrees_sorted[index - 1] - if(index < (l - 1)): - degrees[degree]['after'] = degrees_sorted[index + 1] - logging.info("Degree vectors created.") - logging.info("Saving degree vectors...") - saveVariableOnDisk(degrees,'degrees_vector') + l = len(degrees_sorted) + for index, degree in enumerate(degrees_sorted): + if(index > 0): + degrees[degree]['before'] = degrees_sorted[index - 1] + if(index < (l - 1)): + degrees[degree]['after'] = degrees_sorted[index + 1] + logging.info("Degree vectors created.") + logging.info("Saving degree vectors...") + saveVariableOnDisk(degrees,'degrees_vector') - def calc_distances_all_vertices(self,compactDegree = False): + def calc_distances_all_vertices(self,compactDegree = False): - logging.info("Using compactDegree: {}".format(compactDegree)) - if(self.calcUntilLayer): - logging.info("Calculations until layer: {}".format(self.calcUntilLayer)) + logging.info("Using compactDegree: {}".format(compactDegree)) + if(self.calcUntilLayer): + logging.info("Calculations until layer: {}".format(self.calcUntilLayer)) - futures = {} + futures = {} - count_calc = 0 + count_calc = 0 - vertices = list(reversed(sorted(self.G.keys()))) + vertices = list(reversed(sorted(self.G.keys()))) - if(compactDegree): - logging.info("Recovering degreeList from disk...") - degreeList = restoreVariableFromDisk('compactDegreeList') - else: - logging.info("Recovering compactDegreeList from disk...") - degreeList = restoreVariableFromDisk('degreeList') + if(compactDegree): + logging.info("Recovering degreeList from disk...") + degreeList = restoreVariableFromDisk('compactDegreeList') + else: + logging.info("Recovering compactDegreeList from disk...") + degreeList = restoreVariableFromDisk('degreeList') - parts = self.workers - chunks = partition(vertices,parts) + parts = self.workers + chunks = partition(vertices,parts) - t0 = time() + t0 = time() - with ProcessPoolExecutor(max_workers = self.workers) as executor: + with ProcessPoolExecutor(max_workers = self.workers) as executor: - part = 1 - for c in chunks: - logging.info("Executing part {}...".format(part)) - list_v = [] - for v in c: - list_v.append([vd for vd in degreeList.keys() if vd > v]) - job = executor.submit(calc_distances_all, c, list_v, degreeList,part, compactDegree = compactDegree) - futures[job] = part - part += 1 + part = 1 + for c in chunks: + logging.info("Executing part {}...".format(part)) + list_v = [] + for v in c: + list_v.append([vd for vd in degreeList.keys() if vd > v]) + job = executor.submit(calc_distances_all, c, list_v, degreeList,part, compactDegree = compactDegree) + futures[job] = part + part += 1 - logging.info("Receiving results...") + logging.info("Receiving results...") - for job in as_completed(futures): - job.result() - r = futures[job] - logging.info("Part {} Completed.".format(r)) - - logging.info('Distances calculated.') - t1 = time() - logging.info('Time : {}m'.format((t1-t0)/60)) - - return + for job in as_completed(futures): + job.result() + r = futures[job] + logging.info("Part {} Completed.".format(r)) + + logging.info('Distances calculated.') + t1 = time() + logging.info('Time : {}m'.format((t1-t0)/60)) + + return - def calc_distances(self, compactDegree = False): + def calc_distances(self, compactDegree = False): - logging.info("Using compactDegree: {}".format(compactDegree)) - if(self.calcUntilLayer): - logging.info("Calculations until layer: {}".format(self.calcUntilLayer)) + logging.info("Using compactDegree: {}".format(compactDegree)) + if(self.calcUntilLayer): + logging.info("Calculations until layer: {}".format(self.calcUntilLayer)) - futures = {} - #distances = {} + futures = {} + #distances = {} - count_calc = 0 + count_calc = 0 - G = self.G - vertices = G.keys() + G = self.G + vertices = G.keys() - parts = self.workers - chunks = partition(vertices,parts) + parts = self.workers + chunks = partition(vertices,parts) - with ProcessPoolExecutor(max_workers = 1) as executor: + with ProcessPoolExecutor(max_workers = 1) as executor: - logging.info("Split degree List...") - part = 1 - for c in chunks: - job = executor.submit(splitDegreeList,part,c,G,compactDegree) - job.result() - logging.info("degreeList {} completed.".format(part)) - part += 1 + logging.info("Split degree List...") + part = 1 + for c in chunks: + job = executor.submit(splitDegreeList,part,c,G,compactDegree) + job.result() + logging.info("degreeList {} completed.".format(part)) + part += 1 - - with ProcessPoolExecutor(max_workers = self.workers) as executor: + + with ProcessPoolExecutor(max_workers = self.workers) as executor: - part = 1 - for c in chunks: - logging.info("Executing part {}...".format(part)) - job = executor.submit(calc_distances, part, compactDegree = compactDegree) - futures[job] = part - part += 1 + part = 1 + for c in chunks: + logging.info("Executing part {}...".format(part)) + job = executor.submit(calc_distances, part, compactDegree = compactDegree) + futures[job] = part + part += 1 - logging.info("Receiving results...") - for job in as_completed(futures): - job.result() - r = futures[job] - logging.info("Part {} completed.".format(r)) + logging.info("Receiving results...") + for job in as_completed(futures): + job.result() + r = futures[job] + logging.info("Part {} completed.".format(r)) - return + return - def consolide_distances(self): + def consolide_distances(self): - distances = {} + distances = {} - parts = self.workers - for part in range(1,parts + 1): - d = restoreVariableFromDisk('distances-'+str(part)) - preprocess_consolides_distances(distances) - distances.update(d) + parts = self.workers + for part in range(1,parts + 1): + d = restoreVariableFromDisk('distances-'+str(part)) + preprocess_consolides_distances(distances) + distances.update(d) - preprocess_consolides_distances(distances) - saveVariableOnDisk(distances,'distances') + preprocess_consolides_distances(distances) + saveVariableOnDisk(distances,'distances') - def create_distances_network(self): + def create_distances_network(self): - with ProcessPoolExecutor(max_workers=1) as executor: - job = executor.submit(generate_distances_network,self.workers) + with ProcessPoolExecutor(max_workers=1) as executor: + job = executor.submit(generate_distances_network,self.workers) - job.result() + job.result() - return + return - def preprocess_parameters_random_walk(self): + def preprocess_parameters_random_walk(self): - with ProcessPoolExecutor(max_workers=1) as executor: - job = executor.submit(generate_parameters_random_walk,self.workers) + with ProcessPoolExecutor(max_workers=1) as executor: + job = executor.submit(generate_parameters_random_walk,self.workers) - job.result() + job.result() - return + return - def simulate_walks(self,num_walks,walk_length): + def simulate_walks(self,num_walks,walk_length): - # for large graphs, it is serially executed, because of memory use. - if(len(self.G) > 500000): + # for large graphs, it is serially executed, because of memory use. + if(len(self.G) > 500000): - with ProcessPoolExecutor(max_workers=1) as executor: - job = executor.submit(generate_random_walks_large_graphs,num_walks,walk_length,self.workers,self.G.keys()) + with ProcessPoolExecutor(max_workers=1) as executor: + job = executor.submit(generate_random_walks_large_graphs,num_walks,walk_length,self.workers,list(self.G.keys())) - job.result() + job.result() - else: + else: - with ProcessPoolExecutor(max_workers=1) as executor: - job = executor.submit(generate_random_walks,num_walks,walk_length,self.workers,self.G.keys()) + with ProcessPoolExecutor(max_workers=1) as executor: + job = executor.submit(generate_random_walks,num_walks,walk_length,self.workers,list(self.G.keys())) - job.result() - - - return - - - - - - - - + job.result() + return diff --git a/src/utils.py b/src/utils.py index b236857..5098664 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- from time import time import logging,inspect -import cPickle as pickle +# import cPickle as pickle +import pickle from itertools import islice import os.path @@ -16,12 +17,12 @@ def isPickle(fname): def chunks(data, SIZE=10000): it = iter(data) - for i in xrange(0, len(data), SIZE): + for i in range(0, len(data), SIZE): yield {k:data[k] for k in islice(it, SIZE)} def partition(lst, n): division = len(lst) / float(n) - return [ lst[int(round(division * i)): int(round(division * (i + 1)))] for i in xrange(n) ] + return [ lst[int(round(division * i)): int(round(division * (i + 1)))] for i in range(n) ] def restoreVariableFromDisk(name): logging.info('Recovering variable...') @@ -38,6 +39,8 @@ def saveVariableOnDisk(f,name): logging.info('Saving variable on disk...') t0 = time() with open(folder_pickles + name + '.pickle', 'wb') as handle: + if type(f).__name__ == 'dict_keys': + f = list(f) pickle.dump(f, handle, protocol=pickle.HIGHEST_PROTOCOL) t1 = time() logging.info('Variable saved. Time: {}m'.format((t1-t0)/60))