Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def generate_parameters_random_walk(workers):
logging.info('Executing layer {}...'.format(layer))
weights = restoreVariableFromDisk('distances_nets_weights-layer-'+str(layer))

for k,list_weights in weights.iteritems():
for k,list_weights in weights.items():
if(layer not in sum_weights):
sum_weights[layer] = 0
if(layer not in amount_edges):
Expand Down Expand Up @@ -51,7 +51,7 @@ def generate_parameters_random_walk(workers):

amount_neighbours[layer] = {}

for k,list_weights in weights.iteritems():
for k,list_weights in weights.items():
cont_neighbours = 0
for w in list_weights:
if(w > average_weight[layer]):
Expand Down
38 changes: 20 additions & 18 deletions src/algorithms_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def getCompactDegreeLists(g, root, maxDegree,calcUntilLayer):
if(timeToDepthIncrease == 0):

list_d = []
for degree,freq in l.iteritems():
for degree,freq in l.items():
list_d.append((degree,freq))
list_d.sort(key=lambda x: x[0])
listas[depth] = np.array(list_d,dtype=np.int32)
Expand Down Expand Up @@ -164,19 +164,19 @@ def preprocess_degreeLists():

dList = {}
dFrequency = {}
for v,layers in degreeList.iteritems():
for v,layers in degreeList.items():
dFrequency[v] = {}
for layer,degreeListLayer in layers.iteritems():
for layer,degreeListLayer in layers.items():
dFrequency[v][layer] = {}
for degree in degreeListLayer:
if(degree not in dFrequency[v][layer]):
dFrequency[v][layer][degree] = 0
dFrequency[v][layer][degree] += 1
for v,layers in dFrequency.iteritems():
for v,layers in dFrequency.items():
dList[v] = {}
for layer,frequencyList in layers.iteritems():
for layer,frequencyList in layers.items():
list_d = []
for degree,freq in frequencyList.iteritems():
for degree,freq in frequencyList.items():
list_d.append((degree,freq))
list_d.sort(key=lambda x: x[0])
dList[v][layer] = np.array(list_d,dtype='float')
Expand Down Expand Up @@ -294,7 +294,7 @@ def calc_distances(part, compactDegree = False):
else:
dist_func = cost

for v1,nbs in vertices.iteritems():
for v1,nbs in vertices.items():
lists_v1 = degreeList[v1]

for v2 in nbs:
Expand Down Expand Up @@ -363,7 +363,7 @@ def selectVertices(layer,fractionCalcDists):

vertices_selected = deque()

for vertices,layers in distances.iteritems():
for vertices,layers in distances.items():
if(previousLayer not in layers):
continue
if(layers[previousLayer] <= threshold):
Expand All @@ -380,7 +380,7 @@ def preprocess_consolides_distances(distances, startLayer = 1):

logging.info('Consolidating distances...')

for vertices,layers in distances.iteritems():
for vertices,layers in distances.items():
keys_layers = sorted(layers.keys())
startLayer = min(len(keys_layers),startLayer)
for layer in range(0,startLayer):
Expand All @@ -399,7 +399,7 @@ def exec_bfs_compact(G,workers,calcUntilLayer):
degreeList = {}

t0 = time()
vertices = G.keys()
vertices = list(G.keys())
parts = workers
chunks = partition(vertices,parts)

Expand Down Expand Up @@ -437,14 +437,15 @@ def exec_bfs(G,workers,calcUntilLayer):
degreeList = {}

t0 = time()
vertices = G.keys()
vertices = list(G.keys())
parts = workers
chunks = partition(vertices,parts)

with ProcessPoolExecutor(max_workers=workers) as executor:

part = 1
for c in chunks:
print(G, c, calcUntilLayer)
job = executor.submit(getDegreeListsVertices,G,c,calcUntilLayer)
futures[job] = part
part += 1
Expand All @@ -459,6 +460,7 @@ def exec_bfs(G,workers,calcUntilLayer):
t1 = time()
logging.info('Execution time - BFS: {}m'.format((t1-t0)/60))

print("BFS DONE")

return

Expand All @@ -471,8 +473,8 @@ def generate_distances_network_part1(workers):
logging.info('Executing part {}...'.format(part))
distances = restoreVariableFromDisk('distances-'+str(part))

for vertices,layers in distances.iteritems():
for layer,distance in layers.iteritems():
for vertices,layers in distances.items():
for layer,distance in layers.items():
vx = vertices[0]
vy = vertices[1]
if(layer not in weights_distances):
Expand All @@ -481,7 +483,7 @@ def generate_distances_network_part1(workers):

logging.info('Part {} executed.'.format(part))

for layer,values in weights_distances.iteritems():
for layer,values in weights_distances.items():
saveVariableOnDisk(values,'weights_distances-layer-'+str(layer))
return

Expand All @@ -493,8 +495,8 @@ def generate_distances_network_part2(workers):
logging.info('Executing part {}...'.format(part))
distances = restoreVariableFromDisk('distances-'+str(part))

for vertices,layers in distances.iteritems():
for layer,distance in layers.iteritems():
for vertices,layers in distances.items():
for layer,distance in layers.items():
vx = vertices[0]
vy = vertices[1]
if(layer not in graphs):
Expand All @@ -507,7 +509,7 @@ def generate_distances_network_part2(workers):
graphs[layer][vy].append(vx)
logging.info('Part {} executed.'.format(part))

for layer,values in graphs.iteritems():
for layer,values in graphs.items():
saveVariableOnDisk(values,'graphs-layer-'+str(layer))

return
Expand All @@ -524,7 +526,7 @@ def generate_distances_network_part3():
alias_method_q = {}
weights = {}

for v,neighbors in graphs.iteritems():
for v,neighbors in graphs.items():
e_list = deque()
sum_w = 0.0

Expand Down
8 changes: 4 additions & 4 deletions src/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def nodes(self):
return self.keys()

def adjacency_iter(self):
    """Return a view of the (node, neighbours) pairs stored in this mapping.

    Uses ``items()``; the Python 2-only ``iteritems()`` does not exist on
    Python 3 dictionaries.
    """
    return self.items()

def subgraph(self, nodes={}):
subgraph = Graph()
Expand Down Expand Up @@ -124,12 +124,12 @@ def number_of_nodes(self):

def gToDict(self):
    """Return a plain ``dict`` with the same key -> value entries as this mapping.

    The copy is shallow: the values are the same objects, only the
    container is new.
    """
    return {node: neighbours for node, neighbours in self.items()}

def printAdjList(self):
    """Print one ``key : value`` line to stdout for every entry of this mapping."""
    for key, value in self.items():
        print(key, ":", value)


Expand Down Expand Up @@ -280,7 +280,7 @@ def from_adjlist_unchecked(adjlist):

def from_dict(d):
    """Build a ``Graph`` holding the same key -> value entries as *d*.

    Values are stored by reference, not copied.
    """
    G = Graph()
    for k, v in d.items():
        G[k] = v

    return G
Expand Down
154 changes: 77 additions & 77 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,117 +13,117 @@
logging.basicConfig(filename='struc2vec.log',filemode='w',level=logging.DEBUG,format='%(asctime)s %(message)s')

def _str2bool(value):
    '''
    Convert a command-line token into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is True
    because any non-empty string is truthy. This helper accepts the usual
    spellings and rejects everything else with a proper usage error.
    '''
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


def parse_args(argv=None):
    '''
    Parses the struc2vec arguments.

    argv: optional list of argument strings; when None (the default)
    argparse reads sys.argv[1:], which is the original behaviour.

    Returns the argparse.Namespace with the parsed options.
    '''
    parser = argparse.ArgumentParser(description="Run struc2vec.")

    parser.add_argument('--input', nargs='?', default='graph/karate.edgelist',
                        help='Input graph path')

    parser.add_argument('--output', nargs='?', default='emb/karate.emb',
                        help='Embeddings path')

    parser.add_argument('--dimensions', type=int, default=128,
                        help='Number of dimensions. Default is 128.')

    parser.add_argument('--walk-length', type=int, default=80,
                        help='Length of walk per source. Default is 80.')

    parser.add_argument('--num-walks', type=int, default=10,
                        help='Number of walks per source. Default is 10.')

    parser.add_argument('--window-size', type=int, default=10,
                        help='Context size for optimization. Default is 10.')

    parser.add_argument('--until-layer', type=int, default=None,
                        help='Calculation until the layer.')

    parser.add_argument('--iter', default=5, type=int,
                        help='Number of epochs in SGD')

    # The help text used to claim a default of 8 while the default is 4.
    parser.add_argument('--workers', type=int, default=4,
                        help='Number of parallel workers. Default is 4.')

    # Each positive/negative flag pair shares one dest so that the negative
    # flag really clears the option instead of writing to an unrelated
    # attribute that nothing reads.
    parser.add_argument('--weighted', dest='weighted', action='store_true',
                        help='Boolean specifying (un)weighted. Default is unweighted.')
    parser.add_argument('--unweighted', dest='weighted', action='store_false')
    parser.set_defaults(weighted=False)

    parser.add_argument('--directed', dest='directed', action='store_true',
                        help='Graph is (un)directed. Default is undirected.')
    parser.add_argument('--undirected', dest='directed', action='store_false')
    parser.set_defaults(directed=False)

    parser.add_argument('--OPT1', default=False, type=_str2bool,
                        help='optimization 1')
    parser.add_argument('--OPT2', default=False, type=_str2bool,
                        help='optimization 2')
    parser.add_argument('--OPT3', default=False, type=_str2bool,
                        help='optimization 3')
    return parser.parse_args(argv)

def read_graph():
    '''
    Reads the input network.

    Loads the edge list found at args.input (the module-level ``args``
    namespace) and returns whatever graph.load_edgelist produces.
    '''
    logging.info(" - Loading graph...")
    # NOTE(review): the graph is always loaded with undirected=True, whatever
    # args.directed says -- confirm this is intended.
    G = graph.load_edgelist(args.input, undirected=True)
    logging.info(" - Graph loaded.")
    return G

def learn_embeddings():
    '''
    Learn embeddings by optimizing the Skipgram objective using SGD.

    Reads the walks from 'random_walks.txt', trains a Word2Vec model with
    the settings held in the module-level ``args`` namespace and writes the
    vectors to args.output in word2vec text format.
    '''
    logging.info("Initializing creation of the representations...")
    walks = LineSentence('random_walks.txt')
    # NOTE(review): `size=` and `iter=` are the keyword names of older gensim
    # releases -- confirm against the gensim version this project pins.
    model = Word2Vec(walks, size=args.dimensions, window=args.window_size,
                     min_count=0, hs=1, sg=1, workers=args.workers,
                     iter=args.iter)
    model.wv.save_word2vec_format(args.output)
    logging.info("Representations created.")
    return

def exec_struc2vec(args):
    '''
    Pipeline for representational learning for all nodes in a graph.

    args: the parsed command-line namespace. OPT1, OPT2 and OPT3 choose
    which variant of each stage runs.

    Returns the struc2vec.Graph after the random walks have been simulated.
    '''
    # The layer limit is only honoured when OPT3 is switched on.
    until_layer = args.until_layer if args.OPT3 else None

    G = read_graph()
    G = struc2vec.Graph(G, args.directed, args.workers, untilLayer=until_layer)

    if args.OPT1:
        G.preprocess_neighbors_with_bfs_compact()
    else:
        G.preprocess_neighbors_with_bfs()

    if args.OPT2:
        G.create_vectors()
        G.calc_distances(compactDegree=args.OPT1)
    else:
        G.calc_distances_all_vertices(compactDegree=args.OPT1)

    G.create_distances_network()
    G.preprocess_parameters_random_walk()

    G.simulate_walks(args.num_walks, args.walk_length)

    return G

def main(args):
    '''
    Run the struc2vec pipeline, then learn the embeddings from its walks.

    args: the parsed command-line namespace, passed to exec_struc2vec.
    '''
    # The returned graph object is not needed here; learn_embeddings() takes
    # no arguments and reads its input from 'random_walks.txt'.
    exec_struc2vec(args)

    learn_embeddings()


if __name__ == "__main__":
    # `args` is deliberately bound at module level: read_graph() and
    # learn_embeddings() read it as a global.
    args = parse_args()
    main(args)

Loading