##
## make_graphs.py
##
## authors: Pedro Conceicao, Dejan Govc, Jānis Lazovskis
## affiliation: Neuro-Topology Group, Institute of Mathematics, University of Aberdeen
## date: June 2020
##
## description: Commands to build graphs for paper
## usage: Change the value of 'graph' at the bottom of this file, then run 'python make_graphs.py'
## comment: Needs at least Python 3.7
##

import numpy as np
import pandas as pd
import networkx as nx            # For creating some graphs
import subprocess                # For getting online files
import h5py                      # For reading online files
from scipy.sparse import coo_matrix, load_npz, save_npz, triu
from functools import reduce     # For combining submatrices

##
## Barabási-Albert (undirected) graphs
##
def ba_undirected_graph(n,m):
    # Parameters: n number of vertices, m number of edges added at each step
    # Build matrix, truncating it to upper triangular to avoid undirected edges
    # being represented as pairs of reciprocal edges.
    graph = nx.barabasi_albert_graph(n,m,seed=None)
    A = nx.to_scipy_sparse_matrix(graph,format='coo')
    A = triu(A)
    save_npz("graphs/ba-" + str(m) + ".npz", A, compressed=True)
    print('Graph saved at graphs/ba-'+str(m)+'.npz', flush=True)

##
## Barabási-Albert (directed) graphs
## take the undirected BA graph and orient edges randomly
##
def ba_directed_graph(n,m):
    # Set parameters: n number of vertices, m number of edges added at each step;
    # p0 probability of deleting the edge; p1, p2 are probabilities of orienting the edge in one of the two ways;
    # 1 - p0 - p1 - p2 is the probability of "orienting the edge in both ways", i.e. replacing it by a pair of reciprocal edges.
    p0 = 0
    p1 = 0.45
    p2 = 0.45
    # Build matrix, truncating it to upper triangular to avoid undirected edges being represented as pairs of reciprocal edges.
    # Then, randomly for each edge, either remove it or orient it in one or two ways depending on p0, p1 and p2.
    graph = nx.barabasi_albert_graph(n,m,seed=None)
    A = nx.to_scipy_sparse_matrix(graph,format='coo')
    A = triu(A)
    rmlist = []
    relist = []
    for i in range(len(A.row)):
        x = np.random.uniform()
        if x < p0:
            rmlist.append(i)
        elif x < p0 + p1:
            pass
        elif x < p0 + p1 + p2:
            r = A.row[i]
            A.row[i] = A.col[i]
            A.col[i] = r
        else:
            relist.append(i)
    A.row = np.append(A.row,A.col[relist])
    A.col = np.append(A.col,A.row[relist])
    A.data = np.append(A.data,A.data[relist])
    A.row = np.delete(A.row,rmlist)
    A.col = np.delete(A.col,rmlist)
    A.data = np.delete(A.data,rmlist)
    save_npz("graphs/dba-" + str(m) + ".npz", A, compressed=True)
    print('Graph saved at graphs/dba-'+str(m)+'.npz', flush=True)
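##
## Optional helper (an added sketch, not part of the original pipeline): load one of the
## .npz adjacency matrices written by the functions in this script and report its vertex
## and edge counts, e.g. to inspect the result of the BA constructions above.
## Uses only load_npz, which is already imported.
##
def describe_graph(path):
    # path: any file written by this script, e.g. 'graphs/ba-5.npz' or 'graphs/dba-5.npz'
    A = load_npz(path).tocoo()
    print(path + ': ' + str(A.shape[0]) + ' vertices, ' + str(A.nnz) + ' edges', flush=True)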
##
## Block graphs
##
def block_graph(b,A,i):
    # Set parameters: b is the number of vertices in a block, A is a (square) connection
    # probability matrix, i is an index to output different files
    # Build matrices of corresponding block graphs.
    n = len(A)
    B = np.zeros([n*b,n*b],dtype=int)
    for s in range(n):
        for t in range(n):
            for k in range(b):
                for l in range(b):
                    if np.random.uniform() < A[s,t] and s*b+k != t*b+l:
                        B[s*b+k,t*b+l] = 1
    B = coo_matrix(B)
    save_npz("graphs/block" + str(i) + ".npz", B, compressed=True)
    print('Graph saved at graphs/block'+str(i)+'.npz', flush=True)

##
## BlueBrain microcircuit
## https://bbp.epfl.ch/nmc-portal/web/guest/downloads
##
def bb_subgraphs(layer=0):
    # Download and extract file
    try:
        mc2 = h5py.File('cons_locs_pathways_mc2_Column.h5','r')
    except:
        download = subprocess.Popen(['wget','https://bbp.epfl.ch/public/nmc-models/pathways_stripped_prod/average/cons_locs_pathways_mc2_Column.tar'],stdout=subprocess.DEVNULL)
        download.wait()
        extract = subprocess.Popen(['tar','-xvf','cons_locs_pathways_mc2_Column.tar','cons_locs_pathways_mc2_Column.h5'],stdout=subprocess.DEVNULL)
        extract.wait()
        mc2 = h5py.File('cons_locs_pathways_mc2_Column.h5','r')
    # Save chosen layer
    if layer != 0:
        sublayers = []
        for layer_from in [l for l in list(mc2['connectivity']) if l[1]==str(layer)]:
            subsublayers = []
            for layer_to in [l for l in list(mc2['connectivity']) if l[1]==str(layer)]:
                subsublayers.append(np.array(list(mc2['connectivity'][layer_from][layer_to]['cMat'])))
            sublayers.append(reduce((lambda x,y: np.concatenate((x,y),axis=1)),subsublayers))
        matrix_dense = reduce((lambda x,y: np.concatenate((x,y))), sublayers)
        matrix_sparse = np.nonzero(matrix_dense)
        save_npz('graphs/mc2_layer'+str(layer)+'.npz', coo_matrix((np.ones(len(matrix_sparse[0])), matrix_sparse), shape=(len(matrix_dense),len(matrix_dense))))
        print('Graph saved at graphs/mc2_layer'+str(layer)+'.npz', flush=True)
    # Save complete circuit
    else:
        sublayers = []
        for layer_from in list(mc2['connectivity']):
            subsublayers = []
            for layer_to in list(mc2['connectivity']):
                subsublayers.append(np.array(list(mc2['connectivity'][layer_from][layer_to]['cMat'])))
            sublayers.append(reduce((lambda x,y: np.concatenate((x,y),axis=1)),subsublayers))
        # Stack the block-rows vertically (axis 0), matching the single-layer branch above
        matrix_dense = reduce((lambda x,y: np.concatenate((x,y))), sublayers)
        matrix_sparse = np.nonzero(matrix_dense)
        save_npz('graphs/mc2.npz', coo_matrix((np.ones(len(matrix_sparse[0])), matrix_sparse), shape=(len(matrix_dense),len(matrix_dense))))
        print('Graph saved at graphs/mc2.npz', flush=True)

##
## Erdős-Rényi graphs
##
def er_graphs(p):
    # Create the graph
    G = nx.erdos_renyi_graph(500, p, seed=None, directed=True)
    # Convert it to COO format
    M = nx.to_scipy_sparse_matrix(G, nodelist=None, dtype=np.intc, weight=None, format='coo')
    # Save it in npz format
    save_npz('graphs/er-' + str(p).replace('.','') + '.npz', M, compressed=True)
    # Confirm that the procedure worked and pay tribute to George Boole :)
    print('Graph saved at graphs/er-' + str(p).replace('.','') + '.npz. Thanks George Boole', flush=True)
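##
## Optional helper (an added sketch, not in the original script): build an n-by-n
## connection probability matrix with one probability inside blocks and another between
## blocks, in the format expected by block_graph above. The matrices actually used in the
## paper are the hard-coded p0,...,p4 in the __main__ block at the bottom of this file.
##
def uniform_block_probabilities(n, p_within, p_between):
    # n: number of blocks; p_within / p_between: edge probabilities on / off the diagonal
    P = np.full((n,n), p_between, dtype=float)
    np.fill_diagonal(P, p_within)
    return P
# Example call (hypothetical output index 99): block_graph(100, uniform_block_probabilities(3, 0.2, 0.05), 99)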
##
## ``Real-world'' graphs
## http://konect.uni-koblenz.de/networks/
## http://konect.cc/
## Google+ database
def realworld_graphs():
    # Set dictionary for graph names
    graphs = {'congress':'convote', 'dblp':'dblp-cite', 'gnutella':'p2p-Gnutella09', 'gplus':'ego-gplus', 'wiki':'wiki_talk_br'}
    sizes = {'congress':219, 'dblp':12590, 'gnutella':8114, 'gplus':23628, 'wiki':1181}
    # Download and extract files
    for g in graphs.keys():
        print('Making '+g+' graph',flush=True)
        try:
            f = open('out.'+graphs[g],'r')
        except:
            print('Downloading...',flush=True)
            download = subprocess.Popen(['wget', 'http://konect.cc/files/download.tsv.'+graphs[g]+'.tar.bz2'],stdout=subprocess.DEVNULL)
            download.wait()
            print('Extracting...',flush=True)
            extract = subprocess.Popen(['tar','--strip-components=1','-xvjf','download.tsv.'+graphs[g]+'.tar.bz2',graphs[g]+'/out.'+graphs[g]],stdout=subprocess.DEVNULL)
            extract.wait()
            f = open('out.'+graphs[g],'r')
        # Read file
        L = f.readlines()
        f.close()
        while L[0][0] == '%':
            L = L[1:]
        # Assemble rows and columns
        rows = []
        cols = []
        edges = []
        for edge in L[:-1]:
            vertices = [int(x)-1 for x in edge.split()[:2]]
            if vertices[0] != vertices[1]:
                if vertices not in edges:
                    rows.append(vertices[0])
                    cols.append(vertices[1])
                    edges.append(vertices)
        save_npz('graphs/'+g+'.npz', coo_matrix((np.ones(len(rows),dtype='int64'), (np.array(rows),np.array(cols))), shape=(sizes[g],sizes[g])))
        print('Graph saved at graphs/'+g+'.npz', flush=True)
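##
## Optional check (an added sketch, not in the original script): verify that a saved
## real-world graph matches the vertex count hard-coded in realworld_graphs above.
##
def check_realworld_size(name, expected):
    # name: a key of the 'graphs' dictionary above, e.g. 'congress'; expected: the value in 'sizes'
    A = load_npz('graphs/' + name + '.npz')
    if A.shape[0] == expected:
        print(name + ': size matches (' + str(expected) + ' vertices)', flush=True)
    else:
        print('Warning: ' + name + ' has shape ' + str(A.shape) + ', expected ' + str(expected), flush=True)
# Example call: check_realworld_size('congress', 219)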
##
## Scale-free graphs
##
def scalefree_graph(N,a,c,di,do):
    # a: probability of adding an edge and an in-vertex
    # c: probability of adding an edge and an out-vertex
    # b = 1-a-c probability of adding an edge between existing vertices
    # di, do: non-negative reals
    # N: number of steps.
    # Build matrix as described in "Directed Scale-Free Graphs" paper.
    # The initial graph is a directed 3-cycle.
    # Once the graph is generated, we remove loops and multiple edges.
    b = 1-a-c
    rows = np.array([0,1,2])
    cols = np.array([1,2,0])
    n = 3
    for t in range(3,N):
        outdeg = np.bincount(rows,minlength=n)
        indeg = np.bincount(cols,minlength=n)
        outprob = [(d+do)/(t+do*n) for d in outdeg]
        inprob = [(d+di)/(t+di*n) for d in indeg]
        x = np.random.uniform()
        yo = np.random.uniform()
        yi = np.random.uniform()
        so = 0
        si = 0
        v0 = n
        w0 = n
        for v in range(n):
            so += outprob[v]
            if yo < so:
                v0 = v
                break
        for w in range(n):
            si += inprob[w]
            if yi < si:
                w0 = w
                break
        if x < a:
            rows = np.append(rows,n)
            cols = np.append(cols,w0)
            n += 1
        elif x < a + b:
            rows = np.append(rows,v0)
            cols = np.append(cols,w0)
        else:
            rows = np.append(rows,v0)
            cols = np.append(cols,n)
            n += 1
    A = np.zeros([n,n],dtype=int)
    for t in range(N):
        if rows[t] != cols[t]:
            A[rows[t],cols[t]] = 1
    A = coo_matrix(A)
    # Note: every call writes to the same file, so a later call overwrites an earlier one.
    save_npz("graphs/dsf-graph.npz", A, compressed=True)
    print('Graph saved at graphs/dsf-graph.npz', flush=True)

##
## Generate the desired graphs
##
if __name__=="__main__":
    graph = 'realworld'

    # Create new folder for graphs if it doesn't exist
    subprocess.run(['mkdir','-p','graphs'],stderr=subprocess.DEVNULL)

    # Undirected Barabási-Albert graphs
    if graph == 'ba_undirected':
        for edges_per_step in [5,10,15,20,25]:
            ba_undirected_graph(500,edges_per_step)

    # Directed Barabási-Albert graphs
    elif graph == 'ba_directed':
        for edges_per_step in [5,10,15,20,25]:
            ba_directed_graph(500,edges_per_step)

    # Block graphs
    elif graph == 'block_graph':
        p0 = [[0.1,0.1,0.1],[0.1,0.1,0.1],[0.1,0.1,0.1]]
        p1 = [[0,0.1,0.2],[0.1,0,0.1],[0.2,0.1,0]]
        p2 = [[0.2,0.1,0],[0.1,0.2,0.1],[0,0.1,0.2]]
        p3 = [[0.2,0.1,0],[0.2,0.1,0],[0.2,0.1,0]]
        p4 = [[0,0.2,0],[0,0,0.2],[0.2,0,0]]
        for index,conn_prob in enumerate([np.array(p) for p in [p0,p1,p2,p3,p4]]):
            block_graph(100,conn_prob,index)

    # BlueBrain MC2
    elif graph == 'bluebrain':
        for bblayer in range(7):
            bb_subgraphs(bblayer)

    # Erdős-Rényi graphs
    elif graph == 'erdosrenyi':
        for prob in [0.01, 0.025, 0.05, 0.075, 0.1]:
            er_graphs(prob)

    # Real-world graphs from the KONECT collection
    elif graph == 'realworld':
        realworld_graphs()

    # Directed scale-free graphs
    elif graph == 'scalefree':
        scalefree_graph(5000, 0.05, 0.05, 0.1, 0.2)
        scalefree_graph(10000, 0.025, 0.025, 0.2, 0.4)
        scalefree_graph(15000, 0.0167, 0.0167, 0.4, 0.8)
        scalefree_graph(20000, 0.0125, 0.0125, 0.8, 1.6)
        scalefree_graph(25000, 0.01, 0.01, 1.6, 3.2)

    else:
        print('Given string "'+graph+'" not recognized. See end of file for acceptable options.',flush=True)
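##
## Acceptable values for 'graph' above (added summary of the options handled in the
## if/elif chain): 'ba_undirected', 'ba_directed', 'block_graph', 'bluebrain',
## 'erdosrenyi', 'realworld', 'scalefree'.
##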