# partition.py # 11/19/03 # Jiwon Hahn from readdata import * G = get_graph2() def graphAnalysis(graph): n_connect = [] #node's total number of edges n_cost = [] #node's number of total cost avgcost, avgconnect = 0,0 avg_cost=0 min_cost=maxTotalcost() max_cost=0 for i in range(N): e1, e2 = 0, 0 for j in range(N): if G[i][j]==0: continue else: e1+=G[i][j]; e2+=1 n_cost.append([e1,i]) n_connect.append([e2,i]) avgcost+=e1 avgconnect+=e2 avgcost /= N avgconnect /= N n_cost.sort() n_connect.sort() for i in range(N): for j in range(N): k = G[i][j] if k>0 and kmax_cost: max_cost = k avg_cost += k avg_cost /= (N*N) print '---------------------------------------------------' print 'Graph analysis: ' print 'loose upper bound of the total cost = \t',maxTotalcost() print "min/avg/max cost of a node is: ", print n_cost[0][0],'/',avgcost,'/',n_cost[-1][0] print "min/avg/max degree of a node is: ", print n_connect[0][1],'/',avgconnect,'/',n_connect[-1][1] print "min/avg/max cost of an edge is: ", print min_cost,'/',avg_cost,'/',max_cost print '--------------------------------------------------' def print_partition(p1,p2): for i in range(N/2): print '%d\t%d'%(p1[i],p2[i]) def getTotalcost(p1, p2): tcost = 0 for i in p1: for j in p2: tcost += G[i][j] return tcost def maxTotalcost(): mcost = 0 for i in range(N): for j in range(i+1,N): mcost += G[i][j] return mcost ##--------------------partition algorithms------------------# def partition0(cost_count): eC = [] #node's total number of edges or total cost p1, p2 = [],[] for i in range(N): e1, e2 = 0, 0 for j in range(N): if G[i][j]==0: continue else: e1+=G[i][j]; e2+=1 if cost_count == 'cost': eC.append([e1,i]) else: eC.append([e2,i]) eC.sort() while len(p1) < N/2: node = eC.pop().pop() p1.append(node) for i in range(N): if G[node][i] >0 : p1.append(i) if len(p1)>N/2: p1.pop() break p1.sort() for i in range(N): if i in p1: continue else: p2.append(i) return p1, p2 def partition1(): #just half-half partitioning P1 = range(N/2) P2 = range(N/2,N) return P1,P2 def partition2(): #edge cost based A = [] p1, p2 = [],[] for i in range(N): for j in range(N): A.append([G[i][j],(i,j)]) A.sort() for i in range(N): a = A.pop().pop() if len(p1) < N/2: if a[0] not in p1: p1.append(a[0]) if a[1] not in p1: p1.append(a[1]) else: p1 = p1[:N/2] break for i in range(N): if i in p1: continue else: p2.append(i) return p1, p2 #KL algorithm def KLinit(A,B): I, E = 0,0 P1, P2 = [],[] for i in range(N): for j in range(N): if (i in A and j in B) or \ (i in B and j in B): #in same partition I+=G[i][j] else: E+=G[i][j] if i in A: P1.append([i,E-I]) #store [node,D] else: P2.append([i,E-I]) #store [node,D] gain = [[-999999 for i in range(N)] for i in range(N)] for i in range(N/2): for j in range(N/2): p1,p2 = P1[i],P2[j] #gain = D1+D2-2C12 gain[p1[0]][p2[0]]= p1[1]+p2[1]-2*G[p1[0]][p2[0]] return P1, P2, gain def KLpartition(P1, P2, gain): maxgain = 0 costs = [] p1history, p2history = [],[] while maxgain > -100: maxgain = max(map(max,gain)) for i in range(N): #search a,b to swap for j in range(N): if gain[i][j]==maxgain: a, b = i, j break #swap X=lambda x:x[0] if a in map(X, P1): D1 = filter(lambda x:x[0]==a, P1).pop()[1] #determine D D2 = filter(lambda x:x[0]==b, P2).pop()[1] #determine D P1 = filter(lambda x: x[0]!=a, P1) #delete node P2 = filter(lambda x: x[0]!=b, P2) #delete node P1.append([b,-D2]) #add [node, D] P2.append([a,-D1]) #add [node, D] else: D1 = filter(lambda x:x[0]==a, P2).pop()[1] #determine D D2 = filter(lambda x:x[0]==b, P1).pop()[1] #determine D P1 = filter(lambda x: x[0]!=b, P1) #delete node P2 = filter(lambda x: x[0]!=a, P2) #delete node P1.append([a,-D1]) #add [node, D] P2.append([b,-D2]) #add [node, D] #update Ds that are connected to (a,b) and the gain table P1unodes, P2unodes = [b],[a] #updated nodes for i in range(N/2): p1,p2 = P1[i][0],P2[i][0] if G[p1][a]: P1[i][1]+=2*G[p1][a] P1unodes.append(p1) if G[p1][b]: P1[i][1]-=2*G[p1][b] P1unodes.append(p1) if G[p2][a]: P2[i][1]-=2*G[p2][a] P2unodes.append(p2) if G[p2][b]: P2[i][1]+=2*G[p2][b] P2unodes.append(p2) for i in range(N/2): for j in range(N/2): p1,p2 = P1[i],P2[j] #gain = D1+D2-2C12 gain[p1[0]][p2[0]]= p1[1]+p2[1]-2*G[p1[0]][p2[0]] for i in range(N): for j in range(N): gain[i][b] = gain[b][i]= -999999 gain[j][a] = gain[a][j]= -999999 A, B = map(X,P1), map(X,P2) cost = getTotalcost(A, B) costs.append(cost) p1history.append(A) p2history.append(B) #print_partition(A, B) #print cost min = 700000 for i in range(len(costs)): if costs[i] threshold: node[i][0].append([friendship,j]) select += 1 value += friendship if friendship > 0: number += 1 node[i][1]=i node[i][2]=number node[i][3]=value node[i][4]=select node[i][0].reverse() #best friend first if value < minval: minval=value #evaluate relation: ones with with few high-value friends get high priority for i in range(N): for j in range(len(node[i][0])): # node[i][0][j][0]/=(1.0*node[i][2]*node[i][4]**2) # node[i][0][j][0]/=(1.0*node[i][4]**2) #th:760, 548245 node[i][0][j][0]/=(1.0*node[i][4]**3) #th:760, 548245 node.sort() #nodes with high priority choose their group members first i = N while len(P1) <50: select = 0 i -= 1 if node[i][1] not in P1: P1.append(node[i][1]) while val < minval and select 0: for i in range((len(P1)-len(P2))/2): P2.append(P1.pop()) #apply KL k1,k2,gain=KLinit(P1,P2) k1,k2 = KLpartition(k1,k2,gain) return k1, k2 def randompartition(): import random min = maxTotalcost() p1 = random.sample(range(N),N/2) p1.sort() p2 = [] count = 0 for i in range(N): if count<50 and i == p1[count]: count+=1 continue else: p2.append(i) return p1, p2 def test(x): #for exploring random solutions min = maxTotalcost() for i in range(x): p1, p2 = randompartition() cost = getTotalcost(p1, p2) if cost < min: min = cost p3 = p1+['-']+p2 print 'random cost = \t\t', min print p3 ##-------------------end of partition algorithms-----------------# if __name__=='__main__': graphAnalysis(G) p1,p2 = partition0('cost') print 'cost-based = \t\t',getTotalcost(p1,p2) k1,k2,gain=KLinit(p1,p2) k1,k2 = KLpartition(k1,k2,gain) print 'cost-based KL= \t\t',getTotalcost(k1,k2) p1,p2 = partition0('count') print 'degree-based = \t\t',getTotalcost(p1,p2) k1,k2,gain=KLinit(p1,p2) k1,k2 = KLpartition(k1,k2,gain) print 'degree-based KL= \t',getTotalcost(k1,k2) p1,p2 = partition1() print 'half/half = \t\t',getTotalcost(p1,p2) k1,k2,gain=KLinit(p1,p2) k1,k2 = KLpartition(k1,k2,gain) print 'half/half KL= \t\t',getTotalcost(k1,k2) p1,p2 = partition2() print 'edge-sorted = \t\t',getTotalcost(p1,p2) k1,k2,gain=KLinit(p1,p2) k1,k2 = KLpartition(k1,k2,gain) print 'edge-sorted KL= \t',getTotalcost(k1,k2) #test(100) min = 700000 for i in range(50): p1,p2=randompartition() k1,k2,gain=KLinit(p1,p2) k1,k2 = KLpartition(k1,k2,gain) cost = getTotalcost(k1,k2) if cost