Commit 7138163a authored by cyts

initial commit

parent 8fdd7e7f
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
\ No newline at end of file
from pathlib import Path
import os
import sys
folder_path = Path(r"../../src/").resolve()
if os.fspath(folder_path) not in sys.path:
    sys.path.append(os.fspath(folder_path))
from heuristic_functions import *
import pandas as pd
import networkx as nx
import geopandas as gpd
from shapely.geometry import Point
from utils import fix_same_location, dist, full_algo, graph_with_calc_edge_capacity,cost_capacity_func
import numpy as np
"""
This script generates solutions for the final large 95 node graph in the discussion of the article
"""
def main():
    #Choice of cluster
    chosen_clust = 6
    #Load emitters DataFrame
    emitters = pd.read_pickle(Path("../data/emitters_clustered.pkl").resolve())
    #Select the cluster and fix overlapping sources
    emitters_cluster = emitters[emitters['spatial_cluster_euc'] == chosen_clust]
    emitters_cluster = fix_same_location(emitters_cluster, 'em_mean')
    #Get location of centre of mass
    com = (np.sum(emitters_cluster['x'].values*emitters_cluster['em_mean'].values)/emitters_cluster['em_mean'].sum(),
           np.sum(emitters_cluster['y'].values*emitters_cluster['em_mean'].values)/emitters_cluster['em_mean'].sum())
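    #com is the emission-weighted centroid of the cluster:
    #(sum(x_i*e_i)/sum(e_i), sum(y_i*e_i)/sum(e_i)), with e_i = em_mean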
    #Create storage node
    storage_cap = -emitters_cluster['em_mean'].sum()
    sink = gpd.GeoDataFrame(geometry=[Point(com[0], com[1])])
    sink['cap'] = storage_cap
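    #Sign convention: the sink capacity is negative, so the source and sink
    #capacities of the graph sum to zero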
    sources = emitters_cluster[['em_mean', 'geometry']]
    sources = sources.rename(columns={'em_mean': 'cap'})
    sources = sources.rename_axis('Name')
    #Create complete NetworkX graph from DataFrame
    full_nodes = pd.concat([sources, sink], sort=False)
    graph_full = nx.Graph()
    full_nodes['x'] = full_nodes.geometry.apply(lambda p: p.x)
    full_nodes['y'] = full_nodes.geometry.apply(lambda p: p.y)
    for j, point in full_nodes.iterrows():
        graph_full.add_node(j, pos=(point.x, point.y), cap=full_nodes.loc[j]['cap'])
    node_list = list(graph_full.nodes)
    for node1 in graph_full.nodes:
        for node2 in node_list:
            if node2 == node1:
                continue
            graph_full.add_edge(node1, node2, weight=dist(graph_full.nodes[node1]['pos'], graph_full.nodes[node2]['pos']))
    #Minimum spanning tree from complete graph
    spanning_tree = nx.minimum_spanning_tree(graph_full)
    spanning_tree = graph_with_calc_edge_capacity(spanning_tree)
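    #The MST is the initial solution handed to the heuristics below;
    #graph_with_calc_edge_capacity presumably annotates each edge with the
    #capacity it must carry to route all source flows to the sink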
    #Create results DataFrame
    results = gpd.GeoDataFrame(sink)
    #List of algorithms to test
    # algos = [local_search, vns, tabu, delta_change,
    #          edge_turn, high_valency_shuffle_edge_turn]
    # algo_strings = ["local", "vns", "tabu", "delta", "edge", "shuffle_edge"]
    algos = [high_valency_shuffle_local_search]
    algo_strings = ["shuffle_local"]
    #Run optimization heuristics
    for algo, algo_string in zip(algos, algo_strings):
        output_graph, output_cost, calc_time = full_algo(spanning_tree, cost_capacity_func, algo)
        results['costs_' + algo_string] = output_cost
        results['time_' + algo_string] = calc_time
        results['graphs_' + algo_string] = [output_graph]
    return results
if __name__ == '__main__':
    results = main()
    results.to_pickle(Path('../results_paper/large_graph/dataframe_cluster6_local.pkl'))
\ No newline at end of file
from pathlib import Path
import os
import sys
folder_path = Path(r"../../src/").resolve()
if os.fspath(folder_path) not in sys.path:
    sys.path.append(os.fspath(folder_path))
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from shapely.geometry import Point, LineString
import geopandas as gpd
from utils import fix_same_location,create_compl_graph_from_other,graph_with_calc_edge_capacity,graph_cost
from shapely import wkt
filepath = "../results_paper/large_graph/dataframe_cluster6.pkl"
#filepath2 = "../results_paper/grid_search/grid_dataframe_cluster6_new.pkl"
filepath3 = '../results_paper/large_graph/dataframe_cluster6_local.pkl'
all_dfs = pd.read_pickle(Path(filepath).resolve())
#all_dfs2 = pd.read_pickle(Path(filepath2).resolve())
all_dfs3 = pd.read_pickle(Path(filepath3).resolve())
mst = graph_with_calc_edge_capacity(nx.minimum_spanning_tree(create_compl_graph_from_other(all_dfs.graphs_vns.iloc[0])))
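#The MST baseline is rebuilt from a stored solution (here the VNS graph),
#assuming create_compl_graph_from_other returns the complete graph on the
#same node set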
# all_dfs=all_dfs.drop(["distance_to_com",'geometry'],axis = 1)
# all_dfs.to_pickle(r"C:\Users\yeates\Documents\Git\network-optimization\Results\large_graph\dataframe_cluster6.pkl")
func = lambda x: x**0.6
chosen_clust = 6
frame_limsx = (620000, 1050000)
frame_limsy = (6170000,6980000)
emittersgdf_filt = pd.read_pickle(Path("../data/emitters_clustered.pkl").resolve())
emittersgdf_filt_loc = emittersgdf_filt[emittersgdf_filt['spatial_cluster_euc']==chosen_clust]
emittersgdf_filt_one = fix_same_location(emittersgdf_filt_loc,'em_mean')
deu = gpd.read_file(Path("../data/german_borders.shp").resolve())
deu = deu.set_crs(epsg=4326, allow_override=True)
deu = deu.to_crs(epsg=3857)
max_edge_cap = 0
for s_orig in [all_dfs.graphs_local.values[0], all_dfs.graphs_delta.values[0],
               all_dfs.graphs_edge.values[0], all_dfs.graphs_shuffle_edge.values[0],
               all_dfs.graphs_tabu.values[0], all_dfs.graphs_vns.values[0],
               all_dfs3.graphs_shuffle_local.values[0]]:
    df = nx.to_pandas_edgelist(s_orig)
    for edge in df.iterrows():
        if edge[1].edge_cap > max_edge_cap:
            max_edge_cap = edge[1].edge_cap
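#max_edge_cap is the largest edge capacity across all plotted solutions; it is
#used below to normalize the plotted line widths to a common scale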
com = (np.sum(emittersgdf_filt_one['x'].values*emittersgdf_filt_one['em_mean'].values)/emittersgdf_filt_one['em_mean'].sum(),
       np.sum(emittersgdf_filt_one['y'].values*emittersgdf_filt_one['em_mean'].values)/emittersgdf_filt_one['em_mean'].sum())
s_best = all_dfs3.graphs_shuffle_local.values[0]
fig, ax = plt.subplots(figsize=(7, 7), dpi=200)
ax.set_xlim(frame_limsx[0],frame_limsx[1])
ax.set_ylim(frame_limsy[0],frame_limsy[1])
deu.plot(ax=ax, facecolor=(0.37254902, 0.16078431, 0.3254902, 0.15))
emittersgdf_filt_loc.plot(ax=ax, markersize = 'size',alpha =0.8, color ='orange', edgecolors='k',linewidth=0.3)
plt.title('Sources',size = 20)
ax.axis('off')
df_best = nx.to_edgelist(s_best)
best_edges = [set([x[0],x[1]]) for x in df_best]
stor = gpd.GeoDataFrame( geometry= [Point(com)])
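#In the per-algorithm plots below, edges shared with the best solution (VSLS,
#the shuffle_local result) are drawn gray; edges that differ are drawn red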
for s_orig, title in zip([all_dfs.graphs_local.values[0], all_dfs.graphs_delta.values[0],
                          all_dfs.graphs_edge.values[0], all_dfs.graphs_shuffle_edge.values[0],
                          all_dfs.graphs_tabu.values[0], all_dfs.graphs_vns.values[0],
                          all_dfs3.graphs_shuffle_local.values[0], mst],
                         ['LS','DC','ET','VSET','Tabu','VNS','VSLS','MST']):
    fig, ax = plt.subplots(figsize=(7, 7), dpi=300)
    ax.set_xlim(frame_limsx[0], frame_limsx[1])
    ax.set_ylim(frame_limsy[0], frame_limsy[1])
    deu.plot(ax=ax, facecolor=(0.37254902, 0.16078431, 0.3254902, 0.15))
    df = nx.to_pandas_edgelist(s_orig)
    df2 = nx.to_pandas_edgelist(s_orig)
    lines = []
    lines_else = []
    removed = []
    removed2 = []
    for edge in df.iterrows():
        x1 = s_orig.nodes[edge[1].source]['pos'][0]
        x2 = s_orig.nodes[edge[1].target]['pos'][0]
        y1 = s_orig.nodes[edge[1].source]['pos'][1]
        y2 = s_orig.nodes[edge[1].target]['pos'][1]
        if set([edge[1].source, edge[1].target]) in best_edges:
            removed2.append(edge[0])
            lines.append(LineString([Point(x1, y1), Point(x2, y2)]).wkt)
        else:
            removed.append(edge[0])
            lines_else.append(LineString([Point(x1, y1), Point(x2, y2)]).wkt)
    #df keeps the edges shared with the best solution, df2 the remaining ones
    df = df.drop(removed)
    df2 = df2.drop(removed2)
    df['geometry'] = lines
    df['geometry'] = df['geometry'].apply(wkt.loads)
    df = gpd.GeoDataFrame(df, geometry='geometry')
    #Note: despite the .shp extension, these temp files are pandas pickles
    df.to_pickle(Path("../temp_files/pipelines_temp.shp").resolve())
    pipe_shape = pd.read_pickle(Path("../temp_files/pipelines_temp.shp").resolve())
    df2['geometry'] = lines_else
    df2['geometry'] = df2['geometry'].apply(wkt.loads)
    df2 = gpd.GeoDataFrame(df2, geometry='geometry')
    df2.to_pickle(Path("../temp_files/pipelines_temp2.shp").resolve())
    pipe_shape2 = pd.read_pickle(Path("../temp_files/pipelines_temp2.shp").resolve())
    #Scale line widths by the square root of edge capacity, normalized globally
    edge_caps = np.sqrt(pipe_shape.edge_cap)
    edge_caps2 = np.sqrt(pipe_shape2.edge_cap)
    edge_caps_width = [5*i/np.sqrt(max_edge_cap) + 0.2 for i in edge_caps if i != 0]
    edge_caps_width2 = [5*i/np.sqrt(max_edge_cap) + 0.2 for i in edge_caps2 if i != 0]
    pipe_shape.plot(ax=ax, color='gray', alpha=0.8, linewidth=edge_caps_width, zorder=10)
    pipe_shape2.plot(ax=ax, color='red', alpha=0.7, linewidth=edge_caps_width2, zorder=10)
    emittersgdf_filt_loc.plot(ax=ax, markersize=4, alpha=0.8, color='k', edgecolors='k', linewidth=0.3)
    stor.plot(ax=ax, markersize=150, marker='s', color='k', edgecolors='k', zorder=10)
    plt.title(title, size=20)
    ax.axis('off')
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
for chosen_cluster in range(1, 6):
    all_dfs = pd.read_pickle(
        Path(
            "../results_paper/grid_search/grid_dataframe_cluster" + str(chosen_cluster) + ".pkl"
        ).resolve()
    )
    if chosen_cluster in [4, 5]:
        algos = ["local",
                 "vns",
                 "tabu",
                 "delta",
                 "edge",
                 "shuffle_edge",
                 "shuffle_del",
                 "shuffle_local"]
    else:
        algos = ["local",
                 "vns",
                 "tabu",
                 "delta",
                 "delta2",
                 "edge",
                 "edge2",
                 "shuffle_edge",
                 "shuffle_del",
                 "shuffle_local"]
    costs_strings = ["costs_" + x for x in algos]
    costs_diff_strings = ["costs_diff_" + x for x in algos]
    color_iso_strings = ["color_iso_" + x for x in algos]
    graph_strings = ["graphs_" + x for x in algos]
    if chosen_cluster != 1:
        all_dfs["costs_opt"] = all_dfs[costs_strings].min(axis=1)
        all_dfs["Min_algo"] = all_dfs[costs_strings].idxmin(axis="columns").map(dict(zip(costs_strings, color_iso_strings)))
        opt_colors = []
        for i, row in all_dfs.iterrows():
            color = row.Min_algo
            opt_colors.append(row[color])
        all_dfs["graphs_opt"] = all_dfs[costs_strings].idxmin(axis="columns").map(dict(zip(costs_strings, graph_strings)))
        opt_gra = []
        for i, row in all_dfs.iterrows():
            gra = row.graphs_opt
            opt_gra.append(row[gra])
        all_dfs["graphs_opt"] = opt_gra
        all_dfs["color_iso_opt"] = opt_colors
    for diff, cost in zip(costs_diff_strings, costs_strings):
        all_dfs[diff] = 100*(all_dfs[cost] - all_dfs["costs_opt"])/all_dfs["costs_opt"]
    for algo in algos:
        not_opt = []
        for i, row in all_dfs.iterrows():
            not_opt.append(
                all_dfs['graphs_' + algo].iloc[i].edges()
                != all_dfs["graphs_opt"].iloc[i].edges()
            )
        all_dfs['not_opt_' + algo] = not_opt
    percentage_optimal_dict = {}
    time_dict = {}
    for algo in algos:
        percentage = 100 - all_dfs['not_opt_' + algo].sum() / len(all_dfs) * 100
        time_calc = all_dfs['time_' + algo].mean()
        percentage_optimal_dict[algo] = percentage
        time_dict[algo] = time_calc
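    #percentage: share of grid positions whose tree matches the best-known
    #solution (compared by edge set); time: mean run time per algorithm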
    al = 0.9
    small_mark = 40
    big_mark = 130
    col = ["forestgreen", "m", "limegreen", "deepskyblue", "steelblue"][chosen_cluster-1]
    ylims = [(90, 100.5), (60, 102), (60, 102), (0, 105), (0, 105)][chosen_cluster-1]
    fig, ax = plt.subplots(1, figsize=(2.5, 2.5), dpi=300)
    plt.scatter(
        time_dict['local'],
        percentage_optimal_dict['local'],
        marker="s",
        s=small_mark,
        edgecolors=col,
        alpha=al,
        facecolors="none",
    )
    plt.scatter(
        time_dict['delta'],
        percentage_optimal_dict['delta'],
        marker=".",
        s=small_mark,
        edgecolors=col,
        alpha=al,
        facecolors=col,
        linewidth=0,
    )
    plt.scatter(
        time_dict['edge'],
        percentage_optimal_dict['edge'],
        marker="x",
        s=small_mark,
        c=col,
        alpha=al,
    )
    plt.scatter(
        time_dict['tabu'],
        percentage_optimal_dict['tabu'],
        marker=">",
        s=small_mark,
        edgecolors=col,
        alpha=al,
        facecolors="none",
    )
    plt.scatter(
        time_dict['vns'],
        percentage_optimal_dict['vns'],
        marker="<",
        s=small_mark,
        edgecolors=col,
        alpha=al,
        facecolors="none",
    )
    #The shuffle variants reuse the base algorithm's marker inside a larger
    #hollow circle
    plt.scatter(
        time_dict['shuffle_local'],
        percentage_optimal_dict['shuffle_local'],
        marker="o",
        facecolors="none",
        s=big_mark,
        edgecolors=col,
        alpha=al,
    )
    plt.scatter(
        time_dict['shuffle_local'],
        percentage_optimal_dict['shuffle_local'],
        marker="s",
        s=small_mark,
        edgecolors=col,
        alpha=al,
        facecolors="none",
    )
    plt.scatter(
        time_dict['shuffle_del'],
        percentage_optimal_dict['shuffle_del'],
        marker="o",
        facecolors="none",
        s=big_mark,
        edgecolors=col,
        alpha=al,
    )
    plt.scatter(
        time_dict['shuffle_del'],
        percentage_optimal_dict['shuffle_del'],
        marker=".",
        s=small_mark,
        edgecolors=col,
        alpha=al,
        facecolors=col,
        linewidth=0,
    )
    plt.scatter(
        time_dict['shuffle_edge'],
        percentage_optimal_dict['shuffle_edge'],
        marker="o",
        facecolors="none",
        s=big_mark,
        edgecolors=col,
        alpha=al,
    )
    plt.scatter(
        time_dict['shuffle_edge'],
        percentage_optimal_dict['shuffle_edge'],
        marker="x",
        s=small_mark,
        c=col,
        alpha=al,
    )
    if chosen_cluster not in [4, 5]:
        plt.scatter(
            time_dict['delta2'],
            percentage_optimal_dict['delta2'],
            marker="h",
            s=small_mark,
            edgecolors=col,
            alpha=al,
            facecolors="none",
        )
        #edge2 is drawn as overlaid "x" and "+" markers
        plt.scatter(
            time_dict['edge2'],
            percentage_optimal_dict['edge2'],
            marker="x",
            s=small_mark,
            c=col,
            alpha=al,
        )
        plt.scatter(
            time_dict['edge2'],
            percentage_optimal_dict['edge2'],
            marker="+",
            s=small_mark,
            c=col,
            alpha=al,
        )
    plt.xlabel("Average calc. time (s)")
    plt.ylabel("Percent optimal")
    plt.axhline(100, c="k", alpha=0.5)
    ax.set_xscale("log")
    ax.set_title("Cluster " + str(chosen_cluster))
    plt.ylim(ylims)
    plt.show()
from pathlib import Path
import os
import sys
folder_path = Path(r"../../src/").resolve()
if os.fspath(folder_path) not in sys.path:
    sys.path.append(os.fspath(folder_path))
from heuristic_functions import *
import pandas as pd
import networkx as nx
import shapely.geometry as geom
from shapely.geometry import Point, MultiPoint
import geopandas as gpd
from multiprocessing import Pool
from utils import fix_same_location, dist, full_algo, graph_with_calc_edge_capacity,cost_capacity_func
"""
This script performs a grid search over a group of sources and iterates over the heuristic network optimization algorithms
"""
def make_grid_of_sinks(emitters_cluster, n_pointsx):
    """
    Generate a regular grid of sinks from a cluster of sources
    as input for the graph optimization heuristics.

    Args:
        emitters_cluster: a GeoDataFrame with the emitters' positions
        n_pointsx: the number of points in the x direction for the grid of sinks

    Returns:
        grid_gdfs: a GeoDataFrame containing the grid of sinks,
            each with capacity corresponding to the sum of the sources' capacities