import os
import pathlib
import plotly
import numpy as np
import pandas as pd
from time import time
from sklearn.cluster import KMeans
from numba import jit, njit, prange
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

import warnings
warnings.filterwarnings("ignore")


ROOTDIR = pathlib.Path(os.path.abspath('.'))
IMG_HTML = ROOTDIR / 'img-html'
IMG_PNG = ROOTDIR / 'img-png'
IMG_SVG = ROOTDIR / 'img-svg'
DATA_RAW = ROOTDIR / 'data-raw'
DATA_COOKED = ROOTDIR / 'data-processed'


weak_grid_data = pd.read_csv(DATA_RAW / '附件1 弱覆盖栅格数据(筛选).csv')
exist_site_data = pd.read_csv(DATA_RAW / '附件2 现网站址坐标(筛选).csv')


# TODO judge
angle45 = np.pi / 4
angle60 = np.pi / 3
angle90 = np.pi / 2
coverage = [10, 30]
cost = [1, 10]

@jit
def get_theta(x, y, m, n):
    if abs(x - m) < 1e-5: return angle90
    return np.arctan((y - n) / (x - m))

@jit
def get_r_th(theta_delta, cover):
    if theta_delta > angle60: return 0
    return cover * (1 - theta_delta / angle60)

@jit
def check_angle(th1, th2):
    return abs(th1 - th2) < angle45

@jit
def judge_in(site, grid):
    x, y, i, j, cover = site[0], site[1], grid[0], grid[1], coverage[int(site[5]) - 1]
#     print(site[5], cover)
    if abs(x - i) > cover or abs(y - j) > cover: return False
    elif (x - i)**2 + (y - j)**2 > cover**2: return False
    else: return True


q1_exist_sites_data = pd.read_csv(DATA_COOKED / "question1-exist_site_type_data.csv", index_col=0).iloc[:, 1:]
q1_build_sites_data = pd.read_csv(DATA_COOKED / "question1-new_build_sites.csv", index_col=0)
q1_exist_sites_data, q1_build_sites_data

z_build = pd.DataFrame([0 for _ in range(len(q1_build_sites_data))])
z_exist = pd.DataFrame([0 for _ in range(len(q1_exist_sites_data))])
for i in range(1, 4):
    q1_build_sites_data.insert(i + 1, f"angle{i}", z_build)
    q1_exist_sites_data.insert(i + 1, f"angle{i}", z_exist)
q1_exist_sites_data, q1_build_sites_data

(         x     y  angle1  angle2  angle3  type
 0      818  2020       0       0       0     2
 1      713  2013       0       0       0     2
 2     2305   291       0       0       0     2
 3      700  1953       0       0       0     2
 4      949  2293       0       0       0     2
 ...    ...   ...     ...     ...     ...   ...
 1469  2324  1625       0       0       0     2
 1470  1135   852       0       0       0     2
 1471  2053  1818       0       0       0     2
 1472  1432  1797       0       0       0     2
 1473  1584   898       0       0       0     2
 
 [1474 rows x 6 columns],
         x     y  angle1  angle2  angle3  type
 0     932  2210       0       0       0     2
 1     972  1238       0       0       0     1
 2    1229  1894       0       0       0     2
 3    1311  2005       0       0       0     1
 4    1611  2233       0       0       0     2
 ..    ...   ...     ...     ...     ...   ...
 783  1171   812       0       0       0     1
 784   250  1065       0       0       0     1
 785  2353    24       0       0       0     1
 786  2422   583       0       0       0     2
 787  2249  1453       0       0       0     2
 
 [788 rows x 6 columns])


@jit
def angle(hu):
    return hu / np.pi * 180


@jit
def cal_sites_angle():
    q1_weak_grid_data_np = np.array(weak_grid_data.copy()).astype(np.float32)
    q1_exist_sites_data_np = np.array(q1_exist_sites_data.copy()).astype(np.float32)
    q1_build_sites_data_np = np.array(q1_build_sites_data.copy()).astype(np.float32)
    l1, l2, l3 = len(q1_weak_grid_data_np), len(q1_exist_sites_data_np), len(q1_build_sites_data_np)
    print(l1)
    site_data = q1_build_sites_data_np
    l2 = len(site_data)
    print(l2)
    
    for i in prange(l2):
        if i % 50 == 0:
            print(i, '/', l2)
        site_data[i, 2] = 0
        site_data[i, 3] = angle60 * 2
        site_data[i, 4] = angle60 * 4
        
        include_weak_grid_list = []
        for j in prange(l1):
            if judge_in(site_data[i, :], q1_weak_grid_data_np[j, :]):
#                 print("/", end = '')
                include_weak_grid_list.append(q1_weak_grid_data_np[j, :])
        if len(include_weak_grid_list) == 0:
            continue
        include_weak_grid_list = pd.DataFrame(include_weak_grid_list)
        include_weak_grid_list.sort_values(by=2, inplace=True, ascending=False)
        include_weak_grid_list = np.array(include_weak_grid_list).reshape(-1, 3)
        
        site_data[i, 2] = get_theta(
            site_data[i, 0], site_data[i, 1], 
            include_weak_grid_list[0, 0], include_weak_grid_list[0, 1], 
        )

        if not len(include_weak_grid_list) > 1:
            continue
        idx = 1
        while idx < len(include_weak_grid_list):
#             print(",", end = '')
            th = get_theta(
                site_data[i, 0], site_data[i, 1], 
                include_weak_grid_list[idx, 0], include_weak_grid_list[idx, 1], 
            )
            idx += 1
            if check_angle(th, site_data[i, 2]):
                site_data[i, 3] = th
                break
        
        if not len(include_weak_grid_list) > 2:
            continue
        while idx < len(include_weak_grid_list):
#             print(".", end = '')
            th = get_theta(
                site_data[i, 0], site_data[i, 1], 
                include_weak_grid_list[idx, 0], include_weak_grid_list[idx, 1], 
            )
            idx += 1
            if check_angle(th, site_data[i, 2]) and check_angle(th, site_data[i, 3]):
#                 print(444, end = '')
                site_data[i, 4] = th
                break
#     print(site_data)
    return site_data


q1_build_sites_data_np = cal_sites_angle()

182807
788
0 / 788
50 / 788
100 / 788
150 / 788
200 / 788
250 / 788
300 / 788
350 / 788
400 / 788
450 / 788
500 / 788
550 / 788
600 / 788
650 / 788
700 / 788
750 / 788


pd.DataFrame(q1_build_sites_data_np, columns=['x','y','𝜃1','𝜃2','𝜃3','type'])


d1 = pd.read_csv(DATA_COOKED / 'question2-exist_sites_angle.csv', index_col=0)
d2 = pd.read_csv(DATA_COOKED / 'question2-build_sites_angle.csv', index_col=0)
data = pd.concat([d1, d2], axis=0)
data.to_csv(DATA_COOKED / 'question2-total_sites_angle.csv')  # 总站点1（后面还会加入问题2中新的新建站点）


@jit
def cal_total_business():
    total_bussiness = 0
    data = np.array(pd.read_csv(DATA_COOKED / 'question2-total_sites_angle.csv', index_col=0))
    LEN = len(data)
    
    grid = np.array(weak_grid_data.copy()).astype(np.float32)
    L = len(grid)
    
    for i in prange(LEN):
        for j in prange(L):
            if grid[j,3]:
                continue
            if judge_in(data[i], grid[j]):
                x1,y1=data[i,0],data[i,1]
                x2,y2=grid[i,1],grid[j,1]
                if get_r_th(
                    get_theta(x1 - x2, y1 - y2) - data[i, 3], 
                    coverage[int(data[i, 5]) - 1]
                )**2 > (x1 - x2)**2 + (y1 - y2)**2:
                    grid[j,3] = 1
                    total_bussiness += grid[j, 2]
    return total_bussiness


weak_grid_data


weak_grid_data_cp  = weak_grid_data.copy()
weak_grid_data_cp


weak_grid_data_cp.insert(3, "ed", pd.DataFrame([0 for _ in range(len(weak_grid_data))]))
weak_grid_data_cp


@jit
def cal_total_business():
    t0 = time()
    total_bussiness = 0
    data = np.array(pd.read_csv(DATA_COOKED / 'question2-total_sites_angle.csv', index_col=0))
    LEN = len(data)
    
    grid = np.array(weak_grid_data_cp.copy()).astype(np.float32)
    L = len(grid)
    
    for i in prange(LEN):
        if i % 300 == 0:
            print(i, '/', LEN)
        for j in prange(L):
            if grid[j,3]:
                continue
            if judge_in(data[i], grid[j]):
                if not np.random.randint(10):
                    continue
                x1,y1=data[i,0],data[i,1]
                x2,y2=grid[i,1],grid[j,1]
                grid[j,3] = 1
                total_bussiness += grid[j, 2]
    print('用时：', time() - t0)
    return total_bussiness, data, grid


to_bu = cal_total_business()  # 1000s

0 / 1576
300 / 1576
600 / 1576
900 / 1576
1200 / 1576
1500 / 1576
用时： 741.3745419979095


# todo 计算总业务量
total_bussiness, data, grid = to_bu
total_bussiness, data, grid

(3922204.01093581,
 array([[9.3200000e+02, 2.2100000e+03, 1.5707964e+00, 2.0943952e+00,
         4.1887903e+00, 2.0000000e+00],
        [9.7200000e+02, 1.2380000e+03, 1.5707964e+00, 2.0943952e+00,
         4.1887903e+00, 1.0000000e+00],
        [1.2290000e+03, 1.8940000e+03, 1.5707964e+00, 2.0943952e+00,
         4.1887903e+00, 2.0000000e+00],
        ...,
        [2.3530000e+03, 2.4000000e+01, 1.5482696e+00, 4.7776027e+00,
         5.3622860e+00, 1.0000000e+00],
        [2.4220000e+03, 5.8300000e+02, 1.7090038e+00, 4.0597887e+00,
         6.1582727e+00, 2.0000000e+00],
        [2.2490000e+03, 1.4530000e+03, 4.2226570e+00, 4.9158216e+00,
         1.0833534e+00, 2.0000000e+00]]),
 array([[  66.      , 1486.      ,  140.58139 ,    0.      ],
        [  67.      , 1486.      ,  140.51883 ,    0.      ],
        [ 177.      , 1486.      ,   48.919178,    0.      ],
        ...,
        [2354.      , 2123.      ,    5.134017,    0.      ],
        [2355.      , 2123.      ,    2.599999,    0.      ],
        [2372.      , 2123.      ,   57.815   ,    0.      ]],
       dtype=float32))


remain_ggrid = pd.DataFrame(grid).copy()
remain_ggrid


# todo 根据总业务量排序
cond = remain_ggrid.iloc[:, 3] == 0
remain_grid = remain_ggrid[cond]
remain_grid.sort_values(by=2, inplace=True, ascending=False)
remain_grid


# todo kmeans 聚类
kmeans = KMeans(n_clusters=100)
kmeans.fit(remain_grid.iloc[:, :2])
kmeans.labels_

array([24, 61, 24, ..., 45, 45, 89])


remain_grid.insert(4, "labels", kmeans.labels_)
remain_grid


# todo 绘制聚类结果
fig = px.scatter(data_frame=remain_grid, x=0, y=1, color='labels')
fig.update_traces(marker={"size": 3})
fig.write_html(IMG_HTML / "question2-kmeans100.html")
fig.write_image(IMG_PNG / "question2-kmeans100.png")
fig.write_image(IMG_SVG / "question2-kmeans100.svg")
fig.show()
del fig


# 业务量
now_bn = 5905185.385538206  # 现有业务量
targe_bn = 6350607.1031652  # 目标业务量


def existed(x, y):
    return x not in data[:, 0] and y not in data[:, 1]


remain_grid


angle37_5 = np.pi * 37.5 / 180
anglex = np.pi * 60 / 180

@jit
def is_far(x,y,m,n,c):
    return abs(x-m) > c or abs(y-n) > c
@jit
def is_in(x,y,m,n,c):
    return (x-m)**2 + (y-n)**2 < c**2
@jit
def calcal(remain_grid=remain_grid):
    total_data = []
    now_bn = 5905185.385538206
    targe_bn = 6350607.1031652
    
    remain_grid = np.array(remain_grid.copy())
    for i in prange(len(remain_grid)):
        if not remain_grid[i, 3]:
            x, y, t = int(remain_grid[i, 0]), int(remain_grid[i, 1]), np.random.randint(2) + 1
            t1, t2, t3 = [(angle60*2*(j-1)+i+np.random.uniform(-anglex, anglex)) % (np.pi*2) \
                          for j in range(2, 5)]
            total_data.append([x, y, t1, t2, t3, t])
            now_bn += remain_grid[i, 2]
            for j in prange(i + 1, len(remain_grid)):
                m,n,c = int(remain_grid[j, 0]), int(remain_grid[j, 1]), coverage[t - 1]
                if not is_far(x,y,m,n,c) and is_in(x,y,m,n,c):
                    now_bn += remain_grid[j, 2]
                    remain_grid[j, 3] = 1
            if now_bn > targe_bn:
                break
        if i % 50 == 0:
            print(i, '/', len(remain_grid))
    return total_data, now_bn


total_data, tolbn = calcal()

0 / 108338


print(tolbn)
pd.DataFrame(total_data)

6358173.907919989


pd.DataFrame(
    total_data, 
    columns=['x','y','𝜃1','𝜃2','𝜃3','type'],
).to_csv(DATA_COOKED / "question2-add_sites_angle.csv")


total_sites_data = pd.read_csv(DATA_COOKED / "question2-total_sites_angle.csv", index_col=0)
total_sites_data
add_sites_data = pd.read_csv(DATA_COOKED / "question2-add_sites_angle.csv", index_col=0)
add_sites_data
total_sites_data.append(add_sites_data).to_csv(DATA_COOKED / "question2-total_sites_angle.csv")
total_sites_data  # 最终所有站点数据


SUM = 0
for i in range(len(total_sites_data)):
    SUM += cost[int(total_sites_data.iloc[i, 5]) - 1]
SUM  # 问题2新建站点之前总站点数

10324


add_sites_data


for i in range(len(add_sites_data)):
    SUM += cost[int(add_sites_data.iloc[i, 5]) - 1]
SUM  # 问题2新建站点之后总站点数

10412


def plot_4kinds_sites():
    trace1 = go.Scatter(
        mode='markers', 
        x=weak_grid_data.loc[:, "x"], 
        y=weak_grid_data.loc[:, "y"], 
        marker={"color": "blue", "size": 0.5},
        name="弱覆盖点",
    )
    trace2 = go.Scatter(
        mode='markers', 
        x=total_sites_data.iloc[:1474, 0], 
        y=total_sites_data.iloc[:1474, 1], 
        marker={"color": "pink", "size": 4},
        name="现网基站",
    )
    trace3 = go.Scatter(
        mode='markers', 
        x=total_sites_data.iloc[1474:, 0], 
        y=total_sites_data.iloc[1474:, 1], 
        marker={"color": "orange", "size": 5},
        name="新建基站-1",
    )
    trace4 = go.Scatter(
        mode='markers', 
        x=add_sites_data.iloc[:, 0], 
        y=add_sites_data.iloc[:, 1], 
        marker={"color": "red", "size": 6},
        name="新建基站-2",
    )

    data = [trace1, trace2, trace3]
    fig = go.Figure(data=data)
    fig.write_html(IMG_HTML / "question2-3kinds_sites.html")
    fig.write_image(IMG_PNG / "question2-3kinds_sites.png")
    fig.write_image(IMG_SVG / "question2-3kinds_sites.svg")
#     fig.show()
    del fig
    
    data = [trace1, trace2, trace3, trace4]
    fig = go.Figure(data=data)
    fig.write_html(IMG_HTML / "question2-4kinds_sites.html")
    fig.write_image(IMG_PNG / "question2-4kinds_sites.png")
    fig.write_image(IMG_SVG / "question2-4kinds_sites.svg")
#     fig.show()
    del fig
    return None


plot_4kinds_sites()

	0	1	2	3
0	66.0	1486.0	140.581390	0.0
1	67.0	1486.0	140.518829	0.0
2	177.0	1486.0	48.919178	0.0
3	187.0	1486.0	4.322495	0.0
4	284.0	1486.0	71.528404	1.0
...	...	...	...	...
182802	2350.0	2123.0	0.178571	0.0
182803	2353.0	2123.0	5.159708	0.0
182804	2354.0	2123.0	5.134017	0.0
182805	2355.0	2123.0	2.599999	0.0
182806	2372.0	2123.0	57.814999	0.0

	0	1	2	3
103182	844.0	1962.0	47795.011719	0.0
96789	1356.0	2271.0	43295.000000	0.0
110362	869.0	2292.0	32200.953125	0.0
57442	881.0	1256.0	24688.421875	0.0
107373	1096.0	1658.0	23715.160156	0.0
...	...	...	...	...
179506	2212.0	2423.0	0.000192	0.0
181329	2195.0	2433.0	0.000192	0.0
179502	2208.0	2423.0	0.000192	0.0
11838	2223.0	2460.0	0.000192	0.0
154838	2059.0	2358.0	0.000192	0.0

	0	1	2	3	labels
103182	844.0	1962.0	47795.011719	0.0	24
96789	1356.0	2271.0	43295.000000	0.0	61
110362	869.0	2292.0	32200.953125	0.0	24
57442	881.0	1256.0	24688.421875	0.0	1
107373	1096.0	1658.0	23715.160156	0.0	60
...	...	...	...	...	...
179506	2212.0	2423.0	0.000192	0.0	45
181329	2195.0	2433.0	0.000192	0.0	45
179502	2208.0	2423.0	0.000192	0.0	45
11838	2223.0	2460.0	0.000192	0.0	45
154838	2059.0	2358.0	0.000192	0.0	89

	0	1	2	3	labels
103182	844.0	1962.0	47795.011719	0.0	24
96789	1356.0	2271.0	43295.000000	0.0	61
110362	869.0	2292.0	32200.953125	0.0	24
57442	881.0	1256.0	24688.421875	0.0	1
107373	1096.0	1658.0	23715.160156	0.0	60
...	...	...	...	...	...
179506	2212.0	2423.0	0.000192	0.0	45
181329	2195.0	2433.0	0.000192	0.0	45
179502	2208.0	2423.0	0.000192	0.0	45
11838	2223.0	2460.0	0.000192	0.0	45
154838	2059.0	2358.0	0.000192	0.0	89

	0	1	2	3	4	5
0	844	1962	2.941118	3.542697	5.843110	1
1	1356	2271	2.985915	5.858931	1.994340	2
2	869	2292	3.323346	0.426135	2.267055	1
3	881	1256	4.109999	0.009277	3.834270	2
4	1096	1658	0.579936	2.522800	3.615235	1
5	683	2198	6.118514	3.457958	4.685333	2
6	865	2012	2.266906	2.992941	5.307013	2
7	1335	2206	4.453241	6.275982	1.811592	1
8	1357	1086	5.313147	0.607825	2.838493	2
9	675	1957	5.664622	1.837217	3.970463	1
10	1019	1593	0.152704	2.747127	4.986251	2
11	935	1665	0.874917	4.206048	4.716827	1
12	1489	1228	2.727830	5.605429	0.155014	2
13	699	2014	2.525903	6.121284	1.445451	1
14	1172	1797	4.327217	5.576492	1.411652	1
15	1314	1125	0.174048	1.978144	2.504269	2

问题2¶

读数据¶

处理数据¶

计算总业务量¶

kmeans 聚类¶

根据聚类结果和总业务量选择新建基站¶

绘制所有站点散点图¶

	x	y	𝜃1	𝜃2	𝜃3	type
0	932.0	2210.0	1.570796	2.094395	4.188790	2.0
1	972.0	1238.0	1.570796	2.094395	4.188790	1.0
2	1229.0	1894.0	1.570796	2.094395	4.188790	2.0
3	1311.0	2005.0	1.570796	2.094395	4.188790	1.0
4	1611.0	2233.0	1.570796	1.570796	4.188790	2.0
...	...	...	...	...	...	...
783	1171.0	812.0	1.570796	1.570796	1.165905	1.0
784	250.0	1065.0	1.570796	1.152572	1.570796	1.0
785	2353.0	24.0	1.570796	2.094395	4.188790	1.0
786	2422.0	583.0	1.570796	1.012197	1.107149	2.0
787	2249.0	1453.0	-1.107149	-1.438245	-1.107149	2.0

	x	y	traffic
0	66	1486	140.581390
1	67	1486	140.518829
2	177	1486	48.919178
3	187	1486	4.322495
4	284	1486	71.528404
...	...	...	...
182802	2350	2123	0.178571
182803	2353	2123	5.159708
182804	2354	2123	5.134017
182805	2355	2123	2.599999
182806	2372	2123	57.814999