# TODO import
import os
import sys
import hmz
import scipy
# import mitosheet
import numpy as np
import pandas as pd
from copy import copy

# import sympy
# from sympy import limit
# from sympy import diff
# from sympy import integrals

import sklearn
# import graphviz
from sklearn import tree
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import classification_report, roc_auc_score

import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
plotly.offline.init_notebook_mode()

# import chart_studio
# import chart_studio.plotly as py
# from chart_studio.plotly import plot, iplot
# chart_studio.tools.set_credentials_file(username='zhiliao0824', api_key='qrGJtJRojwpsVJ3nosn0')

import cufflinks as cf
cf.set_config_file(
    offline=True, 
    world_readable=True,
    theme='white',  # cf.getThemes()
)

import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['KaiTi']  # SimHei KaiTi
plt.rcParams['axes.unicode_minus'] = False

import cv2 as cv

# import torch
# import torchvision
# import torch.nn as nn
# import torch.nn.functional as F
# import torch.utils.data as Data
# from torch.utils.data import DataLoader
# from torch.utils.data.dataset import Dataset

# import pylatex
# import latexify

import warnings
warnings.filterwarnings("ignore")

from IPython.core.interactiveshell import InteractiveShell 
# InteractiveShell.ast_node_interactivity = 'all'
# InteractiveShell.ast_node_interactivity = 'last'


def concert_data(
    data,
    orginal_type=["int64", "float64"],
    convert_type=["int32", "float32"],
    category_proportion=0.5,
):
    """转换数据的数据类型，减少其内存空间
    :param data: 待转换数据类型的数据
    """
    # number
    for type_i in range(len(orginal_type)):
        data_converted = data.select_dtypes(
            include=[orginal_type[type_i]], 
        ).astype(convert_type[type_i])
        data[data_converted.columns] = data_converted
    # string
    data_string = data.select_dtypes(include=["object"]).copy()
    for column in data_string.columns:
        unique_num = len(data_string[column].unique())
        total_num = len(data_string[column])
        if unique_num / total_num < category_proportion:
            data.loc[:, column] = data_string[column].astype('category')
    return data


InteractiveShell.ast_node_interactivity = 'all'
sheet_name = ["datat1", "data2", "data3"]

data1 = pd.read_excel('C题数据.xlsx', sheet_name=sheet_name[0])[:-1]
data1_uninternotated = copy(data1[0::2])  # 未插层--插层前
data1_uninternotated.fillna(0, inplace=True)  # NaN
data1_uninternotated.reset_index(drop=True, inplace=True)  # index
data1_uninternotated = concert_data(data1_uninternotated)  # str:object -> str:category, 64 -> 32
data1_uninternotated
data1_uninternotated.info()
data1_uninternotated_unnumbered = copy(data1_uninternotated.drop(columns=['编号']))
data1_uninternotated_unnumbered
data1_uninternotated_unnumbered.info()
data1_interpolated = copy(data1[1::2])  # 插层--插层后
data1_interpolated.reset_index(drop=True, inplace=True)  # index
data1_interpolated = concert_data(data1_interpolated)  # str:object -> str:category, 64 -> 32
data1_interpolated.iloc[:, 0] = data1_uninternotated.iloc[:, 0]  # 组号
data1_interpolated
data1_interpolated.info()
data1_interpolated_unnumbered = copy(data1_interpolated.drop(columns=['编号']))
data1_interpolated_unnumbered
data1_interpolated_unnumbered.info()

data2 = pd.read_excel('C题数据.xlsx', sheet_name=sheet_name[1], index_col=0, header=0)[3:7]
data2.dropna(axis=1, inplace=True)  # NaN
data2 = concert_data(data2)  # 64 -> 32
data2  # index:接收距离(cm), columns:热风速度(r/min)
data2.info()

data3 = pd.read_excel('C题数据.xlsx', sheet_name=sheet_name[2])
data3 = concert_data(data3)  # 64 -> 32
data3
data3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   组号         25 non-null     float32 
 1   编号         25 non-null     category
 2   厚度mm       25 non-null     float32 
 3   孔隙率（%）     25 non-null     float32 
 4   压缩回弹性率（%）  25 non-null     float32 
 5   过滤阻力Pa     25 non-null     float32 
 6   过滤效率（%）    25 non-null     float32 
 7   透气性 mm/s   25 non-null     float32 
 8   插层率（%）     25 non-null     float32 
dtypes: category(1), float32(8)
memory usage: 1.0 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   组号         25 non-null     float32
 1   厚度mm       25 non-null     float32
 2   孔隙率（%）     25 non-null     float32
 3   压缩回弹性率（%）  25 non-null     float32
 4   过滤阻力Pa     25 non-null     float32
 5   过滤效率（%）    25 non-null     float32
 6   透气性 mm/s   25 non-null     float32
 7   插层率（%）     25 non-null     float32
dtypes: float32(8)
memory usage: 928.0 bytes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   组号         25 non-null     float32 
 1   编号         25 non-null     category
 2   厚度mm       25 non-null     float32 
 3   孔隙率（%）     25 non-null     float32 
 4   压缩回弹性率（%）  25 non-null     float32 
 5   过滤阻力Pa     25 non-null     float32 
 6   过滤效率（%）    25 non-null     float32 
 7   透气性 mm/s   25 non-null     float32 
 8   插层率（%）     25 non-null     float32 
dtypes: category(1), float32(8)
memory usage: 1.0 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   组号         25 non-null     float32
 1   厚度mm       25 non-null     float32
 2   孔隙率（%）     25 non-null     float32
 3   压缩回弹性率（%）  25 non-null     float32
 4   过滤阻力Pa     25 non-null     float32
 5   过滤效率（%）    25 non-null     float32
 6   透气性 mm/s   25 non-null     float32
 7   插层率（%）     25 non-null     float32
dtypes: float32(8)
memory usage: 928.0 bytes
<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 20 to 35
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   800     4 non-null      float32
 1   900     4 non-null      float32
 2   1000    4 non-null      float32
 3   1100    4 non-null      float32
 4   1200    4 non-null      float32
dtypes: float32(5)
memory usage: 112.0+ bytes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75 entries, 0 to 74
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   接收距离(cm)     75 non-null     int32  
 1   热风速度(r/min)  75 non-null     int32  
 2   厚度mm         75 non-null     float32
 3   孔隙率（%）       75 non-null     float32
 4   压缩回弹性率（%）    75 non-null     float32
 5   过滤阻力Pa       75 non-null     float32
 6   过滤效率（%）      75 non-null     float32
 7   透气性 mm/s     75 non-null     float32
dtypes: float32(6), int32(2)
memory usage: 2.5 KB


# TODO 数据未标准化箱型图

# 插层前
data1_uninternotated_unnumbered.iloc[:, 1:-1].iplot(
    kind='box',
    title='插层前',
    colors=['#A1A9D0', '#F0988C', '#B883D4', '#9E9E9E', '#CFEAF1', '#C4A5DE'], 
    boxpoints='outliers',
)
# 插层后
data1_interpolated_unnumbered.iloc[:, 1:-1].iplot(
    kind='box',
    title='插层后',
    colors=['#A1A9D0', '#F0988C', '#B883D4', '#9E9E9E', '#CFEAF1', '#C4A5DE'], 
    boxpoints='suspectedoutliers',
)


# TODO 数据标准化箱型图
from hmz.math_model.process import Dimensionalize

# 插层前--数据未标准化箱型图
dim = Dimensionalize(data1_uninternotated_unnumbered.iloc[:, 1:-1])
data1_uninternotated_unnumbered_std = dim.fit(method='standard')
data1_uninternotated_unnumbered_std.iplot(
    kind='box',
    title='插层前结构变量与产品性能',
    colors=['#A1A9D0', '#F0988C', '#B883D4', '#9E9E9E', '#CFEAF1', '#C4A5DE', '#F6CAE5'], 
    boxpoints='all',
)
data1_uninternotated_unnumbered_std.figure(
    kind='box',
    title='插层前结构变量与产品性能',
    colors=['#A1A9D0', '#F0988C', '#B883D4', '#9E9E9E', '#CFEAF1', '#C4A5DE', '#F6CAE5'], 
    boxpoints='all',
).write_image('./img/问题1-插层前结构变量与产品性能箱型图.svg')

# 插层后--数据标准化箱型图
dim = Dimensionalize(data1_interpolated_unnumbered.iloc[:, 1:-1])
data1_interpolated_unnumbered_std = dim.fit(method='standard')
data1_interpolated_unnumbered_std.iplot(
    kind='box',
    title='插层后结构变量与产品性能',
    colors=['#A1A9D0', '#F0988C', '#B883D4', '#9E9E9E', '#CFEAF1', '#C4A5DE', '#F6CAE5'], 
    boxpoints='all',
)
data1_interpolated_unnumbered_std.figure(
    kind='box',
    title='插层后结构变量与产品性能',
    colors=['#A1A9D0', '#F0988C', '#B883D4', '#9E9E9E', '#CFEAF1', '#C4A5DE', '#F6CAE5'], 
    boxpoints='all',
).write_image('./img/问题1-插层后结构变量与产品性能箱型图.svg')


InteractiveShell.ast_node_interactivity = 'last'

colors = ["#929fff", "#9de0ff", "#ffa897", "#af87fe", "#7dc3fe", "#bb60b2", 
          "#433e7c", "#f47a75", "#009db2", "#024b51", "#0780cf", "#765005"]

plot_line_data1 = copy(data1_uninternotated_unnumbered.iloc[:, 1:-1])
plot_line_data1.columns = list(map(lambda x: '插层前-' + x, plot_line_data1.columns))
plot_line_data2 = copy(data1_interpolated_unnumbered.iloc[:, 1:-1])
plot_line_data2.columns = list(map(lambda x: '插层后-' + x, plot_line_data2.columns))
plot_line_data = pd.concat([plot_line_data1, plot_line_data2], axis=1)
plot_line_data
plot_line_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   插层前-厚度mm       25 non-null     float32
 1   插层前-孔隙率（%）     25 non-null     float32
 2   插层前-压缩回弹性率（%）  25 non-null     float32
 3   插层前-过滤阻力Pa     25 non-null     float32
 4   插层前-过滤效率（%）    25 non-null     float32
 5   插层前-透气性 mm/s   25 non-null     float32
 6   插层后-厚度mm       25 non-null     float32
 7   插层后-孔隙率（%）     25 non-null     float32
 8   插层后-压缩回弹性率（%）  25 non-null     float32
 9   插层后-过滤阻力Pa     25 non-null     float32
 10  插层后-过滤效率（%）    25 non-null     float32
 11  插层后-透气性 mm/s   25 non-null     float32
dtypes: float32(12)
memory usage: 1.3 KB


InteractiveShell.ast_node_interactivity = 'last'

fig = plotly.subplots.make_subplots(rows=3, cols=2, subplot_titles=data1_interpolated_unnumbered.columns[1:])
for i in range(6):
    row, col = i // 2 + 1, i % 2 + 1
    trace_pre = go.Scatter(
        x=plot_line_data.index, 
        y=plot_line_data.iloc[:, 0 + i], 
        line=dict(color=colors[2 * i]), 
#         width=4,
        name=plot_line_data.columns[0 + i], 
        legendgroup=data1_interpolated_unnumbered.columns[1:][i],
#         marker=dict(colorscale="RdBu"),
    )
    fig.append_trace(trace_pre, row, col)
    trace_suc = go.Scatter(
        x=plot_line_data.index, 
        y=plot_line_data.iloc[:, 6 + i], 
        line=dict(color=colors[2 * i + 1]), 
#         dash='dash',  # 'dash', 'dot', and 'dashdot'
        name=plot_line_data.columns[6 + i], 
        legendgroup=data1_interpolated_unnumbered.columns[1:][i],
#         marker=dict(colorscale="Viridis"),
    )
    fig.append_trace(trace_suc, row, col)
fig['layout'].update(
    height=800, 
    width=1200, 
    title='插层前后结构变量与产品性能对比',
    template='plotly_white',
)
fig.write_image('./img/问题1-插层前后结构变量与产品性能对比.svg')
fig.show()


# TODO 矩阵散点图&相关系数热力图
def plot_ScatterMatrix_Heatmap(data, fig_pre_name, save=False, height=900, width=1100):
    # 矩阵散点图
    fig = px.scatter_matrix(
        data, 
        title=fig_pre_name + '的矩阵散点图', 
    )
    if save:
        fig.write_image('./img/问题1-' + fig_pre_name + '的矩阵散点图.svg', height=900, width=1100)
    fig.show()

    # 相关系数热力图
    corrs = data.corr(method='pearson')  # 'pearson', 'kendall', 'spearman'
    figure = ff.create_annotated_heatmap(
        z=corrs.values,
        x=list(corrs.columns),
        y=list(corrs.index), 
        annotation_text=corrs.round(3).values, 
        showscale=True, 
        colorscale='blues', 
    )
    figure.update_layout(title=fig_pre_name + '的相关系数热力图')
    if save:
        figure.write_image('./img/问题1-' + fig_pre_name + '的相关系数热力图.svg', height=650, width=800)
    figure.show()
    return None
save = True
plot_ScatterMatrix_Heatmap(data1_uninternotated_unnumbered_std, '插层前结构变量、产品性能', save=save)
plot_ScatterMatrix_Heatmap(data1_interpolated_unnumbered_std, '插层后结构变量、产品性能', save=save)


plot_ScatterMatrix_Heatmap(
    data1_interpolated_unnumbered.iloc[:, 1:] - data1_uninternotated_unnumbered.iloc[:, 1:], 
    '插层前后结构变量、产品性能差值',
    save=True,
)


from hmz.math_model.evaluate import GRA

InteractiveShell.ast_node_interactivity = 'all'

gra_data = data1_interpolated_unnumbered.iloc[:, 1:] - data1_uninternotated_unnumbered.iloc[:, 1:]
columns = list(gra_data.columns)
gra_data = gra_data.loc[:, columns[-1:] + columns[:-1]]
gra_data
gra_data.info()

gra = GRA(gra_data)
gra_score = gra.score()
gamma = pd.DataFrame(gra.gamma(), index=list(gra_data.columns[1:]), columns=['灰色关联度']).T
gamma
gamma.to_csv('./灰色关联度.csv')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   插层率（%）     25 non-null     float32
 1   厚度mm       25 non-null     float32
 2   孔隙率（%）     25 non-null     float32
 3   压缩回弹性率（%）  25 non-null     float32
 4   过滤阻力Pa     25 non-null     float32
 5   过滤效率（%）    25 non-null     float32
 6   透气性 mm/s   25 non-null     float32
dtypes: float32(7)
memory usage: 828.0 bytes
灰色关联度:
[0.8959108  0.8476055  0.7327718  0.8260256  0.81745905 0.7853022 ]

灰色关联系数:
        厚度mm    孔隙率（%）  压缩回弹性率（%）    过滤阻力Pa   过滤效率（%）  透气性 mm/s
0   0.576020  0.796022   0.835506  1.475805  0.539845  1.672746
1   0.081890  0.367839   1.015504  0.492871  0.526189  1.248016
2   0.044790  0.559471   0.334995  1.056592  0.739672  0.312419
3   0.357915  0.205906   0.858976  0.254423  0.696714  0.885537
4   0.124973  0.707635   2.756264  0.726668  0.989451  0.567735
5   0.143453  0.082528   0.737080  0.373357  0.996039  1.062008
6   0.103448  0.060354   1.874436  0.838196  1.023852  1.361789
7   0.540669  0.159046   0.967954  0.041888  0.840206  0.160957
8   0.013843  0.216058   1.575688  0.199313  0.092507  0.008802
9   0.220045  0.035701   1.199816  0.665509  0.513998  0.033619
10  0.766514  1.115243   3.173759  0.775920  0.469974  2.031568
11  0.580944  0.442825   0.389211  0.175319  0.535077  1.325751
12  0.566117  0.550143   1.119752  2.088241  0.101250  0.870792
13  0.027254  0.204327   0.496543  0.153469  0.287851  0.652056
14  0.980026  0.856734   1.601493  1.408600  0.500449  0.093232
15  0.047278  0.560338   5.564355  0.107109  0.198861  0.701269
16  0.402163  0.985698   1.011925  0.038035  0.526750  0.859836
17  2.015413  1.954904   3.102322  1.622404  1.564539  1.809826
18  0.249025  0.440073   0.884174  0.595368  1.846941  1.225723
19  0.033029  0.341657   1.085884  0.349513  0.872984  0.789899
20  0.307778  1.436262   0.376130  0.794689  0.087483  0.370383
21  0.096979  0.851340   0.140334  0.594132  0.365314  0.535028
22  0.064413  0.694168   0.294091  0.516173  0.651408  0.460421
23  0.661943  0.133899   0.174712  0.509735  1.080606  1.062489
24  0.456667  0.339848   0.317768  0.766285  0.948687  1.173318

权重:
           厚度mm    孔隙率（%）  压缩回弹性率（%）    过滤阻力Pa   过滤效率（%）  透气性 mm/s
weight  0.18265  0.172802   0.149391  0.168402  0.166656    0.1601

最终得分:
    final score
0      0.973884
1      0.600727
2      0.506125
3      0.530008
4      0.935031
5      0.549473
6      0.839154
7      0.443688
8      0.325647
9      0.428718
10     1.341093
11     0.571725
12     0.873699
13     0.292675
14     0.901835
15     1.100177
16     0.626808
17     1.993092
18     0.857920
19     0.558102
20     0.568300
21     0.432384
22     0.444852
23     0.606176
24     0.664604

灰色关联度:
[0.8959108  0.8476055  0.7327718  0.8260256  0.81745905 0.7853022 ]


data1_interpolated_unnumbered.iloc[:, 1:].sort_values(by='插层率（%）').iplot(x='插层率（%）')
# data1_interpolated_unnumbered.iloc[:, 1:].sort_values(by='插层率（%）').iplot(x='插层率（%）', y='厚度mm')
# data1_interpolated_unnumbered.iloc[:, 1:].sort_values(by='插层率（%）').iplot(x='插层率（%）', y='孔隙率（%）')
# data1_interpolated_unnumbered.iloc[:, 1:].sort_values(by='插层率（%）').iplot(x='插层率（%）', y='压缩回弹性率（%）')
# data1_interpolated_unnumbered.iloc[:, 1:].sort_values(by='插层率（%）').iplot(x='插层率（%）', y='过滤阻力Pa')
# data1_interpolated_unnumbered.iloc[:, 1:].sort_values(by='插层率（%）').iplot(x='插层率（%）', y='过滤效率（%）')
# data1_interpolated_unnumbered.iloc[:, 1:].sort_values(by='插层率（%）').iplot(x='插层率（%）', y='透气性 mm/s')


InteractiveShell.ast_node_interactivity = 'all'

data3_data2 = copy(data3.iloc[:, :2].drop_duplicates())
data2_data1 = data3_data2.reset_index(drop=True, inplace=False)  # index

def add_specf_df(data, is_numbered, specf_df=data2_data1):
    data = pd.concat([data, specf_df], axis=1)
    columns = list(data.columns)
    i = 2 if is_numbered else 1
    columns = copy(columns[:i] + columns[-2:] + columns[i:-2])
    data = data[columns]
    return data

def update_df(df1, df2, df3, df4):
    df1 = add_specf_df(df1, is_numbered=True)
    df2 = add_specf_df(df2, is_numbered=False)
    df3 = add_specf_df(df3, is_numbered=True)
    df4 = add_specf_df(df4, is_numbered=False)
    return df1, df2, df3, df4

data1_uninternotated, data1_uninternotated_unnumbered, data1_interpolated, data1_interpolated_unnumbered = \
    update_df(data1_uninternotated, data1_uninternotated_unnumbered, 
              data1_interpolated, data1_interpolated_unnumbered)
data1_uninternotated
data1_uninternotated.info()
data1_uninternotated_unnumbered
data1_uninternotated_unnumbered.info()
data1_interpolated
data1_interpolated.info()
data1_interpolated_unnumbered
data1_interpolated_unnumbered.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   组号           25 non-null     float32 
 1   编号           25 non-null     category
 2   接收距离(cm)     25 non-null     int32   
 3   热风速度(r/min)  25 non-null     int32   
 4   厚度mm         25 non-null     float32 
 5   孔隙率（%）       25 non-null     float32 
 6   压缩回弹性率（%）    25 non-null     float32 
 7   过滤阻力Pa       25 non-null     float32 
 8   过滤效率（%）      25 non-null     float32 
 9   透气性 mm/s     25 non-null     float32 
 10  插层率（%）       25 non-null     float32 
dtypes: category(1), float32(8), int32(2)
memory usage: 1.2 KB

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   组号           25 non-null     float32
 1   接收距离(cm)     25 non-null     int32  
 2   热风速度(r/min)  25 non-null     int32  
 3   厚度mm         25 non-null     float32
 4   孔隙率（%）       25 non-null     float32
 5   压缩回弹性率（%）    25 non-null     float32
 6   过滤阻力Pa       25 non-null     float32
 7   过滤效率（%）      25 non-null     float32
 8   透气性 mm/s     25 non-null     float32
 9   插层率（%）       25 non-null     float32
dtypes: float32(8), int32(2)
memory usage: 1.1 KB

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   组号           25 non-null     float32 
 1   编号           25 non-null     category
 2   接收距离(cm)     25 non-null     int32   
 3   热风速度(r/min)  25 non-null     int32   
 4   厚度mm         25 non-null     float32 
 5   孔隙率（%）       25 non-null     float32 
 6   压缩回弹性率（%）    25 non-null     float32 
 7   过滤阻力Pa       25 non-null     float32 
 8   过滤效率（%）      25 non-null     float32 
 9   透气性 mm/s     25 non-null     float32 
 10  插层率（%）       25 non-null     float32 
dtypes: category(1), float32(8), int32(2)
memory usage: 1.2 KB

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   组号           25 non-null     float32
 1   接收距离(cm)     25 non-null     int32  
 2   热风速度(r/min)  25 non-null     int32  
 3   厚度mm         25 non-null     float32
 4   孔隙率（%）       25 non-null     float32
 5   压缩回弹性率（%）    25 non-null     float32
 6   过滤阻力Pa       25 non-null     float32
 7   过滤效率（%）      25 non-null     float32
 8   透气性 mm/s     25 non-null     float32
 9   插层率（%）       25 non-null     float32
dtypes: float32(8), int32(2)
memory usage: 1.1 KB


from sklearn.linear_model import LinearRegression       # 线性回归
from sklearn.linear_model import Lasso, LassoCV         # 岭回归
from sklearn.linear_model import Ridge, RidgeCV         # LASSO回归
from sklearn.neighbors import KNeighborsRegressor       # KNN回归
from sklearn.neural_network import MLPRegressor         # 多层感知机回归
from sklearn.svm import SVR                             # SVM回归
from sklearn.tree import DecisionTreeRegressor          # 决策树回归
from sklearn.tree import ExtraTreeRegressor             # 极限树回归
from sklearn.ensemble import RandomForestRegressor      # 随机森林回归
from sklearn.ensemble import AdaBoostRegressor          # AdaBoost回归
from sklearn.ensemble import GradientBoostingRegressor  # 梯度提升回归
from sklearn.ensemble import BaggingRegressor           # Bagging回归

from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor

from catboost import CatBoostRegressor
from xgboost import XGBRegressor, XGBRFRegressor
from lightgbm import LGBMRegressor

import joblib

from hmz.math_model.predict import predict_accuracy


# from sklearn.preprocessing import MinMaxScaler 

XY = copy(data3.iloc[:, :5])
X = np.array(XY.iloc[:, :2])
Y = np.array(XY.iloc[:, 2:])

# mms = MinMaxScaler()
# X = mms.fit_transform(X)
# Y = mms.fit_transform(Y)

colors = ["red", "lightpink", "darkorange", "khaki", "green", "lightgreen", "blue", "lightblue"]
colors = ["red", "lightpink", "green", "lightgreen", "blue", "lightblue"]

xpred = np.array([
    [38, 33, 28, 23, 38, 33, 28, 23],
    [850, 950, 1150, 1250, 1250, 1150, 950, 850],
]).T

def score(model, X, Y, xtrain, xtest, ytrain, ytest, Ypred):
    accuracy = predict_accuracy(Y, Ypred, type=1, th=0.005, con=0.8)
    print("准确率：", accuracy)
#     print("训练集分数：R^2 =", model.score(xtrain, ytrain))  # 负值: 拟合效果糟糕，模型完全不能使用
#     print("测试集分数：R^2 =", model.score(xtest, ytest))  # -> 1: 拟合效果很好，模型可以使用
#     print(r2_score(model.predict(xtrain), ytrain))
#     print(r2_score(model.predict(xtest), ytest))
#     scores = cross_val_score(model, X, Y, cv=10, scoring="r2")
#     print(scores.mean())
    return None

models = [
    LinearRegression(),
    Lasso(),
    Ridge(),
    KNeighborsRegressor(),
    MLPRegressor(),
    SVR(kernel='rbf', degree=200, gamma='auto'),  # 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'; ``'scale', 'auto'
    DecisionTreeRegressor(),
    ExtraTreeRegressor(),
    RandomForestRegressor(),
    AdaBoostRegressor(),
    GradientBoostingRegressor(),
    BaggingRegressor(),
    HistGradientBoostingRegressor(), 
    CatBoostRegressor(verbose=0), 
    XGBRegressor(), 
    XGBRFRegressor(), 
    LGBMRegressor(), 
]
models_name = [
    "线性回归",
    "LASSO套索回归",
    "Ridge岭回归",
    "KNN回归",
    "多层感知机回归",
    "SVM回归",
    "决策树回归",
    "极限树回归",
    "随机森林回归",
    "AdaBoost回归",
    "梯度提升回归",
    "Bagging回归",
    "基于直方图的梯度提升回归",
    "CatBoost回归",
    "XGBoost回归",
    "XGBoostRF回归",
    "LightGBM回归",
]
models_select = {
    "线性回归": False,
    "LASSO套索回归": False,
    "Ridge岭回归": False,
    "KNN回归": False,
    "多层感知机回归": False,
    "SVM回归": False,
    "决策树回归": False,
    "极限树回归": True,  # 1
    "随机森林回归": True,  # 2
    "AdaBoost回归": False,
    "梯度提升回归": False,
    "Bagging回归": False,
    "基于直方图的梯度提升回归": False,
    "CatBoost回归": False,
    "XGBoost回归": True,  # 0
    "XGBoostRF回归": False,
    "LightGBM回归": False,
}
saved_models_name = ['xgb', 'et', 'rf']

def run_Regressor_models(
    x=xpred, X=X, Y=Y, colors=colors, plot_fig=False, save_fig=False, 
    models=models, models_name=models_name, models_select=models_select,
    saved_models_name=saved_models_name, save_model=False,
):
    """
    :param x: np.ndarray
    :param X: np.ndarray
    :param Y: np.ndarray
    XGBoost(), ET(), RF()
    """
    xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.3)
    ypreds_s = []
    for model, model_name, model_idx in zip(models, models_name, range(len(models_name))):
#         print(models_select[model_name])
        if not models_select[model_name]:
            continue
        print("模型：", model_name)
        ypreds = []
        ytests = []
        for i in range(Y.shape[1]):
            # 训练
            model.fit(xtrain, ytrain[:, i])
            # 测试
            ytest_ = model.predict(X)
            score(model, X, Y[:, i], xtrain, xtest, ytrain[:, i], ytest[:, i], ytest_)
            ytests.append(list(ytest_))
            # 预测
            ypred = model.predict(x)
            ypreds.append(list(ypred))
            # 保存
            if save_model:
                if model_name == "XGBoost回归" and i == 0:
                    joblib.dump(model, './saved_model/pre0_xgb.pkl')
                elif model_name == "极限树回归" and i == 1:
                    joblib.dump(model, './saved_model/pre1_et.pkl')
                elif model_name == "随机森林回归" and i == 2:
                    joblib.dump(model, './saved_model/pre2_rf.pkl')
        ypreds_s.append(ypreds)
        
        if plot_fig:
            # 画图
            ytests = pd.DataFrame(ytests, index=list(XY.columns[2:5])).T
            ytests.columns = map(lambda x: x + '-预测值', ytests.columns)
            ytesttemp = pd.DataFrame(Y, columns=list(XY.columns[2:5]))
            ytesttemp.columns = map(lambda x: x + '-真实值', ytesttemp.columns)
            yplot = pd.concat([ytesttemp, ytests], axis=1)
    #         print(yplot.shape)
    #         print(yplot.head())
    #         print(ypreds.shape)
    #         print(ypreds.head())
    #         print(ytest_temp.shape)
    #         print(ytest_temp.head())
            
            title = '基于' + model_name + '的预测模型'
            yplot.iplot(
                color=colors[0::2] + colors[1::2],
                title=title,
            )
            if save_fig:
                yplot.figure(
                    color=colors[0::2] + colors[1::2],
                    title=title,
                ).write_image("./img/问题2-" + title + ".svg")

            yplot.columns = ['真实值'] * 3 + ['预测值'] * 3
            columns = list(XY.columns[2:5])
            for i in range(3):
                title = '基于' + model_name + '的预测模型——' + columns[i]
                yplot.iloc[:, i::3].iplot(
                    kind='spread',
                    color=[colors[i * 2 + 1], colors[i * 2]],
                    title=title,
                )
                if save_fig:
                    yplot.iloc[:, i::3].figure(
                        kind='spread',
                        color=[colors[i * 2 + 1], colors[i * 2]],
                        title=title,
                    ).write_image("./img/问题2-" + title + ".svg")
        
        print('-'*125)
    return ypreds_s
all_models_ypreds_q2 = np.array(run_Regressor_models(plot_fig=True, save_fig=True, save_model=True))
all_models_ypreds_q2

模型： 极限树回归
准确率： 0.9676401773238965
准确率： 0.9249176664432979
准确率： 0.9249265354576991

-----------------------------------------------------------------------------------------------------------------------------
模型： 随机森林回归
准确率： 0.6038533538344204
准确率： 0.935574843455885
准确率： 0.9142364183417401

-----------------------------------------------------------------------------------------------------------------------------
模型： XGBoost回归
准确率： 0.9676220567263663
准确率： 0.9355845125522465
准确率： 0.9249269273051371

-----------------------------------------------------------------------------------------------------------------------------

array([[[ 2.76130402,  2.39697003,  2.77314377,  2.40274191,
          3.15791488,  2.77314377,  2.39697003,  1.98269733],
        [96.31589127, 96.51655833, 96.37236786, 95.9524231 ,
         96.80519867, 96.39583969, 95.08143107, 94.77352142],
        [84.70593262, 87.12225342, 86.48152542, 85.13269043,
         83.70683289, 85.77477264, 88.2532196 , 86.91458639]],

       [[ 2.7823657 ,  2.71739246,  2.7690421 ,  2.59206242,
          3.50957404,  3.08188874,  2.38730479,  1.90849573],
        [96.14445255, 96.32734538, 96.45573221, 96.01032563,
         96.54976737, 96.43939534, 95.66271658, 94.62646967],
        [84.76908995, 87.14407931, 87.36721671, 86.32856071,
         83.96126417, 87.01517197, 88.23864825, 87.66418777]],

       [[ 3.0490427 ,  2.92080164,  2.85151792,  2.61077833,
          3.5192771 ,  3.1576283 ,  2.61824155,  2.16034007],
        [96.66072083, 96.51689911, 96.34745789, 95.95244598,
         96.51647186, 96.80378723, 96.30002594, 95.08094788],
        [85.32053375, 87.29962158, 86.48087311, 86.35616302,
         83.70688629, 85.77590942, 88.41117096, 87.93922424]]])


# TODO save answer
writer = pd.ExcelWriter('question2-answer.xlsx')
sheet_names = ["极限树回归", "随机森林回归", "XGBoost回归"]
for i in range(3):
    data_temp = pd.DataFrame(all_models_ypreds_q2[i], index=list(XY.columns)[-3:]).T
    data_temp.to_excel(writer, sheet_names[i])
writer.save()
writer.close()


from hmz.math_model.evaluate import PP, PPR


XY = copy(data3.iloc[:, 2:])
X1 = XY.iloc[:, :3]
X2= XY.iloc[:, 3:]

pp = PP(X1)
pp.forward(
    columns=[0, 1],
    process_type=[1, 1],
    process_values=[None, None],
)
weight, _ = pp.weight()
print(weight)  # [ 0.02938399  0.99932456 -0.02206817]

pp = PP(X2)
pp.forward(
    columns=[0, 1],
    process_type=[1, 1],
    process_values=[None, None],
)
weight, _ = pp.weight()
print(weight)  # [-0.66214501 -0.16667322  0.73060524]

0
正向化处理后的矩阵: 
       厚度mm     孔隙率（%）  压缩回弹性率（%）
0   2.76601  96.204430  84.763268
1   2.75493  95.879204  84.969391
2   2.75659  95.965271  84.648598
3   3.05117  96.114388  85.315208
4   3.05148  96.924713  85.365067
..      ...        ...        ...
70  2.32876  95.012779  86.629562
71  2.32818  95.307404  85.896751
72  2.42207  96.046860  85.250839
73  2.40229  95.588753  85.144913
74  2.40319  95.913101  85.120468

[75 rows x 3 columns]
标准化处理后的矩阵: 
       厚度mm   孔隙率（%）  压缩回弹性率（%）
0   0.12057  0.11586    0.11300
1   0.12009  0.11547    0.11327
2   0.12016  0.11557    0.11285
3   0.13300  0.11575    0.11374
4   0.13302  0.11673    0.11380
..      ...      ...        ...
70  0.10151  0.11442    0.11549
71  0.10149  0.11478    0.11451
72  0.10558  0.11567    0.11365
73  0.10472  0.11512    0.11351
74  0.10476  0.11551    0.11348

[75 rows x 3 columns]

[ 0.83322882  0.5446984  -0.09504405]
0
正向化处理后的矩阵: 
       过滤阻力Pa    过滤效率（%）    透气性 mm/s
0   24.756050  48.211700  654.149170
1   26.008209  51.443321  609.854614
2   26.222300  49.679939  635.112183
3   27.167860  48.599380  521.793030
4   25.323790  45.963821  553.848633
..        ...        ...         ...
70  28.862761  80.012337  206.124680
71  30.559219  81.993637  206.336365
72  28.296640  83.095757  209.181519
73  27.499001  83.201111  208.566986
74  29.158541  82.628143  210.166260

[75 rows x 3 columns]
标准化处理后的矩阵: 
     过滤阻力Pa  过滤效率（%）  透气性 mm/s
0   0.09770  0.10481   0.17094
1   0.10264  0.11183   0.15936
2   0.10348  0.10800   0.16596
3   0.10722  0.10565   0.13635
4   0.09994  0.09992   0.14473
..      ...      ...       ...
70  0.11391  0.17394   0.05386
71  0.12060  0.17824   0.05392
72  0.11167  0.18064   0.05466
73  0.10852  0.18087   0.05450
74  0.11507  0.17962   0.05492

[75 rows x 3 columns]

[-0.29524646 -0.75424891  0.58646237]


from sklearn.linear_model import LinearRegression       # 线性回归
from sklearn.linear_model import Lasso, LassoCV         # 岭回归
from sklearn.linear_model import Ridge, RidgeCV         # LASSO回归
from sklearn.neighbors import KNeighborsRegressor       # KNN回归
from sklearn.neural_network import MLPRegressor         # 多层感知机回归
from sklearn.svm import SVR                             # SVM回归
from sklearn.tree import DecisionTreeRegressor          # 决策树回归
from sklearn.tree import ExtraTreeRegressor             # 极限树回归
from sklearn.ensemble import RandomForestRegressor      # 随机森林回归
from sklearn.ensemble import AdaBoostRegressor          # AdaBoost回归
from sklearn.ensemble import GradientBoostingRegressor  # 梯度提升回归
from sklearn.ensemble import BaggingRegressor           # Bagging回归

from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor

from catboost import CatBoostRegressor
from xgboost import XGBRegressor, XGBRFRegressor
from lightgbm import LGBMRegressor

from hmz.math_model.predict import predict_accuracy


XY = copy(data3.iloc[:, 2:])
X = np.array(XY.iloc[:, :3])
Y = np.array(XY.iloc[:, 3:])

colors = ["red", "lightpink", "darkorange", "khaki", "green", "lightgreen", "blue", "lightblue"]
colors = ["red", "lightpink", "green", "lightgreen", "blue", "lightblue"]

xpred = np.array([
    [38, 33, 28, 23, 38, 33, 28, 23],
    [850, 950, 1150, 1250, 1250, 1150, 950, 850],
]).T

def score(model, X, Y, xtrain, xtest, ytrain, ytest, Ypred):
    accuracy = predict_accuracy(Y, Ypred, type=1, th=0.05, con=0.8)
    print("准确率：", accuracy)
#     print("训练集分数：R^2 =", model.score(xtrain, ytrain))  # 负值: 拟合效果糟糕，模型完全不能使用
#     print("测试集分数：R^2 =", model.score(xtest, ytest))  # -> 1: 拟合效果很好，模型可以使用
#     print(r2_score(model.predict(xtrain), ytrain))
#     print(r2_score(model.predict(xtest), ytest))
#     scores = cross_val_score(model, X, Y, cv=10, scoring="r2")
#     print(scores.mean())
    return None

models = [
    LinearRegression(),
    Lasso(),
    Ridge(),
    KNeighborsRegressor(),
    MLPRegressor(),
    SVR(kernel='rbf', degree=100, gamma='auto'),  # 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'; ``'scale', 'auto'
    DecisionTreeRegressor(),
    ExtraTreeRegressor(),
    RandomForestRegressor(),
    AdaBoostRegressor(),
    GradientBoostingRegressor(),
    BaggingRegressor(),
    HistGradientBoostingRegressor(), 
    CatBoostRegressor(verbose=0), 
    XGBRegressor(), 
    XGBRFRegressor(), 
    LGBMRegressor(), 
]
models_name = [
    "线性回归",
    "LASSO套索回归",
    "Ridge岭回归",
    "KNN回归",
    "多层感知机回归",
    "SVM回归",
    "决策树回归",
    "极限树回归",
    "随机森林回归",
    "AdaBoost回归",
    "梯度提升回归",
    "Bagging回归",
    "基于直方图的梯度提升回归",
    "CatBoost回归",
    "XGBoost回归",
    "XGBoostRF回归",
    "LightGBM回归",
]
models_select = {
    "线性回归": False,
    "LASSO套索回归": False,
    "Ridge岭回归": False,
    "KNN回归": False,
    "多层感知机回归": False,
    "SVM回归": False,
    "决策树回归": False,
    "极限树回归": False,
    "随机森林回归": False,
    "AdaBoost回归": False,
    "梯度提升回归": False,
    "Bagging回归": False,
    "基于直方图的梯度提升回归": False,
    "CatBoost回归": False,
    "XGBoost回归": True,
    "XGBoostRF回归": False,
    "LightGBM回归": False,
}
def run_Regressor_models(
    x=xpred, X=X, Y=Y, colors=colors, plot_fig=False, save_fig=False, 
    models=models, models_name=models_name, models_select=models_select,
    save_model=False,
):
    """
    :param x: np.ndarray
    :param X: np.ndarray
    :param Y: np.ndarray
    """
    xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.3)
    ypreds_s = []
    for model, model_name, model_select in zip(models, models_name, models_select.keys()):
        if not models_select[model_select]:
            continue
        print("模型：", model_name)
        ytests = []
        ypreds = []
        for i in range(Y.shape[1]):
            # 训练
            model.fit(xtrain, ytrain[:, i])
            # 预测
            ytest_ = model.predict(X)
            score(model, X, Y[:, i], xtrain, xtest, ytrain[:, i], ytest[:, i], ytest_)
            ytests.append(list(ytest_))
#             # 预测
#             ypred = model.predict(x)
#             ypreds.append(list(ypred))
            # 保存
            if i == 0:
                joblib.dump(model, './saved_model/suc0_xgb.pkl')
            elif i == 1:
                joblib.dump(model, './saved_model/suc1_xgb.pkl')
            elif i == 2:
                joblib.dump(model, './saved_model/suc2_xgb.pkl')
#         ypreds_s.append(ypreds)
        
        if plot_fig:
            columns = list(XY.columns[-3:])
            ytests = pd.DataFrame(ytests, index=columns).T
            ytests.columns = map(lambda x: x + '-预测值', ytests.columns)
            ytesttemp = pd.DataFrame(Y, columns=columns)
            ytesttemp.columns = map(lambda x: x + '-真实值', ytesttemp.columns)
            yplot = pd.concat([ytesttemp, ytests], axis=1)
            
            # 画图1
            title = '基于' + model_name + '的预测模型'
            yplot.iplot(
                color=colors[0::2] + colors[1::2],
                title=title,
            )
            if save_fig:
                yplot.figure(
                    color=colors[0::2] + colors[1::2],
                    title='基于' + model_name + '的预测模型',
                ).write_image("./img/问题3-" + title + ".svg")
            # 画图2
            yplot.columns = ['真实值'] * 3 + ['预测值'] * 3
            columns = list(XY.columns[-3:])
            columns[-1] = '透气性 mm／s'
            for i in range(3):
                title = '基于' + model_name + '的预测模型——' + columns[i]
                yplot.iloc[:, i::3].iplot(
                    kind='spread',
                    color=[colors[i * 2 + 1], colors[i * 2]],
                    title=title,
                )
                if save_fig:
                    yplot.iloc[:, i::3].figure(
                        kind='spread',
                        color=[colors[i * 2 + 1], colors[i * 2]],
                        title=title,
                    ).write_image("./img/问题3-" + title + ".svg")
        print('-'*125)
    return ypreds_s
all_models_ypreds_q3 = np.array(run_Regressor_models(plot_fig=True, save_fig=True, save_model=True))
all_models_ypreds_q3

模型： XGBoost回归
准确率： 0.8535077590545018
准确率： 0.8858960922559103
准确率： 0.8548131998678048

-----------------------------------------------------------------------------------------------------------------------------

array([], dtype=float64)


from numba import jit
from sko.GA import GA
from time import time
from scipy.optimize import minimize
from hmz.kinky_tricks.kinky_tricks import FORE

jieshoujuli_min, refengsudu_min = data3.min()[:2]
jieshoujuli_max, refengsudu_max = data3.max()[:2]
best_model1 = XGBRegressor()
best_model2 = XGBRegressor()

model_pre1, model_pre2, model_pre3 = joblib.load('./saved_model/pre0_xgb.pkl', 'r'), \
    joblib.load('./saved_model/pre1_et.pkl', 'r'), joblib.load('./saved_model/pre2_rf.pkl', 'r')
model_suc1, model_suc2, model_suc3 = joblib.load('./saved_model/suc0_xgb.pkl'), \
    joblib.load('./saved_model/suc1_xgb.pkl', 'r'), joblib.load('./saved_model/suc2_xgb.pkl', 'r')
model1s = [model_pre1, model_pre2, model_pre3]
model2s = [model_suc1, model_suc2, model_suc3]

def run_2part_models(
    X, model1s=model1s, model2s=model2s,
#     colors=colors, plot_fig=False, save_fig=False, 
#     models=models, models_name=models_name, models_select=models_select,
):
    """
    :param model1s: 第一部分的模型
    :param model2s: 第二部分的模型
    :param X: np.ndarray (n, 2) 输入数据
    :return: Y np.ndarray (n, 3) 输出数据
    """
    YasX = []
    for model1 in model1s:
        if isinstance(model1, XGBRegressor):
            y1pred = model1.predict(X, validate_features=False)
        else:
            y1pred = model1.predict(X)
        YasX.append(list(y1pred))
    YasX = np.squeeze(YasX)[None, :]  # (n, 3)
    
    Y = []
    for model2 in model2s:
        if isinstance(model2, XGBRegressor):
            y2pred = model2.predict(YasX, validate_features=False)
        else:
            y2pred = model2.predict(YasX)
        Y.append(list(y2pred))
    Y = np.squeeze(Y)[None, :]  # (n, 3)
    
    return Y

def run_2part_models_func(dis, spd):
    X = np.array([[dis, spd]])  # (n, 2)
    Y = run_2part_models(X)  # (n, 3)
    y = Y[0, 1]
#     print(Y)  # 过滤阻力Pa	过滤效率（%）	透气性 mm/s
#     print(y)  # 过滤效率（%）
#     print(X, round(y, 10))  # check
    return -y

def run_2part_models_fun(x):
    """sko -> scipy"""
    return run_2part_models_func(x[0], x[1])

# @jit
def select_2part_models(type_=0):
    """
    :param type_: 规划类型
        0: sko
        1: scipy
        2: gurobi
        3: linspace
    """
    xbest, ybest = None, None
    if type_ == 0:
        ga = GA(
            run_2part_models_func,
            n_dim=2,  # 接收距离(cm), 热风速度(r/min)
            size_pop=50,
            max_iter=10,
            lb=[jieshoujuli_min, refengsudu_min],
            ub=[jieshoujuli_max, refengsudu_max],
#             constraint_ueq=(lambda x: abs(f((inverter_num, x[0], x[1], x[2])) - NEED) - EPS, ),
        )
        xbest, ybest = ga.run()
    elif type_ == 1:
        opt = minimize(
            run_2part_models_fun, 
            x0=[25., 1100.],
        #     x0=[gate_wide_max, gate_wide_max, gate_len_max - 0.1],
            bounds=((jieshoujuli_min, jieshoujuli_max), (refengsudu_min, refengsudu_max)),
            method='trust-constr',  #  SLSQP trust-constr
            options={'gtol': 1e-9, 'disp': True},
        )
        xbest = opt.x
        ybest = run_2part_models_fun(xbest)
    elif type_ == 2:
        ...
    elif type_ == 3:
        def step_by_epoch(x1l, x1h, x2l, x2h, num=100):
            x1 = np.linspace(x1l, x1h, num)
            x2 = np.linspace(x2l, x2h, num)
            xbest, ybest = [x1l, x2l], 0
            for x1_ in x1:
                for x2_ in x2:
                    yres = -run_2part_models_func(x1_, x2_)
                    if yres > ybest:
                        xbest = [x1_, x2_]
                        ybest = yres
            return xbest, ybest
        xbest, ybest = [0, 0], 0
        epoch = 10
        x1l, x1h, x2l, x2h = jieshoujuli_min, jieshoujuli_max, refengsudu_min, refengsudu_max
        for i in range(epoch):
            xbest_, ybest_ = step_by_epoch(x1l, x1h, x2l, x2h)
            if ybest_ > ybest:
                xbest = xbest_
                ybest = ybest_
            x1, x2 = xbest_
            x1l = max(jieshoujuli_min, 0.5 * x1)
            x1h = min(jieshoujuli_max, 1.5 * x1)
            x2l = max(refengsudu_min, 0.5 * x2)
            x2h = min(refengsudu_max, 1.5 * x2)
        with FORE:
            print('-'*125)
    return xbest, ybest

# y = run_2part_models(np.array([[50, 800]]))
# y

# y = run_2part_models_func(50, 800)
# y

t0 = time()
xbest, ybest = select_2part_models(type_=0)  # sko
# xbest, ybest = select_2part_models(type_=1)  # scipy
# xbest, ybest = select_2part_models(type_=2)  # gurobi
# xbest, ybest = select_2part_models(type_=3)  # linspace
print("time:", time() - t0)
xbest, ybest

time: 27.857622385025024

(array([  21.93175168, 1196.89991651]), array([-82.94813], dtype=float32))


# check
y = -run_2part_models_func(20.2020202, 1151.51515152)


from numba import jit
from time import time

InteractiveShell.ast_node_interactivity = 'last'

def run_2part_models_func(dis, spd, pprint=False):
    X = np.array([[dis, spd]])  # (n, 2)
    Y = run_2part_models(X)  # (n, 3)
    y = Y[0, 1]
    if pprint:
        print(Y)  # 过滤阻力Pa	过滤效率（%）	透气性 mm/s
        print(y)  # 过滤效率（%）
        print(X, round(y, 10))  # check
    return -y

# @jit
def surface_data(
    step1=0.2, step2=4, 
    x1l=jieshoujuli_min, 
    x1h=jieshoujuli_max, 
    x2l=refengsudu_min, 
    x2h=refengsudu_max 
):
    x1 = np.arange(x1l, x1h, step1)
    x2 = np.arange(x2l, x2h, step2)
    z = []
    len1 = len(x1)
    len2 = len(x2)
    for idx, i in enumerate(x1):
        z_ = []
        for j in x2:
            z_.append(-run_2part_models_func(i, j))
        z.append(z_)
        if idx % 1 == 0:
            with FORE(c='RED'):
                print('-'*59, idx, '/', len1, '-'*59)
    z = pd.DataFrame(z, index=list(x1), columns=list(x2))
    return z

t0 = time()
Z = surface_data(step1=1, step2=20)
print("time:", time() - t0)

----------------------------------------------------------- 0 / 20 -----------------------------------------------------------
----------------------------------------------------------- 1 / 20 -----------------------------------------------------------
----------------------------------------------------------- 2 / 20 -----------------------------------------------------------
----------------------------------------------------------- 3 / 20 -----------------------------------------------------------
----------------------------------------------------------- 4 / 20 -----------------------------------------------------------
----------------------------------------------------------- 5 / 20 -----------------------------------------------------------
----------------------------------------------------------- 6 / 20 -----------------------------------------------------------
----------------------------------------------------------- 7 / 20 -----------------------------------------------------------
----------------------------------------------------------- 8 / 20 -----------------------------------------------------------
----------------------------------------------------------- 9 / 20 -----------------------------------------------------------
----------------------------------------------------------- 10 / 20 -----------------------------------------------------------
----------------------------------------------------------- 11 / 20 -----------------------------------------------------------
----------------------------------------------------------- 12 / 20 -----------------------------------------------------------
----------------------------------------------------------- 13 / 20 -----------------------------------------------------------
----------------------------------------------------------- 14 / 20 -----------------------------------------------------------
----------------------------------------------------------- 15 / 20 -----------------------------------------------------------
----------------------------------------------------------- 16 / 20 -----------------------------------------------------------
----------------------------------------------------------- 17 / 20 -----------------------------------------------------------
----------------------------------------------------------- 18 / 20 -----------------------------------------------------------
----------------------------------------------------------- 19 / 20 -----------------------------------------------------------
time: 25.59749984741211


InteractiveShell.ast_node_interactivity = 'last'

fig = go.Figure(
    data=[go.Surface(
        x=list(Z.columns), 
        y=list(Z.index), 
        z=Z.values, 
    )], 
)
fig.update_layout(
    title='过滤效率', 
    autosize=False,
    width=800, height=800,
    xaxis={"title": "接收距离(cm)"},
    yaxis={"title": "热风速度(r/min)"},
)
fig.write_image('./img/问题3-过滤效率曲面图.svg')
fig.write_html('./问题3-过滤效率曲面图.html')
# fig.show()


from hmz.math_model.process import forward_procession
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler


# alpha * 过滤效率, beta * 过滤阻力


# from sklearn.preprocessing import MinMaxScaler 

XY = copy(data3.iloc[:, :5])
X = np.array(XY.iloc[:, :2])
Y = np.array(XY.iloc[:, 2:])

# mms = MinMaxScaler()
# X = mms.fit_transform(X)
# Y = mms.fit_transform(Y)

colors = ["red", "lightpink", "darkorange", "khaki", "green", "lightgreen", "blue", "lightblue"]
colors = ["red", "lightpink", "green", "lightgreen", "blue", "lightblue"]

xpred = np.array([
    [38, 33, 28, 23, 38, 33, 28, 23],
    [850, 950, 1150, 1250, 1250, 1150, 950, 850],
]).T

def score(model, X, Y, xtrain, xtest, ytrain, ytest, Ypred):
    accuracy = predict_accuracy(Y, Ypred, type=1, th=0.005, con=0.8)
    print("准确率：", accuracy)
#     print("训练集分数：R^2 =", model.score(xtrain, ytrain))  # 负值: 拟合效果糟糕，模型完全不能使用
#     print("测试集分数：R^2 =", model.score(xtest, ytest))  # -> 1: 拟合效果很好，模型可以使用
#     print(r2_score(model.predict(xtrain), ytrain))
#     print(r2_score(model.predict(xtest), ytest))
#     scores = cross_val_score(model, X, Y, cv=10, scoring="r2")
#     print(scores.mean())
    return None

models = [
    LinearRegression(),
    Lasso(),
    Ridge(),
    KNeighborsRegressor(),
    MLPRegressor(),
    SVR(kernel='rbf', degree=200, gamma='auto'),  # 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'; ``'scale', 'auto'
    DecisionTreeRegressor(),
    ExtraTreeRegressor(),
    RandomForestRegressor(),
    AdaBoostRegressor(),
    GradientBoostingRegressor(),
    BaggingRegressor(),
    HistGradientBoostingRegressor(), 
    CatBoostRegressor(verbose=0), 
    XGBRegressor(), 
    XGBRFRegressor(), 
    LGBMRegressor(), 
]
models_name = [
    "线性回归",
    "LASSO套索回归",
    "Ridge岭回归",
    "KNN回归",
    "多层感知机回归",
    "SVM回归",
    "决策树回归",
    "极限树回归",
    "随机森林回归",
    "AdaBoost回归",
    "梯度提升回归",
    "Bagging回归",
    "基于直方图的梯度提升回归",
    "CatBoost回归",
    "XGBoost回归",
    "XGBoostRF回归",
    "LightGBM回归",
]
models_select = {
    "线性回归": False,
    "LASSO套索回归": False,
    "Ridge岭回归": False,
    "KNN回归": False,
    "多层感知机回归": False,
    "SVM回归": False,
    "决策树回归": False,
    "极限树回归": True,  # 1
    "随机森林回归": True,  # 2
    "AdaBoost回归": False,
    "梯度提升回归": False,
    "Bagging回归": False,
    "基于直方图的梯度提升回归": False,
    "CatBoost回归": False,
    "XGBoost回归": True,  # 0
    "XGBoostRF回归": False,
    "LightGBM回归": False,
}
saved_models_name = ['xgb', 'et', 'rf']

def run_Regressor_models(
    x=xpred, X=X, Y=Y, colors=colors, plot_fig=False, save_fig=False, 
    models=models, models_name=models_name, models_select=models_select,
    saved_models_name=saved_models_name, save_model=False,
):
    """
    :param x: np.ndarray
    :param X: np.ndarray
    :param Y: np.ndarray
    XGBoost(), ET(), RF()
    """
    xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.3)
    ypreds_s = []
    for model, model_name, model_idx in zip(models, models_name, range(len(models_name))):
#         print(models_select[model_name])
        if not models_select[model_name]:
            continue
        print("模型：", model_name)
        ypreds = []
        ytests = []
        for i in range(Y.shape[1]):
            # 训练
            model.fit(xtrain, ytrain[:, i])
            # 测试
            ytest_ = model.predict(X)
            score(model, X, Y[:, i], xtrain, xtest, ytrain[:, i], ytest[:, i], ytest_)
            ytests.append(list(ytest_))
            # 预测
            ypred = model.predict(x)
            ypreds.append(list(ypred))
            # 保存
            if save_model:
                if model_name == "XGBoost回归" and i == 0:
                    joblib.dump(model, './saved_model/pre0_xgb.pkl')
                elif model_name == "极限树回归" and i == 1:
                    joblib.dump(model, './saved_model/pre1_et.pkl')
                elif model_name == "随机森林回归" and i == 2:
                    joblib.dump(model, './saved_model/pre2_rf.pkl')
        ypreds_s.append(ypreds)
        
        if plot_fig:
            # 画图
            ytests = pd.DataFrame(ytests, index=list(XY.columns[2:5])).T
            ytests.columns = map(lambda x: x + '-预测值', ytests.columns)
            ytesttemp = pd.DataFrame(Y, columns=list(XY.columns[2:5]))
            ytesttemp.columns = map(lambda x: x + '-真实值', ytesttemp.columns)
            yplot = pd.concat([ytesttemp, ytests], axis=1)
    #         print(yplot.shape)
    #         print(yplot.head())
    #         print(ypreds.shape)
    #         print(ypreds.head())
    #         print(ytest_temp.shape)
    #         print(ytest_temp.head())
            
            title = '基于' + model_name + '的预测模型'
            yplot.iplot(
                color=colors[0::2] + colors[1::2],
                title=title,
            )
            if save_fig:
                yplot.figure(
                    color=colors[0::2] + colors[1::2],
                    title=title,
                ).write_image("./img/问题2-" + title + ".svg")

            yplot.columns = ['真实值'] * 3 + ['预测值'] * 3
            columns = list(XY.columns[2:5])
            for i in range(3):
                title = '基于' + model_name + '的预测模型——' + columns[i]
                yplot.iloc[:, i::3].iplot(
                    kind='spread',
                    color=[colors[i * 2 + 1], colors[i * 2]],
                    title=title,
                )
                if save_fig:
                    yplot.iloc[:, i::3].figure(
                        kind='spread',
                        color=[colors[i * 2 + 1], colors[i * 2]],
                        title=title,
                    ).write_image("./img/问题2-" + title + ".svg")
        
        print('-'*125)
    return ypreds_s
all_models_ypreds_q41 = np.array(run_Regressor_models(plot_fig=True, save_fig=True, save_model=True))
all_models_ypreds_q41

模型： 极限树回归
准确率： 0.9783027482740771
准确率： 0.967596518288745
准确率： 0.935611197528982

-----------------------------------------------------------------------------------------------------------------------------
模型： 随机森林回归
准确率： 0.6035103178338299
准确率： 0.9569203921292283
准确率： 0.9142168641267546

-----------------------------------------------------------------------------------------------------------------------------
模型： XGBoost回归
准确率： 0.9782985855635877
准确率： 0.9675964901521802
准确率： 0.9462774250066528

-----------------------------------------------------------------------------------------------------------------------------

array([[[ 2.76130402,  2.6910584 ,  2.85518781,  2.60998273,
          3.15791488,  3.15791488,  2.62150669,  1.70086253],
        [96.56214142, 95.62798564, 96.20490011, 95.97497304,
         96.80519867, 96.13383484, 95.08143107, 94.67373657],
        [87.09500122, 87.26670074, 87.36590576, 85.18565369,
         85.77477264, 85.77477264, 88.2532196 , 86.97471237]],

       [[ 2.77366502,  2.71362188,  2.78347211,  2.60510878,
          3.47870013,  3.07610693,  2.3827195 ,  1.91200316],
        [96.07752557, 96.5114232 , 96.22941806, 95.98321839,
         96.62033853, 96.40991409, 95.637855  , 94.43758658],
        [84.78415195, 87.37532201, 87.49335939, 86.20104624,
         84.32152345, 87.10273104, 88.25069028, 87.64345273]],

       [[ 3.04802966,  2.92000484,  2.85516977,  2.61001492,
          3.52473164,  3.15744686,  2.62026453,  2.15831184],
        [96.52910614, 96.4701004 , 96.42642975, 95.9756012 ,
         96.46608734, 96.80419922, 96.21205902, 95.08162689],
        [85.29875183, 87.26824951, 86.37413788, 86.22383118,
         83.89849091, 85.77510834, 88.44858551, 87.94226074]]])


XY = copy(data3.iloc[:, 2:])
X = np.array(XY.iloc[:, :3])
Y = np.array(XY.iloc[:, 3:])

colors = ["red", "lightpink", "darkorange", "khaki", "green", "lightgreen", "blue", "lightblue"]
colors = ["red", "lightpink", "green", "lightgreen", "blue", "lightblue"]

xpred = np.array([
    [38, 33, 28, 23, 38, 33, 28, 23],
    [850, 950, 1150, 1250, 1250, 1150, 950, 850],
]).T

def score(model, X, Y, xtrain, xtest, ytrain, ytest, Ypred):
    accuracy = predict_accuracy(Y, Ypred, type=1, th=0.05, con=0.8)
    print("准确率：", accuracy)
#     print("训练集分数：R^2 =", model.score(xtrain, ytrain))  # 负值: 拟合效果糟糕，模型完全不能使用
#     print("测试集分数：R^2 =", model.score(xtest, ytest))  # -> 1: 拟合效果很好，模型可以使用
#     print(r2_score(model.predict(xtrain), ytrain))
#     print(r2_score(model.predict(xtest), ytest))
#     scores = cross_val_score(model, X, Y, cv=10, scoring="r2")
#     print(scores.mean())
    return None

models = [
    LinearRegression(),
    Lasso(),
    Ridge(),
    KNeighborsRegressor(),
    MLPRegressor(),
    SVR(kernel='rbf', degree=100, gamma='auto'),  # 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'; ``'scale', 'auto'
    DecisionTreeRegressor(),
    ExtraTreeRegressor(),
    RandomForestRegressor(),
    AdaBoostRegressor(),
    GradientBoostingRegressor(),
    BaggingRegressor(),
    HistGradientBoostingRegressor(), 
    CatBoostRegressor(verbose=0), 
    XGBRegressor(), 
    XGBRFRegressor(), 
    LGBMRegressor(), 
]
models_name = [
    "线性回归",
    "LASSO套索回归",
    "Ridge岭回归",
    "KNN回归",
    "多层感知机回归",
    "SVM回归",
    "决策树回归",
    "极限树回归",
    "随机森林回归",
    "AdaBoost回归",
    "梯度提升回归",
    "Bagging回归",
    "基于直方图的梯度提升回归",
    "CatBoost回归",
    "XGBoost回归",
    "XGBoostRF回归",
    "LightGBM回归",
]
models_select = {
    "线性回归": False,
    "LASSO套索回归": False,
    "Ridge岭回归": False,
    "KNN回归": False,
    "多层感知机回归": False,
    "SVM回归": False,
    "决策树回归": False,
    "极限树回归": False,
    "随机森林回归": False,
    "AdaBoost回归": False,
    "梯度提升回归": False,
    "Bagging回归": False,
    "基于直方图的梯度提升回归": False,
    "CatBoost回归": False,
    "XGBoost回归": True,
    "XGBoostRF回归": False,
    "LightGBM回归": False,
}
def run_Regressor_models(
    x=xpred, X=X, Y=Y, colors=colors, plot_fig=False, save_fig=False, 
    models=models, models_name=models_name, models_select=models_select,
    save_model=False,
):
    """
    :param x: np.ndarray
    :param X: np.ndarray
    :param Y: np.ndarray
    """
    xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.3)
    ypreds_s = []
    for model, model_name, model_select in zip(models, models_name, models_select.keys()):
        if not models_select[model_select]:
            continue
        print("模型：", model_name)
        ytests = []
        ypreds = []
        for i in range(Y.shape[1]):
            # 训练
            model.fit(xtrain, ytrain[:, i])
            # 预测
            ytest_ = model.predict(X)
            score(model, X, Y[:, i], xtrain, xtest, ytrain[:, i], ytest[:, i], ytest_)
            ytests.append(list(ytest_))
#             # 预测
#             ypred = model.predict(x)
#             ypreds.append(list(ypred))
            # 保存
            if i == 0:
                joblib.dump(model, './saved_model/suc0_xgb.pkl')
            elif i == 1:
                joblib.dump(model, './saved_model/suc1_xgb.pkl')
            elif i == 2:
                joblib.dump(model, './saved_model/suc2_xgb.pkl')
#         ypreds_s.append(ypreds)
        
        if plot_fig:
            columns = list(XY.columns[-3:])
            ytests = pd.DataFrame(ytests, index=columns).T
            ytests.columns = map(lambda x: x + '-预测值', ytests.columns)
            ytesttemp = pd.DataFrame(Y, columns=columns)
            ytesttemp.columns = map(lambda x: x + '-真实值', ytesttemp.columns)
            yplot = pd.concat([ytesttemp, ytests], axis=1)
            
            # 画图1
            title = '基于' + model_name + '的预测模型'
            yplot.iplot(
                color=colors[0::2] + colors[1::2],
                title=title,
            )
            if save_fig:
                yplot.figure(
                    color=colors[0::2] + colors[1::2],
                    title='基于' + model_name + '的预测模型',
                ).write_image("./img/问题3-" + title + ".svg")
            # 画图2
            yplot.columns = ['真实值'] * 3 + ['预测值'] * 3
            columns = list(XY.columns[-3:])
            columns[-1] = '透气性 mm／s'
            for i in range(3):
                title = '基于' + model_name + '的预测模型——' + columns[i]
                yplot.iloc[:, i::3].iplot(
                    kind='spread',
                    color=[colors[i * 2 + 1], colors[i * 2]],
                    title=title,
                )
                if save_fig:
                    yplot.iloc[:, i::3].figure(
                        kind='spread',
                        color=[colors[i * 2 + 1], colors[i * 2]],
                        title=title,
                    ).write_image("./img/问题3-" + title + ".svg")
        print('-'*125)
    return ypreds_s
all_models_ypreds_q42 = np.array(run_Regressor_models(plot_fig=True, save_fig=True, save_model=True))
all_models_ypreds_q42

模型： XGBoost回归
准确率： 0.8866634379824002
准确率： 0.8755174210766952
准确率： 0.8438720485965411

-----------------------------------------------------------------------------------------------------------------------------

array([], dtype=float64)


mean1, mean2, _ = data3.iloc[:, -3:].mean()
std1, std2, _ = data3.iloc[:, -3:].std()


from numba import jit
from sko.GA import GA
from scipy.optimize import minimize
from hmz.kinky_tricks.kinky_tricks import FORE

jieshoujuli_min, refengsudu_min = data3.min()[:2]
jieshoujuli_max, refengsudu_max = data3.max()[:2]
best_model1 = XGBRegressor()
best_model2 = XGBRegressor()

path = './saved_model/best ever/'

model_pre1, model_pre2, model_pre3 = joblib.load(path + 'pre0_xgb.pkl', 'r'), \
    joblib.load(path + 'pre1_et.pkl', 'r'), joblib.load(path + 'pre2_rf.pkl', 'r')
model_suc1, model_suc2, model_suc3 = joblib.load(path + 'suc0_xgb.pkl'), \
    joblib.load(path + 'suc1_xgb.pkl', 'r'), joblib.load(path + 'suc2_xgb.pkl', 'r')
model1s = [model_pre1, model_pre2, model_pre3]
model2s = [model_suc1, model_suc2, model_suc3]


# TODO adjust 0.1

def adjust_alpha(alpha=0.1):
    def run_2part_models(
        X, model1s=model1s, model2s=model2s,
    #     colors=colors, plot_fig=False, save_fig=False, 
    #     models=models, models_name=models_name, models_select=models_select,
    ):
        """
        :param model1s: 第一部分的模型
        :param model2s: 第二部分的模型
        :param X: np.ndarray (n, 2) 输入数据
        :return: Y np.ndarray (n, 3) 输出数据
        """
        YasX = []
        for model1 in model1s:
            if isinstance(model1, XGBRegressor):
                y1pred = model1.predict(X, validate_features=False)
            else:
                y1pred = model1.predict(X)
            YasX.append(list(y1pred))
        YasX = np.squeeze(YasX)[None, :]  # (n, 3)

        Y = []
        for model2 in model2s:
            if isinstance(model2, XGBRegressor):
                y2pred = model2.predict(YasX, validate_features=False)
            else:
                y2pred = model2.predict(YasX)
            Y.append(list(y2pred))
        Y = np.squeeze(Y)[None, :]  # (n, 3)

        return Y

    def run_2part_models_func(dis, spd, alpha=alpha, pprint=False):
        X = np.array([[dis, spd]])  # (n, 2)
        Y = run_2part_models(X)  # (n, 3)
        y1 = Y[0, 0]  # 阻力
        y2 = Y[0, 1]  # 效率
        y = alpha * ((y1 - mean1) / std1) - (1 - alpha) * ((y2 - mean2) / std2)
        if pprint:
            print(Y)  # 过滤阻力Pa	过滤效率（%）	透气性 mm/s
            print(y)  # 过滤效率（%）
            print(
                [round(dis, 5), round(spd, 5)], 
                f"\t过滤阻力：{round(y1, 5)}", 
                f"\t过滤效率：{round(y2, 5)}", 
                f"\ty：{round(y, 5)}", 
            )  # check
        return y

    def run_2part_models_fun(x):
        """sko -> scipy"""
        return run_2part_models_func(x[0], x[1])

    def select_2part_models(type_=0):
        """
        :param type_: 规划类型
            0: sko
            1: scipy
            2: gurobi
            3: linspace
        """
        xbest, ybest = None, None
        if type_ == 0:
            ga = GA(
                run_2part_models_func,
                n_dim=2,  # 接收距离(cm), 热风速度(r/min)
                size_pop=50,
                max_iter=24,
                lb=[jieshoujuli_min, refengsudu_min],
                ub=[jieshoujuli_max, refengsudu_max],
    #             constraint_ueq=(lambda x: abs(f((inverter_num, x[0], x[1], x[2])) - NEED) - EPS, ),
            )
            xbest, ybest = ga.run()
        elif type_ == 1:
            opt = minimize(
                run_2part_models_fun, 
                x0=[25., 1100.],
            #     x0=[gate_wide_max, gate_wide_max, gate_len_max - 0.1],
                bounds=((jieshoujuli_min, jieshoujuli_max), (refengsudu_min, refengsudu_max)),
                method='trust-constr',  #  SLSQP trust-constr
                options={'gtol': 1e-9, 'disp': True},
            )
            xbest = opt.x
            ybest = run_2part_models_fun(xbest)
        elif type_ == 2:
            ...
        elif type_ == 3:
            def step_by_epoch(x1l, x1h, x2l, x2h, num=100):
                x1 = np.linspace(x1l, x1h, num)
                x2 = np.linspace(x2l, x2h, num)
                xbest, ybest = [x1l, x2l], 0
                for x1_ in x1:
                    for x2_ in x2:
                        yres = -run_2part_models_func(x1_, x2_)
                        if yres > ybest:
                            xbest = [x1_, x2_]
                            ybest = yres
                return xbest, ybest
            xbest, ybest = [0, 0], 0
            epoch = 10
            x1l, x1h, x2l, x2h = jieshoujuli_min, jieshoujuli_max, refengsudu_min, refengsudu_max
            for i in range(epoch):
                xbest_, ybest_ = step_by_epoch(x1l, x1h, x2l, x2h)
                if ybest_ > ybest:
                    xbest = xbest_
                    ybest = ybest_
                x1, x2 = xbest_
                x1l = max(jieshoujuli_min, 0.5 * x1)
                x1h = min(jieshoujuli_max, 1.5 * x1)
                x2l = max(refengsudu_min, 0.5 * x2)
                x2h = min(refengsudu_max, 1.5 * x2)
            with FORE:
                print('-'*125)
        return xbest, ybest

    t0 = time()
    xbest, ybest = select_2part_models(type_=0)
    print("time:", time() - t0)
    return run_2part_models(np.array([xbest]))

alphas = np.arange(0, 1.01, 0.1)
zulis = []
xiaolvs = []
for alpha in alphas:
    zuli, xiaolv, _ = adjust_alpha(alpha)[0]
    print("alpha:", alpha, '阻力', zuli, '效率', xiaolv)
    zulis.append(zuli)
    xiaolvs.append(xiaolv)
plt.plot(alphas, zulis)
plt.show()
plt.plot(alphas, xiaolvs)
plt.show()

time: 56.43698740005493
alpha: 0.0 阻力 28.733559 效率 83.2102
time: 57.337063789367676
alpha: 0.1 阻力 28.863338 效率 80.01078
time: 59.150070667266846
alpha: 0.2 阻力 28.517855 效率 83.19238
time: 58.667842864990234
alpha: 0.30000000000000004 阻力 28.517855 效率 83.19238
time: 57.32304120063782
alpha: 0.4 阻力 28.517855 效率 83.19238
time: 53.442864418029785
alpha: 0.5 阻力 28.517855 效率 83.19238
time: 63.988736152648926
alpha: 0.6000000000000001 阻力 28.517855 效率 83.19238
time: 64.94027495384216
alpha: 0.7000000000000001 阻力 28.517855 效率 83.19238
time: 64.14125680923462
alpha: 0.8 阻力 20.862177 效率 40.33767
time: 64.39215016365051
alpha: 0.9 阻力 20.862177 效率 40.33767
time: 65.25621724128723
alpha: 1.0 阻力 20.862177 效率 40.33767


from numba import jit
from time import time

def get_filter_from_diff_spd(dis, spd, pprint=False):
    X = np.array([[dis, spd]])  # (n, 2)
    Y = run_2part_models(X)  # (n, 3)
#     print(X)
#     print(Y)
    y0 = Y[0, 0]
    y1 = Y[0, 1]
    if pprint:
        print(Y)  # 过滤阻力Pa	过滤效率（%）	透气性 mm/s
        print(y)  # 过滤效率（%）
        print(X, round(y, 10))  # check
    return y0, y1

# @jit
def surface_data(x1, x2):
    y11s, y12s = [], []
    len1 = len(x1)
    for idx, i in enumerate(x1):
        y1, y2 = get_filter_from_diff_spd(i, 1000)
        y11s.append(y1)
        y12s.append(y2)
#         if idx % 1 == 0:
#             with FORE(c='RED'):
#                 print('-'*59, idx, '/', len1, '-'*59)
    
    y21s, y22s = [], []
    len2 = len(x2)
    for idx, i in enumerate(x2):
        y1, y2 = get_filter_from_diff_spd(21.25, i)
        y21s.append(y1)
        y22s.append(y2)
#         if idx % 1 == 0:
#             with FORE(c='RED'):
#                 print('-'*59, idx, '/', len2, '-'*59)
    return y11s, y12s, y21s, y22s

def plot_sensitive():
    plt.figure(figsize=(9, 9), dpi=100)
    
    num = 50
    x1 = np.linspace(jieshoujuli_min, jieshoujuli_max, num)
    x2 = np.linspace(refengsudu_min, refengsudu_max, num)
#     print(x1, x2)
    y1, y2, y3, y4 = surface_data(x1, x2)
#     print(y1, y2, y3, y4)
    
    plt.subplot(221)
    plt.plot(x1, y1)
    plt.title('灵敏性分析——过滤阻力Pa')
    plt.xlabel('接收距离(cm)')
    plt.ylabel('过滤阻力Pa')
    
    plt.subplot(222)
    plt.plot(x1, y2)
    plt.title('灵敏性分析——过滤效率（%）')
    plt.xlabel('接收距离(cm)')
    plt.ylabel('过滤效率（%）')
    
    plt.subplot(223)
    plt.plot(x2, y3)
    plt.title('灵敏性分析——过滤阻力Pa')
    plt.xlabel('热风速度(r/min)')
    plt.ylabel('过滤阻力Pa')
    
    plt.subplot(224)
    plt.plot(x2, y4)
    plt.title('灵敏性分析——过滤效率（%）')
    plt.xlabel('热风速度(r/min)')
    plt.ylabel('过滤效率（%）')
    
    plt.savefig('./img/模型的灵敏度分析.svg')
    plt.show()
    return None
plot_sensitive()

	插层前-厚度mm	插层前-孔隙率（%）	插层前-压缩回弹性率（%）	插层前-过滤阻力Pa	插层前-过滤效率（%）	插层前-透气性 mm/s	插层后-厚度mm	插层后-孔隙率（%）	插层后-压缩回弹性率（%）	插层后-过滤阻力Pa	插层后-过滤效率（%）	插层后-透气性 mm/s
0	1.715	93.519997	77.839996	8.130000	4.967000	777.099976	2.810	96.279999	83.199997	7.533000	19.966999	1019.669983
1	1.830	93.930000	86.230003	10.470000	1.933000	795.570007	2.910	96.410004	86.650002	7.200000	24.966999	968.630005
2	1.890	94.120003	82.120003	11.870000	4.300000	564.929993	3.425	96.949997	94.330002	10.133000	34.599998	643.400024
3	2.095	94.699997	83.010002	13.900000	11.767000	474.500000	3.400	96.930000	82.879997	10.600000	33.900002	603.169983
4	2.235	95.029999	86.040001	19.230000	20.767000	347.230011	3.845	97.300003	75.970001	15.700000	54.500000	405.829987
...	...	...	...	...	...	...	...	...	...	...	...	...
20	0.870	87.230003	83.889999	30.270000	34.000000	245.699997	1.740	94.000000	90.120003	23.033001	42.333000	309.929993
21	1.000	88.889999	79.169998	42.169998	53.900002	211.399994	2.020	94.830002	86.209999	34.099998	60.733002	234.130005
22	0.995	88.830002	77.519997	52.330002	67.567001	182.630005	2.060	94.930000	87.120003	49.367001	73.199997	226.029999
23	1.110	89.989998	76.389999	60.200001	74.766998	154.199997	2.085	94.989998	88.830002	48.567001	81.633003	191.100006
24	1.365	91.860001	76.389999	65.769997	80.032997	137.169998	2.425	95.690002	84.379997	62.033001	86.967003	156.330002

	组号	编号	接收距离(cm)	热风速度(r/min)	厚度mm	孔隙率（%）	压缩回弹性率（%）	过滤阻力Pa	过滤效率（%）	透气性 mm/s	插层率（%）
0	1.0	1#	40	800	1.715	93.519997	77.839996	8.130000	4.967000	777.099976	0.0
1	2.0	1#	40	900	1.830	93.930000	86.230003	10.470000	1.933000	795.570007	0.0
2	3.0	1#	40	1000	1.890	94.120003	82.120003	11.870000	4.300000	564.929993	0.0
3	4.0	1#	40	1100	2.095	94.699997	83.010002	13.900000	11.767000	474.500000	0.0
4	5.0	1#	40	1200	2.235	95.029999	86.040001	19.230000	20.767000	347.230011	0.0
...	...	...	...	...	...	...	...	...	...	...	...
20	21.0	1#	20	800	0.870	87.230003	83.889999	30.270000	34.000000	245.699997	0.0
21	22.0	1#	20	900	1.000	88.889999	79.169998	42.169998	53.900002	211.399994	0.0
22	23.0	1#	20	1000	0.995	88.830002	77.519997	52.330002	67.567001	182.630005	0.0
23	24.0	1#	20	1100	1.110	89.989998	76.389999	60.200001	74.766998	154.199997	0.0
24	25.0	1#	20	1200	1.365	91.860001	76.389999	65.769997	80.032997	137.169998	0.0

	组号	接收距离(cm)	热风速度(r/min)	厚度mm	孔隙率（%）	压缩回弹性率（%）	过滤阻力Pa	过滤效率（%）	透气性 mm/s	插层率（%）
0	1.0	40	800	1.715	93.519997	77.839996	8.130000	4.967000	777.099976	0.0
1	2.0	40	900	1.830	93.930000	86.230003	10.470000	1.933000	795.570007	0.0
2	3.0	40	1000	1.890	94.120003	82.120003	11.870000	4.300000	564.929993	0.0
3	4.0	40	1100	2.095	94.699997	83.010002	13.900000	11.767000	474.500000	0.0
4	5.0	40	1200	2.235	95.029999	86.040001	19.230000	20.767000	347.230011	0.0
...	...	...	...	...	...	...	...	...	...	...
20	21.0	20	800	0.870	87.230003	83.889999	30.270000	34.000000	245.699997	0.0
21	22.0	20	900	1.000	88.889999	79.169998	42.169998	53.900002	211.399994	0.0
22	23.0	20	1000	0.995	88.830002	77.519997	52.330002	67.567001	182.630005	0.0
23	24.0	20	1100	1.110	89.989998	76.389999	60.200001	74.766998	154.199997	0.0
24	25.0	20	1200	1.365	91.860001	76.389999	65.769997	80.032997	137.169998	0.0

	组号	编号	接收距离(cm)	热风速度(r/min)	厚度mm	孔隙率（%）	压缩回弹性率（%）	过滤阻力Pa	过滤效率（%）	透气性 mm/s	插层率（%）
0	1.0	2#	40	800	2.810	96.279999	83.199997	7.533000	19.966999	1019.669983	36.439999
1	2.0	2#	40	900	2.910	96.410004	86.650002	7.200000	24.966999	968.630005	24.740000
2	3.0	2#	40	1000	3.425	96.949997	94.330002	10.133000	34.599998	643.400024	31.450001
3	4.0	2#	40	1100	3.400	96.930000	82.879997	10.600000	33.900002	603.169983	19.370001
4	5.0	2#	40	1200	3.845	97.300003	75.970001	15.700000	54.500000	405.829987	31.190001
...	...	...	...	...	...	...	...	...	...	...	...
20	21.0	2#	20	800	1.740	94.000000	90.120003	23.033001	42.333000	309.929993	11.320000
21	22.0	2#	20	900	2.020	94.830002	86.209999	34.099998	60.733002	234.130005	19.350000
22	23.0	2#	20	1000	2.060	94.930000	87.120003	49.367001	73.199997	226.029999	24.020000
23	24.0	2#	20	1100	2.085	94.989998	88.830002	48.567001	81.633003	191.100006	35.880001
24	25.0	2#	20	1200	2.425	95.690002	84.379997	62.033001	86.967003	156.330002	32.950001

	组号	接收距离(cm)	热风速度(r/min)	厚度mm	孔隙率（%）	压缩回弹性率（%）	过滤阻力Pa	过滤效率（%）	透气性 mm/s	插层率（%）
0	1.0	40	800	2.810	96.279999	83.199997	7.533000	19.966999	1019.669983	36.439999
1	2.0	40	900	2.910	96.410004	86.650002	7.200000	24.966999	968.630005	24.740000
2	3.0	40	1000	3.425	96.949997	94.330002	10.133000	34.599998	643.400024	31.450001
3	4.0	40	1100	3.400	96.930000	82.879997	10.600000	33.900002	603.169983	19.370001
4	5.0	40	1200	3.845	97.300003	75.970001	15.700000	54.500000	405.829987	31.190001
...	...	...	...	...	...	...	...	...	...	...
20	21.0	20	800	1.740	94.000000	90.120003	23.033001	42.333000	309.929993	11.320000
21	22.0	20	900	2.020	94.830002	86.209999	34.099998	60.733002	234.130005	19.350000
22	23.0	20	1000	2.060	94.930000	87.120003	49.367001	73.199997	226.029999	24.020000
23	24.0	20	1100	2.085	94.989998	88.830002	48.567001	81.633003	191.100006	35.880001
24	25.0	20	1200	2.425	95.690002	84.379997	62.033001	86.967003	156.330002	32.950001

华数杯C题¶

问题1¶

处理数据¶

箱型图¶

箱型图（未标准化）¶

箱型图（标准化）¶

插层前后对比¶

结构变量之间、产品性能之间：
矩阵散点图&相关系数热力图

灰色关联分析¶

插层率对其他xxx的影响（忽略，最终未使用）¶

数据处理（补充）¶

问题2¶

选择模型¶

问题3¶

分析关系¶

选择模型¶

规划求解¶

过滤效率曲面图¶

问题4¶

前部分模型¶

后部分模型¶

画图（不存在的部分）¶

求解¶

模型的分析与检验¶

灵敏性分析¶

	厚度mm	孔隙率（%）	压缩回弹性率（%）
0	0.120574	0.115858	0.113000
1	0.120091	0.115466	0.113274
2	0.120163	0.115570	0.112847
3	0.133004	0.115749	0.113736
4	0.133017	0.116725	0.113802
...	...	...	...
70	0.101513	0.114423	0.115488
71	0.101488	0.114777	0.114511
72	0.105581	0.115668	0.113650
73	0.104718	0.115116	0.113508
74	0.104758	0.115507	0.113476

	过滤阻力Pa	过滤效率（%）	透气性 mm/s
0	0.097698	0.104806	0.170938
1	0.102640	0.111831	0.159364
2	0.103485	0.107998	0.165964
3	0.107217	0.105649	0.136352
4	0.099939	0.099920	0.144728
...	...	...	...
70	0.113905	0.173937	0.053863
71	0.120600	0.178244	0.053919
72	0.111671	0.180640	0.054662
73	0.108523	0.180869	0.054501
74	0.115073	0.179623	0.054919

华数杯C题¶

问题1¶

处理数据¶

箱型图¶

箱型图（未标准化）¶

箱型图（标准化）¶

插层前后对比¶

结构变量之间、产品性能之间：矩阵散点图&相关系数热力图

灰色关联分析¶

插层率对其他xxx的影响（忽略，最终未使用）¶

数据处理（补充）¶

问题2¶

选择模型¶

问题3¶

分析关系¶

选择模型¶

规划求解¶

过滤效率曲面图¶

问题4¶

前部分模型¶

后部分模型¶

画图（不存在的部分）¶

求解¶

模型的分析与检验¶

灵敏性分析¶

结构变量之间、产品性能之间：
矩阵散点图&相关系数热力图