In [900]:
from http.server import BaseHTTPRequestHandler, HTTPServer
import urllib
import time
from influxdb import InfluxDBClient
import sys
import seaborn as sns
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
In [1099]:
path = 'C:\\Users\\eg260905\\Desktop\\ENSE3\\3A\\smart_system\\'
analyse = 'appareil'
#analyse = 'circuit'
In [341]:
###########################
#    SCRIPT SETTINGS
###########################
# Set the port where you want the bridge service to run
PORT_NUMBER = 1234
# InfluxDB Server parameters
INFLUXDB_SERVER_IP = '192.168.1.210'
INFLUXDB_SERVER_PORT = 8086
INFLUXDB_USERNAME = 'eleves'
INFLUXDB_PASSWORD = 'SmarthouseG2Elab'
INFLUXDB_DB_NAME = 'jeedom'
###########################

client = InfluxDBClient(INFLUXDB_SERVER_IP, INFLUXDB_SERVER_PORT, INFLUXDB_USERNAME, INFLUXDB_PASSWORD, INFLUXDB_DB_NAME)
client.switch_database('jeedom')
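A quick connectivity check can save time before running the heavier queries below; this is a minimal sketch using two calls available in the influxdb-python client (ping and get_list_measurements).
In [ ]:
# Optional sanity check (sketch): confirm the server answers and list the
# measurements available in the selected database before querying them.
print('InfluxDB version:', client.ping())         # server version string
measurements = client.get_list_measurements()     # e.g. [{'name': '1503'}, {'name': '1372'}, ...]
print(len(measurements), 'measurements found')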
In [342]:
#### Timestamps in seconds ###########
timestamp_end = 1641805072
jour = 86400                 # one day in seconds
annee = 365*jour             # one year in seconds
timestamp_start = timestamp_end - (annee*2)   # study window: the two years before timestamp_end
dt_object_start = datetime.fromtimestamp(timestamp_start)
print(dt_object_start)
dt_object_end = datetime.fromtimestamp(timestamp_end)
print(dt_object_end)
2020-01-11 09:57:52
2022-01-10 09:57:52
In [300]:
query = 'SELECT "value" FROM "jeedom"."autogen"."5229" WHERE time >= $start_time AND time < $end_time'
bind_params = {'end_time': str(dt_object_end), 'start_time': str(dt_object_start)}
result = client.query(query, bind_params=bind_params)
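The ResultSet returned by client.query() can be inspected with its get_points() iterator; a short sketch applied to the query above:
In [ ]:
# Sketch: list the raw points of the ResultSet obtained above.
points = list(result.get_points())   # each point is a dict, e.g. {'time': '...Z', 'value': 123.0}
print(len(points), 'points returned')
if points:
    print(points[0])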
In [343]:
# Jeedom/InfluxDB measurement IDs for each monitored appliance and circuit
ids = {}
ids['linky'] = '1503'
ids['Télé']= '1372'
ids['homeciné']= '1382'
ids['freebox']= '1377'
ids['four']= '2091'
ids['garage']= '2093' #imprimante_3D
ids['plaque_cuisson']= '2095'
ids['lave_linge']= '2099'
ids['aspirateur']= '1387'
ids['freebox_TV']= '1392'
ids['rasberry_kodi']= '1666'
ids['multimedia']= '1673'
ids['circuit_prise_forte']= '324'
ids['circuit_lumière']= '326'
ids['circuit_cuisine']= '328'
ids['circuit_prises']= '330'
ids['linky_gene']= '1505'
ids['PZEM_gene']= '5230'
In [344]:
result_sets = {}
for name in ids.keys():
    result_sets[name]= {'time':[],'value':[]}
In [345]:
bind_params = {'end_time': str(dt_object_end), 'start_time': str(dt_object_start)}
for id_name in ids.keys():
    query_name = 'SELECT "value" FROM "jeedom"."autogen".'+'"'+ids[id_name]+'" '+'WHERE time >= $start_time AND time < $end_time'
    resultset = client.query(query_name, bind_params=bind_params)
    for result in resultset:
        for line in result:
            temps = line['time'].split('T')
            date = temps[0]
            heure = temps[1].replace('Z','')
            heure = heure.split(':')
            date = date.split('-')
            #.strftime("%Y-%m-%d, %H:%M:%S")
            d = datetime(int(date[0]),int(date[1]),int(date[2]),int(heure[0]),int(heure[1]),int(float(heure[2])),0)
            result_sets[id_name]['time'].append(d)
            result_sets[id_name]['value'].append(line['value'])
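The manual string splitting above works, but pandas can parse the InfluxDB ISO 8601 timestamps directly; a minimal sketch (the parsed value is timezone-aware UTC, so tz_localize(None) is needed to match the naive datetimes built in the loop):
In [ ]:
# Sketch: parse an InfluxDB timestamp string with pandas instead of manual splitting.
example = '2020-09-23T05:35:12.5Z'                 # hypothetical example string
d = pd.to_datetime(example).tz_localize(None)      # naive timestamp, fractional seconds kept
print(d)                                           # 2020-09-23 05:35:12.500000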
In [902]:
mergeData=[] 
for col_name in result_sets.keys():
    df = pd.DataFrame.from_dict(result_sets[col_name])
    df.columns = ['time '+col_name,col_name]
    mergeData.append(df)
In [903]:
df_pristine=pd.concat(mergeData, axis=1, join='inner')
In [373]:
data_linky = pd.DataFrame.from_dict(result_sets['linky'])   # columns are already 'time' and 'value'
six_kV = {'time':[],'value':[]}
for k in range(len(data_linky)):
    if data_linky['value'][k]>5500:
        six_kV['time'].append(data_linky['time'][k])
        six_kV['value'].append(data_linky['value'][k])
In [352]:
data_linky.plot(x='time',y='value')
plt.savefig(path+'study_range.png')
In [905]:
#### Function ###
def get_sample_mean_df(sensor_data, col_name, time_step='1T'):
    '''
    Resample the acquired data with a given time step.
    This function uses the pandas resample method with a mean aggregation.
    If the mean produces 'nan' values, the algorithm interpolates them with the nearest measurement.
    inputs:
        - sensor_data: dict containing the measurements with their acquisition times
        - col_name: str, name given to the value column of the returned dataframe
        - time_step: str, pandas offset alias; '1T' = 1 minute
    outputs:
        - dataframe: the sensor data resampled
    '''
    df = pd.DataFrame.from_dict(sensor_data)
    df.set_index('time', inplace=True)
    df = df.ffill()                                   # forward-fill gaps before resampling
    resampled_df = df.resample(time_step).mean()
    resampled_df.columns = [col_name]
    resampled_df.interpolate(method='nearest', inplace=True)
    return resampled_df
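A minimal usage sketch for the helper above, assuming the 'linky' measurements have already been loaded into result_sets:
In [ ]:
# Sketch: resample the Linky readings to 5-minute means
# ('5T' is the pandas offset alias for 5 minutes).
linky_5min = get_sample_mean_df(result_sets['linky'], col_name='linky', time_step='5T')
linky_5min.head()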
In [906]:
df=pd.concat(mergeData, axis=1, join='inner')
In [537]:
"""df.to_excel(path+'donne_1.xlsx')"""
Out[537]:
"df.to_excel(path+'donne_1.xlsx')"
In [1100]:
list_appareil = ['linky','multimedia','Télé','homeciné','freebox','four','garage','plaque_cuisson','lave_linge','aspirateur','freebox_TV','rasberry_kodi']
list_circuit = ['linky','circuit_prise_forte','circuit_lumière','circuit_cuisine','circuit_prises']
if analyse == 'appareil':
    list_analyse=list_appareil
elif analyse == 'circuit':
    list_analyse=list_circuit
In [1101]:
mergeData=[]
for col_name in list_analyse:
    mergeData.append(get_sample_mean_df(result_sets[col_name], col_name, '1T')) # Resample the data
df_appa=pd.concat(mergeData, axis=1, join='inner') # concatenate the resampled data into 1 dataframe with the same time index
df_appa
Out[1101]:
linky multimedia Télé homeciné freebox four garage plaque_cuisson lave_linge aspirateur freebox_TV rasberry_kodi
time
2020-09-23 05:35:00 609.640000 14.1 0.000 0.550000 0.0 0.0 72.0 864.0 55.0 2.9 17.6 0.0
2020-09-23 05:36:00 609.640000 14.1 0.000 0.550000 0.0 0.0 70.0 864.0 0.0 2.9 17.6 0.0
2020-09-23 05:37:00 609.640000 14.1 0.000 0.566667 0.0 0.0 71.0 864.0 97.0 2.9 17.6 0.0
2020-09-23 05:38:00 609.640000 14.1 29.780 0.566667 0.0 0.0 72.0 864.0 118.0 2.9 17.6 0.0
2020-09-23 05:39:00 609.640000 14.1 29.780 0.550000 0.0 0.0 71.0 864.0 23.0 2.9 17.6 0.0
... ... ... ... ... ... ... ... ... ... ... ... ...
2021-12-31 14:13:00 2138.750000 17.8 4.425 0.560000 3.7 1597.0 92.0 0.0 50.0 2.7 17.7 0.8
2021-12-31 14:14:00 3434.615385 17.8 4.425 0.550000 3.7 59.0 91.0 0.0 50.0 2.7 17.7 0.8
2021-12-31 14:15:00 3987.771659 17.8 4.425 0.550000 3.7 1563.0 91.0 0.0 50.0 2.7 17.7 0.8
2021-12-31 14:16:00 2744.000000 17.8 4.425 0.600000 3.7 1572.0 91.0 0.0 50.0 2.7 17.7 0.8
2021-12-31 14:17:00 2577.333333 17.8 4.425 0.600000 3.7 0.0 90.0 0.0 50.0 2.7 17.7 0.8

668683 rows × 12 columns

In [1102]:
# Flag the minutes where the Linky reading exceeds 5500 W: collect their timestamps in six_kV
# and build a 0/1 array (1 when above the threshold, 0 otherwise)
linky_binary = []
six_kV = []
for k in range(df_appa.shape[0]):
    if df_appa['linky'].iloc[k] > 5500:
        print(round(df_appa['linky'].iloc[k]), 'W  at ', df_appa.index[k].strftime("%Y-%m-%d, %H:%M"))
        six_kV.append(df_appa.index[k])
        linky_binary.append(1)
    else:
        linky_binary.append(0)
5587.0 W  at  2020-11-04, 09:32
6251.0 W  at  2020-11-04, 09:33
5515.0 W  at  2020-11-04, 09:34
6222.0 W  at  2020-11-04, 09:35
6202.0 W  at  2020-11-04, 09:36
5962.0 W  at  2020-11-04, 09:37
6096.0 W  at  2020-11-04, 09:38
6743.0 W  at  2020-11-04, 09:39
6367.0 W  at  2020-11-04, 09:40
6066.0 W  at  2020-11-04, 09:41
5983.0 W  at  2020-11-04, 09:42
5782.0 W  at  2020-11-18, 10:52
5963.0 W  at  2020-11-18, 10:53
5987.0 W  at  2020-11-18, 10:54
6290.0 W  at  2020-12-02, 13:47
6290.0 W  at  2020-12-02, 13:48
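The same flag can be built without an explicit loop; a vectorized sketch equivalent to the cell above:
In [ ]:
# Sketch: vectorized version of the 5500 W threshold flag.
above = df_appa['linky'] > 5500
linky_binary_vec = above.astype(int).tolist()      # 1 when above the threshold, 0 otherwise
six_kV_vec = list(df_appa.index[above])            # timestamps of the over-threshold minutes
print(sum(linky_binary_vec), 'minutes above 5500 W')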
In [1103]:
jours = {}
for date_six_kV in six_kV:
    j = date_six_kV.strftime("%Y-%m-%d, %H")
    
    if j not in jours:
        jours[j] = {'value':df_appa.loc[date_six_kV]['linky'],'index':date_six_kV}
    else:
        if df_appa.loc[date_six_kV]['linky']>jours[j]['value']:
            jours[j] = {'value':df_appa.loc[date_six_kV]['linky'],'index':date_six_kV}
jours
Out[1103]:
{'2020-11-04, 09': {'value': 6743.333333333333,
  'index': Timestamp('2020-11-04 09:39:00', freq='T')},
 '2020-11-18, 10': {'value': 5986.8421052631575,
  'index': Timestamp('2020-11-18 10:54:00', freq='T')},
 '2020-12-02, 13': {'value': 6290.434782608696,
  'index': Timestamp('2020-12-02 13:47:00', freq='T')}}
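The hourly maxima can also be extracted with a pandas groupby instead of the dictionary loop; a sketch reproducing the selection above:
In [ ]:
# Sketch: keep only the highest Linky reading of each hour among the flagged timestamps.
peaks = df_appa.loc[six_kV, 'linky']
hourly_max_idx = peaks.groupby(peaks.index.floor('H')).idxmax()   # one timestamp per hour
print(df_appa.loc[hourly_max_idx, 'linky'])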
In [1104]:
df_appa_10 = df_appa.drop('linky',axis='columns')
In [1105]:
day = jours[list(jours.keys())[0]]
filename = str(day['index'].strftime("%Y-%m-%d_"))
print(filename,day['value'],df_appa_10.loc[day['index']].sum())
ax = df_appa_10.loc[day['index']].plot.bar(title=str(day['index'].strftime("%Y-%m-%d, %H:%M:%S"))+' at '+str(round(day['value']))+' W',figsize=(10,6),rot=15).get_figure().savefig(path+filename+analyse+"_.png")
df_appa.loc[day['index']].to_excel(path+filename+'.xlsx')
2020-11-04_ 6743.333333333333 4668.9
In [1106]:
day = jours[list(jours.keys())[1]]
filename = str(day['index'].strftime("%Y-%m-%d_"))
print(filename,day['value'],df_appa_10.loc[day['index']].sum())
ax = df_appa_10.loc[day['index']].plot.bar(title=str(day['index'].strftime("%Y-%m-%d, %H:%M:%S"))+' at '+str(round(day['value']))+' W',figsize=(10,6),rot=15).get_figure().savefig(path+filename+analyse+"_.png")
df_appa.loc[day['index']].to_excel(path+filename+'.xlsx')
2020-11-18_ 5986.8421052631575 4888.766666666666
In [1107]:
day = jours[list(jours.keys())[2]]
filename = str(day['index'].strftime("%Y-%m-%d_"))
print(filename,day['value'],df_appa_10.loc[day['index']].sum())
ax = df_appa_10.loc[day['index']].plot.bar(title=str(day['index'].strftime("%Y-%m-%d, %H:%M:%S"))+' at '+str(round(day['value']))+' W',figsize=(10,6),rot=15).get_figure().savefig(path+filename+analyse+"_.png")
df_appa.loc[day['index']].to_excel(path+filename+'.xlsx')
2020-12-02_ 6290.434782608696 5157.95
In [1066]:
for day in jours.values():
    filename = str(day['index'].strftime("%Y-%m-%d"))
    print(filename,day['value'],df_appa_10.loc[day['index']].sum())
    ax = df_appa_10.loc[day['index']].plot.bar(title=str(day['index'].strftime("%Y-%m-%d, %H:%M:%S"))+' at '+str(round(day['value']))+' W',figsize=(10,6),rot=15).get_figure().savefig(path+filename+".png")
    df_appa.loc[day['index']].to_excel(path+filename+'.xlsx')
2020-11-04 6743.333333333333 4669.9
2020-11-18 5986.8421052631575 4889.766666666666
2020-12-02 6290.434782608696 5158.95
In [1108]:
def get_df_peak(df, day, minutes=4, threshold=100):
    '''
    Extract the window of `minutes` minutes ending at the peak timestamp `day['index']`,
    and drop every appliance whose summed power over that window is below `threshold`.
    '''
    t_end = day['index']
    t_start = t_end - pd.Timedelta(minutes=minutes)   # robust to hour/day boundaries
    df_peak = df.loc[t_start:t_end]
    for appa in df_peak:
        if df_peak[appa].sum() < threshold:
            df_peak = df_peak.drop([appa], axis='columns')
            print('drop ->', appa)
    return df_peak

def get_peak_image(df_peak):
    '''Plot each appliance's power around the peak, one group of bars per minute.'''
    indexs = list(df_peak.columns)
    dict_peak = {}
    for times in df_peak.index:
        # Label each minute with its timestamp and the total power measured by the Linky.
        ind_name = times.strftime("%Y-%m-%d %H:%M:%S")+' P='+str(round(df_appa.loc[times]['linky']))+'W'
        dict_peak[ind_name] = list(df_peak.loc[times])
    df_peak = pd.DataFrame(dict_peak, index=indexs)
    ax = df_peak.plot.bar(rot=0, figsize=(10,6), fontsize="large")
    plt.legend(bbox_to_anchor=[0.45, 0.7], fontsize="large")
    plt.ylabel('power (W)', fontsize="large")
    plt.savefig(path+"peak"+ind_name.split(' ')[0]+analyse+".png")
In [1109]:
df_peak= get_df_peak(df=df_appa_10,day=jours[list(jours.keys())[0]],minutes=2,threshold=20)
get_peak_image(df_peak)
drop -> Télé
drop -> homeciné
drop -> freebox
drop -> aspirateur
drop -> rasberry_kodi
In [1110]:
df_peak= get_df_peak(df=df_appa_10,day=jours[list(jours.keys())[1]],minutes=3,threshold=20)
get_peak_image(df_peak)
drop -> Télé
drop -> homeciné
drop -> freebox
drop -> lave_linge
drop -> aspirateur
drop -> rasberry_kodi
In [1111]:
df_peak= get_df_peak(df=df_appa_10,day=jours[list(jours.keys())[2]],minutes=4,threshold=20)
get_peak_image(df_peak)
drop -> Télé
drop -> homeciné
drop -> freebox
drop -> four
drop -> aspirateur
drop -> rasberry_kodi
In [1112]:
def get_classifier(df, label_name='linky_binary', max_depth=3):
    '''Fit a decision tree on all columns except `label_name` and return it.'''
    # Build the feature matrix: every column except the label.
    features = df.drop(columns=[label_name]).to_numpy()
    # A DecisionTreeRegressor is fitted here despite the function name;
    # max_depth is only used by the (commented-out) classifier variant.
    classifier = DecisionTreeRegressor(random_state=0)
    #classifier = tree.DecisionTreeClassifier(random_state=0, criterion='entropy', max_depth=max_depth)
    classifier.fit(features, df[label_name])
    return classifier

def get_feature_importance(df, classifier, label_name='linky_binary'):
    '''Return a dataframe mapping each feature column (every column except `label_name`) to its importance in the fitted tree.'''
    list_importances = classifier.feature_importances_
    print(list_importances)
    i = 0
    feature_importance_DT = {'feature':[],'feature_importance':[]}
    for col_name in df.columns:
        if col_name != label_name:
            feature_importance_DT['feature'].append(col_name)
            feature_importance_DT['feature_importance'].append(round(list_importances[i],4))
            i += 1
    return pd.DataFrame.from_dict(feature_importance_DT).set_index('feature')
In [1119]:
df_appa_10['linky_binary'] = linky_binary
a = 3
# Build a two-month window centred on the a-th last over-threshold timestamp.
t_peak = six_kV[-a]
t_1 = t_peak - pd.DateOffset(months=1)   # one month before the peak (robust to year boundaries)
t_2 = t_peak + pd.DateOffset(months=1)   # one month after the peak
df_features = df_appa_10.loc[t_1:t_2]
#df_features=df_features.drop(['freebox','freebox_TV','rasberry_kodi','homeciné','multimedia','Télé','aspirateur'],axis='columns')
# Keep only the appliances whose power ever exceeds 1 kW in the window (plus the binary label).
for appa in df_features:
    if df_features[appa].max() < 1000 and appa != 'linky_binary':
        df_features = df_features.drop([appa], axis='columns')
df_features
Out[1119]:
four garage plaque_cuisson lave_linge linky_binary
time
2020-10-18 10:54:00 0.0 72.0 0.0 0.0 0
2020-10-18 10:55:00 0.0 72.0 0.0 0.0 0
2020-10-18 10:56:00 0.0 72.0 0.0 0.0 0
2020-10-18 10:57:00 0.0 71.0 0.0 0.0 0
2020-10-18 10:58:00 0.0 71.0 0.0 0.0 0
... ... ... ... ... ...
2020-12-18 10:50:00 1573.0 78.0 0.0 0.0 0
2020-12-18 10:51:00 1573.0 77.0 0.0 0.0 0
2020-12-18 10:52:00 1573.0 78.0 0.0 0.0 0
2020-12-18 10:53:00 1573.0 77.0 0.0 0.0 0
2020-12-18 10:54:00 1573.0 78.0 0.0 0.0 0

87841 rows × 5 columns

In [1120]:
df_features.plot(y='linky_binary')
Out[1120]:
<matplotlib.axes._subplots.AxesSubplot at 0x1f094f1e430>
In [1121]:
one = []
for bi in df_features['linky_binary']:
    if bi ==1:
        one.append(1)
len(one)
Out[1121]:
16
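The same count can be obtained in one line (a sketch equivalent to the loop above):
In [ ]:
# Sketch: number of minutes labelled 1 in the window, without the explicit loop.
int(df_features['linky_binary'].sum())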
In [1122]:
tree_class = get_classifier(df=df_features,label_name='linky_binary',max_depth=3)
In [1123]:
feature_importance = get_feature_importance(df_features,tree_class)
feature_importance
[0.40722135 0.27619737 0.12209048 0.1944908 ]
Out[1123]:
feature_importance
feature
four 0.4072
garage 0.2762
plaque_cuisson 0.1221
lave_linge 0.1945
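cross_val_score is imported at the top of the notebook but never used; a hedged sketch of how the tree could be scored with it (DecisionTreeRegressor uses R² by default, and the scores may be low given how few positive labels the window contains):
In [ ]:
# Sketch: 5-fold cross-validation of the decision tree on the feature window.
X = df_features.drop(columns=['linky_binary'])
y = df_features['linky_binary']
scores = cross_val_score(DecisionTreeRegressor(random_state=0), X, y, cv=5)
print(scores, scores.mean())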
In [1124]:
feature_importance.plot.bar(rot=0)
plt.savefig(path+'feature_importances.png')
In [1118]:
day=jours[list(jours.keys())[2]]
#%H:%M:%S
t = day['index'].strftime("%Y-%m-%d")
df_day = df_appa_10.loc[t+' 00:00:00':t+' 23:59:00']
df_day
Out[1118]:
multimedia Télé homeciné freebox four garage plaque_cuisson lave_linge aspirateur freebox_TV rasberry_kodi linky_binary
time
2020-12-02 00:00:00 12.6 0.0 0.590000 3.7 0.0 91.0 1967.0 63.0 2.7 17.7 3.500000 0
2020-12-02 00:01:00 12.6 0.0 0.557143 3.7 0.0 91.0 1967.0 63.0 2.7 17.7 3.500000 0
2020-12-02 00:02:00 12.6 0.0 0.600000 3.7 0.0 91.0 1967.0 63.0 2.7 17.7 3.500000 0
2020-12-02 00:03:00 12.6 0.0 0.600000 3.7 0.0 92.0 1967.0 63.0 2.7 17.7 3.500000 0
2020-12-02 00:04:00 12.6 0.0 0.600000 3.7 0.0 92.0 1967.0 63.0 2.7 17.7 3.500000 0
... ... ... ... ... ... ... ... ... ... ... ... ...
2020-12-02 23:55:00 12.6 0.0 0.560000 3.7 1665.0 91.0 0.0 805.0 2.7 17.8 2.666667 0
2020-12-02 23:56:00 12.6 0.0 0.560000 3.7 1665.0 91.0 0.0 805.0 2.7 17.8 2.666667 0
2020-12-02 23:57:00 12.6 0.0 0.560000 3.7 1665.0 91.0 0.0 805.0 2.7 17.8 2.666667 0
2020-12-02 23:58:00 12.6 0.0 0.560000 3.7 1665.0 91.0 0.0 805.0 2.7 17.8 2.666667 0
2020-12-02 23:59:00 12.6 0.0 0.560000 3.7 1665.0 91.0 0.0 805.0 2.7 17.8 2.666667 0

1440 rows × 12 columns

In [ ]: