"""Exploratory violin plots comparing feature distributions by ``mark``.

Loads a pickled DataFrame and, for a handful of ``diff_*`` features, draws a
split violin plot with one half per ``mark`` value.
"""
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from common import constant
from common.database_utils import database_util
from common.log_utils import logFactory

click_client = database_util.get_client()
logger = logFactory("data analysis").log

# NOTE: pd.read_pickle executes arbitrary code embedded in the file; only
# point this at pickles produced by this project.
ANALYSIS_PICKLE_PATH = './data/pkl/analysize_all.pkl'


def _plot_violin(data, features, upper_bound=None):
    """Draw a split violin plot of ``features`` grouped by the ``mark`` column.

    Args:
        data: DataFrame containing every column in ``features`` plus ``mark``.
        features: Column names to plot side by side on the x axis.
        upper_bound: If given, keep only rows where every feature is strictly
            below this value (rows with NaN in a feature are dropped by the
            comparison). Used to cut off outliers that would otherwise
            flatten the violins.
    """
    features = list(features)
    if upper_bound is not None:
        keep = pd.Series(True, index=data.index)
        for feature in features:
            keep &= data[feature] < upper_bound
        data = data[keep]

    # Long format: one row per (mark, feature, value), as seaborn expects.
    melted = pd.melt(
        data[features + ['mark']],
        id_vars='mark',
        var_name="Features",
        value_name="Values",
    )
    sns.violinplot(
        x="Features",
        y="Values",
        hue="mark",
        data=melted,
        split=True,
        palette='muted',
    )
    plt.show()


def main():
    """Load the analysis DataFrame and show the ``diff_*`` violin plots.

    Earlier revisions of this script also plotted the ``first_*`` /
    ``second_*`` feature pairs (EVENT_FLUX_V, EVENT_CONSUM_V capped at 300,
    EVENT_VIDEO_FLUX_V capped at 100, and the ``*_zs`` / kuaishou groups);
    those can be reproduced by calling ``_plot_violin`` with the matching
    column names.
    """
    # Presumably built from the positive/negative training pickles with a
    # 0/1 ``mark`` label column -- TODO confirm against the producing script.
    all_data = pd.read_pickle(ANALYSIS_PICKLE_PATH)

    # The original computed these filters but then plotted the unfiltered
    # frame, so the outlier caps never took effect; they are applied now.
    # NOTE(review): the original repeated the same "< bound" condition twice;
    # a lower bound may have been intended -- confirm with the author.
    _plot_violin(all_data, ['diff_EVENT_FLUX_V'], upper_bound=50000)
    _plot_violin(all_data, ['diff_EVENT_VIDEO_FLUX_V'], upper_bound=50)
    _plot_violin(all_data, ['diff_kuaishou_use'])


# Guard on the actual script name: a bare ``if __name__:`` is always true and
# would load the pickle and open plot windows on import as well.
if __name__ == "__main__":
    main()