"""Draw violin plots comparing feature distributions of the two 2013 samples.

The script loads ``data_pos_2013.pkl`` and ``data_neg_2013.pkl``, tags each
frame with a ``mark`` column, stacks them and shows one violin plot per
selected feature, split by ``mark``.
"""
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from common import constant
from common.database_utils import database_util
from common.log_utils import logFactory

# NOTE(review): ``click_client`` is not used anywhere in this script; it is
# kept because ``get_client()`` may have side effects other code relies on.
click_client = database_util.get_client()
logger = logFactory("data analysis").log


def _plot_feature_violin(data, feature, upper_bound):
    """Show a violin plot of ``feature`` grouped by the ``mark`` column.

    Args:
        data: DataFrame containing at least ``feature`` and ``mark`` columns.
        feature: Name of the column to plot.
        upper_bound: Rows whose ``feature`` value exceeds this are dropped
            before plotting (inclusive cutoff).
    """
    subset = data[[feature, 'mark']]
    subset = subset[subset[feature] <= upper_bound]
    # Long format: one row per (mark, feature name, value), as seaborn expects.
    long_form = pd.melt(
        subset, id_vars='mark', var_name="Features", value_name="Values"
    )
    sns.violinplot(
        x="Features",
        y="Values",
        hue="mark",
        data=long_form,
        split=False,
        palette='muted',
    )
    plt.show()


def main():
    """Load both samples, label them and plot the selected features."""
    # pd.read_pickle unpickles arbitrary objects; only load trusted files.
    total_pos = pd.read_pickle("data_pos_2013.pkl")
    total_neg = pd.read_pickle("data_neg_2013.pkl")

    # Rows from the "pos" file are marked 0 and rows from the "neg" file 1.
    total_pos['mark'] = 0
    total_neg['mark'] = 1
    total_train_data = pd.concat([total_pos, total_neg], axis=0)

    _plot_feature_violin(total_train_data, 'EVENT_CONSUM_V', 100)
    _plot_feature_violin(total_train_data, 'EVENT_VIDEO_FLUX_V', 50)


# The original guard was ``if __name__:``, which is always true, so merely
# importing this module ran the whole analysis.
if __name__ == "__main__":
    main()