123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171 |
- import pandas as pd
- from common.log_utils import logFactory
- from common.database_utils import database_util
- from common import constant
- import pickle
- import numpy as np
- import matplotlib.pyplot as plt
- import seaborn as sns
# Module-level handles, created once when the module is loaded.
# Neither is referenced by the plotting code visible in this file.

# Client obtained from the project's database utilities.
click_client = database_util.get_client()

# Logger for this script, registered under the name "data analysis".
logger = logFactory("data analysis").log
# Pickled DataFrame holding the feature columns plus the 'mark' label.
# NOTE(review): commented-out code that used to sit here concatenated a
# positive frame (mark = 1) with a negative frame repeated ten times
# (mark = 0); presumably this pickle was produced that way -- confirm.
ANALYSIS_PICKLE_PATH = './data/pkl/analysize_all.pkl'


def select_below(frame, column, upper_bound):
    """Return the rows of *frame* whose *column* is strictly below *upper_bound*.

    Rows where the comparison is false (including NaN values) are dropped.
    The input frame is not modified.
    """
    return frame[frame[column] < upper_bound]


def melt_by_mark(frame, feature_columns):
    """Reshape *feature_columns* of *frame* into long form, keyed by 'mark'.

    Args:
        frame: DataFrame containing every name in *feature_columns* and a
            'mark' column.
        feature_columns: iterable of column names to unpivot.

    Returns:
        A DataFrame with the columns 'mark', 'Features' (the original column
        name) and 'Values' (the cell value), one row per (row, feature) pair.
    """
    wanted = list(feature_columns) + ['mark']
    return pd.melt(
        frame[wanted],
        id_vars='mark',
        var_name="Features",
        value_name="Values",
    )


def show_violin_by_mark(frame, feature_columns):
    """Draw and display a split violin plot of *feature_columns*, hued by 'mark'."""
    sns.violinplot(
        x="Features",
        y="Values",
        hue="mark",
        data=melt_by_mark(frame, feature_columns),
        split=True,
        palette='muted'
    )
    plt.show()


def main():
    """Load the analysis frame and show violin plots for the ``diff_*`` features.

    Three figures are shown one after another:
    ``diff_EVENT_FLUX_V`` (rows below 50000), ``diff_EVENT_VIDEO_FLUX_V``
    (rows below 50) and ``diff_kuaishou_use`` (all rows).
    """
    # pd.read_pickle unpickles arbitrary objects; only point this at a file
    # produced by this project.
    all_data = pd.read_pickle(ANALYSIS_PICKLE_PATH)

    # Earlier exploratory plots (previously kept as commented-out code) used
    # the same melt + violin recipe on these column groups:
    #   - first_/second_EVENT_FLUX_V
    #   - first_/second_EVENT_CONSUM_V        (rows with both values < 300)
    #   - first_/second_EVENT_VIDEO_FLUX_V    (rows with both values < 100)
    #   - first_/second_MAvg_TOTAL_FLUX_1_3_zs, first_/second_MPer1_TOTAL_FLUX_zs
    #   - first_/second_MAvg_TOTAL_VIDEO_FLUX_1_3_zs,
    #     first_/second_MPer1_TOTAL_VIDEO_FLUX_zs
    #   - first_/second_MAvg_Flow_kuaishou_1_3_zs,
    #     first_/second_MPer1_Flow_kuaishou_zs,
    #     first_/second_Div_kuaishou_vFlux_1_3
    # The last three groups also called sns.set(rc={'figure.figsize': (30, 30)}).
    # 'diff_EVENT_CONSUM_V' was listed next to the diff_* columns below but
    # was never plotted.

    # The original built this filtered frame and then melted the unfiltered
    # all_data, so the outlier cut never reached the plot; the filtered rows
    # are what gets plotted now.
    # NOTE(review): the original repeated the same "< bound" test twice; a
    # lower bound may have been intended for the second one -- confirm.
    show_violin_by_mark(
        select_below(all_data, 'diff_EVENT_FLUX_V', 50000),
        ['diff_EVENT_FLUX_V'],
    )

    show_violin_by_mark(
        select_below(all_data, 'diff_EVENT_VIDEO_FLUX_V', 50),
        ['diff_EVENT_VIDEO_FLUX_V'],
    )

    # No outlier cut was applied to this feature in the original either.
    show_violin_by_mark(all_data, ['diff_kuaishou_use'])


# The original guard was `if __name__:`, which is always true and made the
# plots run on a plain import as well.
if __name__ == "__main__":
    main()
|