# data_analysis_0213.py — exploratory violin-plot analysis of the 2013
# positive/negative sample pickles.
  1. import pandas as pd
  2. from common.log_utils import logFactory
  3. from common.database_utils import database_util
  4. from common import constant
  5. import pickle
  6. import numpy as np
  7. import matplotlib.pyplot as plt
  8. import seaborn as sns
  9. click_client = database_util.get_client()
  10. logger = logFactory("data analysis").log
  11. if __name__:
  12. total_pos = pd.read_pickle("data_pos_2013.pkl")
  13. total_neg = pd.read_pickle("data_neg_2013.pkl")
  14. total_pos['mark'] = 0
  15. total_neg['mark'] = 1
  16. total_train_data = pd.concat([total_pos, total_neg], axis=0)
  17. t0 = total_train_data[['EVENT_CONSUM_V', 'mark']]
  18. t0 = t0[t0.EVENT_CONSUM_V <= 100]
  19. all_data = pd.melt(t0, id_vars='mark', var_name="Features",
  20. value_name="Values")
  21. sns.violinplot(
  22. x="Features",
  23. y="Values",
  24. hue="mark",
  25. data=all_data,
  26. split=False,
  27. palette='muted'
  28. )
  29. plt.show()
  30. t1 = total_train_data[['EVENT_VIDEO_FLUX_V', 'mark']]
  31. t1 = t1[t1.EVENT_VIDEO_FLUX_V <= 50]
  32. all_data = pd.melt(t1, id_vars='mark', var_name="Features",
  33. value_name="Values")
  34. sns.violinplot(
  35. x="Features",
  36. y="Values",
  37. hue="mark",
  38. data=all_data,
  39. split=False,
  40. palette='muted'
  41. )
  42. plt.show()