# data_join.py (2.7 KB)
  1. import pandas as pd
  2. from common.log_utils import logFactory
  3. from common.database_utils import database_util
  4. from tqdm import tqdm
  5. click_client = database_util.get_client()
  6. logger = logFactory("data gen").log
  7. if __name__ == "__main__":
  8. index = 0
  9. fetch_size = 1000000
  10. # part1
  11. # for i in tqdm(range(0, 15)):
  12. # t1_start = i * fetch_size
  13. # for j in tqdm(range(0, 74)):
  14. # try:
  15. # t2_start = j * fetch_size
  16. # sql = f"insert into l_neg_origin_06 " \
  17. # f"select t2.*,length(t2.EVENT_APP_USE.C) as I_appuse,{i} as PART from (select * from l_neg_uuid_06 limit {t1_start},{fetch_size}) t1 inner join (select * from Z_USER_TAG_FLAT_out_202106 limit {t2_start},{fetch_size}) t2 on t1.uuid==t2.uuid order by I_appuse desc limit 1 by uuid,EVENT_SPNAME_C"
  18. # # logger.info(sql)
  19. # click_client.execute(sql)
  20. # except Exception as e:
  21. # logger.info(f"第{i}大循环的第{j}小循环出错")
  22. # logger.info(str(e))
  23. # continue
  24. # part2
  25. # for i in tqdm(range(15, 30)):
  26. # t1_start = i * fetch_size
  27. # for j in tqdm(range(0, 74)):
  28. # try:
  29. # t2_start = j * fetch_size
  30. # sql = f"insert into l_neg_origin_06 " \
  31. # f"select t2.*,length(t2.EVENT_APP_USE.C) as I_appuse,{i} as PART from (select * from l_neg_uuid_06 limit {t1_start},{fetch_size}) t1 inner join (select * from Z_USER_TAG_FLAT_out_202106 limit {t2_start},{fetch_size}) t2 on t1.uuid==t2.uuid order by I_appuse desc limit 1 by uuid,EVENT_SPNAME_C"
  32. # # logger.info(sql)
  33. # click_client.execute(sql)
  34. # except Exception as e:
  35. # logger.info(f"第{i}大循环的第{j}小循环出错")
  36. # logger.info(str(e))
  37. # continue
  38. #
  39. # # part3
  40. for i in tqdm(range(30, 47)):
  41. t1_start = i * fetch_size
  42. for j in tqdm(range(0, 74)):
  43. try:
  44. t2_start = j * fetch_size
  45. sql = f"insert into l_neg_origin_06 " \
  46. f"select t2.*,length(t2.EVENT_APP_USE.C) as I_appuse,{i} as PART from (select * from l_neg_uuid_06 limit {t1_start},{fetch_size}) t1 inner join (select * from Z_USER_TAG_FLAT_out_202106 limit {t2_start},{fetch_size}) t2 on t1.uuid==t2.uuid order by I_appuse desc limit 1 by uuid,EVENT_SPNAME_C"
  47. # logger.info(sql)
  48. click_client.execute(sql)
  49. except Exception as e:
  50. logger.info(f"第{i}大循环的第{j}小循环出错")
  51. logger.info(str(e))
  52. continue
  53. pass