-- 插入正样本ID 6月 insert into l_pos_uuid_06 select THEMONTH_V, uuid from ads.Z_USER_TAG_FLAT_out_202106 where uuid in ( select DISTINCT uuid from ads.Z_USER_TAG_FLAT_out_202107 where THEMONTH_V = '202107' and EVENT_CPNAME_C = '快手' and EVENT_ORDER_MONTH_C = '订购1月内' ) and EVENT_VIDEO_FLUX_V is not null and EVENT_CATEGORYNAME_C not in ('电商卡', '无限畅视'); -- 109927 select count(1) from l_pos_uuid_06; -- 插入正样本ID 7月 insert into l_pos_uuid_07 select THEMONTH_V, uuid from ads.Z_USER_TAG_FLAT_out_202107 where uuid in ( select DISTINCT uuid from ads.Z_USER_TAG_FLAT_out_202108 where THEMONTH_V = '202108' and EVENT_CPNAME_C = '快手' and EVENT_ORDER_MONTH_C = '订购1月内' ) and EVENT_VIDEO_FLUX_V is not null and EVENT_CATEGORYNAME_C not in ('电商卡', '无限畅视'); -- 24213 select count(1) from l_pos_uuid_07; -- 插入负样本ID 6月 insert into l_neg_uuid_06 select t0.THEMONTH_V,t0.uuid from (select THEMONTH_V, uuid from Z_USER_TAG_FLAT_out_202106 t1 where t1.uuid not in (select uuid from l_pos_uuid_06) and t1.EVENT_VIDEO_FLUX_V is not null and t1.EVENT_CONSUM_V < 150 and t1.EVENT_FLUX_C != '30G以上' and t1.EVENT_VIDEO_FLUX_C != '30G以上' AND t1.TAG_NETTYPE_C != '2G' and EVENT_CATEGORYNAME_C not in ('电商卡', '无限畅视') and position(arrayStringConcat(EVENT_APP_USE.C, ','), '快手') != 0) t0 where t0.uuid in (select DISTINCT uuid from ads.Z_USER_TAG_FLAT_out_202107 where THEMONTH_V = '202107' and EVENT_CPNAME_C != '快手' and EVENT_VIDEO_FLUX_V is not null and EVENT_CONSUM_V < 150 and EVENT_FLUX_C != '30G以上' and EVENT_VIDEO_FLUX_C != '30G以上' AND TAG_NETTYPE_C != '2G' and EVENT_CATEGORYNAME_C not in ('电商卡', '无限畅视') and position(arrayStringConcat(EVENT_APP_USE.C, ','), '快手') != 0 ); select count() from l_neg_uuid_06; -- 插入负样本ID 7月 insert into l_neg_uuid_07 select t0.THEMONTH_V,t0.uuid from (select THEMONTH_V, uuid from Z_USER_TAG_FLAT_out_202107 t1 where t1.uuid not in (select uuid from l_pos_uuid_07) and t1.EVENT_VIDEO_FLUX_V is not null and t1.EVENT_CONSUM_V < 150 and t1.EVENT_FLUX_C != '30G以上' and t1.EVENT_VIDEO_FLUX_C != '30G以上' AND t1.TAG_NETTYPE_C != '2G' and EVENT_CATEGORYNAME_C not in ('电商卡', '无限畅视') and position(arrayStringConcat(EVENT_APP_USE.C, ','), '快手') != 0) t0 where t0.uuid in (select DISTINCT uuid from ads.Z_USER_TAG_FLAT_out_202108 where THEMONTH_V = '202108' and EVENT_CPNAME_C != '快手' and EVENT_VIDEO_FLUX_V is not null and EVENT_CONSUM_V < 150 and EVENT_FLUX_C != '30G以上' and EVENT_VIDEO_FLUX_C != '30G以上' AND TAG_NETTYPE_C != '2G' and EVENT_CATEGORYNAME_C not in ('电商卡', '无限畅视') and position(arrayStringConcat(EVENT_APP_USE.C, ','), '快手') != 0 ); ------------------------------------------------------------- -- 插入正样本原始数据 insert into l_pos_origin_06 select *, length(t1.EVENT_APP_USE.C) as I_appuse from (select * from Z_USER_TAG_FLAT_out_202106) t1 where t1.uuid in (select uuid from l_pos_uuid_06) order by I_appuse desc limit 1 by uuid,t1.EVENT_SPNAME_C; -- 插入负样本原始数据 insert into l_neg_origin_06 select *, length(t1.EVENT_APP_USE.C) as I_appuse from (select * from Z_USER_TAG_FLAT_out_202106 where EVENT_CONSUM_C != '10元以下' and EVENT_VIDEO_FLUX_C != '未知' and TAG_NETTYPE_C != '2G' and position(arrayStringConcat(EVENT_APP_USE.C, ','), '快手') != 0 ) t1 where t1.uuid in (select uuid from l_neg_uuid_06) order by I_appuse desc limit 1 by uuid,t1.EVENT_SPNAME_C; ------------------------------------- 后续用python脚本处理