1 anno fa · 8f55ecaeb3
--- a/etl_bill_detail.py
+++ b/etl_bill_detail.py
@@ -21,6 +21,48 @@ if debug:
 
				 else:
			
 
				     debug_condition = ''
			
 
				 
			
 
				+# Schema of the target table: column name -> [column type, column comment].
				+# Element [0] of each value is passed to DataFrame.to_sql() as the dtype in
				+# load(); element [1] is written as the column comment by
				+# update_column_comment().
				+# NOTE: Date and INTEGER are stored as bare classes (not instances), while
				+# NVARCHAR(...) and DECIMAL(...) are instances; update_column_comment()
				+# special-cases the two bare classes when it renders the type as SQL.
				+# The type names are presumably SQLAlchemy types -- the imports are not
				+# visible in this view, confirm at the top of the file.
				+cols = {
				+    'tenant_id': [NVARCHAR(10), '租户ID'],
				+    'dept_id': [NVARCHAR(64), '房源所属门店ID'],
				+    'dept_name': [NVARCHAR(50), '房源所属门店名称'],
				+    'house_address': [NVARCHAR(100), '房源地址'],
				+    'community_id': [NVARCHAR(64), '房源所属项目/小区ID'],
				+    'community_name': [NVARCHAR(50), '房源所属项目/小区'],
				+    'house_area': [DECIMAL(10, 2), '房源面积'],
				+    'contract_dept_id': [NVARCHAR(64), '合同所属门店/部门ID'],
				+    'contract_dept_name': [NVARCHAR(50), '合同所属门店/部门'],
				+    'contract_id': [NVARCHAR(64), '合同ID'],
				+    'contract_no': [NVARCHAR(64), '合同编号'],
				+    'contract_medium': [NVARCHAR(50), '合同类型'],
				+    'renter_name': [NVARCHAR(50), '租客姓名'],
				+    'renter_phone': [NVARCHAR(20), '租客手机号'],
				+    'renter_id_type': [NVARCHAR(50), '租客证件类型'],
				+    'renter_id_number': [NVARCHAR(50), '租客证件号码'],
				+    'sign_type': [NVARCHAR(50), '成交方式'],
				+    'contract_begin_date': [Date, '合同开始日期'],
				+    'contract_end_date': [Date, '合同结束日期'],
				+    'payment_method': [NVARCHAR(50), '付款方式'],
				+    'sign_date': [Date, '签约日期'],
				+    'signer_id': [NVARCHAR(64), '签约人ID'],
				+    'signer_name': [NVARCHAR(50), '签约人'],
				+    'pay_months': [INTEGER, '付几'],
				+    'deposit_months': [INTEGER, '押几'],
				+    'bill_id': [NVARCHAR(64), '账单ID'],
				+    'bill_detail_id': [NVARCHAR(64), '账单明细ID'],
				+    'fee_subject_id': [NVARCHAR(64), '费用科目ID'],
				+    'fee_subject_label': [NVARCHAR(64), '费用科目标签'],
				+    'fee_subject_name': [NVARCHAR(64), '费用科目名称'],
				+    'day': [Date, '日期'],
				+    'kind': [NVARCHAR(10), '类型'],
				+    'money': [DECIMAL(14, 2), '金额'],
				+    'reject_time': [Date, '退租时间'],
				+    'reject_name': [NVARCHAR(64), '退租处理人'],
				+    'reject_payment_account': [NVARCHAR(64), '退款账号'],
				+    'reject_payment_account_type': [NVARCHAR(32), '退款途径'],
				+    'reject_reason': [NVARCHAR(64), '退租原因'],
				+    'is_apportion': [INTEGER, '是否分摊']
				+}
			
 
				+
			
 
				 def query_total(conn) -> int:
			
 
				     query = """
			
 
				         select 
			
@@ -35,16 +77,29 @@ def extract(conn, batch_size, i) -> pd.DataFrame:
 
				     """
			
 
				     query = """
			
 
				         select 
			
 
				-            bd.tenant_id, rc.id 'contract_id', dept.id 'dept_id', dept.name 'dept_name', rc.sign_emp_id 'emp_id', emp.name 'emp_name',
			
 
				+			bd.tenant_id, 
			
 
				+            hhb.dept_id 'dept_id', house_sd.name 'dept_name', hhr.address 'house_address',
			
 
				+            hhb.community_id 'community_id', hc.name 'community_name', hhr.house_area 'house_area',
			
 
				+            crc_sd.id 'contract_dept_id', crc_sd.name 'contract_dept_name',
			
 
				+            rc.id 'contract_id', rc.contract_no 'contract_no', rc.contract_medium 'contract_medium',
			
 
				+            cri.name 'renter_name', cri.phone 'renter_phone', cri.certification_type 'renter_id_type', cri.identity_card 'renter_id_number',
			
 
				+            rc.sign_type 'sign_type', rc.begin_time 'contract_begin_date', rc.end_time 'contract_end_date',  rc.pay_pattern 'payment_method',
			
 
				+            rc.sign_time 'sign_date', rc.sign_emp_id 'signer_id', sign_emp.name 'signer_name', rc.periodMonth 'pay_months', rc.depositMonth 'deposit_months',
			
 
				             bd.bill_id, bd.id as 'bill_detail_id', bd.fee_subject_id, sd.label as 'fee_subject_label', sd.name as 'fee_subject_name', 
			
 
				+            0 'splitter',
			
 
				             bd.fee_direction, bd.original_money, bd.occurred_money,
			
 
				             bd.begin_time, bd.end_time, bd.is_occur, rc.cancel_info, bd.predict_time
			
 
				         from yuxin_finance.fin_finance_bill_detail bd
			
 
				         left join yuxin_setting.setting_dictionary sd on sd.id=bd.fee_subject_id
			
 
				         left join yuxin_contract.cont_renter_contract rc on rc.id=bd.biz_id
			
 
				-        left join yuxin_setting.setting_employee_dept ed on ed.emp_id=rc.maintainer_id and ed.is_delete=0
			
 
				-        left join yuxin_setting.setting_employee_info emp on emp.id=rc.sign_emp_id and emp.is_delete=0
			
 
				-        left join yuxin_setting.setting_department dept on dept.id=ed.dept_id and dept.is_delete=0
			
 
				+        left join yuxin_house.hse_house_room hhr on hhr.is_delete=0 and hhr.id=rc.house_id
			
 
				+        left join yuxin_house.hse_house_base hhb on hhb.is_delete=0 and hhb.id=rc.house_id
			
 
				+        left join yuxin_setting.setting_department house_sd on house_sd.id=hhb.dept_id and house_sd.is_delete=0
			
 
				+        left join yuxin_house.hse_community hc on hc.id=hhb.community_id and hc.is_delete=0
			
 
				+        left join yuxin_setting.setting_employee_dept crc_ed on crc_ed.emp_id=rc.maintainer_id and crc_ed.is_delete=0
			
 
				+        left join yuxin_setting.setting_department crc_sd on crc_sd.id=crc_ed.dept_id and crc_sd.is_delete=0
			
 
				+        left join yuxin_contract.cont_renter_info cri on cri.is_delete=0 and cri.customer_type=1 and cri.contract_id=bd.biz_id
			
 
				+        left join yuxin_setting.setting_employee_info sign_emp on sign_emp.id=rc.sign_emp_id and sign_emp.is_delete=0
			
 
				         where bd.is_valid=1 and bd.is_delete=0 and bd.biz_type=2 {debug_condition}
			
 
				         limit {batch_size} offset {offset}
			
 
				         """.format(batch_size=batch_size, offset=i, debug_condition=debug_condition)
			
@@ -58,7 +113,8 @@ def transform(data) -> pd.DataFrame:
 
				         --- 指标：金额(尾差保留在最后一日中)
			
 
				      """
			
 
				     # target columns
			
 
				-    columns = list(data.columns[:11])
			
 
				+    last_splitter_index = list(data.columns).index('splitter')
			
 
				+    columns = list(data.columns[:last_splitter_index])
			
 
				     columns.extend(['day', 'kind', 'money',
			
 
				                     'reject_time',
			
 
				                     'reject_name',
			
@@ -67,6 +123,7 @@ def transform(data) -> pd.DataFrame:
 
				                     'reject_reason',
			
 
				                     'is_apportion',
			
 
				                     ])
			
 
				+
			
 
				     # target data
			
 
				     df = pd.DataFrame(columns=columns)
			
 
				 
			
@@ -208,34 +265,23 @@ def load(conn, df: pd.DataFrame, target_db) -> None:
 
				     """
			
 
				     
			
 
				     # Define the column types for the table
			
 
				-    dtype = {
			
 
				-        'tenant_id': NVARCHAR(10),
			
 
				-        'contract_id': NVARCHAR(64),
			
 
				-        'dept_id': NVARCHAR(64),
			
 
				-        'dept_name': NVARCHAR(50),
			
 
				-        'emp_id': NVARCHAR(64),
			
 
				-        'emp_name': NVARCHAR(50),
			
 
				-        'bill_id': NVARCHAR(64),
			
 
				-        'bill_detail_id': NVARCHAR(64),
			
 
				-        'fee_subject_id': NVARCHAR(64),
			
 
				-        'fee_subject_label': NVARCHAR(64),
			
 
				-        'fee_subject_name': NVARCHAR(64),
			
 
				-        'day': Date,
			
 
				-        'kind': NVARCHAR(10),
			
 
				-        'money': DECIMAL(14,2),
			
 
				-        'reject_time': Date,
			
 
				-        'reject_name': NVARCHAR(64),
			
 
				-        'reject_payment_account': NVARCHAR(64),
			
 
				-        'reject_payment_account_type': NVARCHAR(32),
			
 
				-        'reject_reason': NVARCHAR(64),
			
 
				-        'is_apportion': INTEGER,
			
 
				-    }
			
 
				+    dtypes = {key: value[0] for key, value in cols.items()}
			
 
				     # create target table with df.dtypes
			
 
				     df.to_sql(target_db, con=conn, if_exists='append',
			
 
				-              index=False, dtype=dtype)
			
 
				+              index=False, dtype=dtypes)
			
 
				 
			
 
				+    # add columns' comment into table
			
 
				     pass
			
 
				 
			
 
				+def update_column_comment(conn, target_db):
			
 
				+    for key, value in cols.items():
			
 
				+        dtype = value[0]
			
 
				+        if dtype == Date:
			
 
				+            dtype = 'date'
			
 
				+        elif dtype == INTEGER:
			
 
				+            dtype = 'INT'
			
 
				+        conn.execute(text('alter table {target_db} modify column `{col}` {dtype} comment \'{comment}\''
			
 
				+                          .format(target_db=target_db, col=key, dtype=dtype, comment=value[1])))
			
 
				 
			
 
				 def etl():
			
 
				     config = load_config()
			
@@ -273,5 +319,6 @@ def etl():
 
				                 print(data.head())
			
 
				             load(conn, data, target_db)
			
 
				 
			
 
				+        update_column_comment(conn, target_db)
			
 
				 
			
 
				 etl()