import pandas as pd from sqlalchemy import create_engine # 连接到数据库 engine = create_engine('mysql+pymysql://user:password@localhost/database') # 获取所有表格的名称 with engine.connect() as conn, conn.begin(): tables = conn.execute("SHOW TABLES").fetchall() # 遍历所有表格 for table in tables: table_name = table[0] table_name_quoted = '' + table_name + '' # 检查是否存在名为'a'的列,如果不存在则添加'a'和'b'列 with engine.connect() as conn, conn.begin(): a_column = conn.execute("SHOW COLUMNS FROM " + table_name_quoted + " LIKE 'a'").fetchone() if a_column is None: conn.execute("ALTER TABLE " + table_name_quoted + " ADD COLUMN a DECIMAL(10,2)") conn.execute("ALTER TABLE " + table_name_quoted + " ADD COLUMN b DECIMAL(10,2)") # 查询net_mf_amount列的数据 query = "SELECT trade_date, net_mf_amount FROM " + table_name_quoted + " ORDER BY trade_date DESC" df = pd.read_sql_query(query, engine) # 计算a和b列 a_column = [] b_column = [] for i in range(len(df)): if i == 0: a_column.append(None) b_column.append(None) else: if pd.notnull(df.iloc[i]['net_mf_amount']) and pd.notnull(df.iloc[i-1]['net_mf_amount']): if i-2 >= 0: if pd.notnull(df.iloc[i-2]['net_mf_amount']): a = df.iloc[i]['net_mf_amount'] - df.iloc[i-1]['net_mf_amount'] b = df.iloc[i]['net_mf_amount'] - df.iloc[i-2]['net_mf_amount'] a_column.append(a) b_column.append(b) else: j = i-3 while j >= 0: if pd.notnull(df.iloc[j]['net_mf_amount']): a = df.iloc[i]['net_mf_amount'] - df.iloc[i-1]['net_mf_amount'] b = df.iloc[i]['net_mf_amount'] - df.iloc[j]['net_mf_amount'] a_column.append(a) b_column.append(b) break j -= 1 else: a = df.iloc[i]['net_mf_amount'] - df.iloc[i-1]['net_mf_amount'] b = None a_column.append(a) b_column.append(b) else: a_column.append(None) b_column.append(None) # 将结果保存到数据库 with engine.connect() as conn, conn.begin(): for i in range(len(df)): conn.execute("UPDATE " + table_name_quoted + " SET a=%s, b=%s WHERE trade_date=%s", (a_column[i], b_column[i], df.iloc[i]['trade_date'])) # 关闭连接 engine.dispose() 有5000个表格,使用多线程,线程池
时间: 2024-02-11 20:08:31 浏览: 148
可以使用Python内置的`concurrent.futures`模块实现多线程处理。具体实现可以参考以下代码:
```
import concurrent.futures
def process_table(table_name):
table_name_quoted = '' + table_name + ''
with engine.connect() as conn, conn.begin():
a_column = conn.execute("SHOW COLUMNS FROM " + table_name_quoted + " LIKE 'a'").fetchone()
if a_column is None:
conn.execute("ALTER TABLE " + table_name_quoted + " ADD COLUMN a DECIMAL(10,2)")
conn.execute("ALTER TABLE " + table_name_quoted + " ADD COLUMN b DECIMAL(10,2)")
query = "SELECT trade_date, net_mf_amount FROM " + table_name_quoted + " ORDER BY trade_date DESC"
df = pd.read_sql_query(query, engine)
a_column = []
b_column = []
for i in range(len(df)):
if i == 0:
a_column.append(None)
b_column.append(None)
else:
if pd.notnull(df.iloc[i]['net_mf_amount']) and pd.notnull(df.iloc[i-1]['net_mf_amount']):
if i-2 >= 0:
if pd.notnull(df.iloc[i-2]['net_mf_amount']):
a = df.iloc[i]['net_mf_amount'] - df.iloc[i-1]['net_mf_amount']
b = df.iloc[i]['net_mf_amount'] - df.iloc[i-2]['net_mf_amount']
a_column.append(a)
b_column.append(b)
else:
j = i-3
while j >= 0:
if pd.notnull(df.iloc[j]['net_mf_amount']):
a = df.iloc[i]['net_mf_amount'] - df.iloc[i-1]['net_mf_amount']
b = df.iloc[i]['net_mf_amount'] - df.iloc[j]['net_mf_amount']
a_column.append(a)
b_column.append(b)
break
j -= 1
else:
a = df.iloc[i]['net_mf_amount'] - df.iloc[i-1]['net_mf_amount']
b = None
a_column.append(a)
b_column.append(b)
else:
a_column.append(None)
b_column.append(None)
with engine.connect() as conn, conn.begin():
for i in range(len(df)):
conn.execute("UPDATE " + table_name_quoted + " SET a=%s, b=%s WHERE trade_date=%s", (a_column[i], b_column[i], df.iloc[i]['trade_date']))
print("Processed table:", table_name)
if __name__ == '__main__':
engine = create_engine('mysql+pymysql://user:password@localhost/database')
with engine.connect() as conn, conn.begin():
tables = conn.execute("SHOW TABLES").fetchall()
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
executor.map(process_table, [table[0] for table in tables])
engine.dispose()
```
在`main`函数中,首先获取所有表格的名称,然后使用`ThreadPoolExecutor`创建一个最大线程数为10的线程池。使用`map`方法将`process_table`函数和表格名称列表传入,线程池会自动分配任务并执行。每个线程都会执行`process_table`函数,对一张表格进行处理。
在`process_table`函数中,首先检查是否存在名为'a'的列,如果不存在则添加'a'和'b'列。然后从数据库中查询数据到`DataFrame`中,并对每一行数据计算'a'和'b'列的值。最后将结果更新到数据库中。每个线程在处理完一张表格后,会输出一条信息,表示该表格处理完成。
阅读全文