Skip to content

Commit

Permalink
修复只有横截面算子时计算特慢的bug
Browse files Browse the repository at this point in the history
  • Loading branch information
wukan1986 committed Jul 3, 2024
1 parent e2405a4 commit f6139b4
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 13 deletions.
2 changes: 1 addition & 1 deletion expr_codegen/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
- __version__ = "0.7.2"
+ __version__ = "0.7.3"
10 changes: 7 additions & 3 deletions expr_codegen/pandas/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def get_groupby_from_tuple(tup, func_name):

def symbols_to_code(syms, alias):
a = [f"{s}" for s in syms]
- b = [f"r'{alias.get(s, s)}'" for s in syms]
+ b = [f"'{alias.get(s, s)}'" for s in syms]
return f"""_ = ({','.join(b)},)
({','.join(a)},) = _"""

Expand All @@ -46,7 +46,7 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
# polars风格代码
funcs = {}
# 分组应用代码。这里利用了字典按插入顺序排序的特点,将排序放在最前
- groupbys = {'sort': 'df = df'}
+ groupbys = {'sort': ''}
# 处理过后的表达式
exprs_dst = []
syms_out = []
Expand All @@ -70,9 +70,13 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
syms_out.append(va)

if k[0] == TS:
- groupbys['sort'] = f'df = df.sort_values(by=[_DATE_, _ASSET_]).reset_index(drop=True)'
+ if len(groupbys['sort']) == 0:
+ groupbys['sort'] = f'df = df.sort_values(by=[_ASSET_, _DATE_]).reset_index(drop=True)'
# 时序需要排序
func_code = [f' df = df.sort_values(by=[_DATE_])'] + func_code
+ elif k[0] == CS:
+ if len(groupbys['sort']) == 0:
+ groupbys['sort'] = f'df = df.sort_values(by=[_DATE_, _ASSET_]).reset_index(drop=True)'

# polars风格代码列表
funcs[func_name] = '\n'.join(func_code)
Expand Down
16 changes: 9 additions & 7 deletions expr_codegen/polars/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,21 @@ def get_groupby_from_tuple(tup, func_name):
if prefix2 == TS:
# 组内需要按时间进行排序,需要维持顺序
prefix2, asset = tup
- return f'df = df.group_by(_ASSET_).map_groups({func_name})'
+ return f'df = df.sort(_ASSET_, _DATE_).group_by(_ASSET_).map_groups({func_name})'
if prefix2 == CS:
prefix2, date = tup
- return f'df = df.group_by(_DATE_).map_groups({func_name})'
+ return f'df = df.sort(_DATE_).group_by(_DATE_).map_groups({func_name})'
if prefix2 == GP:
prefix2, date, group = tup
- return f'df = df.group_by(_DATE_, "{group}").map_groups({func_name})'
+ return f'df = df.sort(_DATE_, "{group}").group_by(_DATE_, "{group}").map_groups({func_name})'

return f'df = {func_name}(df)'


def symbols_to_code(syms, alias):
a = [f"{s}" for s in syms]
- b = [f"r'{alias.get(s, s)}'" for s in syms]
+ b = [f"r'{alias.get(s, s)}'" for s in syms] #
+ b = [f"'{alias.get(s, s)}'" for s in syms]
return f"""_ = ({','.join(b)},)
({','.join(a)},) = (pl.col(i) for i in _)"""

Expand All @@ -46,7 +47,7 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
# polars风格代码
funcs = {}
# 分组应用代码。这里利用了字典按插入顺序排序的特点,将排序放在最前
- groupbys = {'sort': 'df = df'}
+ groupbys = {'sort': ''}
# 处理过后的表达式
exprs_dst = []
syms_out = []
Expand Down Expand Up @@ -80,9 +81,10 @@ def codegen(exprs_ldl: ListDictList, exprs_src, syms_dst,
func_code = func_code[1:]

if k[0] == TS:
- groupbys['sort'] = f'df = df.sort(by=[_DATE_, _ASSET_])'
+ # if len(groupbys['sort']) == 0:
+ # groupbys['sort'] = f'df = df.sort(_ASSET_, _DATE_)'
# 时序需要排序
- func_code = [f' df = df.sort(by=[_DATE_])'] + func_code
+ func_code = [f' df = df.sort(_DATE_)'] + func_code

# polars风格代码列表
funcs[func_name] = '\n'.join(func_code)
Expand Down
3 changes: 1 addition & 2 deletions expr_codegen/polars/template.py.j2
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# this code is auto generated by the expr_codegen
# https://github.com/wukan1986/expr_codegen
# 此段代码由 expr_codegen 自动生成,欢迎提交 issue 或 pull request
- import re

import numpy as np # noqa
import pandas as pd # noqa
Expand Down Expand Up @@ -68,7 +67,7 @@ def main(df: pl.DataFrame) -> pl.DataFrame:
# logger.info('done')

# save
- # df.write_parquet('output.parquet', compression='zstd')
+ # df.write_parquet('output.parquet')

return df

Expand Down

0 comments on commit f6139b4

Please sign in to comment.