Python/Pandas

Pandas _ 이상치 제외 방법

MINSU KANG 2021. 1. 27. 22:01


# Load one CSV per year (2015.csv, 2016.csv, 2017.csv) from the working
# directory and collect the resulting frames.
df_list = [pd.read_csv("{}.csv".format(year)) for year in range(2015, 2018)]

# Stack the yearly frames and discard every row that has a missing value.
df = pd.concat(df_list).dropna()

# Relative change from 'price' to 'price2' (presumably the period return).
price_ratio = df['price2'] / df['price']
df['rtn'] = price_ratio - 1

# Remove outliers

# Columns that are not screened: the identifier, the raw prices, and the
# derived 'rtn' column.
EXCLUDED_COLUMNS = ('ticker', 'price2', 'price', 'rtn')

for col in df.columns:
    if col in EXCLUDED_COLUMNS:
        continue

    mu = df[col].mean()
    std = df[col].std()

    # Keep rows within two standard deviations of the mean (bounds inclusive).
    # df is narrowed on every pass, so each later column's mean/std is
    # computed from the rows that survived the earlier columns.
    df = df[df[col].between(mu - 2 * std, mu + 2 * std)]