Python/Pandas
Pandas _ 이상치 제외 방법
MINSU KANG
2021. 1. 27. 22:01
# Read the yearly CSV files "2015.csv" through "2017.csv" (paths are relative
# to the working directory), stack them into a single frame and drop every
# row that contains a missing value.
df_list = [pd.read_csv("{}.csv".format(year)) for year in range(2015, 2018)]
df = pd.concat(df_list).dropna()

# 'rtn' is the fractional change from 'price' to 'price2'.
df['rtn'] = df['price2'] / df['price'] - 1
# Remove outliers: for every remaining column, keep only the rows whose value
# lies within two standard deviations of that column's mean (bounds inclusive).
# The 'ticker', 'price2', 'price' and 'rtn' columns are never used as a filter
# criterion.  ('rtn' was previously misspelled 'trn', so the derived return
# column was being filtered as well.)
# The frame is narrowed after each column, so the mean and standard deviation
# of a later column are computed on the rows that survived the earlier ones.
for col in df.columns:
    if col in ('ticker', 'price2', 'price', 'rtn'):
        continue
    mu = df[col].mean()
    std = df[col].std()
    # Series.between is inclusive on both ends by default, i.e. this keeps
    # rows with mu - 2*std <= value <= mu + 2*std.
    df = df[df[col].between(mu - 2 * std, mu + 2 * std)]