加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
ctr5.py 1.66 KB
一键复制 编辑 原始数据 按行查看 历史
pan 提交于 2020-12-03 16:47 . init
# import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
# import lightgbm as lgbm
# import _pickle as pickle
import time
from lightgbm.sklearn import LGBMClassifier
from sklearn.model_selection import GridSearchCV
from scipy.sparse import csr_matrix
import warnings
warnings.filterwarnings("ignore")
# Reference timestamp shared by every timer() call; initialised when the
# script starts.
begin_time = time.time()


def timer(s=0):
    """Restart the shared stopwatch or print the seconds elapsed on it.

    Args:
        s: Pass 1 to reset ``begin_time`` to the current time. Any other
            value (default 0) prints ``time.time() - begin_time``.
    """
    global begin_time
    if s == 1:
        begin_time = time.time()
    else:
        print(time.time() - begin_time)


# Number of rows drawn from the training data for the grid search.
SAMPLE_SIZE = 2000000
# Features and labels are sampled separately with this same seed; the two
# draws are expected to select the same row positions only when both inputs
# have the same number of rows (checked below).
SAMPLE_SEED = 0

# NOTE: read_pickle executes pickled code; only load files from a trusted source.
X_train = pd.read_pickle("pkl/X_train")
# Remember the full row count so the labels can be validated against it.
n_rows_total = len(X_train)
X_train = X_train.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED, axis=0)
print("X_train read")
X_train = csr_matrix(X_train)
print("CSR")
y_train = pd.read_pickle("pkl/y_train")
if len(y_train) != n_rows_total:
    # Without this check a length mismatch would silently pair feature rows
    # with the wrong labels.
    raise ValueError(
        "X_train has %d rows but y_train has %d; cannot sample aligned rows"
        % (n_rows_total, len(y_train))
    )
y_train = y_train.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED, axis=0)
# Presumably a cap on boosting rounds; nothing in the visible script reads it.
MAX_ROUNDS = 10_000
# Three stratified folds, shuffled with a fixed seed.
kfold = StratifiedKFold(
    n_splits=3,
    shuffle=True,
    random_state=3,
)
# Fixed LightGBM settings passed to the classifier for every grid-search candidate.
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'n_jobs': -1,
    'learning_rate': 0.1,
    'n_estimators': 863,
    'max_depth': 7,
    'num_leaves': 63,
    # 127 = 2^7 - 1; per the original note, the raw features are integers
    # that rarely exceed 100.
    'max_bin': 127,
    'subsample': 0.7,
    'bagging_freq': 1,
    'colsample_bytree': 0.7,
    # Key was previously misspelled as 'verbose=' (stray '='), so the
    # intended verbosity setting was passed under an unknown parameter name.
    'verbose': -1,
}
# Grid-search min_child_samples over 10, 20, 30, 40 with the fixed settings
# in `params`, scoring each candidate by negative log loss on the CV folds.
# NOTE(review): `silent` is kept as in the original call; newer LightGBM
# releases appear to have dropped this argument -- confirm against the
# installed version.
lg = LGBMClassifier(silent=False, **params)
min_child_samples_s = range(10, 50, 10)
tuned_parameters = dict(min_child_samples=min_child_samples_s)
grid_search2 = GridSearchCV(
    lg,
    n_jobs=-1,
    param_grid=tuned_parameters,
    cv=kfold,
    scoring="neg_log_loss",
    # 0 is the quiet setting; the previous value of -1 is not a valid
    # verbosity level for scikit-learn releases that validate parameters.
    verbose=0,
    # No final refit: only the best parameter value is recorded below.
    refit=False,
)
grid_search2.fit(X_train, y_train)
# Append the winning setting to the results log; the context manager closes
# the file even if the write fails.
with open("res.txt", "a+") as fo:
    fo.write("min_child_samples: " + str(grid_search2.best_params_) + "\n")
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化