代码拉取完成,页面将自动刷新
同步操作将从 深度学习/天池_道路通行时间预测LSTM 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 28 13:32:26 2017
@author: xuanlei
"""
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import math
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import normalize
import matplotlib as mpl
#==============================================================================
# Data Preprocessing
#==============================================================================
# Directory that holds the raw data files; the trailing separator is kept so
# existing code that concatenates file names onto it keeps working.
# Every backslash is escaped explicitly (the original had a bare "\D", which
# is an invalid escape sequence and triggers a warning on modern Python).
data_dir = 'C:\\Users\\www\\Desktop\\SDATA\\code\\data\\'
def data_load():
    """Read every file in ``data_dir`` into a module-level DataFrame.

    Each file is parsed as ';'-separated text with a header row and stored
    in ``globals()`` under the file name without its extension.  The raw
    directory listing is also published as the global ``wtno_num``.

    Raises whatever ``os.listdir`` / ``pandas.read_csv`` raise for a missing
    directory or an unreadable entry.
    """
    file_names = os.listdir(data_dir)
    globals()['wtno_num'] = file_names
    for item in file_names:
        print('>>>>>>>>>>>>开始读取'+item+'<<<<<<<<<<<<<<<<<')
        # read_table and read_csv differ only in their default separator, and
        # the separator is given explicitly, so one call covers all files.
        frame_name = os.path.splitext(item)[0]
        globals()[frame_name] = pd.read_csv(
            os.path.join(data_dir, item), sep=';', header=0)
        # NOTE(review): reported once per file; confirm this matches the
        # intended log output.
        print('>>>>>>>>>>>>读取完成<<<<<<<<<<<<<<<<<')
#==============================================================================
# Data Extract:
# row: time
# columns: link_time
# shape(len(time),132)
#==============================================================================
def get_all_data(travel_data):
    """Reshape long-format link records into a time-by-link table.

    Parameters:
        travel_data: DataFrame with at least the columns ``time_interval``,
            ``date`` and ``link_ID``.  Once ``time_interval`` and ``date``
            are dropped, the column at position 1 supplies the cell values
            (presumably the travel time -- confirm against the data header).
            The frame passed in is not modified.

    Returns:
        A tuple ``(df, df.columns)``.  ``df`` has one row per distinct
        ``time_interval`` (sorted) and one column per distinct ``link_ID``
        (sorted, labels converted to ``str``).  A cell holds the first value
        recorded for that (time, link) pair, or the string ``'nan'`` when
        there is no record.

    Side effects:
        Publishes the sorted input as the global ``temp_data`` and the list
        of rows being built as the global ``temp``; prints progress.
    """
    # Work on a freshly indexed copy: this leaves the caller's frame alone
    # and keeps "time_interval" from being both an index level and a column
    # label, which makes sort_values() ambiguous.
    ordered = travel_data.reset_index(drop=True).sort_values(by='time_interval')
    ordered.index = ordered['time_interval'].values
    globals()['temp_data'] = ordered
    sort_data = ordered.drop(['time_interval', 'date'], axis=1)

    linkid = sorted(set(sort_data['link_ID']))
    times = sorted(set(sort_data.index))

    # Single pass over the records; setdefault keeps the first value seen for
    # each (time, link) pair.
    first_value = {}
    pairs = zip(sort_data.index, sort_data['link_ID'])
    for key, value in zip(pairs, sort_data.iloc[:, 1]):
        first_value.setdefault(key, value)

    all_data = []
    # Published before the loop so partial results stay reachable if the
    # extraction is interrupted.
    globals()['temp'] = all_data
    for i, t in enumerate(times, start=1):
        all_data.append([first_value.get((t, link), 'nan') for link in linkid])
        print('>>>>>>>>>>>>完成第{0}条,{1}提取<<<<<<<<<<<<<<<<<'.format(i, t))
    print('>>>>>>>>>>>>完成全部提取:%d<<<<<<<<<<<<<<<<<' % len(times))

    df = pd.DataFrame(all_data)
    df.columns = [str(x) for x in linkid]
    df.index = times
    return df, df.columns
#==============================================================================
# Data Extract: fill na
#==============================================================================
def deal_na(df):
    """Fill missing cells forward in time and drop rows that stay incomplete.

    Parameters:
        df: DataFrame in which missing cells are marked with the string
            ``'nan'`` (real NaN values are handled as well).

    Returns:
        A new DataFrame in which every gap is filled with the previous row's
        value; leading rows that have no earlier value to copy are removed.
        The input frame is not modified.
    """
    dfna = df.replace('nan', float('nan'))
    # Columns that held the 'nan' placeholder are object-typed; convert them
    # back to numeric explicitly instead of relying on replace() to downcast.
    dfna = dfna.infer_objects()
    # ffill() is the supported spelling of the deprecated fillna(method='pad').
    dfna = dfna.ffill()
    df_result = dfna.dropna()
    return df_result
#==============================================================================
# Data Extract:
# 生成时间序列的滑窗数据,格式为[[[],[]],[[],[]],...[[],[]]]
# 滑窗参数有两个一个是步长gap,决定滑窗之间的间隔;另一个是num,决定滑窗内的数据条数
#==============================================================================
def get_window_data(df, num, gap):
    """Cut a time-ordered frame into sliding (input, label) windows.

    Parameters:
        df: DataFrame whose rows are consecutive time steps.
        num: number of rows in each window (must be positive).
        gap: step between the starts of consecutive windows (must be
            positive).

    Returns:
        A list of ``[data, label]`` pairs.  ``data`` is ``num`` consecutive
        rows of ``df`` and ``label`` is the same window shifted forward by
        one row.  Only complete windows are returned, so the list is empty
        when ``df`` has fewer than ``num + 1`` rows.

    Raises:
        ValueError: if ``num`` or ``gap`` is not positive.
    """
    if num <= 0 or gap <= 0:
        raise ValueError('num and gap must be positive integers')
    rows = df.shape[0]
    result_list = []
    # A window needs num + 1 rows (num inputs plus one extra row for the
    # shifted labels), so the last usable start is rows - num - 1.  The
    # length is checked against num itself rather than a fixed 30.
    for begin in range(0, rows - num, gap):
        tran_window_data = df.iloc[begin:begin + num, :]
        tran_window_label = df.iloc[begin + 1:begin + num + 1, :]
        result_list.append([tran_window_data, tran_window_label])
    return result_list
#==============================================================================
# start function
#==============================================================================
def start(num=30, gap=2):
    """Run the whole preprocessing pipeline and return the training windows.

    Loads the data files, reshapes the training records into a time-by-link
    table, fills the gaps and cuts the table into sliding windows.

    Parameters:
        num: rows per sliding window (default 30).
        gap: step between consecutive windows (default 2).

    Returns:
        A tuple ``(result, columns)``: the list produced by
        ``get_window_data`` and the table's column labels as a plain list.
    """
    data_load()
    print('>>>>>>>>>>>>完成数据加载<<<<<<<<<<<<<<<<<')
    # data_load() publishes each file as a module-level DataFrame named after
    # the file, which is where this name comes from.
    df, fl = get_all_data(gy_contest_link_traveltime_training_data)
    print('>>>>>>>>>>>>完成数据抽取<<<<<<<<<<<<<<<<<')
    dfna = deal_na(df)
    print('>>>>>>>>>>>>完成数据填充<<<<<<<<<<<<<<<<<')
    result = get_window_data(dfna, num, gap)
    print('>>>>>>>>>>>>完成数据滑窗生成<<<<<<<<<<<<<<<<<')
    return result, list(fl)
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。