# -*- coding: UTF-8 -*-
import os
import time

import findspark
findspark.init()

from pyspark.sql import SparkSession

# Point PySpark at the cluster's JDK before the JVM is launched.
os.environ['JAVA_HOME'] = '/usr/java/jdk1.8.0_181-cloudera'

# Local Spark session with Hive support, reading table metadata from the Hive metastore.
spark_session = SparkSession.builder.master("local[*]").appName("hive_test_1") \
    .config("hive.metastore.uris", "thrift://127.0.0.1:9083") \
    .enableHiveSupport().getOrCreate()
# Pull delivery orders and flag them 1 or 0 depending on whether they have a
# type-401 or type-402 operation record; UNION deduplicates identical rows.
# The "label" alias is added here for clarity (the original left the literal column unnamed).
jdsql = """
(SELECT
    id,
    jd_id,
    resume_id,
    1 AS label
FROM
    rcn_prod.t_delivery_order
WHERE EXISTS (
    SELECT 1 FROM rcn_prod.t_delivery_order_operation
    WHERE rcn_prod.t_delivery_order.id = rcn_prod.t_delivery_order_operation.order_id
      AND rcn_prod.t_delivery_order_operation.operation_type = 401
))
UNION
(SELECT
    id,
    jd_id,
    resume_id,
    0 AS label
FROM
    rcn_prod.t_delivery_order
WHERE EXISTS (
    SELECT 1 FROM rcn_prod.t_delivery_order_operation
    WHERE rcn_prod.t_delivery_order.id = rcn_prod.t_delivery_order_operation.order_id
      AND rcn_prod.t_delivery_order_operation.operation_type = 402
))
"""
# Run the query, report the row count and the elapsed time.
s = time.time()
cv = spark_session.sql(jdsql)
print('order', cv.count())
print(time.time() - s)

# Collect the result to the driver as a pandas DataFrame and export it to CSV.
cv.toPandas().to_csv('all_train_order_hive.csv')
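# Sketch of an alternative export path (an assumption, not part of the original
# script): if the result set grows too large to collect on the driver, letting
# Spark write the CSV directly avoids the toPandas() memory bottleneck. The
# output directory name below is hypothetical.
# cv.coalesce(1).write.mode("overwrite").option("header", True).csv("all_train_order_hive_csv")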