糖尿病康复,内容丰富有趣,生活中的好帮手!
糖尿病康复 > 天池大数据比赛 菜鸟仓库比赛 御膳房操作

天池大数据比赛 菜鸟仓库比赛 御膳房操作

时间:2021-08-31 19:34:09

相关推荐

天池大数据比赛 菜鸟仓库比赛 御膳房操作

御膳房--操作

表名 描述 来源 所属包 所属项目 操作

item_feature 商品粒度相关特征 天池 查看包 tianchi_data (tianchi_data) 已授权

config 每个商品在全国和分仓区域的补少... 天池 查看包 tianchi_data (tianchi_data) 已授权

item_store_feature 商品和分仓区域粒度相关特征 天池 查看包 tianchi_data (tianchi_data) 已授权

数据开发-工作流-工作流节点

select

count(*)

from tianchi_data.config;

select

*

from tianchi_data.config limit 10;

创建结果输出的表target

create table if not exists target (

item_id bigint,

store_code string,

target Double);

表管理-筛选-输入target-

创建分仓结果输出的测试表target2

create table if not exists target2 (

item_id bigint,

store_code string,

target Double);

把查询结果保存到表

insert overwrite table target2

select item_id,store_code,qty_alipay_njhs from tianchi_data.item_store_feature limit 10;

查询表结构

desc tianchi_data.item_feature

desc tianchi_data.item_store_feature

查询新建的表

select

item_id,

store_code,

target

from target2;

删除表中所有数据

truncate table target2

创建全国结果输出的测试表target3

create table if not exists target3 (

item_id bigint,

target Double);

统计前两周的和——查询

insert overwrite table target3

select item_id,sum(qty_alipay_njhs) from tianchi_data.item_feature where thedate >='1214' and thedate <='1227' group by item_id;

insert overwrite table target2

select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='1214' and thedate<='1227' group by item_id,store_code;

实验数据操作表

create table if not exists test1 (

item_id bigint,

store_code string,

target Double);

Case when 的使用方法

--简单Case函数

CASE sex

WHEN '1' THEN '男'

WHEN '2' THEN '女'

ELSE '其他' END

--Case搜索函数

CASE WHEN sex = '1' THEN '男'

WHEN sex = '2' THEN '女'

ELSE '其他' END

---case a when b ---如果a=b返回then后面的结果

insert overwrite table test1

select item_id,case 100 when 100 then 'all' end ,target from target3 ;

查看

select count(*) from target

1389442

1189442

将test1数据追加到target2

insert into table target2

select item_id,store_code,target from test1

测试

select item_id,store_code,target from target2 where item_id=4

生成最终结果

insert into table target

select item_id,store_code,target from target2

UNION 操作符用于合并两个或多个 SELECT 语句的结果集。如果允许重复的值,请使用 UNION ALL

SELECT column_name(s) FROM table_name1

UNION ALL

SELECT column_name(s) FROM table_name2

mvn打包进入项目目录执行命令D:\eclipseHadoop1x\hadoop-test-indigoVersion-space\TianChiMapreduce

mvn clean package

配置请全部在base.mapred.xml中完成

MapOnly程序在Map阶段直接针对每一条输入记录进行处理并输出,后续无Combiner和Reducer。要编写MapOnly程序,只要做如下两点即可:

将base.mapred.xml中的Combiner和Reducer注释或删除

Mapper中要直接输出outputRecord,不要像普通MR程序输出Map的中间结果

如何读取本地文件

ODPS的MR程序不允许直接使用本地IO,如有资源文件,可放在src/main/resources/下,然后通过TaskContext的readResourceFileAsStream方法读取。

示例:

// 假设资源文件为src/main/resources/data.txt

String resourceFileName = "src/main/resources/data.txt";

BufferedInputStream bis = new BufferedInputStream(this.getClass().getClassLoader().getResourceAsStream(resourceFileName));

BufferedReader = new BufferedReader(new InputStreamReader(bis));

实验数据操作表

create table if not exists test2 (

item_id bigint,

store_code string,

target Double);

增加测试数据

insert into table test2

select item_id,store_code,target from target limit 20

输出结果

create table if not exists result (

str1 string,

str2 string);

参考示例

drop table if exists result;

create table if not exists result (str1 string,str2 string);

insert into table result

select

myudf(item_id) as (item_id)

from

(

select

*

from test2 tb2

where item_id=4

) tb1;

参考示例

SELECT

myudf(user_id,item_id,behavior_type,time,'-12-18',1) AS (user_id,item_id,time,feature) //返回的结果重命--方便插入其他表字段对应

FROM

(

SELECT

*

FROM

t_mj_p_user tb2 //tb2必须有防止报错

WHERE

time < '-12-18'

DISTRIBUTE BY user_id,item_id

SORT BY user_id,item_id,time DESC

) tb1; //tb1必须有防止报错

create table if not exists cn_submit (

item_id bigint,

store_code string,

target Double);

insert into table cn_submit

select * from target;

create table if not exists all_feature2 (

item_id bigint,

store_code string,

1w Double,

2w Double,

3w Double,

1m Double,

2m Double,

1d Double,

2d Double,

3d Double);

insert into table all_feature2

select item_id,case 100 when 100 then 'all' end as store_code,sum(dd),sum(dd2),sum(dd3),sum(dd4),sum(dd5),sum(dd6),sum(dd7),sum(dd8) from (

select item_id,sum(qty_alipay_njhs) as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='1221' and thedate <='1227' group by item_id

union all

select item_id,case when 1=1 then 0 end as dd,sum(qty_alipay_njhs) as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='1214' and thedate <='1227' group by item_id

union all

select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,sum(qty_alipay_njhs) as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='1207' and thedate <='1227' group by item_id

union all

select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,sum(qty_alipay_njhs) as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='1130' and thedate <='1227' group by item_id

union all

select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,sum(qty_alipay_njhs) as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='1102' and thedate <='1227' group by item_id

union all

select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,sum(qty_alipay_njhs) as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate ='1227' group by item_id

union all

select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,sum(qty_alipay_njhs) as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate ='1226' group by item_id

union all

select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,sum(qty_alipay_njhs) as dd8 from tianchi_data.item_feature where thedate ='1225' group by item_id )

t group by item_id;

线性回归预测模型存放的地址

create table if not exists test3 (

item_id bigint,

store_code string,

target Double);

ceil取上整函数 floor向下取整 log取对数

insert into table test3

select item_id,store_code,floor(prediction_score) from test4;

insert into table test3

select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='1214' and thedate<='1227' group by item_id,store_code;

truncate table cn_submit;

insert into table cn_submit

select * from test3;

创建时间列表

create table if not exists datalist as

SELECT distinct(thedate) FROM tianchi_data.item_feature where thedate >='1020' and thedate <='1227' order by thedate limit 500;

清楚表重新生成

truncate table datalist;

insert into table datalist

SELECT distinct(thedate) FROM tianchi_data.item_feature where thedate >='1020' and thedate <='1227' order by thedate limit 500;

查询

select * from datalist;

create table if not exists datalist2 as

select thedate ,myudf() from datalist;

select * from datalist2;

创建要连接的表

create table if not exists item_feature as

select item_id,qty_alipay_njhs,thedate from tianchi_data.item_feature where thedate >='1020' and thedate <='1227'

表连接

create table if not exists item_feature2 as

select /* + mapjoin(a) */

a._c1 as tab,

b.item_id,

b.thedate,

b.qty_alipay_njhs

from datalist2 a join item_feature b

on a.thedate = b.thedate;

select * from item_feature2 where item_id='222784' order by thedate

按ceiling(tab/14)分组--31维度的特征

create table if not exists item_feature4 as

select item_id,ceil(tab/14) as tab1,sum(qty_alipay_njhs) as sum1 from item_feature2 group by item_id,ceil(tab/14)

特征建立组

create table if not exists item_feature5 as

create table if not exists test3_9 as

select item_id,store_code,target from test3 order by item_id;

truncate table cn_submit;

insert into table cn_submit

select * from test3_9;

insert into table cn_submit

select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='1214' and thedate<='1227' group by item_id,store_code;

模型组合

create table if not exists test3_91 as

select item_id,case 100 when 100 then 'all' end as store_code,floor(0.9*target) from target3;

truncate table cn_submit;

insert into table cn_submit

select * from test3_9;

insert into table cn_submit

select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='1214' and thedate<='1227' group by item_id,store_code;

将全国最大值缩小200 总结果1389442个 max1结果1389435个

少7个值

create table if not exists max1 as

select * from target2 where item_id not in ('985273') ;

将七个值取出

create table if not exists max1_7 as

select * from target2 where item_id ='985273';

将其中6个放入max1 结果1389441个

insert into table max1

select * from max1_7 where store_code not in ('all');

select count(*) from max1

将最大值减去200

select item_id,store_code,target-200 as target from max1_7 where item_id ='985273' and store_code='all';

提交结果

truncate table cn_submit;

insert into table cn_submit

select * from max1;

insert into table cn_submit

select item_id,store_code,target-200 as target from max1_7 where item_id ='985273' and store_code='all';

查看特征得知受12.25影响特别大

'985273'

将全国最大值0.97 总结果1389442个 max1结果1389435个

少7个值

create table if not exists max2 as

select * from target2 where item_id not in ('985273') ;

将七个值取出

create table if not exists max2_7 as

select * from target2 where item_id ='985273';

提交结果

truncate table cn_submit;

insert into table cn_submit

select * from max2;

insert into table cn_submit

select item_id,store_code,floor(0.97*target) as target from max2_7;

查看特征得知受12.25影响特别大 3d > (1d+2d)==>3d > 2*(1d+2d)

create table if not exists top50x as

select item_id,store_code,2w from all_feature2 where 3d > (1d+2d) order by 2w desc limit 50

create table if not exists max50 as

select * from target2 where item_id not in (

select item_id from top50x

) ;

50x7=350 1389442-350=1389092

将350个值取出

create table if not exists max50_7 as

select * from target2 where item_id in (select item_id from top50x);

提交结果

truncate table cn_submit;

insert into table cn_submit

select * from max50;

insert into table cn_submit

select item_id,store_code,floor(0.97*target) as target from max50_7;

查看特征得知受12.25影响特别大 3d > (1d+2d)==>3d > 2*(1d+2d)

create table if not exists top1000x as

select item_id,store_code,2w from all_feature2 where 3d > (1d+2d) order by 2w desc limit 1000;

create table if not exists max1000 as

select * from target2 where item_id not in (

select item_id from top1000x

) ;

将值取出

create table if not exists max1000_7 as

select * from target2 where item_id in (select item_id from top1000x);

提交结果

truncate table cn_submit;

insert into table cn_submit

select * from max1000;

insert into table cn_submit

select item_id,store_code,floor(0.95*target) as target from max1000_7;

平滑12.25后提交前两周和

create table if not exists pinghua3 as

select item_id,qty_alipay_njhs,thedate from tianchi_data.item_feature where thedate not in ('1225');

insert into table pinghua3

select item_id,qty_alipay_njhs,case when 1=1 then '1225' end as thedate from tianchi_data.item_feature where thedate ='1226';

create table if not exists pinghua2 as

select item_id,store_code,qty_alipay_njhs,thedate from tianchi_data.item_store_feature where thedate not in ('1225');

insert into table pinghua2

select item_id,store_code,qty_alipay_njhs,case when 1=1 then '1225' end as thedate from tianchi_data.item_store_feature where thedate ='1226';

truncate table cn_submit;

insert into table cn_submit

select item_id,store_code,sum(qty_alipay_njhs) from pinghua2 where thedate>='1214' and thedate<='1227' group by item_id,store_code;

insert into table cn_submit

select item_id,case when 1=1 then 'all' end as store_code,sum(qty_alipay_njhs) from pinghua3 where thedate>='1214' and thedate<='1227' group by item_id;

如果觉得《天池大数据比赛 菜鸟仓库比赛 御膳房操作》对你有帮助,请点赞、收藏,并留下你的观点哦!

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。