hive多表联合查询(GroupLens->Users,Movies,Rat

系统 3336 0

hive (UserMovieRating)> create table if not exists Users (
                      > UserID int comment 'user id',
                      > Gender string comment 'user sex',
                      > Age int comment '1:Under 18,18:18-24,25:25-34,35:35-44,45:45-49,50:50-55,56:56+',
                      > Occupation int comment '0-20 represents different jobs',
                      > ZipCode string comment 'your home zip code')
                      > row format delimited fields terminated by '\t'
                      > stored as textfile;
OK
Time taken: 0.249 seconds
hive (UserMovieRating)> load data local inpath '/home/landen/MahoutTest/users.txt' overwrite into table Users;
Copying data from file:/home/landen/MahoutTest/users.txt
Copying file: file:/home/landen/MahoutTest/users.txt
Loading data to table usermovierating.users
Deleted hdfs://Master:9000/home/landen/UntarFile/hive-0.10.0/warehouse/usermovierating.db/users
Table usermovierating.users stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 110208, raw_data_size: 0]
OK
Time taken: 0.745 seconds
hive (UserMovieRating)> select * from Users limit 10;
OK
userid    gender    age    occupation    zipcode
1    F    1    10    48067
2    M    56    16    70072
3    M    25    15    55117
4    M    45    7    02460
5    M    25    20    55455
6    F    50    9    55117
7    M    35    1    06810
8    M    25    12    11413
9    M    25    17    61614
10    F    35    1    95370
Time taken: 0.096 seconds

hive (UserMovieRating)> create table if not exists Movies (            
                      > MovieID int comment 'movie id',               
                      > MovieName string comment 'movie name',        
                      > ReleasedDate int comment 'released year',     
                      > MovieType string comment 'movie type')        
                      > row format delimited fields terminated by '\t'
                      > stored as textfile;                           
OK
Time taken: 0.183 seconds
hive (UserMovieRating)> show tables;
OK
tab_name
movies
users
Time taken: 0.083 seconds

hive (UserMovieRating)> load data local inpath '/home/landen/MahoutTest/Processedmovies.txt' overwrite into table Movies;
Copying data from file:/home/landen/MahoutTest/Processedmovies.txt
Copying file: file:/home/landen/MahoutTest/Processedmovies.txt
Loading data to table usermovierating.movies
Deleted hdfs://Master:9000/home/landen/UntarFile/hive-0.10.0/warehouse/usermovierating.db/movies
Table usermovierating.movies stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 155900, raw_data_size: 0]
OK
Time taken: 0.695 seconds
hive (UserMovieRating)> select * from Movies limit 10;
OK
movieid    moviename    releaseddate    movietype
1    Toy Story    1995    Animation,Children's,Comedy
2    Jumanji    1995    Adventure,Children's,Fantasy
3    Grumpier Old Men    1995    Comedy,Romance
4    Waiting to Exhale    1995    Comedy,Drama
5    Father of the Bride Part II    1995    Comedy
6    Heat    1995    Action,Crime,Thriller
7    Sabrina    1995    Comedy,Romance
8    Tom and Huck    1995    Adventure,Children's
9    Sudden Death    1995    Action
10    GoldenEye    1995    Action,Adventure,Thriller
Time taken: 0.095 seconds

hive (UserMovieRating)> create table if not exists Rating (                      
                      > UserID int comment 'user id',                           
                      > MovieID int comment 'movie id(1-3952)',                 
                      > Rating int comment 'Ratings are made on a 5-star scale',
                      > RatingTime string comment 'user rates time')            
                      > row format delimited fields terminated by '\t'          
                      > stored as textfile;                                     
OK
Time taken: 1.3 seconds
hive (UserMovieRating)> load data local inpath '/home/landen/MahoutTest/ProcessedRating.txt' overwrite into table Rating;
Copying data from file:/home/landen/MahoutTest/ProcessedRating.txt
Copying file: file:/home/landen/MahoutTest/ProcessedRating.txt
Loading data to table usermovierating.rating
Deleted hdfs://Master:9000/home/landen/UntarFile/hive-0.10.0/warehouse/usermovierating.db/rating
Table usermovierating.rating stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 21593504, raw_data_size: 0]
OK
Time taken: 3.293 seconds
hive (UserMovieRating)> select * from Rating limit 10;
OK
userid    movieid    rating    ratingtime
1    1193    5    978300760
1    661    3    978302109
1    914    3    978301968
1    3408    4    978300275
1    2355    5    978824291
1    1197    3    978302268
1    1287    5    978302039
1    2804    5    978300719
1    594    4    978302268
1    919    4    978301368
Time taken: 0.658 seconds
hive (UserMovieRating)> describe users;
OK
col_name    data_type    comment
userid    int    user id
gender    string    user sex
age    int    1:Under 18,18:18-24,25:25-34,35:35-44,45:45-49,50:50-55,56:56+
occupation    int    0-20 represents different jobs
zipcode    string    your home zip code
Time taken: 0.514 seconds
hive (UserMovieRating)> describe movies;
OK
col_name    data_type    comment
movieid    int    movie id
moviename    string    movie name
releaseddate    int    released year
movietype    string    movie type
Time taken: 0.085 seconds
hive (UserMovieRating)> describe ratings;
FAILED: SemanticException [Error 10001]: Table not found ratings
hive (UserMovieRating)> describe rating;
OK
col_name    data_type    comment
userid    int    user id
movieid    int    movie id(1-3952)
rating    int    Ratings are made on a 5-star scale
ratingtime    string    user rates time
Time taken: 0.121 seconds
Users,Movies,Rating三表联合查询:

hive (UserMovieRating)> select u.userid,u.occupation,m.moviename,r.rating
                                   > from rating r   
                                   > join users u on r.userid = u.userid
                                   > join movies m on r.movieid = m.movieid;

hive多表联合查询(GroupLens->Users,Movies,Ratings表)


更多文章、技术交流、商务合作、联系博主

微信扫码或搜索:z360901061

微信扫一扫加我为好友

QQ号联系: 360901061

您的支持是博主写作最大的动力,如果您喜欢我的文章,感觉我的文章对您有帮助,请用微信扫描下面二维码支持博主2元、5元、10元、20元等您想捐的金额吧,狠狠点击下面给点支持吧,站长非常感激您!手机微信长按不能支付解决办法:请将微信支付二维码保存到相册,切换到微信,然后点击微信右上角扫一扫功能,选择支付二维码完成支付。

【本文对您有帮助就好】

您的支持是博主写作最大的动力,如果您喜欢我的文章,感觉我的文章对您有帮助,请用微信扫描上面二维码支持博主2元、5元、10元、自定义金额等您想捐的金额吧,站长会非常 感谢您的哦!!!

发表我的评论
最新评论 总共0条评论