"""Hive streaming mapper: replace a Unix timestamp column with an ISO weekday.

Reads tab-separated rows ``userid, movieid, rating, unixtime`` from standard
input and writes ``userid, movieid, rating, weekday`` to standard output,
one row per line.
"""
import sys
import datetime


def map_line(line):
    """Return the output row for one tab-separated input row.

    ``line`` must contain exactly four tab-separated fields; the fourth is a
    Unix timestamp in seconds. The first three fields are passed through
    unchanged and the timestamp is replaced by its ISO weekday
    (1 = Monday ... 7 = Sunday).

    Raises ``ValueError`` if the row does not have exactly four fields or the
    timestamp is not numeric.
    """
    userid, movieid, rating, unixtime = line.strip().split('\t')
    # fromtimestamp() without a tz argument converts to the machine's local
    # time zone, so the weekday is the local-time weekday.
    weekday = datetime.datetime.fromtimestamp(float(unixtime)).isoweekday()
    return '\t'.join([userid, movieid, rating, str(weekday)])


def main():
    """Transform every row arriving on standard input."""
    for line in sys.stdin:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(map_line(line))


if __name__ == '__main__':
    main()
Use the mapper script:
-- Target table: same columns as the mapper's output, stored tab-delimited.
CREATE TABLE u_data_new (
  userid INT,
  movieid INT,
  rating INT,
  weekday INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';

-- Register the mapper script as a session resource so the TRANSFORM
-- clause below can invoke it.
add FILE weekday_mapper.py;

-- Stream every u_data row through weekday_mapper.py and store the
-- (userid, movieid, rating, weekday) rows it emits.
INSERT OVERWRITE TABLE u_data_new
SELECT
  TRANSFORM (userid, movieid, rating, unixtime)
  USING 'python weekday_mapper.py'
  AS (userid, movieid, rating, weekday)
FROM u_data;

-- Number of ratings per weekday.
SELECT weekday, COUNT(*)
FROM u_data_new
GROUP BY weekday;
-- Custom map/reduce pipeline: the inner query pipes docs.doctext through
-- wc_mapper.py, which emits (word, cnt) rows clustered by word; the outer
-- REDUCE clause feeds those rows to wc_reduce.py.
-- NOTE(review): wc_mapper.py and wc_reduce.py are not shown here; presumably
-- they must be registered with "add FILE" first -- confirm.
FROM (
  MAP doctext USING 'python wc_mapper.py' AS (word, cnt)
  FROM docs
  CLUSTER BY word
) a
REDUCE word, cnt USING 'python wc_reduce.py';
时间: 2024-12-29 06:14:17