-- Word-length histogram in Hive.
-- Run these statements at the Hive prompt or save them to a file and use `hive -f`.

-- Input table: one tab-separated record per line
-- (word, year, wordcount, pagecount, bookcount).
-- IF NOT EXISTS keeps the script re-runnable.
CREATE TABLE IF NOT EXISTS wordlist (
    word      STRING,
    year      INT,
    wordcount INT,
    pagecount INT,
    bookcount INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';

-- OVERWRITE replaces any rows already in wordlist with the contents of the local file.
LOAD DATA LOCAL INPATH '/inputfile' OVERWRITE INTO TABLE wordlist;

-- Intermediate table: the length of each word alongside its count.
CREATE TABLE IF NOT EXISTS wordlengths (
    wordlength INT,
    wordcount  INT
);

INSERT OVERWRITE TABLE wordlengths
SELECT
    length(word) AS wordlength,
    wordcount
FROM wordlist;

-- Total count per word length. The aggregate is aliased so the result column
-- has a real name, and ORDER BY makes the output order deterministic.
SELECT
    wordlength,
    sum(wordcount) AS total_wordcount
FROM wordlengths
GROUP BY wordlength
ORDER BY wordlength;
Word Count in Pig:
-- Word count in Pig Latin: reads './input.txt' and stores (word, count) pairs in './wordcount'.

-- Load the input with a single chararray field per record.
-- NOTE(review): no loader is specified, so Pig's default loader is used; it splits
-- fields on tabs, so text after a tab in a line would not be part of `line` -- confirm
-- the input has no tabs (or load with TextLoader()).
Lines = LOAD './input.txt' AS (line:chararray);

-- TOKENIZE splits the line into a bag of words;
-- FLATTEN produces a separate record for each item from that bag.
Words = FOREACH Lines GENERATE FLATTEN(TOKENIZE(line)) AS word;

-- Group records together by word.
-- Relation aliases are case-sensitive, so this must reference Words exactly as defined above.
Groups = GROUP Words BY word;

-- Count the records in each group; `group` is the field holding the grouping key (the word).
Counts = FOREACH Groups GENERATE group, COUNT(Words);

-- Store the results.
STORE Counts INTO './wordcount';
-- Note: write built-in functions and UDFs in uppercase (e.g. TOKENIZE, COUNT) -- their names are case-sensitive.
Word Count in
No comments:
Post a Comment