COGROUP
-- COGROUP example: group two relations by a key in a single pass.
-- Both inputs are read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);
B = LOAD 'data5' USING PigStorage(' ') AS (a3:int, a4:int);

-- Co-group A on a1 with B on a4.
C = COGROUP A BY a1, B BY a4;

-- DUMP triggers execution and prints C; it is issued once so the job is not re-run.
DUMP C;
GROUP BY
-- GROUP BY example: count the records in each a2 group.
-- Input is read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data3' USING PigStorage(' ') AS (a1:chararray, a2:int, a3:int);

-- Group the records of A by the value of a2.
B = GROUP A BY a2;

-- For every group emit the key and the COUNT of the grouped bag A.
C = FOREACH B GENERATE group, COUNT(A);

-- DUMP triggers execution and prints C; it is issued once so the job is not re-run.
DUMP C;
CROSS
-- CROSS example: compute the cross product of two relations.
-- Both inputs are read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);
B = LOAD 'data5' USING PigStorage(' ') AS (a3:int, a4:int);

-- Combine A with B using the CROSS operator.
C = CROSS A, B;

-- DUMP triggers execution and prints C; it is issued once so the job is not re-run.
DUMP C;
DISTINCT
-- DISTINCT example: remove duplicate tuples from a relation.
-- Input is read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data7' USING PigStorage(' ') AS (a1:int, a2:int, a3:int);

-- Apply DISTINCT to the whole relation.
B = DISTINCT A;

-- DUMP triggers execution and prints B; it is issued once so the job is not re-run.
DUMP B;
FILTER
-- FILTER example: keep only the records that satisfy a condition.
-- Input is read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data7' USING PigStorage(' ') AS (a1:int, a2:int, a3:int);

-- Keep the records of A whose a2 equals 5.
B = FILTER A BY a2 == 5;

-- DUMP triggers execution and prints B; it is issued once so the job is not re-run.
DUMP B;
COGROUP with FLATTEN
-- COGROUP + FLATTEN example: co-group two relations, then un-nest the bag of A records.
-- Both inputs are read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);
B = LOAD 'data5' USING PigStorage(' ') AS (a3:int, a4:int);

-- Co-group A on a1 with B on a4.
C = COGROUP A BY a1, B BY a4;

-- Emit the group key together with the flattened contents of bag A.
D = FOREACH C GENERATE group, FLATTEN(A);

-- DUMP triggers execution and prints D; it is issued once so the job is not re-run.
DUMP D;
-- DESCRIBE example: inspect the schema of a grouped relation, then count per group.
-- Input is read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data3' USING PigStorage(' ') AS (a1:chararray, a2:int, a3:int);

-- Group the records of A by the value of a2.
B = GROUP A BY a2;

-- Print the schema of B, then its contents.
DESCRIBE B;
DUMP B;

-- Function names are case-sensitive in Pig Latin: the built-in is COUNT, not Count.
C = FOREACH B GENERATE group, COUNT(A);

-- Each DUMP is issued once so the same job is not re-run.
DUMP C;
JOIN
-- JOIN example: join two relations on a key.
-- Both inputs are read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);
B = LOAD 'data5' USING PigStorage(' ') AS (a3:int, a4:int);

-- Join A and B where A.a1 matches B.a4.
C = JOIN A BY a1, B BY a4;

-- DUMP triggers execution and prints C; it is issued once so the job is not re-run.
DUMP C;
LIMIT
-- LIMIT example: restrict the number of output records.
-- Input is read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data6' USING PigStorage(' ') AS (a1:int, a2:int, a3:int, a4:int, a5:int);

-- Keep at most 3 records of A.
B = LIMIT A 3;

-- DUMP triggers execution and prints B; it is issued once so the job is not re-run.
DUMP B;
ORDER BY
-- ORDER BY example: sort a relation on one field.
-- Input is read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data6' USING PigStorage(' ') AS (a1:int, a2:int, a3:int, a4:int, a5:int);

-- Positional references are zero-based, so $1 is the second field (a2).
B = ORDER A BY $1;

-- DUMP triggers execution and prints B; it is issued once so the job is not re-run.
DUMP B;
SPLIT
-- SPLIT example: partition one relation into several by condition.
-- Input is read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);

-- Records with a1 == 2 go to B and records with a1 == 3 go to C.
SPLIT A INTO B IF a1 == 2, C IF a1 == 3;

-- Print each partition; each DUMP is issued once so the job is not re-run.
DUMP B;
DUMP C;
UNION
-- UNION example: merge the contents of two relations.
-- Both inputs are read with PigStorage(' '), i.e. a single space is the field delimiter.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);
B = LOAD 'data5' USING PigStorage(' ') AS (a3:int, a4:int);

-- Combine A and B using the UNION operator.
C = UNION A, B;

-- DUMP triggers execution and prints C; it is issued once so the job is not re-run.
DUMP C;
No comments:
Post a Comment