Wednesday, May 13, 2015

APACHE PIG BUILT-IN TRANSFORMATIONS



COGROUP 
-- Load two space-delimited inputs, each declared with two int columns.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);
B = LOAD 'data5' USING PigStorage(' ') AS (a3:int, a4:int);

-- Co-group the two relations, keying A on a1 and B on a4.
C = COGROUP A BY a1, B BY a4;
DUMP C;

GROUP BY
-- Read space-delimited (chararray, int, int) records from 'data3'.
A = LOAD 'data3' USING PigStorage(' ')
    AS (a1:chararray, a2:int, a3:int);

-- Group the records on their second column, a2.
B = GROUP A BY a2;

-- For each group, emit the key together with COUNT applied to its bag of A records.
C = FOREACH B GENERATE group, COUNT(A);
DUMP C;

CROSS 
-- Both inputs are space-delimited files with two int columns.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);
B = LOAD 'data5' USING PigStorage(' ') AS (a3:int, a4:int);

-- Cross product of A and B.
C = CROSS A, B;
DUMP C;

DISTINCT 
-- Load three int columns per space-delimited record from 'data7'.
A = LOAD 'data7' USING PigStorage(' ')
    AS (a1:int, a2:int, a3:int);

-- Remove duplicate tuples from A.
B = DISTINCT A;
DUMP B;

FILTER
-- Load three int columns per space-delimited record from 'data7'.
A = LOAD 'data7' USING PigStorage(' ')
    AS (a1:int, a2:int, a3:int);

-- Keep only the tuples whose second column equals 5.
B = FILTER A BY a2 == 5;
DUMP B;


COGROUP with FLATTEN
-- Load two space-delimited inputs, each declared with two int columns.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);
B = LOAD 'data5' USING PigStorage(' ') AS (a3:int, a4:int);

-- Co-group the two relations, keying A on a1 and B on a4.
C = COGROUP A BY a1, B BY a4;

-- Emit the group key next to the un-nested contents of A's bag.
D = FOREACH C GENERATE group, FLATTEN(A);
DUMP D;



-- GROUP BY, inspected with DESCRIBE before aggregating.
-- Load space-delimited (chararray, int, int) records from 'data3'.
A = load 'data3' using PigStorage(' ') as (a1:chararray,a2:int,a3:int);
-- Group the records on their second column, a2.
B = Group A by a2;
-- Print the schema of the grouped relation, then its contents.
Describe B;
Dump B;
-- Built-in function names are case sensitive in Pig Latin, so the aggregate
-- must be spelled COUNT; the mixed-case spelling "Count" is not resolved.
C = foreach B generate group, COUNT(A);
Dump C;


JOIN 
-- Both inputs are space-delimited files with two int columns.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);
B = LOAD 'data5' USING PigStorage(' ') AS (a3:int, a4:int);

-- Join the relations where A's a1 matches B's a4.
C = JOIN A BY a1, B BY a4;
DUMP C;

LIMIT 
-- Load five int columns per space-delimited record from 'data6'.
A = LOAD 'data6' USING PigStorage(' ')
    AS (a1:int, a2:int, a3:int, a4:int, a5:int);

-- Cap the output at three tuples.
B = LIMIT A 3;
DUMP B;
 

ORDER BY
-- Load five int columns per space-delimited record from 'data6'.
A = LOAD 'data6' USING PigStorage(' ')
    AS (a1:int, a2:int, a3:int, a4:int, a5:int);

-- Sort on positional reference $1, i.e. the second declared column (a2).
B = ORDER A BY $1;
DUMP B;
 
SPLIT 
-- Load two int columns per space-delimited record from 'data4'.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);

-- Route tuples with a1 == 2 into B and tuples with a1 == 3 into C.
SPLIT A INTO
    B IF a1 == 2,
    C IF a1 == 3;

DUMP B;
DUMP C;
 
UNION 
-- Both inputs are space-delimited files with two int columns.
A = LOAD 'data4' USING PigStorage(' ') AS (a1:int, a2:int);
B = LOAD 'data5' USING PigStorage(' ') AS (a3:int, a4:int);

-- Combine the contents of A and B into a single relation.
C = UNION A, B;
DUMP C;
 

No comments:

Post a Comment