Vote count:
0
I am using the following code to generate sessionId in pig by using sessionize UDF in datafu.
SET mapred.min.split.size 1073741824
SET mapred.job.queue.name 'marathon'
SET mapred.output.compress true;
--SET avro.output.codec snappy;
--SET pig.maxCombinedSplitSize 536870912;
page_view_pre = LOAD '/data/tracking/PageViewEvent/' USING LiAvroStorage('date.range','start.date=20150226;end.date=20150226;error.on.missing=true'); -----logic is currently for 2015-02-26,will later replace them with date parameters
p_key = LOAD '/projects/dwh/dwh_dim/dim_page_key/#LATEST' USING LiAvroStorage();
page_view_pre = FILTER page_view_pre BY (requestHeader.userAgent != 'CRAWLER' and requestHeader.browserId != 'CRAWLER') and NOT IsTestMemberId(header.memberId);
page_view_pre = FOREACH page_view_pre GENERATE
(int) (header.memberId <0 ? -9 : header.memberId ) as member_sk,
(chararray) requestHeader.browserId as browserId,
--(chararray) requestHeader.sessionId as sessionId,
(chararray) UnixToISO(header.time) as pageViewTime,
header.time as pv_time,
(chararray) requestHeader.path as path,
(chararray) requestHeader.referer as referer,
(chararray) epochToFormat(header.time, 'yyyyMMdd', 'America/Los_Angeles') as tracking_date,
(chararray) requestHeader.pageKey as pageKey,
(chararray) SUBSTRING(requestHeader.trackingCode, 0, 500) as trackingCode,
FLATTEN(botLookup(requestHeader.userAgent, requestHeader.browserId)) as (is_crawler, crawler_type),
(int) totalTime as totalTime,
((int) totalTime < 20 ? 1 :0) as bounce_flag;
page_view_pre = FILTER page_view_pre BY is_crawler == 'N' ;
p_key = FILTER p_key By is_aggregate ==1;
page_view_agg = JOIN page_view_pre by pageKey ,p_key by page_key;
page_view_agg = FOREACH page_view_agg GENERATE
(chararray)page_view_pre::member_sk as member_sk,
(chararray)page_view_pre::browserId as browserId,
--page_view_pre::sessionId as sessionId,
(chararray)page_view_pre::pageViewTime as pageViewTime,
(long)page_view_pre::pv_time as pv_time,
(chararray)page_view_pre::tracking_date as tracking_date,
(chararray)page_view_pre::path as path,
(chararray)page_view_pre::referer as referer,
(chararray)page_view_pre::pageKey as pageKey,
(int)p_key::page_key_sk as page_key_sk,
(chararray)page_view_pre::trackingCode as trackingCode,
(int)page_view_pre::totalTime as totalTime,
(int)page_view_pre::bounce_flag as bounce_flag;
page_view_agg = FILTER page_view_agg By (member_sk is NOT null) OR (browserId IS NOT NULL) ;
pvs_by_member_browser_pair = GROUP page_view_agg BY (member_sk,browserId);
***session_groups = FOREACH pvs_by_member_browser_pair {
visits = ORDER page_view_agg BY pv_time;
GENERATE FLATTEN(Sessionize(visits)) AS (
pageViewTime,member_sk, pv_time,tracking_date, pageKey,page_key_sk,browserId,referer ,path, trackingCode,totalTime, sessionId
);
}***
The bolded part is giving me the following error :
ERROR 1031: Incompatable schema: left is "pageViewTime:NULL,member_sk:NULL,pv_time:NULL,tracking_date:NULL,pageKey:NULL,page_key_sk:NULL,browserId:NULL,referer:NULL,path:NULL,trackingCode:NULL,totalTime:NULL,sessionId:NULL", right is "datafu.pig.sessions.sessionize_visits_43::member_sk:chararray,datafu.pig.sessions.sessionize_visits_43::browserId:chararray,datafu.pig.sessions.sessionize_visits_43::pageViewTime:chararray,datafu.pig.sessions.sessionize_visits_43::pv_time:long,datafu.pig.sessions.sessionize_visits_43::tracking_date:chararray,datafu.pig.sessions.sessionize_visits_43::path:chararray,datafu.pig.sessions.sessionize_visits_43::referer:chararray,datafu.pig.sessions.sessionize_visits_43::pageKey:chararray,datafu.pig.sessions.sessionize_visits_43::page_key_sk:int,datafu.pig.sessions.sessionize_visits_43::trackingCode:chararray,datafu.pig.sessions.sessionize_visits_43::totalTime:int,datafu.pig.sessions.sessionize_visits_43::bounce_flag:int,datafu.pig.sessions.sessionize_visits_43::session_id:chararray"
I thought initially this had to do with null member or browser id's.I filtered for them too, still the error is persisting.
Can somebody please help me regarding this, have been stuck here for hours.
Thanks!
Pig throwing incompatible type error
Aucun commentaire:
Enregistrer un commentaire