diff --git a/services/libs/tinybird/pipes/activities_filtered.pipe b/services/libs/tinybird/pipes/activities_filtered.pipe index e042c1c8c5..0429ff2d5c 100644 --- a/services/libs/tinybird/pipes/activities_filtered.pipe +++ b/services/libs/tinybird/pipes/activities_filtered.pipe @@ -2,18 +2,17 @@ DESCRIPTION > - `activities_filtered.pipe` is the core filtering infrastructure pipe for activity data across the entire analytics platform. - This pipe serves as the foundation for most activity-related widgets, by providing a consistent, filtered view of contribution activities. - It filters activities from `activityRelations_deduplicated_cleaned_ds` datasource based on project segment, time ranges, repositories, platforms, and activity types. - - By default, this pipe returns only contribution activities (`isContribution = 1`) unless explicitly overridden with `onlyContributions = 0`. - The pipe automatically scopes data to the current project using `segments_filtered` pipe for security and data isolation. - Parameters: - `project`: Inherited from `segments_filtered`, project slug (e.g., 'k8s', 'tensorflow') - - `repos`: Inherited from `segments_filtered`, array of repository URLs for filtering + - `repos`: Optional array of repository URLs for filtering (e.g., ['https://github.com/kubernetes/kubernetes']). Inherited from `segments_filtered`. 
- `startDate`: Optional DateTime filter for activities after timestamp (e.g., '2024-01-01 00:00:00') - `endDate`: Optional DateTime filter for activities before timestamp (e.g., '2024-12-31 23:59:59') - - `repos`: Optional array of repository URLs (e.g., ['https://github.com/kubernetes/kubernetes']) - `platform`: Optional string filter for source platform (e.g., 'github', 'discord', 'slack') - `activity_type`: Optional string filter for single activity type (e.g., 'authored-commit') - `activity_types`: Optional array of activity types (e.g., ['authored-commit', 'co-authored-commit']) - - `onlyContributions`: Optional boolean, defaults to 1 (contributions only), set to 0 for all activities + - `includeCodeContributions`: Optional boolean to include code contribution activities. Defaults to 1. Set to 0 to exclude. Inherited from `activityTypes_filtered`. + - `includeCollaborations`: Optional boolean to include collaboration activities. Defaults to 0. Set to 1 to include. Inherited from `activityTypes_filtered`. - Response: `id` (activityId), `timestamp`, `type`, `platform`, `memberId`, `organizationId`, `segmentId`. - This pipe is consumed by many of downstream pipes and widgets across the platform for consistent activity filtering. - Performance is optimized through proper sorting keys on `segmentId`, `timestamp`, `type`, `platform`, and `memberId` in the source datasource. 
@@ -41,10 +40,7 @@ SQL > AND a.platform = {{ String(platform, description="Filter activity platform", required=False) }} {% end %} - {% if ( - not defined(onlyContributions) - or (defined(onlyContributions) and onlyContributions == 1) - ) %} AND a.isContribution {% end %} + AND (a.type, a.platform) IN (SELECT activityType, platform FROM activityTypes_filtered) {% if defined(activity_type) %} AND a.type = {{ String(activity_type, description="Filter activity type", required=False) }} {% end %} diff --git a/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe b/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe index 225bdf47ce..afb7a3a868 100644 --- a/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe +++ b/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe @@ -14,7 +14,8 @@ DESCRIPTION > - `platform`: Optional string filter for source platform (e.g., 'github', 'discord', 'slack') - `activity_type`: Optional string filter for single activity type (e.g., 'authored-commit') - `activity_types`: Optional array of activity types (e.g., ['authored-commit', 'co-authored-commit']) - - `onlyContributions`: Optional boolean, defaults to 1 (contributions only), set to 0 for all activities + - `includeCodeContributions`: Optional boolean to include code contribution activities. Defaults to 1. Set to 0 to exclude. Inherited from `activityTypes_filtered`. + - `includeCollaborations`: Optional boolean to include collaboration activities. Defaults to 0. Set to 1 to include. Inherited from `activityTypes_filtered`. - Response: `id` (activityId), `timestamp`, `type`, `platform`, `memberId`, `organizationId`, `segmentId`. 
NODE activities_filtered_by_timestamp_and_channel @@ -41,9 +42,7 @@ SQL > AND a.platform = {{ String(platform, description="Filter activity platform", required=False) }} {% end %} - {% if not defined(onlyContributions) or ( - defined(onlyContributions) and onlyContributions == 1 - ) %} AND a.isContribution {% end %} + AND (a.type, a.platform) IN (SELECT activityType, platform FROM activityTypes_filtered) {% if defined(activity_type) %} AND a.type = {{ String(activity_type, description="Filter activity type", required=False) }} {% end %} diff --git a/services/libs/tinybird/pipes/activities_filtered_retention.pipe b/services/libs/tinybird/pipes/activities_filtered_retention.pipe index 5d41ffd664..1a48d372bd 100644 --- a/services/libs/tinybird/pipes/activities_filtered_retention.pipe +++ b/services/libs/tinybird/pipes/activities_filtered_retention.pipe @@ -13,7 +13,8 @@ DESCRIPTION > - `platform`: Optional string filter for source platform (e.g., 'github', 'discord', 'slack') - `activity_type`: Optional string filter for single activity type (e.g., 'authored-commit') - `activity_types`: Optional array of activity types (e.g., ['authored-commit', 'co-authored-commit']) - - `onlyContributions`: Optional boolean, defaults to 1 (contributions only), set to 0 for all activities + - `includeCodeContributions`: Optional boolean to include code contribution activities. Defaults to 1. Set to 0 to exclude. Inherited from `activityTypes_filtered`. + - `includeCollaborations`: Optional boolean to include collaboration activities. Defaults to 0. Set to 1 to include. Inherited from `activityTypes_filtered`. - `granularity`: Required string for time aggregation and period extension ('daily', 'weekly', 'monthly', 'quarterly', 'yearly') - Response: `id` (activityId), `timestamp`, `type`, `platform`, `memberId`, `organizationId`, `segmentId`. 
@@ -57,9 +58,7 @@ SQL > AND a.platform = {{ String(platform, description="Filter activity platform", required=False) }} {% end %} - {% if not defined(onlyContributions) or ( - defined(onlyContributions) and onlyContributions == 1 - ) %} AND a.isContribution {% end %} + AND (a.type, a.platform) IN (SELECT activityType, platform FROM activityTypes_filtered) {% if defined(activity_type) %} AND a.type = {{ String(activity_type, description="Filter activity type", required=False) }} {% end %} diff --git a/services/libs/tinybird/pipes/activityTypes_filtered.pipe b/services/libs/tinybird/pipes/activityTypes_filtered.pipe new file mode 100644 index 0000000000..42bc6a9931 --- /dev/null +++ b/services/libs/tinybird/pipes/activityTypes_filtered.pipe @@ -0,0 +1,24 @@ +DESCRIPTION > + - `activityTypes_filtered.pipe` selects activity types from the `activityTypes` datasource according to the include flags below. + - By default, this returns only code contribution activities (`includeCodeContributions = 1`, `includeCollaborations = 0`, `includeOtherContributions = 0`). + - To return all activities, set `includeCodeContributions = 1`, `includeCollaborations = 1`, and `includeOtherContributions = 1`. + - Parameters: + - `includeCodeContributions`: Optional boolean to include code contribution activities. Defaults to 1. Set to 0 to exclude. + - `includeCollaborations`: Optional boolean to include collaboration activities. Defaults to 0. Set to 1 to include. + - `includeOtherContributions`: Optional boolean to include other contribution activities (activities that are neither code contributions nor collaborations). Defaults to 0. Set to 1 to include. + - Response: `activityType`, `platform`. + - This pipe is used by other downstream pipes as an auxiliary method of filtering data by activity types. 
+ +NODE activityTypes_selected +SQL > + % + WITH + {{ UInt8(includeCodeContributions, default=1) }} AS icc, + {{ UInt8(includeCollaborations, default=0) }} AS icol, + {{ UInt8(includeOtherContributions, default=0) }} AS ioc + SELECT activityType, platform + FROM activityTypes + WHERE + (icc = 1 AND isCodeContribution = 1) + OR (icol = 1 AND isCollaboration = 1) + OR (ioc = 1 AND isCodeContribution = 0 AND isCollaboration = 0) diff --git a/services/libs/tinybird/pipes/health_score_active_contributors.pipe b/services/libs/tinybird/pipes/health_score_active_contributors.pipe index 2877947c04..54e45d2a8c 100644 --- a/services/libs/tinybird/pipes/health_score_active_contributors.pipe +++ b/services/libs/tinybird/pipes/health_score_active_contributors.pipe @@ -7,7 +7,8 @@ SQL > SELECT segmentId, COALESCE(uniq(memberId), 0) AS activeContributors FROM activityRelations_deduplicated_cleaned_ds WHERE - memberId != '' AND isContribution + memberId != '' + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) {% if defined(project) %} AND segmentId = (SELECT segmentId FROM segments_filtered) {% if defined(repos) %} diff --git a/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe b/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe index de7f76b5d5..440f51cebf 100644 --- a/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe +++ b/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe @@ -4,7 +4,8 @@ SQL > SELECT segmentId, memberId, count() AS contributionCount, MIN(timestamp), MAX(timestamp) FROM activityRelations_deduplicated_cleaned_ds WHERE - memberId != '' AND isContribution + memberId != '' + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) {% if defined(project) %} AND segmentId = (SELECT segmentId FROM segments_filtered) {% if defined(repos) %} diff --git a/services/libs/tinybird/pipes/health_score_organization_dependency.pipe 
b/services/libs/tinybird/pipes/health_score_organization_dependency.pipe index b0a8b2827a..2ad37b82ef 100644 --- a/services/libs/tinybird/pipes/health_score_organization_dependency.pipe +++ b/services/libs/tinybird/pipes/health_score_organization_dependency.pipe @@ -4,7 +4,8 @@ SQL > SELECT segmentId, organizationId, count() AS contributionCount FROM activityRelations_deduplicated_cleaned_ds WHERE - organizationId != '' AND isContribution + organizationId != '' + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) {% if defined(project) %} AND segmentId = (SELECT segmentId FROM segments_filtered) {% if defined(repos) %} diff --git a/services/libs/tinybird/pipes/segmentId_aggregates_mv.pipe b/services/libs/tinybird/pipes/segmentId_aggregates_mv.pipe index 80e7b53e47..9b643aea77 100644 --- a/services/libs/tinybird/pipes/segmentId_aggregates_mv.pipe +++ b/services/libs/tinybird/pipes/segmentId_aggregates_mv.pipe @@ -8,5 +8,10 @@ SQL > countDistinctState(memberId) AS contributorCount, countDistinctState(organizationId) AS organizationCount FROM activityRelations_deduplicated_cleaned_ds - WHERE isContribution = true + WHERE + (type, platform) IN ( + SELECT activityType, platform + FROM activityTypes + WHERE isCodeContribution = 1 OR isCollaboration = 1 + ) GROUP BY segmentId diff --git a/services/libs/tinybird/pipes/top_member_org_copy.pipe b/services/libs/tinybird/pipes/top_member_org_copy.pipe index 337910c5b2..223f30a6f3 100644 --- a/services/libs/tinybird/pipes/top_member_org_copy.pipe +++ b/services/libs/tinybird/pipes/top_member_org_copy.pipe @@ -17,7 +17,14 @@ NODE top_member_org_copy_member_activity_count SQL > SELECT memberId, count(*) AS activityCount FROM activityRelations_deduplicated_cleaned_ds - WHERE (timestamp >= (now() - toIntervalYear(10))) AND (timestamp < now()) + WHERE + (timestamp >= (now() - toIntervalYear(10))) + AND (timestamp < now()) + AND (type, platform) IN ( + SELECT activityType, platform + FROM 
activityTypes + WHERE isCodeContribution = 1 OR isCollaboration = 1 + ) GROUP BY memberId ORDER BY activityCount DESC LIMIT 100 @@ -41,7 +48,15 @@ NODE top_member_org_copy_organization_activity_count SQL > SELECT organizationId, count(*) AS activityCount FROM activityRelations_deduplicated_cleaned_ds - WHERE (timestamp >= (now() - toIntervalYear(10))) AND (timestamp < now()) AND organizationId != '' + WHERE + (timestamp >= (now() - toIntervalYear(10))) + AND (timestamp < now()) + AND organizationId != '' + AND (type, platform) IN ( + SELECT activityType, platform + FROM activityTypes + WHERE isCodeContribution = 1 OR isCollaboration = 1 + ) GROUP BY organizationId ORDER BY activityCount DESC LIMIT 100