@@ -3018,4 +3018,314 @@ describe('checkAlerts', () => {
30183018 expect ( slack . postMessageToWebhook ) . toHaveBeenCalledTimes ( 2 ) ;
30193019 } ) ;
30203020 } ) ;
3021+
3022+ describe ( 'group-by alert with disappearing group' , ( ) => {
3023+ const server = getServer ( ) ;
3024+
3025+ beforeAll ( async ( ) => {
3026+ await server . start ( ) ;
3027+ } ) ;
3028+
3029+ afterEach ( async ( ) => {
3030+ await server . clearDBs ( ) ;
3031+ jest . clearAllMocks ( ) ;
3032+ } ) ;
3033+
3034+ afterAll ( async ( ) => {
3035+ await server . stop ( ) ;
3036+ } ) ;
3037+
3038+ it ( 'should use latest createdAt from any group and not rescan old timeframe when one group disappears' , async ( ) => {
3039+ jest . spyOn ( slack , 'postMessageToWebhook' ) . mockResolvedValue ( null as any ) ;
3040+
3041+ const team = await createTeam ( { name : 'My Team' } ) ;
3042+
3043+ const now = new Date ( '2023-11-16T22:18:00.000Z' ) ;
3044+
3045+ // Insert logs in first time bucket (22:00-22:05):
3046+ // - service-a: 3 errors (ALERT - exceeds threshold of 2)
3047+ // - service-b: 3 errors (ALERT - exceeds threshold of 2)
3048+ await bulkInsertLogs ( [
3049+ {
3050+ ServiceName : 'service-a' ,
3051+ Timestamp : new Date ( '2023-11-16T22:00:00.000Z' ) ,
3052+ SeverityText : 'error' ,
3053+ Body : 'Error from service-a' ,
3054+ } ,
3055+ {
3056+ ServiceName : 'service-a' ,
3057+ Timestamp : new Date ( '2023-11-16T22:01:00.000Z' ) ,
3058+ SeverityText : 'error' ,
3059+ Body : 'Error from service-a' ,
3060+ } ,
3061+ {
3062+ ServiceName : 'service-a' ,
3063+ Timestamp : new Date ( '2023-11-16T22:02:00.000Z' ) ,
3064+ SeverityText : 'error' ,
3065+ Body : 'Error from service-a' ,
3066+ } ,
3067+ {
3068+ ServiceName : 'service-b' ,
3069+ Timestamp : new Date ( '2023-11-16T22:00:00.000Z' ) ,
3070+ SeverityText : 'error' ,
3071+ Body : 'Error from service-b' ,
3072+ } ,
3073+ {
3074+ ServiceName : 'service-b' ,
3075+ Timestamp : new Date ( '2023-11-16T22:01:00.000Z' ) ,
3076+ SeverityText : 'error' ,
3077+ Body : 'Error from service-b' ,
3078+ } ,
3079+ {
3080+ ServiceName : 'service-b' ,
3081+ Timestamp : new Date ( '2023-11-16T22:02:00.000Z' ) ,
3082+ SeverityText : 'error' ,
3083+ Body : 'Error from service-b' ,
3084+ } ,
3085+ // Second time bucket (22:05-22:10):
3086+ // - service-a: 1 error (OK - below threshold)
3087+ // - service-b: 3 errors (ALERT - exceeds threshold)
3088+ {
3089+ ServiceName : 'service-a' ,
3090+ Timestamp : new Date ( '2023-11-16T22:05:00.000Z' ) ,
3091+ SeverityText : 'error' ,
3092+ Body : 'Error from service-a' ,
3093+ } ,
3094+ {
3095+ ServiceName : 'service-b' ,
3096+ Timestamp : new Date ( '2023-11-16T22:05:00.000Z' ) ,
3097+ SeverityText : 'error' ,
3098+ Body : 'Error from service-b' ,
3099+ } ,
3100+ {
3101+ ServiceName : 'service-b' ,
3102+ Timestamp : new Date ( '2023-11-16T22:06:00.000Z' ) ,
3103+ SeverityText : 'error' ,
3104+ Body : 'Error from service-b' ,
3105+ } ,
3106+ {
3107+ ServiceName : 'service-b' ,
3108+ Timestamp : new Date ( '2023-11-16T22:07:00.000Z' ) ,
3109+ SeverityText : 'error' ,
3110+ Body : 'Error from service-b' ,
3111+ } ,
3112+ // Third time bucket (22:10-22:15):
3113+ // - service-a: 1 error (OK - below threshold)
3114+ // - service-b: DISAPPEARS (no logs)
3115+ {
3116+ ServiceName : 'service-a' ,
3117+ Timestamp : new Date ( '2023-11-16T22:10:00.000Z' ) ,
3118+ SeverityText : 'error' ,
3119+ Body : 'Error from service-a' ,
3120+ } ,
3121+ ] ) ;
3122+
3123+ const webhook = await new Webhook ( {
3124+ team : team . _id ,
3125+ service : 'slack' ,
3126+ url : 'https://hooks.slack.com/services/123' ,
3127+ name : 'My Webhook' ,
3128+ } ) . save ( ) ;
3129+ const teamWebhooksById = new Map < string , typeof webhook > ( [
3130+ [ webhook . _id . toString ( ) , webhook ] ,
3131+ ] ) ;
3132+ const connection = await Connection . create ( {
3133+ team : team . _id ,
3134+ name : 'Default' ,
3135+ host : config . CLICKHOUSE_HOST ,
3136+ username : config . CLICKHOUSE_USER ,
3137+ password : config . CLICKHOUSE_PASSWORD ,
3138+ } ) ;
3139+ const source = await Source . create ( {
3140+ kind : 'log' ,
3141+ team : team . _id ,
3142+ from : {
3143+ databaseName : 'default' ,
3144+ tableName : 'otel_logs' ,
3145+ } ,
3146+ timestampValueExpression : 'Timestamp' ,
3147+ connection : connection . id ,
3148+ name : 'Logs' ,
3149+ } ) ;
3150+ const savedSearch = await new SavedSearch ( {
3151+ team : team . _id ,
3152+ name : 'My Error Search' ,
3153+ select : 'Body' ,
3154+ where : 'SeverityText: "error"' ,
3155+ whereLanguage : 'lucene' ,
3156+ orderBy : 'Timestamp' ,
3157+ source : source . id ,
3158+ tags : [ 'test' ] ,
3159+ } ) . save ( ) ;
3160+ const mockUserId = new mongoose . Types . ObjectId ( ) ;
3161+ const alert = await createAlert (
3162+ team . _id ,
3163+ {
3164+ source : AlertSource . SAVED_SEARCH ,
3165+ channel : {
3166+ type : 'webhook' ,
3167+ webhookId : webhook . _id . toString ( ) ,
3168+ } ,
3169+ interval : '5m' ,
3170+ thresholdType : AlertThresholdType . ABOVE ,
3171+ threshold : 2 ,
3172+ savedSearchId : savedSearch . id ,
3173+ groupBy : 'ServiceName' , // Group by ServiceName
3174+ } ,
3175+ mockUserId ,
3176+ ) ;
3177+
3178+ const enhancedAlert : any = await Alert . findById ( alert . id ) . populate ( [
3179+ 'team' ,
3180+ 'savedSearch' ,
3181+ ] ) ;
3182+
3183+ // Create previous alert histories at 22:00 for initial baseline (one per service)
3184+ await AlertHistory . create ( [
3185+ {
3186+ alert : alert . id ,
3187+ createdAt : new Date ( '2023-11-16T22:00:00.000Z' ) ,
3188+ state : 'OK' ,
3189+ counts : 0 ,
3190+ lastValues : [ ] ,
3191+ group : 'ServiceName:service-a' ,
3192+ } ,
3193+ {
3194+ alert : alert . id ,
3195+ createdAt : new Date ( '2023-11-16T22:00:00.000Z' ) ,
3196+ state : 'OK' ,
3197+ counts : 0 ,
3198+ lastValues : [ ] ,
3199+ group : 'ServiceName:service-b' ,
3200+ } ,
3201+ ] ) ;
3202+
3203+ const previousMap = await getPreviousAlertHistories (
3204+ [ enhancedAlert . id ] ,
3205+ now ,
3206+ ) ;
3207+
3208+ const details = {
3209+ alert : enhancedAlert ,
3210+ source,
3211+ taskType : AlertTaskType . SAVED_SEARCH ,
3212+ savedSearch,
3213+ previousMap,
3214+ } satisfies AlertDetails ;
3215+
3216+ const clickhouseClient = new ClickhouseClient ( {
3217+ host : connection . host ,
3218+ username : connection . username ,
3219+ password : connection . password ,
3220+ } ) ;
3221+
3222+ const mockMetadata = {
3223+ getColumn : jest . fn ( ) . mockImplementation ( ( { column } ) => {
3224+ const columnMap = {
3225+ Body : { name : 'Body' , type : 'String' } ,
3226+ Timestamp : { name : 'Timestamp' , type : 'DateTime' } ,
3227+ SeverityText : { name : 'SeverityText' , type : 'String' } ,
3228+ ServiceName : { name : 'ServiceName' , type : 'String' } ,
3229+ } ;
3230+ return Promise . resolve ( columnMap [ column ] ) ;
3231+ } ) ,
3232+ } ;
3233+
3234+ jest . mock ( '@hyperdx/common-utils/dist/metadata' , ( ) => ( {
3235+ ...jest . requireActual ( '@hyperdx/common-utils/dist/metadata' ) ,
3236+ getMetadata : jest . fn ( ) . mockReturnValue ( mockMetadata ) ,
3237+ } ) ) ;
3238+
3239+ // First run: process alert at 22:18
3240+ // Should check buckets: 22:00-22:05, 22:05-22:10, 22:10-22:15
3241+ await processAlert (
3242+ now ,
3243+ details ,
3244+ clickhouseClient ,
3245+ connection . id ,
3246+ alertProvider ,
3247+ teamWebhooksById ,
3248+ ) ;
3249+
3250+ // Alert should be in ALERT state (service-b still alerting)
3251+ const updatedAlert = await Alert . findById ( enhancedAlert . id ) ;
3252+ expect ( updatedAlert ! . state ) . toBe ( 'ALERT' ) ;
3253+
3254+ // Check alert histories after first run
3255+ const firstRunHistories = await AlertHistory . find ( {
3256+ alert : alert . id ,
3257+ } ) . sort ( { createdAt : 1 } ) ;
3258+
3259+ // Should have histories: 1 previous + 2 groups (service-a, service-b)
3260+ // service-a should resolve, service-b should alert
3261+ expect ( firstRunHistories . length ) . toBeGreaterThan ( 1 ) ;
3262+
3263+ // Find the latest createdAt from all group histories (should be from 22:15)
3264+ const latestCreatedAt = firstRunHistories
3265+ . slice ( 1 ) // Skip the initial previous history
3266+ . reduce (
3267+ ( latest , history ) => {
3268+ return ! latest || history . createdAt > latest
3269+ ? history . createdAt
3270+ : latest ;
3271+ } ,
3272+ null as Date | null ,
3273+ ) ;
3274+
3275+ expect ( latestCreatedAt ) . toEqual ( new Date ( '2023-11-16T22:15:00.000Z' ) ) ;
3276+
3277+ // Second run: process alert at 22:23:00
3278+ // The check was already done at 22:15 (latest createdAt from any group)
3279+ // Even though service-b doesn't exist in that bucket, we shouldn't recheck old buckets
3280+ // Should use the LATEST createdAt (22:15) from any group, not the earliest
3281+ // This means it should only check NEW bucket: 22:15-22:20
3282+ // Should NOT rescan 22:00-22:05 where service-b had data but was already checked
3283+ const nextRun = new Date ( '2023-11-16T22:23:00.000Z' ) ;
3284+ const previousMapNextRun = await getPreviousAlertHistories (
3285+ [ enhancedAlert . id ] ,
3286+ nextRun ,
3287+ ) ;
3288+
3289+ // Verify that previousMapNextRun has the latest createdAt from any group
3290+ const previousDates = Array . from ( previousMapNextRun . values ( ) ) . map (
3291+ h => h . createdAt ,
3292+ ) ;
3293+ const maxPreviousDate = previousDates . reduce (
3294+ ( max , date ) => ( date > max ? date : max ) ,
3295+ new Date ( 0 ) ,
3296+ ) ;
3297+ expect ( maxPreviousDate ) . toEqual ( new Date ( '2023-11-16T22:15:00.000Z' ) ) ;
3298+
3299+ await processAlert (
3300+ nextRun ,
3301+ {
3302+ ...details ,
3303+ previousMap : previousMapNextRun ,
3304+ } ,
3305+ clickhouseClient ,
3306+ connection . id ,
3307+ alertProvider ,
3308+ teamWebhooksById ,
3309+ ) ;
3310+
3311+ // Check all alert histories after second run
3312+ const allHistories = await AlertHistory . find ( {
3313+ alert : alert . id ,
3314+ } ) . sort ( { createdAt : 1 } ) ;
3315+
3316+ // Verify no histories were created for old timeframes (22:00-22:05)
3317+ // All new histories should have createdAt >= 22:15
3318+ const firstRunEndTime = new Date ( '2023-11-16T22:15:00.000Z' ) . getTime ( ) ;
3319+ const newHistories = allHistories . filter (
3320+ h => h . createdAt . getTime ( ) > firstRunEndTime , // Exclude first run histories (which have createdAt <= 22:15)
3321+ ) ;
3322+
3323+ // New histories should be for the new bucket (22:20) only
3324+ newHistories . forEach ( history => {
3325+ expect ( history . createdAt . getTime ( ) ) . toBeGreaterThanOrEqual (
3326+ new Date ( '2023-11-16T22:20:00.000Z' ) . getTime ( ) ,
3327+ ) ;
3328+ } ) ;
3329+ } ) ;
3330+ } ) ;
30213331} ) ;
0 commit comments