11import time
2+
23from django .core .management .base import BaseCommand
34from django .db .models import Count
45from django .db import connection
6+
57from osf .models import NotificationSubscription , NotificationSubscriptionLegacy
68
79
810class Command (BaseCommand ):
9- help = 'Verify notification migration integrity (duplicates, invalid frequencies, counts, distribution)'
10- '''
11+ """
1112 Usage example:
1213 python manage.py migrate_notifications_verification
13- python manage.py migrate_notifications_verification --duplicates --counts
14- '''
14+ python manage.py migrate_notifications_verification --duplicates --distribution
15+ python manage.py migrate_notifications_verification --duplicates --exclude-is-digest --output-size=100
16+ """
17+
18+ help = 'Verify notification migration integrity (duplicates, invalid frequencies, counts and distribution)'
1519
def add_arguments(self, parser):
    """Register the command-line options understood by this command.

    ``--duplicates``, ``--frequencies``, ``--counts`` and ``--distribution``
    each select one verification step, and ``--all`` selects every step.
    ``--exclude-is-digest`` changes which columns the duplicate check groups
    by, and ``--output-size`` caps how many rows the checks print.

    Args:
        parser: The argparse-compatible parser the options are added to.
    """
    # Local import: only the validator below needs argparse's error type.
    import argparse

    def non_negative_int(raw):
        """Parse ``raw`` as an int and reject negative values.

        The parsed value is later used as a queryset slice bound, and
        negative slice bounds are not supported there, so fail at parse
        time with a readable message instead of crashing mid-run.
        """
        value = int(raw)  # a ValueError here is reported by argparse itself
        if value < 0:
            raise argparse.ArgumentTypeError(f'must be a non-negative integer, got {value}')
        return value

    # `--all` is registered exactly once; a second registration would make
    # argparse raise a conflicting-option error.
    parser.add_argument('--all', action='store_true', help='Run all checks')
    parser.add_argument(
        '--duplicates',
        action='store_true',
        help='Check for duplicate NotificationSubscription entries',
    )
    parser.add_argument(
        '--frequencies',
        action='store_true',
        help='Check message_frequency values for invalid ones',
    )
    parser.add_argument(
        '--counts',
        action='store_true',
        help='Compare legacy M2M total with migrated count',
    )
    parser.add_argument(
        '--distribution',
        action='store_true',
        help='Print breakdown summary',
    )
    parser.add_argument(
        '--exclude-is-digest',
        action='store_true',
        help='Used along with --duplicates to exclude _is_digest field in unique_together',
    )
    parser.add_argument(
        '--output-size',
        type=non_negative_int,
        default=10,
        help='Used along with other options to set the maximum number of rows printed by each check',
    )
2228
2329 def handle (self , * args , ** options ):
30+
2431 start = time .time ()
2532 flags = {k for k , v in options .items () if v and k in ['duplicates' , 'frequencies' , 'counts' , 'distribution' ]}
33+ run_all = options ['all' ]
34+ output_size = options ['output_size' ]
2635
27- run_all = options ['all' ] or not flags
2836 print ('\n ================ Notification Migration Verification ================\n ' )
2937
38+ if not run_all and not flags :
39+ print ('\n ⚠ No options selected, command will exit ... \n ' )
40+
41+ # 1. Detect duplicates
3042 if run_all or 'duplicates' in flags :
31- # 1. Detect duplicates
32- print ('1) Checking duplicate NotificationSubscription entries...' )
33- duplicates = (
34- NotificationSubscription .objects .values (
35- 'user_id' , 'content_type_id' , 'object_id' , 'notification_type_id'
43+ action_word = 'excludes' if options ['exclude_is_digest' ] else 'includes'
44+ print (f'1) Checking duplicate NotificationSubscription entries (unique_together { action_word } _is_digest)...' )
45+ if options ['exclude_is_digest' ]:
46+ duplicates = (
47+ NotificationSubscription .objects .values (
48+ 'user_id' , 'content_type_id' , 'object_id' , 'notification_type_id' ,
49+ )
50+ .annotate (count = Count ('id' ))
51+ .filter (count__gt = 1 )
3652 )
37- .annotate (count = Count ('id' ))
38- .filter (count__gt = 1 )
39- )
40- print (f" → Duplicates found: { duplicates .count ()} " )
53+ else :
54+ duplicates = (
55+ NotificationSubscription .objects .values (
56+ 'user_id' , 'content_type_id' , 'object_id' , 'notification_type_id' , '_is_digest' ,
57+ )
58+ .annotate (count = Count ('id' ))
59+ .filter (count__gt = 1 )
60+ )
61+ print (f' → Duplicates found: { duplicates .count ()} .' )
4162 if duplicates .exists ():
42- print (' Sample (up to 10 ):' )
43- for d in duplicates [: 10 ]:
63+ print (f ' Sample (up to { output_size } ):' )
64+ for d in duplicates . order_by ( '-count' )[: output_size ]:
4465 print (' ' , d )
4566 print (' ✔ OK' if not duplicates .exists () else ' ⚠ Needs review' )
4667
68+ # 2. Invalid frequencies
4769 if run_all or 'frequencies' in flags :
48- # 2. Invalid frequencies
4970 print ('\n 2) Checking invalid message_frequency values...' )
5071 valid = {'none' , 'daily' , 'instantly' }
5172 invalid_freq = NotificationSubscription .objects .exclude (message_frequency__in = valid )
5273
53- print (f" → Invalid frequency rows: { invalid_freq .count ()} " )
74+ print (f' → Invalid frequency rows: { invalid_freq .count ()} ' )
5475 if invalid_freq .exists ():
5576 print (' Sample (id, freq):' )
56- for row in invalid_freq [:10 ]:
57- print (f" { row .id } → { row .message_frequency } " )
77+ for row in invalid_freq [:output_size ]:
78+ print (f' { row .id } → { row .message_frequency } ' )
5879 print (' ✔ OK' if not invalid_freq .exists () else ' ⚠ Needs cleanup' )
5980
81+ # 3. Compare legacy frequency-based totals vs new subscription count
6082 if run_all or 'counts' in flags :
61- # 3. Compare legacy frequency-based totals vs new subscription count
6283 print ('\n 3) Validating total count migrated...' )
6384 valid_subscription_ids = NotificationSubscriptionLegacy .objects .filter (event_name__in = ['global_reviews' , 'global_file_updated' , 'file_updated' ]).values_list ('id' , flat = True )
6485 with connection .cursor () as cursor :
@@ -72,26 +93,28 @@ def handle(self, *args, **options):
7293 legacy_total_expanded = none_count + digest_count + transactional_count
7394 new_total = NotificationSubscription .objects .count ()
7495
75- print (f" Legacy M2M total: { legacy_total_expanded } " )
76- print (f" New subscriptions: { new_total } " )
96+ print (f' Legacy M2M total: { legacy_total_expanded } ' )
97+ print (f' New subscriptions: { new_total } ' )
7798
7899 if legacy_total_expanded == new_total :
79100 print (' ✔ Counts match' )
80101 else :
81102 diff = new_total - legacy_total_expanded
82- print (f" ⚠ Mismatch: difference = { diff } (possibly skipped or duplicates removed)" )
103+ print (f' ⚠ Mismatch: difference = { diff } (possibly skipped, duplicates removed or newly created)' )
104+
105+ print (' ⚠ Note: this is accurate only right after migration and before any new subscriptions are created.)' )
83106
84107 if run_all or 'distribution' in flags :
85108 # 4. Distribution summary
86- print ('\n 4) Subscription distribution breakdown (top 30 ):\n ' )
109+ print (f '\n 4) Subscription distribution breakdown (top { output_size } ):\n ' )
87110 dist = (
88111 NotificationSubscription .objects
89112 .values ('notification_type_id' , 'message_frequency' )
90113 .annotate (total = Count ('id' ))
91- .order_by ('-total' )[:30 ]
114+ .order_by ('-total' )[:output_size ]
92115 )
93116 for row in dist :
94117 print (' ' , row )
95118
96119 elapsed = time .time () - start
97- print (f" \n ================ Verification complete in { elapsed :.2f} s ================\n " )
120+ print (f' \n ================ Verification complete in { elapsed :.2f} s ================\n ' )
0 commit comments