@@ -13,7 +13,6 @@ import (
13
13
14
14
kaddht "github.com/libp2p/go-libp2p-kad-dht"
15
15
"github.com/libp2p/go-libp2p/core/network"
16
- "github.com/libp2p/go-libp2p/core/peer"
17
16
log "github.com/sirupsen/logrus"
18
17
"github.com/urfave/cli/v2"
19
18
@@ -273,6 +272,11 @@ func CrawlAction(c *cli.Context) error {
273
272
MeterProvider : cfg .Root .MeterProvider ,
274
273
}
275
274
275
+ var (
276
+ summary * core.Summary
277
+ runErr error
278
+ )
279
+
276
280
switch cfg .Network {
277
281
case string (config .NetworkEthExec ):
278
282
@@ -328,18 +332,7 @@ func CrawlAction(c *cli.Context) error {
328
332
}
329
333
330
334
// finally, start the crawl
331
- queuedPeers , runErr := eng .Run (ctx )
332
-
333
- // a bit ugly but, but the handler will contain crawl statistics, that
334
- // we'll save to the database and print to the screen
335
- handler .QueuedPeers = len (queuedPeers )
336
- if err := persistCrawlInformation (dbc , handler , runErr ); err != nil {
337
- return fmt .Errorf ("persist crawl information: %w" , err )
338
- }
339
-
340
- logSummary (handler , time .Since (start ))
341
-
342
- return nil
335
+ summary , runErr = eng .Run (ctx )
343
336
344
337
case string (config .NetworkBitcoin ):
345
338
bpEnodes , err := cfg .BootstrapBitcoinEntries ()
@@ -377,16 +370,7 @@ func CrawlAction(c *cli.Context) error {
377
370
}
378
371
379
372
// finally, start the crawl
380
- queuedPeers , runErr := eng .Run (ctx )
381
-
382
- // a bit ugly but, but the handler will contain crawl statistics, that
383
- // we'll save to the database and print to the screen
384
- handler .QueuedPeers = len (queuedPeers )
385
- if err := persistCrawlInformation (dbc , handler , runErr ); err != nil {
386
- return fmt .Errorf ("persist crawl information: %w" , err )
387
- }
388
-
389
- return nil
373
+ summary , runErr = eng .Run (ctx )
390
374
391
375
case string (config .NetworkEthCons ),
392
376
string (config .NetworkHolesky ),
@@ -458,18 +442,8 @@ func CrawlAction(c *cli.Context) error {
458
442
}
459
443
460
444
// finally, start the crawl
461
- queuedPeers , runErr : = eng .Run (ctx )
445
+ summary , runErr = eng .Run (ctx )
462
446
463
- // a bit ugly but, but the handler will contain crawl statistics, that
464
- // we'll save to the database and print to the screen
465
- handler .QueuedPeers = len (queuedPeers )
466
- if err := persistCrawlInformation (dbc , handler , runErr ); err != nil {
467
- return fmt .Errorf ("persist crawl information: %w" , err )
468
- }
469
-
470
- logSummary (handler , time .Since (start ))
471
-
472
- return nil
473
447
default :
474
448
475
449
addrInfos , err := cfg .BootstrapAddrInfos ()
@@ -511,22 +485,20 @@ func CrawlAction(c *cli.Context) error {
511
485
}
512
486
513
487
// finally, start the crawl
514
- queuedPeers , runErr := eng .Run (ctx )
515
-
516
- // a bit ugly but, but the handler will contain crawl statistics, that
517
- // we'll save to the database and print to the screen
518
- handler .QueuedPeers = len (queuedPeers )
519
- if err := persistCrawlInformation (dbc , handler , runErr ); err != nil {
520
- return fmt .Errorf ("persist crawl information: %w" , err )
521
- }
488
+ summary , runErr = eng .Run (ctx )
489
+ }
522
490
523
- logSummary (handler , time .Since (start ))
491
+ // we're done with the crawl so seal the crawl and store aggregate information
492
+ if err := persistCrawlInformation (dbc , summary , runErr ); err != nil {
493
+ return fmt .Errorf ("persist crawl information: %w" , err )
524
494
}
525
495
496
+ logSummary (summary , time .Since (start ))
497
+
526
498
return nil
527
499
}
528
500
529
- func persistCrawlInformation [ I core. PeerInfo [ I ]] (dbc db.Client , handler * core.CrawlHandler [ I ] , runErr error ) error {
501
+ func persistCrawlInformation (dbc db.Client , summary * core.Summary , runErr error ) error {
530
502
// construct a new cleanup context to store the crawl results even
531
503
// if the user cancelled the process.
532
504
sigs := make (chan os.Signal , 1 )
@@ -541,36 +513,36 @@ func persistCrawlInformation[I core.PeerInfo[I]](dbc db.Client, handler *core.Cr
541
513
}()
542
514
543
515
// Persist the crawl results
544
- if err := updateCrawl (cleanupCtx , dbc , runErr , handler ); err != nil {
516
+ if err := updateCrawl (cleanupCtx , dbc , runErr , summary ); err != nil {
545
517
return fmt .Errorf ("persist crawl: %w" , err )
546
518
}
547
519
548
520
// Persist associated crawl properties
549
- if err := persistCrawlProperties (cleanupCtx , dbc , handler ); err != nil {
521
+ if err := persistCrawlProperties (cleanupCtx , dbc , summary ); err != nil {
550
522
return fmt .Errorf ("persist crawl properties: %w" , err )
551
523
}
552
524
553
- // persist all neighbor information
554
- if err := storeNeighbors (cleanupCtx , dbc , handler ); err != nil {
555
- return fmt . Errorf ( "store neighbors: %w" , err )
525
+ // flush any left-over information to the database.
526
+ if err := dbc . Flush (cleanupCtx ); err != nil {
527
+ log . WithError ( err ). Warnln ( "Failed flushing information to database" )
556
528
}
557
529
558
530
return nil
559
531
}
560
532
561
533
// updateCrawl writes crawl statistics to the database
562
- func updateCrawl [ I core. PeerInfo [ I ]] (ctx context.Context , dbc db.Client , runErr error , handler * core.CrawlHandler [ I ] ) error {
534
+ func updateCrawl (ctx context.Context , dbc db.Client , runErr error , summary * core.Summary ) error {
563
535
if _ , ok := dbc .(* db.NoopClient ); ok {
564
536
return nil
565
537
}
566
538
567
539
log .Infoln ("Persisting crawl result..." )
568
540
569
541
args := & db.SealCrawlArgs {
570
- Crawled : handler . CrawledPeers ,
571
- Dialable : handler . CrawledPeers - handler . TotalErrors () ,
572
- Undialable : handler . TotalErrors () ,
573
- Remaining : handler . QueuedPeers ,
542
+ Crawled : summary . PeersCrawled ,
543
+ Dialable : summary . PeersDialable ,
544
+ Undialable : summary . PeersUndialable ,
545
+ Remaining : summary . PeersRemaining ,
574
546
}
575
547
576
548
if runErr == nil {
@@ -585,92 +557,55 @@ func updateCrawl[I core.PeerInfo[I]](ctx context.Context, dbc db.Client, runErr
585
557
}
586
558
587
559
// persistCrawlProperties writes crawl property statistics to the database.
588
- func persistCrawlProperties [ I core. PeerInfo [ I ]] (ctx context.Context , dbc db.Client , handler * core.CrawlHandler [ I ] ) error {
560
+ func persistCrawlProperties (ctx context.Context , dbc db.Client , summary * core.Summary ) error {
589
561
if _ , ok := dbc .(* db.NoopClient ); ok {
590
562
return nil
591
563
}
592
564
593
565
log .Infoln ("Persisting crawl properties..." )
594
566
avFull := map [string ]int {}
595
- for version , count := range handler .AgentVersion {
567
+ for version , count := range summary .AgentVersion {
596
568
avFull [version ] += count
597
569
}
598
570
pps := map [string ]map [string ]int {
599
571
"agent_version" : avFull ,
600
- "protocol" : handler .Protocols ,
601
- "error" : handler .ConnErrs ,
572
+ "protocol" : summary .Protocols ,
573
+ "error" : summary .ConnErrs ,
602
574
}
603
575
604
576
return dbc .InsertCrawlProperties (ctx , pps )
605
577
}
606
578
607
- // storeNeighbors fills the neighbors table with topology information
608
- func storeNeighbors [I core.PeerInfo [I ]](ctx context.Context , dbc db.Client , handler * core.CrawlHandler [I ]) error {
609
- if _ , ok := dbc .(* db.NoopClient ); ok {
610
- return nil
611
- }
612
-
613
- if len (handler .RoutingTables ) == 0 {
614
- return nil
615
- }
616
-
617
- log .Infoln ("Storing neighbor information..." )
618
-
619
- start := time .Now ()
620
- neighborsCount := 0
621
- i := 0
622
- for p , routingTable := range handler .RoutingTables {
623
- if i % 100 == 0 && i > 0 {
624
- log .Infof ("Stored %d peers and their neighbors" , i )
625
- }
626
- i ++
627
- neighborsCount += len (routingTable .Neighbors )
628
-
629
- neighbors := make ([]peer.ID , len (routingTable .Neighbors ))
630
- for j , n := range routingTable .Neighbors {
631
- neighbors [j ] = n .ID ()
632
- }
633
-
634
- if err := dbc .InsertNeighbors (ctx , p , neighbors , routingTable .ErrorBits ); err != nil {
635
- return fmt .Errorf ("persiting neighbor information: %w" , err )
636
- }
637
- }
638
- log .WithFields (log.Fields {
639
- "duration" : time .Since (start ).String (),
640
- "avg" : fmt .Sprintf ("%.2fms" , time .Since (start ).Seconds ()/ float64 (len (handler .RoutingTables ))* 1000 ),
641
- "peers" : len (handler .RoutingTables ),
642
- "totalNeighbors" : neighborsCount ,
643
- }).Infoln ("Finished storing neighbor information" )
644
- return nil
645
- }
646
-
647
579
// logSummary logs the final results of the crawl.
648
- func logSummary [I core.PeerInfo [I ]](handler * core.CrawlHandler [I ], crawlDuration time.Duration ) {
580
+ func logSummary (summary * core.Summary , crawlDuration time.Duration ) {
581
+ log .Infoln ("" )
582
+ log .Infoln ("" )
649
583
log .Infoln ("Crawl summary:" )
650
584
651
585
log .Infoln ("" )
652
- for err , count := range handler .ConnErrs {
586
+ for err , count := range summary .ConnErrs {
653
587
log .WithField ("count" , count ).WithField ("value" , err ).Infoln ("Dial Error" )
654
588
}
655
589
656
590
log .Infoln ("" )
657
- for err , count := range handler .CrawlErrs {
591
+ for err , count := range summary .CrawlErrs {
658
592
log .WithField ("count" , count ).WithField ("value" , err ).Infoln ("Crawl Error" )
659
593
}
660
594
661
595
log .Infoln ("" )
662
- for agent , count := range handler .AgentVersion {
596
+ for agent , count := range summary .AgentVersion {
663
597
log .WithField ("count" , count ).WithField ("value" , agent ).Infoln ("Agent" )
664
598
}
665
599
log .Infoln ("" )
666
- for protocol , count := range handler .Protocols {
600
+ for protocol , count := range summary .Protocols {
667
601
log .WithField ("count" , count ).WithField ("value" , protocol ).Infoln ("Protocol" )
668
602
}
669
603
log .Infoln ("" )
670
604
log .WithFields (log.Fields {
671
- "crawledPeers" : handler . CrawledPeers ,
605
+ "crawledPeers" : summary . PeersCrawled ,
672
606
"crawlDuration" : crawlDuration .String (),
673
- "dialablePeers" : handler .CrawledPeers - handler .TotalErrors (),
674
- "undialablePeers" : handler .TotalErrors (),
607
+ "dialablePeers" : summary .PeersDialable ,
608
+ "undialablePeers" : summary .PeersUndialable ,
609
+ "remainingPeers" : summary .PeersRemaining ,
675
610
}).Infoln ("Finished crawl" )
676
611
}
0 commit comments