Skip to content

Commit

Permalink
Merge #93: ncdumpzone: Add mode for a URL list.
Browse files Browse the repository at this point in the history
39fbbc0 ncdumpzone: Add mode for a URL list. (JeremyRand)

Pull request description:

  This PR is intended to facilitate YaCy crawl jobs.

ACKs for commit 39fbbc:

Tree-SHA512: 070cfd9ad1ee4bc59286ec2c0c120379f30b4cfd761bd3eecd5e2a028b70091171b6dd054acb3c9205800d454187c88fed9e1be9c6591e087d808dd49e2ecda4
  • Loading branch information
JeremyRand committed Aug 2, 2019
2 parents d569a86 + 39fbbc0 commit d9c24e2
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 2 deletions.
10 changes: 9 additions & 1 deletion ncdumpzone/ncdumpzone.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
extratypes "github.com/hlandau/ncbtcjsontypes"
"github.com/namecoin/ncdns/namecoin"
"github.com/namecoin/ncdns/ncdomain"
"github.com/namecoin/ncdns/rrtourl"
"github.com/namecoin/ncdns/tlsoverridefirefox"
"github.com/namecoin/ncdns/util"
)
Expand All @@ -29,6 +30,12 @@ func dumpRR(rr dns.RR, dest io.Writer, format string) error {
return err
}
fmt.Fprint(dest, result)
case "url-list":
result, err := rrtourl.URLsFromRR(rr)
if err != nil {
return err
}
fmt.Fprint(dest, result)
}

return nil
Expand Down Expand Up @@ -77,7 +84,8 @@ func dumpName(item *extratypes.NameFilterItem, conn namecoin.Conn,
// Dump extracts all domain names from conn, formats them according to the
// specified format, and writes the result to dest.
func Dump(conn namecoin.Conn, dest io.Writer, format string) error {
if format != "zonefile" && format != "firefox-override" {
if format != "zonefile" && format != "firefox-override" &&
format != "url-list" {
return fmt.Errorf("Invalid \"format\" argument: %s", format)
}

Expand Down
3 changes: 2 additions & 1 deletion ncdumpzone/ncdumpzone/ncdumpzone.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ var (
"Namecoin RPC password")
formatFlag = cflag.String(flagGroup, "format", "zonefile", "Output "+
"format. \"zonefile\" = DNS zone file. "+
"\"firefox-override\" = Firefox cert_override.txt format.")
"\"firefox-override\" = Firefox cert_override.txt format. "+
"\"url-list\" = URL list.")
)

var conn namecoin.Conn
Expand Down
41 changes: 41 additions & 0 deletions rrtourl/rrtourl.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package rrtourl

import (
"fmt"
"strings"

"github.com/miekg/dns"
"github.com/namecoin/ncdns/util"
)

// URLsFromRR returns a newline-terminated list of candidate URLs derived from
// the owner name of rr, suitable for passing to a search engine crawler like
// YaCy. For the derived host it emits the http, https, ftp, and ftps schemes,
// each without and with a "www." prefix, one URL per line.
//
// If no host can be derived from rr (for example, the owner name is the DNS
// root, or nothing remains after stripping underscore-prefixed labels), it
// returns an empty string and a nil error. It returns a non-nil error if rr
// or its header is nil.
func URLsFromRR(rr dns.RR) (string, error) {
	// Calling Header on a nil interface value would panic, so reject it
	// explicitly.
	if rr == nil {
		return "", fmt.Errorf("Nil RR")
	}

	header := rr.Header()
	if header == nil {
		return "", fmt.Errorf("Nil RR header")
	}

	hostFQDN := header.Name

	// Remove things like "_443._tcp" in TLSA records.
	// NOTE(review): this relies on util.SplitDomainTail returning the
	// leftmost label first and the remainder second -- confirm against util.
	for strings.HasPrefix(hostFQDN, "_") {
		_, hostFQDN = util.SplitDomainTail(hostFQDN)
	}

	// Remove the trailing period from FQDNs.
	host := strings.TrimSuffix(hostFQDN, ".")

	// Remove wildcard subdomains (below we assume that they might be "www.").
	host = strings.TrimPrefix(host, "*.")

	// Without a host there is nothing to crawl; emitting "http:///" and
	// friends would only feed malformed URLs to the crawler.
	if host == "" {
		return "", nil
	}

	// Emit every scheme for the bare host and for its "www." variant, in the
	// order: scheme://host/ then scheme://www.host/.
	var urls strings.Builder
	for _, scheme := range []string{"http", "https", "ftp", "ftps"} {
		for _, prefix := range []string{"", "www."} {
			urls.WriteString(scheme + "://" + prefix + host + "/\n")
		}
	}

	return urls.String(), nil
}

0 comments on commit d9c24e2

Please sign in to comment.