Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
steps:
- uses: actions/checkout@v6
- uses: projectdiscovery/actions/setup/go@v1
- uses: projectdiscovery/actions/golangci-lint@v1
- uses: projectdiscovery/actions/golangci-lint/v2@v1

build:
name: Test Builds
Expand All @@ -25,16 +25,13 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
go-version: [1.24.x]
steps:
- name: Set up Go
uses: actions/setup-go@v6
with:
go-version: ${{ matrix.go-version }}

- name: Check out code
uses: actions/checkout@v6

- name: Set up Go
uses: projectdiscovery/actions/setup/go@v1

- name: Test
run: go test ./...
working-directory: .
Expand Down
8 changes: 3 additions & 5 deletions .github/workflows/functional-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,12 @@ jobs:
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
steps:
- name: Set up Go
uses: actions/setup-go@v6
with:
go-version: 1.24.x

- name: Check out code
uses: actions/checkout@v6

- name: Set up Go
uses: projectdiscovery/actions/setup/go@v1

- name: Functional Tests
run: |
chmod +x run.sh
Expand Down
45 changes: 18 additions & 27 deletions .github/workflows/release-binary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,73 +12,64 @@ jobs:
steps:
- name: "Check out code"
uses: actions/checkout@v6
with:
with:
fetch-depth: 0

- name: "Set up Go"
uses: actions/setup-go@v6
with:
go-version: 1.24.x
cache: true

uses: projectdiscovery/actions/setup/go@v1

- name: "Create release on GitHub"
uses: goreleaser/goreleaser-action@v7
with:
with:
args: "release -f .goreleaser/mac.yml --clean"
version: latest
workdir: .
env:
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"

build-windows:
runs-on: windows-latest-8-cores
steps:
- name: "Check out code"
uses: actions/checkout@v6
with:
with:
fetch-depth: 0

- name: "Set up Go"
uses: actions/setup-go@v6
with:
go-version: 1.24.x
cache: true

uses: projectdiscovery/actions/setup/go@v1

- name: "Create release on GitHub"
uses: goreleaser/goreleaser-action@v7
with:
with:
args: "release -f .goreleaser/windows.yml --clean"
version: latest
workdir: .
env:
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"

build-linux:
runs-on: ubuntu-latest-16-cores
steps:
- name: "Check out code"
uses: actions/checkout@v6
with:
with:
fetch-depth: 0

- name: "Set up Go"
uses: actions/setup-go@v6
with:
go-version: 1.24.x
cache: true
uses: projectdiscovery/actions/setup/go@v1

# todo: musl compatible?
- name: Install Dependences
run: sudo apt install gcc-aarch64-linux-gnu

- name: "Create release on GitHub"
uses: goreleaser/goreleaser-action@v7
with:
with:
args: "release -f .goreleaser/linux.yml --clean"
version: latest
workdir: .
env:
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
SLACK_WEBHOOK: "${{ secrets.RELEASE_SLACK_WEBHOOK }}"
DISCORD_WEBHOOK_ID: "${{ secrets.DISCORD_WEBHOOK_ID }}"
DISCORD_WEBHOOK_TOKEN: "${{ secrets.DISCORD_WEBHOOK_TOKEN }}"
DISCORD_WEBHOOK_TOKEN: "${{ secrets.DISCORD_WEBHOOK_TOKEN }}"
8 changes: 2 additions & 6 deletions .github/workflows/release-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@ jobs:
fetch-depth: 0

- name: Set up Go
uses: actions/setup-go@v6
with:
go-version: 1.24.x
uses: projectdiscovery/actions/setup/go@v1

# todo: musl compatible?
- name: Install Dependences
Expand All @@ -62,9 +60,7 @@ jobs:
fetch-depth: 0

- name: Set up Go
uses: actions/setup-go@v6
with:
go-version: 1.24.x
uses: projectdiscovery/actions/setup/go@v1

- name: release test
uses: projectdiscovery/actions/goreleaser@v1
Expand Down
8 changes: 3 additions & 5 deletions .github/workflows/security-crawl-maze-score.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@ jobs:
name: Run Scoring
runs-on: ubuntu-latest-16-cores
steps:
- name: Set up Go
uses: actions/setup-go@v6
with:
go-version: 1.24.x

- name: Check out code
uses: actions/checkout@v6

- name: Set up Go
uses: projectdiscovery/actions/setup/go@v1

- name: Build
run: go build .
working-directory: cmd/katana/
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM golang:1.25.5-alpine AS build-env
FROM golang:1.25.7-alpine AS build-env
RUN apk add --no-cache git gcc musl-dev
WORKDIR /app
COPY . /app
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

## Installation

katana requires Go 1.24+ to install successfully. If you encounter any installation issues, we recommend trying with the latest available version of Go, as the minimum required version may have changed. Run the command below or download a pre-compiled binary from the [release page](https://github.com/projectdiscovery/katana/releases).
katana requires Go 1.25+ to install successfully. If you encounter any installation issues, we recommend trying with the latest available version of Go, as the minimum required version may have changed. Run the command below or download a pre-compiled binary from the [release page](https://github.com/projectdiscovery/katana/releases).

```console
CGO_ENABLED=1 go install github.com/projectdiscovery/katana/cmd/katana@latest
Expand Down Expand Up @@ -142,6 +142,7 @@ CONFIGURATION:
-fst, -filter-similar-threshold int number of distinct values before a path position is treated as parameter (default 10)
-tlsi, -tls-impersonate enable experimental client hello (ja3) tls randomization
-dr, -disable-redirects disable following redirects (default false)
-kb, -knowledge-base enable knowledge base classification

DEBUG:
-health-check, -hc run diagnostic check up
Expand Down Expand Up @@ -180,6 +181,7 @@ FILTER:
-mdc, -match-condition string match response with dsl based condition
-fdc, -filter-condition string filter response with dsl based condition
-duf, -disable-unique-filter disable duplicate content filtering
-fpt, -filter-page-type string[] filter response with page type (e.g. error,captcha,parked)

RATE-LIMIT:
-c, -concurrency int number of concurrent fetchers to use (default 10)
Expand Down
2 changes: 2 additions & 0 deletions cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.BoolVarP(&options.TlsImpersonate, "tls-impersonate", "tlsi", false, "enable experimental client hello (ja3) tls randomization"),
flagSet.BoolVarP(&options.DisableRedirects, "disable-redirects", "dr", false, "disable following redirects (default false)"),
flagSet.BoolVarP(&options.PathClimb, "path-climb", "pc", false, "enable path climb (auto crawl parent paths)"),
flagSet.BoolVarP(&options.KnowledgeBase, "knowledge-base", "kb", false, "enable knowledge base classification"),
)

flagSet.CreateGroup("debug", "Debug",
Expand Down Expand Up @@ -217,6 +218,7 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.StringVarP(&options.OutputMatchCondition, "match-condition", "mdc", "", "match response with dsl based condition"),
flagSet.StringVarP(&options.OutputFilterCondition, "filter-condition", "fdc", "", "filter response with dsl based condition"),
flagSet.BoolVarP(&options.DisableUniqueFilter, "disable-unique-filter", "duf", false, "disable duplicate content filtering"),
flagSet.StringSliceVarP(&options.FilterPageType, "filter-page-type", "fpt", nil, "filter response with page type (e.g. error,captcha,parked)", goflags.CommaSeparatedStringSliceOptions),
)

flagSet.CreateGroup("ratelimit", "Rate-Limit",
Expand Down
7 changes: 3 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
module github.com/projectdiscovery/katana

go 1.24.1

toolchain go1.24.2
go 1.25.7

require (
github.com/BishopFox/jsluice v0.0.0-20240110145140-0ddfab153e06
github.com/PuerkitoBio/goquery v1.11.0
github.com/adrianbrad/queue v1.3.0
github.com/dominikbraun/graph v0.23.0
github.com/go-rod/rod v0.116.2
github.com/happyhackingspace/dit v0.0.14
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/json-iterator/go v1.1.12
github.com/lmittmann/tint v1.0.6
Expand All @@ -34,7 +33,7 @@ require (
github.com/stretchr/testify v1.11.1
github.com/valyala/fasttemplate v1.2.2
go.uber.org/multierr v1.11.0
golang.org/x/net v0.50.0
golang.org/x/net v0.51.0
gopkg.in/yaml.v3 v3.0.1
)

Expand Down
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ github.com/gosimple/slug v1.15.0 h1:wRZHsRrRcs6b0XnxMUBM6WK1U1Vg5B0R7VkIf1Xzobo=
github.com/gosimple/slug v1.15.0/go.mod h1:UiRaFH+GEilHstLUmcBgWcI42viBN7mAb818JrYOeFQ=
github.com/gosimple/unidecode v1.0.1 h1:hZzFTMMqSswvf0LBJZCZgThIZrpDHFXux9KeGmn6T/o=
github.com/gosimple/unidecode v1.0.1/go.mod h1:CP0Cr1Y1kogOtx0bJblKzsVWrqYaqfNOnHzpgWw4Awc=
github.com/happyhackingspace/dit v0.0.14 h1:rkIu0HuFqvqr8F2PJgG0F+lx6DbX/tQE1hXKwIF2NQQ=
github.com/happyhackingspace/dit v0.0.14/go.mod h1:+WeAxrX7QYeiDmXLVaDgrqpyfD4O/sHlOL4wtbiIpUQ=
github.com/hashicorp/go-version v1.8.0 h1:KAkNb1HAiZd1ukkxDFGmokVZe1Xy9HG6NUp+bPle2i4=
github.com/hashicorp/go-version v1.8.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
Expand Down Expand Up @@ -573,8 +575,8 @@ golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60=
golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM=
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand Down
17 changes: 9 additions & 8 deletions pkg/engine/common/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,15 @@ func (s *Shared) NewCrawlSessionWithURL(URL string) (*CrawlSession, error) {
technologyKeys = mapsutil.GetKeys(technologies)
}
navigationResponse := &navigation.Response{
Depth: depth + 1,
RootHostname: hostname,
Resp: resp,
Body: string(body),
Reader: reader,
Technologies: technologyKeys,
StatusCode: resp.StatusCode,
Headers: utils.FlattenHeaders(resp.Header),
Depth: depth + 1,
RootHostname: hostname,
Resp: resp,
Body: string(body),
Reader: reader,
Technologies: technologyKeys,
StatusCode: resp.StatusCode,
Headers: utils.FlattenHeaders(resp.Header),
KnowledgeBase: s.Options.ClassifyPage(string(body)),
}
navigationRequests := s.Options.Parser.ParseResponse(navigationResponse)
s.Enqueue(queue, navigationRequests...)
Expand Down
5 changes: 5 additions & 0 deletions pkg/engine/headless/captcha/captcha.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"

"github.com/go-rod/rod"
ditcaptcha "github.com/happyhackingspace/dit/captcha"
"github.com/projectdiscovery/gologger"
captchajs "github.com/projectdiscovery/katana/pkg/engine/headless/captcha/js"
)
Expand All @@ -25,6 +26,10 @@ func NewHandler(solverProvider, apiKey string) (*Handler, error) {
}

func (h *Handler) HandleIfCaptcha(ctx context.Context, page *rod.Page, pageHTML string) (bool, error) {
if ct := ditcaptcha.DetectCaptchaInHTML(pageHTML); ct == ditcaptcha.CaptchaTypeNone {
return false, nil
}

info, err := Identify(page)
if err != nil {
gologger.Debug().Msgf("captcha identification failed: %s", err)
Expand Down
40 changes: 40 additions & 0 deletions pkg/engine/headless/captcha/identify_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,50 @@ package captcha
import (
"testing"

ditcaptcha "github.com/happyhackingspace/dit/captcha"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestDetectCaptchaWithDit verifies that ditcaptcha.DetectCaptchaInHTML maps
// representative widget markup to the expected CaptchaType, and that a page
// without any widget markup yields CaptchaTypeNone.
func TestDetectCaptchaWithDit(t *testing.T) {
	// detectionCase pairs an HTML fixture with the captcha type expected for it.
	type detectionCase struct {
		name string
		html string
		want ditcaptcha.CaptchaType
	}

	// The raw-string fixtures are kept exactly as written; their continuation
	// lines are part of the literal and therefore start at column 0.
	cases := []detectionCase{
		{
			name: "recaptcha",
			html: `<html><body><div class="g-recaptcha" data-sitekey="6Lc"></div>
<script src="https://www.google.com/recaptcha/api.js"></script></body></html>`,
			want: ditcaptcha.CaptchaTypeRecaptcha,
		},
		{
			name: "turnstile",
			html: `<html><body><div class="cf-turnstile" data-sitekey="0x4AAA"></div>
<script src="https://challenges.cloudflare.com/turnstile/v0/api.js"></script></body></html>`,
			want: ditcaptcha.CaptchaTypeTurnstile,
		},
		{
			name: "hcaptcha",
			html: `<html><body><div class="h-captcha" data-sitekey="abc"></div>
<script src="https://js.hcaptcha.com/1/api.js"></script></body></html>`,
			want: ditcaptcha.CaptchaTypeHCaptcha,
		},
		{
			name: "no captcha",
			html: `<html><body><h1>Hello</h1></body></html>`,
			want: ditcaptcha.CaptchaTypeNone,
		},
	}

	// One subtest per fixture, run in declaration order.
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, ditcaptcha.DetectCaptchaInHTML(tc.html))
		})
	}
}

func TestIdentify(t *testing.T) {
browser := setupBrowser(t)

Expand Down
1 change: 1 addition & 0 deletions pkg/engine/headless/headless.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ func (h *Headless) Crawl(URL string) error {
}

if rr.Response != nil {
rr.Response.KnowledgeBase = h.options.ClassifyPage(rr.Response.Body)
rr.Response.Raw = ""
rr.Response.Body = ""
}
Expand Down
1 change: 1 addition & 0 deletions pkg/engine/hybrid/crawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re
Headers: utils.FlattenHeaders(headers),
Raw: string(rawBytesResponse),
ContentLength: httpresp.ContentLength,
KnowledgeBase: c.Options.ClassifyPage(string(body)),
}
response.ContentLength = resp.ContentLength

Expand Down
2 changes: 2 additions & 0 deletions pkg/engine/standard/crawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ func (c *Crawler) makeRequest(s *common.CrawlSession, request *navigation.Reques
response.Technologies = mapsutil.GetKeys(technologies)
}

response.KnowledgeBase = c.Options.ClassifyPage(string(data))

// Restore the read data to resp.Body for further use.
resp.Body = io.NopCloser(strings.NewReader(string(data)))

Expand Down
Loading
Loading