Skip to content

Commit 59c4454

Browse files
authored
fix: broken robots:config normalizing (#234)
1 parent 6a5cc75 commit 59c4454

File tree

13 files changed

+417
-6
lines changed

src/runtime/server/composables/getPathRobotConfig.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,13 @@ export function getPathRobotConfig(e: H3Event, options?: { userAgent?: string, s
4848
...nitroApp._robots.ctx.groups.filter(g => g.userAgent.includes('*')),
4949
]
5050
for (const group of groups) {
51-
if (!group._indexable) {
51+
if (group._indexable === false) {
5252
return {
5353
indexable: false,
5454
rule: robotsDisabledValue,
5555
debug: {
5656
source: '/robots.txt',
57-
line: `Disallow: /`,
57+
line: JSON.stringify(group),
5858
},
5959
}
6060
}

src/runtime/server/composables/getSiteRobotConfig.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { H3Event } from 'h3'
22
import type { ParsedRobotsTxt } from '../../types'
3+
import { getSiteConfig } from '#site-config/server/composables'
34
import { getSiteIndexable } from '#site-config/server/composables/getSiteIndexable'
4-
import { useSiteConfig } from '#site-config/server/composables/useSiteConfig'
55
import { getQuery } from 'h3'
66
import { useRuntimeConfigNuxtRobots } from './useRuntimeConfigNuxtRobots'
77

@@ -14,7 +14,7 @@ export function getSiteRobotConfig(e: H3Event): { indexable: boolean, hints: str
1414
// allow previewing with ?mockProductionEnv
1515
const queryIndexableEnabled = String(query.mockProductionEnv) === 'true' || query.mockProductionEnv === ''
1616
if ((debug || import.meta.dev)) {
17-
const { _context } = useSiteConfig(e, { debug: debug || import.meta.dev })
17+
const { _context } = getSiteConfig(e, { debug: debug || import.meta.dev })
1818
if (queryIndexableEnabled) {
1919
indexable = true
2020
hints.push('You are mocking a production enviroment with ?mockProductionEnv query.')

src/runtime/server/util.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import type { H3Event } from 'h3'
22
import type { NitroApp } from 'nitropack'
33
import type { HookRobotsConfigContext } from '../types'
44
import { useNitroApp } from 'nitropack/runtime'
5+
import { normalizeGroup } from '../../util'
56
import { useRuntimeConfigNuxtRobots } from './composables/useRuntimeConfigNuxtRobots'
67

78
export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: NitroApp = useNitroApp()) {
@@ -13,6 +14,7 @@ export async function resolveRobotsTxtContext(e: H3Event | undefined, nitro: Nit
1314
...JSON.parse(JSON.stringify({ groups, sitemaps })),
1415
}
1516
await nitro.hooks.callHook('robots:config', generateRobotsTxtCtx)
17+
generateRobotsTxtCtx.groups = generateRobotsTxtCtx.groups.map(normalizeGroup)
1618
nitro._robots.ctx = generateRobotsTxtCtx
1719
return generateRobotsTxtCtx
1820
}

src/runtime/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ export interface RobotsGroupResolved {
7979
// runtime optimization
8080
_indexable?: boolean
8181
_rules?: { pattern: string, allow: boolean }[]
82+
_normalized?: boolean
8283
}
8384

8485
export interface HookRobotsTxtContext {

src/util.ts

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,18 @@ export function asArray(v: any) {
262262
return typeof v === 'undefined' ? [] : (Array.isArray(v) ? v : [v])
263263
}
264264

265-
export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved {
265+
export function normalizeGroup(group: RobotsGroupInput | RobotsGroupResolved): RobotsGroupResolved {
266+
// quick renormalization check
267+
if ((group as RobotsGroupResolved)._normalized) {
268+
const resolvedGroup = group as RobotsGroupResolved
269+
const disallow = asArray(resolvedGroup.disallow) // we can have empty disallow
270+
resolvedGroup._indexable = !disallow.includes('/')
271+
resolvedGroup._rules = [
272+
...resolvedGroup.disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })),
273+
...resolvedGroup.allow.map(r => ({ pattern: r, allow: true })),
274+
]
275+
return resolvedGroup
276+
}
266277
const disallow = asArray(group.disallow) // we can have empty disallow
267278
const allow = asArray(group.allow).filter(rule => Boolean(rule))
268279
const contentUsage = asArray(group.contentUsage).filter(rule => Boolean(rule))
@@ -272,11 +283,12 @@ export function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved {
272283
disallow,
273284
allow,
274285
contentUsage,
275-
_indexable: !disallow.includes((rule: string) => rule === '/'),
286+
_indexable: !disallow.includes('/'),
276287
_rules: [
277288
...disallow.filter(Boolean).map(r => ({ pattern: r, allow: false })),
278289
...allow.map(r => ({ pattern: r, allow: true })),
279290
],
291+
_normalized: true,
280292
}
281293
}
282294

test/e2e/hook-config.test.ts

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import { createResolver } from '@nuxt/kit'
2+
import { setup } from '@nuxt/test-utils'
3+
import { describe, expect, it } from 'vitest'
4+
5+
const { resolve } = createResolver(import.meta.url)
6+
7+
process.env.NODE_ENV = 'production'
8+
9+
describe('robots:config hook - issue #233', async () => {
10+
await setup({
11+
rootDir: resolve('../../.playground'),
12+
build: true,
13+
server: true,
14+
nuxtConfig: {
15+
nitro: {
16+
plugins: [],
17+
},
18+
hooks: {
19+
'nitro:config': function (nitroConfig: any) {
20+
nitroConfig.plugins = nitroConfig.plugins || []
21+
nitroConfig.plugins.push(resolve('../fixtures/hook-config/server/plugins/robots.ts'))
22+
},
23+
},
24+
},
25+
})
26+
27+
it('generates robots.txt with groups from hook', async () => {
28+
const robotsTxt = await $fetch('/robots.txt')
29+
expect(robotsTxt).toContain('Disallow: /_cwa/*')
30+
expect(robotsTxt).toContain('AhrefsBot')
31+
})
32+
33+
it('should NOT block indexable pages when groups are added via hook', async () => {
34+
// This test demonstrates the bug: pages that should be indexable
35+
// are incorrectly marked as non-indexable because groups added via
36+
// the hook are missing the _indexable property
37+
const { headers: indexHeaders } = await $fetch.raw('/', {
38+
headers: {
39+
'User-Agent': 'Mozilla/5.0',
40+
},
41+
})
42+
43+
// This page should NOT have noindex header because:
44+
// 1. The disallow rule is for /_cwa/* which doesn't match /
45+
// 2. The AhrefsBot rule only applies to AhrefsBot user agent, not Mozilla
46+
expect(indexHeaders.get('x-robots-tag')).toContain('index')
47+
expect(indexHeaders.get('x-robots-tag')).not.toContain('noindex')
48+
})
49+
50+
it('should correctly block paths matching disallow patterns', async () => {
51+
// This should be blocked by the /_cwa/* rule even though page doesn't exist
52+
// We test with ignoreResponseError to capture headers from 404 responses
53+
const { headers } = await $fetch.raw('/_cwa/test', {
54+
headers: {
55+
'User-Agent': 'Mozilla/5.0',
56+
},
57+
ignoreResponseError: true,
58+
})
59+
60+
expect(headers.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
61+
})
62+
63+
it('should block AhrefsBot from all paths', async () => {
64+
const { headers: indexHeaders } = await $fetch.raw('/', {
65+
headers: {
66+
'User-Agent': 'AhrefsBot',
67+
},
68+
})
69+
70+
// AhrefsBot should be blocked everywhere
71+
expect(indexHeaders.get('x-robots-tag')).toMatchInlineSnapshot(`"noindex, nofollow"`)
72+
})
73+
74+
// Edge case: Multiple hook calls shouldn't cause issues
75+
it('should handle multiple hook calls without breaking normalization', async () => {
76+
// Second request - the hook might be called again depending on caching
77+
const { headers } = await $fetch.raw('/api/test', {
78+
headers: {
79+
'User-Agent': 'Mozilla/5.0',
80+
},
81+
ignoreResponseError: true,
82+
})
83+
84+
// Should still work correctly on subsequent requests
85+
expect(headers.get('x-robots-tag')).toBeDefined()
86+
})
87+
88+
// Edge case: Empty user agent header
89+
it('should handle requests with no user agent gracefully', async () => {
90+
const { headers } = await $fetch.raw('/', {
91+
headers: {
92+
// No User-Agent header
93+
},
94+
})
95+
96+
// Should still apply rules (defaults to * user agent)
97+
expect(headers.get('x-robots-tag')).toBeDefined()
98+
})
99+
100+
// Edge case: Case sensitivity in user agent matching
101+
it('should handle user agent case variations', async () => {
102+
const tests = [
103+
{ ua: 'ahrefsbot', desc: 'lowercase' },
104+
{ ua: 'AHREFSBOT', desc: 'uppercase' },
105+
{ ua: 'AhRefsBot', desc: 'mixed case' },
106+
]
107+
108+
for (const { ua } of tests) {
109+
const { headers } = await $fetch.raw('/', {
110+
headers: {
111+
'User-Agent': ua,
112+
},
113+
})
114+
115+
// User agent matching should be case-insensitive
116+
expect(headers.get('x-robots-tag')).toContain('noindex')
117+
}
118+
})
119+
})
test/fixtures/hook-config/server/plugins/robots.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import { defineNitroPlugin } from '#imports'
2+
3+
export default defineNitroPlugin((nitroApp) => {
4+
nitroApp.hooks.hook('robots:config', async (ctx) => {
5+
// Edge case 1: Add group with no disallow/allow (invalid but shouldn't crash)
6+
ctx.groups.push({
7+
userAgent: 'EdgeCaseBot1',
8+
} as any)
9+
10+
// Edge case 2: Add group that's already normalized (double normalization test)
11+
ctx.groups.push({
12+
userAgent: ['EdgeCaseBot2'],
13+
disallow: ['/'],
14+
allow: [],
15+
_indexable: false,
16+
_rules: [{ pattern: '/', allow: false }],
17+
} as any)
18+
19+
// Edge case 3: Modify existing groups from config
20+
// This tests if normalization preserves modifications
21+
if (ctx.groups.length > 0) {
22+
ctx.groups[0].disallow?.push('/hook-added-path')
23+
}
24+
25+
// Edge case 4: Add group with "/" mixed with other patterns
26+
ctx.groups.push({
27+
userAgent: 'EdgeCaseBot3',
28+
disallow: ['/admin', '/', '/api'],
29+
})
30+
31+
// Edge case 5: Add group with non-array values (tests asArray conversion)
32+
ctx.groups.push({
33+
userAgent: 'EdgeCaseBot4',
34+
disallow: '/single-string-disallow',
35+
allow: '/single-string-allow',
36+
} as any)
37+
38+
// Edge case 6: Add group with special characters and whitespace
39+
ctx.groups.push({
40+
userAgent: [' Bot With Spaces ', 'Bot*With?Special[Chars]'],
41+
disallow: [' /path-with-spaces ', '/normal'],
42+
} as any)
43+
44+
// Edge case 7: Completely remove groups (extreme case)
45+
// Commented out because it would break robots.txt generation
46+
// ctx.groups = []
47+
48+
// Edge case 8: Add duplicate user agents
49+
ctx.groups.push({
50+
userAgent: '*', // Duplicate of default
51+
disallow: ['/duplicate-test'],
52+
})
53+
})
54+
})
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import NuxteRobots from '../../../src/module'
2+
3+
export default defineNuxtConfig({
4+
modules: [NuxteRobots],
5+
compatibilityDate: '2024-04-03',
6+
site: {
7+
url: 'https://example.com',
8+
},
9+
})
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<template>
2+
<div>About Page</div>
3+
</template>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<template>
2+
<div>Index Page</div>
3+
</template>

0 commit comments

Comments (0)