diff --git a/.gitignore b/.gitignore index 4cc66d942..f4983d5e7 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ test-results.xml .aws-sam/ env.sh output.txt +env-dev.sh \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 1d03674b9..3f6ad4221 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1005,7 +1005,6 @@ "resolved": "https://registry.npmjs.org/@adobe/helix-universal/-/helix-universal-5.2.3.tgz", "integrity": "sha512-13lpLUAyPvitjNh6eVZKjh8+kE+0LGHElyIaV0X7wOqUn371+BR9LrfS5seOpcIDAFNpEbL4T68rgFa1/aQrbA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@adobe/fetch": "4.2.3", "aws4": "1.13.2" @@ -3923,7 +3922,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -4806,7 +4804,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -6106,7 +6103,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -6988,7 +6984,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": 
true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -8243,7 +8238,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -9126,7 +9120,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -9548,7 +9541,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.716.0.tgz", "integrity": "sha512-lA4IB9FzR2KjH7EVCo+mHGFKqdViVyeBQEIX9oVratL/l7P0bMS1fMwgfHOc3ACazqNxBxDES7x08ZCp32y6Lw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -9602,7 +9594,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.716.0.tgz", "integrity": "sha512-i4SVNsrdXudp8T4bkm7Fi3YWlRnvXCSwvNDqf6nLqSJxqr4CN3VlBELueDyjBK7TAt453/qSif+eNx+bHmwo4Q==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -10841,7 +10832,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.716.0.tgz", "integrity": "sha512-lA4IB9FzR2KjH7EVCo+mHGFKqdViVyeBQEIX9oVratL/l7P0bMS1fMwgfHOc3ACazqNxBxDES7x08ZCp32y6Lw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -10895,7 +10885,6 @@ "resolved": 
"https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.716.0.tgz", "integrity": "sha512-i4SVNsrdXudp8T4bkm7Fi3YWlRnvXCSwvNDqf6nLqSJxqr4CN3VlBELueDyjBK7TAt453/qSif+eNx+bHmwo4Q==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -12168,7 +12157,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.726.0.tgz", "integrity": "sha512-5JzTX9jwev7+y2Jkzjz0pd1wobB5JQfPOQF3N2DrJ5Pao0/k6uRYwE4NqB0p0HlGrMTDm7xNq7OSPPIPG575Jw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -12222,7 +12210,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -12814,7 +12801,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.721.0.tgz", "integrity": "sha512-TGENpPbk6xtbLH07XZVZlhmK+SLs3stHLIQ/lZXZ8stZKT9//kA19P1E5+LNCmJFSLNxSj5ziHFOv/CzQN9U9g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -15611,7 +15597,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.721.0.tgz", "integrity": "sha512-1Pv8F02hQFmPZs7WtGfQNlnInbG1lLzyngJc/MlZ3Ld2fIoWjaWp7bJWgYAjnzHNEuDtCabWJvIfePdRqsbYoA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -18693,7 +18678,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": 
"Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -18942,7 +18926,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.859.0.tgz", "integrity": "sha512-Bt840uICsGcn7IFewif8ARCF0CxtdTx9DX/LfUGRI+SVZcqyeEccmH2JJRRzThtEzKTXr+rCN6yaNB3c4RQY2g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -22189,7 +22172,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -23989,7 +23971,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -24965,7 +24946,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.859.0.tgz", "integrity": "sha512-Bt840uICsGcn7IFewif8ARCF0CxtdTx9DX/LfUGRI+SVZcqyeEccmH2JJRRzThtEzKTXr+rCN6yaNB3c4RQY2g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -27257,7 +27237,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -27574,7 +27553,6 @@ "resolved": 
"https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -28515,7 +28493,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.3.tgz", "integrity": "sha512-bltsLAr4juMJJ2tT5/L/CtwUGIvHihtPe6SO/z3jjOD73PHhOYxcuwCMFFyTbTy5S4WThJO32oZk7r+pg3ZoCQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -30343,7 +30320,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -30432,7 +30408,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.859.0.tgz", "integrity": "sha512-Bt840uICsGcn7IFewif8ARCF0CxtdTx9DX/LfUGRI+SVZcqyeEccmH2JJRRzThtEzKTXr+rCN6yaNB3c4RQY2g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -31973,7 +31948,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -32952,7 +32926,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.3.tgz", "integrity": "sha512-bltsLAr4juMJJ2tT5/L/CtwUGIvHihtPe6SO/z3jjOD73PHhOYxcuwCMFFyTbTy5S4WThJO32oZk7r+pg3ZoCQ==", 
"license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -33160,7 +33133,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.859.0.tgz", "integrity": "sha512-Bt840uICsGcn7IFewif8ARCF0CxtdTx9DX/LfUGRI+SVZcqyeEccmH2JJRRzThtEzKTXr+rCN6yaNB3c4RQY2g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -36414,7 +36386,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.3.tgz", "integrity": "sha512-bltsLAr4juMJJ2tT5/L/CtwUGIvHihtPe6SO/z3jjOD73PHhOYxcuwCMFFyTbTy5S4WThJO32oZk7r+pg3ZoCQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -37628,7 +37599,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -38569,7 +38539,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -39925,7 +39894,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -40808,7 +40776,6 @@ "resolved": 
"https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -42064,7 +42031,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -42947,7 +42913,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -44183,7 +44148,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -45066,7 +45030,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -46315,7 +46278,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": 
"Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -47198,7 +47160,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -48733,7 +48694,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.917.0.tgz", "integrity": "sha512-PPOyDwlg59ESbj/Ur8VKRvlW6GRViThykNCg5qjCuejiEQ8F1j+0yPxIa+H0x6iklDZF/+AiERtLpmZh3UjD0g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -50741,7 +50701,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.726.0.tgz", "integrity": "sha512-5JzTX9jwev7+y2Jkzjz0pd1wobB5JQfPOQF3N2DrJ5Pao0/k6uRYwE4NqB0p0HlGrMTDm7xNq7OSPPIPG575Jw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -53935,7 +53894,6 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -54344,7 +54302,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" }, @@ -54367,7 +54324,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" } @@ -55762,7 +55718,6 @@ "integrity": "sha512-jOT8V1Ba5BdC79sKrRWDdMT5l1R+XNHTPR6CPWzUP2EcfAcvIHZWF0eAbmRcpOOP5gVIwnqNg0C4nvh6Abc3OA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.1", @@ -56059,7 +56014,6 @@ "integrity": 
"sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "dev": true, "license": "Apache-2.0", - "peer": true, "engines": { "node": ">=8.0.0" } @@ -58438,7 +58392,6 @@ "integrity": "sha512-BnOroVl1SgrPLywqxyqdJ4l3S2MsKVLDVxZvjI1Eoe8ev2r3kGDo+PcMihNmDE+6/KjkTubSJnmqGZZjQSBq/g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.46.2", "@typescript-eslint/types": "8.46.2", @@ -58896,7 +58849,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -58943,7 +58895,6 @@ "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -59421,7 +59372,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.11.0.tgz", "integrity": "sha512-b7RRs3/twrsCxb113ZgycyaYcXJUQADFMKTiAfzRJu/2hBD2UZkyrjrh8BNTwQ5PUJJmHLoapv1uhpJFk3qKvQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@aws/lambda-invoke-store": "^0.0.1", @@ -59819,7 +59769,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.2", "caniuse-lite": "^1.0.30001741", @@ -60076,7 +60025,6 @@ "integrity": "sha512-aUTnJc/JipRzJrNADXVvpVqi6CO0dn3nx4EVPxijri+fj3LUUDyZQOgVeW54Ob3Y1Xh9Iz8f+CgaCl8v0mn9bA==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=18" } @@ -62339,7 +62287,6 @@ "integrity": "sha512-t5aPOpmtJcZcz5UJyY2GbvpDlsK5E8JqRqoKtfiKE3cNh437KIqfJr3A3AKf5k64NPx6d0G3dno6XDY05PqPtw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -66948,7 +66895,6 @@ "integrity": 
"sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==", "dev": true, "license": "MIT", - "peer": true, "bin": { "marked": "bin/marked.js" }, @@ -68015,7 +67961,6 @@ "integrity": "sha512-UczzB+0nnwGotYSgllfARAqWCJ5e/skuV2K/l+Zyck/H6pJIhLXuBnz+6vn2i211o7DtbE78HQtsYEKICHGI+g==", "dev": true, "license": "MIT", - "peer": true, "funding": { "type": "opencollective", "url": "https://opencollective.com/mobx" @@ -71037,7 +70982,6 @@ "dev": true, "inBundle": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -72629,7 +72573,6 @@ "integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -72640,7 +72583,6 @@ "integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -73367,7 +73309,6 @@ "integrity": "sha512-0OCYLm0AfVilNGukM+w0C4aptITfuW1Mhvmz8LQliLeYbPOTFRCIJzoltWWx/F5zVFe6np9eNatBUHdAvMFeZg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@semantic-release/commit-analyzer": "^13.0.1", "@semantic-release/error": "^4.0.0", @@ -74798,7 +74739,6 @@ "integrity": "sha512-1v/e3Dl1BknC37cXMhwGomhO8AkYmN41CqyX9xhUDxry1ns3BFQy2lLDRQXJRdVVWB9OHemv/53xaStimvWyuA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@emotion/is-prop-valid": "1.2.2", "@emotion/unitless": "0.8.1", @@ -75504,7 +75444,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -75877,7 +75816,6 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", 
"tsserver": "bin/tsserver" @@ -75976,7 +75914,6 @@ "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz", "integrity": "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==", "license": "MIT", - "peer": true, "dependencies": { "@types/unist": "^3.0.0", "bail": "^2.0.0", diff --git a/scripts/constants.js b/scripts/constants.js new file mode 100644 index 000000000..672279da2 --- /dev/null +++ b/scripts/constants.js @@ -0,0 +1,153 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */ + +/** + * Site configurations for fix checker scripts; each entry pairs a site `id` (UUID-formatted string) with the site's domain `name`. + */ +export const SITES = [ + { + id: 'da39921f-9a02-41db-b491-02c98330d956', + name: 'bamboohr.com' + }, + { + id: '0983c6da-0dee-45cc-b897-3f1fed6b460b', + name: 'hersheyland.com' + }, + { + id: '68ba8c72-aab1-4a45-8bcc-79368d45caa4', + name: 'wilson.com' + }, + { + id: 'e12c091c-075b-4c94-aab7-398a04412b5c', + name: 'chocolateworld.com' + }, + { + id: 'c2473d89-e997-458d-a86d-b4096649c12b', + name: 'adobe.com' + }, + { + id: '917ca4a0-bb8e-47a8-aa6e-c4eb73defb97', + name: 'theplayers.com' + }, + { + id: '14220f09-7bdd-4c91-9adf-adcbe0adf1df', + name: 'westjet.com' + }, + { + id: 'c63f25a4-5173-4657-bbad-edb710957c92', + name: 'whirlpool.com' + }, + { + id: '0535e81f-f826-465c-8853-3aad7c1b45ec', + name: 'volvotrucks.us' + }, + { + id: '9aedf036-575a-4834-b1bc-28b2aeef5f65', + name: 'volvotrucks.com' + }, + { + id: 'd8db1956-b24c-4ad7-bdb6-6f5a90d89edc', + name: 'business.adobe.com' + }, + { + id: 'cc31e47f-91ee-46d6-999a-4afbc0325339', + name: 'blog.adobe.com' + }, + { + id: '92d24fa2-5e99-4d43-8799-84cba3385ae1', + name: 'qualcomm.com' + }, + { + id: '8f34399d-4442-4545-ad6c-1060980107fb', + name: 'sunstargum.com' + }, + { + id: '542ad116-ed33-448a-a123-06cbcd7c1d4c', + name: 'sunstar.com' + }, + { + id: '60ee28c4-d6cf-4dae-9218-8b0187848e8e', + name: 'sunstar-engineering.com' + }, + { + id: 'b017b216-c6de-4e69-9f91-0202d330f876', + name: 'sunstar-foundation.com' + }, + { + id: '9b12429a-60ed-4e3b-9719-978b1c9d8397', + name: 'breville.com' + }, + { + id: '94d5d687-68b0-457f-aac8-760ed8e9045a', + name: 'crucial.com' + }, + { + id: '7b8f919a-162d-41bb-9869-ef4c7ad057e7', + name: 'micron.com' + }, + { + id: '1baa230b-d2dd-4ece-8803-4f24949faed0', + name: 't-mobile.com' + }, + { + id: '620fbc50-59e3-4027-a915-980ed57a6ee7', + name: 'hongkonghellotech.com' + }, + { + id: 'e043d740-c365-436a-bf30-de5add3d1b7f', + name: 'abbvie.com' + }, + { + id: 'b914bc7b-ec12-460a-899e-42bf252de356', + name: 
'continental-tires.com' + }, + { + id: '8e1dc1d3-c71f-419a-9c43-ea22c77a5378', + name: 'continentaltire.com' + }, + { + id: '8c94166c-ad8d-42b4-aafd-e75c2cf95d48', + name: 'bhhs.com' + }, + { + id: '7e7a883c-5009-42dd-bf2e-e4cb3b643503', + name: 'astrazeneca.com' + }, + { + id: '3dc4b29f-b423-4d8e-9f62-9199f0d7598f', + name: 'myastrazeneca.co.uk' + }, + { + id: 'd82b9d5f-4f38-4390-80cc-b55251d420b4', + name: 'humana.com' + }, + { + id: '9c25d632-420a-432e-8a82-555ba90879dd', + name: 'celestyal.com' + }, + { + id: 'a8f00731-a43d-43ec-b348-31f57a5072de', + name: 'jet2.com' + }, + { + id: '04807acc-4aab-4854-9b02-6ed99e9c9f14', + name: 'jet2holidays.com' + }, + { + id: '34bb79c0-c901-4241-929e-a9c930560e03', + name: 'frescopa.coffee' + }, + { + id: 'ca0314e5-43fa-41e2-a947-08c7a3a33256', + name: 'unilever.com' + } +]; diff --git a/scripts/create-fix-entity.js b/scripts/create-fix-entity.js new file mode 100644 index 000000000..aa9bac6a5 --- /dev/null +++ b/scripts/create-fix-entity.js @@ -0,0 +1,213 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/** + * Utility functions for creating fix entities for verified suggestions. 
+ * + * Usage example: + * ```javascript + * import { createDataAccess } from '@adobe/spacecat-shared-data-access'; + * import { createFixEntityForSuggestion } from './create-fix-entity.js'; + * + * // In your fix checker script: + * const dataAccess = createDataAccess(config); + * + * // For a single verified suggestion: + * if (isFixed) { + * await createFixEntityForSuggestion(dataAccess, suggestion, { + * status: 'PENDING', + * logger: this.log + * }); + * } + * + * // For multiple verified suggestions: + * import { createFixEntitiesForSuggestions } from './create-fix-entity.js'; + * + * const fixedSuggestions = results.filter(r => r.isFixed).map(r => r.suggestion); + * const result = await createFixEntitiesForSuggestions(dataAccess, fixedSuggestions, { + * logger: this.log + * }); + * console.log(`Created: ${result.createdItems.length}, Skipped: ${result.skippedItems.length}`); + * ``` + */ + +/** + * Creates a fix entity and fix entity suggestion for a verified fix. + * Prevents duplication by checking if a fix entity already exists for the suggestion. 
+ * + * @param {Object} dataAccess - The data access instance from createDataAccess() + * @param {Suggestion} suggestion - The suggestion (model instance or plain object) that has been verified as fixed + * @param {Object} options - Optional configuration + * @param {string} options.status - Fix entity status (default: 'PUBLISHED') + * @param {string} options.origin - Fix entity origin (default: 'spacecat') + * @param {Object} options.logger - Optional logger object with info/error/debug methods (default: errors go to console.error, info/debug are discarded) + * @returns {Promise} - Returns the created or existing fix entity + * @throws {Error} - Throws error if validation or creation fails (the error is logged, then rethrown) + */ +export async function createFixEntityForSuggestion(dataAccess, suggestion, options = {}) { + const { + status = 'PUBLISHED', + origin = 'spacecat', + logger = null + } = options; + + const log = logger || { + info: () => {}, + error: (msg) => console.error(`[ERROR] ${msg}`), + debug: (msg) => {} + }; + + try { + // Validate inputs + if (!dataAccess) { + throw new Error('dataAccess is required'); + } + + if (!suggestion) { + throw new Error('suggestion is required'); + } + + // Get suggestion details (model getters when present, plain properties otherwise) + const suggestionId = suggestion.getId ? suggestion.getId() : suggestion.id; + const opportunityId = suggestion.getOpportunityId ? suggestion.getOpportunityId() : suggestion.opportunityId; + const suggestionType = suggestion.getType ? suggestion.getType() : suggestion.type; + const suggestionData = suggestion.getData ? 
suggestion.getData() : suggestion.data; + + if (!suggestionId) { + throw new Error('suggestion must have an ID'); + } + + if (!opportunityId) { + throw new Error('suggestion must have an opportunityId'); + } + + if (!suggestionType) { + throw new Error('suggestion must have a type'); + } + + if (!suggestionData || typeof suggestionData !== 'object') { + throw new Error('suggestion.getData() must return a non-empty object'); + } + + // Check if fix entity already exists for this suggestion (prevent duplication) + const { Suggestion: SuggestionCollection, FixEntity: FixEntityCollection } = dataAccess; + + log.debug(`Checking if fix entity already exists for suggestion ${suggestionId}...`); + const existingFixEntitiesResult = await SuggestionCollection.getFixEntitiesBySuggestionId(suggestionId); + + if (existingFixEntitiesResult && existingFixEntitiesResult.data && existingFixEntitiesResult.data.length > 0) { + log.debug(`Fix entity already exists for suggestion ${suggestionId}, skipping creation`); + return existingFixEntitiesResult.data[0]; + } + + log.debug(`Creating new fix entity for suggestion ${suggestionId}...`); + + // Create fix entity; both timestamps mirror the suggestion's updatedAt (getter when present, plain property otherwise) + const fixEntity = await FixEntityCollection.create({ + opportunityId, + type: suggestionType, + changeDetails: suggestionData, + status, + origin, + createdAt: suggestion.getUpdatedAt ? suggestion.getUpdatedAt() : suggestion.updatedAt, + updatedAt: suggestion.getUpdatedAt ? suggestion.getUpdatedAt() : suggestion.updatedAt, + }); + + log.info(`Created fix entity ${fixEntity.getId()} for suggestion ${suggestionId}`); + + // Link the suggestion to the fix entity using the proper API + log.debug(`Linking suggestion ${suggestionId} to fix entity ${fixEntity.getId()}...`); + const linkResult = await FixEntityCollection.setSuggestionsForFixEntity( + opportunityId, + fixEntity, + [suggestion] + ); + + log.info(`Created fix entity suggestion link: ${suggestionId} -> ${fixEntity.getId()} (created: ${linkResult.createdItems.length}, errors: ${linkResult.errorItems.length})`); + + return fixEntity; + + } catch (error) { + 
log.error(`Failed to create fix entity for suggestion: ${error.message}`); + throw error; + } +} + +/** + * Batch create fix entities for multiple suggestions, one at a time in input order. + * A failure on one suggestion is recorded in errorItems and does not stop the remaining ones. + * + * @param {Object} dataAccess - The data access instance from createDataAccess() + * @param {Array} suggestions - Array of suggestion objects that have been verified as fixed + * @param {Object} options - Optional configuration (same as createFixEntityForSuggestion) + * @returns {Promise} - Returns object with createdItems, skippedItems and errorItems arrays + */ +export async function createFixEntitiesForSuggestions(dataAccess, suggestions, options = {}) { + const { + logger = null + } = options; + + const log = logger || { + info: (msg) => console.log(`[INFO] ${msg}`), + error: (msg) => console.error(`[ERROR] ${msg}`), + debug: (msg) => {} + }; + + const createdItems = []; + const skippedItems = []; + const errorItems = []; + + log.info(`Processing ${suggestions.length} suggestions for fix entity creation`); + + for (const suggestion of suggestions) { + try { + const suggestionId = suggestion.getId ? suggestion.getId() : suggestion.id; + + // Check if fix entity already exists before attempting creation, so it is reported as skipped rather than created + const { Suggestion: SuggestionCollection } = dataAccess; + const existingFixEntitiesResult = await SuggestionCollection.getFixEntitiesBySuggestionId(suggestionId); + + if (existingFixEntitiesResult && existingFixEntitiesResult.data && existingFixEntitiesResult.data.length > 0) { + skippedItems.push({ + suggestionId, + reason: 'Fix entity already exists' + }); + continue; + } + + // Create new fix entity + const result = await createFixEntityForSuggestion(dataAccess, suggestion, options); + + if (result) { + createdItems.push({ + suggestionId, + fixEntity: result + }); + } + } catch (error) { + const suggestionId = suggestion.getId ? 
suggestion.getId() : suggestion.id; + errorItems.push({ + suggestionId, + error: error.message + }); + log.error(`Failed to create fix entity for suggestion ${suggestionId}: ${error.message}`); + } + } + + log.info(`Created: ${createdItems.length}, Skipped: ${skippedItems.length}, Errors: ${errorItems.length}`); + + return { + createdItems, + skippedItems, + errorItems + }; +} diff --git a/scripts/csv-utils.js b/scripts/csv-utils.js new file mode 100644 index 000000000..7b84376ec --- /dev/null +++ b/scripts/csv-utils.js @@ -0,0 +1,1122 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */ + +import { writeFileSync } from 'fs'; + +/** + * Common CSV utilities for all audit fix checkers + */ + +/** + * Comprehensive Metatags Raw Data CSV headers (25 columns) + */ +export const METATAGS_CSV_HEADERS = [ + // Core Identity (5 columns) + 'Site ID', + 'Site Name', + 'Opportunity ID', + 'Opportunity Status', + 'Suggestion ID', + + // Suggestion Details (6 columns) + 'Suggestion Type', + 'Suggestion Status', + 'Suggestion Rank', + 'Tag Name', + 'Issue', + 'Issue Details', + + // Content Analysis (4 columns) + 'URL', + 'Original Content', + 'AI Suggestion', + 'Current Content', + + // Fix Detection Results (4 columns) + 'AI Suggestion Implemented', + 'Is Fixed Overall', + 'Fix Type', + 'Test Date', + + // Timestamps and Metadata (6 columns) + 'Opportunity Created', + 'Opportunity Updated', + 'Suggestion Created', + 'Suggestion Updated', + 'Updated By', + 'Recommended Action' +]; + +/** + * Sitemap CSV headers (with status codes) + */ +export const SITEMAP_CSV_HEADERS = [ + 'Site ID', + 'Site Name', + 'Opportunity ID', + 'Opportunity Status', + 'Suggestion ID', + 'Suggestion Type', + 'Suggestion Status', + 'Suggestion Rank', + 'Sitemap URL', + 'Page URL', + 'Original Status Code', + 'Current Status Code', + 'Suggested URLs', + 'Recommended Action', + 'AI Suggestion Implemented', + 'Is Fixed Overall', + 'Fix Type', + 'Test Date', + 'Opportunity Created', + 'Opportunity Updated', + 'Suggestion Created', + 'Suggestion Updated', + 'Updated By' +]; + +/** + * Internal Links CSV headers (20 columns) + */ +export const INTERNAL_LINKS_CSV_HEADERS = [ + // Core Identity (5 columns) + 'Site ID', + 'Site Name', + 'Opportunity ID', + 'Opportunity Status', + 'Suggestion ID', + + // Suggestion Details (3 columns) + 'Suggestion Type', + 'Suggestion Status', + 'URL From', + 'URL To', + + // AI Recommendations (1 column) + 'URLs Suggested', + + // Fix Detection Results (4 columns) + 'Link Fixed', + 'AI Suggestion Implemented', + 'Fix Type', + 'Current Status Code', + + // 
Timestamps and Metadata (6 columns) + 'Opportunity Created', + 'Opportunity Updated', + 'Suggestion Created', + 'Suggestion Updated', + 'Updated By', + 'Test Date' +]; + +/** + * CWV CSV headers (26 columns) + */ +export const CWV_CSV_HEADERS = [ + // Core Identity (5 columns) + 'Site ID', + 'Site Name', + 'Opportunity ID', + 'Opportunity Status', + 'Suggestion ID', + + // Suggestion Details (6 columns) + 'Suggestion Type', + 'Suggestion Status', + 'Suggestion Rank', + 'Entry Type', + 'URL or Pattern', + 'Pageviews', + + // Historical CWV Metrics (3 columns) + 'Old LCP (ms)', + 'Old CLS', + 'Old INP (ms)', + + // Current CWV Metrics (3 columns) + 'Current LCP (ms)', + 'Current CLS', + 'Current INP (ms)', + + // Performance Analysis (3 columns) + 'Metrics Improved', + 'Is Fixed', + 'Fix Type', + + // Timestamps and Metadata (6 columns) + 'Opportunity Created', + 'Opportunity Updated', + 'Suggestion Created', + 'Suggestion Updated', + 'Updated By', + 'Test Date' +]; + +/** + * Alt-Text CSV headers (27 columns) + */ +export const ALT_TEXT_CSV_HEADERS = [ + // Core Identity (5 columns) + 'Site ID', + 'Site Name', + 'Opportunity ID', + 'Opportunity Status', + 'Suggestion ID', + + // Suggestion Details (6 columns) + 'Suggestion Type', + 'Suggestion Status', + 'Suggestion Rank', + 'Image ID', + 'Page URL', + 'Image URL', + + // Image Analysis (5 columns) + 'XPath', + 'Match Method', + 'Is Decorative', + 'Is Appropriate', + 'Language', + + // Alt Text Comparison (4 columns) + 'Suggested Alt Text', + 'Current Alt Text', + 'Similarity', + 'AI Suggestion Implemented', + + // Fix Detection Results (2 columns) + 'Is Fixed', + 'Fix Type', + + // Timestamps and Metadata (4 columns) + 'Opportunity Created', + 'Opportunity Updated', + 'Suggestion Created', + 'Suggestion Updated', + 'Updated By', + 'Test Date' +]; + +/** + * Broken Backlinks CSV headers (24 columns - removed Final Status Code) + */ +export const BROKEN_BACKLINKS_CSV_HEADERS = [ + // Core Identity (5 columns) + 'Site 
/**
 * Format metatags result for comprehensive raw data CSV (25 columns).
 *
 * Returns one array entry per column of METATAGS_CSV_HEADERS. Missing values are
 * replaced by MISSING_* placeholders so that every row keeps the same column count.
 * Free-text columns are always wrapped in double quotes with embedded quotes doubled;
 * the remaining columns are quoted only when they contain a comma, quote or line break,
 * so the row stays aligned even when it is joined with a plain `join(',')`.
 *
 * @param {Object} result - Fix-check result for a single suggestion
 * @param {string} siteId - Site id; falls back to result.siteId
 * @param {string} siteName - Site name; falls back to result.siteName
 * @returns {Array} 25 CSV-ready field values
 */
export function formatMetatagsResult(result, siteId, siteName) {
  const testDate = new Date().toISOString();

  // Always-quoted field: embedded double quotes are doubled.
  const quoted = (value) => `"${String(value).replace(/"/g, '""')}"`;
  // Bare field: left untouched unless it would break the row when joined with commas.
  const bare = (value) => (/[",\r\n]/.test(String(value)) ? quoted(value) : value);
  const isMissing = (value) => value === undefined || value === null;
  const yesNoUnknown = (flag) => {
    if (isMissing(flag)) return 'UNKNOWN';
    return flag ? 'YES' : 'NO';
  };

  return [
    // Core Identity (5 columns) - SAFE DEFAULTS TO PREVENT COLUMN MISALIGNMENT
    bare(siteId || result.siteId || 'MISSING_SITE_ID'),
    quoted(siteName || result.siteName || 'MISSING_SITE_NAME'),
    bare(result.opportunityId || 'MISSING_OPPORTUNITY_ID'),
    bare(result.opportunityStatus || 'MISSING_OPPORTUNITY_STATUS'),
    bare(result.suggestionId || 'MISSING_SUGGESTION_ID'),

    // Suggestion Details (6 columns) - SAFE DEFAULTS
    bare(result.suggestionType || 'MISSING_SUGGESTION_TYPE'),
    bare(result.suggestionStatus || 'MISSING_SUGGESTION_STATUS'),
    // A rank of 0 is a real value; only undefined/null count as missing.
    isMissing(result.suggestionRank) ? 'MISSING_RANK' : bare(result.suggestionRank),
    bare(result.tagName || 'MISSING_TAG_NAME'),
    quoted(result.issue || 'MISSING_ISSUE'),
    quoted(result.issueDetails || result.issue || 'MISSING_ISSUE_DETAILS'),

    // Content Analysis (4 columns) - SAFE DEFAULTS
    bare(result.url || 'MISSING_URL'),
    quoted(result.originalContent || 'MISSING_ORIGINAL_CONTENT'),
    quoted(result.aiSuggestion || 'MISSING_AI_SUGGESTION'),
    quoted(result.currentContent || 'MISSING_CURRENT_CONTENT'),

    // Fix Detection Results (4 columns) - SAFE BOOLEAN HANDLING
    yesNoUnknown(result.aiSuggestionImplemented),
    yesNoUnknown(result.isFixedOverall),
    bare(result.fixType || result.fixMethod || 'MISSING_FIX_TYPE'),
    testDate,

    // Timestamps and Metadata (6 columns) - SAFE DEFAULTS
    bare(result.opportunityCreated || 'MISSING_OPPORTUNITY_CREATED'),
    bare(result.opportunityUpdated || 'MISSING_OPPORTUNITY_UPDATED'),
    bare(result.suggestionCreated || 'MISSING_SUGGESTION_CREATED'),
    bare(result.suggestionUpdated || 'MISSING_SUGGESTION_UPDATED'),
    bare(result.updatedBy || 'MISSING_UPDATED_BY'),
    quoted(result.recommendedAction || 'MISSING_RECOMMENDED_ACTION')
  ];
}
/**
 * Format internal links result for comprehensive raw data CSV (20 columns).
 *
 * Returns one array entry per column of INTERNAL_LINKS_CSV_HEADERS; missing values
 * become empty strings. Site name and suggested URLs are always double-quoted with
 * embedded quotes doubled. Every other column is quoted only when it contains a comma,
 * quote or line break, because generateInternalLinksCSV joins rows with a plain
 * `join(',')` and a comma inside a URL would otherwise shift all following columns.
 *
 * @param {Object} result - Fix-check result for a single suggestion
 * @param {string} siteId - Site id written to the first column
 * @param {string} siteName - Site name written to the second column
 * @returns {Array} 20 CSV-ready field values
 */
export function formatInternalLinksResult(result, siteId, siteName) {
  // Always-quoted field: embedded double quotes are doubled.
  const quoted = (value) => `"${String(value).replace(/"/g, '""')}"`;
  // Bare field with '' as default; quoted only when it would break the row.
  const bare = (value) => {
    const field = value || '';
    return /[",\r\n]/.test(String(field)) ? quoted(field) : field;
  };
  const yesNo = (flag) => (flag ? 'YES' : 'NO');

  return [
    // Core Identity (5 columns)
    bare(siteId),
    quoted(siteName || ''),
    bare(result.opportunityId),
    bare(result.opportunityStatus),
    bare(result.suggestionId),

    // Suggestion Details (4 columns)
    bare(result.suggestionType),
    bare(result.suggestionStatus),
    bare(result.urlFrom),
    bare(result.urlTo),

    // AI Recommendations (1 column)
    quoted(result.urlsSuggested || ''),

    // Fix Detection Results (4 columns)
    yesNo(result.linkFixed),
    yesNo(result.aiSuggestionImplemented),
    bare(result.fixType),
    bare(result.currentStatusCode),

    // Timestamps and Metadata (6 columns)
    bare(result.opportunityCreated),
    bare(result.opportunityUpdated),
    bare(result.suggestionCreated),
    bare(result.suggestionUpdated),
    bare(result.updatedBy),
    bare(result.testDate)
  ];
}
/**
 * Format broken backlinks result for comprehensive raw data CSV
 * (24 columns - removed Final Status Code).
 *
 * Returns one array entry per column of BROKEN_BACKLINKS_CSV_HEADERS; missing values
 * become empty strings. Site name, title and suggested URLs are always double-quoted
 * with embedded quotes doubled. Every other column is quoted only when it contains a
 * comma, quote or line break, so rows stay aligned when joined with a plain `join(',')`.
 *
 * @param {Object} result - Fix-check result for a single suggestion
 * @param {string} siteId - Site id written to the first column
 * @param {string} siteName - Site name written to the second column
 * @returns {Array} 24 CSV-ready field values
 */
export function formatBrokenBacklinksResult(result, siteId, siteName) {
  // Always-quoted field: embedded double quotes are doubled.
  const quoted = (value) => `"${String(value).replace(/"/g, '""')}"`;
  // Bare field with '' as default; quoted only when it would break the row.
  const bare = (value) => {
    const field = value || '';
    return /[",\r\n]/.test(String(field)) ? quoted(field) : field;
  };
  const yesNo = (flag) => (flag ? 'YES' : 'NO');
  // Keeps a numeric 0 instead of collapsing it to ''.
  const numeric = (value) => (value === undefined || value === null ? '' : bare(value) || value);

  return [
    // Core Identity (5 columns)
    bare(siteId),
    quoted(siteName || ''),
    bare(result.opportunityId),
    bare(result.opportunityStatus),
    bare(result.suggestionId),

    // Suggestion Details (6 columns)
    bare(result.suggestionType),
    bare(result.suggestionStatus),
    numeric(result.suggestionRank),
    quoted(result.title || ''),
    bare(result.urlFrom),
    bare(result.urlTo),

    // Traffic Analysis (2 columns)
    bare(result.trafficDomain),
    quoted(result.urlsSuggested || ''),

    // Fix Detection Results (4 columns)
    yesNo(result.redirectImplemented),
    yesNo(result.aiSuggestionImplemented),
    yesNo(result.isFixed),
    bare(result.fixType),

    // Current Status (1 column - removed Final Status Code)
    bare(result.finalUrl),

    // Timestamps and Metadata (6 columns)
    bare(result.opportunityCreated),
    bare(result.opportunityUpdated),
    bare(result.suggestionCreated),
    bare(result.suggestionUpdated),
    bare(result.updatedBy),
    bare(result.testDate)
  ];
}
/**
 * Format CWV result for comprehensive raw data CSV (26 columns).
 *
 * Returns one array entry per column of CWV_CSV_HEADERS. Missing values become empty
 * strings, but numeric columns (rank, pageviews, LCP, CLS, INP) keep a value of 0:
 * zero is a legitimate measurement and must not be reported as "no data".
 * Site name and the improved-metrics summary are always double-quoted with embedded
 * quotes doubled; other text columns are quoted only when they contain a comma, quote
 * or line break, so rows stay aligned when joined with a plain `join(',')`.
 *
 * @param {Object} result - Fix-check result for a single suggestion
 * @param {string} siteId - Site id written to the first column
 * @param {string} siteName - Site name written to the second column
 * @returns {Array} 26 CSV-ready field values
 */
export function formatCWVResult(result, siteId, siteName) {
  // Always-quoted field: embedded double quotes are doubled.
  const quoted = (value) => `"${String(value).replace(/"/g, '""')}"`;
  const guard = (field) => (/[",\r\n]/.test(String(field)) ? quoted(field) : field);
  // Text column: any falsy value is written as ''.
  const text = (value) => guard(value || '');
  // Numeric column: only undefined/null are written as ''; 0 is preserved.
  const numeric = (value) => (value === undefined || value === null ? '' : guard(value));

  return [
    // Core Identity (5 columns)
    text(siteId),
    quoted(siteName || ''),
    text(result.opportunityId),
    text(result.opportunityStatus),
    text(result.suggestionId),

    // Suggestion Details (6 columns)
    text(result.suggestionType),
    text(result.suggestionStatus),
    numeric(result.suggestionRank),
    text(result.entryType),
    text(result.urlOrPattern),
    numeric(result.pageviews),

    // Historical CWV Metrics (3 columns)
    numeric(result.oldLCP),
    numeric(result.oldCLS),
    numeric(result.oldINP),

    // Current CWV Metrics (3 columns)
    numeric(result.currentLCP),
    numeric(result.currentCLS),
    numeric(result.currentINP),

    // Performance Analysis (3 columns)
    quoted(result.metricsImproved || ''),
    result.isFixed ? 'YES' : 'NO',
    text(result.fixType),

    // Timestamps and Metadata (6 columns)
    text(result.opportunityCreated),
    text(result.opportunityUpdated),
    text(result.suggestionCreated),
    text(result.suggestionUpdated),
    text(result.updatedBy),
    text(result.testDate)
  ];
}
/**
 * Column headers for the legacy consolidated CSV produced by generateCSVContent().
 * One entry per field emitted for each normalized result, in the same order.
 */
export const COMMON_CSV_HEADERS = [
  'Site ID',
  'Site Name',
  'Audit Type',
  'Opportunity ID',
  'Opportunity Status',
  'Suggestion ID',
  'Suggestion Status',
  'URL',
  'Issue Type',
  'Issue Description',
  'Created At',
  'Updated At',
  'Original Content',
  'Original Status Code',
  'Original Error',
  'AI Suggestion',
  'Autofix',
  'Current Content',
  'Current Status Code',
  'Current Error',
  'Is Fixed Overall',
  'Fix Type',
  'Fix Method',
  'Redirect Implemented',
  'Redirect Target',
  'Recommended Action'
];

/**
 * Generate CSV content from normalized results (legacy).
 *
 * Emits the COMMON_CSV_HEADERS line followed by one line per result, joined with '\n'
 * (no trailing newline). Free-text fields are always double-quoted with embedded quotes
 * doubled; a missing free-text field is written as "" rather than the literal text
 * "undefined". Other fields are quoted only when they contain a comma, quote or line
 * break.
 *
 * @param {Array<Object>} normalizedResults - Results already normalized to the common shape
 * @returns {string} CSV text
 */
export function generateCSVContent(normalizedResults) {
  const isMissing = (value) => value === undefined || value === null;
  // Always-quoted field; undefined/null become an empty quoted field.
  const quoted = (value) => `"${String(isMissing(value) ? '' : value).replace(/"/g, '""')}"`;
  // Bare field; quoted only when it would break the row.
  const bare = (value) => {
    if (isMissing(value)) return '';
    return /[",\r\n]/.test(String(value)) ? quoted(value) : value;
  };

  const csvRows = normalizedResults.map((result) => [
    bare(result.siteId),
    quoted(result.siteName),
    bare(result.auditType),
    bare(result.opportunityId),
    bare(result.opportunityStatus),
    bare(result.suggestionId),
    bare(result.suggestionStatus),
    bare(result.url),
    bare(result.issueType),
    quoted(result.issueDescription),
    bare(result.createdAt),
    bare(result.updatedAt),
    quoted(result.originalContent),
    bare(result.originalStatusCode),
    quoted(result.originalError),
    quoted(result.aiSuggestion),
    bare(result.autofix),
    quoted(result.currentContent),
    bare(result.currentStatusCode),
    quoted(result.currentError),
    bare(result.isFixedOverall),
    bare(result.fixType),
    bare(result.fixMethod),
    bare(result.redirectImplemented),
    quoted(result.redirectTarget),
    quoted(result.recommendedAction)
  ]);

  return [
    COMMON_CSV_HEADERS.join(','),
    ...csvRows.map((row) => row.join(','))
  ].join('\n');
}
/**
 * Write comprehensive metatags CSV file (25 columns).
 *
 * The file name comes from generateFilename('metatags', siteId). Content is produced
 * with generateSafeCSV so every row is validated against METATAGS_CSV_HEADERS. If
 * generating or writing the CSV throws, a fallback file is written instead, containing
 * the header line and a single placeholder row that carries the error message.
 *
 * @param {Array<Object>} results - Fix-check results to export
 * @param {string} siteId - Site id used in the file name and in the fallback row
 * @param {string} siteName - Site name passed to the row formatter and the fallback row
 * @returns {string} Name of the file that was written
 */
export function writeMetatagsCSV(results, siteId, siteName) {
  const filename = generateFilename('metatags', siteId);

  try {
    // Use safe CSV generation with column validation
    const csvContent = generateSafeCSV(METATAGS_CSV_HEADERS, results, formatMetatagsResult, siteId, siteName);
    writeFileSync(filename, csvContent, 'utf8');

    console.log(`[INFO] ✓ Clean metatags CSV written: ${filename} (${results.length} rows, ${METATAGS_CSV_HEADERS.length} columns)`);
    return filename;

  } catch (error) {
    console.error(`[ERROR] Failed to write metatags CSV: ${error.message}`);

    // Error messages routinely contain quotes and commas; double the quotes so the
    // placeholder row stays a single well-formed CSV record.
    const quoted = (value) => `"${String(value).replace(/"/g, '""')}"`;

    // Fallback: write basic error CSV with correct column count
    const errorRow = new Array(METATAGS_CSV_HEADERS.length).fill('ERROR_CSV_GENERATION_FAILED');
    errorRow[0] = siteId || 'ERROR_SITE_ID';
    errorRow[1] = quoted(siteName || 'ERROR_SITE_NAME');
    errorRow[9] = quoted(`CSV_GENERATION_ERROR: ${error.message}`);

    const errorContent = [
      METATAGS_CSV_HEADERS.join(','),
      errorRow.join(',')
    ].join('\n') + '\n';

    // If the original failure was the write itself, this second write throws as well
    // and the error reaches the caller.
    writeFileSync(filename, errorContent, 'utf8');
    console.log(`[ERROR] ⚠️ Fallback error CSV written: ${filename}`);
    return filename;
  }
}
+ + return filename; +} + +/** + * Write comprehensive broken backlinks CSV file (25 columns) + */ +export function writeBrokenBacklinksCSV(results, siteId, siteName) { + const csvContent = generateBrokenBacklinksCSV(results, siteId, siteName); + const filename = generateFilename('broken-backlinks', siteId); + + writeFileSync(filename, csvContent); + console.log(`[INFO] Comprehensive broken backlinks CSV written: ${filename}`); + + return filename; +} + +/** + * Write comprehensive alt-text CSV file (26 columns) + */ +export function writeAltTextCSV(results, siteId, siteName) { + const csvContent = generateAltTextCSV(results, siteId, siteName); + const filename = generateFilename('alt-text', siteId); + + writeFileSync(filename, csvContent); + console.log(`[INFO] Comprehensive alt-text CSV written: ${filename}`); + + return filename; +} + +/** + * Write comprehensive CWV CSV file (26 columns) + */ +export function writeCWVCSV(results, siteId, siteName) { + const csvContent = generateCWVCSV(results, siteId, siteName); + const filename = generateFilename('cwv', siteId); + + writeFileSync(filename, csvContent); + console.log(`[INFO] Comprehensive CWV CSV written: ${filename}`); + + return filename; +} + +/** + * Write clean sitemap CSV file (with status codes) + */ +export function writeSitemapCSV(results, siteId, siteName) { + const csvContent = generateSitemapCSV(results, siteId, siteName); + const filename = generateFilename('sitemap', siteId); + + writeFileSync(filename, csvContent); + console.log(`[INFO] Clean sitemap CSV written: ${filename}`); + + return filename; +} + +/** + * Write consolidated CSV file with mixed audit types + */ +export function writeConsolidatedCSV(allResults, auditTypes) { + const normalizedResults = []; + + for (const auditType of auditTypes) { + const auditResults = allResults[auditType] || []; + + if (auditType === 'metatags') { + normalizedResults.push(...auditResults.map(normalizeMetatagsResult)); + } else if (auditType === 'sitemap') 
{ + normalizedResults.push(...auditResults.map(result => normalizeSitemapResult(result, result.siteId, result.siteName))); + } + } + + const csvContent = generateCSVContent(normalizedResults); + const timestamp = new Date().toISOString().replace(/[:.]/g, '-').split('T'); + const filename = `ALL-AUDITS-fix-check-ALL-SITES-${timestamp[0]}-${timestamp[1].split('.')[0]}.csv`; + + writeFileSync(filename, csvContent); + return { filename, totalResults: normalizedResults.length }; +} + +/** + * Generate summary statistics for any audit type + */ +export function generateSummaryStats(normalizedResults, auditType) { + const total = normalizedResults.length; + const fixedOverall = normalizedResults.filter(r => r.isFixedOverall === 'YES').length; + const fixedByAI = normalizedResults.filter(r => r.isFixedByAI === 'YES').length; + const fixedByOther = fixedOverall - fixedByAI; + + const stats = { + auditType, + total, + fixedOverall, + fixedByAI, + fixedByOther, + notFixed: total - fixedOverall, + overallFixRate: total > 0 ? ((fixedOverall / total) * 100).toFixed(1) : '0.0', + aiSuccessRate: total > 0 ? 
/**
 * Format site summary result for CSV (16 columns).
 *
 * Returns one array entry per column of SITE_SUMMARY_CSV_HEADERS, substituting
 * placeholder values for missing data. Rank and Days Old keep a value of 0 (for
 * example a suggestion created today) and fall back to 'N/A' only when the value is
 * undefined or null. Free-text columns are always double-quoted with embedded quotes
 * doubled; other columns are quoted only when they contain a comma, quote or line
 * break, because generateSiteSummaryCSV joins rows with a plain `join(',')`.
 *
 * @param {Object} result - Summary record for a single suggestion
 * @returns {Array} 16 CSV-ready field values
 */
export function formatSiteSummaryResult(result) {
  // Always-quoted field: embedded double quotes are doubled.
  const quoted = (value) => `"${String(value).replace(/"/g, '""')}"`;
  // Bare field: quoted only when it would break the row.
  const bare = (value) => (/[",\r\n]/.test(String(value)) ? quoted(value) : value);
  // Numeric column where 0 is meaningful.
  const numericOr = (value, fallback) => (value === undefined || value === null ? fallback : bare(value));

  return [
    bare(result.siteId || 'MISSING_SITE_ID'),
    quoted(result.siteName || 'MISSING_SITE_NAME'),
    bare(result.opportunityId || 'MISSING_OPPORTUNITY_ID'),
    bare(result.opportunityType || 'MISSING_OPPORTUNITY_TYPE'),
    bare(result.suggestionId || 'MISSING_SUGGESTION_ID'),
    bare(result.suggestionStatus || 'MISSING_STATUS'),
    bare(result.priorityScore || 0),
    bare(result.actionRequired || 'MONITOR'),
    numericOr(result.rank, 'N/A'),
    numericOr(result.daysOld, 'N/A'),
    bare(result.fixStatus || 'UNKNOWN'),
    bare(result.url || 'MISSING_URL'),
    quoted(result.issueDescription || 'MISSING_ISSUE'),
    quoted(result.aiSuggestion || 'MISSING_AI_SUGGESTION'),
    bare(result.createdDate || 'MISSING_CREATED_DATE'),
    bare(result.updatedDate || 'MISSING_UPDATED_DATE')
  ];
}
/**
 * Generate error CSV content.
 *
 * Emits the ERROR_CSV_HEADERS line followed by one line per error (as laid out by
 * formatErrorResult); the output always ends with a newline. A missing or empty error
 * list yields the header line only. Fields are quoted, with embedded quotes doubled,
 * whenever they contain a comma, double quote, line feed or carriage return.
 *
 * @param {Array<Object>} errors - Error records to export
 * @returns {string} CSV text
 */
export function generateErrorCSV(errors) {
  const headerLine = ERROR_CSV_HEADERS.join(',');

  if (!errors || errors.length === 0) {
    return headerLine + '\n';
  }

  const toCsvField = (field) => {
    if (field === null || field === undefined) return '';
    const text = String(field);
    // Carriage returns (e.g. from Windows line endings) need quoting just like '\n'.
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  const lines = [headerLine];
  errors.forEach((error) => {
    lines.push(formatErrorResult(error).map(toCsvField).join(','));
  });

  return lines.join('\n') + '\n';
}
/**
 * Safe CSV generation with column validation.
 *
 * Builds the header line plus one line per result and returns them joined with '\n',
 * ending with a trailing newline. Each result is formatted with formatFunction and
 * checked with validateCSVRow; when formatting or validation throws, a placeholder row
 * with the expected number of columns is written instead, so one bad record never
 * shifts or drops the rest of the file.
 *
 * Field encoding:
 * - null/undefined become an empty field;
 * - a field already wrapped in double quotes is kept as is when its inner quotes are
 *   correctly doubled, otherwise the inner text is re-escaped (formatters may wrap raw
 *   text in quotes without escaping it);
 * - any other field is quoted when it contains a comma, quote, '\n' or '\r'.
 *
 * @param {Array<string>} headers - Column headers; their count is the expected row width
 * @param {Array<Object>} results - Records to export
 * @param {Function} formatFunction - (result, siteId, siteName) => array of field values
 * @param {string} siteId - Passed to formatFunction; also used in placeholder rows
 * @param {string} siteName - Passed to formatFunction
 * @returns {string} CSV text
 */
export function generateSafeCSV(headers, results, formatFunction, siteId, siteName) {
  const expectedColumns = headers.length;

  // Validate headers
  validateCSVRow(headers, expectedColumns, 'header');

  const quote = (text) => `"${text.replace(/"/g, '""')}"`;

  const encodeField = (field) => {
    if (field === null || field === undefined) return '';
    const text = String(field);

    if (text.length >= 2 && text.startsWith('"') && text.endsWith('"')) {
      const inner = text.slice(1, -1);
      // Well formed means: once doubled quotes are removed, no lone quote is left.
      const isWellFormed = !inner.replace(/""/g, '').includes('"');
      return isWellFormed ? text : quote(inner);
    }

    return /[",\r\n]/.test(text) ? quote(text) : text;
  };

  const csvContent = [headers.join(',')];

  results.forEach((result, index) => {
    try {
      const formattedRow = formatFunction(result, siteId, siteName);

      // Validate each row has correct number of columns
      validateCSVRow(formattedRow, expectedColumns, `row ${index + 1}`);

      csvContent.push(formattedRow.map(encodeField).join(','));

    } catch (error) {
      console.error(`[ERROR] Failed to format row ${index + 1}: ${error.message}`);

      // Create error placeholder row with correct number of columns
      const errorRow = new Array(expectedColumns).fill('ERROR_FORMATTING_FAILED');
      if (expectedColumns > 0) {
        errorRow[0] = encodeField(siteId || 'ERROR_SITE_ID');
      }
      if (expectedColumns > 1) {
        // Validation messages embed a JSON preview of the row (quotes and commas),
        // so the message must be escaped to stay inside a single field.
        errorRow[1] = quote(`ERROR: ${error.message}`);
      }
      csvContent.push(errorRow.join(','));
    }
  });

  return csvContent.join('\n') + '\n';
}
/**
 * Map a page URL to the key of its scrape result.
 *
 * Only the URL's pathname is used (query string and fragment are dropped),
 * and a single trailing slash is removed before the fixed `/scrape.json`
 * suffix is appended.
 *
 * @param {string} url - Absolute page URL; an unparsable value makes `new URL` throw.
 * @param {string} siteId - Site identifier used as the folder under `scrapes/`.
 * @returns {string} Key of the form `scrapes/<siteId><pathname>/scrape.json`.
 */
function getScrapeJsonPath(url, siteId) {
  const { pathname } = new URL(url);
  const trimmedPath = pathname.endsWith('/') ? pathname.slice(0, -1) : pathname;
  const siteFolder = `scrapes/${siteId}`;
  return `${siteFolder}${trimmedPath}/scrape.json`;
}
this.log.error('Failed to initialize data access:', error.message); + throw error; + } + } + + /** + * Main execution method + */ + async run() { + try { + await this.initializeDataAccess(); + + const existingSuggestions = await this.getExistingSuggestions(); + if (existingSuggestions.length === 0) { + this.log.info('No alt-text suggestions found for this site'); + return; + } + + await this.checkSuggestionsFixes(existingSuggestions); + + if (this.results.length > 0) { + this.generateCSV(); + this.printSummary(); + + if (this.options.markFixed && !this.options.dryRun) { + await this.markFixedSuggestions(); + } + } else { + this.log.info('No results to report'); + } + + } catch (error) { + this.log.error('Failed to run alt-text fix checker:', error.message); + throw error; + } + } + + /** + * Get existing alt-text suggestions from database + */ + async getExistingSuggestions() { + this.log.debug('Fetching existing alt-text suggestions...'); + + const { Opportunity } = this.dataAccess; + const allOpportunities = await Opportunity.allBySiteId(this.options.siteId); + + // Debug: Log all opportunity types found + if (this.options.verbose) { + const opportunityTypes = [...new Set(allOpportunities.map(opp => opp.getType()))]; + this.log.debug(`All opportunity types found: ${opportunityTypes.join(', ')}`); + } + + // Filter for alt-text opportunities + const altTextOpportunities = allOpportunities.filter((opportunity) => + opportunity.getType() === 'alt-text' + ); + + this.log.debug(`Found ${altTextOpportunities.length} alt-text opportunities`); + + // Create opportunity data map for later use + this.opportunityStatusMap = {}; + this.opportunityDataMap = {}; + altTextOpportunities.forEach(opportunity => { + const oppId = opportunity.getId(); + this.opportunityStatusMap[oppId] = opportunity.getStatus ? opportunity.getStatus() : (opportunity.status || 'unknown'); + this.opportunityDataMap[oppId] = { + status: opportunity.getStatus ? 
opportunity.getStatus() : (opportunity.status || 'unknown'), + createdAt: opportunity.getCreatedAt ? opportunity.getCreatedAt() : (opportunity.createdAt || ''), + updatedAt: opportunity.getUpdatedAt ? opportunity.getUpdatedAt() : (opportunity.updatedAt || '') + }; + }); + + // Get outdated suggestions only (as per requirement) + const { Suggestion } = this.dataAccess; + const suggestions = []; + + for (const opportunity of altTextOpportunities) { + const opptyId = opportunity.getId(); + + // Get outdated suggestions + const outdatedSuggestions = await Suggestion.allByOpportunityIdAndStatus(opptyId, 'outdated'); + suggestions.push(...outdatedSuggestions); + } + + this.log.debug(`Found ${suggestions.length} outdated alt-text suggestions`); + return suggestions; + } + + /** + * Get current page content from S3 + */ + async getCurrentPageContent(pageUrl) { + try { + const scrapeJsonPath = getScrapeJsonPath(pageUrl, this.options.siteId); + const bucketName = process.env.S3_SCRAPER_BUCKET_NAME || 'spacecat-prod-scraper'; + + this.log.debug(`Fetching content from S3: ${scrapeJsonPath}`); + + const command = new GetObjectCommand({ + Bucket: bucketName, + Key: scrapeJsonPath + }); + + const response = await this.s3Client.send(command); + const content = await response.Body.transformToString(); + const scrapeData = JSON.parse(content); + + // S3 scrape data structure: scrapeResult.rawBody contains the HTML content + // This matches the pattern used by metatags handler and other audit handlers + return scrapeData.scrapeResult?.rawBody || scrapeData.content || ''; + + } catch (error) { + this.log.debug(`Failed to fetch content for ${pageUrl}: ${error.message}`); + return null; + } + } + + /** + * Find image by XPath (most reliable method) + */ + findImageByXPath(document, xpath) { + try { + // Use proper XPath evaluation + const result = document.evaluate( + xpath, + document, + null, + document.defaultView.XPathResult.FIRST_ORDERED_NODE_TYPE, + null + ); + + const 
imageElement = result.singleNodeValue; + if (imageElement && imageElement.tagName === 'IMG') { + return imageElement; + } + + return null; + } catch (error) { + this.log.debug(`XPath evaluation failed for ${xpath}: ${error.message}`); + return null; + } + } + + /** + * Find image by URL (fallback method) + */ + findImageByUrl(document, imageUrl) { + try { + const images = document.querySelectorAll('img'); + + for (const img of images) { + // Try exact URL match first + if (img.src === imageUrl) { + return img; + } + + // Try partial URL match (filename) + const imageFilename = imageUrl.split('/').pop(); + if (imageFilename && img.src.includes(imageFilename)) { + return img; + } + + // Try srcset attribute for responsive images + if (img.srcset?.includes(imageUrl)) { + return img; + } + } + + return null; + } catch (error) { + this.log.debug(`Error finding image by URL ${imageUrl}: ${error.message}`); + return null; + } + } + + /** + * Find image alt text using XPath first, URL fallback + */ + findImageAltText(htmlContent, xpath, imageUrl) { + try { + const dom = new JSDOM(htmlContent); + const document = dom.window.document; + + let imageElement = null; + let matchMethod = 'NOT_FOUND'; + + // Strategy 1: Try XPath first (most reliable) + if (xpath) { + imageElement = this.findImageByXPath(document, xpath); + if (imageElement) { + matchMethod = 'XPATH'; + } + } + + // Strategy 2: Fallback to URL matching if XPath fails + if (!imageElement && imageUrl) { + imageElement = this.findImageByUrl(document, imageUrl); + if (imageElement) { + matchMethod = 'URL'; + } + } + + if (imageElement) { + const altText = imageElement.getAttribute('alt') || ''; + this.log.debug(`Found image via ${matchMethod}: alt="${altText}"`); + return { altText, matchMethod }; + } + + this.log.debug(`Image not found: xpath=${xpath}, imageUrl=${imageUrl}`); + return { altText: null, matchMethod: 'NOT_FOUND' }; + + } catch (error) { + this.log.debug(`Error finding image alt text: ${error.message}`); 
+ return { altText: null, matchMethod: 'ERROR' }; + } + } + + /** + * Check if two strings match exactly (case-insensitive) + */ + isExactMatch(str1, str2) { + if (!str1 || !str2) return false; + return str1.toLowerCase().trim() === str2.toLowerCase().trim(); + } + + /** + * Check if alt-text suggestions are fixed by comparing current alt attributes + */ + async checkSuggestionsFixes(existingSuggestions) { + this.log.info('Checking if alt-text suggestions are fixed...'); + + const suggestionsToCheck = this.options.limit + ? existingSuggestions.slice(0, this.options.limit) + : existingSuggestions; + + this.log.info(`Analyzing ${suggestionsToCheck.length} suggestions`); + + for (let i = 0; i < suggestionsToCheck.length; i++) { + const suggestion = suggestionsToCheck[i]; + const suggestionData = suggestion.getData ? suggestion.getData() : suggestion.data; + + if (!suggestionData || !suggestionData.recommendations || suggestionData.recommendations.length === 0) { + this.log.debug(`Skipping invalid suggestion: ${suggestion.getId ? 
suggestion.getId() : 'unknown'}`); + continue; + } + + const recommendation = suggestionData.recommendations[0]; + const { + id: imageId, + pageUrl, + imageUrl, + altText: suggestedAltText, + isAppropriate, + isDecorative, + xpath, + language + } = recommendation; + + this.log.debug(`Analyzing ${i + 1}/${suggestionsToCheck.length}: ${pageUrl} - ${imageId}`); + + // Get current page content + const currentContent = await this.getCurrentPageContent(pageUrl); + let currentAltText = null; + let isFixed = false; + let aiSuggestionImplemented = false; + let fixType = 'NOT_FIXED'; + let similarity = 0; + let matchMethod = 'NOT_FOUND'; + + if (currentContent) { + const imageResult = this.findImageAltText(currentContent, xpath, imageUrl); + currentAltText = imageResult.altText; + matchMethod = imageResult.matchMethod; + + if (currentAltText !== null) { + // Alt attribute exists - check if it's properly fixed + + if (isDecorative) { + // For decorative images, empty alt="" is the correct fix + if (currentAltText === '') { + isFixed = true; + fixType = 'DECORATIVE_ALT_ADDED'; + aiSuggestionImplemented = true; // Empty alt for decorative is following AI guidance + similarity = 1.0; + } else { + // Decorative image has non-empty alt text (not ideal but still has alt) + isFixed = true; + fixType = 'ALT_TEXT_ADDED_NON_DECORATIVE'; + } + } else { + // For non-decorative images, alt text should be non-empty + if (currentAltText.trim() !== '') { + isFixed = true; + fixType = 'ALT_TEXT_ADDED'; + + // Check if AI suggestion was implemented (exact match) + if (suggestedAltText) { + if (this.isExactMatch(currentAltText, suggestedAltText)) { + aiSuggestionImplemented = true; + fixType = 'AI_SUGGESTION_IMPLEMENTED'; + similarity = 1.0; // Perfect match + } + } + } + } + } else { + fixType = 'IMAGE_NOT_FOUND'; + } + } else { + fixType = 'PAGE_NOT_FOUND'; + } + + // Get opportunity data from our pre-built map + const opportunityId = suggestion.getOpportunityId ? 
suggestion.getOpportunityId() : 'unknown'; + const opportunityData = this.opportunityDataMap[opportunityId] || {}; + + // Store result with all required fields for 26-column schema + this.results.push({ + // Core Identity (5 columns) + siteId: this.options.siteId, + siteName: this.site.getBaseURL(), + opportunityId: opportunityId, + opportunityStatus: opportunityData.status || 'unknown', + suggestionId: suggestion.getId ? suggestion.getId() : 'unknown', + + // Suggestion Details (6 columns) + suggestionType: suggestion.getType ? suggestion.getType() : suggestion.type, + suggestionStatus: suggestion.getStatus?.() || suggestion.status, + suggestionRank: suggestion.getRank ? suggestion.getRank() : suggestion.rank, + imageId: imageId || '', + pageUrl: pageUrl || '', + imageUrl: imageUrl || '', + + // Image Analysis (5 columns) + xpath: xpath || '', + matchMethod: matchMethod || 'NOT_FOUND', + isDecorative: isDecorative ? 'YES' : 'NO', + isAppropriate: isAppropriate ? 'YES' : 'NO', + language: language || '', + + // Alt Text Comparison (4 columns) + suggestedAltText: suggestedAltText || '', + currentAltText: currentAltText || '', + similarity: Math.round(similarity * 100) / 100, + aiSuggestionImplemented: aiSuggestionImplemented, + + // Fix Detection Results (2 columns) + isFixed: isFixed, + fixType: fixType, + + // Timestamps and Metadata (4 columns) + opportunityCreated: opportunityData.createdAt || '', + opportunityUpdated: opportunityData.updatedAt || '', + suggestionCreated: suggestion.getCreatedAt ? suggestion.getCreatedAt() : (suggestion.createdAt || ''), + suggestionUpdated: suggestion.getUpdatedAt ? suggestion.getUpdatedAt() : (suggestion.updatedAt || ''), + + updatedBy: suggestion.getUpdatedBy ? 
suggestion.getUpdatedBy() : (suggestion.updatedBy || ''), + testDate: new Date().toISOString(), + suggestion: suggestion // Store suggestion reference for fix entity creation + }); + + if (isFixed) { + this.log.info(`✅ FIXED: ${pageUrl} - ${imageId} (${fixType})`); + } else { + this.log.debug(`❌ NOT FIXED: ${pageUrl} - ${imageId} (${fixType})`); + } + + // Rate limiting + await new Promise(resolve => setTimeout(resolve, 100)); + } + } + + /** + * Generate CSV report + */ + generateCSV() { + const filename = writeAltTextCSV(this.results, this.options.siteId, this.site?.getBaseURL() || 'Unknown Site'); + this.log.info(`📊 Comprehensive alt-text CSV report generated: ${filename}`); + return filename; + } + + /** + * Mark fixed suggestions in database + */ + async markFixedSuggestions() { + const fixedResults = this.results.filter(r => r.aiSuggestionImplemented); + + if (fixedResults.length === 0) { + this.log.info('No suggestions to mark as fixed'); + return; + } + + this.log.info(`Creating fix entities for ${fixedResults.length} fixed suggestions`); + + for (const result of fixedResults) { + if (this.options.dryRun) { + this.log.info(`Would create fix entity for ${result.suggestionId} (dry run)`); + } else { + try { + // await createFixEntityForSuggestion(this.dataAccess, result.suggestion, { logger: this.log }); + } catch (error) { + this.log.error(`Failed to create fix entity for ${result.suggestionId}: ${error.message}`); + } + } + } + } + + /** + * Print summary statistics + */ + printSummary() { + const totalSuggestions = this.results.length; + const fixed = this.results.filter(r => r.isFixed).length; + const aiImplemented = this.results.filter(r => r.aiSuggestionImplemented).length; + const decorativeImages = this.results.filter(r => r.isDecorative === 'YES').length; + + this.log.info(''); + this.log.info('=== ALT-TEXT SUMMARY ==='); + this.log.info(`Total suggestions processed: ${totalSuggestions}`); + this.log.info(`Alt text added: ${fixed}`); + 
// CLI setup
import { Command } from 'commander';

// Site used when no --siteId / --sites / --allSites is given (testing default).
const DEFAULT_SITE_ID = '9ae8877a-bbf3-407d-9adb-d6a72ce3c5e3';

const program = new Command();
program
  .name('check-alt-text-fixed')
  .description('Check if image alt-text issues from suggestions have been fixed')
  // Options that take a value need a <placeholder>; without it commander
  // registers a boolean flag and the value is lost.
  .option('--siteId <siteId>', 'Site ID to check')
  .option('--verbose', 'Enable verbose logging', false)
  // Wrap parseInt: commander passes the previous value as a second argument,
  // which a bare parseInt would interpret as the radix.
  .option('--limit <number>', 'Limit number of suggestions to check', (value) => Number.parseInt(value, 10))
  .option('--markFixed', 'Mark fixed suggestions in database', false)
  .option('--dryRun', 'Dry run mode (default: true)', true)
  // Without a negating flag dryRun could never be false and --markFixed
  // could never take effect.
  .option('--no-dryRun', 'Disable dry run mode (required for --markFixed to take effect)')
  .option('--allSites', 'Process all sites from constants.js', false)
  .option('--sites <siteIds>', 'Comma-separated list of site IDs to process')
  .option('--consolidate', 'Generate consolidated CSV for multiple sites', false);

program.parse();
const options = program.opts();

// Validate options and resolve the list of sites to process.
// Precedence: --allSites, then --sites, then --siteId, then the default site.
let sitesToProcess = [];

if (options.allSites) {
  sitesToProcess = SITES;
  console.log(`[INFO] Processing all ${sitesToProcess.length} sites`);
} else if (options.sites) {
  // Tolerate spaces and empty entries in the comma-separated list.
  const requestedIds = new Set(
    options.sites.split(',').map((id) => id.trim()).filter(Boolean),
  );
  sitesToProcess = SITES.filter((site) => requestedIds.has(site.id));
  console.log(`[INFO] Processing ${sitesToProcess.length} specified sites`);
} else if (options.siteId) {
  const site = SITES.find((s) => s.id === options.siteId);
  if (!site) {
    console.error(`[ERROR] Site ID not found in constants: ${options.siteId}`);
    process.exit(1);
  }
  // Use the matched entry so its real name shows up in logs and reports.
  sitesToProcess = [site];
} else {
  // Default site ID for testing
  options.siteId = DEFAULT_SITE_ID;
  const defaultSite = SITES.find((s) => s.id === options.siteId);
  if (!defaultSite) {
    // Guard against an undefined entry reaching processSites().
    console.error(`[ERROR] Default site ID not found in constants: ${options.siteId}`);
    process.exit(1);
  }
  sitesToProcess = [defaultSite];
  console.log(`[INFO] Using default site ID: ${options.siteId}`);
}
`consolidated-alt-text-all-sites-${timestamp}Z.csv`; + + // Generate CSV with proper site info from each result + const csvRows = allResults.map(result => formatAltTextResult(result, result.siteId, result.siteName)); + const csvContent = [ + ALT_TEXT_CSV_HEADERS.join(','), + ...csvRows.map(row => row.join(',')) + ].join('\n'); + + writeFileSync(filename, csvContent); + console.log(`📊 Consolidated alt-text CSV generated: ${filename} (${allResults.length} total results)`); +} + +// Run the processing +processSites().catch(error => { + console.error('Fatal error:', error.message); + process.exit(1); +}); diff --git a/scripts/opportunities/broken-backlinks/index.mjs b/scripts/opportunities/broken-backlinks/index.mjs new file mode 100644 index 000000000..969fae65a --- /dev/null +++ b/scripts/opportunities/broken-backlinks/index.mjs @@ -0,0 +1,665 @@ +#!/usr/bin/env node + +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/** + * Broken Backlinks Fix Checker + * + * This script checks if broken backlink issues from existing suggestions have been fixed. + * It tests broken URLs to see if redirects have been implemented to working pages. 
+ * + * Features: + * - Comprehensive 24-column raw data schema + * - Tests broken URLs using content analysis (detects soft 404s) + * - Validates redirect targets and content + * - Compares redirects with AI-suggested URLs + * - Multi-site processing with consolidation + * - Efficient database queries (outdated + fixed suggestions) + */ + +import { writeFileSync } from 'fs'; +// Using simple console logger instead of shared-utils dependency +import { createDataAccess } from '@adobe/spacecat-shared-data-access'; +import { tracingFetch as fetch } from '@adobe/spacecat-shared-utils'; +import { SITES } from '../../constants.js'; +import { writeBrokenBacklinksCSV, formatBrokenBacklinksResult, BROKEN_BACKLINKS_CSV_HEADERS } from '../../csv-utils.js'; +import { createFixEntityForSuggestion } from '../../create-fix-entity.js'; + +// HTTP timeout for URL testing (same as handler) +const TIMEOUT = 3000; + +/** + * Broken Backlinks Fix Checker Class + */ +class BrokenBacklinksFixChecker { + constructor(options = {}) { + this.options = { + siteId: null, + verbose: false, + limit: null, + markFixed: false, + dryRun: true, + ...options + }; + + this.log = this.createSimpleLogger(this.options.verbose); + + this.results = []; + this.dataAccess = null; + this.site = null; + this.opportunityStatusMap = {}; + this.opportunityDataMap = {}; + } + + /** + * Create simple console logger + */ + createSimpleLogger(verbose) { + return { + info: (msg) => console.log(`[INFO] ${msg}`), + debug: verbose ? 
(msg) => console.log(`[DEBUG] ${msg}`) : () => {}, + error: (msg) => console.error(`[ERROR] ${msg}`) + }; + } + + /** + * Initialize data access and validate site + */ + async initializeDataAccess() { + this.log.info('Initializing data access...'); + + try { + // Set up required environment variables for data access + if (!process.env.DYNAMO_TABLE_NAME_DATA) { + process.env.DYNAMO_TABLE_NAME_DATA = 'spacecat-services-data'; + this.log.debug('Set default DYNAMO_TABLE_NAME_DATA'); + } + + if (!process.env.S3_SCRAPER_BUCKET_NAME) { + process.env.S3_SCRAPER_BUCKET_NAME = 'spacecat-prod-scraper'; + this.log.debug('Set default S3_SCRAPER_BUCKET_NAME'); + } + + // Initialize data access with configuration + const config = { + tableNameData: process.env.DYNAMO_TABLE_NAME_DATA, + indexNameAllByStatus: 'gsi1pk-gsi1sk-index', + indexNameAllBySiteId: 'gsi2pk-gsi2sk-index' + }; + + this.dataAccess = createDataAccess(config); + const { Site } = this.dataAccess; + + this.site = await Site.findById(this.options.siteId); + if (!this.site) { + throw new Error(`Site not found: ${this.options.siteId}`); + } + + this.log.info(`✓ Site found: ${this.site.getBaseURL()}`); + + } catch (error) { + this.log.error('Failed to initialize data access:', error.message); + throw error; + } + } + + /** + * Main execution method + */ + async run() { + try { + await this.initializeDataAccess(); + + const existingSuggestions = await this.getExistingSuggestions(); + if (existingSuggestions.length === 0) { + this.log.info('No broken backlinks suggestions found for this site'); + return; + } + + await this.checkSuggestionsFixes(existingSuggestions); + + if (this.results.length > 0) { + this.generateCSV(); + this.printSummary(); + + if (this.options.markFixed && !this.options.dryRun) { + await this.markFixedSuggestions(); + } + } else { + this.log.info('No results to report'); + } + + } catch (error) { + this.log.error('Failed to run broken backlinks fix checker:', error.message); + throw error; + } + } + 
+ /** + * Get existing broken backlinks suggestions from database + */ + async getExistingSuggestions() { + this.log.debug('Fetching existing broken backlinks suggestions...'); + + const { Opportunity } = this.dataAccess; + const allOpportunities = await Opportunity.allBySiteId(this.options.siteId); + + // Debug: Log all opportunity types found + if (this.options.verbose) { + const opportunityTypes = [...new Set(allOpportunities.map(opp => opp.getType()))]; + this.log.debug(`All opportunity types found: ${opportunityTypes.join(', ')}`); + } + + // Filter for broken-backlinks opportunities + const brokenBacklinksOpportunities = allOpportunities.filter((opportunity) => + opportunity.getType() === 'broken-backlinks' + ); + + this.log.debug(`Found ${brokenBacklinksOpportunities.length} broken-backlinks opportunities`); + + // Create opportunity data map for later use + this.opportunityStatusMap = {}; + this.opportunityDataMap = {}; + brokenBacklinksOpportunities.forEach(opportunity => { + const oppId = opportunity.getId(); + this.opportunityStatusMap[oppId] = opportunity.getStatus ? opportunity.getStatus() : (opportunity.status || 'unknown'); + this.opportunityDataMap[oppId] = { + status: opportunity.getStatus ? opportunity.getStatus() : (opportunity.status || 'unknown'), + createdAt: opportunity.getCreatedAt ? opportunity.getCreatedAt() : (opportunity.createdAt || ''), + updatedAt: opportunity.getUpdatedAt ? 
opportunity.getUpdatedAt() : (opportunity.updatedAt || '') + }; + }); + + // Get outdated AND fixed suggestions directly from database + const { Suggestion } = this.dataAccess; + const suggestions = []; + + for (const opportunity of brokenBacklinksOpportunities) { + const opptyId = opportunity.getId(); + + // Get outdated suggestions + const outdatedSuggestions = await Suggestion.allByOpportunityIdAndStatus(opptyId, 'outdated'); + suggestions.push(...outdatedSuggestions); + + // Get fixed suggestions + // const fixedSuggestions = await Suggestion.allByOpportunityIdAndStatus(opptyId, 'fixed'); + // suggestions.push(...fixedSuggestions); + } + + this.log.debug(`Found ${suggestions.length} outdated broken backlinks suggestions`); + return suggestions; + } + + /** + * Fetch with timeout wrapper (matches handler logic exactly) + */ + async fetchWithTimeout(url, timeout) { + try { + return await fetch(url, { timeout }); + } catch (error) { + if (error.code === 'ETIMEOUT') { + this.log.debug(`Request to ${url} timed out after ${timeout}ms`); + return { ok: false, status: 408 }; + } else { + this.log.debug(`Request to ${url} failed with error: ${error.message}`); + } + return { ok: false, status: 500 }; + } + } + + /** + * Test if URL is still broken (same logic as handler) + */ + async isStillBrokenBacklink(url) { + const response = await this.fetchWithTimeout(url, TIMEOUT); + + if (!response.ok && response.status !== 404 + && response.status >= 400 && response.status < 500) { + this.log.debug(`Backlink ${url} returned status ${response.status}`); + } + + // Match handler logic exactly: return !response.ok + return !response.ok; + } + + /** + * Test URL status and redirects (for fix detection) + */ + async testUrlStatus(url) { + try { + const response = await fetch(url, { + method: 'GET', + timeout: TIMEOUT, + redirect: 'manual' // Don't follow redirects automatically + }); + + return { + success: true, + statusCode: response.status, + redirectLocation: 
response.headers.get('location'), + finalUrl: url + }; + + } catch (error) { + this.log.debug(`Failed to test URL ${url}: ${error.message}`); + return { + success: false, + statusCode: null, + redirectLocation: null, + finalUrl: url, + error: error.message + }; + } + } + + /** + * Follow redirect chain to final destination + */ + async followRedirectChain(url, maxRedirects = 5) { + let currentUrl = url; + let redirectCount = 0; + const redirectChain = []; + + while (redirectCount < maxRedirects) { + const result = await this.testUrlStatus(currentUrl); + redirectChain.push({ + url: currentUrl, + statusCode: result.statusCode, + redirectLocation: result.redirectLocation + }); + + if (!result.success) { + return { + success: false, + finalUrl: currentUrl, + finalStatusCode: null, + redirectChain, + error: result.error + }; + } + + // Check if it's a redirect (301, 302, 303, 307, 308) + // These are the status codes that fetch follows automatically + if (result.statusCode >= 300 && result.statusCode < 400) { + if (result.redirectLocation) { + currentUrl = new URL(result.redirectLocation, currentUrl).href; + redirectCount++; + } else { + break; + } + } else { + // Final destination reached (2xx success or 4xx/5xx error) + return { + success: true, + finalUrl: currentUrl, + finalStatusCode: result.statusCode, + redirectChain, + isRedirect: redirectCount > 0 + }; + } + } + + return { + success: false, + finalUrl: currentUrl, + finalStatusCode: null, + redirectChain, + error: 'Too many redirects' + }; + } + + /** + * Check if redirect matches suggested URLs + */ + checkRedirectMatchesSuggestions(finalUrl, urlsSuggested) { + if (!urlsSuggested || urlsSuggested.length === 0) { + return false; + } + + return urlsSuggested.some(suggestedUrl => { + // Normalize URLs for comparison + try { + const finalUrlObj = new URL(finalUrl); + const suggestedUrlObj = new URL(suggestedUrl); + + // Compare pathname and search params, ignore hash + return finalUrlObj.pathname === 
suggestedUrlObj.pathname && + finalUrlObj.search === suggestedUrlObj.search; + } catch (error) { + // Fallback to string comparison + return finalUrl === suggestedUrl; + } + }); + } + + /** + * Check if broken backlinks suggestions are fixed by testing redirects + */ + async checkSuggestionsFixes(existingSuggestions) { + this.log.info('Checking if broken backlinks suggestions are fixed...'); + + const suggestionsToCheck = this.options.limit + ? existingSuggestions.slice(0, this.options.limit) + : existingSuggestions; + + this.log.info(`Testing ${suggestionsToCheck.length} suggestions`); + + for (let i = 0; i < suggestionsToCheck.length; i++) { + const suggestion = suggestionsToCheck[i]; + const suggestionData = suggestion.getData ? suggestion.getData() : suggestion.data; + + if (!suggestionData) { + this.log.debug(`Skipping invalid suggestion: ${suggestion.getId ? suggestion.getId() : 'unknown'}`); + continue; + } + + const { + title, + url_from: urlFrom, + url_to: urlTo, + traffic_domain: trafficDomain, + urlsSuggested + } = suggestionData; + + this.log.debug(`Testing ${i + 1}/${suggestionsToCheck.length}: ${urlTo}`); + + // First, check if URL is still broken using handler logic + // This follows redirects automatically (like handler does) + const isStillBroken = await this.isStillBrokenBacklink(urlTo); + + let isFixed = false; + let redirectImplemented = false; + let aiSuggestionImplemented = false; + let fixType = 'STILL_BROKEN'; + let finalUrl = urlTo; + + if (!isStillBroken) { + // URL is no longer broken - analyze how it was fixed + // Use manual redirect detection to see if redirects were implemented + const redirectResult = await this.followRedirectChain(urlTo); + + if (redirectResult.success) { + finalUrl = redirectResult.finalUrl; + + if (redirectResult.isRedirect) { + // A redirect was implemented to fix the broken link + redirectImplemented = true; + isFixed = true; + fixType = 'REDIRECT_TO_WORKING_PAGE'; + + // Check if redirect matches AI suggestions 
+ if (this.checkRedirectMatchesSuggestions(finalUrl, urlsSuggested)) { + aiSuggestionImplemented = true; + fixType = 'AI_SUGGESTED_REDIRECT_IMPLEMENTED'; + } + } else { + // URL now works directly (no redirect needed - page was restored) + isFixed = true; + fixType = 'URL_NOW_WORKS'; + } + } else { + // This shouldn't happen if isStillBrokenBacklink returned false, + // but handle edge case where redirect chain fails + this.log.debug(`URL ${urlTo} passes handler check but redirect chain analysis failed: ${redirectResult.error}`); + // Still consider it fixed since handler says it's not broken + isFixed = true; + fixType = 'URL_NOW_WORKS'; + finalUrl = urlTo; + } + } + + // Get opportunity data from our pre-built map + const opportunityId = suggestion.getOpportunityId ? suggestion.getOpportunityId() : 'unknown'; + const opportunityData = this.opportunityDataMap[opportunityId] || {}; + + // Store result with all required fields for 24-column schema (removed Final Status Code) + this.results.push({ + // Core Identity (5 columns) + siteId: this.options.siteId, + siteName: this.site.getBaseURL(), + opportunityId: opportunityId, + opportunityStatus: opportunityData.status || 'unknown', + suggestionId: suggestion.getId ? suggestion.getId() : 'unknown', + + // Suggestion Details (6 columns) + suggestionType: suggestion.getType ? suggestion.getType() : suggestion.type, + suggestionStatus: suggestion.getStatus ? suggestion.getStatus() : suggestion.status, + suggestionRank: suggestion.getRank ? suggestion.getRank() : suggestion.rank, + title: title || '', + urlFrom: urlFrom || '', + urlTo: urlTo || '', + + // Traffic Analysis (2 columns) + trafficDomain: trafficDomain || 0, + urlsSuggested: Array.isArray(urlsSuggested) ? 
urlsSuggested.join(', ') : (urlsSuggested || ''), + + // Fix Detection Results (4 columns) + redirectImplemented: redirectImplemented, + aiSuggestionImplemented: aiSuggestionImplemented, + isFixed: isFixed, + fixType: fixType, + + // Current Status (1 column) + finalUrl: finalUrl || '', + + // Timestamps and Metadata (6 columns) + opportunityCreated: opportunityData.createdAt || '', + opportunityUpdated: opportunityData.updatedAt || '', + suggestionCreated: suggestion.getCreatedAt ? suggestion.getCreatedAt() : (suggestion.createdAt || ''), + suggestionUpdated: suggestion.getUpdatedAt ? suggestion.getUpdatedAt() : (suggestion.updatedAt || ''), + updatedBy: suggestion.getUpdatedBy ? suggestion.getUpdatedBy() : (suggestion.updatedBy || ''), + testDate: new Date().toISOString(), + suggestion: suggestion // Store suggestion reference for fix entity creation + }); + + if (isFixed) { + this.log.info(`✅ FIXED: ${urlTo} → ${finalUrl} (${fixType})`); + } else { + this.log.debug(`❌ NOT FIXED: ${urlTo} (${fixType})`); + } + + // Rate limiting + await new Promise(resolve => setTimeout(resolve, 200)); + } + } + + /** + * Generate CSV report + */ + generateCSV() { + const filename = writeBrokenBacklinksCSV(this.results, this.options.siteId, this.site?.getBaseURL() || 'Unknown Site'); + this.log.info(`📊 Comprehensive broken backlinks CSV report generated: ${filename}`); + return filename; + } + + /** + * Mark fixed suggestions in database + */ + async markFixedSuggestions() { + const fixedResults = this.results.filter(r => r.aiSuggestionImplemented); + + if (fixedResults.length === 0) { + this.log.info('No suggestions to mark as fixed'); + return; + } + + this.log.info(`Creating fix entities for ${fixedResults.length} fixed suggestions`); + + for (const result of fixedResults) { + if (this.options.dryRun) { + this.log.info(`Would create fix entity for ${result.suggestionId} (dry run)`); + } else { + try { + // await createFixEntityForSuggestion(this.dataAccess, result.suggestion, 
{ logger: this.log }); + } catch (error) { + this.log.error(`Failed to create fix entity for ${result.suggestionId}: ${error.message}`); + } + } + } + } + + /** + * Print summary statistics + */ + printSummary() { + const totalSuggestions = this.results.length; + const fixed = this.results.filter(r => r.isFixed).length; + const redirectsImplemented = this.results.filter(r => r.redirectImplemented).length; + const aiSuggestionsImplemented = this.results.filter(r => r.aiSuggestionImplemented).length; + const totalTrafficDomain = this.results.reduce((sum, r) => sum + (r.trafficDomain || 0), 0); + const recoveredTrafficDomain = this.results.filter(r => r.isFixed).reduce((sum, r) => sum + (r.trafficDomain || 0), 0); + + this.log.info(''); + this.log.info('=== BROKEN BACKLINKS SUMMARY ==='); + this.log.info(`Total suggestions processed: ${totalSuggestions}`); + this.log.info(`Backlinks fixed: ${fixed}`); + this.log.info(`Redirects implemented: ${redirectsImplemented}`); + this.log.info(`AI suggestions implemented: ${aiSuggestionsImplemented}`); + this.log.info(`Total traffic domain: ${totalTrafficDomain}`); + this.log.info(`Recovered traffic domain: ${recoveredTrafficDomain}`); + this.log.info(`Still broken: ${totalSuggestions - fixed}`); + + if (fixed > 0) { + this.log.info(''); + this.log.info('Fix types:'); + const fixTypes = {}; + this.results.filter(r => r.isFixed).forEach(r => { + fixTypes[r.fixType] = (fixTypes[r.fixType] || 0) + 1; + }); + + Object.entries(fixTypes).forEach(([type, count]) => { + this.log.info(` ${type}: ${count} backlinks`); + }); + } + } +} + +// CLI setup +import { Command } from 'commander'; + +const program = new Command(); +program + .name('check-broken-backlinks-fixed') + .description('Check if broken backlink issues from suggestions have been fixed') + .option('--siteId ', 'Site ID to check') + .option('--verbose', 'Enable verbose logging', false) + .option('--limit ', 'Limit number of suggestions to check', parseInt) + 
.option('--markFixed', 'Mark fixed suggestions in database', false) + .option('--dryRun', 'Dry run mode (default: true)', true) + .option('--allSites', 'Process all sites from constants.js', false) + .option('--sites ', 'Comma-separated list of site IDs to process') + .option('--consolidate', 'Generate consolidated CSV for multiple sites', false); + +program.parse(); +const options = program.opts(); + +// Validate options +let sitesToProcess = []; + +if (options.allSites) { + sitesToProcess = SITES; + console.log(`[INFO] Processing all ${sitesToProcess.length} sites`); +} else if (options.sites) { + const siteIds = options.sites.split(','); + sitesToProcess = SITES.filter(site => siteIds.includes(site.id)); + console.log(`[INFO] Processing ${sitesToProcess.length} specified sites`); +} else if (options.siteId) { + const site = SITES.find(s => s.id === options.siteId); + if (!site) { + console.error(`[ERROR] Site ID not found in constants: ${options.siteId}`); + process.exit(1); + } else { + sitesToProcess = [{ id: options.siteId, name: 'Custom Site' }]; + } +} else { + // Default site ID for testing + options.siteId = '9ae8877a-bbf3-407d-9adb-d6a72ce3c5e3'; + const defaultSite = SITES.find(s => s.id === options.siteId); + sitesToProcess = [defaultSite]; + console.log(`[INFO] Using default site ID: ${options.siteId}`); +} + +// Process sites +async function processSites() { + const allResults = []; + + for (let i = 0; i < sitesToProcess.length; i++) { + const site = sitesToProcess[i]; + + try { + console.log(`\n[INFO] Processing site ${i + 1}/${sitesToProcess.length}: ${site.name} (${site.id})`); + + // Create checker for this site + const siteOptions = { ...options, siteId: site.id }; + const checker = new BrokenBacklinksFixChecker(siteOptions); + + await checker.run(); + + // Collect results if consolidating + if (options.consolidate && sitesToProcess.length > 1) { + // Add site info to each result for consolidation using SITES constant names + const 
resultsWithSiteInfo = checker.results.map(result => ({ + ...result, + siteId: site.id, + siteName: site.name // Use the friendly name from SITES constant + })); + allResults.push(...resultsWithSiteInfo); + } + + // Add delay between sites to avoid overwhelming servers + if (i < sitesToProcess.length - 1) { + console.log(`[INFO] Waiting 2 seconds before next site...`); + await new Promise(resolve => setTimeout(resolve, 2000)); + } + + } catch (error) { + console.error(`[ERROR] Failed to process ${site.name}: ${error.message}`); + if (options.verbose) { + console.error(error.stack); + } + continue; + } + } + + // Generate consolidated CSV if requested + if (options.consolidate && sitesToProcess.length > 1 && allResults.length > 0) { + generateConsolidatedCSV(allResults); + } +} + +// Generate consolidated CSV for multiple sites +function generateConsolidatedCSV(allResults) { + if (allResults.length === 0) { + console.log('📊 No results to consolidate'); + return; + } + + const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); + const filename = `consolidated-broken-backlinks-all-sites-${timestamp}Z.csv`; + + // Generate CSV with proper site info from each result + const csvRows = allResults.map(result => formatBrokenBacklinksResult(result, result.siteId, result.siteName)); + const csvContent = [ + BROKEN_BACKLINKS_CSV_HEADERS.join(','), + ...csvRows.map(row => row.join(',')) + ].join('\n'); + + writeFileSync(filename, csvContent); + console.log(`📊 Consolidated broken backlinks CSV generated: ${filename} (${allResults.length} total results)`); +} + +// Run the processing +processSites().catch(error => { + console.error('Fatal error:', error.message); + process.exit(1); +}); diff --git a/scripts/opportunities/broken-internal-links/index.mjs b/scripts/opportunities/broken-internal-links/index.mjs new file mode 100644 index 000000000..f259cffb7 --- /dev/null +++ b/scripts/opportunities/broken-internal-links/index.mjs @@ -0,0 +1,714 @@ 
#!/usr/bin/env node

/*
 * Copyright 2025 Adobe. All rights reserved.
 * This file is licensed to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under
 * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
 * OF ANY KIND, either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

/**
 * Internal Links Fix Checker
 *
 * This script checks if broken internal links from existing suggestions have been fixed.
 * It tests the current status of broken URLs and determines if fixes have been implemented.
 *
 * Features:
 * - Tests current URL status after following redirects (status < 400 = accessible)
 * - Checks if AI-suggested URLs were implemented via:
 *   1. Redirects (first hop or final destination) to suggested URLs
 *   2. Source page link updates to suggested URLs
 * - Detects link removal from source pages via the S3 scrape of the source page
 * - Multi-site processing with consolidation
 * - Reads outdated + fixed suggestions per opportunity
 *
 * Fix Types:
 * - AI_SUGGESTION_IMPLEMENTED: Redirect or link update matches AI suggestion
 * - URL_NOW_WORKS: URL answers 200 OK without redirecting
 * - URL_ACCESSIBLE: URL answers another success status (< 400) without redirecting
 * - URL_REDIRECTS_ELSEWHERE: URL redirects to a working page that is not a suggested URL
 * - LINK_REMOVED_FROM_SOURCE: Broken link removed from source page
 * - STILL_BROKEN: URL still returns error (>= 400) or is unreachable
 */

import { writeFileSync } from 'fs';
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
import { Command } from 'commander';
// Using simple console logger instead of shared-utils dependency
import { createDataAccess } from '@adobe/spacecat-shared-data-access';
import { SITES } from '../../constants.js';
import { writeInternalLinksCSV, formatInternalLinksResult, INTERNAL_LINKS_CSV_HEADERS } from '../../csv-utils.js';
import { createFixEntityForSuggestion } from '../../create-fix-entity.js';

// Timeout for the accessibility probe (same value the original code intended to use).
const URL_CHECK_TIMEOUT_MS = 3000;
// Timeout for the manual first-hop redirect probe.
const REDIRECT_CHECK_TIMEOUT_MS = 10000;
// Pause between URL probes and between sites, to avoid overwhelming servers.
const URL_CHECK_DELAY_MS = 100;
const SITE_DELAY_MS = 2000;
// Scrape bodies shorter than this are treated as "no usable HTML".
const MIN_SCRAPE_HTML_LENGTH = 50;
// Site used when no site selection flag is given (testing default).
const DEFAULT_SITE_ID = '9ae8877a-bbf3-407d-9adb-d6a72ce3c5e3';

/**
 * Read a value from an entity that may expose it through a getter method or as a plain
 * property.
 *
 * @param {object} entity - Model instance or plain object.
 * @param {string} getterName - Name of the getter method (e.g. 'getStatus').
 * @param {string} propName - Name of the plain property used when there is no getter.
 * @param {*} [fallback] - Value returned when the plain property is null/undefined.
 * @returns {*} The getter result, the property value, or the fallback.
 */
function readAttr(entity, getterName, propName, fallback) {
  if (typeof entity[getterName] === 'function') {
    return entity[getterName]();
  }
  return entity[propName] ?? fallback;
}

/**
 * Internal Links Fix Checker Class
 */
class InternalLinksFixChecker {
  /**
   * @param {object} [options] - Checker options; unknown keys are kept as-is.
   * @param {string|null} [options.siteId] - Site to check.
   * @param {boolean} [options.verbose] - Enable debug logging.
   * @param {number|null} [options.limit] - Maximum number of suggestions to test.
   * @param {boolean} [options.markFixed] - Create fix entities for AI-implemented fixes.
   * @param {boolean} [options.dryRun] - When true, only log what would be created.
   */
  constructor(options = {}) {
    this.options = {
      siteId: null,
      verbose: false,
      limit: null,
      markFixed: false,
      dryRun: true,
      ...options,
    };

    this.log = this.createSimpleLogger(this.options.verbose);

    this.results = [];
    this.dataAccess = null;
    this.site = null;
    this.opportunityStatusMap = {};
    this.opportunityDataMap = {};
  }

  /**
   * Create simple console logger. Each method takes a single, already formatted message.
   *
   * @param {boolean} verbose - When false, debug messages are discarded.
   * @returns {{info: Function, debug: Function, error: Function}}
   */
  createSimpleLogger(verbose) {
    return {
      info: (msg) => console.log(`[INFO] ${msg}`),
      debug: verbose ? (msg) => console.log(`[DEBUG] ${msg}`) : () => {},
      error: (msg) => console.error(`[ERROR] ${msg}`),
    };
  }

  /**
   * Initialize data access, load the site and set up the S3 client.
   *
   * @throws {Error} When the site cannot be found or data access fails.
   */
  async initializeDataAccess() {
    this.log.info('Initializing data access...');

    try {
      // Set up required environment variables for data access
      if (!process.env.DYNAMO_TABLE_NAME_DATA) {
        process.env.DYNAMO_TABLE_NAME_DATA = 'spacecat-services-data';
        this.log.debug('Set default DYNAMO_TABLE_NAME_DATA');
      }

      if (!process.env.S3_SCRAPER_BUCKET_NAME) {
        process.env.S3_SCRAPER_BUCKET_NAME = 'spacecat-prod-scraper';
        this.log.debug('Set default S3_SCRAPER_BUCKET_NAME');
      }

      // Initialize data access with configuration
      const config = {
        tableNameData: process.env.DYNAMO_TABLE_NAME_DATA,
        indexNameAllByStatus: 'gsi1pk-gsi1sk-index',
        indexNameAllBySiteId: 'gsi2pk-gsi2sk-index',
      };

      this.dataAccess = createDataAccess(config);
      const { Site } = this.dataAccess;

      this.site = await Site.findById(this.options.siteId);
      if (!this.site) {
        throw new Error(`Site not found: ${this.options.siteId}`);
      }

      // Setup S3 client for source-page scraping detection
      this.s3Client = new S3Client({ region: process.env.AWS_REGION || 'us-east-1' });
      this.s3Bucket = process.env.S3_SCRAPER_BUCKET_NAME;

      this.log.info(`✓ Site found: ${this.site.getBaseURL()}`);
    } catch (error) {
      // The logger only accepts one argument, so the message must be interpolated.
      this.log.error(`Failed to initialize data access: ${error.message}`);
      throw error;
    }
  }

  /**
   * Main execution method: load suggestions, test them, write the CSV, print the summary
   * and (with --markFixed) create fix entities.
   */
  async run() {
    try {
      await this.initializeDataAccess();

      const existingSuggestions = await this.getExistingSuggestions();
      if (existingSuggestions.length === 0) {
        this.log.info('No internal links suggestions found for this site');
        return;
      }

      await this.checkSuggestionsFixes(existingSuggestions);

      if (this.results.length === 0) {
        this.log.info('No results to report');
        return;
      }

      this.generateCSV();
      this.printSummary();

      // markFixedSuggestions() honours dryRun itself (it only logs in dry-run mode),
      // so it is safe to call whenever --markFixed is set.
      if (this.options.markFixed) {
        await this.markFixedSuggestions();
      }
    } catch (error) {
      this.log.error(`Failed to run internal links fix checker: ${error.message}`);
      throw error;
    }
  }

  /**
   * Get existing internal links suggestions (status 'outdated' and 'fixed') from the
   * database and build the opportunity lookup maps used when assembling results.
   *
   * @returns {Promise<Array>} Suggestions, per opportunity: outdated first, then fixed.
   */
  async getExistingSuggestions() {
    this.log.debug('Fetching existing internal links suggestions...');

    const { Opportunity, Suggestion } = this.dataAccess;
    const allOpportunities = await Opportunity.allBySiteId(this.options.siteId);

    // Debug: Log all opportunity types found
    if (this.options.verbose) {
      const opportunityTypes = [...new Set(allOpportunities.map((opp) => opp.getType()))];
      this.log.debug(`All opportunity types found: ${opportunityTypes.join(', ')}`);
    }

    // Filter for internal links opportunities
    const internalLinksOpportunities = allOpportunities.filter(
      (opportunity) => opportunity.getType() === 'broken-internal-links',
    );

    this.log.debug(`Found ${internalLinksOpportunities.length} internal links opportunities`);

    // Create opportunity data map for later use
    this.opportunityStatusMap = {};
    this.opportunityDataMap = {};
    for (const opportunity of internalLinksOpportunities) {
      const oppId = opportunity.getId();
      const status = readAttr(opportunity, 'getStatus', 'status', 'unknown');
      this.opportunityStatusMap[oppId] = status;
      this.opportunityDataMap[oppId] = {
        status,
        createdAt: readAttr(opportunity, 'getCreatedAt', 'createdAt', ''),
        updatedAt: readAttr(opportunity, 'getUpdatedAt', 'updatedAt', ''),
      };
    }

    const suggestions = [];
    for (const opportunity of internalLinksOpportunities) {
      const opptyId = opportunity.getId();
      // The two status queries are independent, so run them in parallel.
      const [outdatedSuggestions, fixedSuggestions] = await Promise.all([
        Suggestion.allByOpportunityIdAndStatus(opptyId, 'outdated'),
        Suggestion.allByOpportunityIdAndStatus(opptyId, 'fixed'),
      ]);
      suggestions.push(...outdatedSuggestions, ...fixedSuggestions);
    }

    this.log.debug(`Found ${suggestions.length} outdated + fixed internal links suggestions`);
    return suggestions;
  }

  /**
   * Test every suggestion (up to options.limit) and append one record per suggestion to
   * this.results.
   *
   * @param {Array} existingSuggestions - Suggestions returned by getExistingSuggestions().
   */
  async checkSuggestionsFixes(existingSuggestions) {
    this.log.info('Checking if internal links suggestions are fixed...');

    const suggestionsToCheck = this.options.limit
      ? existingSuggestions.slice(0, this.options.limit)
      : existingSuggestions;

    this.log.info(`Testing ${suggestionsToCheck.length} suggestions`);

    for (const [index, suggestion] of suggestionsToCheck.entries()) {
      const suggestionData = readAttr(suggestion, 'getData', 'data');

      if (!suggestionData || !suggestionData.urlFrom || !suggestionData.urlTo) {
        this.log.debug(`Skipping invalid suggestion: ${readAttr(suggestion, 'getId', 'id', 'unknown')}`);
        continue;
      }

      const {
        urlFrom, urlTo, urlsSuggested, aiRationale, trafficDomain,
      } = suggestionData;
      const suggestedList = Array.isArray(urlsSuggested) ? urlsSuggested : [];

      this.log.debug(`Testing ${index + 1}/${suggestionsToCheck.length}: ${urlTo}`);

      // Test current status of the broken URL
      const currentStatus = await this.testUrlStatus(urlTo);
      await this.delay(URL_CHECK_DELAY_MS); // Rate limiting

      const currentStatusDisplay = currentStatus.success ? currentStatus.statusCode : 'ERROR';
      let verdict = await this.detectUrlFix(urlTo, suggestedList, currentStatus);

      // Enhancement: detect link removal/update in source page (urlFrom) via S3 scrape
      if (!verdict.isFixed) {
        try {
          verdict = (await this.detectSourcePageFix(urlFrom, urlTo, suggestedList)) ?? verdict;
        } catch (err) {
          this.log.debug(`Source page scraping check failed for ${urlFrom}: ${err.message}`);
        }
      }

      const {
        isFixed, fixType, aiSuggestionImplemented, linkRemovedFromSource, detectionMethod,
      } = verdict;

      // Get opportunity data from our pre-built map (no additional API call!)
      const opportunityId = readAttr(suggestion, 'getOpportunityId', 'opportunityId', 'unknown');
      const opportunityData = this.opportunityDataMap[opportunityId] || {};

      this.results.push({
        // Core identity
        siteId: this.options.siteId,
        siteName: this.site.getBaseURL(),
        opportunityId,
        opportunityStatus: opportunityData.status || 'unknown',
        suggestionId: readAttr(suggestion, 'getId', 'id', 'unknown'),

        // Suggestion details
        suggestionType: readAttr(suggestion, 'getType', 'type'),
        suggestionStatus: readAttr(suggestion, 'getStatus', 'status'),
        suggestionRank: readAttr(suggestion, 'getRank', 'rank'),
        urlFrom,
        urlTo,
        trafficDomain: trafficDomain || 0,

        // AI recommendations
        urlsSuggested: Array.isArray(urlsSuggested) ? urlsSuggested.join(', ') : (urlsSuggested || ''),
        aiRationale: aiRationale || '',
        recommendedAction: isFixed ? 'MARK AS FIXED' : 'KEEP CURRENT STATUS',

        // Fix detection results
        linkFixed: isFixed,
        aiSuggestionImplemented,
        linkRemovedFromSource,
        fixType,
        fixMethod: '',
        currentStatusCode: currentStatusDisplay,
        redirectLocation: currentStatus.redirectLocation || '',
        detectionMethod,

        // Timestamps and metadata
        opportunityCreated: opportunityData.createdAt || '',
        opportunityUpdated: opportunityData.updatedAt || '',
        suggestionCreated: readAttr(suggestion, 'getCreatedAt', 'createdAt', ''),
        suggestionUpdated: readAttr(suggestion, 'getUpdatedAt', 'updatedAt', ''),
        updatedBy: readAttr(suggestion, 'getUpdatedBy', 'updatedBy', ''),
        testDate: new Date().toISOString(),
        suggestion, // Store suggestion reference for fix entity creation
      });

      if (isFixed) {
        const methodInfo = detectionMethod ? ` via ${detectionMethod}` : '';
        const aiInfo = aiSuggestionImplemented ? ' [AI IMPLEMENTED]' : '';
        this.log.info(`✅ FIXED: ${urlTo} (${fixType}${methodInfo}${aiInfo})`);
      } else {
        this.log.debug(`❌ NOT FIXED: ${urlTo} (still ${currentStatusDisplay})`);
      }
    }
  }

  /**
   * Classify a URL from its probe result alone.
   *
   * @param {string} urlTo - The formerly broken URL.
   * @param {string[]} suggestedUrls - AI-suggested replacement URLs (may be empty).
   * @param {object} currentStatus - Result of testUrlStatus(urlTo).
   * @returns {Promise<object>} Verdict with isFixed, fixType, aiSuggestionImplemented,
   *   linkRemovedFromSource and detectionMethod.
   */
  async detectUrlFix(urlTo, suggestedUrls, currentStatus) {
    const verdict = {
      isFixed: false,
      fixType: 'STILL_BROKEN',
      aiSuggestionImplemented: false,
      linkRemovedFromSource: false,
      detectionMethod: '',
    };

    // Unreachable, or final status >= 400 after following redirects: still broken.
    if (!currentStatus.success || !currentStatus.isAccessible) {
      return verdict;
    }

    verdict.isFixed = true;

    if (!currentStatus.redirected) {
      verdict.fixType = currentStatus.statusCode === 200 ? 'URL_NOW_WORKS' : 'URL_ACCESSIBLE';
      return verdict;
    }

    // A redirect to a working page exists. It counts as the AI suggestion when either
    // the final destination or the first redirect hop is one of the suggested URLs.
    const matchesSuggestion = suggestedUrls.length > 0
      && (this.matchesSuggestedUrl(currentStatus.finalUrl, suggestedUrls, urlTo)
        || await this.checkRedirectToSuggested(urlTo, suggestedUrls));

    if (matchesSuggestion) {
      verdict.aiSuggestionImplemented = true;
      verdict.fixType = 'AI_SUGGESTION_IMPLEMENTED';
      verdict.detectionMethod = 'REDIRECT_TO_SUGGESTED_URL';
    } else {
      verdict.fixType = 'URL_REDIRECTS_ELSEWHERE';
    }
    return verdict;
  }

  /**
   * Classify a still-broken URL by inspecting the scraped source page: the link may have
   * been replaced by a suggested URL, or removed altogether.
   *
   * @param {string} urlFrom - Page that contained the broken link.
   * @param {string} urlTo - The broken link target.
   * @param {string[]} suggestedUrls - AI-suggested replacement URLs (may be empty).
   * @returns {Promise<object|null>} A "fixed" verdict, or null when the scrape is
   *   unavailable or the broken link is still present.
   * @throws {Error} When the scrape cannot be read from S3 or is not valid JSON.
   */
  async detectSourcePageFix(urlFrom, urlTo, suggestedUrls) {
    const html = await this.loadSourcePageHtml(urlFrom);
    if (html === null || this.htmlHasHref(html, urlFrom, urlTo)) {
      return null;
    }

    const updatedToSuggestion = suggestedUrls.some(
      (suggestedUrl) => this.htmlHasHref(html, urlFrom, suggestedUrl),
    );

    if (updatedToSuggestion) {
      return {
        isFixed: true,
        fixType: 'AI_SUGGESTION_IMPLEMENTED',
        aiSuggestionImplemented: true,
        linkRemovedFromSource: false,
        detectionMethod: 'SOURCE_PAGE_LINK_UPDATED',
      };
    }

    return {
      isFixed: true,
      fixType: 'LINK_REMOVED_FROM_SOURCE',
      aiSuggestionImplemented: false,
      linkRemovedFromSource: true,
      detectionMethod: 'SOURCE_PAGE_SCRAPING',
    };
  }

  /**
   * Test URL accessibility. Redirects are followed, so statusCode is the status of the
   * final response; a URL is considered accessible if that status is < 400.
   *
   * @param {string} url - URL to probe.
   * @returns {Promise<object>} On success: { success: true, statusCode, redirected,
   *   finalUrl, redirectLocation, isAccessible }. On network error/timeout:
   *   { success: false, error, isAccessible: false }.
   */
  async testUrlStatus(url) {
    try {
      // The Fetch API has no `timeout` option; an abort signal is the supported way to
      // bound the request.
      const response = await fetch(url, { signal: AbortSignal.timeout(URL_CHECK_TIMEOUT_MS) });
      const { status } = response;

      // Only the status and final URL are needed; release the connection. A failure to
      // cancel the body does not affect the result, so it is deliberately ignored.
      if (response.body) {
        await response.body.cancel().catch(() => {});
      }

      // Log client errors other than 404
      if (status >= 400 && status < 500 && status !== 404) {
        this.log.debug(`Warning: ${url} returned client error: ${status}`);
      }

      return {
        success: true,
        statusCode: status,
        redirected: response.redirected,
        finalUrl: response.url,
        // After following redirects there is no Location header left to read; the final
        // URL is the observable redirect destination.
        redirectLocation: response.redirected ? response.url : '',
        isAccessible: status < 400,
      };
    } catch (error) {
      const timedOut = error.name === 'TimeoutError' || error.code === 'ETIMEOUT';
      this.log.debug(`Error checking ${url}: ${timedOut ? `Request timed out after ${URL_CHECK_TIMEOUT_MS}ms` : error.message}`);
      return {
        success: false,
        error: error.message,
        isAccessible: false, // Any error means URL is inaccessible
      };
    }
  }

  /**
   * Check whether a URL equals any of the suggested URLs after normalization.
   *
   * @param {string} candidateUrl - URL to compare (absolute or relative to baseUrl).
   * @param {string[]} suggestedUrls - Suggested URLs (absolute or relative to baseUrl).
   * @param {string} baseUrl - Base used to resolve relative URLs.
   * @returns {boolean} True when the normalized hrefs are identical.
   */
  matchesSuggestedUrl(candidateUrl, suggestedUrls, baseUrl) {
    let normalizedCandidate;
    try {
      normalizedCandidate = new URL(candidateUrl, baseUrl).href;
    } catch {
      return false;
    }

    return suggestedUrls.some((suggestedUrl) => {
      try {
        return new URL(suggestedUrl, baseUrl).href === normalizedCandidate;
      } catch {
        return false; // Unparseable suggestion can never match
      }
    });
  }

  /**
   * Check if the first redirect hop of a URL points to any of the suggested URLs.
   *
   * @param {string} originalUrl - URL to probe with a non-following HEAD request.
   * @param {string[]} suggestedUrls - Suggested URLs.
   * @returns {Promise<boolean>} True on a match; false on no redirect, no match or error.
   */
  async checkRedirectToSuggested(originalUrl, suggestedUrls) {
    try {
      const response = await fetch(originalUrl, {
        method: 'HEAD',
        redirect: 'manual',
        signal: AbortSignal.timeout(REDIRECT_CHECK_TIMEOUT_MS),
      });

      const redirectLocation = response.headers.get('location');
      if (!redirectLocation) {
        return false;
      }

      return this.matchesSuggestedUrl(redirectLocation, suggestedUrls, originalUrl);
    } catch (error) {
      this.log.debug(`Error checking redirect for ${originalUrl}: ${error.message}`);
      return false;
    }
  }

  // --- Helpers for urlFrom scraping detection ---

  /**
   * Build the S3 key of the scrape result for a page.
   *
   * @param {string} url - Absolute page URL.
   * @param {string} siteId - Site ID.
   * @returns {string|null} S3 key, or null when the URL cannot be parsed.
   */
  getScrapeJsonPath(url, siteId) {
    try {
      const pathname = new URL(url).pathname.replace(/\/$/, '');
      return `scrapes/${siteId}${pathname}/scrape.json`;
    } catch (e) {
      return null;
    }
  }

  /**
   * Download and parse a JSON object from S3.
   *
   * @param {string} bucket - Bucket name.
   * @param {string|null} key - Object key; a falsy key yields null.
   * @returns {Promise<object|null>} Parsed JSON.
   * @throws {Error} When the object cannot be fetched or is not valid JSON.
   */
  async getObjectJson(bucket, key) {
    if (!key) {
      return null;
    }
    const res = await this.s3Client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
    const body = await res.Body.transformToString();
    try {
      return JSON.parse(body);
    } catch (e) {
      throw new Error(`Invalid JSON at s3://${bucket}/${key}`, { cause: e });
    }
  }

  /**
   * Escape a string for literal use inside a regular expression.
   */
  escapeForRegex(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /**
   * List the href spellings under which urlTo may appear in the source page: absolute
   * URL, path with query and hash, and path with query only.
   *
   * @returns {string[]} Unique candidates; [urlTo] when the URL cannot be parsed.
   */
  normalizeHrefCandidates(urlFrom, urlTo) {
    try {
      const to = new URL(urlTo, urlFrom);
      const noHash = to.pathname + (to.search || '');
      return [...new Set([to.href, noHash + (to.hash || ''), noHash])];
    } catch {
      return [urlTo];
    }
  }

  /**
   * Load the scraped HTML of a source page, lower-cased for case-insensitive matching.
   *
   * @param {string} urlFrom - Source page URL.
   * @returns {Promise<string|null>} Lower-cased HTML, or null when there is no usable
   *   scrape (unparseable URL, missing or very short rawBody).
   * @throws {Error} When the scrape cannot be read from S3 or is not valid JSON.
   */
  async loadSourcePageHtml(urlFrom) {
    const key = this.getScrapeJsonPath(urlFrom, this.options.siteId);
    if (!key) {
      return null;
    }
    const obj = await this.getObjectJson(this.s3Bucket, key);
    const html = obj?.scrapeResult?.rawBody;
    if (typeof html !== 'string' || html.length < MIN_SCRAPE_HTML_LENGTH) {
      return null;
    }
    return html.toLowerCase();
  }

  /**
   * Check whether lower-cased HTML contains an href attribute (single or double quoted)
   * whose value is exactly one of the spellings of the target URL.
   *
   * @param {string} lowerHtml - Lower-cased HTML.
   * @param {string} urlFrom - Source page URL, used to resolve relative targets.
   * @param {string} targetUrl - Link target to look for.
   * @returns {boolean}
   */
  htmlHasHref(lowerHtml, urlFrom, targetUrl) {
    return this.normalizeHrefCandidates(urlFrom, targetUrl).some((candidate) => {
      const escaped = this.escapeForRegex(candidate.toLowerCase());
      return new RegExp(`href\\s*=\\s*(["'])${escaped}\\1`, 'i').test(lowerHtml);
    });
  }

  /**
   * @returns {Promise<boolean|null>} true when no href to urlTo is found in the scraped
   *   source page, false when one is still present, null when no usable scrape exists.
   * @throws {Error} When the scrape cannot be read from S3 or is not valid JSON.
   */
  async wasLinkRemovedFromSource(urlFrom, urlTo) {
    const html = await this.loadSourcePageHtml(urlFrom);
    if (html === null) {
      return null;
    }
    return !this.htmlHasHref(html, urlFrom, urlTo);
  }

  /**
   * @returns {Promise<boolean>} true when the scraped source page links to any suggested
   *   URL; false otherwise, including when the scrape is unavailable or unreadable.
   */
  async wasLinkUpdatedToSuggestion(urlFrom, urlsSuggested) {
    try {
      const html = await this.loadSourcePageHtml(urlFrom);
      if (html === null) {
        return false;
      }
      return urlsSuggested.some((suggestedUrl) => this.htmlHasHref(html, urlFrom, suggestedUrl));
    } catch (err) {
      this.log.debug(`Error checking if link was updated to suggestion: ${err.message}`);
      return false;
    }
  }

  /**
   * Add delay to avoid overwhelming servers
   */
  delay(ms) {
    return new Promise((resolve) => {
      setTimeout(resolve, ms);
    });
  }

  /**
   * Generate CSV report
   *
   * @returns {string} Name of the written file, as returned by writeInternalLinksCSV.
   */
  generateCSV() {
    const filename = writeInternalLinksCSV(this.results, this.options.siteId, this.site?.getBaseURL() || 'Unknown Site');
    this.log.info(`📊 Comprehensive internal links CSV report generated: ${filename}`);
    return filename;
  }

  /**
   * Create fix entities for results where the AI suggestion was implemented.
   * In dry-run mode nothing is written; the intended actions are only logged.
   */
  async markFixedSuggestions() {
    const fixedResults = this.results.filter((r) => r.aiSuggestionImplemented);

    if (fixedResults.length === 0) {
      this.log.info('No suggestions to mark as fixed');
      return;
    }

    this.log.info(`Creating fix entities for ${fixedResults.length} fixed suggestions`);

    for (const result of fixedResults) {
      if (this.options.dryRun) {
        this.log.info(`Would create fix entity for ${result.suggestionId} (dry run)`);
        continue;
      }
      try {
        await createFixEntityForSuggestion(this.dataAccess, result.suggestion, { logger: this.log });
      } catch (error) {
        // Best effort: one failed entity must not stop the remaining ones.
        this.log.error(`Failed to create fix entity for ${result.suggestionId}: ${error.message}`);
      }
    }
  }

  /**
   * Print summary statistics
   */
  printSummary() {
    const totalSuggestions = this.results.length;
    const fixedResults = this.results.filter((r) => r.linkFixed);
    const fixedByAI = this.results.filter((r) => r.aiSuggestionImplemented).length;
    const fixedOverall = fixedResults.length;

    this.log.info('');
    this.log.info('=== INTERNAL LINKS SUMMARY ===');
    this.log.info(`Total suggestions processed: ${totalSuggestions}`);
    this.log.info(`AI Suggestions Implemented: ${fixedByAI}`);
    this.log.info(`Fixed overall: ${fixedOverall}`);
    this.log.info(`Not fixed: ${totalSuggestions - fixedOverall}`);

    if (fixedOverall === 0) {
      return;
    }

    this.log.info('');
    this.log.info('Fixed links by type:');
    const fixedByType = {};
    for (const { fixType } of fixedResults) {
      fixedByType[fixType] = (fixedByType[fixType] || 0) + 1;
    }

    for (const [type, count] of Object.entries(fixedByType)) {
      this.log.info(`  ${type}: ${count}`);
    }
  }
}

// CLI setup
const program = new Command();
program
  .name('check-internal-links-fixed')
  .description('Check if broken internal links from suggestions have been fixed')
  .option('--siteId <siteId>', 'Site ID to check')
  .option('--verbose', 'Enable verbose logging', false)
  // Explicit radix: commander passes the previous value as the second argument.
  .option('--limit <number>', 'Limit number of suggestions to check', (value) => Number.parseInt(value, 10))
  .option('--markFixed', 'Mark fixed suggestions in database', false)
  .option('--dryRun', 'Dry run mode (default: true)', true)
  // Without a negatable form the default of true could never be turned off.
  .option('--no-dryRun', 'Disable dry run mode and write changes')
  .option('--allSites', 'Process all sites from constants.js', false)
  .option('--sites <ids>', 'Comma-separated list of site IDs to process')
  .option('--consolidate', 'Generate consolidated CSV for multiple sites', false);

program.parse();
const options = program.opts();

/**
 * Resolve the list of sites to process from the CLI options. Exits the process when
 * --siteId names a site that is not in SITES.
 *
 * @param {object} cliOptions - Parsed commander options; siteId is set when the default
 *   site is used.
 * @returns {Array<{id: string, name: string}>} Sites to process.
 */
function resolveSitesToProcess(cliOptions) {
  if (cliOptions.allSites) {
    console.log(`[INFO] Processing all ${SITES.length} sites`);
    return SITES;
  }

  if (cliOptions.sites) {
    const siteIds = new Set(cliOptions.sites.split(',').map((id) => id.trim()).filter(Boolean));
    const selected = SITES.filter((site) => siteIds.has(site.id));
    console.log(`[INFO] Processing ${selected.length} specified sites`);
    return selected;
  }

  if (cliOptions.siteId) {
    const site = SITES.find((s) => s.id === cliOptions.siteId);
    if (!site) {
      console.error(`[ERROR] Site ID not found in constants: ${cliOptions.siteId}`);
      process.exit(1);
    }
    return [site];
  }

  // Default site ID for testing
  cliOptions.siteId = DEFAULT_SITE_ID;
  console.log(`[INFO] Using default site ID: ${cliOptions.siteId}`);
  // Fall back to a placeholder entry so a missing constant cannot put `undefined`
  // into the list.
  return [SITES.find((s) => s.id === DEFAULT_SITE_ID) ?? { id: DEFAULT_SITE_ID, name: 'Default Site' }];
}

const sitesToProcess = resolveSitesToProcess(options);

/**
 * Generate consolidated CSV for multiple sites.
 * NOTE(review): rows are joined with ',' as-is; this assumes formatInternalLinksResult
 * already escapes cell values — confirm in csv-utils.js.
 *
 * @param {Array<object>} allResults - Result records carrying siteId and siteName.
 */
function generateConsolidatedCSV(allResults) {
  if (allResults.length === 0) {
    console.log('📊 No results to consolidate');
    return;
  }

  const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
  const filename = `consolidated-internal-links-all-sites-${timestamp}Z.csv`;

  // Generate CSV with proper site info from each result
  const csvRows = allResults.map(
    (result) => formatInternalLinksResult(result, result.siteId, result.siteName),
  );
  const csvContent = [
    INTERNAL_LINKS_CSV_HEADERS.join(','),
    ...csvRows.map((row) => row.join(',')),
  ].join('\n');

  writeFileSync(filename, csvContent);
  console.log(`📊 Consolidated internal links CSV generated: ${filename} (${allResults.length} total results)`);
}

/**
 * Run the checker for every selected site, one after another. A failing site is logged
 * and skipped; the remaining sites are still processed.
 */
async function processSites() {
  const consolidating = options.consolidate && sitesToProcess.length > 1;
  const allResults = [];

  for (const [index, site] of sitesToProcess.entries()) {
    try {
      console.log(`\n[INFO] Processing site ${index + 1}/${sitesToProcess.length}: ${site.name} (${site.id})`);

      // Create checker for this site
      const checker = new InternalLinksFixChecker({ ...options, siteId: site.id });
      await checker.run();

      if (consolidating) {
        // Use the friendly name from the SITES constant in the consolidated report
        allResults.push(...checker.results.map((result) => ({
          ...result,
          siteId: site.id,
          siteName: site.name,
        })));
      }

      // Add delay between sites to avoid overwhelming servers
      if (index < sitesToProcess.length - 1) {
        console.log('[INFO] Waiting 2 seconds before next site...');
        await new Promise((resolve) => {
          setTimeout(resolve, SITE_DELAY_MS);
        });
      }
    } catch (error) {
      console.error(`[ERROR] Failed to process ${site.name}: ${error.message}`);
      if (options.verbose) {
        console.error(error.stack);
      }
    }
  }

  // Generate consolidated CSV if requested
  if (consolidating && allResults.length > 0) {
    generateConsolidatedCSV(allResults);
  }
}

// Run the processing
processSites().catch((error) => {
  console.error('Fatal error:', error.message);
  process.exit(1);
});
--git a/scripts/opportunities/cwv/index.mjs b/scripts/opportunities/cwv/index.mjs new file mode 100644 index 000000000..de0892480 --- /dev/null +++ b/scripts/opportunities/cwv/index.mjs @@ -0,0 +1,703 @@ +#!/usr/bin/env node + +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/** + * Core Web Vitals Fix Checker + * + * This script checks if Core Web Vitals performance issues from existing suggestions have been fixed. + * It compares historical CWV metrics with current performance data to identify improvements. 
+ * + * Features: + * - Comprehensive 26-column raw data schema + * - Compares current vs historical CWV metrics (LCP, CLS, INP) + * - Identifies performance improvements and regressions + * - Multi-site processing with consolidation + * - Efficient database queries (outdated + fixed suggestions) + */ + +import { writeFileSync } from 'fs'; +import dotenv from 'dotenv'; +// Using simple console logger instead of shared-utils dependency +import { createDataAccess, Audit } from '@adobe/spacecat-shared-data-access'; + +// Load environment variables from .env file +dotenv.config(); +import RUMAPIClient from '@adobe/spacecat-shared-rum-api-client'; +import { SITES } from '../../constants.js'; +import { writeCWVCSV, formatCWVResult, CWV_CSV_HEADERS } from '../../csv-utils.js'; +import { createFixEntityForSuggestion } from '../../create-fix-entity.js'; + +// CWV Thresholds (from handler) +const THRESHOLDS = { + lcp: 2500, // ms + cls: 0.1, // score + inp: 200, // ms +}; + +const INTERVAL = 7; // days +const DAILY_THRESHOLD = 100; +const auditType = Audit.AUDIT_TYPES.CWV; + +/** + * Core Web Vitals Fix Checker Class + */ +class CWVFixChecker { + constructor(options = {}) { + this.options = { + siteId: null, + verbose: false, + limit: null, + markFixed: false, + dryRun: true, + ...options + }; + + this.log = this.createSimpleLogger(this.options.verbose); + + this.results = []; + this.dataAccess = null; + this.site = null; + this.rumAPIClient = null; + this.opportunityStatusMap = {}; + this.opportunityDataMap = {}; + } + + /** + * Create simple console logger + */ + createSimpleLogger(verbose) { + return { + info: (msg) => console.log(`[INFO] ${msg}`), + debug: verbose ? 
(msg) => console.log(`[DEBUG] ${msg}`) : () => {}, + error: (msg) => console.error(`[ERROR] ${msg}`) + }; + } + + /** + * Initialize data access and validate site + */ + async initializeDataAccess() { + this.log.info('Initializing data access...'); + + try { + // Set up required environment variables for data access + if (!process.env.DYNAMO_TABLE_NAME_DATA) { + process.env.DYNAMO_TABLE_NAME_DATA = 'spacecat-services-data'; + this.log.debug('Set default DYNAMO_TABLE_NAME_DATA'); + } + + if (!process.env.S3_SCRAPER_BUCKET_NAME) { + process.env.S3_SCRAPER_BUCKET_NAME = 'spacecat-prod-scraper'; + this.log.debug('Set default S3_SCRAPER_BUCKET_NAME'); + } + + // Check for required RUM API environment variables + if (!process.env.RUM_DOMAIN_KEY) { + this.log.error('❌ Missing required environment variable: RUM_DOMAIN_KEY'); + this.log.error(' This is needed for RUM API access to fetch current CWV data'); + } + + // Initialize data access with configuration + const config = { + tableNameData: process.env.DYNAMO_TABLE_NAME_DATA, + indexNameAllByStatus: 'gsi1pk-gsi1sk-index', + indexNameAllBySiteId: 'gsi2pk-gsi2sk-index' + }; + + this.dataAccess = createDataAccess(config); + const { Site } = this.dataAccess; + + this.site = await Site.findById(this.options.siteId); + if (!this.site) { + throw new Error(`Site not found: ${this.options.siteId}`); + } + + this.log.info(`✓ Site found: ${this.site.getBaseURL()}`); + + } catch (error) { + this.log.error('Failed to initialize data access:', error.message); + throw error; + } + } + + /** + * Main execution method + */ + async run() { + try { + await this.initializeDataAccess(); + + const existingSuggestions = await this.getExistingSuggestions(); + if (existingSuggestions.length === 0) { + this.log.info('No CWV suggestions found for this site'); + return; + } + + // Get current CWV data for comparison + const currentCWVData = await this.getCurrentCWVData(); + + await this.checkSuggestionsFixes(existingSuggestions, currentCWVData); + 
+ if (this.results.length > 0) { + this.generateCSV(); + this.printSummary(); + + if (this.options.markFixed && !this.options.dryRun) { + await this.markFixedSuggestions(); + } + } else { + this.log.info('No results to report'); + } + + } catch (error) { + this.log.error('Failed to run CWV fix checker:', error.message); + throw error; + } + } + + /** + * Get existing CWV suggestions from database + */ + async getExistingSuggestions() { + this.log.debug('Fetching existing CWV suggestions...'); + + const { Opportunity } = this.dataAccess; + const allOpportunities = await Opportunity.allBySiteId(this.options.siteId); + + // Debug: Log all opportunity types found + if (this.options.verbose) { + const opportunityTypes = [...new Set(allOpportunities.map(opp => opp.getType()))]; + this.log.debug(`All opportunity types found: ${opportunityTypes.join(', ')}`); + } + + // Filter for CWV opportunities + const cwvOpportunities = allOpportunities.filter((opportunity) => + opportunity.getType() === 'cwv' + ); + + this.log.debug(`Found ${cwvOpportunities.length} CWV opportunities`); + + // Create opportunity data map for later use + this.opportunityStatusMap = {}; + this.opportunityDataMap = {}; + cwvOpportunities.forEach(opportunity => { + const oppId = opportunity.getId(); + this.opportunityStatusMap[oppId] = opportunity.getStatus ? opportunity.getStatus() : (opportunity.status || 'unknown'); + this.opportunityDataMap[oppId] = { + status: opportunity.getStatus ? opportunity.getStatus() : (opportunity.status || 'unknown'), + createdAt: opportunity.getCreatedAt ? opportunity.getCreatedAt() : (opportunity.createdAt || ''), + updatedAt: opportunity.getUpdatedAt ? 
opportunity.getUpdatedAt() : (opportunity.updatedAt || '') + }; + }); + + // Get outdated AND fixed suggestions directly from database + const { Suggestion } = this.dataAccess; + const suggestions = []; + + for (const opportunity of cwvOpportunities) { + const opptyId = opportunity.getId(); + + // Get outdated suggestions + const outdatedSuggestions = await Suggestion.allByOpportunityIdAndStatus(opptyId, 'outdated'); + suggestions.push(...outdatedSuggestions); + + // Get fixed suggestions + const fixedSuggestions = await Suggestion.allByOpportunityIdAndStatus(opptyId, 'fixed'); + suggestions.push(...fixedSuggestions); + } + + this.log.debug(`Found ${suggestions.length} outdated + fixed CWV suggestions`); + return suggestions; + } + + /** + * Get current CWV data from RUM API (optimized for fix checker) + */ + async getCurrentCWVData() { + this.log.info('Fetching current CWV data from RUM API (optimized query)...'); + try { + // Create proper context object for RUM API client (same as handler) + const context = { + log: this.log, + env: process.env + }; + + this.rumAPIClient = RUMAPIClient.createFrom(context); + + // Debug: Check if RUM_DOMAIN_KEY is available + if (!process.env.RUM_DOMAIN_KEY) { + throw new Error('RUM_DOMAIN_KEY environment variable is required but not set'); + } + + this.log.debug(`Using RUM_DOMAIN_KEY: ${process.env.RUM_DOMAIN_KEY.substring(0, 8)}...`); + + // Use exact same logic as CWV handler with fallback to cdn-analysis + let groupedURLs = this.site.getConfig().getGroupedURLs(auditType); + + // Fallback to cdn-analysis grouped URLs if CWV-specific ones don't exist + if (!groupedURLs) { + groupedURLs = this.site.getConfig().getGroupedURLs('cdn-analysis'); + this.log.debug(`CWV grouped URLs not found, using cdn-analysis fallback: ${groupedURLs ? 
groupedURLs.length + ' patterns' : 'none'}`); + } else { + this.log.debug(`Using CWV-specific grouped URLs: ${groupedURLs.length} patterns`); + } + + // Use the same domain format as the handler - use wwwUrlResolver like the handler does + const { wwwUrlResolver } = await import('../../../src/common/index.js'); + const auditUrl = await wwwUrlResolver(this.site, { log: this.log, env: process.env }); + this.log.debug(`Resolved audit URL: ${auditUrl} (vs base URL: ${this.site.getBaseURL()})`); + + const options = { + domain: auditUrl, + interval: INTERVAL, + granularity: 'hourly', + groupedURLs, + }; + + this.log.debug(`RUM API query options: ${JSON.stringify(options, null, 2)}`); + + this.log.debug('Starting RUM API query...'); + + + let cwvData; + try { + this.log.debug(`Calling rumAPIClient.query with auditType "${auditType}"...`); + cwvData = await this.rumAPIClient.query(auditType, options); + this.log.debug(`Raw CWV data received: ${cwvData ? cwvData.length : 'null'} entries`); + } catch (rumError) { + this.log.error('RUM API query failed:', rumError.message); + throw rumError; + } + + // No filtering - we want all data for fix checking + this.log.info(`✓ Retrieved ${cwvData.length} current CWV entries (no pageview filtering)`); + + // Create lookup map for easy comparison (same buildKey logic as handler) + const cwvMap = {}; + cwvData.forEach(entry => { + const key = entry.type === 'url' ? 
entry.url : entry.pattern; // Same buildKey logic + cwvMap[key] = entry; + }); + + return cwvMap; + + } catch (error) { + this.log.error('Failed to fetch current CWV data:', error.message); + this.log.error('Error stack:', error.stack); + if (error.response) { + this.log.error('Response status:', error.response.status); + this.log.error('Response data:', JSON.stringify(error.response.data, null, 2)); + } + if (error.config) { + this.log.error('Request config:', JSON.stringify(error.config, null, 2)); + } + return {}; + } + } + + /** + * Check if CWV suggestions are fixed by comparing metrics + */ + async checkSuggestionsFixes(existingSuggestions, currentCWVData) { + this.log.info('Checking if CWV suggestions are fixed...'); + + const suggestionsToCheck = this.options.limit + ? existingSuggestions.slice(0, this.options.limit) + : existingSuggestions; + + this.log.info(`Analyzing ${suggestionsToCheck.length} suggestions`); + + for (let i = 0; i < suggestionsToCheck.length; i++) { + const suggestion = suggestionsToCheck[i]; + const suggestionData = suggestion.getData ? suggestion.getData() : suggestion.data; + + if (!suggestionData) { + this.log.debug(`Skipping invalid suggestion: ${suggestion.getId ? 
suggestion.getId() : 'unknown'}`); + continue; + } + + // Extract CWV data from the suggestion data object (same structure as handler stores) + const { type, url, pattern, pageviews, metrics } = suggestionData; + + // Calculate weighted average CWV metrics from the historical metrics array + let oldLCP = 0, oldCLS = 0, oldINP = 0; + let totalPageviews = 0; + + if (metrics && Array.isArray(metrics)) { + let lcpSum = 0, clsSum = 0, inpSum = 0; + let lcpWeight = 0, clsWeight = 0, inpWeight = 0; + + metrics.forEach(metric => { + const weight = metric.pageviews || 0; + totalPageviews += weight; + + if (metric.lcp !== null && metric.lcp !== undefined) { + lcpSum += metric.lcp * weight; + lcpWeight += weight; + } + if (metric.cls !== null && metric.cls !== undefined) { + clsSum += metric.cls * weight; + clsWeight += weight; + } + if (metric.inp !== null && metric.inp !== undefined) { + inpSum += metric.inp * weight; + inpWeight += weight; + } + }); + + oldLCP = lcpWeight > 0 ? lcpSum / lcpWeight : 0; + oldCLS = clsWeight > 0 ? clsSum / clsWeight : 0; + oldINP = inpWeight > 0 ? inpSum / inpWeight : 0; + + if (this.options.verbose && i < 3) { + this.log.debug(`Historical metrics: LCP=${oldLCP.toFixed(2)}ms, CLS=${oldCLS.toFixed(4)}, INP=${oldINP.toFixed(2)}ms (${metrics.length} devices)`); + } + } else { + if (this.options.verbose && i < 3) { + this.log.debug(`No metrics array found in suggestion data for ${type === 'url' ? url : pattern}`); + } + } + + this.log.debug(`Analyzing ${i + 1}/${suggestionsToCheck.length}: ${type === 'url' ? url : pattern}`); + + // Find current data for comparison + const key = type === 'url' ? 
url : pattern; + const currentData = currentCWVData[key]; + + let isFixed = false; + let fixType = 'NOT_IMPROVED'; + let metricsImproved = []; + let currentLCP = null, currentCLS = null, currentINP = null; + + if (currentData) { + // Calculate weighted average CWV metrics from current data metrics array + if (currentData.metrics && Array.isArray(currentData.metrics)) { + let lcpSum = 0, clsSum = 0, inpSum = 0; + let lcpWeight = 0, clsWeight = 0, inpWeight = 0; + + currentData.metrics.forEach(metric => { + const weight = metric.pageviews || 0; + + if (metric.lcp !== null && metric.lcp !== undefined) { + lcpSum += metric.lcp * weight; + lcpWeight += weight; + } + if (metric.cls !== null && metric.cls !== undefined) { + clsSum += metric.cls * weight; + clsWeight += weight; + } + if (metric.inp !== null && metric.inp !== undefined) { + inpSum += metric.inp * weight; + inpWeight += weight; + } + }); + + currentLCP = lcpWeight > 0 ? lcpSum / lcpWeight : null; + currentCLS = clsWeight > 0 ? clsSum / clsWeight : null; + currentINP = inpWeight > 0 ? 
inpSum / inpWeight : null; + } + + // Identify which metrics were originally problematic (above threshold) + const problemMetrics = []; + const wasLCPBad = oldLCP > 0 && oldLCP > THRESHOLDS.lcp; + const wasCLSBad = oldCLS > 0 && oldCLS > THRESHOLDS.cls; + const wasINPBad = oldINP > 0 && oldINP > THRESHOLDS.inp; + + if (wasLCPBad) problemMetrics.push('LCP'); + if (wasCLSBad) problemMetrics.push('CLS'); + if (wasINPBad) problemMetrics.push('INP'); + + // Check if each problematic metric is now fixed (improved to "good" threshold) + const lcpFixed = !wasLCPBad || (currentLCP !== null && currentLCP <= THRESHOLDS.lcp); + const clsFixed = !wasCLSBad || (currentCLS !== null && currentCLS <= THRESHOLDS.cls); + const inpFixed = !wasINPBad || (currentINP !== null && currentINP <= THRESHOLDS.inp); + + // Track which metrics improved + if (wasLCPBad && currentLCP !== null && currentLCP <= THRESHOLDS.lcp) { + metricsImproved.push('LCP'); + } + if (wasCLSBad && currentCLS !== null && currentCLS <= THRESHOLDS.cls) { + metricsImproved.push('CLS'); + } + if (wasINPBad && currentINP !== null && currentINP <= THRESHOLDS.inp) { + metricsImproved.push('INP'); + } + + // Only mark as fixed if ALL originally problematic metrics are now good + if (problemMetrics.length > 0 && lcpFixed && clsFixed && inpFixed) { + isFixed = true; + if (metricsImproved.length === problemMetrics.length) { + fixType = `ALL_FIXED_${metricsImproved.join('_')}`; + } else { + // Some metrics improved, but we need to check if all problems are resolved + fixType = metricsImproved.length > 0 ? 
`PARTIALLY_FIXED_${metricsImproved.join('_')}` : 'FIXED_NO_CURRENT_DATA'; + } + } else if (problemMetrics.length > 0) { + // Not all problems are fixed yet + const stillBad = []; + if (wasLCPBad && !lcpFixed) stillBad.push('LCP'); + if (wasCLSBad && !clsFixed) stillBad.push('CLS'); + if (wasINPBad && !inpFixed) stillBad.push('INP'); + fixType = `STILL_BAD_${stillBad.join('_')}`; + } + } else { + // No current data - might be low traffic or removed + fixType = 'NO_CURRENT_DATA'; + } + + // Get opportunity data from our pre-built map + const opportunityId = suggestion.getOpportunityId ? suggestion.getOpportunityId() : 'unknown'; + const opportunityData = this.opportunityDataMap[opportunityId] || {}; + + // Store result with all required fields for 26-column schema + this.results.push({ + // Core Identity (5 columns) + siteId: this.options.siteId, + siteName: this.site.getBaseURL(), + opportunityId: opportunityId, + opportunityStatus: opportunityData.status || 'unknown', + suggestionId: suggestion.getId ? suggestion.getId() : 'unknown', + + // Suggestion Details (6 columns) + suggestionType: suggestion.getType ? suggestion.getType() : suggestion.type, + suggestionStatus: suggestion.getStatus ? suggestion.getStatus() : suggestion.status, + suggestionRank: suggestion.getRank ? suggestion.getRank() : suggestion.rank, + entryType: type, + urlOrPattern: key, + pageviews: pageviews || totalPageviews || 0, + + // Historical CWV Metrics (3 columns) + oldLCP: oldLCP || 0, + oldCLS: oldCLS || 0, + oldINP: oldINP || 0, + + // Current CWV Metrics (3 columns) + currentLCP: currentLCP || 0, + currentCLS: currentCLS || 0, + currentINP: currentINP || 0, + + // Performance Analysis (3 columns) + metricsImproved: metricsImproved.join(', '), + isFixed: isFixed, + fixType: fixType, + + // Timestamps and Metadata (6 columns) + opportunityCreated: opportunityData.createdAt || '', + opportunityUpdated: opportunityData.updatedAt || '', + suggestionCreated: suggestion.getCreatedAt ? 
suggestion.getCreatedAt() : (suggestion.createdAt || ''), + suggestionUpdated: suggestion.getUpdatedAt ? suggestion.getUpdatedAt() : (suggestion.updatedAt || ''), + updatedBy: suggestion.getUpdatedBy ? suggestion.getUpdatedBy() : (suggestion.updatedBy || ''), + testDate: new Date().toISOString(), + + recommendedAction: isFixed ? 'MARK AS FIXED' : 'CONTINUE OPTIMIZATION', + suggestion: suggestion // Store suggestion reference for fix entity creation + }); + + if (isFixed) { + this.log.info(`✅ IMPROVED: ${key} (${fixType})`); + } else { + this.log.debug(`❌ NOT IMPROVED: ${key} (${fixType})`); + } + } + } + + /** + * Generate CSV report + */ + generateCSV() { + const filename = writeCWVCSV(this.results, this.options.siteId, this.site?.getBaseURL() || 'Unknown Site'); + this.log.info(`📊 Comprehensive CWV CSV report generated: ${filename}`); + return filename; + } + + /** + * Mark fixed suggestions in database + */ + async markFixedSuggestions() { + const fixedResults = this.results.filter(r => r.isFixed); + + if (fixedResults.length === 0) { + this.log.info('No suggestions to mark as fixed'); + return; + } + + this.log.info(`Creating fix entities for ${fixedResults.length} fixed suggestions`); + + for (const result of fixedResults) { + if (this.options.dryRun) { + this.log.info(`Would create fix entity for ${result.suggestionId} (dry run)`); + } else { + try { + // await createFixEntityForSuggestion(this.dataAccess, result.suggestion, { logger: this.log }); + } catch (error) { + this.log.error(`Failed to create fix entity for ${result.suggestionId}: ${error.message}`); + } + } + } + } + + /** + * Print summary statistics + */ + printSummary() { + const totalSuggestions = this.results.length; + const improved = this.results.filter(r => r.isFixed).length; + const allGood = this.results.filter(r => r.fixType === 'ALL_METRICS_GOOD').length; + + this.log.info(''); + this.log.info('=== CWV SUMMARY ==='); + this.log.info(`Total suggestions processed: ${totalSuggestions}`); + 
this.log.info(`Performance improved: ${improved}`); + this.log.info(`All metrics good: ${allGood}`); + this.log.info(`Still needs work: ${totalSuggestions - improved}`); + + if (improved > 0) { + this.log.info(''); + this.log.info('Improvements by metric:'); + const metricCounts = { LCP: 0, CLS: 0, INP: 0 }; + + this.results.filter(r => r.isFixed).forEach(r => { + if (r.metricsImproved.includes('LCP')) metricCounts.LCP++; + if (r.metricsImproved.includes('CLS')) metricCounts.CLS++; + if (r.metricsImproved.includes('INP')) metricCounts.INP++; + }); + + Object.entries(metricCounts).forEach(([metric, count]) => { + if (count > 0) { + this.log.info(` ${metric}: ${count} improvements`); + } + }); + } + } +} + +// CLI setup +import { Command } from 'commander'; + +const program = new Command(); +program + .name('check-cwv-fixed') + .description('Check if Core Web Vitals performance issues from suggestions have been fixed') + .option('--siteId ', 'Site ID to check') + .option('--verbose', 'Enable verbose logging', false) + .option('--limit ', 'Limit number of suggestions to check', parseInt) + .option('--markFixed', 'Mark fixed suggestions in database', false) + .option('--dryRun', 'Dry run mode (default: true)', true) + .option('--allSites', 'Process all sites from constants.js', false) + .option('--sites ', 'Comma-separated list of site IDs to process') + .option('--consolidate', 'Generate consolidated CSV for multiple sites', false); + +program.parse(); +const options = program.opts(); + +// Validate options +let sitesToProcess = []; + +if (options.allSites) { + sitesToProcess = SITES; + console.log(`[INFO] Processing all ${sitesToProcess.length} sites`); +} else if (options.sites) { + const siteIds = options.sites.split(','); + sitesToProcess = SITES.filter(site => siteIds.includes(site.id)); + console.log(`[INFO] Processing ${sitesToProcess.length} specified sites`); +} else if (options.siteId) { + const site = SITES.find(s => s.id === options.siteId); + if (!site) { 
+ console.error(`[ERROR] Site ID not found in constants: ${options.siteId}`); + process.exit(1); + } else { + sitesToProcess = [{ id: options.siteId, name: 'Custom Site' }]; + } +} else { + // Default site ID for testing + options.siteId = '9ae8877a-bbf3-407d-9adb-d6a72ce3c5e3'; + const defaultSite = SITES.find(s => s.id === options.siteId); + sitesToProcess = [defaultSite]; + console.log(`[INFO] Using default site ID: ${options.siteId}`); +} + +// Process sites +async function processSites() { + const allResults = []; + + for (let i = 0; i < sitesToProcess.length; i++) { + const site = sitesToProcess[i]; + + try { + console.log(`\n[INFO] Processing site ${i + 1}/${sitesToProcess.length}: ${site.name} (${site.id})`); + + // Create checker for this site + const siteOptions = { ...options, siteId: site.id }; + const checker = new CWVFixChecker(siteOptions); + + await checker.run(); + + // Collect results if consolidating + if (options.consolidate && sitesToProcess.length > 1) { + // Add site info to each result for consolidation using SITES constant names + const resultsWithSiteInfo = checker.results.map(result => ({ + ...result, + siteId: site.id, + siteName: site.name // Use the friendly name from SITES constant + })); + allResults.push(...resultsWithSiteInfo); + } + + // Add delay between sites to avoid overwhelming servers + if (i < sitesToProcess.length - 1) { + console.log(`[INFO] Waiting 2 seconds before next site...`); + await new Promise(resolve => setTimeout(resolve, 2000)); + } + + } catch (error) { + console.error(`[ERROR] Failed to process ${site.name}: ${error.message}`); + if (options.verbose) { + console.error(error.stack); + } + continue; + } + } + + // Generate consolidated CSV if requested + if (options.consolidate && sitesToProcess.length > 1 && allResults.length > 0) { + generateConsolidatedCSV(allResults); + } +} + +// Generate consolidated CSV for multiple sites +function generateConsolidatedCSV(allResults) { + if (allResults.length === 0) { + 
console.log('📊 No results to consolidate'); + return; + } + + const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); + const filename = `consolidated-cwv-all-sites-${timestamp}Z.csv`; + + // Generate CSV with proper site info from each result + const csvRows = allResults.map(result => formatCWVResult(result, result.siteId, result.siteName)); + const csvContent = [ + CWV_CSV_HEADERS.join(','), + ...csvRows.map(row => row.join(',')) + ].join('\n'); + + writeFileSync(filename, csvContent); + console.log(`📊 Consolidated CWV CSV generated: ${filename} (${allResults.length} total results)`); +} + +// Run the processing +processSites().catch(error => { + console.error('Fatal error:', error.message); + process.exit(1); +}); diff --git a/scripts/opportunities/meta-tags/index.mjs b/scripts/opportunities/meta-tags/index.mjs new file mode 100644 index 000000000..37b0b0bdd --- /dev/null +++ b/scripts/opportunities/meta-tags/index.mjs @@ -0,0 +1,706 @@ +#!/usr/bin/env node + +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/** + * Meta Tags Fix Checker + * + * Compares existing suggestions with current audit results to identify fixed issues. + * Outputs results to CSV for easy analysis. 
+ * + * Supported Issue Types: + * ✅ Missing Tags: Missing Title, Missing Description, Missing H1 + * ✅ Length Issues: Too short, too long, empty (for Title, Description, H1) + * ✅ Duplicate Tags: Duplicate Title, Duplicate Description, Duplicate H1 + * ✅ Structural Issues: Multiple H1 tags on a single page + * + * Detection Logic: + * 1. AI_SUGGESTION_IMPLEMENTED: Exact match with AI suggestion + * 2. FIXED_BY_OTHER_MEANS: Issue no longer detected by SEO checks + * 3. DUPLICATE_CONTENT_CHANGED: Duplicate issue with changed content + * 4. NOT_IMPLEMENTED: Issue still exists + * 5. PAGE_NOT_AVAILABLE: Cannot verify (page not in S3) + * + * Features: + * - Validates suggestion data before processing + * - Special handling for duplicate issues (requires cross-page comparison) + * - Manual check for Multiple H1 issues (disabled in SeoChecks) + * - Proper CSV escaping for quotes, newlines, and commas + * - Comprehensive fix type reporting + * + * Usage: + * node scripts/check-metatags-fixed.mjs --siteId [options] + * + * Options: + * --siteId Site ID to check (required) + * --markFixed Mark fixed suggestions in database (TODO: not yet implemented) + * --dryRun Show what would be marked without making changes + * --verbose Detailed logging + * --limit Limit number of suggestions to check (for testing) + */ + +import { program } from 'commander'; +import { writeFileSync } from 'fs'; +// Simple console logger +import { createDataAccess } from '@adobe/spacecat-shared-data-access'; +import { createFixEntityForSuggestion } from '../../create-fix-entity.js'; + +// Import metatags utilities +import { fetchAndProcessPageObject } from '../../../src/metatags/handler.js'; +import SeoChecks from '../../../src/metatags/seo-checks.js'; +import { S3Client } from '@aws-sdk/client-s3'; + +// Transform URL to scrape.json path - same as handler +function getScrapeJsonPath(url, siteId) { + try { + // If URL doesn't have a protocol, assume https:// + const fullUrl = url.startsWith('http') ? 
url : `https://${url}`; + const pathname = new URL(fullUrl).pathname.replace(/\/$/, ''); + return `scrapes/${siteId}${pathname}/scrape.json`; + } catch (error) { + return null; + } +} + +class MetaTagsFixChecker { + constructor(options) { + this.options = options; + this.log = this.createSimpleLogger(options.verbose); + this.results = []; + this.skippedCount = 0; + } + + createSimpleLogger(verbose) { + return { + info: (msg) => console.log(`[INFO] ${msg}`), + debug: verbose ? (msg) => console.log(`[DEBUG] ${msg}`) : () => {}, + error: (msg) => console.error(`[ERROR] ${msg}`) + }; + } + + /** + * Initialize data access connections + */ + async initializeDataAccess() { + this.log.debug('Initializing data access for meta-tags audit...'); + + try { + // Set up required environment variables for data access + if (!process.env.DYNAMO_TABLE_NAME_DATA) { + process.env.DYNAMO_TABLE_NAME_DATA = 'spacecat-services-data'; + this.log.debug('Set default DYNAMO_TABLE_NAME_DATA'); + } + + if (!process.env.S3_SCRAPER_BUCKET_NAME) { + process.env.S3_SCRAPER_BUCKET_NAME = 'spacecat-dev-scraper'; + this.log.debug('Set default S3_SCRAPER_BUCKET_NAME'); + } + + // Initialize data access with configuration + const config = { + tableNameData: process.env.DYNAMO_TABLE_NAME_DATA, + indexNameAllByStatus: 'gsi1pk-gsi1sk-index', + indexNameAllBySiteId: 'gsi2pk-gsi2sk-index' + }; + + this.dataAccess = createDataAccess(config); + + // Load site + this.site = await this.dataAccess.Site.findById(this.options.siteId); + if (!this.site) { + throw new Error(`Site not found: ${this.options.siteId}`); + } + + // Setup S3 client + this.s3Client = new S3Client({ + region: process.env.AWS_REGION || 'us-east-1' + }); + + // Setup audit context + this.context = { + dataAccess: this.dataAccess, + site: this.site, + log: this.log, + env: process.env, + s3Client: this.s3Client, + // Add other required context properties as needed + }; + + this.log.info(`✓ Data access initialized for site ${this.options.siteId} 
(${this.site.getBaseURL()})`); + } catch (error) { + this.log.error(`Failed to initialize data access: ${error.message}`); + throw error; + } + } + + async run() { + this.log.info('=== META TAGS FIX CHECKER ==='); + this.log.info(`Site ID: ${this.options.siteId}`); + this.log.info(''); + + try { + // Initialize data access + await this.initializeDataAccess(); + + // Step 1: Get existing suggestions + this.log.info('Step 1: Getting existing meta-tags suggestions...'); + const existingSuggestions = await this.getExistingSuggestions(); + this.log.info(`Found ${existingSuggestions.length} existing suggestions`); + + // Step 2: Check each suggestion against S3 single-page content + this.log.info('Step 2: Checking suggestions using S3 single-page scrape + SEO checks...'); + await this.compareAndIdentifyFixes(existingSuggestions); + + // Step 3: Generate CSV + this.log.info('Step 3: Generating CSV report...'); + this.generateCSV(); + + // Step 5: Mark as fixed if requested (TODO) + if (this.options.markFixed) { + await this.markFixedSuggestions(); + } + + this.printSummary(); + + } catch (error) { + this.log.error('Error:', error.message); + if (this.options.verbose) { + this.log.error(error.stack); + } + process.exit(1); + } + } + + /** + * Get existing suggestions from database + */ + async getExistingSuggestions() { + this.log.debug('Fetching existing meta-tags suggestions from database...'); + + try { + const { Opportunity } = this.dataAccess; + + // Get all opportunities for this site + const allOpportunities = await Opportunity.allBySiteId(this.options.siteId); + + // Filter for meta-tags opportunities + const metaTagsOpportunities = allOpportunities.filter( + (opportunity) => opportunity.getType() === 'meta-tags' + ); + + this.log.debug(`Found ${metaTagsOpportunities.length} meta-tags opportunities`); + + // Get outdated and fixed suggestions directly from database + const { Suggestion } = this.dataAccess; + const suggestions = []; + + for (const opportunity of 
metaTagsOpportunities) { + const opptyId = opportunity.getId(); + const oppStatus = opportunity.getStatus ? opportunity.getStatus() : (opportunity.status || ''); + const oppCreated = opportunity.getCreatedAt ? opportunity.getCreatedAt() : (opportunity.createdAt || ''); + const oppUpdated = opportunity.getUpdatedAt ? opportunity.getUpdatedAt() : (opportunity.updatedAt || ''); + + const outdatedSuggestions = await Suggestion.allByOpportunityIdAndStatus(opptyId, 'OUTDATED'); + + const pushWithMeta = (s) => { + suggestions.push({ + suggestion: s, + opportunity: { + id: opptyId, + status: oppStatus, + createdAt: oppCreated, + updatedAt: oppUpdated, + }, + }); + }; + + outdatedSuggestions.forEach(pushWithMeta); + } + + this.log.debug(`Found ${suggestions.length} outdated + fixed suggestions`); + return suggestions; + + } catch (error) { + this.log.error(`Failed to fetch suggestions: ${error.message}`); + throw error; + } + } + + // removed full-audit path; checker now uses single-page S3 content only + + /** + * Compare existing suggestions using single-page S3 + SEO checks + */ + async compareAndIdentifyFixes(existingSuggestions) { + this.log.info('Comparing existing suggestions via single-page S3 + SEO checks...'); + + const suggestionsToCheck = this.options.limit + ? existingSuggestions.slice(0, this.options.limit) + : existingSuggestions; + + this.log.info(`Processing ${suggestionsToCheck.length} suggestions`); + + const bucketName = this.context.env.S3_SCRAPER_BUCKET_NAME; + + for (const entry of suggestionsToCheck) { + const suggestion = entry?.suggestion || entry; + const opportunityMeta = entry?.opportunity || {}; + const suggestionData = suggestion.getData ? 
suggestion.getData() : suggestion.data; + + // Validate suggestion data + if (!suggestionData) { + this.log.error('Skipping suggestion with no data'); + this.skippedCount++; + continue; + } + + const { url, issue, tagName, tagContent, aiSuggestion } = suggestionData; + + // Validate required fields + if (!url) { + this.log.error('Skipping suggestion with missing URL'); + this.skippedCount++; + continue; + } + + if (!tagName || !['title', 'description', 'h1'].includes(tagName)) { + this.log.error(`Skipping suggestion with invalid tagName: ${tagName}`); + this.skippedCount++; + continue; + } + + const issueDetails = suggestionData.issueDetails || suggestionData.issue_detail || ''; + const isDuplicateIssue = issue && issue.toLowerCase().includes('duplicate'); + const isMultipleH1Issue = issue && issue.toLowerCase().includes('multiple h1'); + + let pageWasAudited = false; + let currentTagContent; + let singlePageDetectedIssueExists = null; + + // Build S3 key from URL + const s3Key = getScrapeJsonPath(url, this.options.siteId); + if (!s3Key) { + this.log.debug(`Invalid URL: ${url}`); + currentTagContent = { + error: 'INVALID_URL', + message: `Invalid URL: ${url}`, + }; + } else { + // Log the S3 key being fetched + this.log.debug(`Fetching S3 key: ${s3Key} for URL: ${url} in the bucket : ${bucketName}`); + + // Fetch page object from S3 + const pageObj = await fetchAndProcessPageObject(this.s3Client, bucketName, url, s3Key, this.log); + + if (pageObj) { + // Get normalized path for matching + const urlPath = url.startsWith('http') ? new URL(url).pathname : new URL(`https://${url}`).pathname; + const normalizedPath = urlPath.replace(/\/$/, '') || '/'; + + // Find matching page in object (use normalizedPath or first available) + const pageKey = pageObj[normalizedPath] ? 
normalizedPath : Object.keys(pageObj)[0]; + + if (pageObj[pageKey]) { + pageWasAudited = true; + + // Ensure h1 is always an array to prevent SeoChecks from crashing + const pageTags = pageObj[pageKey]; + if (!Array.isArray(pageTags.h1)) { + pageTags.h1 = pageTags.h1 ? [pageTags.h1] : []; + } + + // For duplicate issues, we can't reliably check with single-page audit + // because duplicates require cross-page comparison + if (isDuplicateIssue) { + this.log.debug(`Duplicate issue detected - skipping single-page SEO check for ${tagName} on ${url}`); + singlePageDetectedIssueExists = null; // Mark as unable to verify + } else if (isMultipleH1Issue) { + // Multiple H1 check is disabled in SeoChecks, but we can manually check + this.log.debug(`Multiple H1 issue detected - performing manual check for ${url}`); + const h1Count = Array.isArray(pageTags.h1) ? pageTags.h1.length : (pageTags.h1 ? 1 : 0); + singlePageDetectedIssueExists = h1Count > 1; + this.log.debug(` H1 count: ${h1Count}, issue still exists: ${singlePageDetectedIssueExists}`); + } else { + // Run SEO checks for non-duplicate issues + const seoChecks = new SeoChecks(this.log); + seoChecks.performChecks(pageKey, pageTags); + seoChecks.finalChecks(); + + // Check if issue still exists + const detectedTags = seoChecks.getDetectedTags(); + singlePageDetectedIssueExists = detectedTags[pageKey]?.[tagName] !== undefined; + } + + // Extract current tag content + // For Multiple H1 issues, show all H1s to help verify the fix + if (isMultipleH1Issue && Array.isArray(pageTags.h1)) { + currentTagContent = `[${pageTags.h1.length} H1s: ${pageTags.h1.join(', ')}]`; + } else { + currentTagContent = Array.isArray(pageTags[tagName]) + ? 
(pageTags[tagName][0] || null) + : pageTags[tagName]; + } + } + } + + if (!pageWasAudited) { + currentTagContent = { + error: 'PAGE_NOT_IN_BUCKET', + message: `Page not found in S3: ${s3Key}`, + }; + } + } + + // Handle error cases + let currentContentDisplay; + let aiSuggestionImplemented = false; + + if (currentTagContent && typeof currentTagContent === 'object' && currentTagContent.error) { + currentContentDisplay = currentTagContent.message; + aiSuggestionImplemented = false; // Can't compare if there's an error + } else { + currentContentDisplay = currentTagContent || '(empty)'; + aiSuggestionImplemented = this.checkIfAISuggestionImplemented(currentTagContent, aiSuggestion); + } + + // Add small delay to avoid overwhelming servers + await this.delay(100); + + // Complete fix logic covering all cases + let isFixed; + let fixType; + + if (currentTagContent && typeof currentTagContent === 'object' && currentTagContent.error) { + // Page/tag unavailable - cannot verify + isFixed = false; + fixType = currentTagContent.error === 'INVALID_URL' ? 
'INVALID_URL' : 'PAGE_NOT_AVAILABLE'; + } else if (aiSuggestionImplemented) { + // Best case: AI suggestion was implemented exactly + isFixed = true; + fixType = 'AI_SUGGESTION_IMPLEMENTED'; + } else if (isDuplicateIssue) { + // For duplicate issues, if content changed from original, consider it potentially fixed + // since we can't verify duplicates with single-page checks + const normalizeContent = (content) => { + if (content === null || content === undefined) return ''; + return String(content).trim(); + }; + + const currentNormalized = normalizeContent(currentTagContent); + const originalNormalized = normalizeContent(tagContent); + const contentChanged = currentNormalized !== originalNormalized && currentNormalized !== ''; + + if (contentChanged) { + isFixed = true; + fixType = 'DUPLICATE_CONTENT_CHANGED'; + } else { + isFixed = false; + fixType = 'DUPLICATE_UNCHANGED'; + } + } else if (singlePageDetectedIssueExists === false) { + // Issue no longer detected by SEO checks + isFixed = true; + fixType = 'FIXED_BY_OTHER_MEANS'; + } else if (singlePageDetectedIssueExists === null) { + // Unable to determine (shouldn't happen for non-duplicate issues) + isFixed = false; + fixType = 'UNABLE_TO_VERIFY'; + } else { + // Issue still exists + isFixed = false; + fixType = 'NOT_IMPLEMENTED'; + } + + const toIso = (v) => (v && typeof v.toISOString === 'function' ? v.toISOString() : (v || '')); + const result = { + siteId: this.options.siteId, + siteName: this.site?.getBaseURL ? this.site.getBaseURL() : '', + opportunityId: opportunityMeta.id || '', + opportunityStatus: opportunityMeta.status || '', + suggestionId: suggestion.getId ? suggestion.getId() : suggestion.id, + suggestionType: suggestion.getType ? (suggestion.getType() || 'METADATA_UPDATE') : (suggestion.type || 'METADATA_UPDATE'), + suggestionStatus: suggestion.getStatus ? suggestion.getStatus() : suggestion.status, + suggestionRank: suggestion.getRank ? 
suggestion.getRank() : (suggestion.rank || ''), + tagName: tagName, + issue: issue, + issueDetails: issueDetails, + url: url, + originalContent: tagContent || '(empty)', + aiSuggestion: aiSuggestion || '(none)', + currentContent: currentContentDisplay, + suggestion: suggestion, // Store suggestion reference for fix entity creation + aiSuggestionImplemented: aiSuggestionImplemented, + isFixed: isFixed, + fixType: fixType, + contentScraped: pageWasAudited ? 'YES' : 'NO', + testDate: new Date().toISOString(), + opportunityCreated: toIso(opportunityMeta.createdAt), + opportunityUpdated: toIso(opportunityMeta.updatedAt), + suggestionCreated: toIso(suggestion.getCreatedAt ? suggestion.getCreatedAt() : suggestion.createdAt), + suggestionUpdated: toIso(suggestion.getUpdatedAt ? suggestion.getUpdatedAt() : suggestion.updatedAt), + updatedBy: suggestion.getUpdatedBy ? (suggestion.getUpdatedBy() || 'MISSING_UPDATED_BY') : (suggestion.updatedBy || 'MISSING_UPDATED_BY'), + recommendedAction: isFixed ? 
'MARK AS FIXED' : 'KEEP CURRENT STATUS' + }; + + this.results.push(result); + + if (aiSuggestionImplemented) { + this.log.info(`✅ FIXED (AI_SUGGESTION_IMPLEMENTED): ${tagName} on ${url}`); + this.log.info(` AI Suggested: "${aiSuggestion}"`); + this.log.info(` Current: "${currentContentDisplay}"`); + } else if (isFixed && fixType === 'FIXED_BY_OTHER_MEANS') { + this.log.info(`✅ FIXED (FIXED_BY_OTHER_MEANS): ${tagName} on ${url} - issue no longer detected`); + this.log.debug(` AI Suggested: "${aiSuggestion}"`); + this.log.debug(` Current: "${currentContentDisplay}"`); + } else if (isFixed && fixType === 'DUPLICATE_CONTENT_CHANGED') { + this.log.info(`✅ FIXED (DUPLICATE_CONTENT_CHANGED): ${tagName} on ${url} - duplicate content was changed`); + this.log.debug(` Original: "${tagContent}"`); + this.log.debug(` Current: "${currentContentDisplay}"`); + } else if (fixType === 'PAGE_NOT_AVAILABLE' || fixType === 'INVALID_URL') { + this.log.debug(`⚠️ ${fixType}: ${tagName} on ${url} - cannot verify`); + } else if (fixType === 'DUPLICATE_UNCHANGED') { + this.log.debug(`❌ DUPLICATE_UNCHANGED: ${tagName} on ${url} - duplicate content unchanged`); + this.log.debug(` Content: "${currentContentDisplay}"`); + } else if (!aiSuggestion || aiSuggestion === 'undefined') { + this.log.debug(`⚠️ NO AI SUGGESTION: ${tagName} on ${url} - cannot verify if implemented`); + this.log.debug(` Current: "${currentContentDisplay}"`); + } else { + this.log.debug(`❌ NOT IMPLEMENTED: ${tagName} on ${url}`); + this.log.debug(` AI Suggested: "${aiSuggestion}"`); + this.log.debug(` Current: "${currentContentDisplay}"`); + } + } + } + // removed extractedTags fallback path + + /** + * Add delay to avoid overwhelming servers + */ + async delay(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); + } + + /** + * Check if AI suggestion was implemented (exact, case-sensitive) + */ + checkIfAISuggestionImplemented(currentContent, aiSuggestion) { + if (currentContent === undefined || currentContent 
=== null || aiSuggestion === undefined || aiSuggestion === null) { + return false; + } + const currentNorm = String(currentContent).trim(); + const aiNorm = String(aiSuggestion).trim(); + return currentNorm === aiNorm; + } + + /** + * Properly escape CSV field values + * Handles quotes, newlines, and commas per RFC 4180 + */ + escapeCsvField(value) { + if (value === null || value === undefined) { + return ''; + } + + const stringValue = String(value); + + // If field contains quotes, newlines, or commas, it must be quoted + // and internal quotes must be doubled + if (stringValue.includes('"') || stringValue.includes('\n') || stringValue.includes('\r') || stringValue.includes(',')) { + return `"${stringValue.replace(/"/g, '""')}"`; + } + + // Otherwise, just wrap in quotes for safety + return `"${stringValue}"`; + } + + + /** + * Generate CSV report + */ + generateCSV() { + const csvHeaders = [ + 'Site ID', + 'Site Name', + 'Opportunity ID', + 'Opportunity Status', + 'Suggestion ID', + 'Suggestion Type', + 'Suggestion Status', + 'Suggestion Rank', + 'Tag Name', + 'Issue', + 'Issue Details', + 'URL', + 'Original Content', + 'AI Suggestion', + 'Current Content', + 'AI Suggestion Implemented', + 'Is Fixed', + 'Fix Type', + 'Content Scraped', + 'Test Date', + 'Opportunity Created', + 'Opportunity Updated', + 'Suggestion Created', + 'Suggestion Updated', + 'Updated By', + 'Recommended Action' + ]; + + const csvRows = this.results.map(result => [ + this.escapeCsvField(result.siteId), + this.escapeCsvField(result.siteName), + this.escapeCsvField(result.opportunityId), + this.escapeCsvField(result.opportunityStatus), + this.escapeCsvField(result.suggestionId), + this.escapeCsvField(result.suggestionType), + this.escapeCsvField(result.suggestionStatus), + this.escapeCsvField(result.suggestionRank), + this.escapeCsvField(result.tagName), + this.escapeCsvField(result.issue), + this.escapeCsvField(result.issueDetails), + this.escapeCsvField(result.url), + 
this.escapeCsvField(result.originalContent), + this.escapeCsvField(result.aiSuggestion), + this.escapeCsvField(result.currentContent), + this.escapeCsvField(result.aiSuggestionImplemented ? 'YES' : 'NO'), + this.escapeCsvField(result.isFixed ? 'YES' : 'NO'), + this.escapeCsvField(result.fixType), + this.escapeCsvField(result.contentScraped), + this.escapeCsvField(result.testDate), + this.escapeCsvField(result.opportunityCreated), + this.escapeCsvField(result.opportunityUpdated), + this.escapeCsvField(result.suggestionCreated), + this.escapeCsvField(result.suggestionUpdated), + this.escapeCsvField(result.updatedBy), + this.escapeCsvField(result.recommendedAction) + ]); + + const csvContent = [ + csvHeaders.join(','), + ...csvRows.map(row => row.join(',')) + ].join('\n'); + + const timestamp = new Date().toISOString().replace(/[:.]/g, '-').split('T'); + const filename = `metatags-fix-check-${this.options.siteId}-${timestamp[0]}-${timestamp[1].split('.')[0]}.csv`; + writeFileSync(filename, csvContent); + + this.log.info(`✓ CSV report generated: ${filename}`); + } + + /** + * Mark fixed suggestions in database + */ + async markFixedSuggestions() { + const fixedResults = this.results.filter(r => r.aiSuggestionImplemented); + + if (fixedResults.length === 0) { + this.log.info('No suggestions to mark as fixed'); + return; + } + + this.log.info(`Creating fix entities for ${fixedResults.length} fixed suggestions`); + + for (const result of fixedResults) { + if (this.options.dryRun) { + this.log.info(`Would create fix entity for ${result.suggestionId} (dry run)`); + } else { + try { + // await createFixEntityForSuggestion(this.dataAccess, result.suggestion, { logger: this.log }); + } catch (error) { + this.log.error(`Failed to create fix entity for ${result.suggestionId}: ${error.message}`); + } + } + } + } + + /** + * Print summary + */ + printSummary() { + const totalChecked = this.results.length; + const totalFixed = this.results.filter(r => r.isFixed).length; + const 
totalNotImplemented = this.results.filter(r => !r.isFixed).length; + + this.log.info(''); + this.log.info('=== SUMMARY ==='); + this.log.info(`Total suggestions checked: ${totalChecked}`); + if (this.skippedCount > 0) { + this.log.info(`Skipped (validation errors): ${this.skippedCount}`); + } + this.log.info(`Issues that were fixed: ${totalFixed}`); + this.log.info(`Suggestions not implemented: ${totalNotImplemented}`); + + // Break down by fix type + if (this.results.length > 0) { + this.log.info(''); + this.log.info('Results by fix type:'); + const byFixType = {}; + this.results.forEach(r => { + byFixType[r.fixType] = (byFixType[r.fixType] || 0) + 1; + }); + + Object.entries(byFixType).sort((a, b) => b[1] - a[1]).forEach(([type, count]) => { + const icon = ['AI_SUGGESTION_IMPLEMENTED', 'FIXED_BY_OTHER_MEANS', 'DUPLICATE_CONTENT_CHANGED'].includes(type) ? '✅' : '❌'; + this.log.info(` ${icon} ${type}: ${count}`); + }); + } + + if (totalFixed > 0) { + this.log.info(''); + this.log.info('Fixed issues by tag and issue type:'); + const fixedByType = {}; + this.results.filter(r => r.isFixed).forEach(r => { + const key = `${r.tagName}: ${r.issue}`; + fixedByType[key] = (fixedByType[key] || 0) + 1; + }); + + Object.entries(fixedByType).sort((a, b) => b[1] - a[1]).forEach(([type, count]) => { + this.log.info(` ${type}: ${count}`); + }); + } + } +} + +// CLI setup +program + .name('check-metatags-fixed') + .description('Check which meta-tags suggestions have been fixed by comparing S3 single-page content') + .option('--siteId ', 'Site ID to check (defaults to test site)') + .option('--markFixed', 'Mark fixed suggestions in database', false) + .option('--dryRun', 'Show what would be marked without making changes', false) + .option('--verbose', 'Detailed logging', false) + .option('--limit ', 'Limit number of suggestions to check (for testing)', parseInt) + .parse(); + +const options = program.opts(); + + +// Default site ID for testing +if (!options.siteId) { + 
options.siteId = '9ae8877a-bbf3-407d-9adb-d6a72ce3c5e3'; + console.log(`[INFO] Using default site ID: ${options.siteId}`); +} + +// Default limit for testing (removed - now processes all suggestions by default) +// if (!options.limit) { +// options.limit = 10; +// console.log(`[INFO] Using default limit: ${options.limit} suggestions`); +// } + +// Run the checker +const checker = new MetaTagsFixChecker(options); +checker.run().catch(error => { + console.error('Fatal error:', error.message); + process.exit(1); +}); diff --git a/scripts/opportunities/sitemap/index.mjs b/scripts/opportunities/sitemap/index.mjs new file mode 100644 index 000000000..ada3aa5e1 --- /dev/null +++ b/scripts/opportunities/sitemap/index.mjs @@ -0,0 +1,663 @@ +#!/usr/bin/env node + +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/** + * Sitemap Fix Checker + * + * Checks if broken URLs from sitemap suggestions are now fixed. + * Tests HTTP status codes to identify resolved issues. 
+ * + * Logic: + * - If broken URL (404/301/302) now returns 200 OK = FIXED + * - If suggested redirect was implemented = FIXED + * - Otherwise = NOT FIXED + * + * Usage: + * node scripts/check-sitemap-fixed.mjs --siteId [options] + */ + +import { program } from 'commander'; +import { createDataAccess } from '@adobe/spacecat-shared-data-access'; +import { tracingFetch as fetch } from '@adobe/spacecat-shared-utils'; +import { writeFileSync } from 'fs'; +import { SITES } from '../../constants.js'; +import { writeSitemapCSV, generateSitemapCSV, formatSitemapResult, SITEMAP_CSV_HEADERS } from '../../csv-utils.js'; +import { createFixEntityForSuggestion } from '../../create-fix-entity.js'; + +class SitemapFixChecker { + constructor(options) { + this.options = options; + this.log = this.createSimpleLogger(options.verbose); + this.results = []; + } + + createSimpleLogger(verbose) { + return { + info: (msg) => console.log(`[INFO] ${msg}`), + debug: verbose ? (msg) => console.log(`[DEBUG] ${msg}`) : () => {}, + error: (msg) => console.error(`[ERROR] ${msg}`) + }; + } + + /** + * Initialize data access connections + */ + async initializeDataAccess() { + this.log.debug('Initializing data access for sitemap audit...'); + + try { + // Set up required environment variables + if (!process.env.DYNAMO_TABLE_NAME_DATA) { + process.env.DYNAMO_TABLE_NAME_DATA = 'spacecat-services-data'; + this.log.debug('Set default DYNAMO_TABLE_NAME_DATA'); + } + + if (!process.env.S3_SCRAPER_BUCKET_NAME) { + process.env.S3_SCRAPER_BUCKET_NAME = 'spacecat-prod-scraper'; + this.log.debug('Set default S3_SCRAPER_BUCKET_NAME'); + } + + // Initialize data access with configuration + const config = { + tableNameData: process.env.DYNAMO_TABLE_NAME_DATA, + indexNameAllByStatus: 'gsi1pk-gsi1sk-index', + indexNameAllBySiteId: 'gsi2pk-gsi2sk-index' + }; + + this.dataAccess = createDataAccess(config); + + // Load site + this.site = await this.dataAccess.Site.findById(this.options.siteId); + if (!this.site) { + 
throw new Error(`Site not found: ${this.options.siteId}`); + } + + this.log.info(`Initialized data access for site: ${this.site.getBaseURL()}`); + + } catch (error) { + this.log.error(`Failed to initialize data access: ${error.message}`); + throw error; + } + } + + /** + * Get existing sitemap suggestions from database + */ + async getExistingSuggestions() { + this.log.debug('Fetching existing sitemap suggestions...'); + + const { Opportunity } = this.dataAccess; + const allOpportunities = await Opportunity.allBySiteId(this.options.siteId); + + // Debug: Log all opportunity types found + if (this.options.verbose) { + const opportunityTypes = [...new Set(allOpportunities.map(opp => opp.getType()))]; + this.log.debug(`All opportunity types found: ${opportunityTypes.join(', ')}`); + } + + // Filter for sitemap opportunities + const sitemapOpportunities = allOpportunities.filter((opportunity) => + opportunity.getType() === 'sitemap' + ); + + this.log.debug(`Found ${sitemapOpportunities.length} sitemap opportunities`); + + // Create opportunity data map for later use + this.opportunityDataMap = {}; + sitemapOpportunities.forEach(opportunity => { + this.opportunityDataMap[opportunity.getId()] = { + status: opportunity.getStatus ? opportunity.getStatus() : (opportunity.status || 'unknown'), + createdAt: opportunity.getCreatedAt ? opportunity.getCreatedAt() : (opportunity.createdAt || ''), + updatedAt: opportunity.getUpdatedAt ? 
opportunity.getUpdatedAt() : (opportunity.updatedAt || '') + }; + }); + + // Get outdated suggestions directly from database using efficient API + const { Suggestion } = this.dataAccess; + const suggestions = []; + + for (const opportunity of sitemapOpportunities) { + const opptyId = opportunity.getId(); + + // Get outdated suggestions only + const outdatedSuggestions = await Suggestion.allByOpportunityIdAndStatus(opptyId, 'outdated'); + suggestions.push(...outdatedSuggestions); + } + + this.log.info(`Found ${suggestions.length} outdated sitemap suggestions`); + return suggestions; + } + + /** + * Test URL status with HTTP request + * Uses same fetchWithHeadFallback logic as sitemap handler + */ + async testUrlStatus(url) { + const TIMEOUT = 5000; + + try { + // Try HEAD request first (same as handler) + const headResponse = await fetch(url, { + method: 'HEAD', + timeout: TIMEOUT, + redirect: 'manual' + }); + + // If HEAD returns 404, try GET as fallback (same as handler) + if (headResponse.status === 404) { + try { + const getResponse = await fetch(url, { + method: 'GET', + timeout: TIMEOUT, + redirect: 'manual' + }); + + return { + statusCode: getResponse.status, + location: getResponse.headers.get('location'), + success: true + }; + } catch { + // If GET also fails, return the original HEAD response (same as handler) + return { + statusCode: headResponse.status, + location: headResponse.headers.get('location'), + success: true + }; + } + } + + return { + statusCode: headResponse.status, + location: headResponse.headers.get('location'), + success: true + }; + + } catch (error) { + this.log.debug(`Error testing URL ${url}: ${error.message}`); + return { + statusCode: 0, + error: error.message, + success: false + }; + } + } + + /** + * Check if URL redirects to suggested URL + */ + async checkRedirectImplemented(originalUrl, suggestedUrl) { + try { + const response = await fetch(originalUrl, { + method: 'HEAD', + timeout: 5000, + redirect: 'follow' // Follow 
redirects to see final destination + }); + + // Check if final URL matches suggested URL + const finalUrl = response.url || originalUrl; + const normalizedFinal = this.normalizeUrl(finalUrl); + const normalizedSuggested = this.normalizeUrl(suggestedUrl); + + return normalizedFinal === normalizedSuggested; + } catch (error) { + this.log.debug(`Error checking redirect for ${originalUrl}: ${error.message}`); + return false; + } + } + + /** + * Check if URL exists in sitemap (EXACT MATCH ONLY) + */ + async checkUrlInSitemap(sitemapUrl, pageUrl) { + try { + this.log.debug(`Checking if ${pageUrl} is in sitemap: ${sitemapUrl}`); + + const response = await fetch(sitemapUrl, { + timeout: 10000, + headers: { + 'User-Agent': 'Mozilla/5.0 (compatible; SpaceCat-SitemapChecker/1.0)' + } + }); + + if (!response.ok) { + this.log.debug(`Sitemap ${sitemapUrl} returned ${response.status}, cannot verify`); + return { inSitemap: null, error: `Sitemap returned ${response.status}` }; + } + + const sitemapContent = await response.text(); + + // EXACT URL matching - no substring matching! 
+ const normalizedPageUrl = this.normalizeUrl(pageUrl); + + // Check for exact matches in XML format + const urlPattern = new RegExp(`\\s*${normalizedPageUrl.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\s*`, 'i'); + const isPresent = urlPattern.test(sitemapContent); + + if (isPresent) { + this.log.debug(`✅ URL ${pageUrl} found in sitemap`); + } else { + this.log.debug(`❌ URL ${pageUrl} NOT found in sitemap (exact match)`); + } + + return { inSitemap: isPresent, error: null }; + + } catch (error) { + this.log.debug(`Error checking sitemap ${sitemapUrl}: ${error.message}`); + return { inSitemap: null, error: error.message }; + } + } + + /** + * Normalize URL for comparison + */ + normalizeUrl(url) { + try { + const urlObj = new URL(url); + // Remove trailing slash and convert to lowercase + return `${urlObj.protocol}//${urlObj.host}${urlObj.pathname.replace(/\/$/, '')}${urlObj.search}`.toLowerCase(); + } catch { + return url.toLowerCase(); + } + } + + /** + * Check if sitemap suggestions are fixed + */ + async checkSuggestionsFixes(existingSuggestions) { + this.log.info('Checking if sitemap suggestions are fixed...'); + + const suggestionsToCheck = this.options.limit + ? existingSuggestions.slice(0, this.options.limit) + : existingSuggestions; + + this.log.info(`Testing ${suggestionsToCheck.length} suggestions`); + + for (let i = 0; i < suggestionsToCheck.length; i++) { + const suggestion = suggestionsToCheck[i]; + const suggestionData = suggestion.getData ? suggestion.getData() : suggestion.data; + + // Determine suggestion type (default to 'url' if not specified but has pageUrl) + const suggestionType = suggestionData.type || (suggestionData.pageUrl ? 'url' : 'unknown'); + + if (!suggestionData || (suggestionType !== 'url' && suggestionType !== 'error')) { + this.log.debug(`Skipping unsupported suggestion type '${suggestionType}': ${suggestion.getId ? 
suggestion.getId() : 'unknown'}`); + continue; + } + + // Handle different suggestion types + if (suggestionType === 'error') { + // Error-type suggestions (e.g., sitemap not found, invalid format) + this.log.debug(`Processing error suggestion ${i + 1}/${suggestionsToCheck.length}: ${suggestionData.error}`); + + // Error suggestions are typically not "fixable" in the traditional sense + // They represent systemic issues like "sitemap not found" + const opportunityId = suggestion.getOpportunityId ? suggestion.getOpportunityId() : 'unknown'; + const opportunityData = this.opportunityDataMap[opportunityId] || {}; + + this.results.push({ + // Core identity + opportunityId: opportunityId, + opportunityStatus: opportunityData.status || 'unknown', + suggestionId: suggestion.getId ? suggestion.getId() : 'unknown', + + // Suggestion info + suggestionType: suggestion.getType ? suggestion.getType() : suggestion.type, + suggestionStatus: suggestion.getStatus ? suggestion.getStatus() : suggestion.status, + suggestionRank: suggestion.getRank ? suggestion.getRank() : suggestion.rank, + + // URL data (empty for error suggestions) + sitemapUrl: '', + pageUrl: '', + originalStatusCode: '', + currentStatusCode: '', + urlsSuggested: '', + recommendedAction: suggestionData.recommendedAction || '', + + // Our test results + redirectImplemented: false, + urlRemovedFromSitemap: false, + isFixed: false, + fixType: 'SYSTEMIC_ERROR', + + // Timestamps + opportunityCreated: opportunityData.createdAt || '', + opportunityUpdated: opportunityData.updatedAt || '', + suggestionCreated: suggestion.getCreatedAt ? suggestion.getCreatedAt() : (suggestion.createdAt || ''), + suggestionUpdated: suggestion.getUpdatedAt ? suggestion.getUpdatedAt() : (suggestion.updatedAt || ''), + updatedBy: suggestion.getUpdatedBy ? 
suggestion.getUpdatedBy() : (suggestion.updatedBy || '') + }); + + continue; + } + + // Handle URL-type suggestions + const { + siteMapUrl, + sitemapUrl, + pageUrl, + statusCode: originalStatusCode, + urlsSuggested + } = suggestionData; + + // Handle both field name variations + const finalSitemapUrl = siteMapUrl || sitemapUrl; + + this.log.debug(`Testing ${i + 1}/${suggestionsToCheck.length}: ${pageUrl}`); + + // Test current status of the URL + const currentStatus = await this.testUrlStatus(pageUrl); + await this.delay(100); // Rate limiting + + // Check if URL was removed from sitemap + const sitemapCheck = await this.checkUrlInSitemap(finalSitemapUrl, pageUrl); + await this.delay(100); // Rate limiting + + let isFixed = false; + let fixType = 'NOT_FIXED'; + let currentStatusDisplay = currentStatus.success ? currentStatus.statusCode : 'ERROR'; + let redirectImplemented = false; + let urlRemovedFromSitemap = sitemapCheck.inSitemap === false; + + if (currentStatus.success) { + // According to handler: ONLY 200 OK = working/fixed + // 301/302/404 are considered "broken" and create suggestions + if (currentStatus.statusCode === 200) { + isFixed = true; + fixType = 'URL_NOW_WORKS'; + } + // All other status codes (including 301/302) = still broken/not fixed + else { + isFixed = false; + fixType = 'NOT_FIXED'; + } + } + + // If URL is still broken but was removed from sitemap, consider it fixed + if (!isFixed && urlRemovedFromSitemap) { + isFixed = true; + fixType = 'URL_REMOVED_FROM_SITEMAP'; + } + + // Check if they implemented our specific redirect suggestion (for reporting) + if (currentStatus.success && (currentStatus.statusCode === 301 || currentStatus.statusCode === 302) && urlsSuggested) { + const suggestedUrls = Array.isArray(urlsSuggested) ? 
urlsSuggested : [urlsSuggested]; + + for (const suggestedUrl of suggestedUrls) { + redirectImplemented = await this.checkRedirectImplemented(pageUrl, suggestedUrl); + if (redirectImplemented) { + break; // Found a match to our suggestion + } + } + } + + // Get opportunity data from our pre-built map (no additional API call!) + const opportunityId = suggestion.getOpportunityId ? suggestion.getOpportunityId() : 'unknown'; + const opportunityData = this.opportunityDataMap[opportunityId] || {}; + + // Extract additional suggestion data for schema + const additionalSuggestionData = suggestion.getData ? suggestion.getData() : suggestion.data; + + // Store result with all required fields + this.results.push({ + // Core identity + opportunityId: opportunityId, + opportunityStatus: opportunityData.status || 'unknown', + suggestionId: suggestion.getId ? suggestion.getId() : 'unknown', + + // Suggestion info + suggestionType: suggestion.getType ? suggestion.getType() : suggestion.type, + suggestionStatus: suggestion.getStatus ? suggestion.getStatus() : suggestion.status, + suggestionRank: suggestion.getRank ? suggestion.getRank() : suggestion.rank, + + // URL data + sitemapUrl: finalSitemapUrl, + pageUrl, + originalStatusCode, + currentStatusCode: currentStatusDisplay, + urlsSuggested: urlsSuggested || '', + recommendedAction: additionalSuggestionData?.recommendedAction || '', + + // Our test results + redirectImplemented, + urlRemovedFromSitemap, + isFixed, + fixType, + + // Timestamps + opportunityCreated: opportunityData.createdAt || '', + opportunityUpdated: opportunityData.updatedAt || '', + suggestionCreated: suggestion.getCreatedAt ? suggestion.getCreatedAt() : (suggestion.createdAt || ''), + suggestionUpdated: suggestion.getUpdatedAt ? suggestion.getUpdatedAt() : (suggestion.updatedAt || ''), + updatedBy: suggestion.getUpdatedBy ? 
suggestion.getUpdatedBy() : (suggestion.updatedBy || ''), + suggestion: suggestion // Store suggestion reference for fix entity creation + }); + + if (isFixed) { + this.log.info(`✅ FIXED: ${pageUrl} (${fixType})`); + } else { + this.log.debug(`❌ NOT FIXED: ${pageUrl} (still ${currentStatusDisplay})`); + } + } + } + + /** + * Generate CSV report using common utilities + */ + generateCSV() { + const filename = writeSitemapCSV(this.results, this.options.siteId, this.site?.getBaseURL() || 'Unknown Site'); + this.log.info(`📊 CSV report generated: ${filename}`); + return filename; + } + + /** + * Print summary using common utilities + */ + printSummary() { + const totalSuggestions = this.results.length; + const fixed = this.results.filter(r => r.isFixed).length; + const redirectsImplemented = this.results.filter(r => r.redirectImplemented).length; + + this.log.info(''); + this.log.info('=== SUMMARY ==='); + this.log.info(`Total suggestions processed: ${totalSuggestions}`); + this.log.info(`Fixed overall: ${fixed}`); + this.log.info(`Redirects implemented: ${redirectsImplemented}`); + this.log.info(`Not fixed: ${totalSuggestions - fixed}`); + } + + /** + * Mark suggestions as fixed + */ + async markFixedSuggestions() { + const fixedResults = this.results.filter(r => r.isFixed); + + if (fixedResults.length === 0) { + this.log.info('No suggestions to mark as fixed'); + return; + } + + this.log.info(`Creating fix entities for ${fixedResults.length} fixed suggestions`); + + for (const result of fixedResults) { + if (this.options.dryRun) { + this.log.info(`Would create fix entity for ${result.suggestionId} (dry run)`); + } else { + try { + // await createFixEntityForSuggestion(this.dataAccess, result.suggestion, { logger: this.log }); + } catch (error) { + this.log.error(`Failed to create fix entity for ${result.suggestionId}: ${error.message}`); + } + } + } + } + + /** + * Rate limiting delay + */ + async delay(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); + 
} + + /** + * Main execution flow + */ + async run() { + try { + this.log.info('🗺️ Starting Sitemap Fix Checker...'); + + await this.initializeDataAccess(); + const existingSuggestions = await this.getExistingSuggestions(); + + if (existingSuggestions.length === 0) { + this.log.info('No unfixed sitemap suggestions found'); + return; + } + + await this.checkSuggestionsFixes(existingSuggestions); + + if (this.results.length > 0) { + this.generateCSV(); + this.printSummary(); + await this.markFixedSuggestions(); + } else { + this.log.info('No results to report'); + } + + this.log.info('✅ Sitemap fix check completed'); + + } catch (error) { + this.log.error(`Failed to run sitemap fix checker: ${error.message}`); + if (this.options.verbose) { + console.error(error.stack); + } + process.exit(1); + } + } +} + +// CLI Configuration +program + .name('check-sitemap-fixed') + .description('Check if sitemap suggestions are fixed by testing URL status codes') + .option('--siteId ', 'Site ID to check', '9ae8877a-bbf3-407d-9adb-d6a72ce3c5e3') + .option('--allSites', 'Process all configured sites', false) + .option('--sites ', 'Specific site IDs to process (space-separated)') + .option('--markFixed', 'Mark fixed suggestions in database', false) + .option('--dryRun', 'Run without making changes', false) + .option('--verbose', 'Enable verbose logging', false) + .option('--limit ', 'Limit number of suggestions to check (for testing)', parseInt) + .option('--consolidate', 'Generate consolidated CSV for multiple sites', false) + .parse(); + +const options = program.opts(); + +// Determine which sites to process +let sitesToProcess = []; + +if (options.allSites) { + sitesToProcess = SITES; + console.log(`[INFO] Processing all ${SITES.length} configured sites`); +} else if (options.sites) { + sitesToProcess = SITES.filter(site => options.sites.includes(site.id)); + console.log(`[INFO] Processing ${sitesToProcess.length} specified sites`); +} else if (options.siteId) { + const site = 
SITES.find(s => s.id === options.siteId); + if (site) { + sitesToProcess = [site]; + } else { + // Custom site ID not in the list + sitesToProcess = [{ id: options.siteId, name: 'Custom Site' }]; + } +} else { + // Default site ID + const defaultSite = SITES.find(s => s.id === '9ae8877a-bbf3-407d-9adb-d6a72ce3c5e3'); + sitesToProcess = [defaultSite]; + console.log(`[INFO] Using default site ID: 9ae8877a-bbf3-407d-9adb-d6a72ce3c5e3`); +} + +// Process sites +async function processSites() { + const allResults = []; + + for (let i = 0; i < sitesToProcess.length; i++) { + const site = sitesToProcess[i]; + + try { + console.log(`\n[INFO] Processing site ${i + 1}/${sitesToProcess.length}: ${site.name} (${site.id})`); + + // Create checker for this site + const siteOptions = { ...options, siteId: site.id }; + const checker = new SitemapFixChecker(siteOptions); + + await checker.run(); + + // Collect results if consolidating + if (options.consolidate) { + // Add site info to each result for consolidation + const resultsWithSiteInfo = checker.results.map(result => ({ + ...result, + siteId: site.id, + siteName: site.name + })); + allResults.push(...resultsWithSiteInfo); + } + + // Add delay between sites to avoid overwhelming servers + if (i < sitesToProcess.length - 1) { + console.log(`[INFO] Waiting 2 seconds before next site...`); + await new Promise(resolve => setTimeout(resolve, 2000)); + } + + } catch (error) { + console.error(`[ERROR] Failed to process ${site.name}: ${error.message}`); + if (options.verbose) { + console.error(error.stack); + } + continue; + } + } + + // Generate consolidated CSV if requested + if (options.consolidate && allResults.length > 0) { + generateConsolidatedCSV(allResults); + } +} + +// Generate consolidated CSV for multiple sites +function generateConsolidatedCSV(allResults) { + if (allResults.length === 0) { + console.log('📊 No results to consolidate'); + return; + } + + const timestamp = new Date().toISOString().replace(/[:.]/g, 
'-').slice(0, 19); + const filename = `consolidated-sitemap-all-sites-${timestamp}Z.csv`; + + // Generate CSV with proper site info from each result + const csvRows = allResults.map(result => formatSitemapResult(result, result.siteId, result.siteName)); + const csvContent = [ + SITEMAP_CSV_HEADERS.join(','), + ...csvRows.map(row => row.join(',')) + ].join('\n'); + + writeFileSync(filename, csvContent); + console.log(`📊 Consolidated CSV generated: ${filename} (${allResults.length} total results)`); +} + +// Run the processing +processSites().catch(error => { + console.error('Fatal error:', error.message); + process.exit(1); +}); diff --git a/scripts/opportunities/structured-data/index.mjs b/scripts/opportunities/structured-data/index.mjs new file mode 100644 index 000000000..cb853944b --- /dev/null +++ b/scripts/opportunities/structured-data/index.mjs @@ -0,0 +1,610 @@ +#!/usr/bin/env node + +/** + * Structured Data Fix Checker + * + * Compares existing structured data suggestions with current S3 scraped data to identify fixed issues. + * Uses the SAME validation logic as the structured data handler (getIssuesFromScraper). + * + * Process: + * 1. Find all OUTDATED structured-data suggestions + * 2. Fetch current page from S3 scraper bucket + * 3. Run the SAME audit checks as handler.js (using getIssuesFromScraper) + * 4. Compare original errors with current issues + * 5. 
Categorize as: + * - AI_SUGGESTION_IMPLEMENTED: AI's suggested fix was implemented exactly + * - FIXED_BY_OTHER_MEANS: Issue no longer detected (fixed differently) + * - NOT_FIXED: Issue still exists + * - PAGE_NOT_AVAILABLE: Cannot verify + */ + +import { program } from 'commander'; +import { createDataAccess, Audit } from '@adobe/spacecat-shared-data-access'; +import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3'; +import { + writeStructuredDataCSV, + writeErrorCSV +} from '../../csv-utils.js'; +import { createFixEntityForSuggestion } from '../../create-fix-entity.js'; +import dotenv from 'dotenv'; + +// Import the SAME validation functions as the handler +import { + getIssuesFromScraper +} from '../../../src/structured-data/lib.js'; + +dotenv.config(); + +const auditType = Audit.AUDIT_TYPES.STRUCTURED_DATA; + +class StructuredDataFixChecker { + constructor(options = {}) { + this.options = { siteId: null, verbose: false, limit: null, markFixed: false, dryRun: true, ...options }; + this.log = this.createSimpleLogger(this.options.verbose); + this.dataAccess = null; + this.site = null; + this.errors = []; + this.s3Client = new S3Client({ region: 'us-east-1' }); + + // Set default environment variables + if (!process.env.DYNAMO_TABLE_NAME_DATA) { + process.env.DYNAMO_TABLE_NAME_DATA = 'spacecat-services-all-sites'; + } + if (!process.env.S3_SCRAPER_BUCKET_NAME) { + process.env.S3_SCRAPER_BUCKET_NAME = 'spacecat-prod-scraper'; + } + } + + createSimpleLogger(verbose = false) { + return { + info: (msg, ...args) => console.log(`[INFO] ${msg}`, ...args), + warn: (msg, ...args) => console.warn(`[WARN] ${msg}`, ...args), + error: (msg, ...args) => console.error(`[ERROR] ${msg}`, ...args), + debug: verbose ? 
(msg, ...args) => console.log(`[DEBUG] ${msg}`, ...args) : () => {} + }; + } + + logError(context, error, suggestionId = null) { + const errorEntry = { + context, error: error.message || error, stack: error.stack, + suggestionId, timestamp: new Date().toISOString() + }; + this.errors.push(errorEntry); + this.log.error(`${context}: ${error.message || error}`); + } + + async initializeDataAccess() { + try { + const config = { + tableNameData: process.env.DYNAMO_TABLE_NAME_DATA, + indexNameAllByStatus: process.env.DYNAMO_INDEX_ALL_BY_STATUS || 'spacecat-services-all-sites-gsi1pk-gsi1sk-index', + indexNameAllBySiteId: process.env.DYNAMO_INDEX_ALL_BY_SITE_ID || 'spacecat-services-all-sites-gsi2pk-gsi2sk-index' + }; + this.dataAccess = createDataAccess(config); + + // Load site object (needed for validation context) + this.site = await this.dataAccess.Site.findById(this.options.siteId); + if (!this.site) { + throw new Error(`Site not found in database: ${this.options.siteId}`); + } + + this.log.info(`Loaded site: ${this.site.baseURL} (${this.site.getId()})`); + } catch (error) { + this.logError('Data access initialization', error); + throw error; + } + } + + async getExistingSuggestions(siteId) { + try { + const { Opportunity, Suggestion } = this.dataAccess; + + // Get all opportunities for this site + const allOpportunities = await Opportunity.allBySiteId(siteId); + + // Filter for structured-data opportunities + const opportunities = allOpportunities.filter( + (opportunity) => opportunity.getType() === 'structured-data' + ); + + this.log.info(`Found ${opportunities.length} structured-data opportunities for site ${siteId}`); + + if (opportunities.length === 0) return []; + + // Build opportunity data map + const opportunityDataMap = {}; + for (const opportunity of opportunities) { + opportunityDataMap[opportunity.getId()] = { + status: opportunity.getStatus(), + createdAt: opportunity.getCreatedAt(), + updatedAt: opportunity.getUpdatedAt() + }; + } + 
this.opportunityDataMap = opportunityDataMap; + + // Get all suggestions (outdated + fixed) + const allSuggestions = []; + for (const opportunity of opportunities) { + const opptyId = opportunity.getId(); + const [outdatedSuggestions, fixedSuggestions] = await Promise.all([ + Suggestion.allByOpportunityIdAndStatus(opptyId, 'outdated'), + Suggestion.allByOpportunityIdAndStatus(opptyId, 'fixed') + ]); + allSuggestions.push(...outdatedSuggestions, ...fixedSuggestions); + } + + this.log.info(`Found ${allSuggestions.length} structured data suggestions`); + return allSuggestions; + + } catch (error) { + this.logError('Fetching existing suggestions', error); + throw error; + } + } + + async getCurrentStructuredData(pageUrl) { + try { + const scrapeJsonPath = this.getScrapeJsonPath(pageUrl); + const command = new GetObjectCommand({ + Bucket: 'spacecat-prod-scraper', + Key: scrapeJsonPath + }); + + const response = await this.s3Client.send(command); + const content = await response.Body.transformToString(); + const scrapeData = JSON.parse(content); + + // Structured data is in scrapeResult.structuredData + return scrapeData.scrapeResult?.structuredData || null; + + } catch (error) { + this.log.debug(`Failed to fetch structured data for ${pageUrl}: ${error.message}`); + return null; + } + } + + getScrapeJsonPath(url) { + const pathname = new URL(url).pathname.replace(/\/$/, ''); + return `scrapes/${this.options.siteId}${pathname}/scrape.json`; + } + + /** + * Use the SAME validation logic as handler.js + * This runs getIssuesFromScraper on the current page data + */ + async getCurrentIssues(pageUrl, currentStructuredData) { + try { + // Create a mock scrape cache with the current data + let { pathname } = new URL(pageUrl); + if (pathname.endsWith('/')) { + pathname = pathname.slice(0, -1); + } + + const scrapeCache = new Map(); + scrapeCache.set(pathname, Promise.resolve({ + scrapeResult: { + structuredData: currentStructuredData + } + })); + + // Create context matching 
handler requirements + const mockContext = { + log: this.log, + site: this.site, + }; + + // Run the SAME validation as handler.js + const issues = await getIssuesFromScraper( + mockContext, + [{ url: pageUrl }], + scrapeCache + ); + + return issues; + + } catch (error) { + this.log.debug(`Error running validation for ${pageUrl}: ${error.message}`); + return []; + } + } + + /** + * Check if AI's suggested fix was implemented + * Compares the AI's correctedMarkup with current structured data + */ + checkAISuggestionImplemented(originalErrors, currentStructuredData) { + if (!originalErrors || originalErrors.length === 0 || !currentStructuredData) { + return false; + } + + // Check each original error for AI suggestion + for (const error of originalErrors) { + if (!error.fix) continue; + + // Extract AI suggestion details from the fix markdown + const correctedMarkupMatch = error.fix.match(/```json\n([\s\S]+?)\n```/); + if (!correctedMarkupMatch) continue; + + try { + const aiSuggestedMarkup = JSON.parse(correctedMarkupMatch[1]); + + // Deep comparison of AI suggestion with current data + const currentDataNormalized = JSON.stringify(currentStructuredData, null, 2); + const aiSuggestionNormalized = JSON.stringify(aiSuggestedMarkup, null, 2); + + if (currentDataNormalized === aiSuggestionNormalized) { + return true; + } + } catch (e) { + // Continue checking other errors if parsing fails + continue; + } + } + + return false; + } + + /** + * Compare original suggestion errors with current issues + * Returns fix status and type + */ + async compareWithOriginalErrors(pageUrl, originalErrors, currentStructuredData) { + if (!originalErrors || originalErrors.length === 0) { + return { + isFixed: false, + aiSuggestionImplemented: false, + fixType: 'NO_ORIGINAL_ERRORS', + details: 'No original errors to compare' + }; + } + + // If no current structured data, issue cannot be fixed + if (!currentStructuredData) { + return { + isFixed: false, + aiSuggestionImplemented: false, + 
fixType: 'NO_STRUCTURED_DATA', + details: 'No structured data found on page' + }; + } + + // Run the SAME validation as handler.js + const currentIssues = await this.getCurrentIssues(pageUrl, currentStructuredData); + + // Check if AI suggestion was implemented exactly + const aiImplemented = this.checkAISuggestionImplemented(originalErrors, currentStructuredData); + + if (aiImplemented) { + return { + isFixed: true, + aiSuggestionImplemented: true, + fixType: 'AI_SUGGESTION_IMPLEMENTED', + details: 'AI suggested markup matches current structured data' + }; + } + + // Check if original errors still exist + const stillHasIssues = originalErrors.some(originalError => { + const errorTitle = originalError.errorTitle || ''; + + return currentIssues.some(currentIssue => { + // Match based on error message and root type + const issueMessage = currentIssue.issueMessage || ''; + const rootTypeMatch = originalError.errorTitle?.includes(currentIssue.rootType); + + return errorTitle.includes(issueMessage) || rootTypeMatch; + }); + }); + + if (!stillHasIssues && currentIssues.length === 0) { + // No issues found - fixed by some means + return { + isFixed: true, + aiSuggestionImplemented: false, + fixType: 'FIXED_BY_OTHER_MEANS', + details: 'Original issue no longer detected by validation' + }; + } + + // Issues still exist + return { + isFixed: false, + aiSuggestionImplemented: false, + fixType: 'NOT_FIXED', + details: `Still has ${currentIssues.length} validation issue(s)` + }; + } + + async checkSuggestionsFixes(suggestions) { + const results = []; + let processed = 0; + + for (const suggestion of suggestions) { + try { + const suggestionData = suggestion.getData(); + const opportunityData = this.opportunityDataMap[suggestion.getOpportunityId()] || {}; + + // Debug: Log the actual suggestion data structure + this.log.debug(`Processing suggestion ${suggestion.getId()}`); + + if (!suggestionData?.url) { + this.logError('Invalid suggestion data', new Error(`Missing URL. 
Data: ${JSON.stringify(suggestionData)}`), suggestion.getId()); + + // Create an error result instead of skipping + results.push({ + siteId: this.options.siteId, + siteName: this.site?.baseURL || 'Unknown', + opportunityId: suggestion.getOpportunityId(), + opportunityStatus: opportunityData.status || 'UNKNOWN', + suggestionId: suggestion.getId(), + suggestionType: suggestion.getType(), + suggestionStatus: suggestion.getStatus(), + suggestionRank: suggestion.getRank() || 0, + url: 'ERROR: Missing URL', + errorId: 'missing-url', + errorTitle: 'Missing URL in suggestion data', + totalJsonLdBlocks: 0, + validJsonLdBlocks: 0, + schemaTypes: '', + currentJsonLdContent: 'ERROR: Cannot fetch without URL', + completenessScore: 0, + aiSuggestionFix: 'ERROR: Cannot process without URL', + bestSimilarity: 0, + hasValidSchema: false, + aiSuggestionImplemented: false, + isFixed: false, + fixType: 'ERROR_MISSING_URL', + opportunityCreated: opportunityData.createdAt || '', + opportunityUpdated: opportunityData.updatedAt || '', + suggestionCreated: suggestion.getCreatedAt?.() || suggestion.createdAt || '', + suggestionUpdated: suggestion.getUpdatedAt?.() || suggestion.updatedAt || '', + updatedBy: suggestion.getUpdatedBy?.() || suggestion.updatedBy || '', + testDate: new Date().toISOString() + }); + continue; + } + + const pageUrl = suggestionData.url; + const originalErrors = suggestionData.errors || []; + + // Get current structured data from S3 + const currentStructuredData = await this.getCurrentStructuredData(pageUrl); + + // Use the SAME validation logic as handler.js + const comparison = await this.compareWithOriginalErrors(pageUrl, originalErrors, currentStructuredData); + + // Extract original error details for reporting + let errorDescription = '', suggestedFix = '', confidenceScore = 0; + + if (originalErrors.length > 0) { + const firstError = originalErrors[0]; + errorDescription = firstError.errorTitle || 'Structured data issue'; + + if (firstError.fix) { + // Extract 
error description + const issueMatch = firstError.fix.match(/## Issue Detected for (.+?)\n(.+?)(?:\n##|$)/); + if (issueMatch) { + suggestedFix = `${issueMatch[1]}: ${issueMatch[2]}`; + } else { + // Extract from corrected markup section + const firstLine = firstError.fix.split('\n').find(line => line.trim() && !line.startsWith('#')); + suggestedFix = firstLine ? firstLine.trim() : 'Schema validation issue'; + } + + // Extract confidence score if available + const scoreMatch = firstError.fix.match(/Confidence score:\s*(\d+)%/); + if (scoreMatch) confidenceScore = parseInt(scoreMatch[1]); + } + } + + // Extract schema types from current data + let schemaTypes = ''; + if (currentStructuredData) { + if (Array.isArray(currentStructuredData)) { + schemaTypes = currentStructuredData.map(item => item?.['@type']).filter(Boolean).join(', '); + } else if (currentStructuredData['@type']) { + schemaTypes = currentStructuredData['@type']; + } else if (currentStructuredData.jsonld) { + // Handle new scraper format + const jsonldData = Object.values(currentStructuredData.jsonld || {}).flat(); + schemaTypes = jsonldData.map(item => item?.['@type']).filter(Boolean).join(', '); + } + } + + const result = { + siteId: this.options.siteId, + siteName: this.site?.baseURL || 'Unknown', + opportunityId: suggestion.getOpportunityId(), + opportunityStatus: opportunityData.status || 'UNKNOWN', + suggestionId: suggestion.getId(), + suggestionType: suggestion.getType(), + suggestionStatus: suggestion.getStatus(), + suggestionRank: suggestion.getRank() || 0, + url: pageUrl, + errorId: originalErrors.length > 0 ? originalErrors[0].id || 'unknown' : 'unknown', + errorTitle: errorDescription, + totalJsonLdBlocks: Array.isArray(currentStructuredData) ? currentStructuredData.length : (currentStructuredData ? 1 : 0), + validJsonLdBlocks: Array.isArray(currentStructuredData) ? 
currentStructuredData.filter(item => item && typeof item === 'object').length : (currentStructuredData && typeof currentStructuredData === 'object' ? 1 : 0), + schemaTypes: schemaTypes, + currentJsonLdContent: currentStructuredData ? JSON.stringify(currentStructuredData).substring(0, 1000) : 'No structured data found', + completenessScore: confidenceScore, + aiSuggestionFix: suggestedFix.substring(0, 500), + bestSimilarity: 0, // Not used for structured data + hasValidSchema: !!currentStructuredData, + aiSuggestionImplemented: comparison.aiSuggestionImplemented, + isFixed: comparison.isFixed, + fixType: comparison.fixType, + opportunityCreated: opportunityData.createdAt || '', + opportunityUpdated: opportunityData.updatedAt || '', + suggestionCreated: suggestion.getCreatedAt() || '', + suggestionUpdated: suggestion.getUpdatedAt() || '', + updatedBy: 'system', + testDate: new Date().toISOString(), + suggestion: suggestion // Store suggestion reference for fix entity creation + }; + + results.push(result); + processed++; + + // Log progress + if (comparison.isFixed) { + if (comparison.aiSuggestionImplemented) { + this.log.info(`✅ AI_SUGGESTION_IMPLEMENTED: ${pageUrl}`); + } else { + this.log.info(`✅ FIXED_BY_OTHER_MEANS: ${pageUrl}`); + } + } else { + this.log.debug(`❌ ${comparison.fixType}: ${pageUrl}`); + } + + if (this.options.limit && processed >= this.options.limit) { + this.log.info(`Reached limit of ${this.options.limit} suggestions`); + break; + } + + } catch (error) { + this.logError(`Processing suggestion ${suggestion.getId()}`, error, suggestion.getId()); + + // Create an error result instead of skipping + const opportunityData = this.opportunityDataMap[suggestion.getOpportunityId()] || {}; + results.push({ + siteId: this.options.siteId, + siteName: this.site?.baseURL || 'Unknown', + opportunityId: suggestion.getOpportunityId(), + opportunityStatus: opportunityData.status || 'UNKNOWN', + suggestionId: suggestion.getId(), + suggestionType: 
suggestion.getType(), + suggestionStatus: suggestion.getStatus(), + suggestionRank: suggestion.getRank() || 0, + url: 'ERROR: Processing failed', + errorId: 'processing-error', + errorTitle: `Processing error: ${error.message}`, + totalJsonLdBlocks: 0, + validJsonLdBlocks: 0, + schemaTypes: '', + completenessScore: 0, + aiSuggestionFix: `ERROR: ${error.message}`, + bestSimilarity: 0, + hasValidSchema: false, + aiSuggestionImplemented: false, + isFixed: false, + fixType: 'ERROR_PROCESSING', + opportunityCreated: opportunityData.createdAt || '', + opportunityUpdated: opportunityData.updatedAt || '', + suggestionCreated: suggestion.getCreatedAt?.() || suggestion.createdAt || '', + suggestionUpdated: suggestion.getUpdatedAt?.() || suggestion.updatedAt || '', + updatedBy: suggestion.getUpdatedBy?.() || suggestion.updatedBy || '', + testDate: new Date().toISOString() + }); + } + } + + return results; + } + + generateCSV(results) { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5); + + const filename = writeStructuredDataCSV(results, this.options.siteId, this.site?.name || 'Unknown'); + + if (this.errors.length > 0) { + writeErrorCSV(this.errors, 'structured-data', this.options.siteId); + } + + return filename; + } + + printSummary(results) { + const summary = results.reduce((acc, result) => { + acc.total++; + if (result.isFixed) acc.fixed++; + if (result.aiSuggestionImplemented) acc.aiImplemented++; + acc.fixTypes[result.fixType] = (acc.fixTypes[result.fixType] || 0) + 1; + return acc; + }, { total: 0, fixed: 0, aiImplemented: 0, fixTypes: {} }); + + this.log.info('\n=== STRUCTURED DATA FIX SUMMARY ==='); + this.log.info(`📊 Total: ${summary.total}, ✅ Fixed: ${summary.fixed}, 🤖 AI: ${summary.aiImplemented}`); + this.log.info('Fix Types:', summary.fixTypes); + } + + async markFixedSuggestions() { + const fixedResults = this.results.filter(r => r.isFixed); + + if (fixedResults.length === 0) { + this.log.info('No suggestions to mark as fixed'); 
+ return; + } + + this.log.info(`Creating fix entities for ${fixedResults.length} fixed suggestions`); + + for (const result of fixedResults) { + if (this.options.dryRun) { + this.log.info(`Would create fix entity for ${result.suggestionId} (dry run)`); + } else { + try { + // await createFixEntityForSuggestion(this.dataAccess, result.suggestion, { logger: this.log }); + } catch (error) { + this.log.error(`Failed to create fix entity for ${result.suggestionId}: ${error.message}`); + } + } + } + } + + async run() { + try { + this.log.info('=== STRUCTURED DATA FIX CHECKER ==='); + this.log.info(`Site ID: ${this.options.siteId}`); + + await this.initializeDataAccess(); + const suggestions = await this.getExistingSuggestions(this.options.siteId); + + if (suggestions.length === 0) { + this.log.info('No structured data suggestions found'); + return; + } + + this.log.info(`Checking ${suggestions.length} suggestions using handler validation logic...`); + const results = await this.checkSuggestionsFixes(suggestions); + this.results = results; // Store results for markFixedSuggestions + const filename = this.generateCSV(results); + this.printSummary(results); + + if (this.options.markFixed && !this.options.dryRun) { + await this.markFixedSuggestions(); + } + + this.log.info(`🎉 Complete! 
Results: ${filename}`); + + } catch (error) { + this.logError('Main execution', error); + this.log.error('❌ Failed:', error.message); + if (this.options.verbose) { + console.error(error.stack); + } + process.exit(1); + } + } +} + +// CLI Configuration +program + .name('check-structured-data-fixed') + .description('Check if structured data suggestions have been fixed') + .requiredOption('--siteId ', 'Site ID to check') + .option('--verbose', 'Enable verbose logging', false) + .option('--limit ', 'Limit number of suggestions to process', parseInt) + .option('--markFixed', 'Mark fixed suggestions in database', false) + .option('--dryRun', 'Dry run mode (default: true)', true); + +program.parse(); +const options = program.opts(); + +// Validate required options +if (!options.siteId) { + console.error('❌ Error: --siteId is required'); + process.exit(1); +} + +const checker = new StructuredDataFixChecker(options); +checker.run(); \ No newline at end of file diff --git a/scripts/populate-env.sh b/scripts/populate-env.sh index b27110d18..6d9af6f9b 100755 --- a/scripts/populate-env.sh +++ b/scripts/populate-env.sh @@ -1,6 +1,12 @@ #!/bin/bash # Define the secret name and region +unset AWS_ACCESS_KEY_ID +unset AWS_SECRET_ACCESS_KEY +unset AWS_SESSION_TOKEN + +aws sts get-caller-identity + SECRET_NAME="/helix-deploy/spacecat-services/audit-worker/latest" REGION="us-east-1"