Skip to content

Commit 8e85aa5

Browse files
committed
Add a first stab at parsing the HTML
1 parent bb25830 commit 8e85aa5

9 files changed

+85
-17
lines changed

.DS_Store

0 Bytes
Binary file not shown.
+2-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
4-
<TargetFramework>netstandard2.1</TargetFramework>
4+
<TargetFramework>net8.0</TargetFramework>
55
<Nullable>enable</Nullable>
66
</PropertyGroup>
77

88
<ItemGroup>
9-
<PackageReference Include="PdfPig" Version="0.1.8" />
9+
<PackageReference Include="HtmlAgilityPack" Version="1.11.54" />
1010
</ItemGroup>
1111

1212
</Project>

AdobeDocsParser/Parser.cs

+20-13
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,33 @@
22
using System.Collections.Generic;
33
using System.IO;
44
using System.Text;
5-
using UglyToad.PdfPig.Content;
6-
using UglyToad.PdfPig;
75
using System.Linq;
6+
using HtmlAgilityPack;
87

98
namespace AdobeDocsParser;
109

11-
public class Parser(string _pdfFilePath)
10+
public class Parser
1211
{
13-
public void Test()
12+
public bool TryParseHeader(HtmlNode node, out HeaderNode header)
1413
{
15-
using var document = PdfDocument.Open(_pdfFilePath);
16-
17-
foreach (Page page in document.GetPages())
14+
if (node.Name == "p")
1815
{
19-
var letters = page.Letters;
20-
var example = string.Join(string.Empty, letters.Select(x => x.Value));
21-
22-
var words = page.GetWords();
23-
24-
var images = page.GetImages();
16+
if (node.FirstChild?.Name == "strong")
17+
{
18+
header = new(node.FirstChild.InnerText);
19+
return true;
20+
}
2521
}
22+
23+
header = new(string.Empty);
24+
return false;
2625
}
2726
}
27+
28+
/// <summary>
/// Immutable result of a successful header parse.
/// </summary>
/// <param name="Header">The text of the header.</param>
public record HeaderNode(string Header);
29+
30+
/// <summary>
/// Convenience extensions over HtmlAgilityPack's <see cref="HtmlNode"/>.
/// </summary>
public static class AgilityPackExtensions
{
    /// <summary>
    /// Returns the element nodes that are immediate children of <paramref name="node"/>,
    /// selected with the relative XPath <c>*</c>.
    /// </summary>
    /// <param name="node">The node whose child elements are enumerated.</param>
    /// <returns>
    /// The matching child elements, or an empty sequence when there are none. Never null.
    /// </returns>
    public static IEnumerable<HtmlNode> GetDirectDescendents(this HtmlNode node)
        // SelectNodes returns null (not an empty collection) when the XPath matches
        // nothing; normalise that to an empty sequence so callers can enumerate safely.
        => node.SelectNodes("*") ?? Enumerable.Empty<HtmlNode>();
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>net8.0</TargetFramework>
5+
<ImplicitUsings>enable</ImplicitUsings>
6+
<Nullable>enable</Nullable>
7+
8+
<IsPackable>false</IsPackable>
9+
<IsTestProject>true</IsTestProject>
10+
</PropertyGroup>
11+
12+
<ItemGroup>
13+
<PackageReference Include="HtmlAgilityPack" Version="1.11.54" />
14+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.6.0" />
15+
<PackageReference Include="NUnit" Version="3.13.3" />
16+
<PackageReference Include="NUnit3TestAdapter" Version="4.2.1" />
17+
<PackageReference Include="NUnit.Analyzers" Version="3.6.1" />
18+
<PackageReference Include="coverlet.collector" Version="6.0.0" />
19+
</ItemGroup>
20+
21+
<ItemGroup>
22+
<ProjectReference Include="..\AdobeDocsParser\AdobeDocsParser.csproj" />
23+
</ItemGroup>
24+
25+
</Project>

AdobeDocsRunner.Tests/GlobalUsings.cs

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
global using NUnit.Framework;

AdobeDocsRunner.Tests/ParserTests.cs

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
using AdobeDocsParser;
2+
using HtmlAgilityPack;
3+
4+
namespace AdobeDocsRunner.Tests
{
    /// <summary>
    /// Unit tests for <see cref="Parser"/>.
    /// </summary>
    public class ParserTests
    {
        /// <summary>
        /// A paragraph whose first child is a <c>strong</c> element is parsed as a header,
        /// and the header text is the text of that <c>strong</c> element.
        /// </summary>
        [Test]
        public void Parses_Header_Node()
        {
            // Arrange
            HtmlNode paragraph = CreateHeaderNode("Test");
            Parser sut = new Parser();

            // Act
            bool parsed = sut.TryParseHeader(paragraph, out HeaderNode header);

            // Assert
            Assert.That(parsed, Is.True);
            Assert.That(header.Header, Is.EqualTo("Test"));
        }

        /// <summary>
        /// Builds a paragraph node wrapping <paramref name="header"/> in a <c>strong</c> element.
        /// </summary>
        private static HtmlNode CreateHeaderNode(string header)
            => HtmlNode.CreateNode($"<p><strong>{header}</strong></p>");
    }
}

AdobeDocsRunner/Program.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22

33
using AdobeDocsParser;
44

5-
var parser = new Parser(@"\\Mac\Home\Desktop\ae-scripting-docsforadobe-dev-en-latest.pdf");
5+
var parser = new Parser(@"https://ae-scripting.docsforadobe.dev/general/application.html");
66
parser.Test();

AdobeScriptMaker.Core.sln

+6
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AdobeDocsParser", "AdobeDoc
5151
EndProject
5252
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AdobeDocsRunner", "AdobeDocsRunner\AdobeDocsRunner.csproj", "{2AD10B1C-A933-45C8-B890-8E4BFAE33A7F}"
5353
EndProject
54+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AdobeDocsRunner.Tests", "AdobeDocsRunner.Tests\AdobeDocsRunner.Tests.csproj", "{F37DC8AB-FDD8-4F10-BD31-C89F08AC6111}"
55+
EndProject
5456
Global
5557
GlobalSection(SolutionConfigurationPlatforms) = preSolution
5658
Debug|Any CPU = Debug|Any CPU
@@ -149,6 +151,10 @@ Global
149151
{2AD10B1C-A933-45C8-B890-8E4BFAE33A7F}.Debug|Any CPU.Build.0 = Debug|Any CPU
150152
{2AD10B1C-A933-45C8-B890-8E4BFAE33A7F}.Release|Any CPU.ActiveCfg = Release|Any CPU
151153
{2AD10B1C-A933-45C8-B890-8E4BFAE33A7F}.Release|Any CPU.Build.0 = Release|Any CPU
154+
{F37DC8AB-FDD8-4F10-BD31-C89F08AC6111}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
155+
{F37DC8AB-FDD8-4F10-BD31-C89F08AC6111}.Debug|Any CPU.Build.0 = Debug|Any CPU
156+
{F37DC8AB-FDD8-4F10-BD31-C89F08AC6111}.Release|Any CPU.ActiveCfg = Release|Any CPU
157+
{F37DC8AB-FDD8-4F10-BD31-C89F08AC6111}.Release|Any CPU.Build.0 = Release|Any CPU
152158
EndGlobalSection
153159
GlobalSection(SolutionProperties) = preSolution
154160
HideSolutionNode = FALSE

Directory.Build.props

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<Project>
22
<PropertyGroup>
3-
<LangVersion>Latest</LangVersion>
3+
<LangVersion>12.0</LangVersion>
44
</PropertyGroup>
55
</Project>

0 commit comments

Comments
 (0)