Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## 3.3.1

- Improve parsing to correctly handle spaces in CSS style #222

## 3.3.0

- Rewriting of parsing to use `Span<char>` instead of Regex for +25% performance gain 🚀
Expand Down
1 change: 1 addition & 0 deletions Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
<Nullable>enable</Nullable>
<LangVersion>latest</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Version>3.3.1</Version>
</PropertyGroup>

<PropertyGroup>
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

HtmlToOpenXml is a small .Net library that converts simple or advanced HTML to plain OpenXml components. This program was started in 2009, initially to convert users' comments into Word.

This library supports both **.Net Framework 4.6.2**, **.NET Standard 2.0** and **.NET 8** which are all LTS.
This library supports **.Net Framework 4.6.2**, **.NET Standard 2.0**, **.NET 8** and **.NET 10**, which are all LTS.

Depends on [DocumentFormat.OpenXml](https://www.nuget.org/packages/DocumentFormat.OpenXml/) and [AngleSharp](https://www.nuget.org/packages/AngleSharp).

Expand Down
12 changes: 0 additions & 12 deletions src/Html2OpenXml/HtmlToOpenXml.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,10 @@
<AssemblyName>HtmlToOpenXml</AssemblyName>
<RootNamespace>HtmlToOpenXml</RootNamespace>
<PackageId>HtmlToOpenXml.dll</PackageId>
<Version>3.3.0</Version>
<PackageIcon>icon.png</PackageIcon>
<PackageReleaseNotes>See changelog https://github.com/onizet/html2openxml/blob/master/CHANGELOG.md</PackageReleaseNotes>
<PackageReadmeFile>README.md</PackageReadmeFile>
<PackageTags>office openxml netcore html</PackageTags>
<AssemblyVersion>3.3.0</AssemblyVersion>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<PackageProjectUrl>https://github.com/onizet/html2openxml</PackageProjectUrl>
<RepositoryUrl>https://github.com/onizet/html2openxml</RepositoryUrl>
Expand Down Expand Up @@ -61,15 +59,5 @@
<PropertyGroup Condition="'$(GITHUB_ACTIONS)' == 'true'">
<ContinuousIntegrationBuild>true</ContinuousIntegrationBuild>
</PropertyGroup>

<!-- replace nuget release notes from changelog -->
<Target Name="PreparePackageReleaseNotesFromFile" BeforeTargets="GenerateNuspec">
<ReadLinesFromFile File="../../CHANGELOG.md">
<Output TaskParameter="Lines" ItemName="ReleaseNoteLines" />
</ReadLinesFromFile>
<PropertyGroup>
<PackageReleaseNotes>@(ReleaseNoteLines, '%0a')</PackageReleaseNotes>
</PropertyGroup>
</Target>

</Project>
23 changes: 12 additions & 11 deletions src/Html2OpenXml/Utilities/Converter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ static class Converter
{
Span<char> loweredValue = span.Length <= 128 ? stackalloc char[span.Length] : new char[span.Length];
span.ToLowerInvariant(loweredValue);
return loweredValue switch
return loweredValue.Trim() switch
{
"left" => JustificationValues.Left,
"left" => JustificationValues.Left,
"right" => JustificationValues.Right,
"center" => JustificationValues.Center,
"justify" => JustificationValues.Both,
Expand All @@ -43,7 +43,7 @@ static class Converter
{
Span<char> loweredValue = span.Length <= 128 ? stackalloc char[span.Length] : new char[span.Length];
span.ToLowerInvariant(loweredValue);
return loweredValue switch
return loweredValue.Trim() switch
{
"top" => TableVerticalAlignmentValues.Top,
"middle" => TableVerticalAlignmentValues.Center,
Expand All @@ -61,7 +61,7 @@ public static Unit ToFontSize(ReadOnlySpan<char> span)

Span<char> loweredValue = span.Length <= 128 ? stackalloc char[span.Length] : new char[span.Length];
span.ToLowerInvariant(loweredValue);
var unit = loweredValue switch
var unit = loweredValue.Trim() switch
{
"1" or "xx-small" => new Unit(UnitMetric.Point, 10),
"2" or "x-small" => new Unit(UnitMetric.Point, 15),
Expand Down Expand Up @@ -92,7 +92,7 @@ public static Unit ToFontSize(ReadOnlySpan<char> span)

Span<char> loweredValue = span.Length <= 128 ? stackalloc char[span.Length] : new char[span.Length];
span.ToLowerInvariant(loweredValue);
return loweredValue switch
return loweredValue.Trim() switch
{
"small-caps" => FontVariant.SmallCaps,
"normal" => FontVariant.Normal,
Expand All @@ -106,7 +106,7 @@ public static Unit ToFontSize(ReadOnlySpan<char> span)

Span<char> loweredValue = span.Length <= 128 ? stackalloc char[span.Length] : new char[span.Length];
span.ToLowerInvariant(loweredValue);
return loweredValue switch
return loweredValue.Trim() switch
{
"italic" or "oblique" => FontStyle.Italic,
"normal" => FontStyle.Normal,
Expand All @@ -120,7 +120,7 @@ public static Unit ToFontSize(ReadOnlySpan<char> span)

Span<char> loweredValue = span.Length <= 128 ? stackalloc char[span.Length] : new char[span.Length];
span.ToLowerInvariant(loweredValue);
return loweredValue switch
return loweredValue.Trim() switch
{
"700" or "bold" => FontWeight.Bold,
"bolder" => FontWeight.Bolder,
Expand All @@ -135,8 +135,9 @@ public static Unit ToFontSize(ReadOnlySpan<char> span)

// return the first font name
Span<Range> tokens = stackalloc Range[1];
return span.SplitCompositeAttribute(tokens, ',') switch {
1 => span.Slice(tokens[0]).ToString(),
return span.SplitCompositeAttribute(tokens, ',') switch
{
1 => span.Slice(tokens[0]).Trim().ToString(),
_ => null
};
}
Expand All @@ -148,7 +149,7 @@ public static BorderValues ToBorderStyle(ReadOnlySpan<char> span)

Span<char> loweredValue = span.Length <= 128 ? stackalloc char[span.Length] : new char[span.Length];
span.ToLowerInvariant(loweredValue);
return loweredValue switch
return loweredValue.Trim() switch
{
"dotted" => BorderValues.Dotted,
"dashed" => BorderValues.Dashed,
Expand Down Expand Up @@ -177,7 +178,7 @@ public static ICollection<TextDecoration> ToTextDecoration(ReadOnlySpan<char> va
var tokenCount = span.Split(tokens, ' ', StringSplitOptions.RemoveEmptyEntries);
for (int i = 0; i < tokenCount; i++)
{
switch (span.Slice(tokens[i]))
switch (span.Slice(tokens[i]).Trim())
{
case "underline": decorations.Add(TextDecoration.Underline); break;
case "line-through": decorations.Add(TextDecoration.LineThrough); break;
Expand Down
16 changes: 8 additions & 8 deletions test/HtmlToOpenXml.Tests/ElementTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ public void PhrasingTag_ReturnsRunWithDefaultStyle<T> (string html) where T : Op
public void MultipleStyle_ShouldBeAllApplied ()
{
var elements = converter.Parse(@"<b style=""
font-style:italic;
font-size:12px;
font-family:Verdana;
font-variant:small-caps;
color:white;
text-decoration:wavy line-through double;
background:red;
font-style: italic;
font-size: 12px;
font-family: Verdana;
font-variant: small-caps;
color: white;
text-decoration: wavy line-through double;
background: red;
"">bold with italic style</b>");
Assert.That(elements, Has.Count.EqualTo(1));

Expand All @@ -67,7 +67,7 @@ public void MultipleStyle_ShouldBeAllApplied ()
}
}

[TestCase("<span style='font-style:normal'><span style='font-style:italic'>Italic!</span></span>")]
[TestCase("<span style='font-style: normal'><span style='font-style: italic'>Italic!</span></span>")]
[TestCase("<div style='font-style:italic'><span style='font-style:normal'><span style='font-style:italic'>Italic!</span></span></div>")]
[TestCase("<div id='outer' style='font-style:italic'><div id='inner'>Italic</div></div>")]
public void NestedTagWithStyle_ShouldCascadeParentStyle (string html)
Expand Down