diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 218511f..30ca994 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,6 +12,10 @@ jobs:
matrix:
php: ['8.1', '8.2', '8.3', '8.4']
stability: [prefer-lowest, prefer-stable]
+ include:
+ - php: '8.5'
+ flags: "--ignore-platform-req=php"
+ stability: prefer-stable
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -45,11 +49,16 @@ jobs:
- name: Run Unit tests with coverage
run: composer phpunit -- ${{ matrix.phpunit-flags }}
+ if: ${{ matrix.php != '8.5' }}
+
+ - name: Run Unit tests without coverage
+ run: vendor/bin/phpunit --no-coverage
+ if: ${{ matrix.php == '8.5' }}
- name: Run static analysis
run: composer phpstan
- if: ${{ matrix.php == '8.3' && matrix.stability == 'prefer-stable'}}
+ if: ${{ matrix.php == '8.4' && matrix.stability == 'prefer-stable'}}
- name: Run Coding style rules
run: composer phpcs:fix
- if: ${{ matrix.php == '8.3' && matrix.stability == 'prefer-stable'}}
+ if: ${{ matrix.php == '8.4' && matrix.stability == 'prefer-stable'}}
diff --git a/.php-cs-fixer.php b/.php-cs-fixer.php
index 79865d7..28ec97a 100644
--- a/.php-cs-fixer.php
+++ b/.php-cs-fixer.php
@@ -7,6 +7,7 @@
$config = new PhpCsFixer\Config();
return $config
+ ->setUnsupportedPhpVersionAllowed(true)
->setRules([
'@PSR12' => true,
'array_syntax' => ['syntax' => 'short'],
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c72e73d..9bc34e5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,12 +2,23 @@
All Notable changes to `bakame/html-table` will be documented in this file.
+# [Next](https://github.com/bakame-php/html-table/compare/0.5.0...main) - TBD
+
+## What's Changed
+
+* **BC BREAK:** the `Table` class now implements the `TabularDataProvider` instead of the `TabularDataReader` interface.
+* **BC BREAK:** the `ParserError` class now extends the `Exception` instead of the `InvalidArgumentException` exception class.
+* **BC BREAK:** the `Parser::withoutFormatter` is deprecated; use `Parser::withFormatter(null)` instead.
+* The `Parser::new` static constructor is deprecated; use the default constructor instead.
+* Boolean parameters are now replaced by `Enum` for maintenance and readability.
+* Added `SplFileInfo` support to `Parser::parseHtml` and `Parser::parseFile`.
+
# [0.5.0](https://github.com/bakame-php/html-table/compare/0.4.0...0.5.0) - 2025-07-06
## What's Changed
* Upgrade dependencies on `aide-error` to version `0.2.0`
-* fix use statement by @tacman in https://github.com/bakame-php/html-table/pull/6
+* fix use statements by @tacman in https://github.com/bakame-php/html-table/pull/6
## New Contributors
diff --git a/README.md b/README.md
index 22f3583..aeaf4d3 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
[](https://github.com/sponsors/nyamsprod)
`bakame/html-table` is a small PHP package that allows you to parse, import and manipulate
-tabular data represented as HTML Table. Once installed you will be able to do the following:
+tabular data represented as HTML Table. Once installed, you will be able to do the following:
```php
use Bakame\TabularData\HtmlTable\Parser;
@@ -18,6 +18,7 @@ $table = Parser::new()
->parseFile('https://www.bbc.com/sport/football/tables');
$table
+ ->getTabularData()
->filter(fn (array $row) => (int) $row['points'] >= 10)
->sorted(fn (array $rowA, array $rowB) => (int) $rowB['for'] <=> (int) $rowA['for'])
->fetchPairs('team', 'for');
@@ -35,7 +36,7 @@ $table
## System Requirements
-**league\csv 9.23.0** library is required. (since version 0.4.0).
+**league\csv 9.25.0** library is required. (since version 0.6.0).
## Installation
@@ -62,7 +63,6 @@ use Bakame\HtmlTable\Parser;
$parser = Parser::new()
->ignoreTableHeader()
->ignoreXmlErrors()
- ->withoutFormatter()
->tableCaption('This is a beautiful table');
```
@@ -74,7 +74,7 @@ If parsing is not possible a `ParseError` exception will be thrown.
```php
use Bakame\HtmlTable\Parser;
-$parser = Parser::new();
+$parser = new Parser();
$table = $parser->parseHtml('
');
$table = $parser->parseFile('path/to/html/file.html');
@@ -129,24 +129,25 @@ $html = <<
HTML;
-$table = Parser::new()->parseHtml($html);
+$table = (new Parser())->parseHtml($html);
$table->getCaption(); //returns 'Songs'
$table->getHeader(); //returns ['Title','Singer', 'Country']
-$table->nth(2); //returns ["Title" => "Nzinzi", "Singer" => "Emeneya", "Country" => "DR Congo"]
-json_encode($table->slice(0, 1));
-//{"caption":"Songs","header":["Title","Singer","Country"],"rows":[{"Title":"Nakei Nairobi","Singer":"Mbilia Bel","Country":"DR Congo"}]}
+$tableData = $table->getTabularData();
+$tableData->nth(2); //returns ["Title" => "Nzinzi", "Singer" => "Emeneya", "Country" => "DR Congo"]
+json_encode($tableData->slice(0, 1));
+//[{"Title":"Nakei Nairobi","Singer":"Mbilia Bel","Country":"DR Congo"}]
```
#### Default configuration
-By default, when calling the `Parser::new()` named constructor the parser will:
+By default, when instantiated with `new Parser()`, the parser will:
- try to parse the first table found in the page
- expect the table header row to be the first `tr` found in the `thead` section of your table
- exclude the table `thead` section when extracting the table content.
- ignore XML errors.
- have no formatter attached.
-- have no default caption to used if none is present in the table.
+- have no default caption to be used if none is present in the table.
Each of the following settings can be changed to improve the conversion against your business rules:
@@ -163,9 +164,9 @@ If the expression is valid, and a list of table is found, the first result will
```php
use Bakame\HtmlTable\Parser;
-$parser = Parser::new()->tablePosition('table-id'); // parses the
-$parser = Parser::new()->tablePosition(3); // parses the 4th table of the page
-$parser = Parser::new()->tableXPathPosition("//main/div/table");
+$parser = (new Parser())->tablePosition('table-id'); // parses the
+$parser = (new Parser())->tablePosition(3); // parses the 4th table of the page
+$parser = (new Parser())->tableXPathPosition("//main/div/table");
//parse the first table that matches the xpath expression
```
@@ -179,8 +180,8 @@ You can optionally define a caption for your table if none is present or found d
```php
use Bakame\HtmlTable\Parser;
-$parser = Parser::new()->tableCaption('this is a generated caption');
-$parser = Parser::new()->tableCaption(null); // remove any default caption set
+$parser = (new Parser())->tableCaption('this is a generated caption');
+$parser = (new Parser())->tableCaption(null); // remove any default caption set
```
### tableHeader, tableHeaderPosition, ignoreTableHeader and resolveTableHeader
@@ -197,7 +198,7 @@ Tells where to locate and resolve the table header
use Bakame\HtmlTable\Parser;
use Bakame\HtmlTable\Section;
-$parser = Parser::new()->tableHeaderPosition(Section::Thead, 3);
+$parser = (new Parser())->tableHeaderPosition(Section::Thead, 3);
// header is the 4th row in the table section
```
@@ -227,25 +228,25 @@ If no resolution is done, no header will be included in the returned `Table` ins
```php
use Bakame\HtmlTable\Parser;
-$parser = Parser::new()->ignoreTableHeader(); // no table header will be resolved
-$parser = Parser::new()->resolveTableHeader(); // will attempt to resolve the table header
+$parser = (new Parser())->ignoreTableHeader(); // no table header will be resolved
+$parser = (new Parser())->resolveTableHeader(); // will attempt to resolve the table header
```
#### tableHeader
-You can specify directly the header of your table and override any other table header
+You can directly specify the header of your table and override any other table header
related configuration with this configuration
```php
use Bakame\HtmlTable\Parser;
use Bakame\HtmlTable\Section;
-$parser = Parser::new()->tableHeader(['rank', 'team', 'winner']);
+$parser = (new Parser())->tableHeader(['rank', 'team', 'winner']);
```
**If you specify a non-empty array as the table header, it will take precedence over any other table header related options.**
-**Because it is a tabular data each cell MUST be unique otherwise an exception will be thrown**
+**Because it is tabular data, each cell MUST be unique, otherwise an exception will be thrown**
You can skip or re-arrange the source columns by skipping them by their offsets and/or by
re-ordering the offsets.
@@ -254,8 +255,8 @@ re-ordering the offsets.
use Bakame\HtmlTable\Parser;
use Bakame\HtmlTable\Section;
-$parser = Parser::new()->tableHeader([3 => 'rank', 7 => 'winner', 5 => 'team']);
-// only 3 column will be extracted the 4th, 6th and 8th columns
+$parser = (new Parser())->tableHeader([3 => 'rank', 7 => 'winner', 5 => 'team']);
+// only 3 columns will be extracted: the 4th, 6th and 8th columns
// and re-arrange as 'rank' first and 'team' last
// if a column is missing its value will be PHP `null` type
```
@@ -268,34 +269,34 @@ Tells which section should be parsed based on the `Section` enum
use Bakame\HtmlTable\Parser;
use Bakame\HtmlTable\Section;
-$parser = Parser::new()->includeSection(Section::Tbody); // thead and tfoot are included during parsing
-$parser = Parser::new()->excludeSection(Section::Tr, Section::Tfoot); // table direct tr children and tfoot are not included during parsing
+$parser = (new Parser())->includeSection(Section::Tbody); // thead and tfoot are included during parsing
+$parser = (new Parser())->excludeSection(Section::Tr, Section::Tfoot); // table direct tr children and tfoot are not included during parsing
```
**By default, the `thead` section is not parse. If a `thead` row is selected to be the header, it will
be parsed independently of this setting.**
-**⚠️Tips:** to be sure of which sections will be modified, first remove all previous setting
+**⚠️Tips:** to be sure of which sections will be modified, first remove all previous settings
before applying your configuration as shown below:
```diff
-- Parser::new()->includeSection(Section::tbody);
-+ Parser::new()->excludeSection(...Section::cases())->includeSection(Section::tbody);
+- (new Parser())->includeSection(Section::tbody);
++ (new Parser())->excludeSection(...Section::cases())->includeSection(Section::tbody);
```
The first call will still include the `tfoot` and the `tr` sections, whereas the second call
-remove any previous setting guaranting that only the `tbody` if present will be parsed.
+removes any previous setting, guaranteeing that only the `tbody`, if present, will be parsed.
### withFormatter and withoutFormatter
-Adds or remove a record formatter applied to the data extracted from the table before you
+Add or remove a record formatter applied to the data extracted from the table before you
can access it. The header is not affected by the formatter if it is defined.
```php
use Bakame\HtmlTable\Parser;
-$parser = Parser::new()->withFormatter($formatter); // attach a formatter to the parser
-$parser = Parser::new()->withoutFormatter(); // removed the attached formatter if it exists
+$parser = (new Parser())->withFormatter($formatter); // attach a formatter to the parser
+$parser = (new Parser())->withFormatter(null); // removes the attached formatter if it exists
```
The formatter closure signature should be:
@@ -304,7 +305,7 @@ The formatter closure signature should be:
function (array $record): array;
```
-If a header was defined or specified, the submitted record will have the header definition set,
+If a header was defined or specified, the submitted record will have the header definition set;
otherwise an array list is provided.
The following formatter will work on any table content as long as it is defined as a string.
@@ -333,8 +334,8 @@ Tells whether the parser should ignore or throw in case of malformed HTML conten
```php
use Bakame\HtmlTable\Parser;
-$parser = Parser::new()->ignoreXmlErrors(); // ignore the XML errors
-$parser = Parser::new()->failOnXmlErrors(3); // throw on XML errors
+$parser = (new Parser())->ignoreXmlErrors(); // ignore the XML errors
+$parser = (new Parser())->failOnXmlErrors(); // throw on XML errors
```
## Testing
diff --git a/composer.json b/composer.json
index 4e57a13..525b469 100644
--- a/composer.json
+++ b/composer.json
@@ -30,9 +30,7 @@
"ext-libxml": "*",
"ext-mbstring": "*",
"ext-simplexml": "*",
- "bakame/aide-enums": "^0.1.0",
- "bakame/aide-error": "^0.2.0",
- "league/csv": "^9.23.0"
+ "league/csv": "^9.25.0"
},
"require-dev": {
"ext-curl": "*",
@@ -54,7 +52,7 @@
}
},
"scripts": {
- "phpcs": "PHP_CS_FIXER_IGNORE_ENV=1 php-cs-fixer fix -vvv --diff --dry-run --allow-risky=yes --ansi",
+ "phpcs": "php-cs-fixer fix -vvv --diff --dry-run --allow-risky=yes --ansi",
"phpcs:fix": "php-cs-fixer fix -vvv --allow-risky=yes --ansi",
"phpstan": "phpstan analyse -c phpstan.neon --ansi --memory-limit=192M",
"phpunit": "XDEBUG_MODE=coverage phpunit --coverage-text",
diff --git a/src/Feature.php b/src/Feature.php
new file mode 100644
index 0000000..e8c59b5
--- /dev/null
+++ b/src/Feature.php
@@ -0,0 +1,11 @@
+ $tableHeader
* @param array $includedSections
*/
- private function __construct(
- private readonly string $tableExpression,
- private readonly ?string $caption,
- private readonly array $tableHeader,
- private readonly bool $ignoreTableHeader,
- private readonly string $tableHeaderExpression,
- private readonly array $includedSections,
- private readonly ?Closure $formatter,
- private readonly bool $throwOnXmlErrors,
+ public function __construct(
+ private readonly string $tableExpression = '(//table)[1]',
+ private readonly ?string $caption = null,
+ private readonly array $tableHeader = [],
+ private readonly Feature $ignoreTableHeader = Feature::Disabled,
+ private readonly string $tableHeaderExpression = '(//table/thead/tr)[1]',
+ private readonly array $includedSections = [Section::Tbody, Section::Tfoot, Section::Tr],
+ private readonly ?Closure $formatter = null,
+ private readonly Feature $throwOnXmlErrors = Feature::Disabled,
) {
}
- public static function new(): self
- {
- return new self(
- '(//table)[1]',
- null,
- [],
- false,
- '(//table/thead/tr)[1]',
- [Section::Tbody, Section::Tfoot, Section::Tr],
- null,
- false,
- );
- }
-
public function tableXPathPosition(string $expression): self
{
- $query = (new DOMXPath(new DOMDocument()))->query(...);
- $domXPath = Cloak::warning($query);
+ if ($expression === $this->tableExpression) {
+ return $this;
+ }
- return match (true) {
- $expression === $this->tableExpression => $this,
- false === $domXPath($expression) => throw new ParserError(
+ try {
+ Warning::trap((new DOMXPath(new DOMDocument()))->query(...), $expression);
+ } catch (ErrorException $exception) {
+ throw new ParserError(
message: 'The xpath expression `'.$expression.'` is invalid.',
- previous: $domXPath->errors()->last()
- ),
- default => new self(
- $expression,
- $this->caption,
- $this->tableHeader,
- $this->ignoreTableHeader,
- $this->tableHeaderExpression,
- $this->includedSections,
- $this->formatter,
- $this->throwOnXmlErrors,
- ),
- };
+ previous: $exception
+ );
+ }
+
+ return new self(
+ $expression,
+ $this->caption,
+ $this->tableHeader,
+ $this->ignoreTableHeader,
+ $this->tableHeaderExpression,
+ $this->includedSections,
+ $this->formatter,
+ $this->throwOnXmlErrors,
+ );
}
/**
@@ -135,13 +132,13 @@ public function tableHeader(array $headerRow): self
public function ignoreTableHeader(): self
{
- return match ($this->ignoreTableHeader) {
+ return match (Feature::Enabled === $this->ignoreTableHeader) {
true => $this,
false => new self(
$this->tableExpression,
$this->caption,
$this->tableHeader,
- true,
+ Feature::Enabled,
$this->tableHeaderExpression,
$this->includedSections,
$this->formatter,
@@ -152,13 +149,13 @@ public function ignoreTableHeader(): self
public function resolveTableHeader(): self
{
- return match ($this->ignoreTableHeader) {
+ return match (Feature::Enabled === $this->ignoreTableHeader) { // true only while the header is ignored; otherwise nothing to change
false => $this,
true => new self(
$this->tableExpression,
$this->caption,
$this->tableHeader,
- false,
+ Feature::Disabled,
$this->tableHeaderExpression,
$this->includedSections,
$this->formatter,
@@ -258,7 +255,7 @@ public function excludeSection(Section ...$sections): self
public function failOnXmlErrors(): self
{
- return match ($this->throwOnXmlErrors) {
+ return match (Feature::Enabled === $this->throwOnXmlErrors) {
true => $this,
false => new self(
$this->tableExpression,
@@ -268,14 +265,14 @@ public function failOnXmlErrors(): self
$this->tableHeaderExpression,
$this->includedSections,
$this->formatter,
- true,
+ Feature::Enabled,
),
};
}
public function ignoreXmlErrors(): self
{
- return match ($this->throwOnXmlErrors) {
+ return match (Feature::Enabled === $this->throwOnXmlErrors) { // true only while XML errors throw; otherwise nothing to change
false => $this,
true => new self(
$this->tableExpression,
@@ -285,29 +282,15 @@ public function ignoreXmlErrors(): self
$this->tableHeaderExpression,
$this->includedSections,
$this->formatter,
- false,
+ Feature::Disabled,
),
};
}
- public function withFormatter(Closure $formatter): self
- {
- return new self(
- $this->tableExpression,
- $this->caption,
- $this->tableHeader,
- $this->ignoreTableHeader,
- $this->tableHeaderExpression,
- $this->includedSections,
- $formatter,
- $this->throwOnXmlErrors,
- );
- }
-
- public function withoutFormatter(): self
+ public function withFormatter(?Closure $formatter): self
{
- return match (null) {
- $this->formatter => $this,
+ return match (true) {
+ $formatter === $this->formatter => $this,
default => new self(
$this->tableExpression,
$this->caption,
@@ -315,7 +298,7 @@ public function withoutFormatter(): self
$this->ignoreTableHeader,
$this->tableHeaderExpression,
$this->includedSections,
- null,
+ $formatter,
$this->throwOnXmlErrors,
),
};
@@ -339,7 +322,7 @@ public function tableCaption(?string $caption): self
}
/**
- * @param resource|string $filenameOrStream
+ * @param SplFileInfo|resource|string $filenameOrStream
* @param resource|null $filenameContext
*
* @throws ParserError
@@ -349,20 +332,21 @@ public function tableCaption(?string $caption): self
*/
public function parseFile(mixed $filenameOrStream, $filenameContext = null): Table
{
+ if ($filenameOrStream instanceof SplFileInfo) {
+ return $this->parseHtml($filenameOrStream);
+ }
+
if (is_resource($filenameOrStream)) {
return $this->parseHtml($this->streamToString($filenameOrStream));
}
- $fopen = Cloak::warning(fopen(...));
- $resource = $fopen(...match ($filenameContext) {
- null => [$filenameOrStream, 'r'],
- default => [$filenameOrStream, 'r', false, $filenameContext],
- });
-
- if (!is_resource($resource)) {
+ try {
+ /** @var resource $resource */
+ $resource = Warning::trap(fopen(...), ...['filename' => $filenameOrStream, 'mode' => 'r', 'context' => $filenameContext]);
+ } catch (ErrorException $exception) {
throw new ParserError(
message: '`'.$filenameOrStream.'`: failed to open stream: No such file or directory.',
- previous: $fopen->errors()->last()
+ previous: $exception
);
}
@@ -375,27 +359,23 @@ public function parseFile(mixed $filenameOrStream, $filenameContext = null): Tab
/**
* @throws ParserError
* @throws SyntaxError
+ * @throws CannotInsertRecord
*
* @return Table>
*/
- public function parseHtml(DOMDocument|DOMElement|SimpleXMLElement|Stringable|string $source): Table
+ public function parseHtml(SplFileInfo|DOMDocument|DOMElement|SimpleXMLElement|Stringable|string $source): Table
{
/** @var DOMNodeList $query */
$query = (new DOMXPath($this->sourceToDomDocument($source)))->query($this->tableExpression);
$table = $query->item(0);
- if (!$table instanceof DOMElement) {
- throw new ParserError('The HTML table could not be found in the submitted html.');
- }
-
+ $table instanceof DOMElement || throw new ParserError('The HTML table could not be found in the submitted html.');
$tagName = strtolower($table->nodeName);
- if ('table' !== $tagName) {
- throw new ParserError('Expected a table element to be selected; received `'.$tagName.'` instead.');
- }
+ 'table' === $tagName || throw new ParserError('Expected a table element to be selected; received `'.$tagName.'` instead.');
$xpath = new DOMXPath($this->sourceToDomDocument($table));
$header = match (true) {
[] !== $this->tableHeader => $this->tableHeader,
- $this->ignoreTableHeader => [],
+ Feature::Enabled === $this->ignoreTableHeader => [],
default => $this->extractTableHeader($xpath),
};
@@ -421,20 +401,20 @@ public function parseHtml(DOMDocument|DOMElement|SimpleXMLElement|Stringable|str
*/
private function streamToString($stream): string
{
- $stream_get_contents = Cloak::warning(stream_get_contents(...));
- /** @var string|false $html */
- $html = $stream_get_contents($stream);
+ try {
+ /** @var string $result */
+ $result = Warning::trap(stream_get_contents(...), $stream);
- return match (false) {
- $html => throw new ParserError('The resource could not be read.', 0, $stream_get_contents->errors()->last()),
- default => $html,
- };
+ return $result;
+ } catch (ErrorException $exception) {
+ throw new ParserError(message: 'The resource could not be read.', previous: $exception);
+ }
}
/**
* @throws ParserError
*/
- private function sourceToDomDocument(DOMDocument|SimpleXMLElement|DOMElement|Stringable|string $document): DOMDocument
+ private function sourceToDomDocument(SplFileInfo|DOMDocument|SimpleXMLElement|DOMElement|Stringable|string $document): DOMDocument
{
if ($document instanceof DOMDocument) {
return $document;
@@ -453,13 +433,22 @@ private function sourceToDomDocument(DOMDocument|SimpleXMLElement|DOMElement|Str
return $dom;
}
+ $content = (string) $document;
+ if ($document instanceof SplFileInfo) {
+ $content = '';
+ $file = $document->openFile();
+ while (!$file->eof()) {
+ $content .= $file->fgets();
+ }
+ }
+
libxml_use_internal_errors(true);
- $dom->loadHTML((string) $document);
+ $dom->loadHTML($content, LIBXML_NOWARNING | LIBXML_NOERROR);
$errors = libxml_get_errors();
libxml_clear_errors();
return match (true) {
- $this->throwOnXmlErrors && [] !== $errors => throw ParserError::dueToLibXmlErrors($errors),
+ Feature::Enabled === $this->throwOnXmlErrors && [] !== $errors => throw ParserError::dueToLibXmlErrors($errors),
default => $dom,
};
}
@@ -547,7 +536,7 @@ private function extractHeaderRow(DOMElement $tr): array
$tr->setAttribute(self::HEADER_ROW_ATTRIBUTE_NAME, 'true');
}
- return $headerRow;
+ return array_map(fn (string|null $item): string => preg_replace('/\A[\x{A0} \t\n\r\0\x0B]+|[\x{A0} \t\n\r\0\x0B]+\z/u', '', (string) $item) ?? (string) $item, $headerRow); // Unicode-aware trim: byte-wise trim() with "\u{A0}" would corrupt multibyte characters at the edges
}
/**
@@ -631,4 +620,30 @@ private function combineArray(array $record, array $header): array
return $row;
}
+
+ /**
+ * DEPRECATION WARNING! This method will be removed in the next major point release.
+ * @deprecated since version 0.6.0
+ * @see self::withFormatter()
+ *
+ * @codeCoverageIgnore
+ */
+ #[Deprecated(message:'use Bakame\TabularData\HtmlTable\Parser::withFormatter() instead', since:'bakame/html-table:0.6.0')]
+ public function withoutFormatter(): self
+ {
+ return $this->withFormatter(null);
+ }
+
+ /**
+ * DEPRECATION WARNING! This method will be removed in the next major point release.
+ * @deprecated since version 0.6.0
+ * @see self::__construct()
+ *
+ * @codeCoverageIgnore
+ */
+ #[Deprecated(message:'use Bakame\TabularData\HtmlTable\Parser::__construct() instead', since:'bakame/html-table:0.6.0')]
+ public static function new(): self
+ {
+ return new self();
+ }
}
diff --git a/src/ParserError.php b/src/ParserError.php
index 95b98fb..400baed 100644
--- a/src/ParserError.php
+++ b/src/ParserError.php
@@ -4,7 +4,7 @@
namespace Bakame\TabularData\HtmlTable;
-use InvalidArgumentException;
+use Exception;
use LibXMLError;
use function array_count_values;
@@ -16,7 +16,7 @@
use const PHP_EOL;
-class ParserError extends InvalidArgumentException
+class ParserError extends Exception
{
/** @var array */
private array $duplicateColumnNames = [];
diff --git a/src/ParserErrorTest.php b/src/ParserErrorTest.php
index ce51ca0..3f2e9ee 100644
--- a/src/ParserErrorTest.php
+++ b/src/ParserErrorTest.php
@@ -4,9 +4,11 @@
namespace Bakame\TabularData\HtmlTable;
+use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\Test;
use PHPUnit\Framework\TestCase;
+#[CoversClass(ParserError::class)]
final class ParserErrorTest extends TestCase
{
#[Test]
diff --git a/src/ParserTest.php b/src/ParserTest.php
index 174733e..35e6226 100644
--- a/src/ParserTest.php
+++ b/src/ParserTest.php
@@ -7,11 +7,17 @@
use DOMDocument;
use DOMElement;
use League\Csv\TabularDataReader;
+use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\Attributes\Test;
use PHPUnit\Framework\TestCase;
use SimpleXMLElement;
+#[CoversClass(Warning::class)]
+#[CoversClass(Parser::class)]
+#[CoversClass(Feature::class)]
+#[CoversClass(Section::class)]
+#[CoversClass(Table::class)]
final class ParserTest extends TestCase
{
private const HTML = <<tablePosition(0)
- ->tableHeaderPosition(Section::Thead, 0)
+ ->tableHeaderPosition(Section::Thead)
->includeSection(Section::Tbody, Section::Tfoot, Section::Tr)
->tableHeader([])
->resolveTableHeader()
->ignoreXmlErrors()
- ->withoutFormatter()
+ ->withFormatter(null)
->tableCaption(null)
);
}
@@ -63,7 +69,7 @@ public function it_will_throw_if_the_header_contains_duplicate_values(): void
$this->expectException(ParserError::class);
$this->expectExceptionMessage('The header record contains duplicate column names: `foo`, `toto`.');
- Parser::new()->tableHeader($headerRow);
+ (new Parser())->tableHeader($headerRow);
}
#[Test]
@@ -71,7 +77,7 @@ public function it_will_throw_if_the_header_does_not_only_contains_string(): voi
{
$this->expectException(ParserError::class);
- Parser::new()->tableHeader(['foo', 1]); /* @phpstan-ignore-line */
+ (new Parser())->tableHeader(['foo', 1]); /* @phpstan-ignore-line */
}
#[Test]
@@ -80,7 +86,7 @@ public function it_will_throw_if_the_identifier_is_invalid(string|int $identifie
{
$this->expectException(ParserError::class);
- Parser::new()->tablePosition($identifier);
+ (new Parser())->tablePosition($identifier);
}
/**
@@ -102,7 +108,7 @@ public function it_will_throw_if_the_identifier_is_a_negative_integer(): void
{
$this->expectException(ParserError::class);
- Parser::new()->tablePosition(-1);
+ (new Parser())->tablePosition(-1);
}
#[Test]
@@ -110,7 +116,7 @@ public function it_will_throw_if_the_table_header_row_offset_is_negative(): void
{
$this->expectException(ParserError::class);
- Parser::new()->tableHeaderPosition(Section::Thead, -1); /* @phpstan-ignore-line */
+ (new Parser())->tableHeaderPosition(Section::Thead, -1); /* @phpstan-ignore-line */
}
#[Test]
@@ -118,7 +124,7 @@ public function it_will_throw_if_the_xpath_expression_is_invalid(): void
{
$this->expectException(ParserError::class);
- Parser::new()->tableXPathPosition('//table@@invalid');
+ (new Parser())->tableXPathPosition('//table@@invalid');
}
#[Test]
@@ -129,14 +135,14 @@ public function it_will_fail_to_load_any_element_other_than_a_table(): void
HTML;
$this->expectException(ParserError::class);
$this->expectExceptionMessage('Expected a table element to be selected; received `p` instead.');
- Parser::new()->tableXPathPosition('//p')->parseHtml($html);
+ (new Parser())->tableXPathPosition('//p')->parseHtml($html);
}
#[Test]
public function it_can_load_the_first_html_table_found_by_default(): void
{
- $table = Parser::new()->parseHtml(self::HTML);
+ $table = (new Parser())->parseHtml(self::HTML);
$header = ['prenoms', 'nombre', 'sexe', 'annee'];
$row = [
'prenoms' => 'Abdoulaye',
@@ -147,16 +153,16 @@ public function it_can_load_the_first_html_table_found_by_default(): void
self::assertSame(['prenoms', 'nombre', 'sexe', 'annee'], $table->getHeader());
self::assertCount(4, $table);
- self::assertSame($row, $table->first());
+ self::assertSame($row, $table->getTabularData()->first());
- $sliced = $table->slice(0, 1);
- self::assertSame(['caption' => null, 'header' => $header, 'rows' => [$row]], $sliced->jsonSerialize());
+ $sliced = $table->getTabularData()->slice(0, 1);
+ self::assertSame([$row], iterator_to_array($sliced));
}
#[Test]
public function it_can_load_the_first_html_table_found_by_default_without_the_header(): void
{
- $table = Parser::new()->ignoreTableHeader()->parseHtml(self::HTML);
+ $table = (new Parser())->ignoreTableHeader()->parseHtml(self::HTML);
self::assertSame([], $table->getHeader());
self::assertCount(4, $table);
@@ -165,13 +171,13 @@ public function it_can_load_the_first_html_table_found_by_default_without_the_he
'15',
'M',
'2004',
- ], $table->first());
+ ], $table->getTabularData()->first());
}
#[Test]
public function it_can_load_any_html_table_by_occurrence(): void
{
- $table = Parser::new()
+ $table = (new Parser())
->tablePosition(1)
->parseFile(dirname(__DIR__).'/test_files/table.html');
@@ -182,7 +188,7 @@ public function it_can_load_any_html_table_by_occurrence(): void
#[Test]
public function it_can_load_any_html_table_by_attribute_id(): void
{
- $table = Parser::new()
+ $table = (new Parser())
->tablePosition('testb')
->parseFile(dirname(__DIR__).'/test_files/table.html');
@@ -195,7 +201,7 @@ public function it_uses_the_table_first_tr_to_search_for_the_header(): void
{
/** @var resource $stream */
$stream = fopen(dirname(__DIR__).'/test_files/table.html', 'r');
- $table = Parser::new()
+ $table = (new Parser())
->tablePosition('testb')
->tableHeaderPosition(Section::Tr)
->parseFile($stream);
@@ -207,7 +213,7 @@ public function it_uses_the_table_first_tr_to_search_for_the_header(): void
'nombre' => '15',
'sexe' => 'M',
'annee' => '2004',
- ], $table->first());
+ ], $table->getTabularData()->first());
fclose($stream);
}
@@ -217,7 +223,7 @@ public function it_will_fail_to_load_a_missing_file(): void
{
$this->expectException(ParserError::class);
- Parser::new()->parseFile('/path/tp/my/heart.html');
+ (new Parser())->parseFile('/path/tp/my/heart.html');
}
#[Test]
@@ -236,7 +242,7 @@ public function it_uses_the_table_first_tr_in_the_first_tbody_to_search_for_the_
TABLE;
- $table = Parser::new()
+ $table = (new Parser())
->tableHeaderPosition(Section::Tbody)
->parseHtml($html);
@@ -247,7 +253,7 @@ public function it_uses_the_table_first_tr_in_the_first_tbody_to_search_for_the_
'nombre' => '15',
'sexe' => 'M',
'annee' => '2004',
- ], $table->nth(0));
+ ], $table->getTabularData()->nth(0));
}
#[Test]
@@ -255,7 +261,7 @@ public function it_will_throw_if_the_html_is_malformed(): void
{
$this->expectExceptionObject(new ParserError('The HTML table could not be found in the submitted html.'));
- Parser::new()->parseHtml('vasdfadadf');
+ (new Parser())->parseHtml('vasdfadadf');
}
#[Test]
@@ -263,13 +269,13 @@ public function it_will_throw_if_no_table_is_found(): void
{
$this->expectExceptionObject(new ParserError('The HTML table could not be found in the submitted html.'));
- Parser::new()->parseHtml('- foo
');
+ (new Parser())->parseHtml('- foo
');
}
#[Test]
public function it_will_use_the_submitted_headers(): void
{
- $parser = Parser::new()
+ $parser = (new Parser())
->tableHeader(['firstname', 'count', 'gender', 'year']);
$table = $parser->parseHtml(self::HTML);
@@ -280,7 +286,7 @@ public function it_will_use_the_submitted_headers(): void
'count' => '15',
'gender' => 'M',
'year' => '2004',
- ], $table->first());
+ ], $table->getTabularData()->first());
}
@@ -299,7 +305,7 @@ public function it_will_rearrange_the_content_with_table_header(): void
TABLE;
$header = [3 => 'Annee', 2 => 'Sexe', 0 => 'Firstname', 1 => 'Count'];
- $table = Parser::new()
+ $table = (new Parser())
->tableHeader($header)
->parseHtml($html);
@@ -309,10 +315,10 @@ public function it_will_rearrange_the_content_with_table_header(): void
'Sexe' => 'M',
'Firstname' => 'Abel',
'Count' => '14',
- ], $table->first());
+ ], $table->getTabularData()->first());
$header = [3 => 'Annee', 0 => 'Firstname', 1 => 'Count'];
- $table = Parser::new()
+ $table = (new Parser())
->tableHeader($header)
->parseHtml($html);
@@ -321,7 +327,7 @@ public function it_will_rearrange_the_content_with_table_header(): void
'Annee' => '2004',
'Firstname' => 'Abel',
'Count' => '14',
- ], $table->first());
+ ], $table->getTabularData()->first());
}
#[Test]
@@ -338,10 +344,11 @@ public function it_will_duplicate_colspan_data(): void
TABLE;
- $table = Parser::new()->parseHtml($html);
+ $table = (new Parser())->parseHtml($html);
+ $data = $table->getTabularData();
- self::assertSame($table->nth(1), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']);
- self::assertSame($table->nth(0), ['prenoms', 'nombre', 'sexe', 'annee']);
+ self::assertSame($data->nth(1), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']);
+ self::assertSame($data->nth(0), ['prenoms', 'nombre', 'sexe', 'annee']);
}
#[Test]
@@ -361,11 +368,13 @@ public function it_will_ignore_the_malformed_header_by_deault(): void
$dom = new DOMDocument();
$dom->loadHTML($html);
- $table = Parser::new()->parseHtml($dom);
+ $table = (new Parser())->parseHtml($dom);
+
+ $tabularData = $table->getTabularData();
self::assertSame([], $table->getHeader());
- self::assertSame($table->first(), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']);
- self::assertSame($table->nth(1), ['Abel', '14', 'M', '2004']);
+ self::assertSame($tabularData->first(), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']);
+ self::assertSame($tabularData->nth(1), ['Abel', '14', 'M', '2004']);
}
#[Test]
@@ -377,7 +386,7 @@ public function it_will_fails_on_malformed_html(): void
$this->expectException(ParserError::class);
- Parser::new()
+ (new Parser())
->failOnXmlErrors()
->parseHtml($html);
}
@@ -387,7 +396,7 @@ public function it_will_fail_to_load_other_html_tag(): void
{
$this->expectException(ParserError::class);
- Parser::new()->parseHtml(new DOMElement('p', 'I know who you are'));
+ (new Parser())->parseHtml(new DOMElement('p', 'I know who you are'));
}
#[Test]
@@ -403,7 +412,7 @@ public function it_will_found_no_header(): void
/** @var SimpleXMLElement $simpleXML */
$simpleXML = simplexml_load_string($html);
- $table = Parser::new()
+ $table = (new Parser())
->tableHeaderPosition(Section::Tbody)
->parseHtml($simpleXML);
@@ -422,7 +431,7 @@ public function it_will_found_no_header_in_any_section(): void
TABLE;
- $table = Parser::new()
+ $table = (new Parser())
->tableHeaderPosition(Section::Tr)
->parseHtml($html);
@@ -444,12 +453,12 @@ public function it_will_use_the_table_footer(): void
TABLE;
- $table = Parser::new()
+ $table = (new Parser())
->excludeSection(Section::Tfoot)
->parseHtml($html);
self::assertSame([], $table->getHeader());
- self::assertSame([], $table->first());
+ self::assertSame([], $table->getTabularData()->first());
}
#[Test]
@@ -457,7 +466,7 @@ public function it_uses_the_parser_formatter(): void
{
/** @var resource $stream */
$stream = fopen(dirname(__DIR__).'/test_files/table.html', 'r');
- $table = Parser::new()
+ $table = (new Parser())
->tablePosition('testb')
->tableHeaderPosition(Section::Tr)
->withFormatter(function (array $record): array {
@@ -476,7 +485,7 @@ public function it_uses_the_parser_formatter(): void
'nombre' => 15,
'sexe' => 'M',
'annee' => 2004,
- ], $table->first());
+ ], $table->getTabularData()->first());
fclose($stream);
}
@@ -532,18 +541,18 @@ public function it_can_handle_rowspan_and_colspan(): void
fn (int $carry, array $record): int => $carry + (array_count_values($record)[$value] ?? 0),
0
);
- $table = Parser::new()->parseHtml($table);
+ $table = (new Parser())->parseHtml($table);
- self::assertSame(2, $reducer($table, 'colspan'));
- self::assertSame(2, $reducer($table, 'rowspan'));
- self::assertSame(6, $reducer($table, 'colspan+rowspan'));
+ self::assertSame(2, $reducer($table->getTabularData(), 'colspan'));
+ self::assertSame(2, $reducer($table->getTabularData(), 'rowspan'));
+ self::assertSame(6, $reducer($table->getTabularData(), 'colspan+rowspan'));
}
#[Test]
#[DataProvider('providesCaption')]
public function it_can_load_the_table_caption(string $table, ?string $defaultCaption, ?string $expected): void
{
- self::assertSame($expected, Parser::new()->tableCaption($defaultCaption)->parseHtml($table)->getCaption());
+ self::assertSame($expected, (new Parser())->tableCaption($defaultCaption)->parseHtml($table)->getCaption());
}
/**
diff --git a/src/Section.php b/src/Section.php
index ac1468e..d771b1e 100644
--- a/src/Section.php
+++ b/src/Section.php
@@ -4,12 +4,8 @@
namespace Bakame\TabularData\HtmlTable;
-use Bakame\Aide\Enum\Helper;
-
enum Section: string
{
- use Helper;
-
case Thead = 'thead';
case Tbody = 'tbody';
case Tfoot = 'tfoot';
@@ -17,12 +13,12 @@ enum Section: string
/**
* @param int<0, max> $offset
+ *
+ * @throws ParserError
*/
public function xpathRow(int $offset = 0): string
{
- if ($offset < 0) { /* @phpstan-ignore-line */
- throw new ParserError('The table header row offset must be a positive integer or 0.');
- }
+ $offset > -1 || throw new ParserError('The table header row offset must be a positive integer or 0.'); /* @phpstan-ignore-line */
++$offset;
return match ($this) {
diff --git a/src/Table.php b/src/Table.php
index 581ce86..6c6d2b6 100644
--- a/src/Table.php
+++ b/src/Table.php
@@ -4,119 +4,29 @@
namespace Bakame\TabularData\HtmlTable;
-use Closure;
+use Countable;
use Iterator;
+use IteratorAggregate;
use JsonSerializable;
+use League\Csv\TabularDataProvider;
use League\Csv\TabularDataReader;
/**
* @template TValue of array
*
- * @implements TabularDataReader>
+ * @implements IteratorAggregate
*/
-final class Table implements TabularDataReader, JsonSerializable
+final class Table implements IteratorAggregate, Countable, JsonSerializable, TabularDataProvider
{
/**
- * @param TabularDataReader> $tabularDataReader
+ * @param TabularDataReader $tabularData
*/
public function __construct(
- private readonly TabularDataReader $tabularDataReader,
+ private readonly TabularDataReader $tabularData,
private readonly ?string $caption = null
) {
}
- public function count(): int
- {
- return $this->tabularDataReader->count();
- }
-
- public function getIterator(): Iterator
- {
- return $this->tabularDataReader->getIterator();
- }
-
- /**
- * @return array{
- * caption: ?string,
- * header: array,
- * rows:array>
- * }
- */
- public function jsonSerialize(): array
- {
- return [
- 'caption' => $this->caption,
- 'header' => $this->getHeader(),
- 'rows' => array_values([...$this->tabularDataReader]),
- ];
- }
-
- public function each(Closure $closure): bool
- {
- return $this->tabularDataReader->each($closure);
- }
-
- public function exists(Closure $closure): bool
- {
- return $this->tabularDataReader->exists($closure);
- }
-
- /**
- * @return array
- */
- public function nth(int $nth_record): array
- {
- return $this->tabularDataReader->nth($nth_record);
- }
-
- /**
- * @return array
- */
- public function first(): array
- {
- return $this->tabularDataReader->first();
- }
-
- /**
- *
- * @return Table>
- */
- public function filter(Closure $closure): TabularDataReader
- {
- return new self($this->tabularDataReader->filter($closure), $this->caption);
- }
-
- public function fetchColumnByName(string $name): Iterator
- {
- return $this->tabularDataReader->fetchColumnByName($name);
- }
-
- public function fetchColumnByOffset(int $offset): Iterator
- {
- return $this->tabularDataReader->fetchColumnByOffset($offset);
- }
-
- public function reduce(Closure $closure, mixed $initial = null): mixed
- {
- return $this->tabularDataReader->reduce($closure, $initial);
- }
-
- /**
- * @return Table>
- */
- public function slice(int $offset, ?int $length = null): TabularDataReader
- {
- return new self($this->tabularDataReader->slice($offset, $length), $this->caption);
- }
-
- /**
- * @return Table>
- */
- public function sorted(Closure $orderBy): TabularDataReader
- {
- return new self($this->tabularDataReader->sorted($orderBy), $this->caption);
- }
-
public function getCaption(): ?string
{
return $this->caption;
@@ -127,74 +37,40 @@ public function getCaption(): ?string
*/
public function getHeader(): array
{
- return $this->tabularDataReader->getHeader();
- }
-
- public function getRecords(array $header = []): Iterator
- {
- return $this->tabularDataReader->getRecords($header);
+ return $this->tabularData->getHeader();
}
/**
- * @param array $header
+ * @return TabularDataReader
*/
- public function getObjects(string $className, array $header = []): Iterator
- {
- return $this->tabularDataReader->getObjects($className, $header);
- }
-
- /**
- * @return array
- */
- public function fetchOne(int $nth_record = 0): array
- {
- return $this->tabularDataReader->fetchOne($nth_record);
- }
-
- public function fetchPairs($offset_index = 0, $value_index = 1): Iterator
+ public function getTabularData(): TabularDataReader
{
- return $this->tabularDataReader->fetchPairs($offset_index, $value_index);
+ return $this->tabularData;
}
- public function fetchColumn($index = 0): Iterator
- {
- return $this->tabularDataReader->fetchColumn($index);
- }
-
- /**
- * @return TabularDataReader>
- */
- public function select(string|int ...$columnOffsetOrName): TabularDataReader
- {
- return $this->tabularDataReader->select(...$columnOffsetOrName);
- }
-
- /** @return iterable>> */
- public function matching(string $expression): iterable
+ public function count(): int
{
- return $this->tabularDataReader->matching($expression);
+ return $this->tabularData->count();
}
- /**
- *
- * @return ?TabularDataReader>
- */
- public function matchingFirst(string $expression): ?TabularDataReader
+ public function getIterator(): Iterator
{
- return $this->tabularDataReader->matchingFirst($expression);
+ return $this->tabularData->getIterator();
}
/**
- *
- * @return TabularDataReader>
+ * @return array{
+ * caption: ?string,
+ * header: array,
+ * rows:array>
+ * }
*/
- public function matchingFirstOrFail(string $expression): TabularDataReader
- {
- return $this->tabularDataReader->matchingFirstOrFail($expression);
- }
-
- public function value(int|string $column = 0): mixed
+ public function jsonSerialize(): array
{
- return $this->tabularDataReader->value($column);
+ return [
+ 'caption' => $this->caption,
+ 'header' => $this->getHeader(),
+ 'rows' => array_values([...$this->tabularData]),
+ ];
}
}
diff --git a/src/Warning.php b/src/Warning.php
new file mode 100644
index 0000000..5e1e893
--- /dev/null
+++ b/src/Warning.php
@@ -0,0 +1,49 @@
+
+ in_array($errno, [E_WARNING, E_USER_WARNING], true)
+ ? throw new ErrorException($errstr, 0, $errno, $errfile, $errline)
+ : false
+ );
+
+ try {
+ return $callback(...$arguments);
+ } finally {
+ restore_error_handler();
+ }
+ }
+}