From e5df46deaee6034b7d1f109720622884a6578d2b Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Tue, 30 Sep 2025 12:26:13 +0200 Subject: [PATCH 1/3] Remove no longer needed dependencies --- .php-cs-fixer.php | 1 + CHANGELOG.md | 5 ++ README.md | 13 ++-- composer.json | 6 +- src/Parser.php | 80 ++++++++++---------- src/ParserError.php | 4 +- src/ParserTest.php | 39 +++++----- src/Section.php | 10 +-- src/Table.php | 176 +++++++------------------------------------- src/Warning.php | 72 ++++++++++++++++++ 10 files changed, 178 insertions(+), 228 deletions(-) create mode 100644 src/Warning.php diff --git a/.php-cs-fixer.php b/.php-cs-fixer.php index 79865d7..28ec97a 100644 --- a/.php-cs-fixer.php +++ b/.php-cs-fixer.php @@ -7,6 +7,7 @@ $config = new PhpCsFixer\Config(); return $config + ->setUnsupportedPhpVersionAllowed(true) ->setRules([ '@PSR12' => true, 'array_syntax' => ['syntax' => 'short'], diff --git a/CHANGELOG.md b/CHANGELOG.md index c72e73d..00f8539 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ All Notable changes to `bakame/html-table` will be documented in this file. +# [Next](https://github.com/bakame-php/html-table/compare/0.5.0...main) - TBD + +* **BC BREAK:** the `Table` class now implements the `TabularDataProvider` instead of the `TabularDataReader` interface. +* **BC BREAK:** the `ParserError` class now extends the `Exception` instead of the `InvalidArgumentException` exception class. 
+ # [0.5.0](https://github.com/bakame-php/html-table/compare/0.4.0...0.5.0) - 2025-07-06 ## What's Changed diff --git a/README.md b/README.md index 22f3583..4e67e35 100644 --- a/README.md +++ b/README.md @@ -132,9 +132,10 @@ HTML; $table = Parser::new()->parseHtml($html); $table->getCaption(); //returns 'Songs' $table->getHeader(); //returns ['Title','Singer', 'Country'] -$table->nth(2); //returns ["Title" => "Nzinzi", "Singer" => "Emeneya", "Country" => "DR Congo"] -json_encode($table->slice(0, 1)); -//{"caption":"Songs","header":["Title","Singer","Country"],"rows":[{"Title":"Nakei Nairobi","Singer":"Mbilia Bel","Country":"DR Congo"}]} +$tableData = $table->getTabularData(); +$tableData->nth(2); //returns ["Title" => "Nzinzi", "Singer" => "Emeneya", "Country" => "DR Congo"] +json_encode($tableData->slice(0, 1)); +//[{"Title":"Nakei Nairobi","Singer":"Mbilia Bel","Country":"DR Congo"}] ``` #### Default configuration @@ -233,7 +234,7 @@ $parser = Parser::new()->resolveTableHeader(); // will attempt to resolve the ta #### tableHeader -You can specify directly the header of your table and override any other table header +You can directly specify the header of your table and override any other table header related configuration with this configuration ```php @@ -288,7 +289,7 @@ remove any previous setting guaranting that only the `tbody` if present will be ### withFormatter and withoutFormatter -Adds or remove a record formatter applied to the data extracted from the table before you +Add or remove a record formatter applied to the data extracted from the table before you can access it. The header is not affected by the formatter if it is defined. 
```php @@ -304,7 +305,7 @@ The formatter closure signature should be: function (array $record): array; ``` -If a header was defined or specified, the submitted record will have the header definition set, +If a header was defined or specified, the submitted record will have the header definition set; otherwise an array list is provided. The following formatter will work on any table content as long as it is defined as a string. diff --git a/composer.json b/composer.json index 4e57a13..525b469 100644 --- a/composer.json +++ b/composer.json @@ -30,9 +30,7 @@ "ext-libxml": "*", "ext-mbstring": "*", "ext-simplexml": "*", - "bakame/aide-enums": "^0.1.0", - "bakame/aide-error": "^0.2.0", - "league/csv": "^9.23.0" + "league/csv": "^9.25.0" }, "require-dev": { "ext-curl": "*", @@ -54,7 +52,7 @@ } }, "scripts": { - "phpcs": "PHP_CS_FIXER_IGNORE_ENV=1 php-cs-fixer fix -vvv --diff --dry-run --allow-risky=yes --ansi", + "phpcs": "php-cs-fixer fix -vvv --diff --dry-run --allow-risky=yes --ansi", "phpcs:fix": "php-cs-fixer fix -vvv --allow-risky=yes --ansi", "phpstan": "phpstan analyse -c phpstan.neon --ansi --memory-limit=192M", "phpunit": "XDEBUG_MODE=coverage phpunit --coverage-text", diff --git a/src/Parser.php b/src/Parser.php index 2be061f..35994f5 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -5,13 +5,13 @@ namespace Bakame\TabularData\HtmlTable; use ArrayIterator; -use Bakame\Aide\Error\Cloak; use Closure; use DOMDocument; use DOMElement; use DOMNode; use DOMNodeList; use DOMXPath; +use ErrorException; use Iterator; use League\Csv\Buffer; use League\Csv\ResultSet; @@ -72,26 +72,29 @@ public static function new(): self public function tableXPathPosition(string $expression): self { - $query = (new DOMXPath(new DOMDocument()))->query(...); - $domXPath = Cloak::warning($query); + if ($expression === $this->tableExpression) { + return $this; + } - return match (true) { - $expression === $this->tableExpression => $this, - false === $domXPath($expression) => throw new 
ParserError( + try { + Warning::trap((new DOMXPath(new DOMDocument()))->query(...), $expression); + } catch (ErrorException $exception) { + throw new ParserError( message: 'The xpath expression `'.$expression.'` is invalid.', - previous: $domXPath->errors()->last() - ), - default => new self( - $expression, - $this->caption, - $this->tableHeader, - $this->ignoreTableHeader, - $this->tableHeaderExpression, - $this->includedSections, - $this->formatter, - $this->throwOnXmlErrors, - ), - }; + previous: $exception + ); + } + + return new self( + $expression, + $this->caption, + $this->tableHeader, + $this->ignoreTableHeader, + $this->tableHeaderExpression, + $this->includedSections, + $this->formatter, + $this->throwOnXmlErrors, + ); } /** @@ -353,16 +356,16 @@ public function parseFile(mixed $filenameOrStream, $filenameContext = null): Tab return $this->parseHtml($this->streamToString($filenameOrStream)); } - $fopen = Cloak::warning(fopen(...)); - $resource = $fopen(...match ($filenameContext) { - null => [$filenameOrStream, 'r'], - default => [$filenameOrStream, 'r', false, $filenameContext], - }); - - if (!is_resource($resource)) { + try { + /** @var resource $resource */ + $resource = Warning::trap(fopen(...), ...match ($filenameContext) { + null => [$filenameOrStream, 'r'], + default => [$filenameOrStream, 'r', false, $filenameContext], + }); + } catch (ErrorException $exception) { throw new ParserError( message: '`'.$filenameOrStream.'`: failed to open stream: No such file or directory.', - previous: $fopen->errors()->last() + previous: $exception ); } @@ -383,14 +386,9 @@ public function parseHtml(DOMDocument|DOMElement|SimpleXMLElement|Stringable|str /** @var DOMNodeList $query */ $query = (new DOMXPath($this->sourceToDomDocument($source)))->query($this->tableExpression); $table = $query->item(0); - if (!$table instanceof DOMElement) { - throw new ParserError('The HTML table could not be found in the submitted html.'); - } - + $table instanceof DOMElement || 
throw new ParserError('The HTML table could not be found in the submitted html.'); $tagName = strtolower($table->nodeName); - if ('table' !== $tagName) { - throw new ParserError('Expected a table element to be selected; received `'.$tagName.'` instead.'); - } + 'table' === $tagName || throw new ParserError('Expected a table element to be selected; received `'.$tagName.'` instead.'); $xpath = new DOMXPath($this->sourceToDomDocument($table)); $header = match (true) { @@ -421,14 +419,14 @@ public function parseHtml(DOMDocument|DOMElement|SimpleXMLElement|Stringable|str */ private function streamToString($stream): string { - $stream_get_contents = Cloak::warning(stream_get_contents(...)); - /** @var string|false $html */ - $html = $stream_get_contents($stream); + try { + /** @var string $result */ + $result = Warning::trap(stream_get_contents(...), $stream); - return match (false) { - $html => throw new ParserError('The resource could not be read.', 0, $stream_get_contents->errors()->last()), - default => $html, - }; + return $result; + } catch (ErrorException $exception) { + throw new ParserError(message: 'The resource could not be read.', previous: $exception); + } } /** diff --git a/src/ParserError.php b/src/ParserError.php index 95b98fb..400baed 100644 --- a/src/ParserError.php +++ b/src/ParserError.php @@ -4,7 +4,7 @@ namespace Bakame\TabularData\HtmlTable; -use InvalidArgumentException; +use Exception; use LibXMLError; use function array_count_values; @@ -16,7 +16,7 @@ use const PHP_EOL; -class ParserError extends InvalidArgumentException +class ParserError extends Exception { /** @var array */ private array $duplicateColumnNames = []; diff --git a/src/ParserTest.php b/src/ParserTest.php index 174733e..f77cfc9 100644 --- a/src/ParserTest.php +++ b/src/ParserTest.php @@ -147,10 +147,10 @@ public function it_can_load_the_first_html_table_found_by_default(): void self::assertSame(['prenoms', 'nombre', 'sexe', 'annee'], $table->getHeader()); self::assertCount(4, 
$table); - self::assertSame($row, $table->first()); + self::assertSame($row, $table->getTabularData()->first()); - $sliced = $table->slice(0, 1); - self::assertSame(['caption' => null, 'header' => $header, 'rows' => [$row]], $sliced->jsonSerialize()); + $sliced = $table->getTabularData()->slice(0, 1); + self::assertSame([$row], iterator_to_array($sliced)); } #[Test] @@ -165,7 +165,7 @@ public function it_can_load_the_first_html_table_found_by_default_without_the_he '15', 'M', '2004', - ], $table->first()); + ], $table->getTabularData()->first()); } #[Test] @@ -207,7 +207,7 @@ public function it_uses_the_table_first_tr_to_search_for_the_header(): void 'nombre' => '15', 'sexe' => 'M', 'annee' => '2004', - ], $table->first()); + ], $table->getTabularData()->first()); fclose($stream); } @@ -247,7 +247,7 @@ public function it_uses_the_table_first_tr_in_the_first_tbody_to_search_for_the_ 'nombre' => '15', 'sexe' => 'M', 'annee' => '2004', - ], $table->nth(0)); + ], $table->getTabularData()->nth(0)); } #[Test] @@ -280,7 +280,7 @@ public function it_will_use_the_submitted_headers(): void 'count' => '15', 'gender' => 'M', 'year' => '2004', - ], $table->first()); + ], $table->getTabularData()->first()); } @@ -309,7 +309,7 @@ public function it_will_rearrange_the_content_with_table_header(): void 'Sexe' => 'M', 'Firstname' => 'Abel', 'Count' => '14', - ], $table->first()); + ], $table->getTabularData()->first()); $header = [3 => 'Annee', 0 => 'Firstname', 1 => 'Count']; $table = Parser::new() @@ -321,7 +321,7 @@ public function it_will_rearrange_the_content_with_table_header(): void 'Annee' => '2004', 'Firstname' => 'Abel', 'Count' => '14', - ], $table->first()); + ], $table->getTabularData()->first()); } #[Test] @@ -339,9 +339,10 @@ public function it_will_duplicate_colspan_data(): void TABLE; $table = Parser::new()->parseHtml($html); + $data = $table->getTabularData(); - self::assertSame($table->nth(1), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']); - 
self::assertSame($table->nth(0), ['prenoms', 'nombre', 'sexe', 'annee']); + self::assertSame($data->nth(1), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']); + self::assertSame($data->nth(0), ['prenoms', 'nombre', 'sexe', 'annee']); } #[Test] @@ -363,9 +364,11 @@ public function it_will_ignore_the_malformed_header_by_deault(): void $table = Parser::new()->parseHtml($dom); + $tabularData = $table->getTabularData(); + self::assertSame([], $table->getHeader()); - self::assertSame($table->first(), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']); - self::assertSame($table->nth(1), ['Abel', '14', 'M', '2004']); + self::assertSame($tabularData->first(), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']); + self::assertSame($tabularData->nth(1), ['Abel', '14', 'M', '2004']); } #[Test] @@ -449,7 +452,7 @@ public function it_will_use_the_table_footer(): void ->parseHtml($html); self::assertSame([], $table->getHeader()); - self::assertSame([], $table->first()); + self::assertSame([], $table->getTabularData()->first()); } #[Test] @@ -476,7 +479,7 @@ public function it_uses_the_parser_formatter(): void 'nombre' => 15, 'sexe' => 'M', 'annee' => 2004, - ], $table->first()); + ], $table->getTabularData()->first()); fclose($stream); } @@ -534,9 +537,9 @@ public function it_can_handle_rowspan_and_colspan(): void ); $table = Parser::new()->parseHtml($table); - self::assertSame(2, $reducer($table, 'colspan')); - self::assertSame(2, $reducer($table, 'rowspan')); - self::assertSame(6, $reducer($table, 'colspan+rowspan')); + self::assertSame(2, $reducer($table->getTabularData(), 'colspan')); + self::assertSame(2, $reducer($table->getTabularData(), 'rowspan')); + self::assertSame(6, $reducer($table->getTabularData(), 'colspan+rowspan')); } #[Test] diff --git a/src/Section.php b/src/Section.php index ac1468e..d771b1e 100644 --- a/src/Section.php +++ b/src/Section.php @@ -4,12 +4,8 @@ namespace Bakame\TabularData\HtmlTable; -use Bakame\Aide\Enum\Helper; - enum Section: string { - use Helper; 
- case Thead = 'thead'; case Tbody = 'tbody'; case Tfoot = 'tfoot'; @@ -17,12 +13,12 @@ enum Section: string /** * @param int<0, max> $offset + * + * @throws ParserError */ public function xpathRow(int $offset = 0): string { - if ($offset < 0) { /* @phpstan-ignore-line */ - throw new ParserError('The table header row offset must be a positive integer or 0.'); - } + $offset > -1 || throw new ParserError('The table header row offset must be a positive integer or 0.'); /* @phpstan-ignore-line */ ++$offset; return match ($this) { diff --git a/src/Table.php b/src/Table.php index 581ce86..6c6d2b6 100644 --- a/src/Table.php +++ b/src/Table.php @@ -4,119 +4,29 @@ namespace Bakame\TabularData\HtmlTable; -use Closure; +use Countable; use Iterator; +use IteratorAggregate; use JsonSerializable; +use League\Csv\TabularDataProvider; use League\Csv\TabularDataReader; /** * @template TValue of array * - * @implements TabularDataReader> + * @implements IteratorAggregate */ -final class Table implements TabularDataReader, JsonSerializable +final class Table implements IteratorAggregate, Countable, JsonSerializable, TabularDataProvider { /** - * @param TabularDataReader> $tabularDataReader + * @param TabularDataReader $tabularData */ public function __construct( - private readonly TabularDataReader $tabularDataReader, + private readonly TabularDataReader $tabularData, private readonly ?string $caption = null ) { } - public function count(): int - { - return $this->tabularDataReader->count(); - } - - public function getIterator(): Iterator - { - return $this->tabularDataReader->getIterator(); - } - - /** - * @return array{ - * caption: ?string, - * header: array, - * rows:array> - * } - */ - public function jsonSerialize(): array - { - return [ - 'caption' => $this->caption, - 'header' => $this->getHeader(), - 'rows' => array_values([...$this->tabularDataReader]), - ]; - } - - public function each(Closure $closure): bool - { - return $this->tabularDataReader->each($closure); - } - - 
public function exists(Closure $closure): bool - { - return $this->tabularDataReader->exists($closure); - } - - /** - * @return array - */ - public function nth(int $nth_record): array - { - return $this->tabularDataReader->nth($nth_record); - } - - /** - * @return array - */ - public function first(): array - { - return $this->tabularDataReader->first(); - } - - /** - * - * @return Table> - */ - public function filter(Closure $closure): TabularDataReader - { - return new self($this->tabularDataReader->filter($closure), $this->caption); - } - - public function fetchColumnByName(string $name): Iterator - { - return $this->tabularDataReader->fetchColumnByName($name); - } - - public function fetchColumnByOffset(int $offset): Iterator - { - return $this->tabularDataReader->fetchColumnByOffset($offset); - } - - public function reduce(Closure $closure, mixed $initial = null): mixed - { - return $this->tabularDataReader->reduce($closure, $initial); - } - - /** - * @return Table> - */ - public function slice(int $offset, ?int $length = null): TabularDataReader - { - return new self($this->tabularDataReader->slice($offset, $length), $this->caption); - } - - /** - * @return Table> - */ - public function sorted(Closure $orderBy): TabularDataReader - { - return new self($this->tabularDataReader->sorted($orderBy), $this->caption); - } - public function getCaption(): ?string { return $this->caption; @@ -127,74 +37,40 @@ public function getCaption(): ?string */ public function getHeader(): array { - return $this->tabularDataReader->getHeader(); - } - - public function getRecords(array $header = []): Iterator - { - return $this->tabularDataReader->getRecords($header); + return $this->tabularData->getHeader(); } /** - * @param array $header + * @return TabularDataReader $tabularData */ - public function getObjects(string $className, array $header = []): Iterator - { - return $this->tabularDataReader->getObjects($className, $header); - } - - /** - * @return array - */ - public 
function fetchOne(int $nth_record = 0): array - { - return $this->tabularDataReader->fetchOne($nth_record); - } - - public function fetchPairs($offset_index = 0, $value_index = 1): Iterator + public function getTabularData(): TabularDataReader { - return $this->tabularDataReader->fetchPairs($offset_index, $value_index); + return $this->tabularData; } - public function fetchColumn($index = 0): Iterator - { - return $this->tabularDataReader->fetchColumn($index); - } - - /** - * @return TabularDataReader> - */ - public function select(string|int ...$columnOffsetOrName): TabularDataReader - { - return $this->tabularDataReader->select(...$columnOffsetOrName); - } - - /** @return iterable>> */ - public function matching(string $expression): iterable + public function count(): int { - return $this->tabularDataReader->matching($expression); + return $this->tabularData->count(); } - /** - * - * @return ?TabularDataReader> - */ - public function matchingFirst(string $expression): ?TabularDataReader + public function getIterator(): Iterator { - return $this->tabularDataReader->matchingFirst($expression); + return $this->tabularData->getIterator(); } /** - * - * @return TabularDataReader> + * @return array{ + * caption: ?string, + * header: array, + * rows:array> + * } */ - public function matchingFirstOrFail(string $expression): TabularDataReader - { - return $this->tabularDataReader->matchingFirstOrFail($expression); - } - - public function value(int|string $column = 0): mixed + public function jsonSerialize(): array { - return $this->tabularDataReader->value($column); + return [ + 'caption' => $this->caption, + 'header' => $this->getHeader(), + 'rows' => array_values([...$this->tabularData]), + ]; } } diff --git a/src/Warning.php b/src/Warning.php new file mode 100644 index 0000000..0601c95 --- /dev/null +++ b/src/Warning.php @@ -0,0 +1,72 @@ + + in_array($errno, [E_WARNING, E_USER_WARNING], true) + ? 
throw new ErrorException($errstr, 0, $errno, $errfile, $errline) + : false + ); + + try { + return $callback(...$arguments); + } finally { + restore_error_handler(); + } + } + + /** + * Hides PHP Warnings. + * + * @param mixed ...$arguments the callback arguments if needed + * + * @throws Throwable on callback execution if the callback throws + * + * @return TReturn The result returned by the callback. + */ + public static function cloak(callable $callback, mixed ...$arguments): mixed + { + set_error_handler( + fn (int $errno, string $errstr, string $errfile, int $errline): bool => + in_array($errno, [E_WARNING, E_USER_WARNING], true), + ); + + try { + return $callback(...$arguments); + } finally { + restore_error_handler(); + } + } +} From 9fb2ab51bb5c4ea20f5f54865a0af073472d2207 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Tue, 30 Sep 2025 15:53:52 +0200 Subject: [PATCH 2/3] Adding support for SplFileInfo --- .github/workflows/build.yml | 15 +++- CHANGELOG.md | 8 ++- README.md | 58 +++++++-------- src/Feature.php | 11 +++ src/Parser.php | 136 ++++++++++++++++++++---------------- src/ParserTest.php | 66 ++++++++--------- 6 files changed, 168 insertions(+), 126 deletions(-) create mode 100644 src/Feature.php diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 218511f..9ec49e8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,6 +12,10 @@ jobs: matrix: php: ['8.1', '8.2', '8.3', '8.4'] stability: [prefer-lowest, prefer-stable] + include: + - php: '8.5' + flags: "--ignore-platform-req=php" + stability: prefer-stable steps: - name: Checkout code uses: actions/checkout@v4 @@ -45,11 +49,16 @@ jobs: - name: Run Unit tests with coverage run: composer phpunit -- ${{ matrix.phpunit-flags }} + if: ${{ matrix.php != '8.5' }} + + - name: Run Unit tests without coverage + run: vendor/bin/phpunit --no-coverage + if: ${{ matrix.php == '8.5' }} - name: Run static analysis - run: composer phpstan - if: ${{ 
matrix.php == '8.3' && matrix.stability == 'prefer-stable'}} + run: composer phpstan-build + if: ${{ matrix.php == '8.4' && matrix.stability == 'prefer-stable'}} - name: Run Coding style rules run: composer phpcs:fix - if: ${{ matrix.php == '8.3' && matrix.stability == 'prefer-stable'}} + if: ${{ matrix.php == '8.4' && matrix.stability == 'prefer-stable'}} diff --git a/CHANGELOG.md b/CHANGELOG.md index 00f8539..9bc34e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,15 +4,21 @@ All Notable changes to `bakame/html-table` will be documented in this file. # [Next](https://github.com/bakame-php/html-table/compare/0.5.0...main) - TBD +## What's Changed + * **BC BREAK:** the `Table` class now implements the `TabularDataProvider` instead of the `TabularDataReader` interface. * **BC BREAK:** the `ParserError` class now extends the `Exception` instead of the `InvalidArgumentException` exception class. +* **BC BREAK:** the `Parser::withoutFormatter` is deprecated; use `Parser::withFormatter(null)` instead. +* The `Parser::new` static constructor is deprecated; use the default constructor instead. +* Boolean parameters are now replaced by `Enum` for maintenance and readability. 
+* Adding `SplFileInfo` support to `parseHtml` and `parseFile` # [0.5.0](https://github.com/bakame-php/html-table/compare/0.4.0...0.5.0) - 2025-07-06 ## What's Changed * Upgrade dependencies on `aide-error` to version `0.2.0` -* fix use statement by @tacman in https://github.com/bakame-php/html-table/pull/6 +* fix use statements by @tacman in https://github.com/bakame-php/html-table/pull/6 ## New Contributors diff --git a/README.md b/README.md index 4e67e35..aeaf4d3 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Sponsor development of this project](https://img.shields.io/badge/sponsor%20this%20package-%E2%9D%A4-ff69b4.svg?style=flat-square)](https://github.com/sponsors/nyamsprod) `bakame/html-table` is a small PHP package that allows you to parse, import and manipulate -tabular data represented as HTML Table. Once installed you will be able to do the following: +tabular data represented as HTML Table. Once installed, you will be able to do the following: ```php use Bakame\TabularData\HtmlTable\Parser; @@ -18,6 +18,7 @@ $table = Parser::new() ->parseFile('https://www.bbc.com/sport/football/tables'); $table + ->getTabularData() ->filter(fn (array $row) => (int) $row['points'] >= 10) ->sorted(fn (array $rowA, array $rowB) => (int) $rowB['for'] <=> (int) $rowA['for']) ->fetchPairs('team', 'for'); @@ -35,7 +36,7 @@ $table ## System Requirements -**league\csv 9.23.0** library is required. (since version 0.4.0). +**league\csv 9.25.0** library is required. (since version 0.6.0). ## Installation @@ -62,7 +63,6 @@ use Bakame\HtmlTable\Parser; $parser = Parser::new() ->ignoreTableHeader() ->ignoreXmlErrors() - ->withoutFormatter() ->tableCaption('This is a beautiful table'); ``` @@ -74,7 +74,7 @@ If parsing is not possible a `ParseError` exception will be thrown. ```php use Bakame\HtmlTable\Parser; -$parser = Parser::new(); +$parser = new Parser(); $table = $parser->parseHtml('...
'); $table = $parser->parseFile('path/to/html/file.html'); @@ -129,7 +129,7 @@ $html = << HTML; -$table = Parser::new()->parseHtml($html); +$table = (new Parser())->parseHtml($html); $table->getCaption(); //returns 'Songs' $table->getHeader(); //returns ['Title','Singer', 'Country'] $tableData = $table->getTabularData(); @@ -140,14 +140,14 @@ json_encode($tableData->slice(0, 1)); #### Default configuration -By default, when calling the `Parser::new()` named constructor the parser will: +By default, when calling `new Parser()`, the parser will: - try to parse the first table found in the page - expect the table header row to be the first `tr` found in the `thead` section of your table - exclude the table `thead` section when extracting the table content. - ignore XML errors. - have no formatter attached. -- have no default caption to used if none is present in the table. +- have no default caption to be used if none is present in the table. Each of the following settings can be changed to improve the conversion against your business rules: @@ -164,9 +164,9 @@ If the expression is valid, and a list of table is found, the first result will ```php use Bakame\HtmlTable\Parser; -$parser = Parser::new()->tablePosition('table-id'); // parses the -$parser = Parser::new()->tablePosition(3); // parses the 4th table of the page -$parser = Parser::new()->tableXPathPosition("//main/div/table"); +$parser = (new Parser())->tablePosition('table-id'); // parses the
+$parser = (new Parser())->tablePosition(3); // parses the 4th table of the page +$parser = (new Parser())->tableXPathPosition("//main/div/table"); //parse the first table that matches the xpath expression ``` @@ -180,8 +180,8 @@ You can optionally define a caption for your table if none is present or found d ```php use Bakame\HtmlTable\Parser; -$parser = Parser::new()->tableCaption('this is a generated caption'); -$parser = Parser::new()->tableCaption(null); // remove any default caption set +$parser = (new Parser())->tableCaption('this is a generated caption'); +$parser = (new Parser())->tableCaption(null); // remove any default caption set ``` ### tableHeader, tableHeaderPosition, ignoreTableHeader and resolveTableHeader @@ -198,7 +198,7 @@ Tells where to locate and resolve the table header use Bakame\HtmlTable\Parser; use Bakame\HtmlTable\Section; -$parser = Parser::new()->tableHeaderPosition(Section::Thead, 3); +$parser = (new Parser())->tableHeaderPosition(Section::Thead, 3); // header is the 4th row in the table section ``` @@ -228,8 +228,8 @@ If no resolution is done, no header will be included in the returned `Table` ins ```php use Bakame\HtmlTable\Parser; -$parser = Parser::new()->ignoreTableHeader(); // no table header will be resolved -$parser = Parser::new()->resolveTableHeader(); // will attempt to resolve the table header +$parser = (new Parser())->ignoreTableHeader(); // no table header will be resolved +$parser = (new Parser())->resolveTableHeader(); // will attempt to resolve the table header ``` #### tableHeader @@ -241,12 +241,12 @@ related configuration with this configuration use Bakame\HtmlTable\Parser; use Bakame\HtmlTable\Section; -$parser = Parser::new()->tableHeader(['rank', 'team', 'winner']); +$parser = (new Parser())->tableHeader(['rank', 'team', 'winner']); ``` **If you specify a non-empty array as the table header, it will take precedence over any other table header related options.** -**Because it is a tabular data each cell MUST be 
unique otherwise an exception will be thrown** +**Because it is tabular data, each cell MUST be unique otherwise an exception will be thrown** You can skip or re-arrange the source columns by skipping them by their offsets and/or by re-ordering the offsets. @@ -255,8 +255,8 @@ re-ordering the offsets. use Bakame\HtmlTable\Parser; use Bakame\HtmlTable\Section; -$parser = Parser::new()->tableHeader([3 => 'rank', 7 => 'winner', 5 => 'team']); -// only 3 column will be extracted the 4th, 6th and 8th columns +$parser = (new Parser())->tableHeader([3 => 'rank', 7 => 'winner', 5 => 'team']); +// only 3 columns will be extracted the 4th, 6th and 8th columns // and re-arrange as 'rank' first and 'team' last // if a column is missing its value will be PHP `null` type ``` @@ -269,23 +269,23 @@ Tells which section should be parsed based on the `Section` enum use Bakame\HtmlTable\Parser; use Bakame\HtmlTable\Section; -$parser = Parser::new()->includeSection(Section::Tbody); // thead and tfoot are included during parsing -$parser = Parser::new()->excludeSection(Section::Tr, Section::Tfoot); // table direct tr children and tfoot are not included during parsing +$parser = (new Parser())->includeSection(Section::Tbody); // thead and tfoot are included during parsing +$parser = (new Parser())->excludeSection(Section::Tr, Section::Tfoot); // table direct tr children and tfoot are not included during parsing ``` **By default, the `thead` section is not parse. 
If a `thead` row is selected to be the header, it will be parsed independently of this setting.** -**⚠️Tips:** to be sure of which sections will be modified, first remove all previous setting +**⚠️Tips:** to be sure of which sections will be modified, first remove all previous settings before applying your configuration as shown below: ```diff -- Parser::new()->includeSection(Section::tbody); -+ Parser::new()->excludeSection(...Section::cases())->includeSection(Section::tbody); +- (new Parser())->includeSection(Section::Tbody); ++ (new Parser())->excludeSection(...Section::cases())->includeSection(Section::Tbody); ``` The first call will still include the `tfoot` and the `tr` sections, whereas the second call -remove any previous setting guaranting that only the `tbody` if present will be parsed. +removes any previous setting guaranteeing that only the `tbody` if present will be parsed. ### withFormatter and withoutFormatter @@ -295,8 +295,8 @@ can access it. The header is not affected by the formatter if it is defined. 
```php use Bakame\HtmlTable\Parser; -$parser = Parser::new()->withFormatter($formatter); // attach a formatter to the parser -$parser = Parser::new()->withoutFormatter(); // removed the attached formatter if it exists +$parser = (new Parser())->withFormatter($formatter); // attach a formatter to the parser +$parser = (new Parser())->withFormatter(null); // removed the attached formatter if it exists ``` The formatter closure signature should be: @@ -334,8 +334,8 @@ Tells whether the parser should ignore or throw in case of malformed HTML conten ```php use Bakame\HtmlTable\Parser; -$parser = Parser::new()->ignoreXmlErrors(); // ignore the XML errors -$parser = Parser::new()->failOnXmlErrors(3); // throw on XML errors +$parser = (new Parser())->ignoreXmlErrors(); // ignore the XML errors +$parser = (new Parser())->failOnXmlErrors(3); // throw on XML errors ``` ## Testing diff --git a/src/Feature.php b/src/Feature.php new file mode 100644 index 0000000..e8c59b5 --- /dev/null +++ b/src/Feature.php @@ -0,0 +1,11 @@ + $tableHeader * @param array
$includedSections */ - private function __construct( - private readonly string $tableExpression, - private readonly ?string $caption, - private readonly array $tableHeader, - private readonly bool $ignoreTableHeader, - private readonly string $tableHeaderExpression, - private readonly array $includedSections, - private readonly ?Closure $formatter, - private readonly bool $throwOnXmlErrors, + public function __construct( + private readonly string $tableExpression = '(//table)[1]', + private readonly ?string $caption = null, + private readonly array $tableHeader = [], + private readonly Feature $ignoreTableHeader = Feature::Disabled, + private readonly string $tableHeaderExpression = '(//table/thead/tr)[1]', + private readonly array $includedSections = [Section::Tbody, Section::Tfoot, Section::Tr], + private readonly ?Closure $formatter = null, + private readonly Feature $throwOnXmlErrors = Feature::Disabled, ) { } - public static function new(): self - { - return new self( - '(//table)[1]', - null, - [], - false, - '(//table/thead/tr)[1]', - [Section::Tbody, Section::Tfoot, Section::Tr], - null, - false, - ); - } - public function tableXPathPosition(string $expression): self { if ($expression === $this->tableExpression) { @@ -138,13 +132,13 @@ public function tableHeader(array $headerRow): self public function ignoreTableHeader(): self { - return match ($this->ignoreTableHeader) { + return match (Feature::Enabled === $this->ignoreTableHeader) { true => $this, false => new self( $this->tableExpression, $this->caption, $this->tableHeader, - true, + Feature::Enabled, $this->tableHeaderExpression, $this->includedSections, $this->formatter, @@ -155,13 +149,13 @@ public function ignoreTableHeader(): self public function resolveTableHeader(): self { - return match ($this->ignoreTableHeader) { + return match (Feature::Disabled === $this->ignoreTableHeader) { false => $this, true => new self( $this->tableExpression, $this->caption, $this->tableHeader, - false, + 
Feature::Disabled, $this->tableHeaderExpression, $this->includedSections, $this->formatter, @@ -261,7 +255,7 @@ public function excludeSection(Section ...$sections): self public function failOnXmlErrors(): self { - return match ($this->throwOnXmlErrors) { + return match (Feature::Enabled === $this->throwOnXmlErrors) { true => $this, false => new self( $this->tableExpression, @@ -271,14 +265,14 @@ public function failOnXmlErrors(): self $this->tableHeaderExpression, $this->includedSections, $this->formatter, - true, + Feature::Enabled, ), }; } public function ignoreXmlErrors(): self { - return match ($this->throwOnXmlErrors) { + return match (Feature::Disabled === $this->throwOnXmlErrors) { false => $this, true => new self( $this->tableExpression, @@ -288,29 +282,15 @@ public function ignoreXmlErrors(): self $this->tableHeaderExpression, $this->includedSections, $this->formatter, - false, + Feature::Disabled, ), }; } - public function withFormatter(Closure $formatter): self + public function withFormatter(?Closure $formatter): self { - return new self( - $this->tableExpression, - $this->caption, - $this->tableHeader, - $this->ignoreTableHeader, - $this->tableHeaderExpression, - $this->includedSections, - $formatter, - $this->throwOnXmlErrors, - ); - } - - public function withoutFormatter(): self - { - return match (null) { - $this->formatter => $this, + return match (true) { + $formatter === $this->formatter => $this, default => new self( $this->tableExpression, $this->caption, @@ -318,7 +298,7 @@ public function withoutFormatter(): self $this->ignoreTableHeader, $this->tableHeaderExpression, $this->includedSections, - null, + $formatter, $this->throwOnXmlErrors, ), }; @@ -342,7 +322,7 @@ public function tableCaption(?string $caption): self } /** - * @param resource|string $filenameOrStream + * @param SplFileInfo|resource|string $filenameOrStream * @param resource|null $filenameContext * * @throws ParserError @@ -352,16 +332,17 @@ public function 
tableCaption(?string $caption): self */ public function parseFile(mixed $filenameOrStream, $filenameContext = null): Table { + if ($filenameOrStream instanceof SplFileInfo) { + return $this->parseHtml($filenameOrStream); + } + if (is_resource($filenameOrStream)) { return $this->parseHtml($this->streamToString($filenameOrStream)); } try { /** @var resource $resource */ - $resource = Warning::trap(fopen(...), ...match ($filenameContext) { - null => [$filenameOrStream, 'r'], - default => [$filenameOrStream, 'r', false, $filenameContext], - }); + $resource = Warning::trap(fopen(...), ...['filename' => $filenameOrStream, 'mode' => 'r', 'context' => $filenameContext]); } catch (ErrorException $exception) { throw new ParserError( message: '`'.$filenameOrStream.'`: failed to open stream: No such file or directory.', @@ -381,7 +362,7 @@ public function parseFile(mixed $filenameOrStream, $filenameContext = null): Tab * * @return Table> */ - public function parseHtml(DOMDocument|DOMElement|SimpleXMLElement|Stringable|string $source): Table + public function parseHtml(SplFileInfo|DOMDocument|DOMElement|SimpleXMLElement|Stringable|string $source): Table { /** @var DOMNodeList $query */ $query = (new DOMXPath($this->sourceToDomDocument($source)))->query($this->tableExpression); @@ -393,7 +374,7 @@ public function parseHtml(DOMDocument|DOMElement|SimpleXMLElement|Stringable|str $xpath = new DOMXPath($this->sourceToDomDocument($table)); $header = match (true) { [] !== $this->tableHeader => $this->tableHeader, - $this->ignoreTableHeader => [], + Feature::Enabled === $this->ignoreTableHeader => [], default => $this->extractTableHeader($xpath), }; @@ -432,7 +413,7 @@ private function streamToString($stream): string /** * @throws ParserError */ - private function sourceToDomDocument(DOMDocument|SimpleXMLElement|DOMElement|Stringable|string $document): DOMDocument + private function sourceToDomDocument(SplFileInfo|DOMDocument|SimpleXMLElement|DOMElement|Stringable|string $document): 
DOMDocument { if ($document instanceof DOMDocument) { return $document; @@ -451,13 +432,22 @@ private function sourceToDomDocument(DOMDocument|SimpleXMLElement|DOMElement|Str return $dom; } + $content = (string) $document; + if ($document instanceof SplFileInfo) { + $content = ''; + $file = $document->openFile(); + while (!$file->eof()) { + $content .= $file->fgets(); + } + } + libxml_use_internal_errors(true); - $dom->loadHTML((string) $document); + $dom->loadHTML($content, LIBXML_NOWARNING | LIBXML_NOERROR); $errors = libxml_get_errors(); libxml_clear_errors(); return match (true) { - $this->throwOnXmlErrors && [] !== $errors => throw ParserError::dueToLibXmlErrors($errors), + Feature::Enabled === $this->throwOnXmlErrors && [] !== $errors => throw ParserError::dueToLibXmlErrors($errors), default => $dom, }; } @@ -545,7 +535,7 @@ private function extractHeaderRow(DOMElement $tr): array $tr->setAttribute(self::HEADER_ROW_ATTRIBUTE_NAME, 'true'); } - return $headerRow; + return array_map(fn (string|null $item): string => trim((string) $item, "\u{A0} \t\n\r\0\x0B"), $headerRow); } /** @@ -629,4 +619,30 @@ private function combineArray(array $record, array $header): array return $row; } + + /** + * DEPRECATION WARNING! This method will be removed in the next major point release. + * @deprecated since version 0.6.0 + * @see self::withFormatter() + * + * @codeCoverageIgnore + */ + #[Deprecated(message:'use Bakame\TabularData\HtmlTable\Parser::withFormatter() instead', since:'bakame/html-table:0.6.0')] + public function withoutFormatter(): self + { + return $this->withFormatter(null); + } + + /** + * DEPRECATION WARNING! This method will be removed in the next major point release. 
+ * @deprecated since version 0.6.0 + * @see self::__construct() + * + * @codeCoverageIgnore + */ + #[Deprecated(message:'use Bakame\TabularData\HtmlTable\Parser::__construct() instead', since:'bakame/html-table:0.6.0')] + public static function new(): self + { + return new self(); + } } diff --git a/src/ParserTest.php b/src/ParserTest.php index f77cfc9..f13b8a3 100644 --- a/src/ParserTest.php +++ b/src/ParserTest.php @@ -40,18 +40,18 @@ final class ParserTest extends TestCase #[Test] public function it_will_return_the_same_options(): void { - $parser = Parser::new(); + $parser = new Parser(); - self::assertSame( + self::assertEquals( $parser, $parser ->tablePosition(0) - ->tableHeaderPosition(Section::Thead, 0) + ->tableHeaderPosition(Section::Thead) ->includeSection(Section::Tbody, Section::Tfoot, Section::Tr) ->tableHeader([]) ->resolveTableHeader() ->ignoreXmlErrors() - ->withoutFormatter() + ->withFormatter(null) ->tableCaption(null) ); } @@ -63,7 +63,7 @@ public function it_will_throw_if_the_header_contains_duplicate_values(): void $this->expectException(ParserError::class); $this->expectExceptionMessage('The header record contains duplicate column names: `foo`, `toto`.'); - Parser::new()->tableHeader($headerRow); + (new Parser())->tableHeader($headerRow); } #[Test] @@ -71,7 +71,7 @@ public function it_will_throw_if_the_header_does_not_only_contains_string(): voi { $this->expectException(ParserError::class); - Parser::new()->tableHeader(['foo', 1]); /* @phpstan-ignore-line */ + (new Parser())->tableHeader(['foo', 1]); /* @phpstan-ignore-line */ } #[Test] @@ -80,7 +80,7 @@ public function it_will_throw_if_the_identifier_is_invalid(string|int $identifie { $this->expectException(ParserError::class); - Parser::new()->tablePosition($identifier); + (new Parser())->tablePosition($identifier); } /** @@ -102,7 +102,7 @@ public function it_will_throw_if_the_identifier_is_a_negative_integer(): void { $this->expectException(ParserError::class); - 
Parser::new()->tablePosition(-1); + (new Parser())->tablePosition(-1); } #[Test] @@ -110,7 +110,7 @@ public function it_will_throw_if_the_table_header_row_offset_is_negative(): void { $this->expectException(ParserError::class); - Parser::new()->tableHeaderPosition(Section::Thead, -1); /* @phpstan-ignore-line */ + (new Parser())->tableHeaderPosition(Section::Thead, -1); /* @phpstan-ignore-line */ } #[Test] @@ -118,7 +118,7 @@ public function it_will_throw_if_the_xpath_expression_is_invalid(): void { $this->expectException(ParserError::class); - Parser::new()->tableXPathPosition('//table@@invalid'); + (new Parser())->tableXPathPosition('//table@@invalid'); } #[Test] @@ -129,14 +129,14 @@ public function it_will_fail_to_load_any_element_other_than_a_table(): void HTML; $this->expectException(ParserError::class); $this->expectExceptionMessage('Expected a table element to be selected; received `p` instead.'); - Parser::new()->tableXPathPosition('//p')->parseHtml($html); + (new Parser())->tableXPathPosition('//p')->parseHtml($html); } #[Test] public function it_can_load_the_first_html_table_found_by_default(): void { - $table = Parser::new()->parseHtml(self::HTML); + $table = (new Parser())->parseHtml(self::HTML); $header = ['prenoms', 'nombre', 'sexe', 'annee']; $row = [ 'prenoms' => 'Abdoulaye', @@ -156,7 +156,7 @@ public function it_can_load_the_first_html_table_found_by_default(): void #[Test] public function it_can_load_the_first_html_table_found_by_default_without_the_header(): void { - $table = Parser::new()->ignoreTableHeader()->parseHtml(self::HTML); + $table = (new Parser())->ignoreTableHeader()->parseHtml(self::HTML); self::assertSame([], $table->getHeader()); self::assertCount(4, $table); @@ -171,7 +171,7 @@ public function it_can_load_the_first_html_table_found_by_default_without_the_he #[Test] public function it_can_load_any_html_table_by_occurrence(): void { - $table = Parser::new() + $table = (new Parser()) ->tablePosition(1) 
->parseFile(dirname(__DIR__).'/test_files/table.html'); @@ -182,7 +182,7 @@ public function it_can_load_any_html_table_by_occurrence(): void #[Test] public function it_can_load_any_html_table_by_attribute_id(): void { - $table = Parser::new() + $table = (new Parser()) ->tablePosition('testb') ->parseFile(dirname(__DIR__).'/test_files/table.html'); @@ -195,7 +195,7 @@ public function it_uses_the_table_first_tr_to_search_for_the_header(): void { /** @var resource $stream */ $stream = fopen(dirname(__DIR__).'/test_files/table.html', 'r'); - $table = Parser::new() + $table = (new Parser()) ->tablePosition('testb') ->tableHeaderPosition(Section::Tr) ->parseFile($stream); @@ -217,7 +217,7 @@ public function it_will_fail_to_load_a_missing_file(): void { $this->expectException(ParserError::class); - Parser::new()->parseFile('/path/tp/my/heart.html'); + (new Parser())->parseFile('/path/tp/my/heart.html'); } #[Test] @@ -236,7 +236,7 @@ public function it_uses_the_table_first_tr_in_the_first_tbody_to_search_for_the_
TABLE; - $table = Parser::new() + $table = (new Parser()) ->tableHeaderPosition(Section::Tbody) ->parseHtml($html); @@ -255,7 +255,7 @@ public function it_will_throw_if_the_html_is_malformed(): void { $this->expectExceptionObject(new ParserError('The HTML table could not be found in the submitted html.')); - Parser::new()->parseHtml('vasdfadadf'); + (new Parser())->parseHtml('vasdfadadf'); } #[Test] @@ -263,13 +263,13 @@ public function it_will_throw_if_no_table_is_found(): void { $this->expectExceptionObject(new ParserError('The HTML table could not be found in the submitted html.')); - Parser::new()->parseHtml('
  1. foo
'); + (new Parser())->parseHtml('
  1. foo
'); } #[Test] public function it_will_use_the_submitted_headers(): void { - $parser = Parser::new() + $parser = (new Parser()) ->tableHeader(['firstname', 'count', 'gender', 'year']); $table = $parser->parseHtml(self::HTML); @@ -299,7 +299,7 @@ public function it_will_rearrange_the_content_with_table_header(): void TABLE; $header = [3 => 'Annee', 2 => 'Sexe', 0 => 'Firstname', 1 => 'Count']; - $table = Parser::new() + $table = (new Parser()) ->tableHeader($header) ->parseHtml($html); @@ -312,7 +312,7 @@ public function it_will_rearrange_the_content_with_table_header(): void ], $table->getTabularData()->first()); $header = [3 => 'Annee', 0 => 'Firstname', 1 => 'Count']; - $table = Parser::new() + $table = (new Parser()) ->tableHeader($header) ->parseHtml($html); @@ -338,7 +338,7 @@ public function it_will_duplicate_colspan_data(): void TABLE; - $table = Parser::new()->parseHtml($html); + $table = (new Parser())->parseHtml($html); $data = $table->getTabularData(); self::assertSame($data->nth(1), ['Abdoulaye', 'Abdoulaye', 'Abdoulaye', '2004']); @@ -362,7 +362,7 @@ public function it_will_ignore_the_malformed_header_by_deault(): void $dom = new DOMDocument(); $dom->loadHTML($html); - $table = Parser::new()->parseHtml($dom); + $table = (new Parser())->parseHtml($dom); $tabularData = $table->getTabularData(); @@ -380,7 +380,7 @@ public function it_will_fails_on_malformed_html(): void $this->expectException(ParserError::class); - Parser::new() + (new Parser()) ->failOnXmlErrors() ->parseHtml($html); } @@ -390,7 +390,7 @@ public function it_will_fail_to_load_other_html_tag(): void { $this->expectException(ParserError::class); - Parser::new()->parseHtml(new DOMElement('p', 'I know who you are')); + (new Parser())->parseHtml(new DOMElement('p', 'I know who you are')); } #[Test] @@ -406,7 +406,7 @@ public function it_will_found_no_header(): void /** @var SimpleXMLElement $simpleXML */ $simpleXML = simplexml_load_string($html); - $table = Parser::new() + $table = (new 
Parser()) ->tableHeaderPosition(Section::Tbody) ->parseHtml($simpleXML); @@ -425,7 +425,7 @@ public function it_will_found_no_header_in_any_section(): void TABLE; - $table = Parser::new() + $table = (new Parser()) ->tableHeaderPosition(Section::Tr) ->parseHtml($html); @@ -447,7 +447,7 @@ public function it_will_use_the_table_footer(): void TABLE; - $table = Parser::new() + $table = (new Parser()) ->excludeSection(Section::Tfoot) ->parseHtml($html); @@ -460,7 +460,7 @@ public function it_uses_the_parser_formatter(): void { /** @var resource $stream */ $stream = fopen(dirname(__DIR__).'/test_files/table.html', 'r'); - $table = Parser::new() + $table = (new Parser()) ->tablePosition('testb') ->tableHeaderPosition(Section::Tr) ->withFormatter(function (array $record): array { @@ -535,7 +535,7 @@ public function it_can_handle_rowspan_and_colspan(): void fn (int $carry, array $record): int => $carry + (array_count_values($record)[$value] ?? 0), 0 ); - $table = Parser::new()->parseHtml($table); + $table = (new Parser())->parseHtml($table); self::assertSame(2, $reducer($table->getTabularData(), 'colspan')); self::assertSame(2, $reducer($table->getTabularData(), 'rowspan')); @@ -546,7 +546,7 @@ public function it_can_handle_rowspan_and_colspan(): void #[DataProvider('providesCaption')] public function it_can_load_the_table_caption(string $table, ?string $defaultCaption, ?string $expected): void { - self::assertSame($expected, Parser::new()->tableCaption($defaultCaption)->parseHtml($table)->getCaption()); + self::assertSame($expected, (new Parser())->tableCaption($defaultCaption)->parseHtml($table)->getCaption()); } /** From 7bbf5ffa4258b59bf961f1360fc0cdb759205d3a Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Tue, 30 Sep 2025 16:21:00 +0200 Subject: [PATCH 3/3] Improve test suite --- .github/workflows/build.yml | 2 +- src/Parser.php | 3 ++- src/ParserErrorTest.php | 2 ++ src/ParserTest.php | 6 ++++++ src/Warning.php | 23 ----------------------- 5 files 
changed, 11 insertions(+), 25 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9ec49e8..30ca994 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -56,7 +56,7 @@ jobs: if: ${{ matrix.php == '8.5' }} - name: Run static analysis - run: composer phpstan-build + run: composer phpstan if: ${{ matrix.php == '8.4' && matrix.stability == 'prefer-stable'}} - name: Run Coding style rules diff --git a/src/Parser.php b/src/Parser.php index c47577c..89ae764 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -15,10 +15,10 @@ use ErrorException; use Iterator; use League\Csv\Buffer; +use League\Csv\CannotInsertRecord; use League\Csv\ResultSet; use League\Csv\SyntaxError; use League\Csv\TabularDataReader; -use PHPUnit\Framework\Attributes\CodeCoverageIgnore; use SimpleXMLElement; use SplFileInfo; use Stringable; @@ -359,6 +359,7 @@ public function parseFile(mixed $filenameOrStream, $filenameContext = null): Tab /** * @throws ParserError * @throws SyntaxError + * @throws CannotInsertRecord * * @return Table> */ diff --git a/src/ParserErrorTest.php b/src/ParserErrorTest.php index ce51ca0..3f2e9ee 100644 --- a/src/ParserErrorTest.php +++ b/src/ParserErrorTest.php @@ -4,9 +4,11 @@ namespace Bakame\TabularData\HtmlTable; +use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\Test; use PHPUnit\Framework\TestCase; +#[CoversClass(ParserError::class)] final class ParserErrorTest extends TestCase { #[Test] diff --git a/src/ParserTest.php b/src/ParserTest.php index f13b8a3..35e6226 100644 --- a/src/ParserTest.php +++ b/src/ParserTest.php @@ -7,11 +7,17 @@ use DOMDocument; use DOMElement; use League\Csv\TabularDataReader; +use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\Attributes\Test; use PHPUnit\Framework\TestCase; use SimpleXMLElement; +#[CoversClass(Warning::class)] +#[CoversClass(Parser::class)] +#[CoversClass(Feature::class)] 
+#[CoversClass(Section::class)] +#[CoversClass(Table::class)] final class ParserTest extends TestCase { private const HTML = << - in_array($errno, [E_WARNING, E_USER_WARNING], true), - ); - - try { - return $callback(...$arguments); - } finally { - restore_error_handler(); - } - } }