diff --git a/reports/.gitkeep b/reports/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/reports/machine_learning_benchmark.ipynb b/reports/machine_learning_benchmark.ipynb new file mode 100644 index 0000000..5d6555d --- /dev/null +++ b/reports/machine_learning_benchmark.ipynb @@ -0,0 +1,4864 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "5a0ac8c1198c4c2daa6a2f42fb999fcc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b9685968526f418e8a5acda06fbf1968", + "IPY_MODEL_b9a1a30ac1a643afbdf7be645963a9b7", + "IPY_MODEL_6c0b61d869b04183a4245019cbab68bd" + ], + "layout": "IPY_MODEL_bd7508dc556342e987d4b1015a9ecfab" + } + }, + "b9685968526f418e8a5acda06fbf1968": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5223bcca9c90442e98a3b98c8fcb4203", + "placeholder": "", + "style": "IPY_MODEL_e99ad09ab6ef46cca9407848423713df", + "value": "README.md: " + } + }, + 
"b9a1a30ac1a643afbdf7be645963a9b7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6d222d2e3d234c2a8323b3fb9ec000b3", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b82668e264924b5dbd35b2d57b5039de", + "value": 1 + } + }, + "6c0b61d869b04183a4245019cbab68bd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f5b10a15ab0e468784e322135cf9c324", + "placeholder": "", + "style": "IPY_MODEL_40f0bda9fded407983e4f0c62fbd2dda", + "value": " 6.81k/? 
[00:00<00:00, 352kB/s]" + } + }, + "bd7508dc556342e987d4b1015a9ecfab": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5223bcca9c90442e98a3b98c8fcb4203": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + 
"grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e99ad09ab6ef46cca9407848423713df": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6d222d2e3d234c2a8323b3fb9ec000b3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "b82668e264924b5dbd35b2d57b5039de": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f5b10a15ab0e468784e322135cf9c324": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"40f0bda9fded407983e4f0c62fbd2dda": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f576a8515b084e95a80e997c17d2315a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2a019390a6624c008fb2411f8d19cc82", + "IPY_MODEL_7f0e4487d7324fd0bcc40afa6fba7fe7", + "IPY_MODEL_2f1997b0ae6043bcb5f1ca32177ab50b" + ], + "layout": "IPY_MODEL_ae62a76dcbdc49fabf859a781350057f" + } + }, + "2a019390a6624c008fb2411f8d19cc82": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3a7bf055d65b45afa17d4ca7ccda48dd", + "placeholder": "", + "style": "IPY_MODEL_4e0c44ca02e348ad8ca18f88c8ba9370", + "value": "amazon_polarity/train-00000-of-00004.par(…): 100%" + } + }, + "7f0e4487d7324fd0bcc40afa6fba7fe7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", 
+ "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_975c82fcad474cb3b93d33108d2cb90c", + "max": 259761770, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_510533ee0c1d413e8fec253b46a3aa9b", + "value": 259761770 + } + }, + "2f1997b0ae6043bcb5f1ca32177ab50b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_31b1eb8b518045e99ec6f7cfa464fa8e", + "placeholder": "", + "style": "IPY_MODEL_13439674899e48189b83ec73ba1f2dc7", + "value": " 260M/260M [00:05<00:00, 45.0MB/s]" + } + }, + "ae62a76dcbdc49fabf859a781350057f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3a7bf055d65b45afa17d4ca7ccda48dd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e0c44ca02e348ad8ca18f88c8ba9370": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "975c82fcad474cb3b93d33108d2cb90c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "510533ee0c1d413e8fec253b46a3aa9b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"31b1eb8b518045e99ec6f7cfa464fa8e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "13439674899e48189b83ec73ba1f2dc7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "31d696b7927f42ec9576f09be2c3d98b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c850cb31ae67428faefed7398bf67387", + "IPY_MODEL_0c793656e98442a2981b3b7d3323dc85", + "IPY_MODEL_f01c5cb7bab34beeba25de2d2eec9744" + ], + "layout": "IPY_MODEL_b8ebf0e6597e4fd7a9d5109249ad48ca" + } + }, + "c850cb31ae67428faefed7398bf67387": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bf214ab650d24e69b2d2aa851cbbf9ca", + "placeholder": "", + "style": "IPY_MODEL_4e04267df185487ea8f8392b8457df7c", + "value": "amazon_polarity/train-00001-of-00004.par(…): 100%" + } + }, + "0c793656e98442a2981b3b7d3323dc85": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_980ffcc461a44efaaa1301cc8aadc3f5", + "max": 258363554, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9c51dac5c5d9452b8d724059eac74438", + "value": 258363554 + } + }, + "f01c5cb7bab34beeba25de2d2eec9744": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + 
"_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_abf3ba014071476ca0e1e3f49a52279b", + "placeholder": "", + "style": "IPY_MODEL_9abbd9045722468c89fd04503c1a3d60", + "value": " 258M/258M [00:04<00:00, 96.9MB/s]" + } + }, + "b8ebf0e6597e4fd7a9d5109249ad48ca": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bf214ab650d24e69b2d2aa851cbbf9ca": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e04267df185487ea8f8392b8457df7c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "980ffcc461a44efaaa1301cc8aadc3f5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9c51dac5c5d9452b8d724059eac74438": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "abf3ba014071476ca0e1e3f49a52279b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9abbd9045722468c89fd04503c1a3d60": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b61b9fbe801e4c4282808920aadf7371": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c78eba94196e43208d303301f1f72b13", + "IPY_MODEL_0907f6b5721e48919e5d3f1cdf3854f3", + "IPY_MODEL_bb3f0c309fa24719a04dc046d78dd1f4" + ], + "layout": "IPY_MODEL_136d295dbc7a4593a5876e077522e976" + } + }, + "c78eba94196e43208d303301f1f72b13": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", 
+ "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_be7dac6179f14d49a04778141be5df99", + "placeholder": "", + "style": "IPY_MODEL_cdfc3ecc89e647ad8f593585b2a14c34", + "value": "amazon_polarity/train-00002-of-00004.par(…): 100%" + } + }, + "0907f6b5721e48919e5d3f1cdf3854f3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b7e164a485124f5a8f716a31396debc9", + "max": 255471883, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2a9fb5f144484df0895e426462dfa0a1", + "value": 255471883 + } + }, + "bb3f0c309fa24719a04dc046d78dd1f4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b9ec58743b2e467f8ea7452c53630156", + "placeholder": "", + "style": "IPY_MODEL_71c0c4e453a24912b0d35f466079197b", + "value": " 255M/255M [00:02<00:00, 189MB/s]" + } + }, + "136d295dbc7a4593a5876e077522e976": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "be7dac6179f14d49a04778141be5df99": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cdfc3ecc89e647ad8f593585b2a14c34": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b7e164a485124f5a8f716a31396debc9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2a9fb5f144484df0895e426462dfa0a1": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b9ec58743b2e467f8ea7452c53630156": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "71c0c4e453a24912b0d35f466079197b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c1b5f033aa3148118570ccc325046c35": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6169bca3f9ef4d6b99f5c39aa67f5561", + "IPY_MODEL_10d31a5c91b54f56bc6b30241f394788", + "IPY_MODEL_bac092e2447144909522015e017d3588" + ], + "layout": "IPY_MODEL_b6eb15c0ac7849e285f4aab02e0d4f7e" + } + }, + "6169bca3f9ef4d6b99f5c39aa67f5561": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e06c9299fe874987bb822820d560a768", + "placeholder": "", + "style": "IPY_MODEL_0278269c48074d9788c2eb54a296d9c5", + "value": "amazon_polarity/train-00003-of-00004.par(…): 100%" + } + }, + "10d31a5c91b54f56bc6b30241f394788": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ee6ff7545b804e2ea643df3528a7361a", + "max": 254410930, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_56fdb04f02bc4c1bb3a43e1fa0652cde", + "value": 254410930 + } + }, + "bac092e2447144909522015e017d3588": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f3eb00e9817748c1ac14519a189dc684", + "placeholder": "", + "style": "IPY_MODEL_2985417fbe584ae288ee6d22fbde554e", + "value": " 254M/254M [00:02<00:00, 168MB/s]" + } + }, + "b6eb15c0ac7849e285f4aab02e0d4f7e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, 
+ "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e06c9299fe874987bb822820d560a768": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0278269c48074d9788c2eb54a296d9c5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ee6ff7545b804e2ea643df3528a7361a": { + "model_module": "@jupyter-widgets/base", + "model_name": 
"LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "56fdb04f02bc4c1bb3a43e1fa0652cde": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f3eb00e9817748c1ac14519a189dc684": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2985417fbe584ae288ee6d22fbde554e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f97f673536e94561a7d341f6c69c0e7e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a606da5378ec455f9dbd4d895857458e", + "IPY_MODEL_cd50f2eade344c34b56eed1f699032aa", + "IPY_MODEL_6848b8d3dbaa43598010d060f72c8b5c" + ], + "layout": 
"IPY_MODEL_a2f29c5f75e9405f93cbf31992df957d" + } + }, + "a606da5378ec455f9dbd4d895857458e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_983f2d2a2ae849b18fdd88aa4937fe5b", + "placeholder": "", + "style": "IPY_MODEL_50eb2633d2644371b5d382b345ce5080", + "value": "amazon_polarity/test-00000-of-00001.parq(…): 100%" + } + }, + "cd50f2eade344c34b56eed1f699032aa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ad635ba05af44dc9ac29f81b6fecd43c", + "max": 117422360, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ae3d02e234734e8da4f468642f0860f9", + "value": 117422360 + } + }, + "6848b8d3dbaa43598010d060f72c8b5c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_ee1be19c8cfd4a4694ff9bc72d5a6384", + "placeholder": "", + "style": "IPY_MODEL_76a75dfa593342c69026424c0c7591b7", + "value": " 117M/117M [00:02<00:00, 64.4MB/s]" + } + }, + "a2f29c5f75e9405f93cbf31992df957d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "983f2d2a2ae849b18fdd88aa4937fe5b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + 
"grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "50eb2633d2644371b5d382b345ce5080": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ad635ba05af44dc9ac29f81b6fecd43c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae3d02e234734e8da4f468642f0860f9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ee1be19c8cfd4a4694ff9bc72d5a6384": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": 
null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "76a75dfa593342c69026424c0c7591b7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c4dd07f49a534338a8718d6efc536156": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b3c5dd42012c40a89cac005cb14b1596", + "IPY_MODEL_554e279ea4db4058b60068feca2d46db", + "IPY_MODEL_83e4452b683e4befa68cfa0baea303ee" + ], + "layout": "IPY_MODEL_56527a0eef604dc1b4af2fca80627068" + } + }, + "b3c5dd42012c40a89cac005cb14b1596": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8b2cc632c55e408a8fc7b03e268e5e6b", + "placeholder": "", + "style": "IPY_MODEL_a3ac227a919748b98c9aaab9afccb469", + "value": "Generating train split: 100%" + } + }, + "554e279ea4db4058b60068feca2d46db": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c028dfaae83c40979fea90f85344f354", + "max": 3600000, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8a777d045fd7496ab95f4081ca88fc71", + "value": 3600000 + } + }, + "83e4452b683e4befa68cfa0baea303ee": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_14f78810c9c94b3ba1308a9cfda15911", + "placeholder": "", + "style": "IPY_MODEL_9ac87e70aa7d451dafc62b592f5d45fa", + "value": " 3600000/3600000 [00:13<00:00, 164995.19 examples/s]" + } + }, + "56527a0eef604dc1b4af2fca80627068": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8b2cc632c55e408a8fc7b03e268e5e6b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a3ac227a919748b98c9aaab9afccb469": { + "model_module": "@jupyter-widgets/controls", + 
"model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c028dfaae83c40979fea90f85344f354": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8a777d045fd7496ab95f4081ca88fc71": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "14f78810c9c94b3ba1308a9cfda15911": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9ac87e70aa7d451dafc62b592f5d45fa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "61f3197c6f0547c4964e2c030df0ea3b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + 
"_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b7ed86d35ae746d2849b8e1e18a948e0", + "IPY_MODEL_a09064c441244fc7a333eb92a2e4edd8", + "IPY_MODEL_1478dc488d4f4d17abb26acc069c5ba7" + ], + "layout": "IPY_MODEL_eda83c1917d246178eb4c691fb943a77" + } + }, + "b7ed86d35ae746d2849b8e1e18a948e0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1d955b2ca1834e0bacd41a97d690148d", + "placeholder": "", + "style": "IPY_MODEL_87ad65daa34747daa1e9013faefca302", + "value": "Generating test split: 100%" + } + }, + "a09064c441244fc7a333eb92a2e4edd8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d0ce34a0515643a48ceba993bd89e3bd", + "max": 400000, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b525492479004f5f90da96aff44fda79", + "value": 400000 + } + }, + "1478dc488d4f4d17abb26acc069c5ba7": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d44f742be39e40e3a4d01facee41213e", + "placeholder": "", + "style": "IPY_MODEL_ade8859ae4804772ab10d9b5abba97de", + "value": " 400000/400000 [00:00<00:00, 404115.24 examples/s]" + } + }, + "eda83c1917d246178eb4c691fb943a77": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1d955b2ca1834e0bacd41a97d690148d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + 
"model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "87ad65daa34747daa1e9013faefca302": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d0ce34a0515643a48ceba993bd89e3bd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", 
+ "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b525492479004f5f90da96aff44fda79": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d44f742be39e40e3a4d01facee41213e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ade8859ae4804772ab10d9b5abba97de": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d7edd9e1cb5c49d7a628d608df6ca9aa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c2413503263844ae87c509f01040e239", + "IPY_MODEL_b5350b54decd4a8b8c3fa55af35aa6a5", + "IPY_MODEL_9a31c5f1a7884b5d85b84be07ab3aed1" + ], + "layout": "IPY_MODEL_6924897a1a1043abb4c2314112822ec6" + } + }, + "c2413503263844ae87c509f01040e239": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8c537726c7034e75b5a2906b5f23ff8a", + "placeholder": "", + "style": "IPY_MODEL_ae695dc9063048d0979e462d4988d77f", + "value": "Creating json from Arrow format: 100%" + } + }, + "b5350b54decd4a8b8c3fa55af35aa6a5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2bca204eb3e241c29e90ba15e61227af", + "max": 3600, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4725888641e8425c8db61ef836d5f283", + "value": 3600 + } + }, + "9a31c5f1a7884b5d85b84be07ab3aed1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_66f994463d2a41ec890e90fe7ee1e6fa", + "placeholder": "", + "style": "IPY_MODEL_e64efef822d846b7b5ee9b27ed780d34", + "value": " 3600/3600 [00:27<00:00, 210.11ba/s]" + } + }, + "6924897a1a1043abb4c2314112822ec6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8c537726c7034e75b5a2906b5f23ff8a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae695dc9063048d0979e462d4988d77f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2bca204eb3e241c29e90ba15e61227af": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + 
"visibility": null, + "width": null + } + }, + "4725888641e8425c8db61ef836d5f283": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "66f994463d2a41ec890e90fe7ee1e6fa": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e64efef822d846b7b5ee9b27ed780d34": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "79b9e2fee0f34577a0f2a566d848e8e7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4cf4537065b7434a87f4284c26b7c661", + "IPY_MODEL_7a2fa8ad072442c287e7cf33831a3c10", + "IPY_MODEL_ed89f367a6a24e3b9b5af45035d3cd84" + ], + "layout": "IPY_MODEL_df97681806be49598f393f6c0c7b43ec" + } + }, + "4cf4537065b7434a87f4284c26b7c661": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_dc733b574c00437bbe91d282d51af34d", + "placeholder": "", + "style": "IPY_MODEL_d2e57805352d44d7bf42aba3ffdd1b89", + "value": "Creating json from Arrow format: 100%" + } + }, + "7a2fa8ad072442c287e7cf33831a3c10": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7f14e5655eed4bb4a5f40e5d381517b6", + "max": 400, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9a8cf916e4fc4a229f91462bfd038907", + "value": 400 + } + }, + "ed89f367a6a24e3b9b5af45035d3cd84": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_43ba4e2bc78942509f1ec955c09e173a", + "placeholder": "", + "style": "IPY_MODEL_1897b3670ae74d878501f85890576230", + "value": " 400/400 [00:01<00:00, 232.91ba/s]" + } + }, + "df97681806be49598f393f6c0c7b43ec": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + 
"min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dc733b574c00437bbe91d282d51af34d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d2e57805352d44d7bf42aba3ffdd1b89": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + 
"7f14e5655eed4bb4a5f40e5d381517b6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9a8cf916e4fc4a229f91462bfd038907": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "43ba4e2bc78942509f1ec955c09e173a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1897b3670ae74d878501f85890576230": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "source": [ + "# @title ⚙️ Runtime Specifications\n", + "import platform\n", + "import psutil\n", + "import os\n", + "import sys\n", + "import torch\n", + "import tensorflow\n", + "\n", + "def get_size(bytes_val, suffix=\"B\"):\n", + " \"\"\"Scale bytes to its proper format (e.g., 1024 -> 1KB).\"\"\"\n", + " if bytes_val is None:\n", + " return \"N/A\"\n", + "\n", + " factor = 1024\n", + " for unit in [\"\", \"K\", \"M\", \"G\", \"T\", \"P\"]:\n", + " if bytes_val < factor:\n", + " # Ensure the value is 
converted to float for proper division and formatting\n", + " return f\"{float(bytes_val):.2f}{unit}{suffix}\"\n", + " bytes_val /= factor\n", + "\n", + "print(\"--- System and Environment ---\")\n", + "print(f\"OS/Platform: {platform.system()} ({platform.release()})\")\n", + "print(f\"Kernel Version: {platform.version()}\")\n", + "print(f\"Python Version: {sys.version.split()[0]} ({platform.architecture()[0]})\")\n", + "print(f\"PyTorch Version: {torch.__version__}\")\n", + "print(f\"TensorFlow Version: {tensorflow.__version__}\")\n", + "print()\n", + "\n", + "print(\"--- CPU and System RAM ---\")\n", + "print(\"CPU Details (from `!lscpu`): \")\n", + "!lscpu | grep 'Model name\\|Socket(s)\\|Core(s) per socket\\|Thread(s) per core\\|CPU MHz'\n", + "mem = psutil.virtual_memory()\n", + "print(f\"System RAM: {get_size(mem.total)}\")\n", + "print()\n", + "\n", + "# Check for GPU (CUDA)\n", + "if torch.cuda.is_available():\n", + " device_name = torch.cuda.get_device_name(0)\n", + " device_props = torch.cuda.get_device_properties(0)\n", + " print(\"--- Accelerator: GPU (CUDA) ---\")\n", + " print(f\"Device Name: {device_name}\")\n", + " print(f\"CUDA Cores: {device_props.multi_processor_count * 64} (Approx)\")\n", + " print(f\"Global Memory: {get_size(device_props.total_memory)}\")\n", + " print(f\"CUDA Capability: {device_props.major}.{device_props.minor}\")\n", + "if 'TPU_NAME' in os.environ:\n", + " print(\"--- Accelerator: TPU ---\")\n", + " print(f\"**TPU Name:** {os.environ['TPU_NAME']}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IxofynaY2gB_", + "outputId": "132e7103-b60c-4fd4-ab31-d0a06e970d6a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--- System and Environment ---\n", + "OS/Platform: Linux (6.6.105+)\n", + "Kernel Version: #1 SMP Thu Oct 2 10:42:05 UTC 2025\n", + "Python Version: 3.12.12 (64bit)\n", + "PyTorch Version: 2.9.0+cu126\n", +
"TensorFlow Version: 2.19.0\n", + "\n", + "--- CPU and System RAM ---\n", + "CPU Details (from `!lscpu`): \n", + "Model name: Intel(R) Xeon(R) CPU @ 2.00GHz\n", + "Thread(s) per core: 2\n", + "Core(s) per socket: 1\n", + "Socket(s): 1\n", + "System RAM: 12.67GB\n", + "\n", + "--- Accelerator: GPU (CUDA) ---\n", + "Device Name: Tesla T4\n", + "CUDA Cores: 2560 (Approx)\n", + "Global Memory: 14.74GB\n", + "CUDA Capability: 7.5\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 1. Configuration & Helper Functions\n", + "\n", + "We define a standardized benchmarking function to ensure fair comparison across all formats. This function measures:\n", + "\n", + "1. Loading Time: Time taken to read data from disk into RAM.\n", + "2. Training Time: Time taken to vectorize text and train the model.\n", + "3. Peak Memory: Growth in the process's resident memory (RSS) from just before loading to just after training. This is a proxy for peak usage, not a true maximum.\n", + "4. Performance: Accuracy and F1 Score." + ], + "metadata": { + "id": "hxkKUGQBzfpV" + } + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b8614c08", + "outputId": "a719671a-746e-41cd-e3eb-41b9bdd4e956" + }, + "source": [ + "# @title 1.1 Setup & Dependencies\n", + "# Install necessary libraries for the benchmark and SchemaForge\n", + "!pip install -q datasets pandas scikit-learn matplotlib seaborn psutil pyarrow fastavro ijson\n", + "\n", + "import os\n", + "import sys\n", + "import time\n", + "import psutil\n", + "import shutil\n", + "import subprocess\n", + "import gc\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "from datasets import load_dataset\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.metrics import accuracy_score, f1_score\n", + "from sklearn.model_selection import train_test_split\n", + "\n", +
"# Configure plotting\n", + "sns.set_theme(style=\"whitegrid\")\n", + "plt.rcParams['figure.figsize'] = (12, 6)\n", + "\n", + "print(\"✅ Environment setup complete.\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/3.5 MB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/3.5 MB\u001b[0m \u001b[31m46.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.5/3.5 MB\u001b[0m \u001b[31m57.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/149.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.0/149.0 kB\u001b[0m \u001b[31m15.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h✅ Environment setup complete.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title 1.2 Define Benchmarking Utilities\n", + "\n", + "class BenchmarkTracker:\n", + " def __init__(self):\n", + " self.results = []\n", + "\n", + " def measure(self, format_name, load_func, train_func):\n", + " \"\"\"\n", + " Generic function to measure load and train performance.\n", + " \"\"\"\n", + " print(f\"--- Benchmarking: {format_name} ---\")\n", + "\n", + " process = psutil.Process(os.getpid())\n", + " mem_before = process.memory_info().rss / (1024 * 1024)\n", + "\n", + " # --- Measure Loading ---\n", + " print(f\" ⏳ Loading data...\")\n", + " start_load = time.perf_counter()\n", + " X_train, y_train, X_test, y_test = load_func()\n", + " end_load = time.perf_counter()\n", + " load_time = end_load - start_load\n", + " print(f\" ✅ 
Loaded {len(y_train):,} rows in {load_time:.2f}s\")\n", + "\n", + " # --- Measure Training ---\n", + " print(f\" ⚙️ Training model...\")\n", + " start_train = time.perf_counter()\n", + " model = train_func(X_train, y_train)\n", + " end_train = time.perf_counter()\n", + " train_time = end_train - start_train\n", + "\n", + " # --- Measure Memory Peak ---\n", + " mem_after = process.memory_info().rss / (1024 * 1024)\n", + " peak_memory_usage = max(0, mem_after - mem_before)\n", + "\n", + " # --- Evaluate ---\n", + " # Predict on a subset of test data to save inference time in benchmark\n", + " subset_test_size = 10000\n", + " y_pred = model.predict(X_test[:subset_test_size])\n", + " acc = accuracy_score(y_test[:subset_test_size], y_pred)\n", + " f1 = f1_score(y_test[:subset_test_size], y_pred, average='weighted')\n", + "\n", + " print(f\" ⏱️ Train Time: {train_time:.4f}s\")\n", + " print(f\" 💾 Mem Delta: {peak_memory_usage:.2f} MB\")\n", + " print(\"-\" * 30)\n", + "\n", + " self.results.append({\n", + " \"Format\": format_name,\n", + " \"Load Time (s)\": load_time,\n", + " \"Training Time (s)\": train_time,\n", + " \"Total Time (s)\": load_time + train_time,\n", + " \"Peak Memory Delta (MB)\": peak_memory_usage,\n", + " \"Accuracy\": acc,\n", + " \"F1 Score\": f1\n", + " })\n", + "\n", + " def get_summary(self):\n", + " return pd.DataFrame(self.results)\n", + "\n", + "# Initialize tracker\n", + "tracker = BenchmarkTracker()\n", + "\n", + "# OPTIMIZED Model Architecture for Large Datasets\n", + "from sklearn.linear_model import SGDClassifier\n", + "\n", + "def train_standard_model(X_train, y_train):\n", + " \"\"\"\n", + " Pipeline: TF-IDF + SGDClassifier.\n", + " SGDClassifier is much faster for large datasets (1M+ rows) than standard LogisticRegression.\n", + " \"\"\"\n", + " model = make_pipeline(\n", + " # Limit features to keep memory usage stable during vectorization\n", + " TfidfVectorizer(max_features=10000, stop_words='english'),\n", + " # Log loss = Logistic 
Regression via SGD\n", + " SGDClassifier(loss='log_loss', max_iter=1000, tol=1e-3, n_jobs=-1, random_state=42)\n", + " )\n", + " model.fit(X_train, y_train)\n", + " return model\n", + "\n", + "print(\"✅ Benchmarking utilities ready.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ti7DuqC8zrXl", + "outputId": "cea7f39c-4ac7-4adf-b016-8c008c42c712" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "✅ Benchmarking utilities ready.\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "36252453", + "outputId": "6120c233-6ccc-4fdc-e45e-f39bceee1ebc" + }, + "source": [ + "# @title 1.3 Define Global Constants and Paths\n", + "\n", + "# Dataset Configuration\n", + "DATASET_NAME = \"amazon_polarity\"\n", + "TRAIN_SPLIT = \"train\"\n", + "TEST_SPLIT = \"test\"\n", + "\n", + "# Directory and File Paths\n", + "SCHEMA_BENCH_ROOT = \"schemaforge_bench\"\n", + "DATA_DIR = os.path.join(SCHEMA_BENCH_ROOT, \"data\")\n", + "OUTPUT_DIR = os.path.join(SCHEMA_BENCH_ROOT, \"output\")\n", + "SCHEMA_REPORT_PREFIX = os.path.join(SCHEMA_BENCH_ROOT, \"schema_report\")\n", + "SCHEMA_REPORT_PATH = f\"{SCHEMA_REPORT_PREFIX}.json\"\n", + "\n", + "# Create necessary directories\n", + "os.makedirs(DATA_DIR, exist_ok=True)\n", + "os.makedirs(OUTPUT_DIR, exist_ok=True)\n", + "\n", + "print(\"✅ Global constants and paths defined.\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "✅ Global constants and paths defined.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 2. Baseline: Hugging Face datasets\n", + "\n", + "We use the Amazon Polarity dataset (Text Classification). We load it directly using the Hugging Face datasets library, which relies on Arrow format internally (memory-mapped), usually providing a very fast baseline."
+ ], + "metadata": { + "id": "mBCOX3hUzsun" + } + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 532, + "referenced_widgets": [ + "5a0ac8c1198c4c2daa6a2f42fb999fcc", + "b9685968526f418e8a5acda06fbf1968", + "b9a1a30ac1a643afbdf7be645963a9b7", + "6c0b61d869b04183a4245019cbab68bd", + "bd7508dc556342e987d4b1015a9ecfab", + "5223bcca9c90442e98a3b98c8fcb4203", + "e99ad09ab6ef46cca9407848423713df", + "6d222d2e3d234c2a8323b3fb9ec000b3", + "b82668e264924b5dbd35b2d57b5039de", + "f5b10a15ab0e468784e322135cf9c324", + "40f0bda9fded407983e4f0c62fbd2dda", + "f576a8515b084e95a80e997c17d2315a", + "2a019390a6624c008fb2411f8d19cc82", + "7f0e4487d7324fd0bcc40afa6fba7fe7", + "2f1997b0ae6043bcb5f1ca32177ab50b", + "ae62a76dcbdc49fabf859a781350057f", + "3a7bf055d65b45afa17d4ca7ccda48dd", + "4e0c44ca02e348ad8ca18f88c8ba9370", + "975c82fcad474cb3b93d33108d2cb90c", + "510533ee0c1d413e8fec253b46a3aa9b", + "31b1eb8b518045e99ec6f7cfa464fa8e", + "13439674899e48189b83ec73ba1f2dc7", + "31d696b7927f42ec9576f09be2c3d98b", + "c850cb31ae67428faefed7398bf67387", + "0c793656e98442a2981b3b7d3323dc85", + "f01c5cb7bab34beeba25de2d2eec9744", + "b8ebf0e6597e4fd7a9d5109249ad48ca", + "bf214ab650d24e69b2d2aa851cbbf9ca", + "4e04267df185487ea8f8392b8457df7c", + "980ffcc461a44efaaa1301cc8aadc3f5", + "9c51dac5c5d9452b8d724059eac74438", + "abf3ba014071476ca0e1e3f49a52279b", + "9abbd9045722468c89fd04503c1a3d60", + "b61b9fbe801e4c4282808920aadf7371", + "c78eba94196e43208d303301f1f72b13", + "0907f6b5721e48919e5d3f1cdf3854f3", + "bb3f0c309fa24719a04dc046d78dd1f4", + "136d295dbc7a4593a5876e077522e976", + "be7dac6179f14d49a04778141be5df99", + "cdfc3ecc89e647ad8f593585b2a14c34", + "b7e164a485124f5a8f716a31396debc9", + "2a9fb5f144484df0895e426462dfa0a1", + "b9ec58743b2e467f8ea7452c53630156", + "71c0c4e453a24912b0d35f466079197b", + "c1b5f033aa3148118570ccc325046c35", + "6169bca3f9ef4d6b99f5c39aa67f5561", + "10d31a5c91b54f56bc6b30241f394788", + 
"bac092e2447144909522015e017d3588", + "b6eb15c0ac7849e285f4aab02e0d4f7e", + "e06c9299fe874987bb822820d560a768", + "0278269c48074d9788c2eb54a296d9c5", + "ee6ff7545b804e2ea643df3528a7361a", + "56fdb04f02bc4c1bb3a43e1fa0652cde", + "f3eb00e9817748c1ac14519a189dc684", + "2985417fbe584ae288ee6d22fbde554e", + "f97f673536e94561a7d341f6c69c0e7e", + "a606da5378ec455f9dbd4d895857458e", + "cd50f2eade344c34b56eed1f699032aa", + "6848b8d3dbaa43598010d060f72c8b5c", + "a2f29c5f75e9405f93cbf31992df957d", + "983f2d2a2ae849b18fdd88aa4937fe5b", + "50eb2633d2644371b5d382b345ce5080", + "ad635ba05af44dc9ac29f81b6fecd43c", + "ae3d02e234734e8da4f468642f0860f9", + "ee1be19c8cfd4a4694ff9bc72d5a6384", + "76a75dfa593342c69026424c0c7591b7", + "c4dd07f49a534338a8718d6efc536156", + "b3c5dd42012c40a89cac005cb14b1596", + "554e279ea4db4058b60068feca2d46db", + "83e4452b683e4befa68cfa0baea303ee", + "56527a0eef604dc1b4af2fca80627068", + "8b2cc632c55e408a8fc7b03e268e5e6b", + "a3ac227a919748b98c9aaab9afccb469", + "c028dfaae83c40979fea90f85344f354", + "8a777d045fd7496ab95f4081ca88fc71", + "14f78810c9c94b3ba1308a9cfda15911", + "9ac87e70aa7d451dafc62b592f5d45fa", + "61f3197c6f0547c4964e2c030df0ea3b", + "b7ed86d35ae746d2849b8e1e18a948e0", + "a09064c441244fc7a333eb92a2e4edd8", + "1478dc488d4f4d17abb26acc069c5ba7", + "eda83c1917d246178eb4c691fb943a77", + "1d955b2ca1834e0bacd41a97d690148d", + "87ad65daa34747daa1e9013faefca302", + "d0ce34a0515643a48ceba993bd89e3bd", + "b525492479004f5f90da96aff44fda79", + "d44f742be39e40e3a4d01facee41213e", + "ade8859ae4804772ab10d9b5abba97de" + ] + }, + "id": "9214cb29", + "outputId": "36baefbe-fbef-4d20-a781-6934bd2d6fbf" + }, + "source": [ + "# @title 2.1 Benchmark Hugging Face Baseline\n", + "\n", + "# Ensure data is cached locally before benchmarking to exclude network effects\n", + "cached_dataset = load_dataset(DATASET_NAME, split=TRAIN_SPLIT)\n", + "cached_test_dataset = load_dataset(DATASET_NAME, split=TEST_SPLIT)\n", + "\n", + "# Free up the cached variable immediately 
to prevent affecting the Baseline benchmark's result\n", + "del cached_dataset\n", + "del cached_test_dataset\n", + "gc.collect()\n", + "\n", + "def load_hf_baseline():\n", + " \"\"\"\n", + " Loads the full train and test splits of the Amazon Polarity dataset using Hugging Face datasets\n", + " for benchmarking purposes.\n", + " \"\"\"\n", + " dataset = load_dataset(DATASET_NAME, split=TRAIN_SPLIT)\n", + " test_dataset = load_dataset(DATASET_NAME, split=TEST_SPLIT)\n", + "\n", + " df_train = dataset.to_pandas()\n", + " df_test = test_dataset.to_pandas()\n", + "\n", + " return df_train['content'], df_train['label'], df_test['content'], df_test['label']\n", + "\n", + "# Run Baseline\n", + "tracker.measure(\n", + " format_name=\"HF Dataset (Arrow)\",\n", + " load_func=load_hf_baseline,\n", + " train_func=train_standard_model\n", + ")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "README.md: 0.00B [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "5a0ac8c1198c4c2daa6a2f42fb999fcc" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "amazon_polarity/train-00000-of-00004.par(…): 0%| | 0.00/260M [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2,
+ "version_minor": 0, + "model_id": "f576a8515b084e95a80e997c17d2315a" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "amazon_polarity/train-00001-of-00004.par(…): 0%| | 0.00/258M [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "31d696b7927f42ec9576f09be2c3d98b" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "amazon_polarity/train-00002-of-00004.par(…): 0%| | 0.00/255M [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "b61b9fbe801e4c4282808920aadf7371" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "amazon_polarity/train-00003-of-00004.par(…): 0%| | 0.00/254M [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "c1b5f033aa3148118570ccc325046c35" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "amazon_polarity/test-00000-of-00001.parq(…): 0%| | 0.00/117M [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "f97f673536e94561a7d341f6c69c0e7e" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Generating train split: 0%| | 0/3600000 [00:00, ? examples/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "c4dd07f49a534338a8718d6efc536156" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Generating test split: 0%| | 0/400000 [00:00, ? 
examples/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "61f3197c6f0547c4964e2c030df0ea3b" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--- Benchmarking: HF Dataset (Arrow) ---\n", + " ⏳ Loading data...\n", + " ✅ Loaded 3,600,000 rows in 6.28s\n", + " ⚙️ Training model...\n", + " ⏱️ Train Time: 174.5190s\n", + " 💾 Mem Delta: 2250.99 MB\n", + "------------------------------\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 3. Setup SchemaForge & Convert Data\n", + "\n", + "Now we clone SchemaForge, prepare the input JSON data (simulating a raw data ingestion scenario), and use the tool to generate CSV, Parquet, and Feather files." + ], + "metadata": { + "id": "QJkmkLVMz2HQ" + } + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 154, + "referenced_widgets": [ + "d7edd9e1cb5c49d7a628d608df6ca9aa", + "c2413503263844ae87c509f01040e239", + "b5350b54decd4a8b8c3fa55af35aa6a5", + "9a31c5f1a7884b5d85b84be07ab3aed1", + "6924897a1a1043abb4c2314112822ec6", + "8c537726c7034e75b5a2906b5f23ff8a", + "ae695dc9063048d0979e462d4988d77f", + "2bca204eb3e241c29e90ba15e61227af", + "4725888641e8425c8db61ef836d5f283", + "66f994463d2a41ec890e90fe7ee1e6fa", + "e64efef822d846b7b5ee9b27ed780d34", + "79b9e2fee0f34577a0f2a566d848e8e7", + "4cf4537065b7434a87f4284c26b7c661", + "7a2fa8ad072442c287e7cf33831a3c10", + "ed89f367a6a24e3b9b5af45035d3cd84", + "df97681806be49598f393f6c0c7b43ec", + "dc733b574c00437bbe91d282d51af34d", + "d2e57805352d44d7bf42aba3ffdd1b89", + "7f14e5655eed4bb4a5f40e5d381517b6", + "9a8cf916e4fc4a229f91462bfd038907", + "43ba4e2bc78942509f1ec955c09e173a", + "1897b3670ae74d878501f85890576230" + ] + }, + "id": "50649dfc", + "outputId": "08ba526e-13b0-453b-826e-899cf8a1eff7" + }, + "source": [ + "# @title 3.1 Prepare Input Data for SchemaForge\n", + "# SchemaForge requires raw JSON 
files as input.\n", + "# We will export the HF dataset to a raw JSON format to simulate the \"Chaos\" state.\n", + "\n", + "print(\"📥 Loading raw dataset for export (this may take a minute)... \")\n", + "# Use the predefined constants for dataset name and splits\n", + "raw_data = load_dataset(DATASET_NAME, split=TRAIN_SPLIT)\n", + "test_data = load_dataset(DATASET_NAME, split=TEST_SPLIT)\n", + "\n", + "print(\"💾 Saving training data to JSON Lines (NDJSON)...\")\n", + "# Using lines=True (NDJSON) is much more memory efficient for 1M+ rows\n", + "raw_data.to_json(os.path.join(DATA_DIR, \"train_data.json\"), orient=\"records\", lines=True)\n", + "\n", + "print(\"💾 Saving test data to JSON Lines (NDJSON)...\")\n", + "test_data.to_json(os.path.join(DATA_DIR, \"test_data.json\"), orient=\"records\", lines=True)\n", + "\n", + "# Free up memory immediately\n", + "del raw_data\n", + "del test_data\n", + "gc.collect()\n", + "\n", + "print(f\"✅ Data exported to JSON in '{DATA_DIR}'\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "📥 Loading raw dataset for export (this may take a minute)... 
\n", + "💾 Saving training data to JSON Lines (NDJSON)...\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Creating json from Arrow format: 0%| | 0/3600 [00:00, ?ba/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "d7edd9e1cb5c49d7a628d608df6ca9aa" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "💾 Saving test data to JSON Lines (NDJSON)...\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Creating json from Arrow format: 0%| | 0/400 [00:00, ?ba/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "79b9e2fee0f34577a0f2a566d848e8e7" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "✅ Data exported to JSON in 'schemaforge_bench/data'\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title 3.2 Clone SchemaForge\n", + "!git clone https://github.com/Syntax-Error-1337/SchemaForge.git schemaforge_tool --depth 1\n", + "!cd schemaforge_tool && pip install -r requirements.txt" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aQcp-jxNz782", + "outputId": "b82d921b-024f-4b85-b27d-fbb2a3d8b1df" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'schemaforge_tool'...\n", + "remote: Enumerating objects: 46, done.\u001b[K\n", + "remote: Counting objects: 100% (46/46), done.\u001b[K\n", + "remote: Compressing objects: 100% (39/39), done.\u001b[K\n", + "remote: Total 46 (delta 5), reused 30 (delta 4), pack-reused 0 (from 0)\u001b[K\n", + "Receiving objects: 100% (46/46), 41.65 KiB | 2.78 MiB/s, done.\n", + "Resolving deltas: 100% (5/5), done.\n", + "Requirement already satisfied: pandas>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 1)) 
(2.2.2)\n", + "Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 2)) (18.1.0)\n", + "Requirement already satisfied: pytest>=7.0.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 3)) (8.4.2)\n", + "Requirement already satisfied: ijson>=3.2.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 4)) (3.4.0.post0)\n", + "Collecting json5>=0.9.0 (from -r requirements.txt (line 5))\n", + " Downloading json5-0.12.1-py3-none-any.whl.metadata (36 kB)\n", + "Requirement already satisfied: fastavro>=1.8.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 6)) (1.12.1)\n", + "Requirement already satisfied: psutil>=5.9.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 7)) (5.9.5)\n", + "Requirement already satisfied: numpy>=1.26.0 in /usr/local/lib/python3.12/dist-packages (from pandas>=2.0.0->-r requirements.txt (line 1)) (2.0.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas>=2.0.0->-r requirements.txt (line 1)) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas>=2.0.0->-r requirements.txt (line 1)) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas>=2.0.0->-r requirements.txt (line 1)) (2025.2)\n", + "Requirement already satisfied: iniconfig>=1 in /usr/local/lib/python3.12/dist-packages (from pytest>=7.0.0->-r requirements.txt (line 3)) (2.3.0)\n", + "Requirement already satisfied: packaging>=20 in /usr/local/lib/python3.12/dist-packages (from pytest>=7.0.0->-r requirements.txt (line 3)) (25.0)\n", + "Requirement already satisfied: pluggy<2,>=1.5 in /usr/local/lib/python3.12/dist-packages (from pytest>=7.0.0->-r requirements.txt (line 3)) (1.6.0)\n", + "Requirement already satisfied: pygments>=2.7.2 in 
/usr/local/lib/python3.12/dist-packages (from pytest>=7.0.0->-r requirements.txt (line 3)) (2.19.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas>=2.0.0->-r requirements.txt (line 1)) (1.17.0)\n", + "Downloading json5-0.12.1-py3-none-any.whl (36 kB)\n", + "Installing collected packages: json5\n", + "Successfully installed json5-0.12.1\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8ca7948e", + "outputId": "7d191f0a-325b-4199-b0ca-1369152dd19f" + }, + "source": [ + "# @title 3.3 Run SchemaForge Pipeline (Scan & Convert)\n", + "\n", + "# 1. Scan Schemas\n", + "# We run from inside 'SchemaForge', so we point to data using relative paths from schemaforge_tool\n", + "print(\"🔍 Running SchemaForge: scan-schemas...\")\n", + "!cd schemaforge_tool && python -m src.cli scan-schemas \\\n", + " --data-dir ../{DATA_DIR} \\\n", + " --output-report ../{SCHEMA_REPORT_PREFIX}.md\n", + "\n", + "# 2. Convert to CSV\n", + "print(\"🔄 Converting to CSV...\")\n", + "!cd schemaforge_tool && python -m src.cli convert \\\n", + " --format csv \\\n", + " --data-dir ../{DATA_DIR} \\\n", + " --output-dir ../{OUTPUT_DIR}/csv \\\n", + " --schema-report ../{SCHEMA_REPORT_PATH}\n", + "\n", + "# 3. Convert to Parquet\n", + "print(\"🔄 Converting to Parquet...\")\n", + "!cd schemaforge_tool && python -m src.cli convert \\\n", + " --format parquet \\\n", + " --data-dir ../{DATA_DIR} \\\n", + " --output-dir ../{OUTPUT_DIR}/parquet \\\n", + " --schema-report ../{SCHEMA_REPORT_PATH}\n", + "\n", + "# 4. Convert to Feather (Fast I/O)\n", + "print(\"🔄 Converting to Feather...\")\n", + "!cd schemaforge_tool && python -m src.cli convert \\\n", + " --format feather \\\n", + " --data-dir ../{DATA_DIR} \\\n", + " --output-dir ../{OUTPUT_DIR}/feather \\\n", + " --schema-report ../{SCHEMA_REPORT_PATH}\n", + "\n", + "print(f\"\\n✅ Conversion Complete! 
Formats ready in '{OUTPUT_DIR}/'\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "🔍 Running SchemaForge: scan-schemas...\n", + "2025-12-08 18:46:04,545 - __main__ - INFO - Starting schema scan...\n", + "2025-12-08 18:46:04,546 - src.schema_reader.inference - INFO - Found 2 JSON file(s) in ../schemaforge_bench/data\n", + "2025-12-08 18:46:04,572 - src.schema_reader.inference - INFO - Processing file: train_data.json\n", + "2025-12-08 18:46:04,573 - src.schema_reader.inference - INFO - No max_sample_size set. Defaulting to 10000 for performance.\n", + "2025-12-08 18:46:04,574 - src.schema_reader.inference - INFO - Processing file: test_data.json\n", + "2025-12-08 18:46:04,575 - src.schema_reader.inference - INFO - No max_sample_size set. Defaulting to 10000 for performance.\n", + "2025-12-08 18:46:05,067 - src.json_loader - WARNING - Streaming failed for ../schemaforge_bench/data/test_data.json: Extra data: line 2 column 1 (char 569). Falling back to memory load.\n", + "2025-12-08 18:46:10,694 - src.json_loader - WARNING - Streaming failed for ../schemaforge_bench/data/train_data.json: Extra data: line 2 column 1 (char 460). 
Falling back to memory load.\n", + "2025-12-08 18:46:19,325 - src.schema_reader.inference - INFO - Streaming first 10000 records from test_data.json\n", + "2025-12-08 18:46:19,325 - src.schema_reader.inference - INFO - Analyzing 10000 of 10000 records from test_data.json\n", + "2025-12-08 18:46:20,248 - src.schema_reader.inference - INFO - Successfully inferred schema for test_data.json: 3 fields\n", + "2025-12-08 18:46:58,099 - src.schema_reader.inference - INFO - Streaming first 10000 records from train_data.json\n", + "2025-12-08 18:46:58,102 - src.schema_reader.inference - INFO - Analyzing 10000 of 10000 records from train_data.json\n", + "2025-12-08 18:46:58,320 - src.schema_reader.inference - INFO - Successfully inferred schema for train_data.json: 3 fields\n", + "2025-12-08 18:46:59,479 - __main__ - INFO - Successfully scanned 2 file(s)\n", + "2025-12-08 18:46:59,480 - src.schema_reader.reporting - INFO - Schema report written to ../schemaforge_bench/schema_report.md\n", + "2025-12-08 18:46:59,480 - src.schema_reader.reporting - INFO - Schemas saved to JSON: ../schemaforge_bench/schema_report.json\n", + "2025-12-08 18:46:59,481 - __main__ - INFO - Schema report generated: ../schemaforge_bench/schema_report.md\n", + "🔄 Converting to CSV...\n", + "2025-12-08 18:47:01,008 - __main__ - INFO - Starting conversion to csv...\n", + "2025-12-08 18:47:01,009 - src.converter.core - INFO - Loading schemas from schema report: /content/schemaforge_bench/schema_report.json\n", + "2025-12-08 18:47:01,010 - src.schema_reader.reporting - INFO - Loaded 2 schema(s) from /content/schemaforge_bench/schema_report.json\n", + "2025-12-08 18:47:01,033 - src.converter.csv - INFO - Converting train_data.json to CSV...\n", + "2025-12-08 18:47:01,034 - src.json_loader - INFO - File train_data.json is 1601.5MB. 
Using streaming for efficiency.\n", + "2025-12-08 18:47:01,035 - src.converter.csv - INFO - Converting test_data.json to CSV...\n", + "2025-12-08 18:47:01,035 - src.json_loader - INFO - File test_data.json is 177.9MB. Using streaming for efficiency.\n", + "2025-12-08 18:47:02,501 - src.json_loader - WARNING - Streaming failed for ../schemaforge_bench/data/test_data.json: Extra data: line 2 column 1 (char 569). Falling back to memory load.\n", + "2025-12-08 18:47:06,406 - src.json_loader - WARNING - Streaming failed for ../schemaforge_bench/data/train_data.json: Extra data: line 2 column 1 (char 460). Falling back to memory load.\n", + "2025-12-08 18:47:22,764 - src.converter.csv - INFO - Successfully converted test_data.json to ../schemaforge_bench/output/csv/test_data.csv\n", + "2025-12-08 18:48:51,356 - src.converter.csv - INFO - Successfully converted train_data.json to ../schemaforge_bench/output/csv/train_data.csv\n", + "2025-12-08 18:48:51,993 - __main__ - INFO - Conversion complete: 2 successful, 0 failed\n", + "🔄 Converting to Parquet...\n", + "2025-12-08 18:48:53,292 - __main__ - INFO - Starting conversion to parquet...\n", + "2025-12-08 18:48:53,292 - src.converter.core - INFO - Loading schemas from schema report: /content/schemaforge_bench/schema_report.json\n", + "2025-12-08 18:48:53,293 - src.schema_reader.reporting - INFO - Loaded 2 schema(s) from /content/schemaforge_bench/schema_report.json\n", + "2025-12-08 18:48:53,313 - src.converter.parquet - INFO - Converting train_data.json to Parquet...\n", + "2025-12-08 18:48:53,314 - src.json_loader - INFO - File train_data.json is 1601.5MB. Using streaming for efficiency.\n", + "2025-12-08 18:48:53,315 - src.converter.parquet - INFO - Converting test_data.json to Parquet...\n", + "2025-12-08 18:48:53,315 - src.json_loader - INFO - File test_data.json is 177.9MB. 
Using streaming for efficiency.\n", + "2025-12-08 18:48:53,674 - src.json_loader - WARNING - Streaming failed for ../schemaforge_bench/data/test_data.json: Extra data: line 2 column 1 (char 569). Falling back to memory load.\n", + "2025-12-08 18:49:00,067 - src.json_loader - WARNING - Streaming failed for ../schemaforge_bench/data/train_data.json: Extra data: line 2 column 1 (char 460). Falling back to memory load.\n", + "2025-12-08 18:49:04,518 - src.converter.parquet - INFO - Successfully converted test_data.json to ../schemaforge_bench/output/parquet/test_data.parquet\n", + "2025-12-08 18:50:20,543 - src.converter.parquet - INFO - Successfully converted train_data.json to ../schemaforge_bench/output/parquet/train_data.parquet\n", + "2025-12-08 18:50:21,374 - __main__ - INFO - Conversion complete: 2 successful, 0 failed\n", + "🔄 Converting to Feather...\n", + "2025-12-08 18:50:22,837 - __main__ - INFO - Starting conversion to feather...\n", + "2025-12-08 18:50:22,838 - src.converter.core - INFO - Loading schemas from schema report: /content/schemaforge_bench/schema_report.json\n", + "2025-12-08 18:50:22,838 - src.schema_reader.reporting - INFO - Loaded 2 schema(s) from /content/schemaforge_bench/schema_report.json\n", + "2025-12-08 18:50:22,865 - src.converter.feather - INFO - Converting train_data.json to Feather...\n", + "2025-12-08 18:50:22,865 - src.json_loader - INFO - File train_data.json is 1601.5MB. Using streaming for efficiency.\n", + "2025-12-08 18:50:22,867 - src.converter.feather - INFO - Converting test_data.json to Feather...\n", + "2025-12-08 18:50:22,868 - src.json_loader - INFO - File test_data.json is 177.9MB. Using streaming for efficiency.\n", + "2025-12-08 18:50:24,483 - src.json_loader - WARNING - Streaming failed for ../schemaforge_bench/data/test_data.json: Extra data: line 2 column 1 (char 569). 
Falling back to memory load.\n", + "2025-12-08 18:50:27,295 - src.json_loader - WARNING - Streaming failed for ../schemaforge_bench/data/train_data.json: Extra data: line 2 column 1 (char 460). Falling back to memory load.\n", + "2025-12-08 18:50:35,792 - src.converter.feather - INFO - Successfully converted test_data.json to ../schemaforge_bench/output/feather/test_data.feather\n", + "2025-12-08 18:52:00,644 - src.converter.feather - INFO - Successfully converted train_data.json to ../schemaforge_bench/output/feather/train_data.feather\n", + "2025-12-08 18:52:01,646 - __main__ - INFO - Conversion complete: 2 successful, 0 failed\n", + "\n", + "✅ Conversion Complete! Formats ready in 'schemaforge_bench/output/'\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 4. Benchmark Converted Formats\n", + "\n", + "Now we benchmark the loading and training efficiency for the formats generated by SchemaForge." + ], + "metadata": { + "id": "2XizdDLVz_Vu" + } + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6c6df480", + "outputId": "356e7070-d503-44a3-fa22-61778175df85" + }, + "source": [ + "# @title 4.1 Benchmark CSV\n", + "\n", + "def load_csv_format():\n", + " # Load Train\n", + " df_train = pd.read_csv(os.path.join(OUTPUT_DIR, \"csv\", \"train_data.csv\"))\n", + " # Load Test\n", + " df_test = pd.read_csv(os.path.join(OUTPUT_DIR, \"csv\", \"test_data.csv\"))\n", + "\n", + " return df_train['content'], df_train['label'], df_test['content'], df_test['label']\n", + "\n", + "tracker.measure(\n", + " format_name=\"CSV (Pandas)\",\n", + " load_func=load_csv_format,\n", + " train_func=train_standard_model\n", + ")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--- Benchmarking: CSV (Pandas) ---\n", + " ⏳ Loading data...\n", + " ✅ Loaded 3,600,000 rows in 34.78s\n", + " ⚙️ Training model...\n", + " ⏱️ Train Time: 176.3890s\n", + " 
💾 Mem Delta: 2298.81 MB\n", + "------------------------------\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bd83abbb", + "outputId": "4f1a5acf-eabc-4353-ad60-4b4ce70b5a1e" + }, + "source": [ + "# @title 4.2 Benchmark Parquet\n", + "\n", + "def load_parquet_format():\n", + " # Load Train\n", + " df_train = pd.read_parquet(os.path.join(OUTPUT_DIR, \"parquet\", \"train_data.parquet\"))\n", + " # Load Test\n", + " df_test = pd.read_parquet(os.path.join(OUTPUT_DIR, \"parquet\", \"test_data.parquet\"))\n", + "\n", + " return df_train['content'], df_train['label'], df_test['content'], df_test['label']\n", + "\n", + "tracker.measure(\n", + " format_name=\"Parquet (PyArrow)\",\n", + " load_func=load_parquet_format,\n", + " train_func=train_standard_model\n", + ")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--- Benchmarking: Parquet (PyArrow) ---\n", + " ⏳ Loading data...\n", + " ✅ Loaded 3,600,000 rows in 13.88s\n", + " ⚙️ Training model...\n", + " ⏱️ Train Time: 180.0725s\n", + " 💾 Mem Delta: 2028.25 MB\n", + "------------------------------\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "731384f9", + "outputId": "e2dc793e-f0c3-4a70-dc2c-324115088098" + }, + "source": [ + "# @title 4.3 Benchmark Feather\n", + "\n", + "def load_feather_format():\n", + " # Load Train\n", + " df_train = pd.read_feather(os.path.join(OUTPUT_DIR, \"feather\", \"train_data.feather\"))\n", + " # Load Test\n", + " df_test = pd.read_feather(os.path.join(OUTPUT_DIR, \"feather\", \"test_data.feather\"))\n", + "\n", + " return df_train['content'], df_train['label'], df_test['content'], df_test['label']\n", + "\n", + "tracker.measure(\n", + " format_name=\"Feather (Arrow IPC)\",\n", + " load_func=load_feather_format,\n", + " train_func=train_standard_model\n", + ")" + ], + 
"execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--- Benchmarking: Feather (Arrow IPC) ---\n", + " ⏳ Loading data...\n", + " ✅ Loaded 3,600,000 rows in 9.55s\n", + " ⚙️ Training model...\n", + " ⏱️ Train Time: 175.7252s\n", + " 💾 Mem Delta: 2329.90 MB\n", + "------------------------------\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 5. Analysis & Visualization\n", + "\n", + "We aggregate the results into a clean DataFrame and visualize the trade-offs between load time, memory usage, and storage efficiency." + ], + "metadata": { + "id": "VCPcR34-0RRY" + } + }, + { + "cell_type": "code", + "source": [ + "# @title 5.1 Results Summary\n", + "results_df = tracker.get_summary()\n", + "\n", + "# Normalize columns for better visualization comparison if needed,\n", + "# but raw values are usually better for technical benchmarks.\n", + "display(results_df.round(4))" + ], + "metadata": { + "id": "OxcT34jU0TEr", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "outputId": "e775db8f-b2e8-4755-899e-b19a72efbe95" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " Format Load Time (s) Training Time (s) Total Time (s) \\\n", + "0 HF Dataset (Arrow) 6.2827 174.5190 180.8017 \n", + "1 CSV (Pandas) 34.7801 176.3890 211.1692 \n", + "2 Parquet (PyArrow) 13.8773 180.0725 193.9499 \n", + "3 Feather (Arrow IPC) 9.5521 175.7252 185.2773 \n", + "\n", + " Peak Memory Delta (MB) Accuracy F1 Score \n", + "0 2250.9883 0.8397 0.8396 \n", + "1 2298.8125 0.8397 0.8396 \n", + "2 2028.2461 0.8397 0.8396 \n", + "3 2329.9023 0.8397 0.8396 " + ], + "text/html": [ + "\n", + "
| \n", + " | Format | \n", + "Load Time (s) | \n", + "Training Time (s) | \n", + "Total Time (s) | \n", + "Peak Memory Delta (MB) | \n", + "Accuracy | \n", + "F1 Score | \n", + "
|---|---|---|---|---|---|---|---|
| 0 | \n", + "HF Dataset (Arrow) | \n", + "6.2827 | \n", + "174.5190 | \n", + "180.8017 | \n", + "2250.9883 | \n", + "0.8397 | \n", + "0.8396 | \n", + "
| 1 | \n", + "CSV (Pandas) | \n", + "34.7801 | \n", + "176.3890 | \n", + "211.1692 | \n", + "2298.8125 | \n", + "0.8397 | \n", + "0.8396 | \n", + "
| 2 | \n", + "Parquet (PyArrow) | \n", + "13.8773 | \n", + "180.0725 | \n", + "193.9499 | \n", + "2028.2461 | \n", + "0.8397 | \n", + "0.8396 | \n", + "
| 3 | \n", + "Feather (Arrow IPC) | \n", + "9.5521 | \n", + "175.7252 | \n", + "185.2773 | \n", + "2329.9023 | \n", + "0.8397 | \n", + "0.8396 | \n", + "