{ "cells": [ { "cell_type": "markdown", "id": "b4f7b85e", "metadata": {}, "source": [ "# Daily Dose of Data Science\n", "\n", "[You Will NEVER Use Pandas’ Describe Method After Using These Two Libraries](https://www.blog.dailydoseofds.com/p/you-will-never-use-pandas-describe)\n", "\n", "Author: Avi Chawla" ] }, { "cell_type": "markdown", "id": "d7ee5b7b", "metadata": {}, "source": [ "## Install libraries" ] }, { "cell_type": "code", "execution_count": null, "id": "fdfd3364", "metadata": {}, "outputs": [], "source": [ "!pip install polars==0.18.4\n", "!pip install summarytools\n", "!pip install skimpy" ] }, { "cell_type": "markdown", "id": "5f36e00d", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": 1, "id": "287cf76a", "metadata": {}, "outputs": [], "source": [ "import polars as pl\n", "\n", "import pandas as pd\n", "import seaborn as sns\n", "\n", "from summarytools import dfSummary\n", "from skimpy import skim" ] }, { "cell_type": "markdown", "id": "ae22d16e", "metadata": {}, "source": [ "## Dataset" ] }, { "cell_type": "code", "execution_count": 2, "id": "cadfb89b", "metadata": {}, "outputs": [], "source": [ "df_pd = sns.load_dataset('iris')\n", "df_pl = pl.from_pandas(df_pd)" ] }, { "cell_type": "markdown", "id": "720ea10b", "metadata": {}, "source": [ "## Summary Tools" ] }, { "cell_type": "code", "execution_count": 3, "id": "22d5fcaa", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Data Frame Summary
df_pd
Dimensions: 150 x 5
Duplicates: 1
NoVariableStats / ValuesFreqs / (% of Valid)GraphMissing
1sepal_length
[float64]
Mean (sd) : 5.8 (0.8)
min < med < max:
4.3 < 5.8 < 7.9
IQR (CV) : 1.3 (7.1)
35 distinct values0
(0.0%)
2sepal_width
[float64]
Mean (sd) : 3.1 (0.4)
min < med < max:
2.0 < 3.0 < 4.4
IQR (CV) : 0.5 (7.0)
23 distinct values0
(0.0%)
3petal_length
[float64]
Mean (sd) : 3.8 (1.8)
min < med < max:
1.0 < 4.3 < 6.9
IQR (CV) : 3.5 (2.1)
43 distinct values0
(0.0%)
4petal_width
[float64]
Mean (sd) : 1.2 (0.8)
min < med < max:
0.1 < 1.3 < 2.5
IQR (CV) : 1.5 (1.6)
22 distinct values0
(0.0%)
5species
[object]
1. setosa
2. versicolor
3. virginica
50 (33.3%)
50 (33.3%)
50 (33.3%)
0
(0.0%)
\n" ], "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfSummary(df_pd)" ] }, { "cell_type": "markdown", "id": "8282b526", "metadata": {}, "source": [ "## Skimpy" ] }, { "cell_type": "code", "execution_count": 4, "id": "3398425e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮\n",
       "│          Data Summary                Data Types                                                                 │\n",
       "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓                                                          │\n",
       "│ ┃ dataframe          Values ┃ ┃ Column Type  Count ┃                                                          │\n",
       "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩                                                          │\n",
       "│ │ Number of rows    │ 150    │ │ float64     │ 4     │                                                          │\n",
       "│ │ Number of columns │ 5      │ │ string      │ 1     │                                                          │\n",
       "│ └───────────────────┴────────┘ └─────────────┴───────┘                                                          │\n",
       "│                                                     number                                                      │\n",
       "│ ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓  │\n",
       "│ ┃ column_name         NA    NA %     mean     sd       p0     p25    p50    p75    p100    hist    ┃  │\n",
       "│ ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩  │\n",
       "│ │ sepal_length         0      0    5.8   0.83  4.3  5.1  5.8  6.4   7.9▃▇▇▇▅▂  │  │\n",
       "│ │ sepal_width          0      0    3.1   0.44    2  2.8    3  3.3   4.4▁▇▇▇▂▁  │  │\n",
       "│ │ petal_length         0      0    3.8    1.8    1  1.6  4.3  5.1   6.9▇ ▂▇▆▂  │  │\n",
       "│ │ petal_width          0      0    1.2   0.76  0.1  0.3  1.3  1.8   2.5▇ ▂▆▅▃  │  │\n",
       "│ └────────────────────┴──────┴─────────┴─────────┴─────────┴───────┴───────┴───────┴───────┴────────┴─────────┘  │\n",
       "│                                                     string                                                      │\n",
       "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓  │\n",
       "│ ┃ column_name                NA       NA %        words per row                 total words              ┃  │\n",
       "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩  │\n",
       "│ │ species                        0         0                           1                     150 │  │\n",
       "│ └───────────────────────────┴─────────┴────────────┴──────────────────────────────┴──────────────────────────┘  │\n",
       "╰────────────────────────────────────────────────────── End ──────────────────────────────────────────────────────╯\n",
       "
\n" ], "text/plain": [ "╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮\n", "│ \u001b[3m Data Summary \u001b[0m \u001b[3m Data Types \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓ │\n", "│ ┃\u001b[1;36m \u001b[0m\u001b[1;36mdataframe \u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mValues\u001b[0m\u001b[1;36m \u001b[0m┃ ┃\u001b[1;36m \u001b[0m\u001b[1;36mColumn Type\u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mCount\u001b[0m\u001b[1;36m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩ │\n", "│ │ Number of rows │ 150 │ │ float64 │ 4 │ │\n", "│ │ Number of columns │ 5 │ │ string │ 1 │ │\n", "│ └───────────────────┴────────┘ └─────────────┴───────┘ │\n", "│ \u001b[3m number \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ │\n", "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn_name \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmean \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1msd \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp0 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp25 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp50 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp75 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp100 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mhist \u001b[0m\u001b[1m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ │\n", "│ │ \u001b[38;5;141msepal_length \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 5.8\u001b[0m │ \u001b[36m 0.83\u001b[0m │ \u001b[36m 4.3\u001b[0m │ \u001b[36m 5.1\u001b[0m │ \u001b[36m 5.8\u001b[0m │ \u001b[36m 6.4\u001b[0m │ \u001b[36m 7.9\u001b[0m │ \u001b[32m▃▇▇▇▅▂ \u001b[0m │ │\n", "│ │ \u001b[38;5;141msepal_width \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 3.1\u001b[0m │ \u001b[36m 0.44\u001b[0m │ \u001b[36m 2\u001b[0m │ \u001b[36m 2.8\u001b[0m │ \u001b[36m 3\u001b[0m │ \u001b[36m 3.3\u001b[0m │ \u001b[36m 4.4\u001b[0m │ \u001b[32m▁▇▇▇▂▁ \u001b[0m │ │\n", "│ │ \u001b[38;5;141mpetal_length \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 3.8\u001b[0m │ \u001b[36m 1.8\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 1.6\u001b[0m │ \u001b[36m 4.3\u001b[0m │ \u001b[36m 5.1\u001b[0m │ \u001b[36m 6.9\u001b[0m │ \u001b[32m▇ ▂▇▆▂ \u001b[0m │ │\n", "│ │ \u001b[38;5;141mpetal_width \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1.2\u001b[0m │ \u001b[36m 0.76\u001b[0m │ \u001b[36m 0.1\u001b[0m │ \u001b[36m 0.3\u001b[0m │ \u001b[36m 1.3\u001b[0m │ \u001b[36m 1.8\u001b[0m │ \u001b[36m 2.5\u001b[0m │ \u001b[32m▇ ▂▆▅▃ \u001b[0m │ │\n", "│ └────────────────────┴──────┴─────────┴─────────┴─────────┴───────┴───────┴───────┴───────┴────────┴─────────┘ │\n", "│ \u001b[3m string \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ │\n", "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn_name \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mwords per row \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mtotal words \u001b[0m\u001b[1m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │\n", "│ │ \u001b[38;5;141mspecies \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 150\u001b[0m │ │\n", "│ └───────────────────────────┴─────────┴────────────┴──────────────────────────────┴──────────────────────────┘ │\n", "╰────────────────────────────────────────────────────── End ──────────────────────────────────────────────────────╯\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "skim(df_pd)" ] }, { "cell_type": "code", "execution_count": 5, "id": "8c1ca247", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮\n",
       "│          Data Summary                Data Types                                                                 │\n",
       "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓                                                          │\n",
       "│ ┃ dataframe          Values ┃ ┃ Column Type  Count ┃                                                          │\n",
       "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩                                                          │\n",
       "│ │ Number of rows    │ 150    │ │ float64     │ 4     │                                                          │\n",
       "│ │ Number of columns │ 5      │ │ string      │ 1     │                                                          │\n",
       "│ └───────────────────┴────────┘ └─────────────┴───────┘                                                          │\n",
       "│                                                     number                                                      │\n",
       "│ ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓  │\n",
       "│ ┃ column_name         NA    NA %     mean     sd       p0     p25    p50    p75    p100    hist    ┃  │\n",
       "│ ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩  │\n",
       "│ │ sepal_length         0      0    5.8   0.83  4.3  5.1  5.8  6.4   7.9▃▇▇▇▅▂  │  │\n",
       "│ │ sepal_width          0      0    3.1   0.44    2  2.8    3  3.3   4.4▁▇▇▇▂▁  │  │\n",
       "│ │ petal_length         0      0    3.8    1.8    1  1.6  4.3  5.1   6.9▇ ▂▇▆▂  │  │\n",
       "│ │ petal_width          0      0    1.2   0.76  0.1  0.3  1.3  1.8   2.5▇ ▂▆▅▃  │  │\n",
       "│ └────────────────────┴──────┴─────────┴─────────┴─────────┴───────┴───────┴───────┴───────┴────────┴─────────┘  │\n",
       "│                                                     string                                                      │\n",
       "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓  │\n",
       "│ ┃ column_name                NA       NA %        words per row                 total words              ┃  │\n",
       "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩  │\n",
       "│ │ species                        0         0                           1                     150 │  │\n",
       "│ └───────────────────────────┴─────────┴────────────┴──────────────────────────────┴──────────────────────────┘  │\n",
       "╰────────────────────────────────────────────────────── End ──────────────────────────────────────────────────────╯\n",
       "
\n" ], "text/plain": [ "╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮\n", "│ \u001b[3m Data Summary \u001b[0m \u001b[3m Data Types \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓ │\n", "│ ┃\u001b[1;36m \u001b[0m\u001b[1;36mdataframe \u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mValues\u001b[0m\u001b[1;36m \u001b[0m┃ ┃\u001b[1;36m \u001b[0m\u001b[1;36mColumn Type\u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mCount\u001b[0m\u001b[1;36m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩ │\n", "│ │ Number of rows │ 150 │ │ float64 │ 4 │ │\n", "│ │ Number of columns │ 5 │ │ string │ 1 │ │\n", "│ └───────────────────┴────────┘ └─────────────┴───────┘ │\n", "│ \u001b[3m number \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ │\n", "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn_name \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmean \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1msd \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp0 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp25 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp50 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp75 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp100 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mhist \u001b[0m\u001b[1m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ │\n", "│ │ \u001b[38;5;141msepal_length \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 5.8\u001b[0m │ \u001b[36m 0.83\u001b[0m │ \u001b[36m 4.3\u001b[0m │ \u001b[36m 5.1\u001b[0m │ \u001b[36m 5.8\u001b[0m │ \u001b[36m 6.4\u001b[0m │ \u001b[36m 7.9\u001b[0m │ \u001b[32m▃▇▇▇▅▂ \u001b[0m │ │\n", "│ │ \u001b[38;5;141msepal_width \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 3.1\u001b[0m │ \u001b[36m 0.44\u001b[0m │ \u001b[36m 2\u001b[0m │ \u001b[36m 2.8\u001b[0m │ \u001b[36m 3\u001b[0m │ \u001b[36m 3.3\u001b[0m │ \u001b[36m 4.4\u001b[0m │ \u001b[32m▁▇▇▇▂▁ \u001b[0m │ │\n", "│ │ \u001b[38;5;141mpetal_length \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 3.8\u001b[0m │ \u001b[36m 1.8\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 1.6\u001b[0m │ \u001b[36m 4.3\u001b[0m │ \u001b[36m 5.1\u001b[0m │ \u001b[36m 6.9\u001b[0m │ \u001b[32m▇ ▂▇▆▂ \u001b[0m │ │\n", "│ │ \u001b[38;5;141mpetal_width \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1.2\u001b[0m │ \u001b[36m 0.76\u001b[0m │ \u001b[36m 0.1\u001b[0m │ \u001b[36m 0.3\u001b[0m │ \u001b[36m 1.3\u001b[0m │ \u001b[36m 1.8\u001b[0m │ \u001b[36m 2.5\u001b[0m │ \u001b[32m▇ ▂▆▅▃ \u001b[0m │ │\n", "│ └────────────────────┴──────┴─────────┴─────────┴─────────┴───────┴───────┴───────┴───────┴────────┴─────────┘ │\n", "│ \u001b[3m string \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ │\n", "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn_name \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mwords per row \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mtotal words \u001b[0m\u001b[1m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │\n", "│ │ \u001b[38;5;141mspecies \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 150\u001b[0m │ │\n", "│ └───────────────────────────┴─────────┴────────────┴──────────────────────────────┴──────────────────────────┘ │\n", "╰────────────────────────────────────────────────────── End ──────────────────────────────────────────────────────╯\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "skim(df_pl) # works with Polars DataFrame" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }