{ "cells": [ { "cell_type": "markdown", "id": "b4f7b85e", "metadata": {}, "source": [ "# Daily Dose of Data Science\n", "\n", "[You Will NEVER Use Pandas’ Describe Method After Using These Two Libraries](https://www.blog.dailydoseofds.com/p/you-will-never-use-pandas-describe)\n", "\n", "Author: Avi Chawla" ] }, { "cell_type": "markdown", "id": "d7ee5b7b", "metadata": {}, "source": [ "## Install libraries" ] }, { "cell_type": "code", "execution_count": null, "id": "fdfd3364", "metadata": {}, "outputs": [], "source": [ "# Use %pip (not !pip) so packages are installed into the running kernel's environment.\n", "%pip install polars==0.18.4\n", "%pip install summarytools\n", "%pip install skimpy\n", "# pl.from_pandas() needs pyarrow; seaborn supplies the iris dataset loaded below.\n", "%pip install pyarrow seaborn" ] }, { "cell_type": "markdown", "id": "5f36e00d", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": 1, "id": "287cf76a", "metadata": {}, "outputs": [], "source": [ "import polars as pl\n", "\n", "import pandas as pd\n", "import seaborn as sns\n", "\n", "from summarytools import dfSummary\n", "from skimpy import skim" ] }, { "cell_type": "markdown", "id": "ae22d16e", "metadata": {}, "source": [ "## Dataset" ] }, { "cell_type": "code", "execution_count": 2, "id": "cadfb89b", "metadata": {}, "outputs": [], "source": [ "df_pd = sns.load_dataset('iris')\n", "df_pl = pl.from_pandas(df_pd)" ] }, { "cell_type": "markdown", "id": "720ea10b", "metadata": {}, "source": [ "## Summary Tools" ] }, { "cell_type": "code", "execution_count": 3, "id": "22d5fcaa", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
No | \n", "Variable | \n", "Stats / Values | \n", "Freqs / (% of Valid) | \n", "Graph | \n", "Missing | \n", "
---|---|---|---|---|---|
1 | \n", "sepal_length [float64] | \n",
" Mean (sd) : 5.8 (0.8) min < med < max: 4.3 < 5.8 < 7.9 IQR (CV) : 1.3 (7.1) | \n",
" 35 distinct values | \n", "0 (0.0%) | \n",
" |
2 | \n", "sepal_width [float64] | \n",
" Mean (sd) : 3.1 (0.4) min < med < max: 2.0 < 3.0 < 4.4 IQR (CV) : 0.5 (7.0) | \n",
" 23 distinct values | \n", "0 (0.0%) | \n",
" |
3 | \n", "petal_length [float64] | \n",
" Mean (sd) : 3.8 (1.8) min < med < max: 1.0 < 4.3 < 6.9 IQR (CV) : 3.5 (2.1) | \n",
" 43 distinct values | \n", "0 (0.0%) | \n",
" |
4 | \n", "petal_width [float64] | \n",
" Mean (sd) : 1.2 (0.8) min < med < max: 0.1 < 1.3 < 2.5 IQR (CV) : 1.5 (1.6) | \n",
" 22 distinct values | \n", "0 (0.0%) | \n",
" |
5 | \n", "species [object] | \n",
" 1. setosa 2. versicolor 3. virginica | \n",
" 50 (33.3%) 50 (33.3%) 50 (33.3%) | \n",
" 0 (0.0%) | \n",
"
╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮\n", "│ Data Summary Data Types │\n", "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓ │\n", "│ ┃ dataframe ┃ Values ┃ ┃ Column Type ┃ Count ┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩ │\n", "│ │ Number of rows │ 150 │ │ float64 │ 4 │ │\n", "│ │ Number of columns │ 5 │ │ string │ 1 │ │\n", "│ └───────────────────┴────────┘ └─────────────┴───────┘ │\n", "│ number │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ │\n", "│ ┃ column_name ┃ NA ┃ NA % ┃ mean ┃ sd ┃ p0 ┃ p25 ┃ p50 ┃ p75 ┃ p100 ┃ hist ┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ │\n", "│ │ sepal_length │ 0 │ 0 │ 5.8 │ 0.83 │ 4.3 │ 5.1 │ 5.8 │ 6.4 │ 7.9 │ ▃▇▇▇▅▂ │ │\n", "│ │ sepal_width │ 0 │ 0 │ 3.1 │ 0.44 │ 2 │ 2.8 │ 3 │ 3.3 │ 4.4 │ ▁▇▇▇▂▁ │ │\n", "│ │ petal_length │ 0 │ 0 │ 3.8 │ 1.8 │ 1 │ 1.6 │ 4.3 │ 5.1 │ 6.9 │ ▇ ▂▇▆▂ │ │\n", "│ │ petal_width │ 0 │ 0 │ 1.2 │ 0.76 │ 0.1 │ 0.3 │ 1.3 │ 1.8 │ 2.5 │ ▇ ▂▆▅▃ │ │\n", "│ └────────────────────┴──────┴─────────┴─────────┴─────────┴───────┴───────┴───────┴───────┴────────┴─────────┘ │\n", "│ string │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ │\n", "│ ┃ column_name ┃ NA ┃ NA % ┃ words per row ┃ total words ┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │\n", "│ │ species │ 0 │ 0 │ 1 │ 150 │ │\n", "│ └───────────────────────────┴─────────┴────────────┴──────────────────────────────┴──────────────────────────┘ │\n", "╰────────────────────────────────────────────────────── End ──────────────────────────────────────────────────────╯\n", "\n" ], "text/plain": [ "╭──────────────────────────────────────────────── skimpy summary 
─────────────────────────────────────────────────╮\n", "│ \u001b[3m Data Summary \u001b[0m \u001b[3m Data Types \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓ │\n", "│ ┃\u001b[1;36m \u001b[0m\u001b[1;36mdataframe \u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mValues\u001b[0m\u001b[1;36m \u001b[0m┃ ┃\u001b[1;36m \u001b[0m\u001b[1;36mColumn Type\u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mCount\u001b[0m\u001b[1;36m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩ │\n", "│ │ Number of rows │ 150 │ │ float64 │ 4 │ │\n", "│ │ Number of columns │ 5 │ │ string │ 1 │ │\n", "│ └───────────────────┴────────┘ └─────────────┴───────┘ │\n", "│ \u001b[3m number \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ │\n", "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn_name \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmean \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1msd \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp0 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp25 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp50 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp75 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp100 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mhist \u001b[0m\u001b[1m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ │\n", "│ │ \u001b[38;5;141msepal_length \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 5.8\u001b[0m │ \u001b[36m 0.83\u001b[0m │ \u001b[36m 4.3\u001b[0m │ \u001b[36m 5.1\u001b[0m │ \u001b[36m 5.8\u001b[0m │ \u001b[36m 6.4\u001b[0m │ \u001b[36m 7.9\u001b[0m │ \u001b[32m▃▇▇▇▅▂ \u001b[0m 
│ │\n", "│ │ \u001b[38;5;141msepal_width \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 3.1\u001b[0m │ \u001b[36m 0.44\u001b[0m │ \u001b[36m 2\u001b[0m │ \u001b[36m 2.8\u001b[0m │ \u001b[36m 3\u001b[0m │ \u001b[36m 3.3\u001b[0m │ \u001b[36m 4.4\u001b[0m │ \u001b[32m▁▇▇▇▂▁ \u001b[0m │ │\n", "│ │ \u001b[38;5;141mpetal_length \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 3.8\u001b[0m │ \u001b[36m 1.8\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 1.6\u001b[0m │ \u001b[36m 4.3\u001b[0m │ \u001b[36m 5.1\u001b[0m │ \u001b[36m 6.9\u001b[0m │ \u001b[32m▇ ▂▇▆▂ \u001b[0m │ │\n", "│ │ \u001b[38;5;141mpetal_width \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1.2\u001b[0m │ \u001b[36m 0.76\u001b[0m │ \u001b[36m 0.1\u001b[0m │ \u001b[36m 0.3\u001b[0m │ \u001b[36m 1.3\u001b[0m │ \u001b[36m 1.8\u001b[0m │ \u001b[36m 2.5\u001b[0m │ \u001b[32m▇ ▂▆▅▃ \u001b[0m │ │\n", "│ └────────────────────┴──────┴─────────┴─────────┴─────────┴───────┴───────┴───────┴───────┴────────┴─────────┘ │\n", "│ \u001b[3m string \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ │\n", "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn_name \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mwords per row \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mtotal words \u001b[0m\u001b[1m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │\n", "│ │ \u001b[38;5;141mspecies \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 150\u001b[0m │ │\n", "│ └───────────────────────────┴─────────┴────────────┴──────────────────────────────┴──────────────────────────┘ │\n", "╰────────────────────────────────────────────────────── End 
──────────────────────────────────────────────────────╯\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "skim(df_pd)" ] }, { "cell_type": "code", "execution_count": 5, "id": "8c1ca247", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
╭──────────────────────────────────────────────── skimpy summary ─────────────────────────────────────────────────╮\n", "│ Data Summary Data Types │\n", "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓ │\n", "│ ┃ dataframe ┃ Values ┃ ┃ Column Type ┃ Count ┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩ │\n", "│ │ Number of rows │ 150 │ │ float64 │ 4 │ │\n", "│ │ Number of columns │ 5 │ │ string │ 1 │ │\n", "│ └───────────────────┴────────┘ └─────────────┴───────┘ │\n", "│ number │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ │\n", "│ ┃ column_name ┃ NA ┃ NA % ┃ mean ┃ sd ┃ p0 ┃ p25 ┃ p50 ┃ p75 ┃ p100 ┃ hist ┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ │\n", "│ │ sepal_length │ 0 │ 0 │ 5.8 │ 0.83 │ 4.3 │ 5.1 │ 5.8 │ 6.4 │ 7.9 │ ▃▇▇▇▅▂ │ │\n", "│ │ sepal_width │ 0 │ 0 │ 3.1 │ 0.44 │ 2 │ 2.8 │ 3 │ 3.3 │ 4.4 │ ▁▇▇▇▂▁ │ │\n", "│ │ petal_length │ 0 │ 0 │ 3.8 │ 1.8 │ 1 │ 1.6 │ 4.3 │ 5.1 │ 6.9 │ ▇ ▂▇▆▂ │ │\n", "│ │ petal_width │ 0 │ 0 │ 1.2 │ 0.76 │ 0.1 │ 0.3 │ 1.3 │ 1.8 │ 2.5 │ ▇ ▂▆▅▃ │ │\n", "│ └────────────────────┴──────┴─────────┴─────────┴─────────┴───────┴───────┴───────┴───────┴────────┴─────────┘ │\n", "│ string │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ │\n", "│ ┃ column_name ┃ NA ┃ NA % ┃ words per row ┃ total words ┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │\n", "│ │ species │ 0 │ 0 │ 1 │ 150 │ │\n", "│ └───────────────────────────┴─────────┴────────────┴──────────────────────────────┴──────────────────────────┘ │\n", "╰────────────────────────────────────────────────────── End ──────────────────────────────────────────────────────╯\n", "\n" ], "text/plain": [ "╭──────────────────────────────────────────────── skimpy summary 
─────────────────────────────────────────────────╮\n", "│ \u001b[3m Data Summary \u001b[0m \u001b[3m Data Types \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓ ┏━━━━━━━━━━━━━┳━━━━━━━┓ │\n", "│ ┃\u001b[1;36m \u001b[0m\u001b[1;36mdataframe \u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mValues\u001b[0m\u001b[1;36m \u001b[0m┃ ┃\u001b[1;36m \u001b[0m\u001b[1;36mColumn Type\u001b[0m\u001b[1;36m \u001b[0m┃\u001b[1;36m \u001b[0m\u001b[1;36mCount\u001b[0m\u001b[1;36m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩ ┡━━━━━━━━━━━━━╇━━━━━━━┩ │\n", "│ │ Number of rows │ 150 │ │ float64 │ 4 │ │\n", "│ │ Number of columns │ 5 │ │ string │ 1 │ │\n", "│ └───────────────────┴────────┘ └─────────────┴───────┘ │\n", "│ \u001b[3m number \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ │\n", "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn_name \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mmean \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1msd \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp0 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp25 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp50 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp75 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mp100 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mhist \u001b[0m\u001b[1m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ │\n", "│ │ \u001b[38;5;141msepal_length \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 5.8\u001b[0m │ \u001b[36m 0.83\u001b[0m │ \u001b[36m 4.3\u001b[0m │ \u001b[36m 5.1\u001b[0m │ \u001b[36m 5.8\u001b[0m │ \u001b[36m 6.4\u001b[0m │ \u001b[36m 7.9\u001b[0m │ \u001b[32m▃▇▇▇▅▂ \u001b[0m 
│ │\n", "│ │ \u001b[38;5;141msepal_width \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 3.1\u001b[0m │ \u001b[36m 0.44\u001b[0m │ \u001b[36m 2\u001b[0m │ \u001b[36m 2.8\u001b[0m │ \u001b[36m 3\u001b[0m │ \u001b[36m 3.3\u001b[0m │ \u001b[36m 4.4\u001b[0m │ \u001b[32m▁▇▇▇▂▁ \u001b[0m │ │\n", "│ │ \u001b[38;5;141mpetal_length \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 3.8\u001b[0m │ \u001b[36m 1.8\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 1.6\u001b[0m │ \u001b[36m 4.3\u001b[0m │ \u001b[36m 5.1\u001b[0m │ \u001b[36m 6.9\u001b[0m │ \u001b[32m▇ ▂▇▆▂ \u001b[0m │ │\n", "│ │ \u001b[38;5;141mpetal_width \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1.2\u001b[0m │ \u001b[36m 0.76\u001b[0m │ \u001b[36m 0.1\u001b[0m │ \u001b[36m 0.3\u001b[0m │ \u001b[36m 1.3\u001b[0m │ \u001b[36m 1.8\u001b[0m │ \u001b[36m 2.5\u001b[0m │ \u001b[32m▇ ▂▆▅▃ \u001b[0m │ │\n", "│ └────────────────────┴──────┴─────────┴─────────┴─────────┴───────┴───────┴───────┴───────┴────────┴─────────┘ │\n", "│ \u001b[3m string \u001b[0m │\n", "│ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ │\n", "│ ┃\u001b[1m \u001b[0m\u001b[1mcolumn_name \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mNA % \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mwords per row \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mtotal words \u001b[0m\u001b[1m \u001b[0m┃ │\n", "│ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │\n", "│ │ \u001b[38;5;141mspecies \u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 0\u001b[0m │ \u001b[36m 1\u001b[0m │ \u001b[36m 150\u001b[0m │ │\n", "│ └───────────────────────────┴─────────┴────────────┴──────────────────────────────┴──────────────────────────┘ │\n", "╰────────────────────────────────────────────────────── End 
──────────────────────────────────────────────────────╯\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "skim(df_pl) # works with Polars DataFrame" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }