{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d5d0ea64",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Import the display helpers from the public IPython.display module;\n",
"# importing them through IPython.core.display is deprecated.\n",
"from IPython.display import display, HTML, Image\n",
"# Renders an empty HTML fragment (the markup string is empty).\n",
"display(HTML(\"\"))\n",
"# Turn off jedi-based tab completion for this kernel session.\n",
"%config IPCompleter.use_jedi=False"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "403c4b8a",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from IPython.display import Markdown, display, HTML, IFrame\n",
"from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n",
"import base64"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1c48706a",
"metadata": {},
"outputs": [],
"source": [
"# Load adult.csv from the notebook's working directory into a DataFrame.\n",
"df = pd.read_csv(\"./adult.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b512f166",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 32561 entries, 0 to 32560\n",
"Data columns (total 15 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 age 32561 non-null int64 \n",
" 1 workclass 32561 non-null object\n",
" 2 fnlwgt 32561 non-null int64 \n",
" 3 education 32561 non-null object\n",
" 4 education.num 32561 non-null int64 \n",
" 5 marital.status 32561 non-null object\n",
" 6 occupation 32561 non-null object\n",
" 7 relationship 32561 non-null object\n",
" 8 race 32561 non-null object\n",
" 9 sex 32561 non-null object\n",
" 10 capital.gain 32561 non-null int64 \n",
" 11 capital.loss 32561 non-null int64 \n",
" 12 hours.per.week 32561 non-null int64 \n",
" 13 native.country 32561 non-null object\n",
" 14 income 32561 non-null object\n",
"dtypes: int64(6), object(9)\n",
"memory usage: 3.7+ MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fce8e9f4",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "string indices must be integers",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_28/1621212634.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mproto\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGenericFeatureStatisticsGenerator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mProtoFromDataFrames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprotostr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbase64\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mb64encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSerializeToString\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m HTML_TEMPLATE = \"\"\"\n\u001b[1;32m 4\u001b[0m \u001b[0;34m<\u001b[0m\u001b[0mscript\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m<\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mscript\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m<\u001b[0m\u001b[0mlink\u001b[0m \u001b[0mrel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"import\"\u001b[0m \u001b[0mhref\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\"\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.9/site-packages/facets_overview/base_generic_feature_statistics_generator.py\u001b[0m in \u001b[0;36mProtoFromDataFrames\u001b[0;34m(self, dataframes, histogram_categorical_levels_count)\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0mdatasets\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdataframe\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdataframes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 51\u001b[0;31m \u001b[0mtable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataframe\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'table'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 52\u001b[0m \u001b[0mtable_entries\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcol\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: string indices must be integers"
]
}
],
"source": [
"# ProtoFromDataFrames iterates over its argument and reads entry['table'],\n",
"# so it must receive a list of {'name': ..., 'table': DataFrame} dicts.\n",
"# Passing the bare DataFrame iterates over column-name strings and raises\n",
"# \"TypeError: string indices must be integers\".\n",
"proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(\n",
"    [{\"name\": \"adult\", \"table\": df}]\n",
")\n",
"# Serialize the statistics proto and base64-encode it for embedding in HTML.\n",
"protostr = base64.b64encode(proto.SerializeToString()).decode(\"utf-8\")\n",
"# Facets Overview page: loads the web-components polyfill and the Facets bundle,\n",
"# then hands the encoded proto to the <facets-overview> element via {protostr}.\n",
"HTML_TEMPLATE = \"\"\"\n",
"        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n",
"        <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\">\n",
"        <facets-overview id=\"elem\"></facets-overview>\n",
"        <script>\n",
"          document.querySelector(\"#elem\").protoInput = \"{protostr}\";\n",
"        </script>\n",
"        \"\"\"\n",
"html_str = HTML_TEMPLATE.format(protostr=protostr)\n",
"# Write the standalone page; explicit encoding keeps the output platform-independent.\n",
"with open(\"index.html\", \"w\", encoding=\"utf-8\") as fo:\n",
"    fo.write(html_str)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c0a817dc",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}