---
# Title of the document
title: "Pandas Profiling Report"

# Metadata about the dataset; every field is left empty in this configuration.
dataset:
  description: ""
  creator: ""
  author: ""
  copyright_holder: ""
  copyright_year: ""
  url: ""

# Descriptions keyed per variable (an empty mapping in this configuration).
variables:
  descriptions: {}
# Whether to infer dtypes (disabled in this configuration)
infer_dtypes: false
# Show the description at each variable (in addition to the overview tab)
show_variable_description: false
# Number of workers (0 = use multiprocessing.cpu_count())
pool_size: 0
# Show the progress bar
progress_bar: true
# Per variable type description settings
# NOTE(review): the nesting below was reconstructed from key grouping because
# the indentation had been lost — confirm against the consumer's schema.
vars:
  num:
    quantiles:
      - 0.05
      - 0.25
      - 0.5
      - 0.75
      - 0.95
    skewness_threshold: 20
    low_categorical_threshold: 5
    # Set to zero to disable
    chi_squared_threshold: 0.0
  cat:
    length: false
    characters: false
    words: false
    cardinality_threshold: 50
    n_obs: 5
    # Set to zero to disable
    chi_squared_threshold: 0.0
    coerce_str_to_date: false
    redact: false
    histogram_largest: 10
    stop_words: []
  bool:
    n_obs: 3
    # String to boolean mapping dict. The keys are quoted so that YAML 1.1
    # loaders keep them as strings: unquoted yes/no/true/false would be
    # resolved to booleans and collapse into duplicate keys.
    mappings:
      't': true
      'f': false
      'yes': true
      'no': false
      'y': true
      'n': false
      'true': true
      'false': false
  path:
    active: false
  file:
    active: false
  image:
    active: false
    exif: false
    hash: false
  url:
    active: false
  timeseries:
    active: false
    autocorrelation: 0.7
    lags: [1, 7, 12, 24, 30]
    significance: 0.05
    pacf_acf_lag: 100
# Sort the variables. Possible values: "ascending", "descending" or null
# (null leaves the original sorting)
sort: null

# Which missing-value diagrams to show (all switched off in this configuration)
missing_diagrams:
  bar: false
  matrix: false
  heatmap: false
# Correlation settings, one sub-section per method. Each method carries its own
# `calculate` switch, `warn_high_correlations` flag and `threshold`; every
# method has `calculate: false` in this configuration.
correlations:
  pearson:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9
  spearman:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9
  kendall:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9
  phi_k:
    calculate: false
    warn_high_correlations: false
    threshold: 0.9
  cramers:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9
  auto:
    calculate: false
    warn_high_correlations: true
    threshold: 0.9
# Bivariate / Pairwise relations
interactions:
  targets: []
  continuous: false

# For categorical
# NOTE(review): kept at the top level; the flattened source does not show
# whether this key belongs under `interactions` — confirm.
categorical_maximum_correlation_distinct: 100

report:
  precision: 10
# Plot-specific settings
plot:
  # Image format (svg or png)
  image_format: "svg"
  dpi: 800
  scatter_threshold: 1000
  correlation:
    cmap: 'RdBu'
    bad: '#000000'
  missing:
    cmap: 'RdBu'
    # Force labels when there are > 50 variables
    force_labels: true
  cat_frequency:
    show: true  # if false, the category frequency plot is turned off
    type: 'bar'  # options: 'bar', 'pie'
    max_unique: 0
    colors: null  # use null for default or give a list of matplotlib recognised strings
  histogram:
    x_axis_labels: true
    # Number of bins (set to 0 to automatically detect the bin size)
    bins: 50
    # Maximum number of bins (when bins=0)
    max_bins: 250
  # NOTE(review): assumed to be a direct child of `plot` rather than of
  # `histogram` — confirm against the consumer's schema.
  font_path: null
# The number of observations to show
# NOTE(review): presumably for unique values, extreme values and the frequency
# table respectively, going by the key names — confirm.
n_obs_unique: 5
n_extreme_obs: 5
n_freq_table_max: 10
# Use `deep` flag for memory_usage (off in this configuration)
memory_deep: false
# Configuration related to the duplicates
duplicates:
  head: 0
  # Quoted because the value begins with `#`, which would otherwise start a comment
  key: "# duplicates"

# Configuration related to the samples area
samples:
  head: 0
  tail: 0
  random: 0

# Configuration related to the rejection of variables
reject_variables: true
# When in a Jupyter notebook
notebook:
  iframe:
    height: '800px'
    width: '100%'
    # or 'src'
    attribute: 'srcdoc'
# Settings for the HTML output
html:
  # Minify the html
  minify_html: true
  # Offline support
  use_local_assets: true
  # If true, single file, else directory with assets
  inline: true
  # Show navbar
  navbar_show: false
  # Assets prefix if inline = true
  assets_prefix: null
  # Styling options for the HTML report
  style:
    theme: null
    logo: ""
    primary_colors:
      - "#377eb8"
      - "#e41a1c"
      - "#4daf4a"
    # NOTE(review): assumed to be a styling option under `style`; the key is
    # kebab-case unlike its snake_case neighbours — confirm that the consumer
    # actually reads it and where it belongs.
    font-size: 10px
  # NOTE(review): assumed to be a direct child of `html` rather than of
  # `style` — confirm against the consumer's schema.
  full_width: true