From 6fbe34e876c905a8cdc89ffef2999124285ca0a5 Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Thu, 9 Jul 2020 17:20:28 +0200
Subject: [PATCH 01/14] added count_encoder to docs

---
 category_encoders/__init__.py | 1 +
 docs/source/count.rst         | 6 ++++++
 docs/source/index.rst         | 2 ++
 3 files changed, 9 insertions(+)
 create mode 100755 docs/source/count.rst
diff --git a/category_encoders/__init__.py b/category_encoders/__init__.py
index bc705eab..790bb467 100644
--- a/category_encoders/__init__.py
+++ b/category_encoders/__init__.py
@@ -31,6 +31,7 @@
 __all__ = [
     'BackwardDifferenceEncoder',
     'BinaryEncoder',
+    'CountEncoder',
     'HashingEncoder',
     'HelmertEncoder',
     'OneHotEncoder',
diff --git a/docs/source/count.rst b/docs/source/count.rst
new file mode 100755
index 00000000..bbd0653f
--- /dev/null
+++ b/docs/source/count.rst
@@ -0,0 +1,6 @@
+Count Encoder
+==============
+
+.. autoclass:: category_encoders.count.CountEncoder
+    :members:
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 05f7312d..02f4b702 100755
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -42,6 +42,7 @@ To use:
     encoder = ce.BaseNEncoder(cols=[...])
     encoder = ce.BinaryEncoder(cols=[...])
     encoder = ce.CatBoostEncoder(cols=[...])
+    encoder = ce.CountEncoder(cols=[...])
     encoder = ce.GLMMEncoder(cols=[...])
     encoder = ce.HashingEncoder(cols=[...])
     encoder = ce.HelmertEncoder(cols=[...])
@@ -70,6 +71,7 @@ Contents:
    basen
    binary
    catboost
+   count
    glmm
    hashing
    helmert

From 12999ec8684d7a2c138580250e8107bf42cc322f Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Thu, 9 Jul 2020 17:32:44 +0200
Subject: [PATCH 02/14] added method get_feature_names to CountEncoder

---
 category_encoders/.count.py.swp | Bin 0 -> 28672 bytes
 category_encoders/count.py      |  50 ++++++++++++++++++++++++--------
 2 files changed, 38 insertions(+), 12 deletions(-)
 create mode 100644 category_encoders/.count.py.swp

diff --git a/category_encoders/.count.py.swp b/category_encoders/.count.py.swp
new file mode 100644
index 0000000000000000000000000000000000000000..16dbc48b1e4ac392a74bf8e133fc0fec76ebaa8f
GIT binary patch
literal 28672
zcmeI4dyFJkb%%>rOk!*UAtD3^U(d#LkLm5*#cM(a&%%0l4J+{4tk-y881>Y2SI^XL
zPgSR?dUwahfP{%0;~1nwfy5Z%h$spW6h$T>Kn&!CAT|L^5Rr&@gg+950$2tjghRe_
zANA<2p51joilQ#*H{D%zALpKX?m73IdurBpzv}R{>V@q+9@kSn@6M~w>nuO~Yu?3o
zdtP@#Zl3wd_U_$%=2D*b`tHWAVG<t?x>>rb>u14woNRT1s2lf!r1nlcin8`_>uegI
zm6>s1#)0V^7-jvPFTB9J;`w{@THP^ssrvb+-#ESe?8%G+GY-r+Fyp|C12YcHI56YD
zi~}<c{J-NsHhhxz21fOGJF;W;^KoOJPqM!=`{_IfSMG`Se&2q6%6_`%KVyIQ?B_1~
zz&(G0y|3dCeE;pS=l1dJW5$6Q2WA|YabU)Q83$$@m~mjnff)y89GG!n#(^0J9s>t_
z9P(#S?~USbbN=uA|8HI7c@Kbp1RnzL0k?wJfj-y?THtx$0`L@YKKSB=`0ii@>;>Pw
z!1MkJycygEUIZ=%&jwEh-+PMZJp#T0-VN>qzY2Z{JbJ$8eGuFOUI{J%PXO=zIp_rY
zz|+7de%A96a4C57$)5Lz;5Kk0I1haIJkL80o(}$rK*YV^dhjSgjR(LMSON|3c>)<X
zgNwmu2ueH%9snN&?+15+n}7nBfJX_Od=>mYSOzWdJn%U15Ip@8AiRBp@VDkt_B!Zi
zzUsz<(PosYW)^4uK<&B0-I-tTT*0G$v>qIedco-hb>;3Cs36LcFi2Z!x5{GfT$z`=
zsyp!0R5gZv)brDJH%@|fKk+w%wsdxld~58>OYhsaPYshW%9_z=Gf2X2r$?zY$l77l
zkDK%JjM-jNbgYnCgph7KOZ+J9$H``MX}(gNhZH#=`5cE#zDJY6ri;ss125ZuL!+e{
zNB19WtjxFjVb)w~sjX`DB`Obl1>McHQPkaN3xzGmqeXMCANbiQ2|5wOORKGH(TXXQ
z`N?{aRVdUCC@_fqUS};%vp8ywjk%x1n`$C&MTMKgILRu5tF)}Er6&e~pG2Bun3}p;
zrS2+~Y@=rGl%F#E{P|9E<-ZS|_BV%v0O?Yt>qwjh3+kqDFi=sPDQ2Y`sIafXOm!2O
z6r{?J)LIz%$(D)}6-FohB=jR5YcXtBTI<Tx1wEnUh<_w)t7~+7U7Coa!Im1OJUq1#
zM5?iYybXfRW|*d7wBAr5GN`LJq?<;NByrNHw(bwoSgi$y$esvtH{OJ_K%EM+4b{-Z
zH`Hd}M>MI@O`lP%C-G>=Ad)}@{XR)W;mKgIRiPS!vW*}qH^^@_2>MxklAI&chUtW1
znV)TSFzEZEL8gu+qoA!0_)*@dC5ypDNLbV{G%TpKI3CF8dSN$HalbmcFoNT8n(jR5
z4?<)%jH7A+GgZf+tY>=`=|$9-AE=EPvLC2Te@mSRf}!fd;dS`X5`L+`%zjJdL@F+S
zFx>Fhf-LO9sx31@U#)gl>0%IWhFOqQX~^PO_f(AjNE|Wx!;#81n8Gf)fNw&TN!(I;
zh0!uo0hPyNrmK)qq2Y!g(>3&9;=ZYE=C!BbV7NKjbhD^MN1C@BNNE#h2c9$}mvCHj
zTd(W|CDgT4nk7@&uF1CKNmI&KeU@9snEL7(M2`9_1~&khjrOZD#Z3(3XYNL34mA7W
z#nv!TYjCB<3eaO)jnI5d@y)?1#aT;h17Toa%aNx?VgIZaqe*pf&EP?IG=RB!k%=N+
zYNs3iFj(HbqAo9uUuw)(>9k_mlM%#>b1s+gIVaPQp+`><O)Q#vJy6ZvEwyL9RUuAh
zLC+2*NUvMUCqcRq4;X2m*7Y=~C<uDcN`FC=qH%?n3<F)0w^kkHf>4Ld${b9cG?C~{
zEERH)`Qad~j<d9$b6#7qLOrEIj8I=5TN^P9)b-j1s#QIU9X+aaHK#Lg>kJDy9R}U7
zAIc1&P#JF)VohmWYdTS<jL-;FUzUYdURn(0zV5p^o<<U2th6d=P@Q&-5IPN{M<kMt
zY7M&^qGh%IWpQlyC&PG@<YQ1=8}TV&fl<I}6rG6TQ;`~=2L|X#$aUt~5`NuKbdgb^
zRxju6SuJiGS&)^+a$y+Lv|P@Qya^#m24%H>p;_#WmK{|l=17|rF)y+WVlrj%6uQPQ
zgi2bi4%Av2!(ctooEB9xL^c~GwimKrwOe|`o^<A}W{hklr$Y2#S#8KxXt;$LO8l*{
zWhqtxlcm*kNpwx`4AffpgKj&f^j0CCCC~M2$?f%xeCGG%d0ih&k(hi$p_jzi3%jk<
z;<($Wrl=bfq9~<0x&5!{#OX-3apdauikd=!uLfZ%Rxj^J>jWu@#PqGE7K@DxiTM_p
zuvSFOldI-QV{*yPa6vcf>EHGb!WXP#%)<yPixs6MY&cuBFk=JLSMewtj?nJAHNPXV
zUU_{}ko^#i6x$@bk>VpSc)c(CVyt6Y6tl}Xu!_%UkD_gMLZTGeePHK}an!@IJJFU(
zg5kht^Chau7)@&o&F-P-Xl`!qfXKe=rQ@CqP}E^fn`3HGGpJCu(#N8)kGW=B)CRBQ
zM_8to4XYRyTd#hfV;<(b%A2UJoJF_1Cky6+yeM0%meo`pt7J;evkOoiL^8}eon{&g
z`Yk)x!6Ft*yAEc6Rnax0Y<b4>(Y$o-#?@HrLn#W|&tf#$F{cWH<l69b7{M9O>va54
zwh<?t4vs+Mc${vGe1$~yMp4w@d1WUrrr+)2&(OH3*o{#Wc3&^wJ<t8HJE}YtzCFq?
zeWDEIiz`;~1#DHTdoa60OJ1kbA87}u<NE4_C&Lsy@8#lUdBSf`{Qm$S`4GOd`2X(w
z{fqeW?*w-P@%snh`QW?w^j`-b2d6;`Tm&A)m;Vm<3b+@X0IvlL;G6jJ_k(-ETftr6
zR`62rH1K%vRs8yQfC0D;EP@w;XM*#=gZTLmfRBRrgSUa}!A0Og@ObbbW7q;GfQ;uL
zm|Zgt%s4RPz>EVk4$L?(<G@da1NlanFV?w}J!`uXi_Uk$IO9PP&zHK1FHYsFB*Jt^
zqNB~Ze5cA5SqA|mI2vXW9KyM>ec0xWPtl$$g55Y>;{UPhi_*-;)ze$(Dmg9fV3iyK
z*$w723RNcfH6)B2WucBD%4T~A3ASSsIM6*s-N6XQ@X|^H`;B9$-6wJXD)bh?2pSX*
zs#kF=E36xH2FK&yM|BXT<gnQrRfkI`$X#pt+-16S-e=*@2_qQ3L?RWZ1f2>gnv<4m
zmBC7lVg}@wyDn35*3qheI7i5g^8n-MX+K?EmbMK6t^xNsR|*@7kGE<<@2BGAL~3I1
zr2;yxIN8pGsClk+`7DWpyDiST7G@puHW659dzmq<NWf40Fwd2CpW`)z+`NWJ%tS3@
zwpo=wX{(-TKS7SxJG%epIa^P5g~Q@udu~4DQ**d{!z=%g?~jGsIQ#mfX6#6$p%!tW
z314)BMoG9+wR%~)I!(SgX%X2Vv>B+?YEWTSw`UZeoV=KD=}%O?>jz?ZS`21_X-3qm
z)E1RW6jy3iCPPPgI6xvauUpdru*t=Mp766QvGI^br=gxlR5wfXiAc+Ql>?~yY1Ad<
zu_aEI*Zx1p;@(H(d>a41Zw=(v@by0i?gwuH>)<eubN;X4?>_`S5B?n71KtUagAg1B
zbKraU{*Qpa1s?!^0B!?s0<Q+Uz=hxf@NHrM{{a3Tybrt=NG?Dh>;&h5j}r%YH~39(
zJD3NO5AYFu{|xK}&jG&-{vF@{L*Q=kTi{OcVz3W95qyrA$7g}$3A`FCfhKqaW;_Jm
z1?~ZF07t;nz{BJWd;t6g7=jq=1TFAy<PO{m?gFm{yMO|hfJezCxDUJ?ycygE0?-35
z0zY~Jc^2T);3U`q9w2{U3!Db~zz>-pnWIk?c?7az-Nl(+->T`e6XWUzM4HLthiND5
zohAUb>0|5@b=7qB@44>f^Q;KKUWi^I*jBD8-^VRRVPh7zd3L!f4#_G_YKDk1G41AY
zP#t_8-x$@IHhWN2V-X)4rMsq4n)5BE(JW9gyk|!!F>T1qhi|n>%T1uuDl(tMl-r#B
zZl=xBz;s;D9Jia3br*5wu!*+iKv7QeOXo4|@*b%ya79h4rIyN+8D*%*@6bH71QnEv
z9LQC#z_zvejcZN1fs&jM49-ZnM?BtbscMeD8lLW~^k;H&R4GzonX4p|w4~>&9j4J}
zFlf%V{b-BK5m&-}8mX~n!UntxGhvb<DwVVZH!HWKEz=vz({?KdjgfCQ_??>OtCS#Y
z6Eam(sim4se2b_u*+2T!WxhO*b#(Zt9EdmLHTm~0^KItV&}1iB4|RTWs->waiE_tX
zqyy>EnXyhUXL*r#T3XPBL|PJD+vU%ksgwMMV+c`hnrmBbp9r=pD@xkV*LAK~JEGHM
z%j`D5g%Uo)4q&N0h`Y-RcHPMKU_~Uru}7aQFEUwdj0z@6($o@XC}e&Ox0*dNNA#&n
zQGi8!eUQ-gruLL_=ZFE951}fHy_1(Rr|9;~!mW0?ljN<EHWF57*`J~&N{M1{OxDga
z!zpt~Zy)tNyFDt+fpD~~hY$Z(#{13tE(_r@d?MFJ7-+tEQp{cXtewS%u5Og&o>)7i
z>CsPF+gW8HPukTrQ(YXzG&$15`;L5b#O1l0?lU(Yi$>Apzjc4bA-9yDAV&nn(4|>v
z^COy7f?Q9Ngt;eHLCZ03RUg>{db0qh*Kep*EqSXgwOZ_(SGCDr)f#eDFCFbdOlogO
zM&q)SJSmRaqDd!$ES3>zEJRZzsUqe;y}q4EOW;~}pQRmFov6qXhk#{w7Fx|YP%aIw
zr5)jwF-dZLOlQ@$wos~{jXlh>Bpxdd&bU~NSu50>P(~#yY*mX{lsbblEQ>Fad&{j=
zIn|FvMT%6T`*cdJ)e)W$AeW<Dm$N~P(j=U#>fBCY!sT;Nm2QhPPH;`oP)EJglBWk$
z8k1c*Ju)IljwACS341{&zgrrntGUz`-J+vZY7aRZk<M2bvldc#d}j3NyYR4>JjYr6
zsK_ZP=7`;&9IqQSHN~2k3}DaEC!dZn`As^KMJBl?M%xG-;}&3Y1+j9R%dls~8q`iN
z+^mVuHExW8)1-V!)D@nLGSWLYNp|BbU0v2|k$Gt!Ue>Fwpy}E}gx_ZM?O9QmIyTU5
z!NDD3jU#sNCjzAt>#_~fqfB<4&Tb3WH}Gq)d?lvBQOOlR-Xmr_!wy4lISYFS@#BOv
zo$f}^Jz?xU`!((V;|_b@v;Mz(zyC}4`F{p(2WwyfJRN)!zy5E4`1=`n1^5;4BYgNT
zf)9cBf%k&91LyCb;`=es0Ox~m;m<z=J`COkejOYHmx4#|=|2hX02^Q(yac=$NIt+1
z@$bI`J^=0n$HB|MOTi`JzZlRDz&F6>!0&;h;9~F{eEfd~UjTmx?gRILH-S^&B)AD&
z3HE|t0M7st6Zjyw9o!5S!Bya?;3Du%VgUaE{uJB-ZU(Ob3*g(t06q)u0IvhLfa72X
zcpUgLF@WCzN5Lz>BDf0tH*tWEf&0Pxz&pSy*b5rqnczI|IpPC%gSUdazz`e-PXym*
zUS)0{DdGmqwk#-jO)infwIc+DrHKJE8D9w&leMoC-)vU0PWZ+*%vZ4XyxQlm5R3*u
z^kuUr@!*nXa{i?w@>rg(({pKl`dA@Pl)|?ikeFFS@rP=`<m9o&C5x5?vK)0%ohi@T
z669Q(*BpqU9a-^QKhusD+;jQ#+>b5S#m+%^f^4SAeu{>d&RAAxatA9#y|YYFo3@qL
z$>wro6a}X$iDcp?8)nHdM?Q&eAI~P<W`a!`kUW(vXN1+wGND!3a-5nwf~L-$^7SAJ
z5}oKQD!cvK=$2dO+Tw=zSu@~86IZMJX<ki{^f<O}qW6<v6VjZ6uQRhM=Swv)W@nmg
zGN9V0b{AS#ItxyBgJGs#9&8<wV>n5`8at_zQp;L!E2lzjLrwGayf%@iW$UL)<p#2t
zQE4Jewo1Y-cdF0j&gr75oMCJv*w=^==@7&jE}vdupO7@?)m4jX&+cGn@$!$fP^CZ?
zfG4-zHP)&%AadeWeIoDUnEp!sXLXh)dn(4`xIT45Fv(6x)n?uF>h^F&m*zgDbk#z5
zA+$zZCEz$;$fd5+nb?($Z8=OSZ7e&)-8<%$88!;ttRs<h+|8m%;i#^lr)SI(%e_t(
zcg8j<o|}fro!vxJGA9I9^z4^yx?`=83a3O}!#hM!c%)w`<1Lw}k&$tn^1?z(9TyFc
z3ReM(Vap}vn!?4n3=>2{%U0UFjId{=crM%I$F%Fn2S2XokD8-g!_B;u(1HH6wkW?n
zO<Cs!6xT8Rs+Tnyg?Ts^-7U>Xps&!+VzG2K?`U(45IO0*PIDJM=+4E4nndL;X{Xp7
z(!epJxgnZt$XX~*s>}7$HyqcnwRC3Q@A8_U&OF9uEENWV<uYj+F2DT5DY#qNbQ_VW
zdc;TxPMRJKe<~&1I|?-=#=>SbCCU*cB474pth(Ncw&WnXu0$R$se4#dFQSR2+}Nll
z^u>g#(4()|4jQT)hU*ejhgScfN=8Ah9c}f-w8A<%$+1uVqLp?rm@$26vdUb_zk9ZX
znc0b{uimIFTu6Z}Y;RNx+t6Aqzr=&9eV|q2qNu^N+M3%0EV{(<>f_vS)9P|z%rqCO
zmW%?`8`Xw#zngOT_q>Sczx+qdUhinoM_-V^Q_60sNXsVMAQbp0_1Ag3;mSSjE7i-d
z&RgtBOrCcs^gCBC*xTI0yI$-~GQK{)FsHA$;n@CTN?-CuaTG^8<tW3|%3HhIl###N
zmb?Dg(f!8`Uw6iG$M!F|7PoDWSA%QI>4A9e(ZRzvxUbu`w*RGv&Z^weYhAJJM|<SD
zvm1rH=xkf8&{rMB;Ufocw8bi=><oI2rq>*C-?pdjz)}9azqxALll=dkc!d8V{vqf8
z{M6UK;_JT$oCG~^6zl@u$JhTP_#n6soCG7#1N*@<fSmW=2Sz~7_@4#7j$i)?AnyRY
z9P9;GfFI+}e-_*aQs9GCa0L)Q|1ZH`fVY4<Ko|TXI1l_a{`?!k2;2mo1-^{0{u%Hf
z_&9hQkn{Z`U^n;{zWN`5w}ZQYobMk7&juHQ|HNN^H%LJjtbrTBJdpnv@G$rSxE%!G
zCh#ioT<`;Y_V0sFgDtQIt_IHm596oH{}cFQ@FuVYPJ=~oKKKT{`u*T{K>&7x=YStG
zFETe@lKDB8D;E#@Wju5z8nNpANWnZw1Z`Ea+45*|pDK6HA}6I#G*Y2_NpJd4fZj~c
z=JjW<T*!Sln_2Dq;ay4bN%GG;NLnc!fJ!SZ{n9ppL)Vdf$^(efCui)w<)_{M$XA_k
zQ*u<L+x)&u#IbL3Lu=tQ6ld>wZ^gz)`W=6H-oEFYmoAsFMzX;fhK@fss;4}Ssv)PC
z7>=<r_1g)`>i4s%-4^Z1wxYhhd{e+<^NMxeM!^M<t^76?@PH0U3IoHnk^El*9q_bq
zCaFfEDcf%oA<$m2<Z`asjnY8!$x11korz6Y(X*bz@fh*;8?H(kUEy%m<-FCzv1Gm`
z<$q2K3}Y}3S6#m6x#xOH?~+Mc&54^HiXNAEIIKBQOtan)+Spt^UoxVxa(b6ZISI>D
zHYc#FS!{21NYxTI%x}3@PJ)%Gon%X?>}tKto;hW+#2M4vbl&VJXTqAoB7tM$uH}y@
zy`4<eaZ3B@Me>0sTya;8{G4slWj!-p!*d7zclnmn`8Q3r>Zh)hD(X4f<Wn=T>OPET
zf#&6_q9(O#*tTpFQswnedHvM>69|#n6*(-^|I$IUMNRN<ZYs|w&y?qKvV@ScW)l>Y
zY|}O(WKCA37p>X|mDMXIIf+K;kc?Y5_JUQgQC4vwiFN83Eu-JQ)@r&J>$uW@e@+sG
zUH$>fIVfgP`Dv|A7QUI)W|G$2E<t?2ASrZ;lyjGPwp{2hb3+?z$@a(}q@8rM>A7m<
zPg6t6>V9<A&Ff3`NQ^9eL08f8tXizi(vuv!iEgU)K5s+L`($Xk%W6|ACXb|@#X5vo
zOqQ!GpJGu6PI%>e?mPDviqUsUY?&yod0xt|DdHV&25y-glJv7uPnJAl2Xw}<*-RKR
zN9Nlp|4pgma(X;zq3)6aGaX*tGr1>8C_JwxD`#+-8B9Al)ouAlHqC0JT-vMO{>qbS
zjg9kC%%tX`C$-8K5;VT>OLLIdBm543svFg@pVF!hRkDb4x^zNsoh)0ya(NglMrge<
zX|BPs##lg{I_y(>CJL7dQb^8NDn%ys^jL+;+ABSirPtQoP6}ZCBtKU*UvC9ZCt~Fp
qn)St-rtEh4&XX0Z1R_C&<`mA=3(aIRQucMqQZP0v^`(DO)cy~*sZFH-

literal 0
HcmV?d00001

diff --git a/category_encoders/count.py b/category_encoders/count.py
index e18c69d3..70a3d639 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -116,6 +116,7 @@ def __init__(self, verbose=0, cols=None, drop_invariant=False,
         self.min_group_size = min_group_size
         self.min_group_name = min_group_name
         self.combine_min_nan_groups = combine_min_nan_groups
+        self.feature_names = None
 
         self._check_set_create_attrs()
 
@@ -159,17 +160,26 @@ def fit(self, X, y=None, **kwargs):
 
         self._fit_count_encode(X, y)
 
+        X_temp = self.transform(X, override_return_df=True)
+        self.feature_names = list(X_temp.columns)
+
         if self.drop_invariant:
             self.drop_cols = []
-            X_temp = self.transform(X)
             generated_cols = util.get_generated_cols(X, X_temp, self.cols)
             self.drop_cols = [
                 x for x in generated_cols if X_temp[x].var() <= 10e-5
             ]
 
+            try:
+                [self.feature_names.remove(x) for x in self.drop_cols]
+            except KeyError as e:
+                if self.verbose > 0:
+                    print("Could not remove column from feature names."
+                    "Not found in generated cols.\n{}".format(e))
+
         return self
 
-    def transform(self, X, y=None):
+    def transform(self, X, y=None, override_return_df=False):
         """Perform the transformation to new categorical data.
 
         Parameters
@@ -207,7 +217,7 @@ def transform(self, X, y=None):
             for col in self.drop_cols:
                 X.drop(col, 1, inplace=True)
 
-        if self.return_df:
+        if self.return_df or override_return_df:
             return X
         else:
             return X.values
@@ -262,7 +272,7 @@ def _transform_count_encode(self, X_in, y):
                         .fillna(X[col])
                     )
 
-            X[col] = X[col].map(self.mapping[col])
+            X[col] = X[col].astype(object).map(self.mapping[col])
 
             if isinstance(self._handle_unknown[col], (int, np.integer)):
                 X[col] = X[col].fillna(self._handle_unknown[col])
@@ -348,14 +358,14 @@ def _check_set_create_attrs(self):
                 "'combine_min_nan_groups' == 'force' for all columns."
             )
         
-        if (
-            self.combine_min_nan_groups is not None
-            and self.min_group_size is None
-        ):
-            raise ValueError(
-                "`combine_min_nan_groups` only works when `min_group_size` "
-                "is set for all columns."
-            )
+        # if (
+        #     self.combine_min_nan_groups is not None
+        #     and self.min_group_size is None
+        # ):
+        #     raise ValueError(
+        #         "`combine_min_nan_groups` only works when `min_group_size` "
+        #         "is set for all columns."
+        #     )
 
         if (
             self.min_group_name is not None
@@ -423,3 +433,19 @@ def _check_set_create_dict_attrs(self):
                     "is set for column %s."
                     % (col,)
                 )
+    
+    def get_feature_names(self):
+        """
+        Returns the names of all transformed / added columns.
+
+        Returns
+        -------
+        feature_names: list
+            A list with all feature names transformed or added.
+            Note: potentially dropped features are not included!
+
+        """
+        if not isinstance(self.feature_names, list):
+            raise ValueError("Estimator has to be fitted to return feature names.")
+        else:
+            return self.feature_names

From e1fe86a8b4ab7b62f323548a17e9932da476a2e7 Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Thu, 9 Jul 2020 18:35:51 +0200
Subject: [PATCH 03/14] [Fix] CountEncoder test_metamorphic
 test_encoders.py::TestEncoders::test_metamorphic

---
 .gitignore                      |   6 +++++-
 category_encoders/.count.py.swp | Bin 28672 -> 0 bytes
 category_encoders/count.py      |   2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)
 delete mode 100644 category_encoders/.count.py.swp

diff --git a/.gitignore b/.gitignore
index dfcf8f8f..2e9e38fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,4 +60,8 @@ docs/_build/
 # PyBuilder
 target/
 
-.pytest_cache/
\ No newline at end of file
+.pytest_cache/
+
+*~
+*.swp
+*.swo
\ No newline at end of file
diff --git a/category_encoders/.count.py.swp b/category_encoders/.count.py.swp
deleted file mode 100644
index 16dbc48b1e4ac392a74bf8e133fc0fec76ebaa8f..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 28672
zcmeI4dyFJkb%%>rOk!*UAtD3^U(d#LkLm5*#cM(a&%%0l4J+{4tk-y881>Y2SI^XL
zPgSR?dUwahfP{%0;~1nwfy5Z%h$spW6h$T>Kn&!CAT|L^5Rr&@gg+950$2tjghRe_
zANA<2p51joilQ#*H{D%zALpKX?m73IdurBpzv}R{>V@q+9@kSn@6M~w>nuO~Yu?3o
zdtP@#Zl3wd_U_$%=2D*b`tHWAVG<t?x>>rb>u14woNRT1s2lf!r1nlcin8`_>uegI
zm6>s1#)0V^7-jvPFTB9J;`w{@THP^ssrvb+-#ESe?8%G+GY-r+Fyp|C12YcHI56YD
zi~}<c{J-NsHhhxz21fOGJF;W;^KoOJPqM!=`{_IfSMG`Se&2q6%6_`%KVyIQ?B_1~
zz&(G0y|3dCeE;pS=l1dJW5$6Q2WA|YabU)Q83$$@m~mjnff)y89GG!n#(^0J9s>t_
z9P(#S?~USbbN=uA|8HI7c@Kbp1RnzL0k?wJfj-y?THtx$0`L@YKKSB=`0ii@>;>Pw
z!1MkJycygEUIZ=%&jwEh-+PMZJp#T0-VN>qzY2Z{JbJ$8eGuFOUI{J%PXO=zIp_rY
zz|+7de%A96a4C57$)5Lz;5Kk0I1haIJkL80o(}$rK*YV^dhjSgjR(LMSON|3c>)<X
zgNwmu2ueH%9snN&?+15+n}7nBfJX_Od=>mYSOzWdJn%U15Ip@8AiRBp@VDkt_B!Zi
zzUsz<(PosYW)^4uK<&B0-I-tTT*0G$v>qIedco-hb>;3Cs36LcFi2Z!x5{GfT$z`=
zsyp!0R5gZv)brDJH%@|fKk+w%wsdxld~58>OYhsaPYshW%9_z=Gf2X2r$?zY$l77l
zkDK%JjM-jNbgYnCgph7KOZ+J9$H``MX}(gNhZH#=`5cE#zDJY6ri;ss125ZuL!+e{
zNB19WtjxFjVb)w~sjX`DB`Obl1>McHQPkaN3xzGmqeXMCANbiQ2|5wOORKGH(TXXQ
z`N?{aRVdUCC@_fqUS};%vp8ywjk%x1n`$C&MTMKgILRu5tF)}Er6&e~pG2Bun3}p;
zrS2+~Y@=rGl%F#E{P|9E<-ZS|_BV%v0O?Yt>qwjh3+kqDFi=sPDQ2Y`sIafXOm!2O
z6r{?J)LIz%$(D)}6-FohB=jR5YcXtBTI<Tx1wEnUh<_w)t7~+7U7Coa!Im1OJUq1#
zM5?iYybXfRW|*d7wBAr5GN`LJq?<;NByrNHw(bwoSgi$y$esvtH{OJ_K%EM+4b{-Z
zH`Hd}M>MI@O`lP%C-G>=Ad)}@{XR)W;mKgIRiPS!vW*}qH^^@_2>MxklAI&chUtW1
znV)TSFzEZEL8gu+qoA!0_)*@dC5ypDNLbV{G%TpKI3CF8dSN$HalbmcFoNT8n(jR5
z4?<)%jH7A+GgZf+tY>=`=|$9-AE=EPvLC2Te@mSRf}!fd;dS`X5`L+`%zjJdL@F+S
zFx>Fhf-LO9sx31@U#)gl>0%IWhFOqQX~^PO_f(AjNE|Wx!;#81n8Gf)fNw&TN!(I;
zh0!uo0hPyNrmK)qq2Y!g(>3&9;=ZYE=C!BbV7NKjbhD^MN1C@BNNE#h2c9$}mvCHj
zTd(W|CDgT4nk7@&uF1CKNmI&KeU@9snEL7(M2`9_1~&khjrOZD#Z3(3XYNL34mA7W
z#nv!TYjCB<3eaO)jnI5d@y)?1#aT;h17Toa%aNx?VgIZaqe*pf&EP?IG=RB!k%=N+
zYNs3iFj(HbqAo9uUuw)(>9k_mlM%#>b1s+gIVaPQp+`><O)Q#vJy6ZvEwyL9RUuAh
zLC+2*NUvMUCqcRq4;X2m*7Y=~C<uDcN`FC=qH%?n3<F)0w^kkHf>4Ld${b9cG?C~{
zEERH)`Qad~j<d9$b6#7qLOrEIj8I=5TN^P9)b-j1s#QIU9X+aaHK#Lg>kJDy9R}U7
zAIc1&P#JF)VohmWYdTS<jL-;FUzUYdURn(0zV5p^o<<U2th6d=P@Q&-5IPN{M<kMt
zY7M&^qGh%IWpQlyC&PG@<YQ1=8}TV&fl<I}6rG6TQ;`~=2L|X#$aUt~5`NuKbdgb^
zRxju6SuJiGS&)^+a$y+Lv|P@Qya^#m24%H>p;_#WmK{|l=17|rF)y+WVlrj%6uQPQ
zgi2bi4%Av2!(ctooEB9xL^c~GwimKrwOe|`o^<A}W{hklr$Y2#S#8KxXt;$LO8l*{
zWhqtxlcm*kNpwx`4AffpgKj&f^j0CCCC~M2$?f%xeCGG%d0ih&k(hi$p_jzi3%jk<
z;<($Wrl=bfq9~<0x&5!{#OX-3apdauikd=!uLfZ%Rxj^J>jWu@#PqGE7K@DxiTM_p
zuvSFOldI-QV{*yPa6vcf>EHGb!WXP#%)<yPixs6MY&cuBFk=JLSMewtj?nJAHNPXV
zUU_{}ko^#i6x$@bk>VpSc)c(CVyt6Y6tl}Xu!_%UkD_gMLZTGeePHK}an!@IJJFU(
zg5kht^Chau7)@&o&F-P-Xl`!qfXKe=rQ@CqP}E^fn`3HGGpJCu(#N8)kGW=B)CRBQ
zM_8to4XYRyTd#hfV;<(b%A2UJoJF_1Cky6+yeM0%meo`pt7J;evkOoiL^8}eon{&g
z`Yk)x!6Ft*yAEc6Rnax0Y<b4>(Y$o-#?@HrLn#W|&tf#$F{cWH<l69b7{M9O>va54
zwh<?t4vs+Mc${vGe1$~yMp4w@d1WUrrr+)2&(OH3*o{#Wc3&^wJ<t8HJE}YtzCFq?
zeWDEIiz`;~1#DHTdoa60OJ1kbA87}u<NE4_C&Lsy@8#lUdBSf`{Qm$S`4GOd`2X(w
z{fqeW?*w-P@%snh`QW?w^j`-b2d6;`Tm&A)m;Vm<3b+@X0IvlL;G6jJ_k(-ETftr6
zR`62rH1K%vRs8yQfC0D;EP@w;XM*#=gZTLmfRBRrgSUa}!A0Og@ObbbW7q;GfQ;uL
zm|Zgt%s4RPz>EVk4$L?(<G@da1NlanFV?w}J!`uXi_Uk$IO9PP&zHK1FHYsFB*Jt^
zqNB~Ze5cA5SqA|mI2vXW9KyM>ec0xWPtl$$g55Y>;{UPhi_*-;)ze$(Dmg9fV3iyK
z*$w723RNcfH6)B2WucBD%4T~A3ASSsIM6*s-N6XQ@X|^H`;B9$-6wJXD)bh?2pSX*
zs#kF=E36xH2FK&yM|BXT<gnQrRfkI`$X#pt+-16S-e=*@2_qQ3L?RWZ1f2>gnv<4m
zmBC7lVg}@wyDn35*3qheI7i5g^8n-MX+K?EmbMK6t^xNsR|*@7kGE<<@2BGAL~3I1
zr2;yxIN8pGsClk+`7DWpyDiST7G@puHW659dzmq<NWf40Fwd2CpW`)z+`NWJ%tS3@
zwpo=wX{(-TKS7SxJG%epIa^P5g~Q@udu~4DQ**d{!z=%g?~jGsIQ#mfX6#6$p%!tW
z314)BMoG9+wR%~)I!(SgX%X2Vv>B+?YEWTSw`UZeoV=KD=}%O?>jz?ZS`21_X-3qm
z)E1RW6jy3iCPPPgI6xvauUpdru*t=Mp766QvGI^br=gxlR5wfXiAc+Ql>?~yY1Ad<
zu_aEI*Zx1p;@(H(d>a41Zw=(v@by0i?gwuH>)<eubN;X4?>_`S5B?n71KtUagAg1B
zbKraU{*Qpa1s?!^0B!?s0<Q+Uz=hxf@NHrM{{a3Tybrt=NG?Dh>;&h5j}r%YH~39(
zJD3NO5AYFu{|xK}&jG&-{vF@{L*Q=kTi{OcVz3W95qyrA$7g}$3A`FCfhKqaW;_Jm
z1?~ZF07t;nz{BJWd;t6g7=jq=1TFAy<PO{m?gFm{yMO|hfJezCxDUJ?ycygE0?-35
z0zY~Jc^2T);3U`q9w2{U3!Db~zz>-pnWIk?c?7az-Nl(+->T`e6XWUzM4HLthiND5
zohAUb>0|5@b=7qB@44>f^Q;KKUWi^I*jBD8-^VRRVPh7zd3L!f4#_G_YKDk1G41AY
zP#t_8-x$@IHhWN2V-X)4rMsq4n)5BE(JW9gyk|!!F>T1qhi|n>%T1uuDl(tMl-r#B
zZl=xBz;s;D9Jia3br*5wu!*+iKv7QeOXo4|@*b%ya79h4rIyN+8D*%*@6bH71QnEv
z9LQC#z_zvejcZN1fs&jM49-ZnM?BtbscMeD8lLW~^k;H&R4GzonX4p|w4~>&9j4J}
zFlf%V{b-BK5m&-}8mX~n!UntxGhvb<DwVVZH!HWKEz=vz({?KdjgfCQ_??>OtCS#Y
z6Eam(sim4se2b_u*+2T!WxhO*b#(Zt9EdmLHTm~0^KItV&}1iB4|RTWs->waiE_tX
zqyy>EnXyhUXL*r#T3XPBL|PJD+vU%ksgwMMV+c`hnrmBbp9r=pD@xkV*LAK~JEGHM
z%j`D5g%Uo)4q&N0h`Y-RcHPMKU_~Uru}7aQFEUwdj0z@6($o@XC}e&Ox0*dNNA#&n
zQGi8!eUQ-gruLL_=ZFE951}fHy_1(Rr|9;~!mW0?ljN<EHWF57*`J~&N{M1{OxDga
z!zpt~Zy)tNyFDt+fpD~~hY$Z(#{13tE(_r@d?MFJ7-+tEQp{cXtewS%u5Og&o>)7i
z>CsPF+gW8HPukTrQ(YXzG&$15`;L5b#O1l0?lU(Yi$>Apzjc4bA-9yDAV&nn(4|>v
z^COy7f?Q9Ngt;eHLCZ03RUg>{db0qh*Kep*EqSXgwOZ_(SGCDr)f#eDFCFbdOlogO
zM&q)SJSmRaqDd!$ES3>zEJRZzsUqe;y}q4EOW;~}pQRmFov6qXhk#{w7Fx|YP%aIw
zr5)jwF-dZLOlQ@$wos~{jXlh>Bpxdd&bU~NSu50>P(~#yY*mX{lsbblEQ>Fad&{j=
zIn|FvMT%6T`*cdJ)e)W$AeW<Dm$N~P(j=U#>fBCY!sT;Nm2QhPPH;`oP)EJglBWk$
z8k1c*Ju)IljwACS341{&zgrrntGUz`-J+vZY7aRZk<M2bvldc#d}j3NyYR4>JjYr6
zsK_ZP=7`;&9IqQSHN~2k3}DaEC!dZn`As^KMJBl?M%xG-;}&3Y1+j9R%dls~8q`iN
z+^mVuHExW8)1-V!)D@nLGSWLYNp|BbU0v2|k$Gt!Ue>Fwpy}E}gx_ZM?O9QmIyTU5
z!NDD3jU#sNCjzAt>#_~fqfB<4&Tb3WH}Gq)d?lvBQOOlR-Xmr_!wy4lISYFS@#BOv
zo$f}^Jz?xU`!((V;|_b@v;Mz(zyC}4`F{p(2WwyfJRN)!zy5E4`1=`n1^5;4BYgNT
zf)9cBf%k&91LyCb;`=es0Ox~m;m<z=J`COkejOYHmx4#|=|2hX02^Q(yac=$NIt+1
z@$bI`J^=0n$HB|MOTi`JzZlRDz&F6>!0&;h;9~F{eEfd~UjTmx?gRILH-S^&B)AD&
z3HE|t0M7st6Zjyw9o!5S!Bya?;3Du%VgUaE{uJB-ZU(Ob3*g(t06q)u0IvhLfa72X
zcpUgLF@WCzN5Lz>BDf0tH*tWEf&0Pxz&pSy*b5rqnczI|IpPC%gSUdazz`e-PXym*
zUS)0{DdGmqwk#-jO)infwIc+DrHKJE8D9w&leMoC-)vU0PWZ+*%vZ4XyxQlm5R3*u
z^kuUr@!*nXa{i?w@>rg(({pKl`dA@Pl)|?ikeFFS@rP=`<m9o&C5x5?vK)0%ohi@T
z669Q(*BpqU9a-^QKhusD+;jQ#+>b5S#m+%^f^4SAeu{>d&RAAxatA9#y|YYFo3@qL
z$>wro6a}X$iDcp?8)nHdM?Q&eAI~P<W`a!`kUW(vXN1+wGND!3a-5nwf~L-$^7SAJ
z5}oKQD!cvK=$2dO+Tw=zSu@~86IZMJX<ki{^f<O}qW6<v6VjZ6uQRhM=Swv)W@nmg
zGN9V0b{AS#ItxyBgJGs#9&8<wV>n5`8at_zQp;L!E2lzjLrwGayf%@iW$UL)<p#2t
zQE4Jewo1Y-cdF0j&gr75oMCJv*w=^==@7&jE}vdupO7@?)m4jX&+cGn@$!$fP^CZ?
zfG4-zHP)&%AadeWeIoDUnEp!sXLXh)dn(4`xIT45Fv(6x)n?uF>h^F&m*zgDbk#z5
zA+$zZCEz$;$fd5+nb?($Z8=OSZ7e&)-8<%$88!;ttRs<h+|8m%;i#^lr)SI(%e_t(
zcg8j<o|}fro!vxJGA9I9^z4^yx?`=83a3O}!#hM!c%)w`<1Lw}k&$tn^1?z(9TyFc
z3ReM(Vap}vn!?4n3=>2{%U0UFjId{=crM%I$F%Fn2S2XokD8-g!_B;u(1HH6wkW?n
zO<Cs!6xT8Rs+Tnyg?Ts^-7U>Xps&!+VzG2K?`U(45IO0*PIDJM=+4E4nndL;X{Xp7
z(!epJxgnZt$XX~*s>}7$HyqcnwRC3Q@A8_U&OF9uEENWV<uYj+F2DT5DY#qNbQ_VW
zdc;TxPMRJKe<~&1I|?-=#=>SbCCU*cB474pth(Ncw&WnXu0$R$se4#dFQSR2+}Nll
z^u>g#(4()|4jQT)hU*ejhgScfN=8Ah9c}f-w8A<%$+1uVqLp?rm@$26vdUb_zk9ZX
znc0b{uimIFTu6Z}Y;RNx+t6Aqzr=&9eV|q2qNu^N+M3%0EV{(<>f_vS)9P|z%rqCO
zmW%?`8`Xw#zngOT_q>Sczx+qdUhinoM_-V^Q_60sNXsVMAQbp0_1Ag3;mSSjE7i-d
z&RgtBOrCcs^gCBC*xTI0yI$-~GQK{)FsHA$;n@CTN?-CuaTG^8<tW3|%3HhIl###N
zmb?Dg(f!8`Uw6iG$M!F|7PoDWSA%QI>4A9e(ZRzvxUbu`w*RGv&Z^weYhAJJM|<SD
zvm1rH=xkf8&{rMB;Ufocw8bi=><oI2rq>*C-?pdjz)}9azqxALll=dkc!d8V{vqf8
z{M6UK;_JT$oCG~^6zl@u$JhTP_#n6soCG7#1N*@<fSmW=2Sz~7_@4#7j$i)?AnyRY
z9P9;GfFI+}e-_*aQs9GCa0L)Q|1ZH`fVY4<Ko|TXI1l_a{`?!k2;2mo1-^{0{u%Hf
z_&9hQkn{Z`U^n;{zWN`5w}ZQYobMk7&juHQ|HNN^H%LJjtbrTBJdpnv@G$rSxE%!G
zCh#ioT<`;Y_V0sFgDtQIt_IHm596oH{}cFQ@FuVYPJ=~oKKKT{`u*T{K>&7x=YStG
zFETe@lKDB8D;E#@Wju5z8nNpANWnZw1Z`Ea+45*|pDK6HA}6I#G*Y2_NpJd4fZj~c
z=JjW<T*!Sln_2Dq;ay4bN%GG;NLnc!fJ!SZ{n9ppL)Vdf$^(efCui)w<)_{M$XA_k
zQ*u<L+x)&u#IbL3Lu=tQ6ld>wZ^gz)`W=6H-oEFYmoAsFMzX;fhK@fss;4}Ssv)PC
z7>=<r_1g)`>i4s%-4^Z1wxYhhd{e+<^NMxeM!^M<t^76?@PH0U3IoHnk^El*9q_bq
zCaFfEDcf%oA<$m2<Z`asjnY8!$x11korz6Y(X*bz@fh*;8?H(kUEy%m<-FCzv1Gm`
z<$q2K3}Y}3S6#m6x#xOH?~+Mc&54^HiXNAEIIKBQOtan)+Spt^UoxVxa(b6ZISI>D
zHYc#FS!{21NYxTI%x}3@PJ)%Gon%X?>}tKto;hW+#2M4vbl&VJXTqAoB7tM$uH}y@
zy`4<eaZ3B@Me>0sTya;8{G4slWj!-p!*d7zclnmn`8Q3r>Zh)hD(X4f<Wn=T>OPET
zf#&6_q9(O#*tTpFQswnedHvM>69|#n6*(-^|I$IUMNRN<ZYs|w&y?qKvV@ScW)l>Y
zY|}O(WKCA37p>X|mDMXIIf+K;kc?Y5_JUQgQC4vwiFN83Eu-JQ)@r&J>$uW@e@+sG
zUH$>fIVfgP`Dv|A7QUI)W|G$2E<t?2ASrZ;lyjGPwp{2hb3+?z$@a(}q@8rM>A7m<
zPg6t6>V9<A&Ff3`NQ^9eL08f8tXizi(vuv!iEgU)K5s+L`($Xk%W6|ACXb|@#X5vo
zOqQ!GpJGu6PI%>e?mPDviqUsUY?&yod0xt|DdHV&25y-glJv7uPnJAl2Xw}<*-RKR
zN9Nlp|4pgma(X;zq3)6aGaX*tGr1>8C_JwxD`#+-8B9Al)ouAlHqC0JT-vMO{>qbS
zjg9kC%%tX`C$-8K5;VT>OLLIdBm543svFg@pVF!hRkDb4x^zNsoh)0ya(NglMrge<
zX|BPs##lg{I_y(>CJL7dQb^8NDn%ys^jL+;+ABSirPtQoP6}ZCBtKU*UvC9ZCt~Fp
qn)St-rtEh4&XX0Z1R_C&<`mA=3(aIRQucMqQZP0v^`(DO)cy~*sZFH-

diff --git a/category_encoders/count.py b/category_encoders/count.py
index 70a3d639..83531a10 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -262,7 +262,7 @@ def _fit_count_encode(self, X_in, y):
     def _transform_count_encode(self, X_in, y):
         """Perform the transform count encoding."""
         X = X_in.copy(deep=True)
-        X.loc[:, self.cols] = X.fillna(value=np.nan)
+        X.fillna(value=np.nan, inplace=True)
 
         for col in self.cols:
             if self._min_group_size is not None:

From 857fd581835ec1c03b7293ecdeea82e7dc53e590 Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Thu, 9 Jul 2020 18:59:26 +0200
Subject: [PATCH 04/14] [Fix] CountEncoder test handle_missing
 test_encoders.py::TestEncoders::test_handle_missing_error
 test_encoders.py::TestEncoders::test_handle_missing_error_2cols

---
 category_encoders/count.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/category_encoders/count.py b/category_encoders/count.py
index 83531a10..20601861 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -15,7 +15,7 @@
 class CountEncoder(BaseEstimator, TransformerMixin):
     def __init__(self, verbose=0, cols=None, drop_invariant=False,
                  return_df=True, handle_unknown=None,
-                 handle_missing='count',
+                 handle_missing='value',
                  min_group_size=None, combine_min_nan_groups=None,
                  min_group_name=None, normalize=False):
         """Count encoding for categorical features.
@@ -192,6 +192,9 @@ def transform(self, X, y=None, override_return_df=False):
         p : array, shape = [n_samples, n_numeric + N]
             Transformed values with encoding applied.
         """
+        if self.handle_missing == 'error':
+            if X[self.cols].isnull().any().any():
+                raise ValueError('Columns to be encoded can not contain null')
 
         if self._dim is None:
             raise ValueError(
@@ -239,10 +242,10 @@ def _fit_count_encode(self, X_in, y):
                         % (col,)
                     )
 
-                elif self._handle_missing[col] not in ['count', 'return_nan',  'error', None]:
+                elif self._handle_missing[col] not in ['value', 'return_nan',  'error', None]:
                     raise ValueError(
                         '%s key in `handle_missing` should be one of: '
-                        ' `count`, `return_nan` and `error` not `%s`.'
+                        ' `value`, `return_nan` and `error` not `%s`.'
                         % (col, str(self._handle_missing[col]))
                     )
 

From e7faa40223bff0c2a1590ff03692e2b92329a2d6 Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Fri, 10 Jul 2020 14:44:47 +0200
Subject: [PATCH 05/14] Updated Docstring for CountEncoder

---
 category_encoders/count.py | 42 +++++++++++++++++++++-----------------
 tests/test_encoders.py     |  9 ++++++--
 2 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/category_encoders/count.py b/category_encoders/count.py
index 20601861..bbef2377 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -14,7 +14,7 @@
 
 class CountEncoder(BaseEstimator, TransformerMixin):
     def __init__(self, verbose=0, cols=None, drop_invariant=False,
-                 return_df=True, handle_unknown=None,
+                 return_df=True, handle_unknown='value',
                  handle_missing='value',
                  min_group_size=None, combine_min_nan_groups=None,
                  min_group_name=None, normalize=False):
@@ -38,31 +38,31 @@ def __init__(self, verbose=0, cols=None, drop_invariant=False,
             (otherwise it will be a numpy array).
         handle_missing: str
             how to handle missing values at fit time. Options are 'error', 'return_nan',
-            and 'count'. Default 'count', which treat NaNs as a countable category at
+            and 'value'. Default 'value', which treat NaNs as a countable category at
             fit time.
-        handle_unknown: str, int or dict of.
+        handle_unknown: str, int or dict of {column : option, ...}.
             how to handle unknown labels at transform time. Options are 'error'
             'return_nan' and an int. Defaults to None which uses NaN behaviour
             specified at fit time. Passing an int will fill with this int value.
-        normalize: bool or dict of.
+        normalize: bool or dict of {column : bool, ...}.
             whether to normalize the counts to the range (0, 1). See Pandas `value_counts`
             for more details.
-        min_group_size: int, float or dict of.
+        min_group_size: int, float or dict of {column : option, ...}.
             the minimal count threshold of a group needed to ensure it is not
             combined into a "leftovers" group. If float in the range (0, 1),
             `min_group_size` is calculated as int(X.shape[0] * min_group_size).
             Note: This value may change type based on the `normalize` variable. If True
             this will become a float. If False, it will be an int.
-        min_group_name: None, str or dict of.
+        min_group_name: None, str or dict of {column : option, ...}.
             Set the name of the combined minimum groups when the defaults become
             too long. Default None. In this case the category names will be joined
             alphabetically with a `_` delimiter.
             Note: The default name can be long ae may keep changing, for example, 
             in cross-validation.
-        combine_min_nan_groups: bool or dict of.
+        combine_min_nan_groups: bool or dict of {column : bool, ...}.
             whether to combine the leftovers group with NaN group. Default True. Can
             also be forced to combine with 'force' meaning small groups are effectively
-            counted as NaNs. Force can only used when 'handle_missing' is 'count' or 'error'.
+            counted as NaNs. Force can only used when 'handle_missing' is 'value' or 'error'.
             Note: Will not force if it creates an binary or invariant column.
 
 
@@ -246,7 +246,6 @@ def _fit_count_encode(self, X_in, y):
                     raise ValueError(
                         '%s key in `handle_missing` should be one of: '
                         ' `value`, `return_nan` and `error` not `%s`.'
-                        % (col, str(self._handle_missing[col]))
                     )
 
             self.mapping[col] = X[col].value_counts(
@@ -256,8 +255,13 @@ def _fit_count_encode(self, X_in, y):
 
             self.mapping[col].index = self.mapping[col].index.astype(object)
 
+
+
             if self._handle_missing[col] == 'return_nan':
                 self.mapping[col][np.NaN] = np.NaN
+            
+            elif self._handle_missing[col] == 'value':
+                self.mapping[col].loc[-2] = 0
 
         if any([val is not None for val in self._min_group_size.values()]):
             self.combine_min_categories(X)
@@ -361,14 +365,14 @@ def _check_set_create_attrs(self):
                 "'combine_min_nan_groups' == 'force' for all columns."
             )
         
-        # if (
-        #     self.combine_min_nan_groups is not None
-        #     and self.min_group_size is None
-        # ):
-        #     raise ValueError(
-        #         "`combine_min_nan_groups` only works when `min_group_size` "
-        #         "is set for all columns."
-        #     )
+        if (
+            self.combine_min_nan_groups is not None
+            and self.min_group_size is None
+        ):
+            raise ValueError(
+                "`combine_min_nan_groups` only works when `min_group_size` "
+                "is set for all columns."
+            )
 
         if (
             self.min_group_name is not None
@@ -389,8 +393,8 @@ def _check_set_create_dict_attrs(self):
             'min_group_name': None,
             'combine_min_nan_groups': True,
             'min_group_size': None,
-            'handle_unknown': 'count',
-            'handle_missing': None,
+            'handle_unknown': 'value',
+            'handle_missing': 'value',
         }
 
         for attr_name, attr_default in dict_attrs.items():
diff --git a/tests/test_encoders.py b/tests/test_encoders.py
index 3c73780a..5507c210 100644
--- a/tests/test_encoders.py
+++ b/tests/test_encoders.py
@@ -185,6 +185,7 @@ def test_handle_unknown_return_nan(self):
         y = pd.Series([1, 0])
 
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
+            breakpoint()
             with self.subTest(encoder_name=encoder_name):
 
                 enc = getattr(encoders, encoder_name)(handle_unknown='return_nan')
@@ -216,6 +217,7 @@ def test_handle_missing_return_nan_test(self):
         y = pd.Series([1, 0, 1])
 
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
+            breakpoint()
             with self.subTest(encoder_name=encoder_name):
                 enc = getattr(encoders, encoder_name)(handle_missing='return_nan')
                 result = enc.fit(X, y).transform(X_t).iloc[2, :]
@@ -229,13 +231,16 @@ def test_handle_unknown_value(self):
         train = pd.DataFrame({'city': ['chicago', 'los angeles']})
         test = pd.DataFrame({'city': ['chicago', 'denver']})
         y = pd.Series([1, 0])
-
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
             with self.subTest(encoder_name=encoder_name):
-
+                breakpoint()
                 enc = getattr(encoders, encoder_name)(handle_unknown='value')
                 enc.fit(train, y)
+                print(enc.__class__.__name__)
+                print(enc.mapping)
                 result = enc.transform(test)
+                # print(enc.mapping)
+                print("result: ", result)
                 self.assertFalse(result.iloc[1, :].isnull().all())
 
     def test_sklearn_compliance(self):

From 2870455b4a6f3ef6929d79cfd58cf7c566d2c99f Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Fri, 10 Jul 2020 14:44:47 +0200
Subject: [PATCH 06/14] Updated Docstring for CountEncoder

---
 category_encoders/count.py | 46 +++++++++++++++++++++-----------------
 tests/test_encoders.py     |  9 ++++++--
 2 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/category_encoders/count.py b/category_encoders/count.py
index 20601861..3cbbca78 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -14,7 +14,7 @@
 
 class CountEncoder(BaseEstimator, TransformerMixin):
     def __init__(self, verbose=0, cols=None, drop_invariant=False,
-                 return_df=True, handle_unknown=None,
+                 return_df=True, handle_unknown='value',
                  handle_missing='value',
                  min_group_size=None, combine_min_nan_groups=None,
                  min_group_name=None, normalize=False):
@@ -38,31 +38,31 @@ def __init__(self, verbose=0, cols=None, drop_invariant=False,
             (otherwise it will be a numpy array).
         handle_missing: str
             how to handle missing values at fit time. Options are 'error', 'return_nan',
-            and 'count'. Default 'count', which treat NaNs as a countable category at
+            and 'value'. Default 'value', which treat NaNs as a countable category at
             fit time.
-        handle_unknown: str, int or dict of.
+        handle_unknown: str, int or dict of {column : option, ...}.
             how to handle unknown labels at transform time. Options are 'error'
-            'return_nan' and an int. Defaults to None which uses NaN behaviour
+            'return_nan', 'value' and int. Defaults to None which uses NaN behaviour
             specified at fit time. Passing an int will fill with this int value.
-        normalize: bool or dict of.
+        normalize: bool or dict of {column : bool, ...}.
             whether to normalize the counts to the range (0, 1). See Pandas `value_counts`
             for more details.
-        min_group_size: int, float or dict of.
+        min_group_size: int, float or dict of {column : option, ...}.
             the minimal count threshold of a group needed to ensure it is not
             combined into a "leftovers" group. If float in the range (0, 1),
             `min_group_size` is calculated as int(X.shape[0] * min_group_size).
             Note: This value may change type based on the `normalize` variable. If True
             this will become a float. If False, it will be an int.
-        min_group_name: None, str or dict of.
+        min_group_name: None, str or dict of {column : option, ...}.
             Set the name of the combined minimum groups when the defaults become
             too long. Default None. In this case the category names will be joined
             alphabetically with a `_` delimiter.
-            Note: The default name can be long ae may keep changing, for example, 
+            Note: The default name can be long and may keep changing, for example, 
             in cross-validation.
-        combine_min_nan_groups: bool or dict of.
+        combine_min_nan_groups: bool or dict of {column : bool, ...}.
             whether to combine the leftovers group with NaN group. Default True. Can
             also be forced to combine with 'force' meaning small groups are effectively
-            counted as NaNs. Force can only used when 'handle_missing' is 'count' or 'error'.
+            counted as NaNs. Force can only used when 'handle_missing' is 'value' or 'error'.
             Note: Will not force if it creates an binary or invariant column.
 
 
@@ -246,7 +246,6 @@ def _fit_count_encode(self, X_in, y):
                     raise ValueError(
                         '%s key in `handle_missing` should be one of: '
                         ' `value`, `return_nan` and `error` not `%s`.'
-                        % (col, str(self._handle_missing[col]))
                     )
 
             self.mapping[col] = X[col].value_counts(
@@ -256,8 +255,13 @@ def _fit_count_encode(self, X_in, y):
 
             self.mapping[col].index = self.mapping[col].index.astype(object)
 
+
+
             if self._handle_missing[col] == 'return_nan':
                 self.mapping[col][np.NaN] = np.NaN
+            
+            elif self._handle_missing[col] == 'value':
+                self.mapping[col].loc[-2] = 0
 
         if any([val is not None for val in self._min_group_size.values()]):
             self.combine_min_categories(X)
@@ -361,14 +365,14 @@ def _check_set_create_attrs(self):
                 "'combine_min_nan_groups' == 'force' for all columns."
             )
         
-        # if (
-        #     self.combine_min_nan_groups is not None
-        #     and self.min_group_size is None
-        # ):
-        #     raise ValueError(
-        #         "`combine_min_nan_groups` only works when `min_group_size` "
-        #         "is set for all columns."
-        #     )
+        if (
+            self.combine_min_nan_groups is not None
+            and self.min_group_size is None
+        ):
+            raise ValueError(
+                "`combine_min_nan_groups` only works when `min_group_size` "
+                "is set for all columns."
+            )
 
         if (
             self.min_group_name is not None
@@ -389,8 +393,8 @@ def _check_set_create_dict_attrs(self):
             'min_group_name': None,
             'combine_min_nan_groups': True,
             'min_group_size': None,
-            'handle_unknown': 'count',
-            'handle_missing': None,
+            'handle_unknown': 'value',
+            'handle_missing': 'value',
         }
 
         for attr_name, attr_default in dict_attrs.items():
diff --git a/tests/test_encoders.py b/tests/test_encoders.py
index 3c73780a..5507c210 100644
--- a/tests/test_encoders.py
+++ b/tests/test_encoders.py
@@ -185,6 +185,7 @@ def test_handle_unknown_return_nan(self):
         y = pd.Series([1, 0])
 
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
+            breakpoint()
             with self.subTest(encoder_name=encoder_name):
 
                 enc = getattr(encoders, encoder_name)(handle_unknown='return_nan')
@@ -216,6 +217,7 @@ def test_handle_missing_return_nan_test(self):
         y = pd.Series([1, 0, 1])
 
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
+            breakpoint()
             with self.subTest(encoder_name=encoder_name):
                 enc = getattr(encoders, encoder_name)(handle_missing='return_nan')
                 result = enc.fit(X, y).transform(X_t).iloc[2, :]
@@ -229,13 +231,16 @@ def test_handle_unknown_value(self):
         train = pd.DataFrame({'city': ['chicago', 'los angeles']})
         test = pd.DataFrame({'city': ['chicago', 'denver']})
         y = pd.Series([1, 0])
-
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
             with self.subTest(encoder_name=encoder_name):
-
+                breakpoint()
                 enc = getattr(encoders, encoder_name)(handle_unknown='value')
                 enc.fit(train, y)
+                print(enc.__class__.__name__)
+                print(enc.mapping)
                 result = enc.transform(test)
+                # print(enc.mapping)
+                print("result: ", result)
                 self.assertFalse(result.iloc[1, :].isnull().all())
 
     def test_sklearn_compliance(self):

From a2f81ada784d4dc5393d3a850ddef81b7b3020d6 Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Fri, 10 Jul 2020 16:27:29 +0200
Subject: [PATCH 07/14] cleaning code

---
 category_encoders/count.py | 2 +-
 tests/test_encoders.py     | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/category_encoders/count.py b/category_encoders/count.py
index 3cbbca78..ba504475 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -453,6 +453,6 @@ def get_feature_names(self):
 
         """
         if not isinstance(self.feature_names, list):
-            raise ValueError("Estimator has to be fitted to return feature names.")
+            raise ValueError("CountEncoder has to be fitted to return feature names.")
         else:
             return self.feature_names
diff --git a/tests/test_encoders.py b/tests/test_encoders.py
index 5507c210..b4bde6cd 100644
--- a/tests/test_encoders.py
+++ b/tests/test_encoders.py
@@ -185,7 +185,6 @@ def test_handle_unknown_return_nan(self):
         y = pd.Series([1, 0])
 
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
-            breakpoint()
             with self.subTest(encoder_name=encoder_name):
 
                 enc = getattr(encoders, encoder_name)(handle_unknown='return_nan')
@@ -217,7 +216,6 @@ def test_handle_missing_return_nan_test(self):
         y = pd.Series([1, 0, 1])
 
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
-            breakpoint()
             with self.subTest(encoder_name=encoder_name):
                 enc = getattr(encoders, encoder_name)(handle_missing='return_nan')
                 result = enc.fit(X, y).transform(X_t).iloc[2, :]
@@ -233,7 +231,6 @@ def test_handle_unknown_value(self):
         y = pd.Series([1, 0])
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
             with self.subTest(encoder_name=encoder_name):
-                breakpoint()
                 enc = getattr(encoders, encoder_name)(handle_unknown='value')
                 enc.fit(train, y)
                 print(enc.__class__.__name__)

From ffcf88eda1f5a894370a939bd8529fb7144612a3 Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Fri, 10 Jul 2020 16:27:29 +0200
Subject: [PATCH 08/14] cleaning code

---
 category_encoders/count.py | 2 +-
 tests/test_encoders.py     | 9 ++-------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/category_encoders/count.py b/category_encoders/count.py
index 3cbbca78..ba504475 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -453,6 +453,6 @@ def get_feature_names(self):
 
         """
         if not isinstance(self.feature_names, list):
-            raise ValueError("Estimator has to be fitted to return feature names.")
+            raise ValueError("CountEncoder has to be fitted to return feature names.")
         else:
             return self.feature_names
diff --git a/tests/test_encoders.py b/tests/test_encoders.py
index 5507c210..3c73780a 100644
--- a/tests/test_encoders.py
+++ b/tests/test_encoders.py
@@ -185,7 +185,6 @@ def test_handle_unknown_return_nan(self):
         y = pd.Series([1, 0])
 
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
-            breakpoint()
             with self.subTest(encoder_name=encoder_name):
 
                 enc = getattr(encoders, encoder_name)(handle_unknown='return_nan')
@@ -217,7 +216,6 @@ def test_handle_missing_return_nan_test(self):
         y = pd.Series([1, 0, 1])
 
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
-            breakpoint()
             with self.subTest(encoder_name=encoder_name):
                 enc = getattr(encoders, encoder_name)(handle_missing='return_nan')
                 result = enc.fit(X, y).transform(X_t).iloc[2, :]
@@ -231,16 +229,13 @@ def test_handle_unknown_value(self):
         train = pd.DataFrame({'city': ['chicago', 'los angeles']})
         test = pd.DataFrame({'city': ['chicago', 'denver']})
         y = pd.Series([1, 0])
+
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
             with self.subTest(encoder_name=encoder_name):
-                breakpoint()
+
                 enc = getattr(encoders, encoder_name)(handle_unknown='value')
                 enc.fit(train, y)
-                print(enc.__class__.__name__)
-                print(enc.mapping)
                 result = enc.transform(test)
-                # print(enc.mapping)
-                print("result: ", result)
                 self.assertFalse(result.iloc[1, :].isnull().all())
 
     def test_sklearn_compliance(self):

From ff57575d694e7c893a20c9000bb25b467d24225c Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Mon, 13 Jul 2020 01:39:13 +0200
Subject: [PATCH 09/14] set default for min_group_size: min_group_size is set to
 0.01, because some value is required for combine_min_nan_groups.

---
 category_encoders/count.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/category_encoders/count.py b/category_encoders/count.py
index ba504475..9d5164a2 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -49,7 +49,8 @@ def __init__(self, verbose=0, cols=None, drop_invariant=False,
             for more details.
         min_group_size: int, float or dict of {column : option, ...}.
             the minimal count threshold of a group needed to ensure it is not
-            combined into a "leftovers" group. If float in the range (0, 1),
+            combined into a "leftovers" group. Default value is 0.01. 
+            If float in the range (0, 1),
             `min_group_size` is calculated as int(X.shape[0] * min_group_size).
             Note: This value may change type based on the `normalize` variable. If True
             this will become a float. If False, it will be an int.
@@ -385,6 +386,7 @@ def _check_set_create_attrs(self):
 
         if self.combine_min_nan_groups is None:
             self.combine_min_nan_groups = True
+            self.min_group_size = 0.01
 
     def _check_set_create_dict_attrs(self):
         """Check attributes that can be dicts and format for all `self.cols`."""

From 78df7b1cf4f3b85635296d9df5ccfd547428d501 Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Mon, 13 Jul 2020 22:28:50 +0200
Subject: [PATCH 10/14] added handle_unknown handling

to _transform_count_encode. This makes the following tests fail:
FAILED test_encoders.py::TestEncoders::test_handle_missing_return_nan_test - AssertionError: False is not true
FAILED test_encoders.py::TestEncoders::test_handle_missing_return_nan_train - AssertionError: False is not true
---
 category_encoders/count.py | 8 +++++++-
 tests/test_encoders.py     | 4 ----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/category_encoders/count.py b/category_encoders/count.py
index 9d5164a2..0a98d2d8 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -279,11 +279,17 @@ def _transform_count_encode(self, X_in, y):
                         X[col].map(self._min_group_categories[col])
                         .fillna(X[col])
                     )
-
+            
             X[col] = X[col].astype(object).map(self.mapping[col])
 
             if isinstance(self._handle_unknown[col], (int, np.integer)):
                 X[col] = X[col].fillna(self._handle_unknown[col])
+            
+            # elif (self._handle_unknown[col] == 'value' and
+            #         X[col].isna().any()
+            #      ):
+            #      X[col].replace(np.nan, 0, inplace=True)   
+
             elif (
                 self._handle_unknown[col] == 'error'
                 and X[col].isnull().any()
diff --git a/tests/test_encoders.py b/tests/test_encoders.py
index 484bc0da..0ec857c5 100644
--- a/tests/test_encoders.py
+++ b/tests/test_encoders.py
@@ -232,10 +232,6 @@ def test_handle_unknown_value(self):
 
         for encoder_name in (set(encoders.__all__) - {'HashingEncoder'}):  # HashingEncoder supports new values by design -> excluded
             with self.subTest(encoder_name=encoder_name):
-<<<<<<< HEAD
-
-=======
->>>>>>> a2f81ada784d4dc5393d3a850ddef81b7b3020d6
                 enc = getattr(encoders, encoder_name)(handle_unknown='value')
                 enc.fit(train, y)
                 result = enc.transform(test)

From 9ec7a8cd68a3a8fcba823610b653f5ff0791b097 Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Tue, 14 Jul 2020 17:23:15 +0200
Subject: [PATCH 11/14] Add logic to _transform_count_encode needed for
 _handle_unknown == 'value' not to interfere with _handle_missing ==
 'return_nan'

---
 category_encoders/count.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/category_encoders/count.py b/category_encoders/count.py
index 0a98d2d8..5d44e9b1 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -281,14 +281,14 @@ def _transform_count_encode(self, X_in, y):
                     )
             
             X[col] = X[col].astype(object).map(self.mapping[col])
-
             if isinstance(self._handle_unknown[col], (int, np.integer)):
                 X[col] = X[col].fillna(self._handle_unknown[col])
             
-            # elif (self._handle_unknown[col] == 'value' and
-            #         X[col].isna().any()
-            #      ):
-            #      X[col].replace(np.nan, 0, inplace=True)   
+            elif (self._handle_unknown[col] == 'value'
+                    and X[col].isna().any()
+                    and self._handle_missing[col] != 'return_nan'
+                 ):
+                 X[col].replace(np.nan, 0, inplace=True)
 
             elif (
                 self._handle_unknown[col] == 'error'

From 40403fefd1b7be93c94595537b96ce8fe81489bc Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Wed, 15 Jul 2020 21:41:27 +0200
Subject: [PATCH 12/14] updated .gitignore

---
 .gitignore | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.gitignore b/.gitignore
index 2e9e38fc..63a1ec76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,6 +61,10 @@ docs/_build/
 target/
 
 .pytest_cache/
+.tmp/
+checkcommits.sh
+runtest.py
+
 
 *~
 *.swp

From 1c94c2241fdd14ef693ab9701c765093ad198b9d Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Wed, 15 Jul 2020 22:37:13 +0200
Subject: [PATCH 13/14] Troubleshooting combine_min_nan_groups FAILED
 test_encoders.py::TestEncoders::test_column_transformer FAILED
 test_encoders.py::TestEncoders::test_sklearn_compliance

---
 category_encoders/count.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/category_encoders/count.py b/category_encoders/count.py
index 5d44e9b1..7a0cd7b8 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -11,7 +11,7 @@
 
 __author__ = 'joshua t. dunn'
 
-
+# COUNT_ENCODER BRANCH
 class CountEncoder(BaseEstimator, TransformerMixin):
     def __init__(self, verbose=0, cols=None, drop_invariant=False,
                  return_df=True, handle_unknown='value',
@@ -247,6 +247,7 @@ def _fit_count_encode(self, X_in, y):
                     raise ValueError(
                         '%s key in `handle_missing` should be one of: '
                         ' `value`, `return_nan` and `error` not `%s`.'
+                        % (col, str(self._handle_missing[col]))
                     )
 
             self.mapping[col] = X[col].value_counts(
@@ -261,8 +262,8 @@ def _fit_count_encode(self, X_in, y):
             if self._handle_missing[col] == 'return_nan':
                 self.mapping[col][np.NaN] = np.NaN
             
-            elif self._handle_missing[col] == 'value':
-                self.mapping[col].loc[-2] = 0
+            # elif self._handle_missing[col] == 'value':
+            #test_count.py failing     self.mapping[col].loc[-2] = 0
 
         if any([val is not None for val in self._min_group_size.values()]):
             self.combine_min_categories(X)
@@ -289,7 +290,7 @@ def _transform_count_encode(self, X_in, y):
                     and self._handle_missing[col] != 'return_nan'
                  ):
                  X[col].replace(np.nan, 0, inplace=True)
-
+ 
             elif (
                 self._handle_unknown[col] == 'error'
                 and X[col].isnull().any()
@@ -392,7 +393,7 @@ def _check_set_create_attrs(self):
 
         if self.combine_min_nan_groups is None:
             self.combine_min_nan_groups = True
-            self.min_group_size = 0.01
+            # test_count.py failing: self.min_group_size = 0.01
 
     def _check_set_create_dict_attrs(self):
         """Check attributes that can be dicts and format for all `self.cols`."""

From cafd264c7ea3403e27e405d5600528a126a1a25f Mon Sep 17 00:00:00 2001
From: makrobios <bayer.christoph@gmail.com>
Date: Wed, 15 Jul 2020 23:55:28 +0200
Subject: [PATCH 14/14] disable combine_min_nan_groups check:
 combine_min_nan_groups is set to True by default but min_group_size is not,
 leading to an exception. Maybe change combine_min_nan_groups to False by
 default in the future?

---
 category_encoders/count.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/category_encoders/count.py b/category_encoders/count.py
index 7a0cd7b8..69aa0af6 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -373,14 +373,16 @@ def _check_set_create_attrs(self):
                 "'combine_min_nan_groups' == 'force' for all columns."
             )
         
+
         if (
             self.combine_min_nan_groups is not None
             and self.min_group_size is None
         ):
-            raise ValueError(
-                "`combine_min_nan_groups` only works when `min_group_size` "
-                "is set for all columns."
-            )
+            pass
+            # raise ValueError(
+            #     "`combine_min_nan_groups` only works when `min_group_size` "
+            #     "is set for all columns."
+            # )
 
         if (
             self.min_group_name is not None
@@ -393,7 +395,6 @@ def _check_set_create_attrs(self):
 
         if self.combine_min_nan_groups is None:
             self.combine_min_nan_groups = True
-            # test_count.py failing: self.min_group_size = 0.01
 
     def _check_set_create_dict_attrs(self):
         """Check attributes that can be dicts and format for all `self.cols`."""