From 520914feaa39b457256c701588498a0a8c76b5cf Mon Sep 17 00:00:00 2001 From: mrbean-bremen Date: Fri, 12 Jun 2020 22:16:04 +0200 Subject: [PATCH] Added partial support for pandas to work with pyfakefs - support for read_csv and read_excel (with the default module) - see #531 --- CHANGES.md | 4 + docs/usage.rst | 14 ++- extra_requirements.txt | 7 +- pyfakefs/fake_filesystem_unittest.py | 39 +++++++-- pyfakefs/patched_packages.py | 109 ++++++++++++++++++++++++ pyfakefs/tests/all_tests.py | 31 ++++--- pyfakefs/tests/fixtures/excel_test.xlsx | Bin 0 -> 4790 bytes pyfakefs/tests/patched_packages_test.py | 52 +++++++++++ 8 files changed, 230 insertions(+), 26 deletions(-) create mode 100644 pyfakefs/patched_packages.py create mode 100644 pyfakefs/tests/fixtures/excel_test.xlsx create mode 100644 pyfakefs/tests/patched_packages_test.py diff --git a/CHANGES.md b/CHANGES.md index a475ae52..7e0ed50e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,10 @@ The released versions correspond to PyPi releases. ## Version 4.1.0 (as yet unreleased) +#### New Features + * Added some support for pandas (`read_csv`, `read_excel`) to work with + the fake filesystem (see [#531](../../issues/531)) + #### Fixes * Do not override global warnings setting in `Deprecator` (see [#526](../../issues/526)) diff --git a/docs/usage.rst b/docs/usage.rst index 60a618fd..f3f5be65 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -356,6 +356,17 @@ if the real user is a root user (e.g. has the user ID 0). If you want to run your tests as a non-root user regardless of the actual user rights, you may want to set this to ``False``. +use_known_patches +~~~~~~~~~~~~~~~~~ +If this is set to ``True`` (the default), ``pyfakefs`` patches some +libraries that are known to not work out of the box, to be able to work with the +fake filesystem. Currently, this includes patches for the ``pandas`` methods +``read_csv`` and ``read_excel`` - more may follow. 
This flag is +there to be able to disable this functionality in case it causes any +problems. It may be removed or replaced by a more fine-grained argument in +future releases. + + Using convenience methods ------------------------- While ``pyfakefs`` can be used just with the standard Python file system @@ -605,9 +616,6 @@ A list of Python modules that are known to not work correctly with sufficient demand. - the ``Pillow`` image library does not work with pyfakefs at least if writing JPEG files (see `this issue `__) -- ``pandas`` (the Python data analysis library) uses its own internal file - system access, written in C, and does therefore not work with pyfakefs - (see `this issue `__) If you are not sure if a module can be handled, or how to do it, you can always write a new issue, of course! diff --git a/extra_requirements.txt b/extra_requirements.txt index 93525526..b9267c4d 100644 --- a/extra_requirements.txt +++ b/extra_requirements.txt @@ -9,5 +9,10 @@ # available at the time of writing. 
pathlib2>=2.3.2 - scandir>=1.8 + +# pandas + xlrd are used to test pandas-specific patches to allow +# pyfakefs to work with pandas +# we use the latest version to see any problems with new versions +pandas +xlrd diff --git a/pyfakefs/fake_filesystem_unittest.py b/pyfakefs/fake_filesystem_unittest.py index dc68e0e0..44ca7257 100644 --- a/pyfakefs/fake_filesystem_unittest.py +++ b/pyfakefs/fake_filesystem_unittest.py @@ -47,6 +47,8 @@ from pyfakefs.deprecator import Deprecator from pyfakefs.fake_filesystem import set_uid, set_gid, reset_ids from pyfakefs.helpers import IS_PYPY +from pyfakefs.patched_packages import get_modules_to_patch, \ + get_classes_to_patch, get_fake_module_classes try: from importlib.machinery import ModuleSpec @@ -74,7 +76,8 @@ def patchfs(_func=None, *, additional_skip_names=None, modules_to_reload=None, modules_to_patch=None, - allow_root_user=True): + allow_root_user=True, + use_known_patches=True): """Convenience decorator to use patcher with additional parameters in a test function. @@ -96,7 +99,8 @@ def wrapped(*args, **kwargs): additional_skip_names=additional_skip_names, modules_to_reload=modules_to_reload, modules_to_patch=modules_to_patch, - allow_root_user=allow_root_user) as p: + allow_root_user=allow_root_user, + use_known_patches=use_known_patches) as p: kwargs['fs'] = p.fs return f(*args, **kwargs) @@ -117,7 +121,8 @@ def load_doctests(loader, tests, ignore, module, additional_skip_names=None, modules_to_reload=None, modules_to_patch=None, - allow_root_user=True): # pylint: disable=unused-argument + allow_root_user=True, + use_known_patches=True): # pylint: disable=unused-argument """Load the doctest tests for the specified module into unittest. 
Args: loader, tests, ignore : arguments passed in from `load_tests()` @@ -129,7 +134,8 @@ def load_doctests(loader, tests, ignore, module, _patcher = Patcher(additional_skip_names=additional_skip_names, modules_to_reload=modules_to_reload, modules_to_patch=modules_to_patch, - allow_root_user=allow_root_user) + allow_root_user=allow_root_user, + use_known_patches=use_known_patches) globs = _patcher.replace_globs(vars(module)) tests.addTests(doctest.DocTestSuite(module, globs=globs, @@ -155,6 +161,8 @@ class TestCaseMixin: modules_to_patch: A dictionary of fake modules mapped to the fully qualified patched module names. Can be used to add patching of modules not provided by `pyfakefs`. + use_known_patches: If True (the default), some patches for commonly + used packages are applied which make them usable with pyfakefs. If you specify some of these attributes here and you have DocTests, consider also specifying the same arguments to :py:func:`load_doctests`. @@ -190,7 +198,8 @@ def setUpPyfakefs(self, additional_skip_names=None, modules_to_reload=None, modules_to_patch=None, - allow_root_user=True): + allow_root_user=True, + use_known_patches=True): """Bind the file-related modules to the :py:class:`pyfakefs` fake file system instead of the real file system. Also bind the fake `open()` function. @@ -212,7 +221,8 @@ def setUpPyfakefs(self, additional_skip_names=additional_skip_names, modules_to_reload=modules_to_reload, modules_to_patch=modules_to_patch, - allow_root_user=allow_root_user + allow_root_user=allow_root_user, + use_known_patches=use_known_patches ) self._stubber.setUp() @@ -247,7 +257,8 @@ def __init__(self, methodName='runTest', additional_skip_names=None, modules_to_reload=None, modules_to_patch=None, - allow_root_user=True): + allow_root_user=True, + use_known_patches=True): """Creates the test class instance and the patcher used to stub out file system related modules. 
@@ -261,6 +272,7 @@ def __init__(self, methodName='runTest', self.modules_to_reload = modules_to_reload self.modules_to_patch = modules_to_patch self.allow_root_user = allow_root_user + self.use_known_patches = use_known_patches @Deprecator('add_real_file') def copyRealFile(self, real_file_path, fake_file_path=None, @@ -337,7 +349,7 @@ class Patcher: def __init__(self, additional_skip_names=None, modules_to_reload=None, modules_to_patch=None, - allow_root_user=True): + allow_root_user=True, use_known_patches=True): """For a description of the arguments, see TestCase.__init__""" if not allow_root_user: @@ -361,6 +373,12 @@ def __init__(self, additional_skip_names=None, self.modules_to_reload = modules_to_reload or [] + if use_known_patches: + modules_to_patch = modules_to_patch or {} + modules_to_patch.update(get_modules_to_patch()) + self._class_modules.update(get_classes_to_patch()) + self._fake_module_classes.update(get_fake_module_classes()) + if modules_to_patch is not None: for name, fake_module in modules_to_patch.items(): self._fake_module_classes[name] = fake_module @@ -516,7 +534,8 @@ def _find_modules(self): # where py.error has no __name__ attribute # see https://github.com/pytest-dev/py/issues/73 continue - + if name == 'pandas.io.parsers': + print(name) module_items = module.__dict__.copy().items() # suppress specific pytest warning - see #466 @@ -588,6 +607,8 @@ def start_patching(self): self._patching = True for name, modules in self._modules.items(): + if name == 'TextFileReader': + print(name, modules) for module, attr in modules: self._stubs.smart_set( module, name, self.fake_modules[attr]) diff --git a/pyfakefs/patched_packages.py b/pyfakefs/patched_packages.py new file mode 100644 index 00000000..f115839a --- /dev/null +++ b/pyfakefs/patched_packages.py @@ -0,0 +1,109 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Provides patches for some commonly used modules that enable them to work +with pyfakefs. +""" +import sys + +try: + import pandas.io.parsers as parsers +except ImportError: + parsers = None + +try: + import xlrd +except ImportError: + xlrd = None + + +def get_modules_to_patch(): + modules_to_patch = {} + if xlrd is not None: + modules_to_patch['xlrd'] = XLRDModule + return modules_to_patch + + +def get_classes_to_patch(): + classes_to_patch = {} + if parsers is not None: + classes_to_patch[ + 'TextFileReader' + ] = 'pandas.io.parsers' + return classes_to_patch + + +def get_fake_module_classes(): + fake_module_classes = {} + if parsers is not None: + fake_module_classes[ + 'TextFileReader' + ] = FakeTextFileReader + return fake_module_classes + + +if xlrd is not None: + class XLRDModule: + """Patches the xlrd module, which is used as the default Excel file + reader by pandas. 
Disables using memory mapped files, which are + implemented platform-specific on OS level.""" + + def __init__(self, _): + self._xlrd_module = xlrd + + def open_workbook(self, filename=None, + logfile=sys.stdout, + verbosity=0, + use_mmap=False, + file_contents=None, + encoding_override=None, + formatting_info=False, + on_demand=False, + ragged_rows=False): + return self._xlrd_module.open_workbook( + filename, logfile, verbosity, False, file_contents, + encoding_override, formatting_info, on_demand, ragged_rows) + + def __getattr__(self, name): + """Forwards any unfaked calls to the standard xlrd module.""" + return getattr(self._xlrd_module, name) + +if parsers is not None: + # we currently need to add fake modules for both the parser module and + # the contained text reader - maybe this can be simplified + + class FakeTextFileReader: + fake_parsers = None + + def __init__(self, filesystem): + if self.fake_parsers is None: + self.__class__.fake_parsers = ParsersModule(filesystem) + + def __call__(self, *args, **kwargs): + return self.fake_parsers.TextFileReader(*args, **kwargs) + + def __getattr__(self, name): + return getattr(self.fake_parsers.TextFileReader, name) + + class ParsersModule: + def __init__(self, _): + self._parsers_module = parsers + + class TextFileReader(parsers.TextFileReader): + def __init__(self, *args, **kwargs): + kwargs['engine'] = 'python' + super().__init__(*args, **kwargs) + + def __getattr__(self, name): + """Forwards any unfaked calls to the pandas.io.parsers module.""" + return getattr(self._parsers_module, name) diff --git a/pyfakefs/tests/all_tests.py b/pyfakefs/tests/all_tests.py index d0971083..331a3d17 100644 --- a/pyfakefs/tests/all_tests.py +++ b/pyfakefs/tests/all_tests.py @@ -18,18 +18,22 @@ import sys import unittest -from pyfakefs.tests import dynamic_patch_test, fake_stat_time_test -from pyfakefs.tests import example_test -from pyfakefs.tests import fake_filesystem_glob_test -from pyfakefs.tests import 
fake_filesystem_shutil_test -from pyfakefs.tests import fake_filesystem_test -from pyfakefs.tests import fake_filesystem_unittest_test -from pyfakefs.tests import fake_filesystem_vs_real_test -from pyfakefs.tests import fake_open_test -from pyfakefs.tests import fake_os_test -from pyfakefs.tests import fake_pathlib_test -from pyfakefs.tests import fake_tempfile_test -from pyfakefs.tests import mox3_stubout_test +from pyfakefs.tests import ( + dynamic_patch_test, + fake_stat_time_test, + example_test, + fake_filesystem_glob_test, + fake_filesystem_shutil_test, + fake_filesystem_test, + fake_filesystem_unittest_test, + fake_filesystem_vs_real_test, + fake_open_test, + fake_os_test, + fake_pathlib_test, + fake_tempfile_test, + patched_packages_test, + mox3_stubout_test +) class AllTests(unittest.TestSuite): @@ -50,7 +54,8 @@ def suite(self): # pylint: disable-msg=C6409 loader.loadTestsFromModule(example_test), loader.loadTestsFromModule(mox3_stubout_test), loader.loadTestsFromModule(dynamic_patch_test), - loader.loadTestsFromModule(fake_pathlib_test) + loader.loadTestsFromModule(fake_pathlib_test), + loader.loadTestsFromModule(patched_packages_test) ]) return self diff --git a/pyfakefs/tests/fixtures/excel_test.xlsx b/pyfakefs/tests/fixtures/excel_test.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6b6b64d503229784f6611ef35f13df36f8b37dc3 GIT binary patch literal 4790 zcmaJ_XIPWl(hWi&5Q=n&^eWN?X$J)hJ#+-5w;)Xd2}OE~NH0>9DpI6KkgC$9OO+;| z0qF`z2-21I1<(C1T<<;3`#gE`BeQq*%$iwiwvGlN5gh;o0s)LNdUOD142=J7iGaBv zMMX|N(U0CcwTY2G#08M$`OQ~95LeE$=*f9~xsGJeg;zZC*vi>-XGg-e+%vhnB`!7S z=N9Ma>RstbM)i-xFRk=HaJ>6wRf?mR&&G|vE{7ZS6KcNWeMEAh>f+UFf=o~5vy$&n zEr=O$J`z|Z(>Bp~NzFQRGusc)PE0CfH_E>!qm+;#;jy#2X6=r2%J&A?J}@MWR_5GPq_3lM$|#4&rzzNGE0w8>)P$n4S}HRZ>pOHa z%*(b1^7ec9N|jx{Ubl*dKwMSofJ%w#8R6V%_7^6`0HXIOf@8_?E~7^Aj9$V~;N zx{_N2&gQEZbSA=>5?CS|(MrxA5xS(L+5tD&;F$LfoSy_91Hq{;tD7OBUpri;2o>(UcT>jWp7Q< z#ThywR2P@_P(EmhFST4vb^MaGP#qWL1ATsy9N#P@3Lsn#(R-mO8j 
zrRKq^RIh`_vRvap;GIst4O-X+8&S#bb7flh=< z`OFak0N_LTC+JZ84IQM1j|&WW0vxQdwl+qLr4grZ+!W8lOZQ}$qjS{pF=;Fpr{-Tz z+SP;(_$$pF_O|wAwMI~_e4Y>Ki}^94&ASI1t3_T((J3=aiB4_FnX^k5+ar3Qi_laF z|N19E@@?;oxsa_X`4W>lVWPuSkrfn$()0xkA)%5IW_L`3$j&9*HaL&Wce*sI_NT8$QSOGdFty0;SaggIm*6Qk>Bxg#0#Wn!!8B$x(}1+}kbY#e+`lNWIPG4rfGJ z6gGetL*Nqhweg#8fve(5x-@d}R;k4><=$j!OpeBYy1-I&t20`t-(^3o#w|U;ld~k* z{yhhlp?vjK&Hg3fUY3PvMs5wGorjg}FMmH_1Pm(4zTw!e z(E1KgS}T!lo1V(bpn6nqdD%5>E6*y!tpLUG?dOu+Et$Zi;K1-^-(ini=}qb;PMWZ> zt;`W>{v+DElH(sEynjM)=_}-8Ifa?=7mrrzNok2QnpK9UPtY5q9Zna9Z+f=We?l+Q zY2!mWz+fIo(O)m|6CgJlY9cXG=Nos8-0r_CrstKb)PHExnNq?h5w{qbY0oDeVI0L2 zW=sqym}tHRULVmb99ia+Gj3h8*~Oa9v~<_w3dkvQVijvZcat2Mf_oCcHI}HS>|@Q1 zq=dF=9vI?ydHE* zs~pKkk?_*SZzA;2%%?fvt>0yjwTqg$UdVK}>+~#wA>sbipZm&;Dx%j3g1|EAL%6qe z_swVrOfHWUY<4zR&%C}ta0gvxI?pdJqT9qT6|q+_FX*sJVX_LoaUWcZ)i-&C2}Sjt zW5;1e4fPVS`n5Md^3NnxnV67-^gLTzCArZ6O3Ayer(S9n$*a2ji9D>YBQWRS@yoUu z=88#sD2-axvPX5(CPo__H(F>e!*n1^>q26uFurKQcgA$WiC0mXX zZTxuu*js;RVj8MVL}XWU{ZQ#)_cPts(DPtRqSwILbj>e}n;#D@XYa}n8GH_#&z$^@ z8aR~D%&V$Cu)Vn@%%Z5*KqC1{_rde#C15ba`=~pqe2{p8{#J4G3(+vz(Z@_HjG;)K z#=OWQMnyIXP3_8_H=9GOKl@UUNms6gN(ol74d0<*1v!o>AGotoH^&%ftC%&!7-y+8 ztZ{R#G{6;(y0Kl{R-J?y+0{X8?l%TE38Vvu{W11k3{WYr<{20i(XdA z*B#$Hy@N2YAnUfb1yc@|8S}bV>2jpn0a?_-5rE@cLak&qAD{_?t317IaJpkX5JrK3 zVBCf0HFNg>jmGX6Q{j^+mzt1mfw~dh(n}Q}Q8Vr-a$=3}6gP=1SrN#qvFyCyX!$}} zfp42s338j^n+jBIv8uES4L-MqjY>;DiyTfHO*>V?*_VD?ZehKu`Lx__WgH#Ub_L#W z&5dFIWd;vq<4rO}`f(Zp+-^U^xO>e#LpZV%JzRNA!gWY+QpsI$30F{f4nl$cRLOMz z;volX1k6s~1L5d;_ZJ<-zHcFG6(c8}sn^y;cxQ2O<)KMh_y zamlwEw=Fm8pe}}%i32k4(XM5JJd*QV`Lg*uD?9oBk0tf8*G<)G*)vQD00D+47NI$ zG&sK5q`JK#8M_kEev_QIB$wzxONSSzG30O!JIid?pZj^nfj7RKo#sUY?L;u6ny5+D zrpz4?o7RNC%=5d|wTj2P>%y?QqP-hxBbfyK^!jL1!fIDRoqA;Jak=d{1*K)4-&3)7 z6r}wKwnT{`y?C1$LD9=3Tlan}UR_k+Vx8dbc^URpPs*G0gQYzi&KC?P#h`TpR}XkL zH(sYHpp@+H#)aNEK)bu8GvKD9gb5MFAfq2rKcLnj-aj(XG8N^WC*0zm3wHOgR8x?Xk&y; z!}6sz$QHYA+wyp_rIR`LG&uleV&};<+>-TG+?|qdG-ix5;(V*e`JQ z0X*Ll!WDla2sh{3VCe~4;m}9BEXTgnkR5m1p46)Z?8x#y5cYt= 
zZ$qVZKE1?gp3wpUx=WR12dybAE%pQlZKVJLz1P_o5jRyfp6nm1GS?0aC_~8WdWc`; z9{RzC=~=BYHBB+$H1h{nV`KckJDa|D&bSwE zuTCjOf_2fTgsIb%4-4G$7WP?YW1vtwNA{w9L8y9xD8%Ilgi->S1Z%;|4>T{}L<7zYwmpdl>c3l&&5Ya$bEH7|&~v zTbdZjAL)3IfKA=OF&FCG=B?)7GBlJ&;?mM42A(^A;1hJSfMg|+vL+FEIHE>9wX(l> zFY~4`4_PWMqSw6f3kjv-v zP2j2zi4w&mOqRL(^(x7#mA&?%PN8+ZC#bll~|Y@YjYqd6a(^M#-bn;&e~Y5wLb8SMme6*UyHzs!v`-LapX>v#gZ zdToa-z4+3dP_ae5XDHuq!Qg2C(T8Ezf+&rEP+Z${pn_zh#NqX?S#PUc)5y)*2aZRH zAZv>9#1F?^)~~PnJuR~JTitoz7Q}L0$p1^s_TFWgy@1`P4MkeQkPz_3-pgs~p`Y*P z!|#s+BVMlRcGf+tyian1?&QiocXNDZjDRP7ncy59;Pi^&?2vtO#qghY=EC83$Fn2W z$qasKO!%#So5+86KC5s~rm$1f$Fn8g`E(}x-SsS`o=D(RqbC0MzW*hNe_!P+Rh`i9 zsbNU}{sX_0@$V~~ZRsbJc52;ZzgGAMg8O})v#sZZ08VWlpO62dgWuiHR^CZ}I<+u- qVE^s@uWt3b_gQ3|ROhL+QU1?b)zJXrqYMC$;vWTkv$X!T=l=odUi&Km literal 0 HcmV?d00001 diff --git a/pyfakefs/tests/patched_packages_test.py b/pyfakefs/tests/patched_packages_test.py new file mode 100644 index 00000000..bb2ed6d1 --- /dev/null +++ b/pyfakefs/tests/patched_packages_test.py @@ -0,0 +1,52 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Provides patches for some commonly used modules that enable them to work +with pyfakefs. 
+""" +import os + +from pyfakefs import fake_filesystem_unittest + +try: + import pandas as pd +except ImportError: + pd = None + +try: + import xlrd +except ImportError: + xlrd = None + + +class TestPatchedPackages(fake_filesystem_unittest.TestCase): + def setUp(self): + self.setUpPyfakefs() + + if pd is not None: + def test_load_csv(self): + path = '/foo/bar.csv' + self.fs.create_file(path, contents='1,2,3,4') + df = pd.read_csv(path) + assert (df.columns == ['1', '2', '3', '4']).all() + + if pd is not None and xlrd is not None: + def test_load_excel(self): + path = '/foo/bar.xlsx' + src_path = os.path.dirname(os.path.abspath(__file__)) + src_path = os.path.join(src_path, 'fixtures', 'excel_test.xlsx') + # map the file into another location to be sure that + # the real fs is not used + self.fs.add_real_file(src_path, target_path=path) + df = pd.read_excel(path) + assert (df.columns == [1, 2, 3, 4]).all()