Skip to content

Commit

Permalink
Added partial support for pandas to work with pyfakefs
Browse files Browse the repository at this point in the history
- support for read_csv and read_excel (with the default module)
- see pytest-dev#531
  • Loading branch information
mrbean-bremen committed Jun 13, 2020
1 parent d543219 commit 520914f
Show file tree
Hide file tree
Showing 8 changed files with 230 additions and 26 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ The released versions correspond to PyPi releases.

## Version 4.1.0 (as yet unreleased)

#### New Features
* Added some support for pandas (`read_csv`, `read_excel`) to work with
the fake filesystem (see [#531](../../issues/531))

#### Fixes
* Do not override global warnings setting in `Deprecator`
(see [#526](../../issues/526))
Expand Down
14 changes: 11 additions & 3 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,17 @@ if the real user is a root user (e.g. has the user ID 0). If you want to run
your tests as a non-root user regardless of the actual user rights, you may
want to set this to ``False``.

use_known_patches
~~~~~~~~~~~~~~~~~
If this is set to ``True`` (the default), ``pyfakefs`` patches some
libraries that are known to not work out of the box, to be able to work with the
fake filesystem. Currently, this includes patches for the ``pandas`` methods
``read_csv`` and ``read_excel`` - more may follow. This flag is
there to be able to disable this functionality in case it causes any
problems. It may be removed or replaced by a more fine-grained argument in
future releases.


Using convenience methods
-------------------------
While ``pyfakefs`` can be used just with the standard Python file system
Expand Down Expand Up @@ -605,9 +616,6 @@ A list of Python modules that are known to not work correctly with
sufficient demand.
- the ``Pillow`` image library does not work with pyfakefs at least if writing
JPEG files (see `this issue <https://github.com/jmcgeheeiv/pyfakefs/issues/529>`__)
- ``pandas`` (the Python data analysis library) uses its own internal file
system access, written in C, and does therefore not work with pyfakefs
(see `this issue <https://github.com/jmcgeheeiv/pyfakefs/issues/528>`__)
If you are not sure if a module can be handled, or how to do it, you can
always write a new issue, of course!
Expand Down
7 changes: 6 additions & 1 deletion extra_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,10 @@
# available at the time of writing.

pathlib2>=2.3.2

scandir>=1.8

# pandas + xlrd are used to test pandas-specific patches to allow
# pyfakefs to work with pandas
# we use the latest version to see any problems with new versions
pandas
xlrd
39 changes: 30 additions & 9 deletions pyfakefs/fake_filesystem_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
from pyfakefs.deprecator import Deprecator
from pyfakefs.fake_filesystem import set_uid, set_gid, reset_ids
from pyfakefs.helpers import IS_PYPY
from pyfakefs.patched_packages import get_modules_to_patch, \
get_classes_to_patch, get_fake_module_classes

try:
from importlib.machinery import ModuleSpec
Expand Down Expand Up @@ -74,7 +76,8 @@ def patchfs(_func=None, *,
additional_skip_names=None,
modules_to_reload=None,
modules_to_patch=None,
allow_root_user=True):
allow_root_user=True,
use_known_patches=True):
"""Convenience decorator to use patcher with additional parameters in a
test function.
Expand All @@ -96,7 +99,8 @@ def wrapped(*args, **kwargs):
additional_skip_names=additional_skip_names,
modules_to_reload=modules_to_reload,
modules_to_patch=modules_to_patch,
allow_root_user=allow_root_user) as p:
allow_root_user=allow_root_user,
use_known_patches=use_known_patches) as p:
kwargs['fs'] = p.fs
return f(*args, **kwargs)

Expand All @@ -117,7 +121,8 @@ def load_doctests(loader, tests, ignore, module,
additional_skip_names=None,
modules_to_reload=None,
modules_to_patch=None,
allow_root_user=True): # pylint: disable=unused-argument
allow_root_user=True,
use_known_patches=True): # pylint: disable=unused-argument
"""Load the doctest tests for the specified module into unittest.
Args:
loader, tests, ignore : arguments passed in from `load_tests()`
Expand All @@ -129,7 +134,8 @@ def load_doctests(loader, tests, ignore, module,
_patcher = Patcher(additional_skip_names=additional_skip_names,
modules_to_reload=modules_to_reload,
modules_to_patch=modules_to_patch,
allow_root_user=allow_root_user)
allow_root_user=allow_root_user,
use_known_patches=use_known_patches)
globs = _patcher.replace_globs(vars(module))
tests.addTests(doctest.DocTestSuite(module,
globs=globs,
Expand All @@ -155,6 +161,8 @@ class TestCaseMixin:
modules_to_patch: A dictionary of fake modules mapped to the
fully qualified patched module names. Can be used to add patching
of modules not provided by `pyfakefs`.
use_known_patches: If True (the default), some patches for commonly
used packages are applied which make them usable with pyfakefs.
If you specify some of these attributes here and you have DocTests,
consider also specifying the same arguments to :py:func:`load_doctests`.
Expand Down Expand Up @@ -190,7 +198,8 @@ def setUpPyfakefs(self,
additional_skip_names=None,
modules_to_reload=None,
modules_to_patch=None,
allow_root_user=True):
allow_root_user=True,
use_known_patches=True):
"""Bind the file-related modules to the :py:class:`pyfakefs` fake file
system instead of the real file system. Also bind the fake `open()`
function.
Expand All @@ -212,7 +221,8 @@ def setUpPyfakefs(self,
additional_skip_names=additional_skip_names,
modules_to_reload=modules_to_reload,
modules_to_patch=modules_to_patch,
allow_root_user=allow_root_user
allow_root_user=allow_root_user,
use_known_patches=use_known_patches
)

self._stubber.setUp()
Expand Down Expand Up @@ -247,7 +257,8 @@ def __init__(self, methodName='runTest',
additional_skip_names=None,
modules_to_reload=None,
modules_to_patch=None,
allow_root_user=True):
allow_root_user=True,
use_known_patches=True):
"""Creates the test class instance and the patcher used to stub out
file system related modules.
Expand All @@ -261,6 +272,7 @@ def __init__(self, methodName='runTest',
self.modules_to_reload = modules_to_reload
self.modules_to_patch = modules_to_patch
self.allow_root_user = allow_root_user
self.use_known_patches = use_known_patches

@Deprecator('add_real_file')
def copyRealFile(self, real_file_path, fake_file_path=None,
Expand Down Expand Up @@ -337,7 +349,7 @@ class Patcher:

def __init__(self, additional_skip_names=None,
modules_to_reload=None, modules_to_patch=None,
allow_root_user=True):
allow_root_user=True, use_known_patches=True):
"""For a description of the arguments, see TestCase.__init__"""

if not allow_root_user:
Expand All @@ -361,6 +373,12 @@ def __init__(self, additional_skip_names=None,

self.modules_to_reload = modules_to_reload or []

if use_known_patches:
modules_to_patch = modules_to_patch or {}
modules_to_patch.update(get_modules_to_patch())
self._class_modules.update(get_classes_to_patch())
self._fake_module_classes.update(get_fake_module_classes())

if modules_to_patch is not None:
for name, fake_module in modules_to_patch.items():
self._fake_module_classes[name] = fake_module
Expand Down Expand Up @@ -516,7 +534,8 @@ def _find_modules(self):
# where py.error has no __name__ attribute
# see https://github.com/pytest-dev/py/issues/73
continue

if name == 'pandas.io.parsers':
print(name)
module_items = module.__dict__.copy().items()

# suppress specific pytest warning - see #466
Expand Down Expand Up @@ -588,6 +607,8 @@ def start_patching(self):
self._patching = True

for name, modules in self._modules.items():
if name == 'TextFileReader':
print(name, modules)
for module, attr in modules:
self._stubs.smart_set(
module, name, self.fake_modules[attr])
Expand Down
109 changes: 109 additions & 0 deletions pyfakefs/patched_packages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Provides patches for some commonly used modules that enable them to work
with pyfakefs.
"""
import sys

try:
import pandas.io.parsers as parsers
except ImportError:
parsers = None

try:
import xlrd
except ImportError:
xlrd = None


def get_modules_to_patch():
    """Return the fake modules provided for known packages.

    Returns:
        A dictionary mapping the name of the module to patch to the class
        that fakes it. It contains an entry for ``xlrd`` only if that
        package could be imported, otherwise it is empty.
    """
    if xlrd is None:
        return {}
    return {'xlrd': XLRDModule}


def get_classes_to_patch():
    """Return the classes that have to be patched in known packages.

    Returns:
        A dictionary mapping the class name to the name of the module that
        contains the class. It contains the pandas ``TextFileReader`` only
        if ``pandas.io.parsers`` could be imported, otherwise it is empty.
    """
    if parsers is None:
        return {}
    return {'TextFileReader': 'pandas.io.parsers'}


def get_fake_module_classes():
    """Return the fake classes that replace classes in known packages.

    Returns:
        A dictionary mapping the name of the patched class to the class
        that fakes it. It contains the fake for the pandas
        ``TextFileReader`` only if ``pandas.io.parsers`` could be imported,
        otherwise it is empty.
    """
    if parsers is None:
        return {}
    return {'TextFileReader': FakeTextFileReader}


if xlrd is not None:
    class XLRDModule:
        """Fake for the ``xlrd`` module that never uses memory mapped files.

        ``xlrd`` is used by pandas as the default Excel file reader.
        Memory mapped files are implemented platform-specific on OS level,
        so the real module is wrapped here to disable their usage.
        """

        def __init__(self, _):
            # The constructor argument is not used by this fake.
            self._xlrd_module = xlrd

        def open_workbook(self, filename=None,
                          logfile=sys.stdout,
                          verbosity=0,
                          use_mmap=False,
                          file_contents=None,
                          encoding_override=None,
                          formatting_info=False,
                          on_demand=False,
                          ragged_rows=False):
            """Open the workbook using the real ``xlrd`` module.

            The value passed for `use_mmap` is ignored - ``False`` is always
            handed to the real function. All other arguments are forwarded
            unchanged.
            """
            real_open_workbook = self._xlrd_module.open_workbook
            return real_open_workbook(
                filename, logfile, verbosity, False, file_contents,
                encoding_override, formatting_info, on_demand, ragged_rows)

        def __getattr__(self, name):
            """Delegates any attribute that is not faked to the real
            xlrd module."""
            return getattr(self._xlrd_module, name)

if parsers is not None:
    # Fakes are currently needed for both the parsers module and the text
    # reader class it contains - maybe this can be simplified.

    class FakeTextFileReader:
        """Fake for the pandas ``TextFileReader`` class.

        Calling an instance creates the patched reader defined in
        `ParsersModule`; any other attribute is looked up on that patched
        reader class.
        """
        # Shared by all instances; created by the first instantiation.
        fake_parsers = None

        def __init__(self, filesystem):
            if self.fake_parsers is None:
                fake_reader_class = self.__class__
                fake_reader_class.fake_parsers = ParsersModule(filesystem)

        def __call__(self, *args, **kwargs):
            reader_class = self.fake_parsers.TextFileReader
            return reader_class(*args, **kwargs)

        def __getattr__(self, name):
            reader_class = self.fake_parsers.TextFileReader
            return getattr(reader_class, name)

    class ParsersModule:
        """Fake for the ``pandas.io.parsers`` module that provides a text
        reader which always uses the Python parser engine."""

        def __init__(self, _):
            # The constructor argument is not used by this fake.
            self._parsers_module = parsers

        class TextFileReader(parsers.TextFileReader):
            """Text reader that overrides any requested engine
            with ``'python'``."""

            def __init__(self, *args, **kwargs):
                kwargs.update(engine='python')
                super().__init__(*args, **kwargs)

        def __getattr__(self, name):
            """Delegates any attribute that is not faked to the real
            pandas parsers module."""
            return getattr(self._parsers_module, name)
31 changes: 18 additions & 13 deletions pyfakefs/tests/all_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,22 @@
import sys
import unittest

from pyfakefs.tests import dynamic_patch_test, fake_stat_time_test
from pyfakefs.tests import example_test
from pyfakefs.tests import fake_filesystem_glob_test
from pyfakefs.tests import fake_filesystem_shutil_test
from pyfakefs.tests import fake_filesystem_test
from pyfakefs.tests import fake_filesystem_unittest_test
from pyfakefs.tests import fake_filesystem_vs_real_test
from pyfakefs.tests import fake_open_test
from pyfakefs.tests import fake_os_test
from pyfakefs.tests import fake_pathlib_test
from pyfakefs.tests import fake_tempfile_test
from pyfakefs.tests import mox3_stubout_test
from pyfakefs.tests import (
dynamic_patch_test,
fake_stat_time_test,
example_test,
fake_filesystem_glob_test,
fake_filesystem_shutil_test,
fake_filesystem_test,
fake_filesystem_unittest_test,
fake_filesystem_vs_real_test,
fake_open_test,
fake_os_test,
fake_pathlib_test,
fake_tempfile_test,
patched_packages_test,
mox3_stubout_test
)


class AllTests(unittest.TestSuite):
Expand All @@ -50,7 +54,8 @@ def suite(self): # pylint: disable-msg=C6409
loader.loadTestsFromModule(example_test),
loader.loadTestsFromModule(mox3_stubout_test),
loader.loadTestsFromModule(dynamic_patch_test),
loader.loadTestsFromModule(fake_pathlib_test)
loader.loadTestsFromModule(fake_pathlib_test),
loader.loadTestsFromModule(patched_packages_test)
])
return self

Expand Down
Binary file added pyfakefs/tests/fixtures/excel_test.xlsx
Binary file not shown.
52 changes: 52 additions & 0 deletions pyfakefs/tests/patched_packages_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Tests the patches for some commonly used modules that enable them to work
with pyfakefs.
"""
import os

from pyfakefs import fake_filesystem_unittest

try:
import pandas as pd
except ImportError:
pd = None

try:
import xlrd
except ImportError:
xlrd = None


class TestPatchedPackages(fake_filesystem_unittest.TestCase):
    # Each test is only defined if the packages it needs could be imported.

    def setUp(self):
        self.setUpPyfakefs()

    if pd is not None:
        def test_load_csv(self):
            csv_path = '/foo/bar.csv'
            self.fs.create_file(csv_path, contents='1,2,3,4')
            frame = pd.read_csv(csv_path)
            # the single line in the file is read as the header
            assert (frame.columns == ['1', '2', '3', '4']).all()

    if not (pd is None or xlrd is None):
        def test_load_excel(self):
            fake_path = '/foo/bar.xlsx'
            test_dir = os.path.dirname(os.path.abspath(__file__))
            real_path = os.path.join(test_dir, 'fixtures', 'excel_test.xlsx')
            # map the file into another location to be sure that
            # the real fs is not used
            self.fs.add_real_file(real_path, target_path=fake_path)
            frame = pd.read_excel(fake_path)
            assert (frame.columns == [1, 2, 3, 4]).all()

0 comments on commit 520914f

Please sign in to comment.