Merge pull request #54 from Sage-Bionetworks/develop

Merge develop into master closes #44
Sage-Bionetworks · Feb 1, 2018 · 7f31277 · 7f31277
2 parents 48d758a + da6e2a7
commit 7f31277
Show file tree

Hide file tree

Showing 13 changed files with 758 additions and 24 deletions.
diff --git a/annotator/Pipeline.py b/annotator/Pipeline.py
@@ -2,7 +2,6 @@
 import pandas as pd
 import synapseclient as sc
 import readline
-import json
 from . import utils
 from . import schema as schemaModule
 from copy import deepcopy
@@ -44,9 +43,10 @@ def __init__(self, syn, view=None, meta=None, activeCols=[],
         """
         self.syn = syn
         self.view = view if view is None else self._parseView(view, sortCols)
-        self._entityViewSchema = self.syn.get(view) if isinstance(view, str) else None
+        self._entityViewSchema = (self.syn.get(view)
+                                  if isinstance(view, str) else None)
         self.schema = (schemaModule.flattenJson(schema)
-                        if isinstance(schema, str) else schema)
+                       if isinstance(schema, str) else schema)
         self._index = self.view.index if isinstance(
                 self.view, pd.DataFrame) else None
         self._activeCols = []
@@ -102,6 +102,38 @@ def shape(self):
         else:
             print("No data view set.")
 
+
+    def drop(self, labels, axis):
+        """ Delete rows or columns from a file view on Synapse.*
+            Rows are only dropped locally. Deleting rows from a
+            file view on Synapse would require deleting the file itself.
+            Columns are dropped both locally and remotely on Synapse.
+
+        Parameters
+        ----------
+        labels : str, list
+            Can either be a str indicating the index (usually formatted
+            ROWID_VERSION) or a list of str.
+        axis : int
+            For a two-dimensional dataframe, 0 indicates rows whereas
+            1 indicates columns.
+
+        Returns
+        -------
+        None. The view, index and schema are updated in place.
+        """
+        labels = [labels] if isinstance(labels, str) else labels
+        if axis == 0:
+            self._index = self._index.drop(labels)
+        elif axis == 1:
+            self._entityViewSchema = utils.dropColumns(
+                    self.syn, self._entityViewSchema, labels)
+            if isinstance(self.schema, pd.DataFrame):
+                self.schema = self.schema[[l not in labels
+                                           for l in self.schema.key]]
+        self.view = self.view.drop(labels, axis=axis)
+
+
     def metaHead(self):
         """ Print head of `self._meta` """
         if hasattr(self._meta, 'head'):
@@ -175,6 +207,32 @@ def metaActiveColumns(self, style="numbers"):
         else:
             print("No active columns.")
 
+
+    def addView(self, scope):
+        """ Add further Folders/Projects to the scope of `self.view`.
+
+        Parameters
+        ----------
+        scope : str, list
+            The Synapse IDs of the entities to add to the scope.
+
+        Returns
+        -------
+        None. `self.view` and its entity view schema are updated in place.
+        """
+        self._entityViewSchema = utils.addToScope(self.syn,
+                self._entityViewSchema, scope)
+        # Assuming row version/id values stay the same for the before-update
+        # rows, we can carry over values from the old view.
+        oldIndices = self._index
+        oldColumns = self.view.columns
+        newView = utils.synread(self.syn, self._entityViewSchema.id, silent=True)
+        for c in oldColumns:
+            newView.loc[oldIndices,c] = self.view[c].values
+        self.view = newView
+        self._index = self.view.index
+
+
     def addActiveCols(self, activeCols, path=False, isMeta=False, backup=True):
         """ Add column names to `self._activeCols` or `self._metaActiveCols`.
 
@@ -411,7 +469,7 @@ def _parseView(self, view, sortCols, isMeta=False):
         TypeError if view is not a str, list, or pandas.DataFrame
         """
         if isinstance(view, str):
-            return utils.synread(self.syn, view, sortCols)
+            return utils.synread(self.syn, view, sortCols=sortCols)
         elif isinstance(view, list) and meta:
             return utils.combineSynapseTabulars(self.syn, view, axis=1)
         elif isinstance(view, pd.DataFrame):
@@ -612,11 +670,7 @@ def createFileView(self, name, parent, scope, addCols=None, schema=None):
         self.backup("createFileView")
 
         # Fetch default keys, plus any preexisting annotation keys
-        if isinstance(scope, str):
-            scope = [scope]
-        params = {'scope': scope, 'viewType': 'file'}
-        cols = self.syn.restPOST('/column/view/scope',
-                                 json.dumps(params))['results']
+        cols = utils.getDefaultColumnsForScope(self.syn, scope)
 
         # Store flattened schema, add keys to active columns list.
         if self.schema is None:
@@ -627,7 +681,7 @@ def createFileView(self, name, parent, scope, addCols=None, schema=None):
             for k in self.schema.index.unique():
                 self.addActiveCols(k)
             schemaCols = utils.makeColumns(list(self.schema.index.unique()),
-                    asSynapseCols=False)
+                                           asSynapseCols=False)
             cols = self._getUniqueCols(schemaCols, cols)
 
         # Add keys defined during initialization
@@ -638,7 +692,7 @@ def createFileView(self, name, parent, scope, addCols=None, schema=None):
 
         # Add keys passed to addCols
         if addCols:
-            if isinstance(addCols, dict) and addCols[k] is None:
+            if isinstance(addCols, dict):
                 unspecifiedCols = [k for k in addCols if addCols[k] is None]
                 self.addActiveCols(unspecifiedCols)
             elif isinstance(addCols, list):
@@ -650,7 +704,7 @@ def createFileView(self, name, parent, scope, addCols=None, schema=None):
         # are added to `self.view` but not yet stored to Synapse.
         cols = [sc.Column(**c) for c in cols]
         entityViewSchema = sc.EntityViewSchema(name=name, columns=cols,
-                                     parent=parent, scopes=scope)
+                                               parent=parent, scopes=scope)
         self._entityViewSchema = self.syn.store(entityViewSchema)
         self.view = utils.synread(self.syn, self._entityViewSchema.id)
         self._index = self.view.index

diff --git a/annotator/schema.py b/annotator/schema.py
@@ -6,6 +6,7 @@
 from . import utils
 
 
+
 def getAnnotationsRelease():
     """
 

diff --git a/annotator/utils.py b/annotator/utils.py
@@ -2,9 +2,11 @@
 import pandas as pd
 import synapseclient as sc
 import re
+import json
 
 
-def synread(syn_, obj, sortCols=True):
+def synread(syn_, obj, silent=True, sortCols=True):
+
     """ A simple way to read in Synapse entities to pandas.DataFrame objects.
 
     Parameters
@@ -27,10 +29,11 @@ def synread(syn_, obj, sortCols=True):
     elif isinstance(obj, str):
         f = syn_.get(obj)
         d = _synread(obj, f, syn_, sortCols)
-        if hasattr(d, 'head'):
-            print(d.head())
-        if hasattr(d, 'shape'):
-            print("Full size:", d.shape)
+        if not silent:
+            if hasattr(d, 'head'):
+                print(d.head())
+            if hasattr(d, 'shape'):
+                print("Full size:", d.shape)
     else:  # is list-like
         files = list(map(syn_.get, obj))
         d = [_synread(synId_, f, syn_, sortCols)
@@ -87,12 +90,15 @@ def _keyValCols(keys, values, asSynapseCols):
     -------
     A list of dictionaries compatible with synapseclient.Column objects.
     """
+    sanitize = lambda v : v if pd.notnull(v) else ''
+    keys = list(map(sanitize, keys))
+    values = list(map(sanitize, values))
     val_length = map(lambda v: len(v) if v else 50, values)
     cols = [{'name': k, 'maximumSize': l,
              'columnType': "STRING", "defaultValue": v}
             for k, v, l in zip(keys, values, val_length)]
     if asSynapseCols:
-        cols = list(map(sc.Column, cols))
+        cols = list(map(lambda c: sc.Column(**c), cols))
     return cols
 
 
@@ -110,8 +116,8 @@ def _colsFromFile(fromFile, asSynapseCols):
     -------
     A list of dictionaries compatible with synapseclient.Column objects.
     """
-    f = pd.read_csv(fromFile, header=None)
-    return _keyValCols(f[0].values, f[1].values, asSynapseCols)
+    f = pd.read_csv(fromFile, names=['keys', 'values'])
+    return _keyValCols(f['keys'], f['values'], asSynapseCols)
 
 
 def _colsFromDict(d, asSynapseCols):
@@ -173,6 +179,89 @@ def makeColumns(obj, asSynapseCols=True):
         return _colsFromDict(obj, asSynapseCols)
     elif isinstance(obj, list):
         return _colsFromList(obj, asSynapseCols)
+    else:
+        raise TypeError("{} is not a supported type.".format(type(obj)))
+
+
+def dropColumns(syn, target, cols):
+    """ Delete columns from a file view on Synapse.
+
+    Parameters
+    ----------
+    syn : synapseclient.Synapse
+    target : str, synapseclient.Schema
+        The Synapse ID of a Synapse Table or File View, or its schema.
+    cols : str, list
+        A str or list of str indicating column names to drop.
+
+    Returns
+    -------
+    synapseclient.table.EntityViewSchema
+    """
+    cols = [cols] if isinstance(cols, str) else cols
+    schema = syn.get(target) if isinstance(target, str) else target
+    cols_ = syn.getTableColumns(schema.id)
+    for c in cols_:
+        if c.name in cols:
+            schema.removeColumn(c)
+    schema = syn.store(schema)
+    return schema
+
+
+def addToScope(syn, target, scope):
+    """ Add further Folders/Projects to the scope of a file view.
+
+    Parameters
+    ----------
+    syn : synapseclient.Synapse
+    target : str, synapseclient.Schema
+        The Synapse ID of the file view to update or its schema.
+    scope : str, list
+        The Synapse IDs of the entities to add to the scope.
+
+    Returns
+    -------
+    synapseclient.Schema
+    """
+    scope = [scope] if isinstance(scope, str) else scope
+    target = syn.get(target) if isinstance(target, str) else target
+    cols = list(syn.getTableColumns(target.id))
+    totalScope = target['scopeIds']
+    for s in scope:
+        totalScope.append(s)
+    # We need to preserve columns that are currently in the file view
+    # but aren't created automatically by synapseclient.EntityViewSchema.
+    defaultCols = getDefaultColumnsForScope(syn, totalScope)
+    defaultCols = [sc.Column(**c) for c in defaultCols]
+    colNames = [c['name'] for c in cols]
+    for c in defaultCols: # Preexisting columns have priority over defaults
+        if c['name'] not in colNames:
+            cols.append(c)
+    schema = sc.EntityViewSchema(name=target.name, parent=target.parentId,
+            columns=cols, scopes=totalScope, add_default_columns=False)
+    schema = syn.store(schema)
+    return schema
+
+
+def getDefaultColumnsForScope(syn, scope):
+    """ Fetches the columns which would be used in the creation
+    of a file view with the given scope.
+
+    Parameters
+    ----------
+    syn : synapseclient.Synapse
+    scope : str, list
+        The Synapse IDs of the entities to fetch columns for.
+
+    Returns
+    -------
+    list of dict
+    """
+    scope = [scope] if isinstance(scope, str) else scope
+    params = {'scope': scope, 'viewType': 'file'}
+    cols = syn.restPOST('/column/view/scope',
+                             json.dumps(params))['results']
+    return cols
 
 
 def combineSynapseTabulars(syn, tabulars, axis=0):
@@ -256,9 +345,6 @@ def substituteColumnValues(referenceList, mod):
     """
     if isinstance(mod, dict):
         referenceList = [mod[v] if v in mod else v for v in referenceList]
-    else:
-        raise TypeError("{} is not a supported referenceList type".format(
-            type(referenceList)))
     return referenceList
Original file line number	Diff line number	Diff line change
Expand Up		@@ -6,6 +6,7 @@
		from . import utils



		def getAnnotationsRelease():
		"""

Expand Down