Skip to content

Commit

Permalink
Merge pull request #54 from Sage-Bionetworks/develop
Browse files Browse the repository at this point in the history
Merge develop into master closes #44
  • Loading branch information
philerooski authored Feb 1, 2018
2 parents 48d758a + da6e2a7 commit 7f31277
Show file tree
Hide file tree
Showing 13 changed files with 758 additions and 24 deletions.
78 changes: 66 additions & 12 deletions annotator/Pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pandas as pd
import synapseclient as sc
import readline
import json
from . import utils
from . import schema as schemaModule
from copy import deepcopy
Expand Down Expand Up @@ -44,9 +43,10 @@ def __init__(self, syn, view=None, meta=None, activeCols=[],
"""
self.syn = syn
self.view = view if view is None else self._parseView(view, sortCols)
self._entityViewSchema = self.syn.get(view) if isinstance(view, str) else None
self._entityViewSchema = (self.syn.get(view)
if isinstance(view, str) else None)
self.schema = (schemaModule.flattenJson(schema)
if isinstance(schema, str) else schema)
if isinstance(schema, str) else schema)
self._index = self.view.index if isinstance(
self.view, pd.DataFrame) else None
self._activeCols = []
Expand Down Expand Up @@ -102,6 +102,38 @@ def shape(self):
else:
print("No data view set.")


def drop(self, labels, axis):
    """ Delete rows or columns from a file view on Synapse.*

    Rows are only dropped locally. Deleting rows from a
    file view on Synapse would require deleting the file itself.
    Columns are dropped both locally and remotely on Synapse.

    Parameters
    ----------
    labels : str, list
        Can either be a str indicating the index (usually formatted
        ROWID_VERSION) or a list of str.
    axis : int
        For a two-dimensional dataframe, 0 indicates rows whereas
        1 indicates columns.

    Returns
    -------
    The labels that were deleted. A single str is returned wrapped in
    a list; any other value is returned as it was passed in.
    """
    labels = [labels] if isinstance(labels, str) else labels
    if axis == 0:
        # Keep the cached index in step with the rows removed from the view.
        self._index = self._index.drop(labels)
    elif axis == 1:
        # Remove the columns from the stored view via utils.dropColumns
        # and keep the schema that call hands back.
        self._entityViewSchema = utils.dropColumns(
            self.syn, self._entityViewSchema, labels)
        if isinstance(self.schema, pd.DataFrame):
            # Discard schema rows whose key names a dropped column.
            keep = [key not in labels for key in self.schema.key]
            self.schema = self.schema[keep]
    self.view = self.view.drop(labels, axis=axis)
    return labels


def metaHead(self):
""" Print head of `self._meta` """
if hasattr(self._meta, 'head'):
Expand Down Expand Up @@ -175,6 +207,32 @@ def metaActiveColumns(self, style="numbers"):
else:
print("No active columns.")


def addView(self, scope):
    """ Add further Folders/Projects to the scope of `self.view`.

    The file view is re-read from Synapse after its scope is widened,
    and the values held locally in `self.view` are copied onto the
    freshly read view.

    Parameters
    ----------
    scope : str, list
        The Synapse IDs of the entities to add to the scope.

    Returns
    -------
    synapseclient.Schema
        The schema returned by `utils.addToScope`, which is also kept
        on `self._entityViewSchema`.
    """
    self._entityViewSchema = utils.addToScope(
        self.syn, self._entityViewSchema, scope)
    # Assuming row version/id values stay the same for the before-update
    # rows, we can carry over values from the old view.
    oldIndices = self._index
    oldView = self.view
    newView = utils.synread(
        self.syn, self._entityViewSchema.id, silent=True)
    for c in oldView.columns:
        # NOTE(review): relies on every old index label still being
        # present in the re-read view -- confirm.
        newView.loc[oldIndices, c] = oldView[c].values
    self.view = newView
    self._index = self.view.index
    return self._entityViewSchema


def addActiveCols(self, activeCols, path=False, isMeta=False, backup=True):
""" Add column names to `self._activeCols` or `self._metaActiveCols`.
Expand Down Expand Up @@ -411,7 +469,7 @@ def _parseView(self, view, sortCols, isMeta=False):
TypeError if view is not a str, list, or pandas.DataFrame
"""
if isinstance(view, str):
return utils.synread(self.syn, view, sortCols)
return utils.synread(self.syn, view, sortCols=sortCols)
elif isinstance(view, list) and meta:
return utils.combineSynapseTabulars(self.syn, view, axis=1)
elif isinstance(view, pd.DataFrame):
Expand Down Expand Up @@ -612,11 +670,7 @@ def createFileView(self, name, parent, scope, addCols=None, schema=None):
self.backup("createFileView")

# Fetch default keys, plus any preexisting annotation keys
if isinstance(scope, str):
scope = [scope]
params = {'scope': scope, 'viewType': 'file'}
cols = self.syn.restPOST('/column/view/scope',
json.dumps(params))['results']
cols = utils.getDefaultColumnsForScope(self.syn, scope)

# Store flattened schema, add keys to active columns list.
if self.schema is None:
Expand All @@ -627,7 +681,7 @@ def createFileView(self, name, parent, scope, addCols=None, schema=None):
for k in self.schema.index.unique():
self.addActiveCols(k)
schemaCols = utils.makeColumns(list(self.schema.index.unique()),
asSynapseCols=False)
asSynapseCols=False)
cols = self._getUniqueCols(schemaCols, cols)

# Add keys defined during initialization
Expand All @@ -638,7 +692,7 @@ def createFileView(self, name, parent, scope, addCols=None, schema=None):

# Add keys passed to addCols
if addCols:
if isinstance(addCols, dict) and addCols[k] is None:
if isinstance(addCols, dict):
unspecifiedCols = [k for k in addCols if addCols[k] is None]
self.addActiveCols(unspecifiedCols)
elif isinstance(addCols, list):
Expand All @@ -650,7 +704,7 @@ def createFileView(self, name, parent, scope, addCols=None, schema=None):
# are added to `self.view` but not yet stored to Synapse.
cols = [sc.Column(**c) for c in cols]
entityViewSchema = sc.EntityViewSchema(name=name, columns=cols,
parent=parent, scopes=scope)
parent=parent, scopes=scope)
self._entityViewSchema = self.syn.store(entityViewSchema)
self.view = utils.synread(self.syn, self._entityViewSchema.id)
self._index = self.view.index
Expand Down
1 change: 1 addition & 0 deletions annotator/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from . import utils



def getAnnotationsRelease():
"""
Expand Down
108 changes: 97 additions & 11 deletions annotator/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import pandas as pd
import synapseclient as sc
import re
import json


def synread(syn_, obj, sortCols=True):
def synread(syn_, obj, silent=True, sortCols=True):

""" A simple way to read in Synapse entities to pandas.DataFrame objects.
Parameters
Expand All @@ -27,10 +29,11 @@ def synread(syn_, obj, sortCols=True):
elif isinstance(obj, str):
f = syn_.get(obj)
d = _synread(obj, f, syn_, sortCols)
if hasattr(d, 'head'):
print(d.head())
if hasattr(d, 'shape'):
print("Full size:", d.shape)
if not silent:
if hasattr(d, 'head'):
print(d.head())
if hasattr(d, 'shape'):
print("Full size:", d.shape)
else: # is list-like
files = list(map(syn_.get, obj))
d = [_synread(synId_, f, syn_, sortCols)
Expand Down Expand Up @@ -87,12 +90,15 @@ def _keyValCols(keys, values, asSynapseCols):
-------
A list of dictionaries compatible with synapseclient.Column objects.
"""
sanitize = lambda v : v if pd.notnull(v) else ''
keys = list(map(sanitize, keys))
values = list(map(sanitize, values))
val_length = map(lambda v: len(v) if v else 50, values)
cols = [{'name': k, 'maximumSize': l,
'columnType': "STRING", "defaultValue": v}
for k, v, l in zip(keys, values, val_length)]
if asSynapseCols:
cols = list(map(sc.Column, cols))
cols = list(map(lambda c: sc.Column(**c), cols))
return cols


Expand All @@ -110,8 +116,8 @@ def _colsFromFile(fromFile, asSynapseCols):
-------
A list of dictionaries compatible with synapseclient.Column objects.
"""
f = pd.read_csv(fromFile, header=None)
return _keyValCols(f[0].values, f[1].values, asSynapseCols)
f = pd.read_csv(fromFile, names=['keys', 'values'])
return _keyValCols(f['keys'], f['values'], asSynapseCols)


def _colsFromDict(d, asSynapseCols):
Expand Down Expand Up @@ -173,6 +179,89 @@ def makeColumns(obj, asSynapseCols=True):
return _colsFromDict(obj, asSynapseCols)
elif isinstance(obj, list):
return _colsFromList(obj, asSynapseCols)
else:
raise TypeError("{} is not a supported type.".format(type(obj)))


def dropColumns(syn, target, cols):
    """ Remove named columns from a table or file view on Synapse.

    Parameters
    ----------
    syn : synapseclient.Synapse
    target : str, synapseclient.Schema
        Either the Synapse ID of the table / file view (fetched with
        `syn.get`) or its schema object.
    cols : str, list
        One column name, or a list of column names, to remove.

    Returns
    -------
    synapseclient.table.EntityViewSchema
        The schema as returned by `syn.store` after the removal.
    """
    if isinstance(cols, str):
        cols = [cols]
    if isinstance(target, str):
        schema = syn.get(target)
    else:
        schema = target
    # Look at every column currently on the table and detach the ones
    # whose name was requested.
    for column in syn.getTableColumns(schema.id):
        if column.name in cols:
            schema.removeColumn(column)
    return syn.store(schema)


def addToScope(syn, target, scope):
    """ Add further Folders/Projects to the scope of a file view.

    Parameters
    ----------
    syn : synapseclient.Synapse
    target : str, synapseclient.Schema
        The Synapse ID of the file view to update or its schema.
    scope : str, list
        The Synapse IDs of the entities to add to the scope.

    Returns
    -------
    synapseclient.Schema
        The schema as returned by `syn.store`.
    """
    scope = [scope] if isinstance(scope, str) else scope
    target = syn.get(target) if isinstance(target, str) else target
    cols = list(syn.getTableColumns(target.id))
    # Work on a copy so the caller's schema object does not have its
    # scope list modified in place.
    totalScope = list(target['scopeIds'])
    totalScope.extend(scope)
    # We need to preserve columns that are currently in the file view
    # but aren't automatically created when synapseclient.EntityViewSchema'ing.
    defaultCols = [sc.Column(**c)
                   for c in getDefaultColumnsForScope(syn, totalScope)]
    colNames = {c['name'] for c in cols}
    # Preexisting columns have priority over defaults
    cols.extend(c for c in defaultCols if c['name'] not in colNames)
    schema = sc.EntityViewSchema(
        name=target.name, parent=target.parentId, columns=cols,
        scopes=totalScope, add_default_columns=False)
    return syn.store(schema)


def getDefaultColumnsForScope(syn, scope):
    """ Ask Synapse which columns a file view over `scope` would get.

    Posts the scope to the '/column/view/scope' REST endpoint with a
    view type of 'file' and hands back the 'results' entry of the reply.

    Parameters
    ----------
    syn : synapseclient.Synapse
    scope : str, list
        The Synapse IDs of the entities to fetch columns for.

    Returns
    -------
    list of dict
    """
    if isinstance(scope, str):
        scope = [scope]
    payload = json.dumps({'scope': scope, 'viewType': 'file'})
    response = syn.restPOST('/column/view/scope', payload)
    return response['results']


def combineSynapseTabulars(syn, tabulars, axis=0):
Expand Down Expand Up @@ -256,9 +345,6 @@ def substituteColumnValues(referenceList, mod):
"""
if isinstance(mod, dict):
referenceList = [mod[v] if v in mod else v for v in referenceList]
else:
raise TypeError("{} is not a supported referenceList type".format(
type(referenceList)))
return referenceList


Expand Down
Loading

0 comments on commit 7f31277

Please sign in to comment.