-
Notifications
You must be signed in to change notification settings - Fork 358
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enabling binary operations with list-like Python objects. #2054
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,6 +58,7 @@ | |
scol_for, | ||
validate_axis, | ||
ERROR_MESSAGE_CANNOT_COMBINE, | ||
check_same_length, | ||
) | ||
from databricks.koalas.frame import DataFrame | ||
|
||
|
@@ -321,6 +322,9 @@ def spark_column(self) -> Column: | |
__neg__ = column_op(Column.__neg__) | ||
|
||
def __add__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(pindex_ops + other) # type: ignore | ||
if not isinstance(self.spark.data_type, StringType) and ( | ||
(isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) | ||
or isinstance(other, str) | ||
|
@@ -339,6 +343,9 @@ def __add__(self, other) -> Union["Series", "Index"]: | |
return column_op(Column.__add__)(self, other) | ||
|
||
def __sub__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(pindex_ops - other) # type: ignore | ||
if ( | ||
isinstance(self.spark.data_type, StringType) | ||
or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) | ||
|
@@ -383,6 +390,9 @@ def __sub__(self, other) -> Union["Series", "Index"]: | |
return column_op(Column.__sub__)(self, other) | ||
|
||
def __mul__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(pindex_ops * other) # type: ignore | ||
if isinstance(other, str): | ||
raise TypeError("multiplication can not be applied to a string literal.") | ||
|
||
|
@@ -422,6 +432,9 @@ def __truediv__(self, other) -> Union["Series", "Index"]: | |
| -10 | null | -np.inf | | ||
+-----------------------|---------|---------+ | ||
""" | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(pindex_ops / other) # type: ignore | ||
|
||
if ( | ||
isinstance(self.spark.data_type, StringType) | ||
|
@@ -440,6 +453,9 @@ def truediv(left, right): | |
return numpy_column_op(truediv)(self, other) | ||
|
||
def __mod__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(pindex_ops % other) # type: ignore | ||
if ( | ||
isinstance(self.spark.data_type, StringType) | ||
or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) | ||
|
@@ -453,6 +469,11 @@ def mod(left, right): | |
return column_op(mod)(self, other) | ||
|
||
def __radd__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(other + pindex_ops) # type: ignore | ||
if isinstance(other, (list, tuple)): | ||
other = ks.Index(other, name=self.name) # type: ignore | ||
Comment on lines
+475
to
+476
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. not needed? |
||
# Handle 'literal' + df['col'] | ||
if not isinstance(self.spark.data_type, StringType) and isinstance(other, str): | ||
raise TypeError("string addition can only be applied to string series or literals.") | ||
|
@@ -466,6 +487,9 @@ def __radd__(self, other) -> Union["Series", "Index"]: | |
return column_op(Column.__radd__)(self, other) | ||
|
||
def __rsub__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(other - pindex_ops) # type: ignore | ||
if isinstance(self.spark.data_type, StringType) or isinstance(other, str): | ||
raise TypeError("substraction can not be applied to string series or literals.") | ||
|
||
|
@@ -495,9 +519,12 @@ def __rsub__(self, other) -> Union["Series", "Index"]: | |
return -column_op(F.datediff)(self, F.lit(other)).astype("long") | ||
else: | ||
raise TypeError("date subtraction can only be applied to date series.") | ||
return column_op(Column.__rsub__)(self, other) | ||
return column_op(lambda left, right: right - left)(self, other) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI: >>> kdf = ks.DataFrame({"A": [1, 2, 3, 4], "B": [10, 20, 30, 40]})
>>> sdf = kdf.to_spark()
>>> col1 = sdf.A
>>> col2 = sdf.B
>>> Column.__rsub__(col1, col2)
Traceback (most recent call last):
...
TypeError: Column is not iterable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It does support: >>> Column.__rsub__(df.id, 1)
Column<'(1 - id)'> It doesn't work in your case above because the instance is Spark column. In practice, that wouldn't happen because it will only be called when the first operand doesn't know how to handle Spark column e.g.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it cause any exception? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we use >>> kser = ks.Series([1, 2, 3, 4])
>>> [10, 20, 30, 40] - kser
Traceback (most recent call last):
...
TypeError: Column is not iterable There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that this case must be handled in lines 490-492. We can move back to |
||
|
||
def __rmul__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(other * pindex_ops) # type: ignore | ||
if isinstance(other, str): | ||
raise TypeError("multiplication can not be applied to a string literal.") | ||
|
||
|
@@ -512,6 +539,9 @@ def __rmul__(self, other) -> Union["Series", "Index"]: | |
return column_op(Column.__rmul__)(self, other) | ||
|
||
def __rtruediv__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(other / pindex_ops) # type: ignore | ||
if isinstance(self.spark.data_type, StringType) or isinstance(other, str): | ||
raise TypeError("division can not be applied on string series or literals.") | ||
|
||
|
@@ -539,6 +569,9 @@ def __floordiv__(self, other) -> Union["Series", "Index"]: | |
| -10 | null | -np.inf | | ||
+-----------------------|---------|---------+ | ||
""" | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(pindex_ops // other) # type: ignore | ||
if ( | ||
isinstance(self.spark.data_type, StringType) | ||
or (isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType)) | ||
|
@@ -560,6 +593,11 @@ def floordiv(left, right): | |
return numpy_column_op(floordiv)(self, other) | ||
|
||
def __rfloordiv__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(other // pindex_ops) # type: ignore | ||
if isinstance(other, (list, tuple)): | ||
other = ks.Index(other, name=self.name) # type: ignore | ||
Comment on lines
+599
to
+600
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. not needed? |
||
if isinstance(self.spark.data_type, StringType) or isinstance(other, str): | ||
raise TypeError("division can not be applied on string series or literals.") | ||
|
||
|
@@ -571,6 +609,9 @@ def rfloordiv(left, right): | |
return numpy_column_op(rfloordiv)(self, other) | ||
|
||
def __rmod__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(other % pindex_ops) # type: ignore | ||
if isinstance(self.spark.data_type, StringType) or isinstance(other, str): | ||
raise TypeError("modulo can not be applied on string series or literals.") | ||
|
||
|
@@ -580,12 +621,20 @@ def rmod(left, right): | |
return column_op(rmod)(self, other) | ||
|
||
def __pow__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(pindex_ops ** other) # type: ignore | ||
|
||
def pow_func(left, right): | ||
return F.when(left == 1, left).otherwise(Column.__pow__(left, right)) | ||
|
||
return column_op(pow_func)(self, other) | ||
|
||
def __rpow__(self, other) -> Union["Series", "Index"]: | ||
if isinstance(other, (list, tuple)): | ||
pindex_ops, other = check_same_length(self, other) | ||
return ks.from_pandas(other ** pindex_ops) # type: ignore | ||
|
||
def rpow_func(left, right): | ||
return F.when(F.lit(right == 1), right).otherwise(Column.__rpow__(left, right)) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shall we avoid using
# type: ignore
as much as possible? We can use `cast`
instead.