Skip to content

Commit

Permalink
[CLIENT-1824] Add HyperLogLog class to represent HLL values (#509)
Browse files Browse the repository at this point in the history
  • Loading branch information
juliannguyen4 authored Nov 15, 2023
1 parent b949c4f commit 4b7c4aa
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 26 deletions.
12 changes: 12 additions & 0 deletions aerospike_helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
##########################################################################

class HyperLogLog(bytes):
    """
    Represents a HyperLogLog value. This can be returned from the server or created in order to be sent to the server.

    The constructor takes in any argument that the :class:`bytes` constructor takes in,
    including no arguments at all (an empty value) and the ``encoding`` / ``errors``
    arguments used when the source is a :class:`str`.

    >>> h = HyperLogLog([1, 2, 3])
    >>> client.put(key, {"hyperloglog": h})
    """

    def __new__(cls, o=b"", *args, **kwargs) -> "HyperLogLog":
        # bytes is immutable, so the value has to be supplied in __new__
        # rather than __init__. Extra arguments (encoding, errors) are
        # forwarded untouched so the constructor matches bytes().
        return super().__new__(cls, o, *args, **kwargs)
39 changes: 16 additions & 23 deletions doc/data_mapping.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,29 +46,21 @@ Data Mappings

The following table shows which Python types map directly to Aerospike server types.

+---------------------------------+------------------------+
| Python Type | Server type |
+=================================+========================+
|:class:`int` |`integer`_ |
+---------------------------------+------------------------+
|:class:`bool` |depends on send_bool_as |
+---------------------------------+------------------------+
|:class:`str` |`string`_ |
+---------------------------------+------------------------+
|:class:`unicode` |`string`_ |
+---------------------------------+------------------------+
|:class:`float` |`double`_ |
+---------------------------------+------------------------+
|:class:`dict` |`map`_ |
+---------------------------------+------------------------+
|:class:`aerospike.KeyOrderedDict`|`key ordered map`_ |
+---------------------------------+------------------------+
|:class:`list` |`list`_ |
+---------------------------------+------------------------+
|:class:`bytes` |`blob`_ |
+---------------------------------+------------------------+
|:class:`aerospike.GeoJSON` |`GeoJSON`_ |
+---------------------------------+------------------------+
======================================== =========================
Python Type Server type
======================================== =========================
:class:`int` `integer`_
:class:`bool` depends on send_bool_as
:class:`str` `string`_
:class:`unicode` `string`_
:class:`float` `double`_
:class:`dict` `map`_
:class:`aerospike.KeyOrderedDict` `key ordered map`_
:class:`list` `list`_
:class:`bytes` `blob`_
:class:`aerospike.GeoJSON` `GeoJSON`_
:class:`aerospike_helpers.HyperLogLog` `HyperLogLog`_
======================================== =========================

.. note::

Expand All @@ -86,3 +78,4 @@ as a value.
.. _list: https://docs.aerospike.com/server/guide/data-types/cdt-list
.. _blob: https://docs.aerospike.com/server/guide/data-types/blob
.. _GeoJSON: https://docs.aerospike.com/server/guide/data-types/geospatial
.. _HyperLogLog: https://docs.aerospike.com/server/guide/data-types/hll
58 changes: 55 additions & 3 deletions src/main/conversions.c
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,41 @@ as_status pyobject_to_map(AerospikeClient *self, as_error *err,
return err->code;
}

/*
 * Report whether obj is an instance of a class named "HyperLogLog" whose
 * __module__ is "aerospike_helpers".
 *
 * Only the type's own name and the "__module__" entry of the type's own
 * dictionary are inspected; no exception is left set on return.
 *
 * Returns true on a match, false in every other case.
 */
static bool is_aerospike_hll_type(PyObject *obj)
{
    PyTypeObject *py_type = Py_TYPE(obj);

    if (strcmp(py_type->tp_name, "HyperLogLog") != 0) {
        // Class name is not HyperLogLog
        return false;
    }

    if (py_type->tp_dict == NULL) {
        // No type dictionary to look in (documented as possible for static
        // builtin types on newer CPython versions); cannot be our class
        return false;
    }

    // Borrowed reference owned by the type's dictionary. Nothing below runs
    // Python code, so it stays valid without taking our own reference.
    PyObject *py_module_name =
        PyDict_GetItemString(py_type->tp_dict, "__module__");
    if (py_module_name == NULL) {
        // Class does not belong to any module
        return false;
    }

    if (!PyUnicode_Check(py_module_name)) {
        // Invalid module name
        return false;
    }

    // PyUnicode_CompareWithASCIIString never raises and never returns an
    // error, unlike PyUnicode_AsUTF8 whose NULL result would crash strcmp().
    return PyUnicode_CompareWithASCIIString(py_module_name,
                                            "aerospike_helpers") == 0;
}

as_status pyobject_to_val(AerospikeClient *self, as_error *err,
PyObject *py_obj, as_val **val,
as_static_pool *static_pool, int serializer_type)
Expand Down Expand Up @@ -823,9 +858,23 @@ as_status pyobject_to_val(AerospikeClient *self, as_error *err,
Py_DECREF(py_ustr);
}
else if (PyBytes_Check(py_obj)) {
uint8_t *b = (uint8_t *)PyBytes_AsString(py_obj);
uint32_t b_len = (uint32_t)PyBytes_Size(py_obj);
*val = (as_val *)as_bytes_new_wrap(b, b_len, false);
char *py_obj_buffer = PyBytes_AsString(py_obj);
Py_ssize_t b_len = PyBytes_Size(py_obj);
uint8_t *new_buffer = (uint8_t *)malloc(sizeof(uint8_t) * b_len);
memcpy(new_buffer, py_obj_buffer, sizeof(uint8_t) * b_len);

as_bytes *bytes = as_bytes_new_wrap(new_buffer, b_len, true);
if (bytes == NULL) {
free(new_buffer);
return as_error_update(
err, AEROSPIKE_ERR_CLIENT,
"Unable to convert Python bytes to C client's as_bytes");
}
*val = (as_val *)bytes;

if (is_aerospike_hll_type(py_obj)) {
bytes->type = AS_BYTES_HLL;
}
}
else if (!strcmp(py_obj->ob_type->tp_name, "aerospike.Geospatial")) {
PyObject *py_parameter = PyUnicode_FromString("geo_data");
Expand Down Expand Up @@ -1032,6 +1081,9 @@ as_status pyobject_to_record(AerospikeClient *self, as_error *err,
char *str = PyBytes_AsString(value);
as_bytes_set(bytes, 0, (const uint8_t *)str, str_len);

if (is_aerospike_hll_type(value)) {
bytes->type = AS_BYTES_HLL;
}
ret_val = as_record_set_bytes(rec, name, bytes);
}
else if (PyByteArray_Check(value)) {
Expand Down
53 changes: 53 additions & 0 deletions src/main/serializer.c
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,59 @@ extern as_status deserialize_based_on_as_bytes_type(AerospikeClient *self,
}
}
} break;
case AS_BYTES_HLL: {
// Convert bytes to Python bytes object
PyObject *py_bytes = PyBytes_FromStringAndSize(
(const char *)bytes->value, (Py_ssize_t)bytes->size);
if (py_bytes == NULL) {
as_error_update(
error_p, AEROSPIKE_ERR_CLIENT,
"Unable to convert C client's as_bytes to Python bytes");
goto CLEANUP;
}
// Pass bytes object to new HLL class instance
PyObject *py_aerospike_helpers_module =
PyImport_ImportModule("aerospike_helpers");
if (py_aerospike_helpers_module == NULL) {
as_error_update(error_p, AEROSPIKE_ERR_CLIENT,
"Unable to import aerospike_helpers module");
goto HLL_CLEANUP1;
}

PyObject *py_hll_class =
PyObject_GetAttrString(py_aerospike_helpers_module, "HyperLogLog");
if (py_hll_class == NULL) {
as_error_update(error_p, AEROSPIKE_ERR,
"Unable to import HyperLogLog class from "
"aerospike_helpers module");
goto HLL_CLEANUP2;
}

if (!PyCallable_Check(py_hll_class)) {
as_error_update(error_p, AEROSPIKE_ERR,
"Unable to create HyperLogLog instance; "
"HyperLogLog class is not callable");
goto HLL_CLEANUP3;
}

PyObject *py_hll_instance =
PyObject_CallFunctionObjArgs(py_hll_class, py_bytes, NULL);
if (py_hll_instance == NULL) {
// An exception has been thrown by calling the HLL constructor
// We want to show the original exception instead of throwing our own exception
goto HLL_CLEANUP3;
}

*retval = py_hll_instance;

HLL_CLEANUP3:
Py_DECREF(py_hll_class);
HLL_CLEANUP2:
Py_DECREF(py_aerospike_helpers_module);
HLL_CLEANUP1:
Py_DECREF(py_bytes);
break;
}
default: {
// First try to return a raw byte array, if that fails raise an error
uint32_t bval_size = as_bytes_size(bytes);
Expand Down
38 changes: 38 additions & 0 deletions test/new_tests/test_hll.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest
from aerospike import exception as e
from aerospike_helpers.operations import hll_operations
from aerospike_helpers import HyperLogLog
from math import sqrt


Expand Down Expand Up @@ -451,3 +452,40 @@ def test_pos_hll_update(self):
_, _, res = self.as_connection.operate(self.test_keys[0], ops)

assert res["hll_bine"] == 3

def test_get_put_operate_hll(self):
    """
    An HLL bin read from the server can be written back unchanged and
    still be used by HLL operations afterwards.
    """
    key = self.test_keys[0]

    _, _, record = self.as_connection.get(key)
    hll_value = record["mh_bin"]
    assert type(hll_value) is HyperLogLog

    self.as_connection.put(key, {"mh_bin": hll_value})

    # mh_bin should return the same results as before reading and rewriting the bin
    describe_ops = [hll_operations.hll_describe("mh_bin")]
    _, _, result = self.as_connection.operate(key, describe_ops)
    assert result["mh_bin"] == [6, 12]

def test_put_get_hll_list(self):
    """
    Covers putting an HLL nested inside a list, because nested values are
    converted to their C client equivalent by a different code path than
    top-level bin values.
    """
    key = self.test_keys[0]

    # Test setup: fetch an existing HLL bin to nest inside a list
    _, _, record = self.as_connection.get(key)
    nested_bins = {"hll_list": [record["hll_bin"]]}

    self.as_connection.put(key, nested_bins)

    # Verify the list element was stored, and comes back, as an HLL type
    _, _, record = self.as_connection.get(key)
    first_item = record["hll_list"][0]
    assert type(first_item) is HyperLogLog

def test_hll_superclass(self):
    """HyperLogLog must remain a subclass of bytes."""
    assert issubclass(HyperLogLog, bytes)

0 comments on commit 4b7c4aa

Please sign in to comment.