Updated to document sparse cubes
Also cleaned up some formatting.
iamsrp-deshaw committed Aug 15, 2024
1 parent 113e789 commit d9dc29d
Showing 1 changed file with 75 additions and 38 deletions.
113 changes: 75 additions & 38 deletions python/tests/hypercube.ipynb
@@ -79,11 +79,11 @@
"#\n",
"# We also pipe the stdout and stderr of the Java process to\n",
"# /dev/null by means of setting the filehandles to None.\n",
"c = pjrmi.connect_to_child_jvm(stdout=None,\n",
" stderr=None,\n",
" application_args=('num_workers=4',),\n",
" use_shm_arg_passing=True,\n",
" java_args=['--add-modules', 'jdk.incubator.vector'])"
"cx = pjrmi.connect_to_child_jvm(stdout=None,\n",
" stderr=None,\n",
" application_args=('num_workers=4',),\n",
" use_shm_arg_passing=True,\n",
" java_args=['--add-modules', 'jdk.incubator.vector'])"
]
},
{
@@ -106,18 +106,19 @@
"outputs": [],
"source": [
"# Defining all the relevant Hypercube classes\n",
"Dimension = c.class_for_name('com.deshaw.hypercube.Dimension')\n",
"Hypercube = c.class_for_name('com.deshaw.hypercube.Hypercube')\n",
"BooleanBitSetHypercube = c.class_for_name('com.deshaw.hypercube.BooleanBitSetHypercube')\n",
"DoubleArrayHypercube = c.class_for_name('com.deshaw.hypercube.DoubleArrayHypercube')\n",
"FloatArrayHypercube = c.class_for_name('com.deshaw.hypercube.FloatArrayHypercube')\n",
"IntegerArrayHypercube = c.class_for_name('com.deshaw.hypercube.IntegerArrayHypercube')\n",
"LongArrayHypercube = c.class_for_name('com.deshaw.hypercube.LongArrayHypercube')\n",
"DoubleMappedHypercube = c.class_for_name('com.deshaw.hypercube.DoubleMappedHypercube')\n",
"Dimension = cx.class_for_name('com.deshaw.hypercube.Dimension')\n",
"Hypercube = cx.class_for_name('com.deshaw.hypercube.Hypercube')\n",
"BooleanBitSetHypercube = cx.class_for_name('com.deshaw.hypercube.BooleanBitSetHypercube')\n",
"DoubleArrayHypercube = cx.class_for_name('com.deshaw.hypercube.DoubleArrayHypercube')\n",
"FloatArrayHypercube = cx.class_for_name('com.deshaw.hypercube.FloatArrayHypercube')\n",
"IntegerArrayHypercube = cx.class_for_name('com.deshaw.hypercube.IntegerArrayHypercube')\n",
"LongArrayHypercube = cx.class_for_name('com.deshaw.hypercube.LongArrayHypercube')\n",
"DoubleMappedHypercube = cx.class_for_name('com.deshaw.hypercube.DoubleMappedHypercube')\n",
"DoubleSparseHypercube = cx.class_for_name('com.deshaw.hypercube.DoubleSparseHypercube')\n",
"\n",
"# Defining our two Cube Math implementations and aliases for them\n",
"CubeMath = cm = c.class_for_name('com.deshaw.hypercube.CubeMath')\n",
"VectorizedCubeMath = vcm = c.class_for_name('com.deshaw.hypercube.VectorizedCubeMath')"
"CubeMath = cm = cx.class_for_name('com.deshaw.hypercube.CubeMath')\n",
"VectorizedCubeMath = vcm = cx.class_for_name('com.deshaw.hypercube.VectorizedCubeMath')"
]
},
{
@@ -224,15 +225,18 @@
"source": [
"# First, let's reshape the cube, so we have multiple dimensions to work with.\n",
"reshaped = cube.reshape((2, 5))\n",
"print(\"Reshaped:\\n\", reshaped)\n",
"print(\"Reshaped:\")\n",
"print(reshaped)\n",
"\n",
"# Now, shift (i.e., \"flat roll\") the cube.\n",
"shifted = reshaped.roll(2)\n",
"print(\"Shifted:\\n\", shifted)\n",
"print(\"Shifted:\")\n",
"print(shifted)\n",
"\n",
"# Now, let's try rolling the cube across both axes.\n",
"rolled = reshaped.roll((1, 2))\n",
"print(\"Rolled:\\n\", rolled)\n",
"print(\"Rolled:\")\n",
"print(rolled)\n",
"\n",
"# We can also roll a cube by specifying a specific dimension\n",
"print(reshaped.roll(2, axis=1))"
@@ -260,20 +264,53 @@
"marray = numpy.memmap('/dev/shm/example.dat', dtype = numpy.float64, mode = 'w+', shape = (3,3), order = 'C')\n",
"mcube = DoubleMappedHypercube('/dev/shm/example.dat', Dimension.of((3, 3)))\n",
"\n",
"print(\"Memory-mapped array:\\n\", marray)\n",
"print(\"Memory-mapped cube:\\n\", mcube)\n",
"print(\"Memory-mapped array:\")\n",
"print(marray)\n",
"print(\"Memory-mapped cube:\")\n",
"print(mcube)\n",
"\n",
"# Let's make a change in our array\n",
"marray += 1\n",
"\n",
"# Let's see the change reflected in our cube\n",
"print(\"Updated memory-mapped cube:\\n\", mcube)\n",
"print(\"Updated memory-mapped cube:\")\n",
"print(mcube)\n",
"\n",
"# Now let's make a change to our cube\n",
"CubeMath.negative(mcube, mcube)\n",
"\n",
"# Let's see the change reflected in our array\n",
"print(\"Updated memory-mapped array:\\n\", marray)"
"print(\"Updated memory-mapped array:\")\n",
"print(marray)"
]
},
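The sharing in this cell rests on the array and the cube both mapping the same file; the mechanism can be sketched with numpy alone (an editor's sketch using a hypothetical temp-file path rather than the notebook's `/dev/shm/example.dat`):

```python
import os
import tempfile

import numpy as np

# Two views over the same file-backed buffer: writes made through one
# view are visible through the other, which is what keeps the mapped
# hypercube and the memmapped array in sync.
path = os.path.join(tempfile.mkdtemp(), "example.dat")
a = np.memmap(path, dtype=np.float64, mode="w+", shape=(3, 3))
b = np.memmap(path, dtype=np.float64, mode="r", shape=(3, 3))

a[:] = 1.0
a.flush()
print(b.sum())  # the second view sees all nine 1.0 values
```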
+{
+"cell_type": "markdown",
+"id": "26b9e0bd-dd83-4df5-872c-a2bca967a14d",
+"metadata": {},
+"source": [
+"### Sparse Hypercube\n",
+"\n",
+"For high-dimensional data which isn't dense, it may be desirable to have a more memory-efficient backing store. The sparse cubes provide this support."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "3e604b08-94ff-4187-a900-b831b28766e4",
+"metadata": {},
+"outputs": [],
+"source": [
+"sparse = DoubleSparseHypercube(Dimension.of((4, 4)))\n",
+"k = 0\n",
+"for i in range(0, 4):\n",
+" for j in range(0, 4):\n",
+" if ((i + j) % 2) == 0:\n",
+" sparse[i,j] = k\n",
+" k += 1\n",
+"print(\"Sparse:\")\n",
+"print(sparse)\n",
+"print(\"Sum: \", CubeMath.nansum(sparse))"
+]
+},
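Conceptually, a sparse cube stores only the populated cells. A plain-Python sketch of that backing store, a dict keyed by coordinates, reproduces the checkerboard fill above (an editor's illustration, not the Java implementation):

```python
# Minimal sketch of a sparse store: keep only the non-empty cells in a
# map from coordinates to values; missing keys read as the empty value.
sparse = {}  # (i, j) -> value
k = 0
for i in range(4):
    for j in range(4):
        if (i + j) % 2 == 0:
            sparse[(i, j)] = float(k)
            k += 1

def get(cube, i, j):
    """Read a cell, defaulting to the sparse cube's 'empty' value."""
    return cube.get((i, j), 0.0)

# Only 8 of the 16 cells are actually stored.
print([[get(sparse, i, j) for j in range(4)] for i in range(4)])
print("Sum:", sum(sparse.values()))
```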
{
@@ -423,11 +460,11 @@
},
"outputs": [],
"source": [
"print(\"Sin:\\n\", CubeMath.sin(double_cube))\n",
"print(\"Cos:\\n\", CubeMath.cos(double_cube))\n",
"print(\"Tanh:\\n\", CubeMath.tanh(double_cube))\n",
"print(\"Exp:\\n\", CubeMath.exp(double_cube))\n",
"print(\"Log:\\n\", CubeMath.log(double_cube))\n",
"print(\"Sin:\\n\", CubeMath.sin(double_cube))\n",
"print(\"Cos:\\n\", CubeMath.cos(double_cube))\n",
"print(\"Tanh:\\n\", CubeMath.tanh(double_cube))\n",
"print(\"Exp:\\n\", CubeMath.exp(double_cube))\n",
"print(\"Log:\\n\", CubeMath.log(double_cube))\n",
"print(\"Log10:\\n\", CubeMath.log10(double_cube))"
]
},
@@ -513,14 +550,14 @@
"source": [
"reshaped_array = np.array(reshaped)\n",
"\n",
"print(\"Our previously reshaped cube as an array:\\n\", reshaped_array)\n",
"print(\"Now with the axes swapped:\\n\", reshaped_array.swapaxes(0, 1))\n",
"print(\"Our previously reshaped cube as an array:\")\n",
"print(reshaped_array)\n",
"print(\"Now with the axes swapped:\")\n",
"print(reshaped_array.swapaxes(0, 1))\n",
"\n",
"# Now try swapping the cube's axes through the static np.swapaxes method\n",
"print(\"Swapping the cube's axes:\\n\", np.swapaxes(reshaped, 0, 1))\n",
"\n",
"# Now uncomment the following line to do the same but through the class attribute .swapaxes method instead.\n",
"# print(\"Swapping the cube's axes:\\n\", reshaped.swapaxes(0, 1))"
"print(\"Swapping the cube's axes:\")\n",
"print(np.swapaxes(reshaped, 0, 1))"
]
},
{
@@ -778,7 +815,7 @@
"id": "c9dfb422",
"metadata": {},
"source": [
"Again, let's repopulate our array and cubes here to make sure we're using numbers between [0, 1] so that the result of `power` always stays in the same."
"Again, let's repopulate our array and cubes here to make sure we're using numbers between [0, 1] so that the result of `power` always stays in the same range."
]
},
{
@@ -823,7 +860,7 @@
"2. Complex Math Operations (e.g., sin, cos, exp, power, etc.):\n",
"VectorizedCubeMath outperforms CubeMath (and at times `numpy`) in these operations because it uses specialized SIMD-instructions, as well as a highly optimized algorithm, written in native assembly and C. The exact performance of `numpy` will depend on the underlying backend libraries, and its configuration.\n",
"3. Reductive operations (e.g., sum, nansum, etc.):\n",
"VectorizedCubeMath and CubeMath perform similarly for reductive operations, with CubeMath being faster for some (e.g., min) and VectorizedCubeMath being faster in others (e.g., sum). Notably, CubeMath and VectorizedCubeMath tend to outperform `numpy` in nansum. We speculate that `numpy` may be using a Python implementation for this operation.\n",
"VectorizedCubeMath and CubeMath perform similarly for reductive operations, with CubeMath being faster for some (e.g., min) and VectorizedCubeMath being faster in others (e.g., sum). Notably, CubeMath and VectorizedCubeMath tend to outperform `numpy` in nansum, since the latter uses `sum()` with a masked ndarray.\n",
"\n",
"It's worth noting once again that depending on the available hardware and the specific configurations, these libraries may perform differently.\n"
]
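The `nansum` semantics discussed in that cell can be checked on the numpy side alone (a small semantic sketch; the relative timings claimed above will vary with your numpy build and hardware):

```python
import numpy as np

a = np.array([[1.0, np.nan], [2.0, 3.0]])

# Plain sum propagates NaN; nansum treats NaN as zero, which is the
# behaviour the cube-math nansum matches.
print(np.sum(a))     # nan
print(np.nansum(a))  # 6.0
```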
@@ -841,9 +878,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 Maestro (10GB)",
"display_name": "Python 3",
"language": "python",
"name": "maestro"
"name": "name"
},
"language_info": {
"codemirror_mode": {
@@ -855,7 +892,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.11.8"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
