Skip to content

Commit 5b51e57

Browse files
committed
Finish up spherical geometry docs
1 parent e5dd6f0 commit 5b51e57

File tree

6 files changed

+211
-13
lines changed

6 files changed

+211
-13
lines changed

TODO

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
- Finish up section documentation
2-
31
- some UDFs and stored procedures still need unit tests
42

53
- Consider creating temp file for median() and select() in the MySQL TMP_DIR

doc/docs.css

+3-3
Original file line numberDiff line numberDiff line change
@@ -108,15 +108,15 @@ ul {
108108
.section h2 {
109109
background: #EEF;
110110
border-top: 1px solid #99B;
111-
margin: 0 0 1em 0;
111+
margin: 2em 0 1em 0;
112112
padding: 0.1em 0.1em 0.1em 0.5em;
113113
}
114114
.section-docs {
115115
margin-left: 50px;
116116
}
117117
.section-docs h3 {
118-
margin-left: -50px;
119-
padding-left: 1em;
118+
margin: 2em 0 1em -50px;
119+
padding-left: 1em;
120120
border-top: 1px solid #99B;
121121
background: #EEF;
122122
}

scripts/demo.mysql

+6-6
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ CREATE TABLE Science_Ccd_Exposure (
2525
ccdPoly BINARY(120) NOT NULL
2626
) ENGINE=MyISAM;
2727

28-
CREATE TABLE Science_Ccd_Exposure_IdRanges (
28+
CREATE TABLE Science_Ccd_Exposure_HtmId10 (
2929
scienceCcdExposureId BIGINT NOT NULL,
3030
htmId10 INTEGER NOT NULL,
3131
PRIMARY KEY (htmId10, scienceCcdExposureId),
@@ -38,7 +38,7 @@ INSERT INTO Science_Ccd_Exposure VALUE (43808501263, 359.960732808801, 0.0370
3838
INSERT INTO Science_Ccd_Exposure VALUE (43856062009, 359.963752800388, -0.000164598414602848, 0.0184876953780186, 0.214756306613358, 0.23727926928965, 0.159035132069238, 0.182543079658319, -0.0558855823940503, 2.37455e+12, 0, '');
3939
INSERT INTO Science_Ccd_Exposure VALUE (43904968207, 359.958110037772, 0.0734879497011048, 0.159109863573049, 0.167182106788634, 0.254490784321818, -0.0374364004155128, 0.0534909527032384, -0.131130475563235, 4.00006e+12, 0, '');
4040

41-
INSERT INTO Science_Ccd_Exposure_IdRanges VALUES
41+
INSERT INTO Science_Ccd_Exposure_HtmId10 VALUES
4242
(43799463478,8388608),
4343
(43799463478,8388611),
4444
(43799463478,12058624),
@@ -65,7 +65,7 @@ INSERT INTO Science_Ccd_Exposure_IdRanges VALUES
6565
(43799463478,16252941),
6666
(43799463478,16252942),
6767
(43799463478,16252943);
68-
INSERT INTO Science_Ccd_Exposure_IdRanges VALUES
68+
INSERT INTO Science_Ccd_Exposure_HtmId10 VALUES
6969
(43808367119,8388608),
7070
(43808367119,8388609),
7171
(43808367119,8388610),
@@ -91,7 +91,7 @@ INSERT INTO Science_Ccd_Exposure_IdRanges VALUES
9191
(43808367119,16252941),
9292
(43808367119,16252942),
9393
(43808367119,16252943);
94-
INSERT INTO Science_Ccd_Exposure_IdRanges VALUES
94+
INSERT INTO Science_Ccd_Exposure_HtmId10 VALUES
9595
(43808501263,8388608),
9696
(43808501263,8388609),
9797
(43808501263,8388610),
@@ -116,7 +116,7 @@ INSERT INTO Science_Ccd_Exposure_IdRanges VALUES
116116
(43808501263,16252941),
117117
(43808501263,16252942),
118118
(43808501263,16252943);
119-
INSERT INTO Science_Ccd_Exposure_IdRanges VALUES
119+
INSERT INTO Science_Ccd_Exposure_HtmId10 VALUES
120120
(43856062009,8388608),
121121
(43856062009,8388610),
122122
(43856062009,8388611),
@@ -141,7 +141,7 @@ INSERT INTO Science_Ccd_Exposure_IdRanges VALUES
141141
(43856062009,16252942),
142142
(43856062009,16252943),
143143
(43856062009,16252986);
144-
INSERT INTO Science_Ccd_Exposure_IdRanges VALUES
144+
INSERT INTO Science_Ccd_Exposure_HtmId10 VALUES
145145
(43904968207,8388608),
146146
(43904968207,8388609),
147147
(43904968207,8388610),

tools/docgen.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def __init__(self, elt):
280280
self.udfs = []
281281
self.procs = []
282282
# Extract example source code
283-
exlist = _find_many(elt, 'example', required=False, attrib=['lang', 'test'])
283+
exlist = list(elt.iter('example'))
284284
self.examples = map(Example, exlist)
285285
# Turn <example> tags into <pre> tags with the appropriate prettify attributes
286286
for ex in exlist:

tools/mysql.py

+1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def check_mysql(self, **kw):
7373
if not os.path.isfile(mysql) or not os.access(mysql, os.X_OK):
7474
self.fatal('${PREFIX}/bin/mysql does not identify an executable')
7575
self.env.MYSQL = mysql
76+
self.env.MYSQL_DIR = self.env.PREFIX
7677
self.end_msg(mysql)
7778
self.env.MYSQL_USER = self.options.mysql_user
7879
self.env.MYSQL_SOCKET = self.options.mysql_socket

tools/templates/sections.xml

+200-1
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,207 @@
135135
<section name="s2" title="Spherical Geometry">
136136
<div class="section-docs">
137137
<p>
138-
TODO
138+
The aim of the spherical geometry UDFs and stored procedures is to
139+
allow quick answers to the following sorts of questions:
139140
</p>
141+
<ol>
142+
<li>
143+
<em>Which points in a table lie inside a region on the sphere?</em> For example,
144+
an astronomer might wish to know which stars and galaxies lie inside the
145+
region of the sky observed by a single camera CCD.
146+
</li>
147+
<li>
148+
<em>Which spherical regions in a table contain a particular point?</em> For
149+
example, an astronomer might wish to know which telescope images overlap
150+
the position of interesting object X.
151+
</li>
152+
</ol>
153+
154+
<h3>HTM indexing</h3>
155+
<p>
156+
To accelerate these types of queries, SciSQL maps points/regions
157+
to the integer ID(s) of their containing/overlapping triangles in a
158+
Hierarchical Triangular Mesh (HTM). This is a decomposition of the
159+
unit sphere defined by A. Szalay, T. Budavari, G. Fekete at the
160+
Johns Hopkins University, and Jim Gray, Microsoft Research. See
161+
the following links for more information:
162+
</p>
163+
<ul>
164+
<li><a href="http://voservices.net/spherical/">http://voservices.net/spherical/</a></li>
165+
<li><a href="http://adsabs.harvard.edu/abs/2010PASP..122.1375B">http://adsabs.harvard.edu/abs/2010PASP..122.1375B</a></li>
166+
</ul>
167+
<p>
168+
To accelerate spatial queries, standard B-tree indexes are created
169+
on the point/region HTM IDs and spatial constraints are expressed
170+
in terms of those IDs. This allows the database optimizer to restrict
171+
the rows that must be considered by a spatial query.
172+
</p>
173+
<p>
174+
Read on to learn how to create and take advantage of HTM indexes on
175+
tables containing spatial data. The examples below can be run in the
176+
scisql_demo database, which contains all of the referenced tables
177+
and a tiny amount of sample data.
178+
</p>
179+
180+
<h3>Supported region types</h3>
181+
<p>
182+
SciSQL supports 4 kinds of regions: longitude/latitude angle boxes,
183+
spherical circles (defined by a center and opening angle), spherical
184+
ellipses (the orthographic projection of a standard 2-d ellipse onto
185+
the sphere, where the 2-d ellipse is on a plane tangent to the unit
186+
sphere at the ellipse center), and spherical convex polygons (where
187+
polygon edges are great circles). Note also that spherical convex
188+
polygons have a binary representation, produced by s2CPolyToBin(),
189+
allowing them to be stored as values in a BINARY table column.
190+
</p>
191+
192+
<h3>Points-in-region queries</h3>
193+
<p>
194+
SciSQL contains several UDFs for checking whether a point lies inside
195+
a region. These are: s2PtInBox(), s2PtInCircle(), s2PtInCPoly() and
196+
s2PtInEllipse(). They return 1 if the input point is inside the input
197+
region and 0 otherwise.
198+
</p>
199+
<p>
200+
Given these UDFs, a simple way to answer question 1 is illustrated by
201+
the following example:
202+
</p>
203+
<example>
204+
SELECT objectId
205+
FROM Object
206+
WHERE s2PtInCircle(ra_PS, decl_PS, 0, 0, 0.01) = 1;</example>
207+
<p>
208+
This query returns all the objects within 0.01 degrees of
209+
(RA, Dec) = (0, 0). It is inefficient for small search regions
210+
because the s2PtInCircle() UDF must be called for every row in
211+
the <tt>Object</tt> table.
212+
</p>
213+
<p>
214+
Let's assume that <tt>Object</tt> contains an indexed BIGINT column
215+
named <tt>htmId20</tt>. If it does not, the column and index can be
216+
added with ALTER TABLE. <tt>htmId20</tt> can be populated with the
217+
subdivision-level 20 HTM IDs of object positions as follows:
218+
</p>
219+
<example>
220+
ALTER TABLE Object DISABLE KEYS;
221+
UPDATE Object
222+
SET htmId20 = s2HtmId(ra_PS, decl_PS, 20);
223+
ALTER TABLE Object ENABLE KEYS;</example>
224+
<p>
225+
The HTM subdivision level must be between 0 and 24. At subdivision
226+
level N, there are 8*4<sup>N</sup> triangles in the mesh, so the
227+
higher subdivision levels correspond to finer tessellations of the
228+
unit sphere.
229+
</p>
230+
<p>
231+
Now that HTM IDs for object positions are available and indexed,
232+
the query above can be made more efficient:
233+
</p>
234+
<example>
235+
CALL scisql.s2CircleRegion(0, 0, 0.01, 20);
236+
237+
SELECT o.objectId
238+
FROM Object AS o INNER JOIN scisql.Region AS r
239+
ON (o.htmId20 BETWEEN r.htmMin AND r.htmMax)
240+
WHERE s2PtInCircle(o.ra_PS, o.decl_PS, 0, 0, 0.01) = 1;</example>
241+
<p>
242+
What's going on here? The first line in the example calls the
243+
s2CircleRegion() stored procedure. This procedure creates a temporary
244+
table called <tt>scisql.Region</tt> with two BIGINT NOT NULL columns
245+
named htmMin and htmMax. It then stores the HTM IDs overlapping the
246+
search region in <tt>scisql.Region</tt> (as ranges).
247+
</p>
248+
<p>
249+
Next, the original query is augmented with a join against
250+
<tt>scisql.Region</tt>. This limits the objects considered by
251+
s2PtInCircle() to those within the HTM triangles overlapping the
252+
search region; the index on htmId20 allows MySQL to retrieve these
253+
objects very quickly when the search region is small. Note that if
254+
the search region is large (meaning that a large fraction of the
255+
table being searched is inside the search region), then the original
256+
query may actually be faster.
257+
</p>
258+
<p>
259+
Here is another example, this time with a search region taken from
260+
a table called <tt>Science_Ccd_Exposure</tt>. This table includes a
261+
column named ccdPoly that contains polygonal approximations to the
262+
regions of the sphere observed by CCD exposures.
263+
</p>
264+
<example>
265+
SELECT ccdPoly FROM Science_Ccd_Exposure
266+
WHERE scienceCcdExposureId = 43856062009
267+
INTO @poly;
268+
269+
CALL scisql.s2CPolyRegion(@poly, 20);
270+
271+
SELECT o.objectId
272+
FROM Object AS o INNER JOIN scisql.Region AS r
273+
ON (o.htmId20 BETWEEN r.htmMin AND r.htmMax)
274+
WHERE s2PtInCPoly(o.ra_PS, o.decl_PS, @poly) = 1;</example>
275+
<p>
276+
The first statement stores the polygonal boundary of a particular CCD
277+
exposure into the user variable <tt>@poly</tt>, the second computes
278+
overlapping HTM IDs, and the third performs the points-in-region
279+
query as before.
280+
</p>
281+
282+
<h3>Regions-containing-point queries</h3>
283+
<p>
284+
An example for this type of query is:
285+
</p>
286+
<example>
287+
SELECT scienceCcdExposureId FROM Science_Ccd_Exposure
288+
WHERE s2PtInCPoly(0, 0, ccdPoly) = 1;</example>
289+
<p>
290+
This query returns all the CCD exposures containing the point
291+
(RA, Dec) = (0, 0). To accelerate it using HTM indexing, an
292+
auxiliary table is introduced:
293+
</p>
294+
<example test="false">
295+
CREATE TABLE Science_Ccd_Exposure_HtmId10 (
296+
scienceCcdExposureId BIGINT NOT NULL,
297+
htmId10 INTEGER NOT NULL,
298+
PRIMARY KEY (htmId10, scienceCcdExposureId),
299+
KEY (scienceCcdExposureId)
300+
);</example>
301+
<p>
302+
<tt>Science_Ccd_Exposure_HtmId10</tt> will store the level 10 HTM ID
303+
of each triangle overlapping each CCD exposure. To populate it, start
304+
by dumping the primary key and polygon vertex columns from
305+
<tt>Science_Ccd_Exposure</tt>:
306+
</p>
307+
<example>
308+
SELECT scienceCcdExposureId,
309+
llcRa, llcDecl,
310+
ulcRa, ulcDecl,
311+
urcRa, urcDecl,
312+
lrcRa, lrcDecl
313+
FROM Science_Ccd_Exposure
314+
INTO OUTFILE '/tmp/ccds.tsv';</example>
315+
<p>
316+
Then, run the SciSQL region indexing utility:
317+
</p>
318+
<example lang="bash">
319+
${MYSQL_DIR}/bin/scisql_index -l 10 /tmp/ccd_htmid10.tsv /tmp/ccds.tsv</example>
320+
<p>
321+
and load the results:
322+
</p>
323+
<example>
324+
TRUNCATE TABLE Science_Ccd_Exposure_HtmId10;
325+
LOAD DATA LOCAL INFILE '/tmp/ccd_htmid10.tsv' INTO TABLE Science_Ccd_Exposure_HtmId10;</example>
326+
<p>
327+
The example regions-containing-point query can now be expressed
328+
more efficiently as:
329+
</p>
330+
<example>
331+
SELECT sce.scienceCcdExposureId
332+
FROM Science_Ccd_Exposure AS sce, (
333+
SELECT scienceCcdExposureId
334+
FROM Science_Ccd_Exposure_HtmId10
335+
WHERE htmId10 = s2HtmId(0, 0, 10)
336+
) AS h
337+
WHERE sce.scienceCcdExposureId = h.scienceCcdExposureId AND
338+
s2PtInCPoly(0, 0, sce.ccdPoly) = 1;</example>
140339
</div>
141340
</section>
142341

0 commit comments

Comments
 (0)