Skip to content

Commit

Permalink
Release 7.033 && Fixes #434
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Nov 16, 2024
1 parent 471b526 commit 12c9a96
Show file tree
Hide file tree
Showing 40 changed files with 67 additions and 52 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Changelog
# 7.033
* [issue-434](https://github.com/techascent/tech.ml.dataset/issues/434) - bad transit encoding - packed instants are microseconds since epoch and have been for a while - not milliseconds since epoch.

# 7.031
* [issue-413](https://github.com/techascent/tech.ml.dataset/issues/413) - reduce with packed columns.
* [issue-414](https://github.com/techascent/tech.ml.dataset/issues/414) - categorical maps are now integers.
Expand Down
4 changes: 2 additions & 2 deletions deps.edn
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{:paths ["src" "resources" "target/classes"]
:deps {;;org.clojure/clojure {:mvn/version "1.11.1"}
cnuernber/dtype-next {:mvn/version "10.116"}
cnuernber/dtype-next {:mvn/version "10.120"}
techascent/tech.io {:mvn/version "4.31"
:exclusions [org.apache.commons/commons-compress]}
org.apache.datasketches/datasketches-java {:mvn/version "4.2.0"}
Expand All @@ -14,7 +14,7 @@
:exec-fn codox.main/-main
:exec-args {:group-id "techascent"
:artifact-id "tech.ml.dataset"
:version "7.032"
:version "7.033"
:name "TMD"
:description "A Clojure high performance data processing system"
:metadata {:doc/format :markdown}
Expand Down
2 changes: 1 addition & 1 deletion docs/000-getting-started.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/100-walkthrough.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/200-quick-reference.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/columns-readers-and-datatypes.html

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/index.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/nippy-serialization-rocks.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/supported-datatypes.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.categorical.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.clipboard.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.column-filters.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.column.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.io.csv.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.io.datetime.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.io.string-row-parser.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.io.univocity.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.join.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.math.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.metamorph.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.modelling.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.print.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.reductions.apache-data-sketch.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.reductions.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.rolling.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.set.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.tensor.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.dataset.zip.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.libs.arrow.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.libs.clj-transit.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.libs.fastexcel.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.libs.guava.cache.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.libs.parquet.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.libs.poi.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.libs.smile.data.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tech.v3.libs.tribuo.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion scripts/deploy
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

set -e

scripts/run-tests
scripts/run-tests-m1
rm -rf pom.xml
clojure -T:build jar
cp target/classes/META-INF/maven/techascent/tech.ml.dataset/pom.xml .
Expand Down
27 changes: 14 additions & 13 deletions src/tech/v3/libs/clj_transit.clj
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
(#{:packed-local-date :local-date} col-dt)
(obj-col->numeric-b64 col :int32 dtype-dt/local-date->days-since-epoch)
(#{:packed-instant :instant} col-dt)
(obj-col->numeric-b64 col :int64 dtype-dt/instant->milliseconds-since-epoch)
(obj-col->numeric-b64 col :int64 dtype-dt/instant->microseconds-since-epoch)
:else ;;Punt!!
(vec col))}))

Expand Down Expand Up @@ -260,12 +260,12 @@
(def ^{:doc "Transit write handlers for java.time.LocalDate and java.time.Instant"}
java-time-write-handlers
{LocalDate (t/write-handler "java.time.LocalDate" dtype-dt/local-date->days-since-epoch)
Instant (t/write-handler "java.time.Instant" dtype-dt/instant->milliseconds-since-epoch)})
Instant (t/write-handler "java.time.Instant" dtype-dt/instant->microseconds-since-epoch)})

(def ^{:doc "Transit read handlers for java.time.LocalDate and java.time.Instant"}
java-time-read-handlers
{"java.time.LocalDate" (t/read-handler dtype-dt/days-since-epoch->local-date)
"java.time.Instant" (t/read-handler dtype-dt/milliseconds-since-epoch->instant)})
"java.time.Instant" (t/read-handler dtype-dt/microseconds-since-epoch->instant)})


(defn dataset->transit
Expand Down Expand Up @@ -315,17 +315,18 @@
(comment
(defn master-ds
[]
(ds/->dataset {:a (mapv double (range 5))
:b (repeat 5 :a)
:c (repeat 5 "hey")
:d (repeat 5 {:a 1 :b 2})
:e (repeat 4 [1 2 3])
:f (repeat 5 (dtype-dt/local-date))
:g (repeat 5 (dtype-dt/instant))
:h [true false true true false]
:i (repeat 5 "text")
:j [1 nil 2 nil 3]}
(ds/->dataset (array-map :a (mapv double (range 5))
:b (repeat 5 :a)
:c (repeat 5 "hey")
:d (repeat 5 {:a 1 :b 2})
:e (repeat 4 [1 2 3])
:f (repeat 5 (dtype-dt/local-date))
:g (repeat 5 (dtype-dt/instant))
:h [true false true true false]
:i (repeat 5 "text")
:j [1 nil 2 nil 3])
{:parser-fn {:i :text}}))



(-> (master-ds)
Expand Down
11 changes: 11 additions & 0 deletions test/tech/v3/dataset/parse_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,17 @@
(is (= (ds :b) (nds :b)))))


;; Regression test for issue 434: round-trips a dataset through the transit
;; string encoding (dataset->transit-str followed by transit-str->dataset) and
;; checks that the integer, keyword and java.time.Instant columns each compare
;; equal to the originals afterwards.
;; NOTE(review): :c supplies two values while :a and :b supply three --
;; presumably the short column is padded with a missing value; confirm this
;; is intentional.
(deftest issue-434-transit-support
(let [ds (ds/->dataset {:a [1 2 3]
:b [:one :two :three]
:c [(java.time.Instant/now) (java.time.Instant/now)]})
str-data (ds-transit/dataset->transit-str ds)
nds (ds-transit/transit-str->dataset str-data)]
(is (= (ds :a) (nds :a)))
(is (= (ds :b) (nds :b)))
(is (= (ds :c) (nds :c)))))


(deftest issue-414-json-parser-fn
(is (= [1 2 3] (get (ds/->dataset "test/data/local_date.json"
{:parser-fn {:time-period :local-date}})
Expand Down

0 comments on commit 12c9a96

Please sign in to comment.