diff --git a/src/tablecloth/api.clj b/src/tablecloth/api.clj index da8e6a0..2d0b3f6 100644 --- a/src/tablecloth/api.clj +++ b/src/tablecloth/api.clj @@ -263,6 +263,8 @@ (defn anti-join + ([ds-left ds-right] + (tablecloth.api.join-concat-ds/anti-join ds-left ds-right)) ([ds-left ds-right columns-selector] (tablecloth.api.join-concat-ds/anti-join ds-left ds-right columns-selector)) ([ds-left ds-right columns-selector options] @@ -315,6 +317,8 @@ (defn asof-join + ([ds-left ds-right] + (tablecloth.api.join-concat-ds/asof-join ds-left ds-right)) ([ds-left ds-right columns-selector] (tablecloth.api.join-concat-ds/asof-join ds-left ds-right columns-selector)) ([ds-left ds-right columns-selector options] @@ -1180,6 +1184,8 @@ column-names function returns names according to columns-selector (defn full-join "Join keeping all rows" + ([ds-left ds-right] + (tablecloth.api.join-concat-ds/full-join ds-left ds-right)) ([ds-left ds-right columns-selector] (tablecloth.api.join-concat-ds/full-join ds-left ds-right columns-selector)) ([ds-left ds-right columns-selector options] @@ -1337,6 +1343,8 @@ column-names function returns names according to columns-selector (defn inner-join + ([ds-left ds-right] + (tablecloth.api.join-concat-ds/inner-join ds-left ds-right)) ([ds-left ds-right columns-selector] (tablecloth.api.join-concat-ds/inner-join ds-left ds-right columns-selector)) ([ds-left ds-right columns-selector options] @@ -1351,7 +1359,7 @@ column-names function returns names according to columns-selector (defn join-columns - "Join clumns of dataset. Accepts: + "Join columns of dataset. 
Accepts: dataset column selector (as in select-columns) options @@ -1393,6 +1401,8 @@ column-names function returns names according to columns-selector (defn left-join + ([ds-left ds-right] + (tablecloth.api.join-concat-ds/left-join ds-left ds-right)) ([ds-left ds-right columns-selector] (tablecloth.api.join-concat-ds/left-join ds-left ds-right columns-selector)) ([ds-left ds-right columns-selector options] @@ -2078,6 +2088,8 @@ column-names function returns names according to columns-selector (defn right-join + ([ds-left ds-right] + (tablecloth.api.join-concat-ds/right-join ds-left ds-right)) ([ds-left ds-right columns-selector] (tablecloth.api.join-concat-ds/right-join ds-left ds-right columns-selector)) ([ds-left ds-right columns-selector options] @@ -2187,6 +2199,8 @@ column-names function returns names according to columns-selector (defn semi-join + ([ds-left ds-right] + (tablecloth.api.join-concat-ds/semi-join ds-left ds-right)) ([ds-left ds-right columns-selector] (tablecloth.api.join-concat-ds/semi-join ds-left ds-right columns-selector)) ([ds-left ds-right columns-selector options] diff --git a/src/tablecloth/api/join_concat_ds.clj b/src/tablecloth/api/join_concat_ds.clj index ce20745..e8ce3be 100644 --- a/src/tablecloth/api/join_concat_ds.clj +++ b/src/tablecloth/api/join_concat_ds.clj @@ -53,22 +53,51 @@ (impl [(first cols-left) (first cols-right)] ds-left ds-right (or options {})) (multi-join impl ds-left ds-right cols-left cols-right options)))) +(defn- automatic-columns-selector [ds-left ds-right] + (let [cols-l (set (column-names ds-left)) + cols-r (set (column-names ds-right))] + (vec (s/intersection cols-l cols-r))) ) + (defn left-join + "Applies the left-join operation on the datasets. If no `columns-selector` is + provided, the columns common to both datasets are used as the `columns-selector`. 
Options is a map with the following keys: + - `hashing` - Hashing function to use (default identity) + - `drop-join-column?` - Remove joined columns (default true)" + ([ds-left ds-right] + (left-join ds-left ds-right (automatic-columns-selector ds-left ds-right))) ([ds-left ds-right columns-selector] (left-join ds-left ds-right columns-selector nil)) ([ds-left ds-right columns-selector options] (apply-join j/left-join ds-left ds-right columns-selector options))) (defn right-join + "Applies the right-join operation on the datasets. If no `columns-selector` is + provided, the columns common to both datasets are used as the `columns-selector`. Options is a map with the following keys: + - `hashing` - Hashing function to use (default identity) + - `drop-join-column?` - Remove joined columns (default true)" + ([ds-left ds-right] + (right-join ds-left ds-right (automatic-columns-selector ds-left ds-right))) ([ds-left ds-right columns-selector] (right-join ds-left ds-right columns-selector nil)) ([ds-left ds-right columns-selector options] (apply-join j/right-join ds-left ds-right columns-selector options))) (defn inner-join + "Applies the inner-join operation on the datasets. If no `columns-selector` is + provided, the columns common to both datasets are used as the `columns-selector`. Options is a map with the following keys: + - `hashing` - Hashing function to use (default identity) + - `drop-join-column?` - Remove joined columns (default true)" + ([ds-left ds-right] + (inner-join ds-left ds-right (automatic-columns-selector ds-left ds-right))) ([ds-left ds-right columns-selector] (inner-join ds-left ds-right columns-selector nil)) ([ds-left ds-right columns-selector options] (apply-join j/inner-join ds-left ds-right columns-selector options))) (defn asof-join + "Applies the asof-join operation on the datasets. If no `columns-selector` is + provided, the columns common to both datasets are used as the `columns-selector`. 
Options is a map with the following keys: + - `hashing` - Hashing function to use (default identity) + - `drop-join-column?` - Remove joined columns (default true)" + ([ds-left ds-right] + (asof-join ds-left ds-right (automatic-columns-selector ds-left ds-right))) ([ds-left ds-right columns-selector] (asof-join ds-left ds-right columns-selector nil)) ([ds-left ds-right columns-selector options] (apply-join j/left-join-asof ds-left ds-right columns-selector options))) @@ -81,7 +110,12 @@ (j/pd-merge ds-left ds-right (assoc options :left-on left :right-on right :how :outer)))) (defn full-join - "Join keeping all rows" + "Join keeping all rows. If no `columns-selector` is + provided, the columns common to both datasets are used as the `columns-selector`. Options is a map with the following keys: + - `hashing` - Hashing function to use (default identity) + - `drop-join-column?` - Remove joined columns (default true)" + ([ds-left ds-right] + (full-join ds-left ds-right (automatic-columns-selector ds-left ds-right))) ([ds-left ds-right columns-selector] (full-join ds-left ds-right columns-selector nil)) ([ds-left ds-right columns-selector options] (apply-join full-join-wrapper ds-left ds-right columns-selector options))) @@ -95,12 +129,24 @@ (distinct))) (defn semi-join + "Applies the semi-join operation on the datasets. If no `columns-selector` is + provided, the columns common to both datasets are used as the `columns-selector`. Options is a map with the following keys: + - `hashing` - Hashing function to use (default identity) + - `drop-join-column?` - Remove joined columns (default true)" + ([ds-left ds-right] + (semi-join ds-left ds-right (automatic-columns-selector ds-left ds-right))) ([ds-left ds-right columns-selector] (semi-join ds-left ds-right columns-selector nil)) ([ds-left ds-right columns-selector options] (->> (semi-anti-join-indexes ds-left ds-right columns-selector options) (select-rows ds-left)))) (defn anti-join + "Applies the anti-join operation on the datasets. 
If no `columns-selector` is + provided, the columns common to both datasets are used as the `columns-selector`. Options is a map with the following keys: + - `hashing` - Hashing function to use (default identity) + - `drop-join-column?` - Remove joined columns (default true)" + ([ds-left ds-right] + (anti-join ds-left ds-right (automatic-columns-selector ds-left ds-right))) ([ds-left ds-right columns-selector] (anti-join ds-left ds-right columns-selector nil)) ([ds-left ds-right columns-selector options] (->> (semi-anti-join-indexes ds-left ds-right columns-selector options) diff --git a/src/tablecloth/api/join_separate.clj b/src/tablecloth/api/join_separate.clj index 861aae4..e85d8f7 100644 --- a/src/tablecloth/api/join_separate.clj +++ b/src/tablecloth/api/join_separate.clj @@ -18,7 +18,7 @@ (if drop-columns? (drop-columns result col-names) result))) (defn join-columns - "Join clumns of dataset. Accepts: + "Join columns of dataset. Accepts: dataset column selector (as in select-columns) options diff --git a/test/tablecloth/api/join_concat_ds_test.clj b/test/tablecloth/api/join_concat_ds_test.clj index 0ac5928..cdf66ea 100644 --- a/test/tablecloth/api/join_concat_ds_test.clj +++ b/test/tablecloth/api/join_concat_ds_test.clj @@ -84,6 +84,22 @@ [:i :y]) (api/rows :as-maps)) => [{:i "foo", :y 2022, :right.i "foo", :right.y 2022, :s "2022"}]) +(fact "int-string join with automatic column selector" + (-> (api/left-join (-> (api/dataset [{:i "foo" :y 2022}])) + (-> (api/dataset [{:i "foo" :y 2022 :s "2022"} + {:i "foo" :y 2023 :s "2023"}]))) + (api/rows :as-maps)) => [{:i "foo", :y 2022, :right.i "foo", :right.y 2022, :s "2022"}] + (-> (api/left-join (-> (api/dataset [{:i "foo" :y 2022}]) + (api/convert-types {:y :int16})) + (-> (api/dataset [{:i "foo" :y 2022 :s "2022"} + {:i "foo" :y 2023 :s "2023"}]))) + (api/rows :as-maps)) => [{:i "foo", :y 2022, :right.i "foo", :right.y 2022, :s "2022"}]) + +(fact "int-string join with automatic column selector - when there are no common 
columns" + (-> (api/left-join (-> (api/dataset [{:i "foo" :x 2022}])) + (-> (api/dataset [{:y 2022 :z "bar"}]))) + (api/rows :as-maps)) => [{:i "foo", :x 2022, :y 2022 :z "bar"}]) + (fact "left join on shorts packed into the vector" (-> (api/left-join (-> (api/dataset [{:iy ["foo" (short 2022)]}])) (-> (api/dataset [{:iy ["foo" (long 2022)] :s "2022"} @@ -111,3 +127,39 @@ (api/dataset [{:k "baz"}]) [:k]) (api/rows :as-maps)) => [{:k "baz", :v "\"baz\""} {:k "baz", :v "\"baz\""}]) + +(fact "right join with automatic column selector" + (-> (api/right-join (api/dataset [{:i "foo" :y 2022}]) + (api/dataset [{:i "foo" :y 2022 :s "2022"} + {:i "foo" :y 2023 :s "2023"}])) + (api/rows :as-maps)) => [{:i "foo", :y 2022, :right.i "foo", :right.y 2022, :s "2022"} + {:i nil, :y nil, :right.i "foo", :right.y 2023, :s "2023"}]) + +(fact "inner join with automatic column selector" + (-> (api/inner-join (api/dataset [{:i "foo" :y 2022}]) + (api/dataset [{:i "foo" :y 2022 :s "2022"} + {:i "foo" :y 2023 :s "2023"}])) + (api/rows :as-maps)) => [{:i "foo", :y 2022, :right.i "foo", :right.y 2022, :s "2022"}]) + +(fact "full join with automatic column selector" + (-> (api/full-join (api/dataset [{:i "foo" :y 2022} + {:i "bar" :y 2021 }]) + (api/dataset [{:i "foo" :y 2022 :s "2022"} + {:i "foo" :y 2023 :s "2023"}])) + (api/rows :as-maps)) => [{:i "foo", :y 2022, :right.i "foo", :right.y 2022, :s "2022"} + {:i "bar", :y 2021, :right.i nil, :right.y nil, :s nil} + {:i nil, :y nil, :right.i "foo", :right.y 2023, :s "2023"}]) + +(fact "anti join with automatic column selector" + (-> (api/anti-join (api/dataset [{:i "foo" :y 2022} + {:i "bar" :y 2021 }]) + (api/dataset [{:i "foo" :y 2022 :s "2022"} + {:i "foo" :y 2023 :s "2023"}])) + (api/rows :as-maps)) => [{:i "bar", :y 2021}]) + +(fact "semi join with automatic column selector" + (-> (api/semi-join (api/dataset [{:i "foo" :y 2022} + {:i "bar" :y 2021 }]) + (api/dataset [{:i "foo" :y 2022 :s "2022"} + {:i "foo" :y 2023 :s "2023"}])) + 
(api/rows :as-maps)) => [{:i "foo", :y 2022}])