apache
diff --git a/‎common/src/test/java/org/apache/drill/categories/IPFSStorageTest.java‎
Lines changed: 27 additions & 0 deletions b/‎common/src/test/java/org/apache/drill/categories/IPFSStorageTest.java‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎contrib/native/client/src/protobuf/UserBitShared.pb.cc‎
Lines changed: 8 additions & 7 deletions b/‎contrib/native/client/src/protobuf/UserBitShared.pb.cc‎
Lines changed: 8 additions & 7 deletions
diff --git a/‎contrib/native/client/src/protobuf/UserBitShared.pb.h‎
Lines changed: 3 additions & 2 deletions b/‎contrib/native/client/src/protobuf/UserBitShared.pb.h‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎contrib/pom.xml‎
Lines changed: 1 addition & 0 deletions b/‎contrib/pom.xml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎contrib/storage-ipfs/README.md‎
Lines changed: 99 additions & 0 deletions b/‎contrib/storage-ipfs/README.md‎
Lines changed: 99 additions & 0 deletions
diff --git a/‎contrib/storage-ipfs/pom.xml‎
Lines changed: 87 additions & 0 deletions b/‎contrib/storage-ipfs/pom.xml‎
Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.drill.categories;
+
+/**
+ * Marker interface that declares no members. It is a category used to mark
+ * unit tests that test the IPFS storage plugin.
+ */
+public interface IPFSStorageTest {
+}
+
@@ -56,6 +56,7 @@
     <module>storage-kudu</module>
     <module>storage-opentsdb</module>
     <module>storage-http</module>
+    <module>storage-ipfs</module>
     <module>storage-druid</module>
   </modules>
 
 
@@ -0,0 +1,99 @@
+# Drill Storage Plugin for IPFS (Minerva)
+
+## Contents
+
+0. [Introduction](#introduction)
+1. [Configuration](#configuration)
+2. [Usage Notes](#usage-notes)
+
+## Introduction
+
+Minerva is a Drill storage plugin that connects IPFS's decentralized storage with Drill's flexible query engine. Any data file stored on IPFS can be easily accessed from Drill's query interface, just like a file stored on a local disk. Moreover, with Drill's capability of distributed execution, other Drill instances that are also running Minerva can help accelerate the execution: the data stays where it is, and the queries go to the most suitable nodes, which store the data locally and can therefore perform the operations most efficiently.
+
+## Configuration
+
+1. Set Drill hostname to the IP address of the node to run Drill:
+    
+    Edit file `conf/drill-env.sh` and change the environment variable `DRILL_HOST_NAME` to the IP address of the node. Use private or global addresses, depending on whether you plan to run it in a private cluster or on the open Internet.
+
+2. Configure the IPFS storage plugin:
+    
+    The default configuration of the IPFS storage plugin is located at `src/resources/bootstrap-storage-plugins.json`:
+    
+    ```
+    "ipfs" : {
+      "type":"ipfs",
+      "host": "127.0.0.1",
+      "port": 5001,
+      "max-nodes-per-leaf": 3,
+      "ipfs-timeouts": {
+        "find-provider": 4,
+        "find-peer-info": 4,
+        "fetch-data": 5
+      },
+      "ipfs-caches": {
+        "peer": {"size": 100, "ttl": 600},
+        "provider": {"size": 1000, "ttl": 600}
+      },
+      "groupscan-worker-threads": 50,
+      "formats": null,
+      "enabled": true
+    }
+    ```
+    
+    where 
+    
+    `host` and `port` are the host and API port where your IPFS daemon will be listening. Change them so that they match the configuration of your IPFS instance.
+
+    `max-nodes-per-leaf` controls how many provider nodes will be considered when the query is being planned. A larger value increases the parallelization width but typically takes longer to find enough providers from DHT resolution. A smaller value does the opposite.
+    
+    `ipfs-timeouts` sets the maximum amount of time, in seconds, allowed for various time-consuming operations:
+    
+    * `find-provider` is the time allowed to do DHT queries to find providers.
+    * `find-peer-info` is the time allowed to resolve the network addresses of the providers.
+    * `fetch-data` is the time the actual transmission is allowed to take. 
+    
+    `ipfs-caches` controls the size and the TTL (in seconds) of the entries in the caches used to accelerate query execution:
+    
+    * `peer` cache caches the addresses of peers.
+    * `provider` cache caches which providers provide a particular IPFS object.
+    
+    `groupscan-worker-threads` limits the number of worker threads used when the planner communicates with the IPFS daemon to resolve providers and peer info.
+    
+    `formats` specifies the formats of the files. It is unimplemented for now and does nothing.
+    
+3. Configure IPFS
+
+    Start the IPFS daemon first. 
+    
+    Set a Drill-ready flag on the node:
+    
+    ```
+    $ IPFS_NULL_OBJECT=$(ipfs object new)
+    $ ipfs object patch add-link $IPFS_NULL_OBJECT "drill-ready" $IPFS_NULL_OBJECT
+    QmeXLv7D5uV2uXHejg7St7mSXDtqwTw8LuywSBawSxy5iA
+    $ ipfs name publish /ipfs/QmeXLv7D5uV2uXHejg7St7mSXDtqwTw8LuywSBawSxy5iA
+    Published to <your-node-id>: /ipfs/QmeXLv7D5uV2uXHejg7St7mSXDtqwTw8LuywSBawSxy5iA
+    ```
+    
+    This flag indicates that an IPFS node is also capable of handling Drill queries, and the planner will consider it when scheduling a query for distributed execution. A node without this flag will be ignored.
+    
+    Also, pin the flag so that it will stick on your node:
+    
+    ```
+    $ ipfs pin add -r QmeXLv7D5uV2uXHejg7St7mSXDtqwTw8LuywSBawSxy5iA
+    ```
+    
+## Usage Notes
+
+1. Compatible data formats
+
+    Currently only JSON files are supported by this storage plugin.
+    
+2. Add datasets to IPFS
+    
+    IPFS provides the `ipfs add` command to conveniently add a file to IPFS. Unfortunately that command does not split data files into chunks on line boundaries. Use [this script](https://gist.github.com/dbw9580/250e52a54e39a34083f815dea34a89e0) to do proper chunking and add files to IPFS. 
+    
+3. Timeout exceptions
+
+    IPFS operations can be time-consuming, and sometimes an operation can take forever (e.g. querying the DHT for a non-existent object). Adjust the timeout values in the config to avoid most timeout exceptions.
@@ -0,0 +1,87 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>drill-contrib-parent</artifactId>
+        <groupId>org.apache.drill.contrib</groupId>
+        <version>1.18.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>drill-ipfs-storage</artifactId>
+    <name>contrib/ipfs-storage-plugin</name>
+
+    <properties>
+        <ipfs.TestSuite>**/IPFSTestSuit.class</ipfs.TestSuite>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.drill.exec</groupId>
+            <artifactId>drill-java-exec</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.github.ipfs</groupId>
+            <artifactId>java-ipfs-http-client</artifactId>
+            <version>v1.3.3</version>
+        </dependency>
+
+        <!-- Test dependencies -->
+        <dependency>
+            <groupId>org.apache.drill.exec</groupId>
+            <artifactId>drill-java-exec</artifactId>
+            <classifier>tests</classifier>
+            <version>${project.version}</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.drill</groupId>
+            <artifactId>drill-common</artifactId>
+            <classifier>tests</classifier>
+            <version>${project.version}</version>
+            <scope>test</scope>
+        </dependency>
+
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- Surefire configuration controlling which test classes this module runs. -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <configuration>
+                    <!-- forkCount 0: per the Surefire documentation, tests run inside the Maven JVM instead of a forked process. -->
+                    <forkCount>0</forkCount>
+                    <!-- Only classes matching the ipfs.TestSuite property (declared in this POM's <properties>) are included. -->
+                    <includes>
+                        <include>${ipfs.TestSuite}</include>
+                    </includes>
+                    <!-- TestIPFSQueries.java is explicitly excluded from discovery. -->
+                    <!-- NOTE(review): presumably it should only run through the suite and/or needs a live IPFS daemon; confirm. -->
+                    <excludes>
+                        <exclude>**/TestIPFSQueries.java</exclude>
+                    </excludes>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>