diff --git a/AUTHORS b/AUTHORS index d968ef6..1921adf 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,2 +1,3 @@ Nimesh Kiran Verma -Utsav Tiwary \ No newline at end of file +Utsav Tiwary +Allon Hadaya diff --git a/README.md b/README.md index 8b6475b..e836ab8 100644 --- a/README.md +++ b/README.md @@ -9,15 +9,15 @@ *** To install the package, type the following - - pip install mongoschema + pip install mongoschema ## Sample data - Populating MongoDB with sample data *** Navigate to `test/sample_data` in the `mongojoin` directory and type the following command - - > mongoimport --dbname test --collection supplier --file supplier.json - > mongoimport --dbname test --collection order --file order.json + > mongoimport --dbname test --collection supplier --file supplier.json + > mongoimport --dbname test --collection order --file order.json This will create and populate the required collections with sample data. @@ -25,34 +25,35 @@ This will create and populate the required collections with sample data. The two collections *supplier* and *order* will be used to demonstrate how to use **mongoschema**. To check the contents of the collection, the following command can be used in the MongoDB shell : - > use test - > db.supplier.find({}) - > db.order.find({}) + > use test + > db.supplier.find({}) + > db.order.find({}) ## Using `mongoschema` to get the schema information of a MongoDB collections *** Type the following in Python shell to import `mongoschema`- - >>> from mongoschema import Schema + >>> from mongoschema import Schema To create a `Schema` object for the collection to be analysed, type the following - - >>> schema = Schema("test", "supplier") + >>> from pymongo import MongoClient + >>> client = MongoClient() + >>> schema = Schema(client.test.supplier) where `test` is the DB name and `supplier` is the Collection name. 
Additional parameters - -`host` : Mongo uri (String) -`port` : Port Number (Integer) -`limit`: Number of docs to be sampled +- `where_dict`: Query for subset of docs to be sampled +- `limit` : Number of docs to be sampled To get the stats of the collection - - >>> num_docs, result = schema.get_schema() + >>> num_docs, result = schema.get_schema() -`num_docs`: Total number of docs sampled -`result` : Dictionary containing the stats +- `num_docs`: Total number of docs sampled +- `result` : Dictionary containing the stats Use the following command to pretty print the results - @@ -67,4 +68,11 @@ Use the following command to pretty print the results - | _id | 7 | 100.0 | other | 100.0 | +-------------+------------------+-----------------------+------------------+-----------------------+ +To print the schema of every collection in every database, type the following - + + >>> for database in client.database_names(): + >>> for collection in client[database].collection_names(): + >>> print('%s.%s:' % (database, collection)) + >>> Schema(client[database][collection]).print_schema() + More contents here - https://pypi.python.org/pypi/mongoschema/ diff --git a/mongoschema/mongoschema.py b/mongoschema/mongoschema.py index 4996890..7b8b21c 100644 --- a/mongoschema/mongoschema.py +++ b/mongoschema/mongoschema.py @@ -1,6 +1,3 @@ -import sys -import json -from pymongo import MongoClient from collections import defaultdict from prettytable import PrettyTable @@ -9,65 +6,23 @@ class Schema(object): "Gets the schema of a MongoDB collection" - DEFAULT_MONGO_URI = 'mongodb://localhost:27017/' - DEFAULT_PORT = 27017 - - def __init__(self, db_name, collection_name, where_dict={}, limit=0, mongo_uri=DEFAULT_MONGO_URI, host=None, port=None): + def __init__(self, collection, where_dict={}, limit=0): """ - Initializes Mongo Credentials given by user - - :param db_name: Name of the database - :type db_name: string + Initializes Schema for a given collection - :param collection_name: Name of the 
collection - :type collection_name: string + :param collection: The collection instance + :type collection: pymongo.collection.Collection :param where_dict: Filters (specific fields/value ranges etc.) :type where_dict: dictionary - :param mongo_uri: Mongo Server and Port information - :type mongo_uri: string - :param limit: Number of docs to be sampled :type limit: int """ - - self.db_name = db_name - self.collection = collection_name + self.collection = collection self.where_dict = where_dict self.limit = limit - self.mongo_uri = mongo_uri - self.host = host - self.port = port - - def get_mongo_cursor(self): - """ - Returns Mongo cursor using the class variables - - :return: mongo collection for which cursor will be created - :rtype: mongo colection object - """ - try: - if self.host: - if self.port: - client = MongoClient(self.host, self.port) - else: - client = MongoClient( - self.host, MongoCollection.DEFAULT_PORT) - else: - - client = MongoClient(self.mongo_uri) - - db = client[self.db_name] - cursor = db[self.collection] - - return cursor - - except Exception as e: - msg = "Mongo Connection could not be established for Mongo Uri: {mongo_uri}, Database: {db_name}, Collection {col}, Error: {error}".format( - mongo_uri=self.mongo_uri, db_name=self.db_name, col=self.collection, error=str(e)) - raise Exception(msg) def get_pretty_table(self, key_type_count, total_docs): """ @@ -125,8 +80,7 @@ def get_schema(self, return_dict=True): "other": 0, } - cursor = self.get_mongo_cursor() - mongo_collection_docs = cursor.find( + mongo_collection_docs = self.collection.find( self.where_dict).limit(self.limit) key_type_count = defaultdict(lambda: dict(key_type_default_count)) diff --git a/setup.py b/setup.py index 199aa4c..778158d 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ packages=['mongoschema'], version='1.2', description='Python library for getting schema details of MongoDB collections', - author='Nimesh Kiran, Utsav Tiwary', + author='Nimesh Kiran, Utsav 
Tiwary, Allon Hadaya', author_email='nimesh.aug11@gmail.com', url='https://github.com/nimeshkverma/mongo_schema', download_url='https://github.com/nimeshkverma/mongo_schema/tarball/1.2',