Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion AUTHORS
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Nimesh Kiran Verma
Utsav Tiwary
Utsav Tiwary
Allon Hadaya
36 changes: 22 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,50 +9,51 @@
***
To install the package, type the following -

pip install mongoschema
pip install mongoschema


## Sample data - Populating MongoDB with sample data
***
Navigate to `test/sample_data` in the `mongo_schema` directory and type the following commands -

> mongoimport --dbname test --collection supplier --file supplier.json
> mongoimport --dbname test --collection order --file order.json
> mongoimport --dbname test --collection supplier --file supplier.json
> mongoimport --dbname test --collection order --file order.json

This will create and populate the required collections with sample data.


The two collections *supplier* and *order* will be used to demonstrate how to use **mongoschema**.
To check the contents of the collections, the following commands can be used in the MongoDB shell:

> use test
> db.supplier.find({})
> db.order.find({})
> use test
> db.supplier.find({})
> db.order.find({})

## Using `mongoschema` to get the schema information of a MongoDB collection
***
Type the following in the Python shell to import `mongoschema` -

>>> from mongoschema import Schema
>>> from mongoschema import Schema

To create a `Schema` object for the collection to be analysed, type the following -

>>> schema = Schema("test", "supplier")
>>> from pymongo import MongoClient
>>> client = MongoClient()
>>> schema = Schema(client.test.supplier)

where `test` is the DB name and `supplier` is the Collection name.


Additional parameters -
`host` : Mongo uri (String)
`port` : Port Number (Integer)
`limit`: Number of docs to be sampled
- `filter`: Query for subset of docs to be sampled
- `limit` : Number of docs to be sampled

To get the stats of the collection -

>>> num_docs, result = schema.get_schema()
>>> num_docs, result = schema.get_schema()

`num_docs`: Total number of docs sampled
`result` : Dictionary containing the stats
- `num_docs`: Total number of docs sampled
- `result` : Dictionary containing the stats

Use the following command to pretty print the results -

Expand All @@ -67,4 +68,11 @@ Use the following command to pretty print the results -
| _id | 7 | 100.0 | other | 100.0 |
+-------------+------------------+-----------------------+------------------+-----------------------+

To print the schema of every collection in every database, type the following -

>>> for database in client.database_names():
>>> for collection in client[database].collection_names():
>>> print('%s.%s:' % (database, collection))
>>> Schema(client[database][collection]).print_schema()

More contents here - https://pypi.python.org/pypi/mongoschema/
58 changes: 6 additions & 52 deletions mongoschema/mongoschema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
import sys
import json
from pymongo import MongoClient
from collections import defaultdict
from prettytable import PrettyTable

Expand All @@ -9,65 +6,23 @@ class Schema(object):

"Gets the schema of a MongoDB collection"

DEFAULT_MONGO_URI = 'mongodb://localhost:27017/'
DEFAULT_PORT = 27017

def __init__(self, db_name, collection_name, where_dict={}, limit=0, mongo_uri=DEFAULT_MONGO_URI, host=None, port=None):
def __init__(self, collection, where_dict={}, limit=0):
"""
Initializes Mongo Credentials given by user

:param db_name: Name of the database
:type db_name: string
Initializes Schema for a given collection

:param collection_name: Name of the collection
:type collection_name: string
:param collection: The collection instance
:type collection: pymongo.collection.Collection

:param where_dict: Filters (specific fields/value ranges etc.)
:type where_dict: dictionary

:param mongo_uri: Mongo Server and Port information
:type mongo_uri: string

:param limit: Number of docs to be sampled
:type limit: int

"""

self.db_name = db_name
self.collection = collection_name
self.collection = collection
self.where_dict = where_dict
self.limit = limit
self.mongo_uri = mongo_uri
self.host = host
self.port = port

def get_mongo_cursor(self):
    """
    Returns the Mongo collection described by the instance attributes

    When ``host`` is set, the client is built from ``host`` and ``port``
    (falling back to the class-level ``DEFAULT_PORT`` when no port was
    given); otherwise the client is built from ``mongo_uri``.

    :return: mongo collection on which queries will be run
    :rtype: mongo collection object

    :raises Exception: if the client or collection could not be obtained;
        the message carries the uri, database, collection and original error
    """
    try:
        if self.host:
            # The fallback previously read `MongoCollection.DEFAULT_PORT`,
            # a name that does not exist; the resulting NameError was then
            # reported as a connection failure by the handler below.
            port = self.port if self.port else self.DEFAULT_PORT
            client = MongoClient(self.host, port)
        else:
            client = MongoClient(self.mongo_uri)

        db = client[self.db_name]
        return db[self.collection]

    except Exception as e:
        msg = "Mongo Connection could not be established for Mongo Uri: {mongo_uri}, Database: {db_name}, Collection {col}, Error: {error}".format(
            mongo_uri=self.mongo_uri, db_name=self.db_name, col=self.collection, error=str(e))
        raise Exception(msg)

def get_pretty_table(self, key_type_count, total_docs):
"""
Expand Down Expand Up @@ -125,8 +80,7 @@ def get_schema(self, return_dict=True):
"other": 0,
}

cursor = self.get_mongo_cursor()
mongo_collection_docs = cursor.find(
mongo_collection_docs = self.collection.find(
self.where_dict).limit(self.limit)

key_type_count = defaultdict(lambda: dict(key_type_default_count))
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
packages=['mongoschema'],
version='1.2',
description='Python library for getting schema details of MongoDB collections',
author='Nimesh Kiran, Utsav Tiwary',
author='Nimesh Kiran, Utsav Tiwary, Allon Hadaya',
author_email='[email protected]',
url='https://github.com/nimeshkverma/mongo_schema',
download_url='https://github.com/nimeshkverma/mongo_schema/tarball/1.2',
Expand Down