pymongo

In [1]:
# conda env py36db
In [1]:
# !python -m pip install pymongo
Collecting pymongo
  Downloading pymongo-3.11.0-cp36-cp36m-win_amd64.whl (390 kB)
Installing collected packages: pymongo
Successfully installed pymongo-3.11.0
In [2]:
import pymongo

Create database, collection, documents

Important: In MongoDB, a database is not created until it gets content!

In [15]:
import datetime

client = pymongo.MongoClient('localhost', 27017)

db = client["mydatabase"]
collection = db["customers"]

dict = { "name": "John", "address": "Highway 37"}

dicts = collection.insert_one(dict)

Check Database

In [19]:
print(client.list_database_names())
['admin', 'config', 'local', 'mydatabase', 'mydb']
In [20]:
dblist = client.list_database_names()
if "mydatabase" in dblist:
  print("The database exists.")
The database exists.

Check Collection

In [21]:
print(db.list_collection_names())
['customers']
In [22]:
collist = db.list_collection_names()
if "customers" in collist:
  print("The collection exists.")
The collection exists.

Check Document

In [23]:
print(dicts.inserted_id)
5f7b0ebd582db084a852ba98

Insert

Insert mutliple documents

In [24]:
import pymongo

client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["mydatabase"]
collection = db["customers"]

new_list = [
  { "name": "Amy", "address": "Apple st 652"},
  { "name": "Hannah", "address": "Mountain 21"},
  { "name": "Michael", "address": "Valley 345"},
  { "name": "Sandy", "address": "Ocean blvd 2"},
  { "name": "Betty", "address": "Green Grass 1"},
  { "name": "Richard", "address": "Sky st 331"},
  { "name": "Susan", "address": "One way 98"},
  { "name": "Vicky", "address": "Yellow Garden 2"},
  { "name": "Ben", "address": "Park Lane 38"},
  { "name": "William", "address": "Central st 954"},
  { "name": "Chuck", "address": "Main Road 989"},
  { "name": "Viola", "address": "Sideway 1633"}
]

insert_list = collection.insert_many(new_list)

#print list of the _id values of the inserted documents:
print(insert_list.inserted_ids)
[ObjectId('5f7b1042582db084a852ba9a'), ObjectId('5f7b1042582db084a852ba9b'), ObjectId('5f7b1042582db084a852ba9c'), ObjectId('5f7b1042582db084a852ba9d'), ObjectId('5f7b1042582db084a852ba9e'), ObjectId('5f7b1042582db084a852ba9f'), ObjectId('5f7b1042582db084a852baa0'), ObjectId('5f7b1042582db084a852baa1'), ObjectId('5f7b1042582db084a852baa2'), ObjectId('5f7b1042582db084a852baa3'), ObjectId('5f7b1042582db084a852baa4'), ObjectId('5f7b1042582db084a852baa5')]

Insert Multiple Documents, with Specified IDs

In [25]:
new_list = [
  { "_id": 1, "name": "John", "address": "Highway 37"},
  { "_id": 2, "name": "Peter", "address": "Lowstreet 27"},
  { "_id": 3, "name": "Amy", "address": "Apple st 652"},
  { "_id": 4, "name": "Hannah", "address": "Mountain 21"},
  { "_id": 5, "name": "Michael", "address": "Valley 345"},
  { "_id": 6, "name": "Sandy", "address": "Ocean blvd 2"},
  { "_id": 7, "name": "Betty", "address": "Green Grass 1"},
  { "_id": 8, "name": "Richard", "address": "Sky st 331"},
  { "_id": 9, "name": "Susan", "address": "One way 98"},
  { "_id": 10, "name": "Vicky", "address": "Yellow Garden 2"},
  { "_id": 11, "name": "Ben", "address": "Park Lane 38"},
  { "_id": 12, "name": "William", "address": "Central st 954"},
  { "_id": 13, "name": "Chuck", "address": "Main Road 989"},
  { "_id": 14, "name": "Viola", "address": "Sideway 1633"}
]

insert_list = collection.insert_many(new_list)

#print list of the _id values of the inserted documents:
print(insert_list.inserted_ids)
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

Find

find_one

In [27]:
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["mydatabase"]
collection = db["customers"]

find_doc = collection.find_one()

print(find_doc)
{'_id': ObjectId('5f7b0ebd582db084a852ba98'), 'name': 'John', 'address': 'Highway 37'}

find

In [30]:
for x in collection.find():
  print(x)
{'_id': ObjectId('5f7b0ebd582db084a852ba98'), 'name': 'John', 'address': 'Highway 37'}
{'_id': ObjectId('5f7b1042582db084a852ba9a'), 'name': 'Amy', 'address': 'Apple st 652'}
{'_id': ObjectId('5f7b1042582db084a852ba9b'), 'name': 'Hannah', 'address': 'Mountain 21'}
{'_id': ObjectId('5f7b1042582db084a852ba9c'), 'name': 'Michael', 'address': 'Valley 345'}
{'_id': ObjectId('5f7b1042582db084a852ba9d'), 'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'_id': ObjectId('5f7b1042582db084a852ba9e'), 'name': 'Betty', 'address': 'Green Grass 1'}
{'_id': ObjectId('5f7b1042582db084a852ba9f'), 'name': 'Richard', 'address': 'Sky st 331'}
{'_id': ObjectId('5f7b1042582db084a852baa0'), 'name': 'Susan', 'address': 'One way 98'}
{'_id': ObjectId('5f7b1042582db084a852baa1'), 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': ObjectId('5f7b1042582db084a852baa2'), 'name': 'Ben', 'address': 'Park Lane 38'}
{'_id': ObjectId('5f7b1042582db084a852baa3'), 'name': 'William', 'address': 'Central st 954'}
{'_id': ObjectId('5f7b1042582db084a852baa4'), 'name': 'Chuck', 'address': 'Main Road 989'}
{'_id': ObjectId('5f7b1042582db084a852baa5'), 'name': 'Viola', 'address': 'Sideway 1633'}
{'_id': 1, 'name': 'John', 'address': 'Highway 37'}
{'_id': 2, 'name': 'Peter', 'address': 'Lowstreet 27'}
{'_id': 3, 'name': 'Amy', 'address': 'Apple st 652'}
{'_id': 4, 'name': 'Hannah', 'address': 'Mountain 21'}
{'_id': 5, 'name': 'Michael', 'address': 'Valley 345'}
{'_id': 6, 'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'_id': 7, 'name': 'Betty', 'address': 'Green Grass 1'}
{'_id': 8, 'name': 'Richard', 'address': 'Sky st 331'}
{'_id': 9, 'name': 'Susan', 'address': 'One way 98'}
{'_id': 10, 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': 11, 'name': 'Ben', 'address': 'Park Lane 38'}
{'_id': 12, 'name': 'William', 'address': 'Central st 954'}
{'_id': 13, 'name': 'Chuck', 'address': 'Main Road 989'}
{'_id': 14, 'name': 'Viola', 'address': 'Sideway 1633'}
In [31]:
for x in collection.find({},{ "_id": 0, "name": 1, "address": 1 }):
  print(x)
{'name': 'John', 'address': 'Highway 37'}
{'name': 'Amy', 'address': 'Apple st 652'}
{'name': 'Hannah', 'address': 'Mountain 21'}
{'name': 'Michael', 'address': 'Valley 345'}
{'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'name': 'Betty', 'address': 'Green Grass 1'}
{'name': 'Richard', 'address': 'Sky st 331'}
{'name': 'Susan', 'address': 'One way 98'}
{'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'name': 'Ben', 'address': 'Park Lane 38'}
{'name': 'William', 'address': 'Central st 954'}
{'name': 'Chuck', 'address': 'Main Road 989'}
{'name': 'Viola', 'address': 'Sideway 1633'}
{'name': 'John', 'address': 'Highway 37'}
{'name': 'Peter', 'address': 'Lowstreet 27'}
{'name': 'Amy', 'address': 'Apple st 652'}
{'name': 'Hannah', 'address': 'Mountain 21'}
{'name': 'Michael', 'address': 'Valley 345'}
{'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'name': 'Betty', 'address': 'Green Grass 1'}
{'name': 'Richard', 'address': 'Sky st 331'}
{'name': 'Susan', 'address': 'One way 98'}
{'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'name': 'Ben', 'address': 'Park Lane 38'}
{'name': 'William', 'address': 'Central st 954'}
{'name': 'Chuck', 'address': 'Main Road 989'}
{'name': 'Viola', 'address': 'Sideway 1633'}
In [38]:
for x in collection.find({"name": "Betty"}):
  print(x)
{'_id': ObjectId('5f7b1042582db084a852ba9e'), 'name': 'Betty', 'address': 'Green Grass 1'}
{'_id': 7, 'name': 'Betty', 'address': 'Green Grass 1'}
In [34]:
import pprint

pprint.pprint(collection.find_one())
{'_id': ObjectId('5f7b0ebd582db084a852ba98'),
 'address': 'Highway 37',
 'name': 'John'}
In [37]:
pprint.pprint(collection.find_one({"name": "Betty"}))
{'_id': ObjectId('5f7b1042582db084a852ba9e'),
 'address': 'Green Grass 1',
 'name': 'Betty'}

Query

In [39]:
query = { "address": "Park Lane 38" }

doc = collection.find(query)

for x in doc:
  print(x)
{'_id': ObjectId('5f7b1042582db084a852baa2'), 'name': 'Ben', 'address': 'Park Lane 38'}
{'_id': 11, 'name': 'Ben', 'address': 'Park Lane 38'}
In [40]:
# Find documents where the address starts with the letter "S" or higher:

query = { "address": { "$gt": "S" } }

doc = collection.find(query)

for x in doc:
  print(x)
{'_id': ObjectId('5f7b1042582db084a852ba9c'), 'name': 'Michael', 'address': 'Valley 345'}
{'_id': ObjectId('5f7b1042582db084a852ba9f'), 'name': 'Richard', 'address': 'Sky st 331'}
{'_id': ObjectId('5f7b1042582db084a852baa1'), 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': ObjectId('5f7b1042582db084a852baa5'), 'name': 'Viola', 'address': 'Sideway 1633'}
{'_id': 5, 'name': 'Michael', 'address': 'Valley 345'}
{'_id': 8, 'name': 'Richard', 'address': 'Sky st 331'}
{'_id': 10, 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': 14, 'name': 'Viola', 'address': 'Sideway 1633'}

Regular Expressions

In [41]:
query = { "address": { "$regex": "^S" } }

doc = collection.find(query)

for x in doc:
  print(x)
{'_id': ObjectId('5f7b1042582db084a852ba9f'), 'name': 'Richard', 'address': 'Sky st 331'}
{'_id': ObjectId('5f7b1042582db084a852baa5'), 'name': 'Viola', 'address': 'Sideway 1633'}
{'_id': 8, 'name': 'Richard', 'address': 'Sky st 331'}
{'_id': 14, 'name': 'Viola', 'address': 'Sideway 1633'}

Sort

In [44]:
#  ascending

doc = collection.find().sort("name")

for x in doc:
  print(x)
{'_id': ObjectId('5f7b1042582db084a852ba9a'), 'name': 'Amy', 'address': 'Apple st 652'}
{'_id': 3, 'name': 'Amy', 'address': 'Apple st 652'}
{'_id': ObjectId('5f7b1042582db084a852baa2'), 'name': 'Ben', 'address': 'Park Lane 38'}
{'_id': 11, 'name': 'Ben', 'address': 'Park Lane 38'}
{'_id': ObjectId('5f7b1042582db084a852ba9e'), 'name': 'Betty', 'address': 'Green Grass 1'}
{'_id': 7, 'name': 'Betty', 'address': 'Green Grass 1'}
{'_id': ObjectId('5f7b1042582db084a852baa4'), 'name': 'Chuck', 'address': 'Main Road 989'}
{'_id': 13, 'name': 'Chuck', 'address': 'Main Road 989'}
{'_id': ObjectId('5f7b1042582db084a852ba9b'), 'name': 'Hannah', 'address': 'Mountain 21'}
{'_id': 4, 'name': 'Hannah', 'address': 'Mountain 21'}
{'_id': ObjectId('5f7b0ebd582db084a852ba98'), 'name': 'John', 'address': 'Highway 37'}
{'_id': 1, 'name': 'John', 'address': 'Highway 37'}
{'_id': ObjectId('5f7b1042582db084a852ba9c'), 'name': 'Michael', 'address': 'Valley 345'}
{'_id': 5, 'name': 'Michael', 'address': 'Valley 345'}
{'_id': 2, 'name': 'Peter', 'address': 'Lowstreet 27'}
{'_id': ObjectId('5f7b1042582db084a852ba9f'), 'name': 'Richard', 'address': 'Sky st 331'}
{'_id': 8, 'name': 'Richard', 'address': 'Sky st 331'}
{'_id': ObjectId('5f7b1042582db084a852ba9d'), 'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'_id': 6, 'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'_id': ObjectId('5f7b1042582db084a852baa0'), 'name': 'Susan', 'address': 'One way 98'}
{'_id': 9, 'name': 'Susan', 'address': 'One way 98'}
{'_id': ObjectId('5f7b1042582db084a852baa1'), 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': 10, 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': ObjectId('5f7b1042582db084a852baa5'), 'name': 'Viola', 'address': 'Sideway 1633'}
{'_id': 14, 'name': 'Viola', 'address': 'Sideway 1633'}
{'_id': ObjectId('5f7b1042582db084a852baa3'), 'name': 'William', 'address': 'Central st 954'}
{'_id': 12, 'name': 'William', 'address': 'Central st 954'}
In [45]:
#  descending

doc = collection.find().sort("name", -1)

for x in doc:
  print(x)
{'_id': ObjectId('5f7b1042582db084a852baa3'), 'name': 'William', 'address': 'Central st 954'}
{'_id': 12, 'name': 'William', 'address': 'Central st 954'}
{'_id': ObjectId('5f7b1042582db084a852baa5'), 'name': 'Viola', 'address': 'Sideway 1633'}
{'_id': 14, 'name': 'Viola', 'address': 'Sideway 1633'}
{'_id': ObjectId('5f7b1042582db084a852baa1'), 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': 10, 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': ObjectId('5f7b1042582db084a852baa0'), 'name': 'Susan', 'address': 'One way 98'}
{'_id': 9, 'name': 'Susan', 'address': 'One way 98'}
{'_id': ObjectId('5f7b1042582db084a852ba9d'), 'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'_id': 6, 'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'_id': ObjectId('5f7b1042582db084a852ba9f'), 'name': 'Richard', 'address': 'Sky st 331'}
{'_id': 8, 'name': 'Richard', 'address': 'Sky st 331'}
{'_id': 2, 'name': 'Peter', 'address': 'Lowstreet 27'}
{'_id': ObjectId('5f7b1042582db084a852ba9c'), 'name': 'Michael', 'address': 'Valley 345'}
{'_id': 5, 'name': 'Michael', 'address': 'Valley 345'}
{'_id': ObjectId('5f7b0ebd582db084a852ba98'), 'name': 'John', 'address': 'Highway 37'}
{'_id': 1, 'name': 'John', 'address': 'Highway 37'}
{'_id': ObjectId('5f7b1042582db084a852ba9b'), 'name': 'Hannah', 'address': 'Mountain 21'}
{'_id': 4, 'name': 'Hannah', 'address': 'Mountain 21'}
{'_id': ObjectId('5f7b1042582db084a852baa4'), 'name': 'Chuck', 'address': 'Main Road 989'}
{'_id': 13, 'name': 'Chuck', 'address': 'Main Road 989'}
{'_id': ObjectId('5f7b1042582db084a852ba9e'), 'name': 'Betty', 'address': 'Green Grass 1'}
{'_id': 7, 'name': 'Betty', 'address': 'Green Grass 1'}
{'_id': ObjectId('5f7b1042582db084a852baa2'), 'name': 'Ben', 'address': 'Park Lane 38'}
{'_id': 11, 'name': 'Ben', 'address': 'Park Lane 38'}
{'_id': ObjectId('5f7b1042582db084a852ba9a'), 'name': 'Amy', 'address': 'Apple st 652'}
{'_id': 3, 'name': 'Amy', 'address': 'Apple st 652'}

Delete

In [46]:
collection = db["customers"]

query = { "address": "Mountain 21" }

collection.delete_one(query)
Out[46]:
<pymongo.results.DeleteResult at 0x28bfcbd2588>

Delete Many Documents

In [47]:
collection = db["customers"]

query = { "address": {"$regex": "^S"} }

x = collection.delete_many(query)

print(x.deleted_count, " documents deleted.")
4  documents deleted.

Drop

In [ ]:
db = client["mydatabase"]
collection = db["customers"]

collection.drop()

Update

In [48]:
query = { "address": "Valley 345" }
newvalues = { "$set": { "address": "Canyon 123" } }

collection.update_one(query, newvalues)

#print "customers" after the update:
for x in collection.find():
  print(x)
{'_id': ObjectId('5f7b0ebd582db084a852ba98'), 'name': 'John', 'address': 'Highway 37'}
{'_id': ObjectId('5f7b1042582db084a852ba9a'), 'name': 'Amy', 'address': 'Apple st 652'}
{'_id': ObjectId('5f7b1042582db084a852ba9c'), 'name': 'Michael', 'address': 'Canyon 123'}
{'_id': ObjectId('5f7b1042582db084a852ba9d'), 'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'_id': ObjectId('5f7b1042582db084a852ba9e'), 'name': 'Betty', 'address': 'Green Grass 1'}
{'_id': ObjectId('5f7b1042582db084a852baa0'), 'name': 'Susan', 'address': 'One way 98'}
{'_id': ObjectId('5f7b1042582db084a852baa1'), 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': ObjectId('5f7b1042582db084a852baa2'), 'name': 'Ben', 'address': 'Park Lane 38'}
{'_id': ObjectId('5f7b1042582db084a852baa3'), 'name': 'William', 'address': 'Central st 954'}
{'_id': ObjectId('5f7b1042582db084a852baa4'), 'name': 'Chuck', 'address': 'Main Road 989'}
{'_id': 1, 'name': 'John', 'address': 'Highway 37'}
{'_id': 2, 'name': 'Peter', 'address': 'Lowstreet 27'}
{'_id': 3, 'name': 'Amy', 'address': 'Apple st 652'}
{'_id': 4, 'name': 'Hannah', 'address': 'Mountain 21'}
{'_id': 5, 'name': 'Michael', 'address': 'Valley 345'}
{'_id': 6, 'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'_id': 7, 'name': 'Betty', 'address': 'Green Grass 1'}
{'_id': 9, 'name': 'Susan', 'address': 'One way 98'}
{'_id': 10, 'name': 'Vicky', 'address': 'Yellow Garden 2'}
{'_id': 11, 'name': 'Ben', 'address': 'Park Lane 38'}
{'_id': 12, 'name': 'William', 'address': 'Central st 954'}
{'_id': 13, 'name': 'Chuck', 'address': 'Main Road 989'}

Limit

In [49]:
doc = collection.find().limit(5)

#print the result:
for x in doc:
  print(x)
{'_id': ObjectId('5f7b0ebd582db084a852ba98'), 'name': 'John', 'address': 'Highway 37'}
{'_id': ObjectId('5f7b1042582db084a852ba9a'), 'name': 'Amy', 'address': 'Apple st 652'}
{'_id': ObjectId('5f7b1042582db084a852ba9c'), 'name': 'Michael', 'address': 'Canyon 123'}
{'_id': ObjectId('5f7b1042582db084a852ba9d'), 'name': 'Sandy', 'address': 'Ocean blvd 2'}
{'_id': ObjectId('5f7b1042582db084a852ba9e'), 'name': 'Betty', 'address': 'Green Grass 1'}

Count

In [51]:
collection.count_documents({})
Out[51]:
22

Create dataframe

In [43]:
import pandas as pd

query = { "address": "Park Lane 38" }

doc = collection.find(query)

df =  pd.DataFrame(list(doc))

df
Out[43]:
_id name address
0 5f7b1042582db084a852baa2 Ben Park Lane 38
1 11 Ben Park Lane 38

Functions

In [ ]:
import pandas as pd
from pymongo import MongoClient


def mongo_connection(host, port, username, password, db):
    """ Create connection to mongo """

    if username and password:
        mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (username, password, host, port, db)
        connection = client(mongo_uri)
    else:
        connection = client(host, port)

    return connection[db]


def read_mongo(db, collection, query={}, host='localhost', port=27017, username=None, password=None, no_id=True):
    """ Read from Mongo and Store into DataFrame """

    # Connect to MongoDB
    db = mongo_connection(host=host, port=port, username=username, password=password, db=db)

    # Make a query to the specific DB and Collection
    cursor = db[collection].find(query)

    # Expand the cursor and construct the DataFrame
    df =  pd.DataFrame(list(cursor))

    # Delete the _id
    if no_id:
        del df['_id']

    return df

# if __name__ == '__main__':
#     mongo_connection()
#     df = read_mongo()