1. MongoDB 简介
MongoDB -- document database
-- not .pdf or .doc/.docx
-- is associative array
-- document == json object
-- document == php array
-- document == python dict
-- document == ruby hash
MongoDB 是一个 NoSQL 数据库。
你可以在这个官方网站了解 JSON 数据格式。
4. 为何使用 MongoDB
-- flexible schema
-- oriented toward programmers
-- flexible deployment
-- designed for big data
-- aggregation framework
你可以从官方 MongoDB 页面下载安装 MongoDB。你还可以阅读具体的 MongoDB 安装说明。
对于本课程中的大多数练习,你无需在计算机上安装 MongoDB,但是要想获得最好的学习体验,我们建议你这么做。安装既快捷又简单!
MongoDB 有大量驱动程序和客户端库。我们将在本课程中使用的是 PyMongo。请查阅官方文档,以了解 PyMongo 安装说明。
5. 预先了解 MongoDB
安装 pymongo 以便在本地运行此代码:
pip install pymongo
def add_city(db):
    """Insert the single document {"name": "Chicago"} into db.cities.

    Args:
        db: database handle exposing a `cities` collection.
    """
    db.cities.insert({"name": "Chicago"})
def get_city(db):
    """Return whatever db.cities.find_one() yields when called with no filter.

    Args:
        db: database handle exposing a `cities` collection.
    """
    return db.cities.find_one()
def get_db():
    """Return the 'examples' database from a client for localhost:27017.

    For local use. pymongo is imported inside the function.
    """
    from pymongo import MongoClient

    client = MongoClient('localhost:27017')
    # 'examples' here is the database name. It will be created if it does not exist.
    db = client.examples
    return db
if __name__ == "__main__":
    # For local use: the database handle must be obtained before it is used,
    # otherwise `db` is an undefined name here.
    db = get_db()
    add_city(db)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(get_city(db))
8. PyMongo 简介
# Example document: a nested dict mixing scalar values, lists and
# sub-documents, used as the sample record for the 'autos' collection.
tesla_s = {
    "manufacturer": "Tesla Motors",
    "class": "full-size",
    "body style": "5-door liftback",
    "production": [2012, 2013],
    "model years": [2013],
    "layout": ["Rear-motor", "rear-wheel drive"],
    # Nested sub-document.
    "designer": {
        "firstname": "Franz",
        "surname": "von Holzhausen",
    },
    # List of sub-documents; the two entries do not share the same keys.
    "assembly": [
        {
            "country": "United State",
            "city": "Fremont",
            "state": "california",
        },
        {
            "country": "the netherlands",
            "city": "tilburg",
        },
    ],
}
from pymongo import MongoClient
import pprint
# Create the client object from a connection string.
client = MongoClient('mongodb://localhost:27017/')
# Select the 'examples' database.
db = client.examples
# Insert the document 'tesla_s' into the 'autos' collection of that database.
# 'tesla_s' is deliberately not re-assigned here: resetting it to {} would
# insert an empty document instead of the one defined earlier in the file.
db.autos.insert(tesla_s)
# db.autos.find() with no arguments returns a cursor over the documents in
# the 'autos' collection; print each one.
for a in db.autos.find():
    pprint.pprint(a)
MongoDB ensures that any document we insert can be uniquely identified by its _id field, and if we don't specify a value for _id, MongoDB will create one for us.
9. 使用字段选择进行查询
We construct the query document, having the fields and values for those fields that we'd like to see in every document in our result set.
We're simply looping through our results and printing out each one of them.
def find():
    """Pretty-print every document in db.autos whose manufacturer is 'Toyota'.

    Uses the module-level names `db` and `pprint`.
    """
    autos = db.autos.find({'manufacturer': 'Toyota'})
    for a in autos:
        pprint.pprint(a)
示例
"""
Your task is to complete the 'porsche_query' function and in particular the query
to find all autos where the manufacturer field matches "Porsche".
"""
def porsche_query():
    """Return the query document matching autos manufactured by Porsche."""
    # Equality match on the 'manufacturer' field.
    query = {"manufacturer": "Porsche"}
    return query
# Code here is for local use on your own computer.
def get_db(db_name):
    """Return the database named `db_name` from a client for localhost:27017.

    For local use. pymongo is imported inside the function.

    Args:
        db_name: name used to index the client (client[db_name]).
    """
    from pymongo import MongoClient

    client = MongoClient('localhost:27017')
    db = client[db_name]
    return db
def find_porsche(db, query):
    """Run `query` against db.autos and return the result of find().

    For local use.

    Args:
        db: database handle exposing an `autos` collection.
        query: query document passed straight to find().
    """
    return db.autos.find(query)
if __name__ == "__main__":
    # For local use
    import pprint

    db = get_db('examples')
    query = porsche_query()
    results = find_porsche(db, query)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Printing first 3 results\n")
    # Only the first three results are printed.
    for car in results[:3]:
        pprint.pprint(car)
11. 多项字段查询
def find():
    """Pretty-print db.autos documents matching both field constraints.

    The query requires manufacturer 'Toyota' and class 'mid-size car'.
    Uses the module-level names `db` and `pprint`.
    """
    autos = db.autos.find({'manufacturer': 'Toyota', 'class': 'mid-size car'})
    for a in autos:
        pprint.pprint(a)
12. 投影查询
def find():
    """Pretty-print a projection of matching db.autos documents.

    The query requires manufacturer 'Toyota' and class 'mid-size car'; the
    projection document is passed as the second argument to find().
    Uses the module-level names `db` and `pprint`.
    """
    query = {'manufacturer': 'Toyota', 'class': 'mid-size car'}
    # Exclude '_id' and include 'name'.
    projection = {'_id': 0, 'name': 1}
    # Pass both documents to find(); otherwise the projection has no effect.
    autos = db.autos.find(query, projection)
    for a in autos:
        pprint.pprint(a)
13. 将数据导入 MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client.examples

# Report the collection size before and after inserting, so the effect of
# the loop is visible. The %-formatted single-argument print produces the
# same output on Python 2 and Python 3.
num_autos = db.myautos.find().count()
print("num_autos before: %s" % num_autos)
# NOTE(review): `autos` is not defined in this snippet; presumably an
# iterable of documents prepared earlier - confirm.
for a in autos:
    db.myautos.insert(a)
num_autos = db.myautos.find().count()
print("num_autos after %s" % num_autos)
14. 插入多个文档
"""
Add a single line of code to the insert_autos function that will insert the
automobile data into the 'autos' collection. The data variable that is
returned from the process_file function is a list of dictionaries, as in the
example in the previous video.
"""
from autos import process_file
def insert_autos(infile, db):
    """Parse `infile` with process_file and insert the result into db.autos.

    Args:
        infile: passed unchanged to process_file.
        db: database handle exposing an `autos` collection.
    """
    data = process_file(infile)
    # The whole parsed result goes to the collection in a single insert call.
    db.autos.insert(data)
if __name__ == "__main__":
    # Code here is for local use on your own computer.
    from pymongo import MongoClient

    client = MongoClient("mongodb://localhost:27017")
    db = client.examples

    insert_autos('autos-small.csv', db)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(db.autos.find_one())
15. 使用 mongoimport
将所有文档输出成 JSON 文档
实际两个步骤:
1. 数据清理
2. 将数据导入 MongoDB
查看帮助文档:
mongoimport --help
mongoimport -d examples -c myautos2 --file autos.json
-d examples 指定数据库
-c myautos2 指定存储数据的集合
--file autos.json 指定实际导入的文件名,该文件位于 examples 文件夹内
16. 运算符
不等式运算符
$gt (>)
$lt (<)
$gte (≥)
$lte (≤)
$ne (≠)
def find():
    """Print cities whose population is greater than 250000, then the count.

    Uses the module-level names `db` and `pprint`.
    """
    # '$gt' is the "greater than" operator; the field is spelled
    # 'population' (the misspelling 'polulation' names a different field).
    query = {'population': {'$gt': 250000}}
    cities = db.cities.find(query)
    num_cities = 0
    for c in cities:
        pprint.pprint(c)
        num_cities += 1
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("\nNumber of cities matching:%d\n" % num_cities)
def find():
    """Print cities with 250000 < population <= 500000, then the count.

    Uses the module-level names `db` and `pprint`.
    """
    # Two operators on the same field combine into a range; the variable
    # must be named `query` because that is what find() receives below.
    query = {'population': {'$gt': 250000, '$lte': 500000}}
    cities = db.cities.find(query)
    num_cities = 0
    for c in cities:
        pprint.pprint(c)
        num_cities += 1
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("\nNumber of cities matching:%d\n" % num_cities)
def find():
    """Print cities whose name satisfies 'X' <= name < 'Y', then the count.

    Uses the module-level names `db` and `pprint`.
    """
    # Range operators applied to string values; the variable must be named
    # `query` because that is what find() receives below.
    query = {'name': {'$gte': 'X', '$lt': 'Y'}}
    cities = db.cities.find(query)
    num_cities = 0
    for c in cities:
        pprint.pprint(c)
        num_cities += 1
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("\nNumber of cities matching:%d\n" % num_cities)
def find():
    """Print cities whose foundingDate falls within 1837, then the count.

    Uses the module-level names `db`, `pprint` and `datetime`.
    """
    # Inclusive bounds: 1 January 1837 through 31 December 1837. The variable
    # must be named `query` because that is what find() receives below.
    query = {'foundingDate': {'$gte': datetime(1837, 1, 1),
                              '$lte': datetime(1837, 12, 31)}}
    cities = db.cities.find(query)
    num_cities = 0
    for c in cities:
        pprint.pprint(c)
        num_cities += 1
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("\nNumber of cities matching:%d\n" % num_cities)
def find():
    """Print cities whose country is not 'United States', then the count.

    Uses the module-level names `db` and `pprint`.
    """
    # '$ne' is the "not equal" operator; the variable must be named `query`
    # because that is what find() receives below.
    query = {'country': {'$ne': 'United States'}}
    cities = db.cities.find(query)
    num_cities = 0
    for c in cities:
        pprint.pprint(c)
        num_cities += 1
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("\nNumber of cities matching:%d\n" % num_cities)
"""
Your task is to write a query that will return all cities
that are founded in 21st century.
Please modify only 'range_query' function, as only that will be taken into account.
"""
from datetime import datetime
def range_query():
    """Return the query document for cities founded in the 21st century.

    The query keeps documents whose 'foundingDate' is on or after
    1 January 2001.
    """
    # datetime(year, month, day) specifies the lower bound of the range.
    query = {"foundingDate": {"$gte": datetime(2001, 1, 1)}}
    return query
# Do not edit code below this line in the online code editor.
# Code here is for local use on your own computer.
def get_db():
    """Return the 'examples' database from a client for localhost:27017.

    For local use. pymongo is imported inside the function.
    """
    from pymongo import MongoClient

    client = MongoClient('localhost:27017')
    db = client.examples
    return db
if __name__ == "__main__":
    # For local use
    import pprint

    db = get_db()
    query = range_query()
    cities = db.cities.find(query)

    found = cities.count()
    # The %-formatted single-argument print produces the same output on
    # Python 2 and Python 3.
    print("Found cities: %s" % found)
    # Only index into the cursor when there is at least one result.
    if found:
        pprint.pprint(cities[0])
19. 存在($exists 运算符)
要在本地启动 mongo shell,请在终端中输入以下命令:
mongo
>use examples
switched to db examples
>db.cities.find() #将返回所有结果
$exists 运算符允许我们基于文档是否包含特定字段来检索文档
db.cities.find({"governmentType":{"$exists":1}}).count() #{"$exists":1}表示字段存在;count()表示对查询结果计数
db.cities.find({"governmentType":{"$exists":0}}).pretty() #{"$exists":0}表示字段不存在;pretty()表示以易读的格式输出查询结果
20. 正则运算符($regex)
MongoDB 支持使用 $regex 查询字符串模式
$regex
-- based on a regular expression library, specifically PCRE (Perl Compatible Regular Expressions)
-- allows us to do regular expression queries in MongoDB
db.cities.find({"motto":{"$regex":"friendship"}}).pretty()
A plain equality query such as {"motto":"friendship"} matches only documents where "friendship" is the entire string of the motto; with $regex, the query above matches every motto that contains "friendship" anywhere in the string.
db.cities.find({"motto":{"$regex":"[Ff]riendship"}}).pretty()
查找座右铭中包含“friendship”一词的所有文档,其中 friendship 的 f 可以大写,也可以小写
db.cities.find({"motto":{"$regex":"[Ff]riendship|[Pp]ride"}}).pretty()
该正则表达式将匹配 motto 包含词语 friendship 或者 pride 的所有文档,两个词语的首字母都可以大写或者小写
21. 使用标量查询
db.autos.find({"modelYears":1980}).pretty()
modelYears 字段对应的值是数组;用标量查询时,只要数组中包含该值,文档即匹配
23. 使用 $in 运算符查询
$in 运算符允许我们指定一个由候选值组成的数组
db.autos.find({"modelYears":{"$in":[1965,1966,1967]}}).count()
本查询将检索 modelYears 字段中包含数组 [1965,1966,1967] 中任意一个值的文档
示例
def in_query():
    """Return the query document for Ford autos assembled in selected countries.

    The query requires manufacturer 'Ford Motor Company' and an 'assembly'
    value that is any one of Germany, Japan or United Kingdom.
    """
    # '$in' takes a list of candidate values for the field.
    query = {
        "manufacturer": "Ford Motor Company",
        "assembly": {"$in": ["Germany", "Japan", "United Kingdom"]},
    }
    return query
# Do not edit code below this line in the online code editor.
# Code here is for local use on your own computer.
def get_db():
    """Return the 'examples' database from a client for localhost:27017.

    For local use. pymongo is imported inside the function.
    """
    from pymongo import MongoClient

    client = MongoClient('localhost:27017')
    db = client.examples
    return db
if __name__ == "__main__":
    import pprint

    db = get_db()
    query = in_query()
    # Second argument is the projection: include name, manufacturer and
    # assembly, and exclude _id.
    autos = db.autos.find(query, {"name": 1, "manufacturer": 1, "assembly": 1, "_id": 0})

    # The %-formatted single-argument print produces the same output on
    # Python 2 and Python 3.
    print("Found autos: %s" % autos.count())
    for a in autos:
        pprint.pprint(a)
24. 使用 $all 运算符查询
将检索字段中包含所指定全部值的文档
db.autos.find({"modelYears":{"$all":[1965,1966,1967]}})
25. 点表示法
query for values inside nested documents
db.tweets.find().pretty()
#!/usr/bin/env python
"""
Your task is to write a query that will return all cars with width dimension
greater than 2.5. Please modify only the 'dot_query' function, as only that
will be taken into account.
Your code will be run against a MongoDB instance that we have provided.
If you want to run this code locally on your machine, you will need to install
MongoDB, download and insert the dataset. For instructions related to MongoDB
setup and datasets, please see the Course Materials.
"""
def dot_query():
    """Return the query document for cars whose width dimension exceeds 2.5."""
    # Dot notation reaches the 'width' field inside the nested 'dimensions'
    # document; see example_auto.txt for a sample document structure.
    query = {"dimensions.width": {"$gt": 2.5}}
    return query
# Do not edit code below this line in the online code editor.
# Code here is for local use on your own computer.
def get_db():
    """Return the 'examples' database from a client for localhost:27017.

    For local use. pymongo is imported inside the function.
    """
    from pymongo import MongoClient

    client = MongoClient('localhost:27017')
    db = client.examples
    return db
if __name__ == "__main__":
    import pprint

    db = get_db()
    query = dot_query()
    cars = db.cars.find(query)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Printing first 3 results\n")
    # Only the first three results are printed.
    for car in cars[:3]:
        pprint.pprint(car)
26. 更新
对集合中现有文档进行修改
save()
def main():
    """Set isoCountryCode to 'DEU' on one matching city and save it back.

    Looks up the first document in db.cities with name 'munchen' and country
    'Germany'. Does nothing when no such document is found.
    Uses the module-level name `db`.
    """
    # find_one returns the first document it finds.
    city = db.cities.find_one({"name": "munchen", "country": "Germany"})
    if city is None:
        # No match: there is nothing to modify, and indexing None would fail.
        return
    city['isoCountryCode'] = 'DEU'
    db.cities.save(city)
save()
a method on collection objects
调用 save() 时,将更新本文档以包括该字段
27. 设置与取消设置 $set & $unset
update() 将查询文档作为第一个参数,将更新文档作为第二个参数
by default, update operates on just one document
$set
def find():
    """Set isoCountryCode to 'DEU' on a city matched by name and country.

    The first argument to update() is the query document (name 'munchen',
    country 'Germany'); the second is the update document.
    Uses the module-level name `db`.
    """
    db.cities.update(
        {"name": "munchen", "country": "Germany"},
        {"$set": {"isoCountryCode": "DEU"}},
    )
$set 的语义是:找到匹配的文档后,
如果该文档不包含这里指定的字段,那么添加该字段并设为提供的值
如果该文档已包含这里指定的字段,那么该字段更新为提供的值
$unset
def find():
    """Remove the isoCountryCode field from a city matched by name and country.

    The first argument to update() is the query document (name 'munchen',
    country 'Germany'); the second is the update document.
    Uses the module-level name `db`.
    """
    db.cities.update(
        {"name": "munchen", "country": "Germany"},
        # '$unset' removes the field; the value given for it is ignored.
        # Using '$set' here would instead store an empty string.
        {"$unset": {"isoCountryCode": ""}},
    )
$unset 的语义是:找到匹配的文档后,无论什么文档与该查询匹配
如果有这里指定的字段,删除该字段,忽略该值
如果文档没有这里指定的字段,那么该调用不产生任何效果
28. 多项更新
def find():
    """Set isoCountryCode to 'DEU' on every city whose country is 'Germany'.

    Uses the module-level name `db`.
    """
    db.cities.update(
        {"country": "Germany"},
        {"$set": {"isoCountryCode": "DEU"}},
        # multi=True applies the update to all matching documents.
        multi=True,
    )
By default, update will modify just the first document it finds;
in order to modify all documents that match the query, we need to specify
multi=True.
29. 删除文档
> use examples
switched to db examples
>db.cities.find() #返回集合中的所有文档
>db.cities.remove() #删除该集合的所有数据
>db.cities.drop() #删除集合以及与其相关的任何元数据,比如索引
>db.cities.remove({"name":"Chicago"}) #删除集合中 name 为 "Chicago" 的所有文档
>db.cities.remove({"name":{"$exists":0}}) #删除集合中所有 name 字段不存在的文档