在Django中如需要新建一條紀錄,肯定會調用model.save()方法。那么django之后到底做了哪些工作呢?官方文檔是這樣說的:
- 發送一個django.db.models.signals.pre_save信號,以允許監聽該信號的函數完成一些自定義的動作。
- 預處理數據。 如果需要,對對象的每個字段進行自動轉換。大部分字段不需要預處理,字段的數據將保持原樣。預處理只用于具有特殊行為的字段。例如,如果你的模型具有一個auto_now=True的DateField,那么預處理階段將修改對象中的數據以確保該日期字段包含當前的時間戳。
- 準備數據庫數據。 要求每個字段提供的當前值是能夠寫入到對應數據庫中的類型。大部分字段不需要數據準備。簡單的數據類型,例如整數和字符串,是可以直接寫入的Python對象。但是,復雜的數據類型通常需要一些改動。例如,DateField字段使用Python的datetime對象來保存數據。數據庫保存的不是datetime對象,所以該字段的值必須轉換成ISO兼容的日期字符串才能插入到數據庫中。
- 插入數據到數據庫中。 將預處理過、準備好的數據組織成一個SQL 語句插入到數據庫中。
- 發送一個django.db.models.signals.post_save信號,以允許監聽該信號的函數完成一些自定義的動作。
看了說明,有個疑問還是不太清楚,save()是如何判斷對象的紀錄是新增呢還是更新呢?還是看源碼能夠比較清晰地回答這個疑問。下面是django 2.0的源碼,根據我自己的理解寫上了注釋。
就從save()開始
# django/db/models/base.py
def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
    """
    Validate the save arguments, decide which fields to write, then call save_base().

    Parameters:
        force_insert, force_update: force the save to be an INSERT or an UPDATE.
        using: alias of the database to save to; when falsy, it is chosen by
            router.db_for_write().
        update_fields: optional collection of field names to restrict the save
            to. An empty collection makes save() return without doing anything.

    Raises:
        ValueError: if a cached related object has no primary key, if
            force_insert is combined with force_update or update_fields, or if
            update_fields contains a name that is not a non-primary-key field
            name/attname of this model.
    """
    # Check the cached related objects: saving while one of them has no pk is refused.
    for field in self._meta.concrete_fields:
        if field.is_relation and field.is_cached(self):
            obj = getattr(self, field.name, None)
            if obj and obj.pk is None:
                # Clear the cached value on the related object first, but only
                # when the reverse side of the relation is not "multiple".
                if not field.remote_field.multiple:
                    field.remote_field.delete_cached_value(obj)
                raise ValueError(
                    "save() prohibited to prevent data loss due to "
                    "unsaved related object '%s'." % field.name
                )
    using = using or router.db_for_write(self.__class__, instance=self)
    if force_insert and (force_update or update_fields):
        raise ValueError("Cannot force both insert and updating in model saving.")
    deferred_fields = self.get_deferred_fields()
    # Case 1: the caller named the fields to update explicitly.
    if update_fields is not None:
        # Nothing to update: return before save_base() is reached.
        if len(update_fields) == 0:
            return
        update_fields = frozenset(update_fields)  # frozenset builds an immutable set
        # Collect every accepted name: field.name and, when different, field.attname,
        # for all non-primary-key fields.
        field_names = set()
        for field in self._meta.fields:
            if not field.primary_key:
                field_names.add(field.name)
                if field.name != field.attname:
                    field_names.add(field.attname)
        non_model_fields = update_fields.difference(field_names)
        if non_model_fields:
            raise ValueError("The following fields do not exist in this "
                             "model or are m2m fields: %s"
                             % ', '.join(non_model_fields))
    # Case 2: update_fields was not given, this is not a forced insert, the
    # instance has deferred fields, and the target database is the one recorded
    # in self._state.db. The save is then restricted to the concrete,
    # non-primary-key fields that are not deferred.
    elif not force_insert and deferred_fields and using == self._state.db:
        field_names = set()
        for field in self._meta.concrete_fields:
            if not field.primary_key and not hasattr(field, 'through'):
                field_names.add(field.attname)
        loaded_fields = field_names.difference(deferred_fields)
        if loaded_fields:
            update_fields = frozenset(loaded_fields)
    # Hand over to save_base() for the actual save.
    self.save_base(using=using, force_insert=force_insert,
                   force_update=force_update, update_fields=update_fields)
進入save_base(),主要邏輯是在這里處理
def save_base(self, raw=False, force_insert=False,
              force_update=False, using=None, update_fields=None):
    """
    Run the save itself: resolve proxy models, send signals, write inside a transaction.

    What this function does:
        1. If the model is a proxy, operate on its concrete model instead.
        2. Send pre_save / post_save (skipped when meta.auto_created is truthy).
        3. Save parents (unless raw) and this model's table inside
           transaction.atomic().

    Parameters:
        raw: when true, _save_parents() is skipped; the flag is also passed on
            to _save_table() and to both signals.
        force_insert, force_update, using, update_fields: passed on to
            _save_table(); `using` falls back to router.db_for_write().
    """
    # Re-check the arguments (save() already validated them with ValueError).
    using = using or router.db_for_write(self.__class__, instance=self)
    assert not (force_insert and (force_update or update_fields))
    assert update_fields is None or len(update_fields) > 0
    cls = origin = self.__class__
    # For a proxy model, work on the concrete model; `origin` keeps the
    # original class, which is what the signals use as sender.
    if cls._meta.proxy:
        cls = cls._meta.concrete_model
    meta = cls._meta
    # Step 1 of the save sequence: send the pre_save signal.
    if not meta.auto_created:
        pre_save.send(
            sender=origin, instance=self, raw=raw, using=using,
            update_fields=update_fields,
        )
    # Do the writes inside a transaction.
    with transaction.atomic(using=using, savepoint=False):
        if not raw:
            self._save_parents(cls, using, update_fields)
        updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)  # continue in _save_table()
    # Record the saved state: which database was used, and that the instance
    # is no longer being added.
    self._state.db = using
    self._state.adding = False
    # Step 5 of the save sequence: send the post_save signal. `created` is
    # true when _save_table() reported that no UPDATE took place.
    if not meta.auto_created:
        post_save.send(
            sender=origin, instance=self, created=(not updated),
            update_fields=update_fields, raw=raw, using=using,
        )
進入_save_table()
def _save_table(self, raw=False, cls=None, force_insert=False,
                force_update=False, using=None, update_fields=None):
    """
    Decide between UPDATE and INSERT for this model's table and perform it.

    Returns:
        The result of _do_update() when an UPDATE was attempted, otherwise
        False. A falsy value means the row was written with an INSERT.

    Raises:
        ValueError: if an update is requested (force_update or update_fields)
            but no primary key value is available.
        DatabaseError: if a forced update, or a save with update_fields,
            affected no rows.
    """
    meta = cls._meta
    non_pks = [f for f in meta.local_concrete_fields if not f.primary_key]
    # Narrow the fields to write when update_fields is given (matched by name or attname).
    if update_fields:
        non_pks = [f for f in non_pks
                   if f.name in update_fields or f.attname in update_fields]
    # Read the pk value; if there is none yet, ask the pk field for a value to
    # use on save and store it on the instance.
    pk_val = self._get_pk_val(meta)
    if pk_val is None:
        pk_val = meta.pk.get_pk_value_on_save(self)
        setattr(self, meta.pk.attname, pk_val)
    pk_set = pk_val is not None
    if not pk_set and (force_update or update_fields):
        raise ValueError("Cannot force an update in save() with no primary key.")
    updated = False
    # With a pk value (and no forced insert) try an UPDATE first. This is how
    # save() tells "update" from "insert": by whether a pk value is present
    # and whether the UPDATE reports success.
    if pk_set and not force_insert:
        base_qs = cls._base_manager.using(using)
        # In raw mode the attribute is written as-is; otherwise the field's
        # pre_save() supplies the value.
        values = [(f, None, (getattr(self, f.attname) if raw else f.pre_save(self, False)))
                  for f in non_pks]
        forced_update = update_fields or force_update
        # _do_update() performs the UPDATE.
        updated = self._do_update(base_qs, using, pk_val, values, update_fields,
                                  forced_update)
        if force_update and not updated:
            raise DatabaseError("Forced update did not affect any rows.")
        if update_fields and not updated:
            raise DatabaseError("Save with update_fields did not affect any rows.")
    # No UPDATE happened (or it changed nothing): fall back to INSERT.
    if not updated:
        if meta.order_with_respect_to:
            # Set _order to the number of existing rows matching the filter
            # derived from the order_with_respect_to field.
            field = meta.order_with_respect_to
            filter_args = field.get_filter_kwargs_for_object(self)
            order_value = cls._base_manager.using(using).filter(**filter_args).count()
            self._order = order_value
        fields = meta.local_concrete_fields
        # Without a pk value, leave the auto field out of the inserted columns.
        if not pk_set:
            fields = [f for f in fields if f is not meta.auto_field]
        update_pk = meta.auto_field and not pk_set
        # _do_insert() performs the INSERT; its result is used as the new pk
        # value when update_pk is truthy.
        result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
        # Store the newly obtained pk value on the instance.
        if update_pk:
            setattr(self, meta.pk.attname, result)
    return updated
先看INSERT一條紀錄,進入_do_insert()
def _do_insert(self, manager, using, fields, update_pk, raw):
"""
調(diào)用model的管理器執(zhí)行INSERT
"""
return manager._insert([self], fields=fields, return_id=update_pk,
using=using, raw=raw)
Manager的工作比較少,進入 _insert()
# django/db/models/query.py
def _insert(self, objs, fields, return_id=False, raw=False, using=None):
    """
    Build an InsertQuery for `objs` and run it through the query's compiler.

    The write flag is set before `self.db` is consulted, then a fresh
    sql.InsertQuery for self.model receives the fields and objects, and the
    compiler obtained for the target database executes it. Returns the
    result of the compiler's execute_sql(return_id).
    """
    # Keep this assignment first: it happens before self.db is read below.
    self._for_write = True
    if using is None:
        using = self.db
    insert_query = sql.InsertQuery(self.model)
    insert_query.insert_values(fields, objs, raw=raw)
    compiler = insert_query.get_compiler(using=using)
    return compiler.execute_sql(return_id)
結果實際的sql轉化交給了Query去實現,下面看這個InsertQuery是如何轉化的。
首先,獲得編譯器get_compiler()
class InsertQuery(Query):
    # Name of the compiler class for this query type; get_compiler() passes
    # self.compiler to connection.ops.compiler() to look it up.
    compiler = 'SQLInsertCompiler'
def get_compiler(self, using=None, connection=None):
    """
    Return a compiler instance for this query on the selected connection.

    The compiler class is looked up by name (self.compiler) through
    connection.ops.compiler() and instantiated with (self, connection, using).
    A truthy `using` selects connections[using], overriding any `connection`
    argument.

    Raises:
        ValueError: if both `using` and `connection` are None.
    """
    if connection is None and using is None:
        raise ValueError("Need either using or connection")
    backend = connections[using] if using else connection
    compiler_factory = backend.ops.compiler(self.compiler)
    return compiler_factory(self, backend, using)
第二,找到了對應的編譯器那就執行SQL,繼續看_insert()中最后的execute_sql()
# django/db/models/sql/compiler.py
class SQLInsertCompiler(SQLCompiler):
    def execute_sql(self, return_id=False):
        """
        Execute the INSERT statement(s) from as_sql() and optionally return the new id(s).

        Returns None unless `return_id` is truthy. Otherwise the id(s) are
        fetched through whichever mechanism the connection's feature flags
        select: fetch_returned_insert_ids(), fetch_returned_insert_id(), or
        last_insert_id().
        """
        # Returning ids for anything other than exactly one object requires
        # backend support for returning ids from a bulk insert.
        assert not (
            return_id and len(self.query.objs) != 1 and
            not self.connection.features.can_return_ids_from_bulk_insert
        )
        # as_sql() reads self.return_id, so store it before generating SQL.
        self.return_id = return_id
        # The conversion from fields to SQL text happens in as_sql().
        with self.connection.cursor() as cursor:
            for sql, params in self.as_sql():
                # Step 4 of the save sequence: execute the SQL.
                cursor.execute(sql, params)
            if not (return_id and cursor):
                return
            if self.connection.features.can_return_ids_from_bulk_insert and len(self.query.objs) > 1:
                return self.connection.ops.fetch_returned_insert_ids(cursor)
            if self.connection.features.can_return_id_from_insert:
                assert len(self.query.objs) == 1
                return self.connection.ops.fetch_returned_insert_id(cursor)
            # Fallback: ask the backend for the last inserted id of the pk column.
            return self.connection.ops.last_insert_id(
                cursor, self.query.get_meta().db_table, self.query.get_meta().pk.column
            )
終于到了最神秘也是最關鍵的地方,ORM中對象轉化成sql的部分as_sql()
def as_sql(self):
    """
    Build the INSERT statement(s) and their parameters.

    This is step 3 of the save sequence (preparing the data for the database).

    Returns:
        A list of (sql, params) pairs: a single pair when ids are returned by
        the insert or when bulk insert is used, otherwise one pair per row.
    """
    qn = self.connection.ops.quote_name
    # quote_name is provided by the connection, so quoting is backend-specific.
    # Example from the original author, for MySQL:
    # >>> name = "green"
    # >>> qn(name)
    # >>> "`green`"
    opts = self.query.get_meta()
    result = ['INSERT INTO %s' % qn(opts.db_table)]
    has_fields = bool(self.query.fields)
    # With no fields on the query, the pk column alone is listed.
    fields = self.query.fields if has_fields else [opts.pk]
    # Append the quoted column names.
    result.append('(%s)' % ', '.join(qn(f.column) for f in fields))
    if has_fields:
        # One row of values per object: pre_save_val() yields the field's value
        # and prepare_value() converts it for the database.
        # NOTE(review): per the original author's note, pre_save_val() calls
        # field.pre_save(obj, add=True) and prepare_value() handles fields that
        # need special conversion (e.g. DateTimeField) — not visible here, confirm.
        value_rows = [
            [self.prepare_value(field, self.pre_save_val(field, obj)) for field in fields]
            for obj in self.query.objs
        ]
    else:
        # An empty object.
        value_rows = [[self.connection.ops.pk_default_value()] for _ in self.query.objs]
        fields = [None]
    # Bulk insert is used only when no id is being returned and the backend
    # reports has_bulk_insert.
    can_bulk = (not self.return_id and self.connection.features.has_bulk_insert)
    # Build the placeholders and parameters for each row.
    placeholder_rows, param_rows = self.assemble_as_sql(fields, value_rows)
    # Below, the statement is assembled in the form the backend supports, and
    # the final SQL text and parameters are returned.
    if self.return_id and self.connection.features.can_return_id_from_insert:
        if self.connection.features.can_return_ids_from_bulk_insert:
            result.append(self.connection.ops.bulk_insert_sql(fields, placeholder_rows))
            params = param_rows
        else:
            # Only the first row is inserted in this branch.
            result.append("VALUES (%s)" % ", ".join(placeholder_rows[0]))
            params = [param_rows[0]]
        col = "%s.%s" % (qn(opts.db_table), qn(opts.pk.column))
        r_fmt, r_params = self.connection.ops.return_insert_id()
        # An empty r_fmt means nothing is appended for returning the id.
        if r_fmt:
            result.append(r_fmt % col)
            params += [r_params]
        return [(" ".join(result), tuple(chain.from_iterable(params)))]
    if can_bulk:
        # A single statement covering all rows, with the parameters flattened.
        result.append(self.connection.ops.bulk_insert_sql(fields, placeholder_rows))
        return [(" ".join(result), tuple(p for ps in param_rows for p in ps))]
    else:
        # One statement per row.
        return [
            (" ".join(result + ["VALUES (%s)" % ", ".join(p)]), vals)
            for p, vals in zip(placeholder_rows, param_rows)
        ]
最終生成了SQL執行完成,整個流程完成。
看到這里是不是覺得django調用鏈很長,我第一次看也沒堅持看完。下面來整理這個流程以便于理解,如果沒看完源碼也可以看下面的整理。
- Model
  model主要是處理外鍵關系,發送信號,判斷是更新還是插入操作,然后交給manager
- Manager
  manager找到處理的Query
- Query
  query根據不同的數據庫拼接sql,最后執行