ObjC Runtime 中 Weak 属性的实现 (中)

导语

 

在上一篇中简单分析了 Weak 属性是如何被存储,获取和销毁的,其中的 SideTable 结构体当做黑盒进行处理。本文尝试对 SideTable 的结构进行一些分析。

观察

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
struct SideTable {
spinlock_t slock;
RefcountMap refcnts;
weak_table_t weak_table;

SideTable() {
memset(&weak_table, 0, sizeof(weak_table));
}

~SideTable() {
_objc_fatal("Do not delete SideTable.");
}

void lock() { slock.lock(); }
void unlock() { slock.unlock(); }
void forceReset() { slock.forceReset(); }

// Address-ordered lock discipline for a pair of side tables.

template<HaveOld, HaveNew>
static void lockTwo(SideTable *lock1, SideTable *lock2);
template<HaveOld, HaveNew>
static void unlockTwo(SideTable *lock1, SideTable *lock2);
};

SideTable 主要分为 3 部分

  • weak_table_t: weak 引用的全局 hash
  • RefcountMap : 引用计数的 hash
  • slock: 保证原子操作的自旋锁

static id storeWeak(id *location, objc_object *newObj) 方法中有

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Assign new value, if any.
if (haveNew) {
newObj = (objc_object *)
weak_register_no_lock(&newTable->weak_table, (id)newObj, location,
crashIfDeallocating);
// weak_register_no_lock returns nil if weak store should be rejected

// Set is-weakly-referenced bit in refcount table.
if (newObj && !newObj->isTaggedPointer()) {
newObj->setWeaklyReferenced_nolock();
}

// Do not set *location anywhere else. That would introduce a race.
*location = (id)newObj;
}

可知对于弱引用变量的保存,主要还是看 weak_table 这个属性

测试代码

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#import <Foundation/Foundation.h>

@interface WeakProperty : NSObject

@property (nonatomic,weak) NSObject *obj;
@property (nonatomic,weak) NSObject *obj2;
@property (nonatomic,weak) NSObject *obj3;
@property (nonatomic,weak) NSObject *obj4;
@property (nonatomic,weak) NSObject *obj5;

@end

@implementation WeakProperty

- (void)dealloc {
NSLog(@"%s",__func__);
}

@end


int main(int argc, const char * argv[]) {
@autoreleasepool {
WeakProperty *property = [[WeakProperty alloc] init];
NSObject *obj = [[NSObject alloc] init];
property.obj = obj;
NSLog(@"%@",property.obj);

// [1]
property.obj2 = obj;

// [2]
property.obj3 = obj;
property.obj4 = obj;
property.obj5 = obj;

// [3]
property.obj = nil;
}
return 0;
}

结构体: weak_table_t

 

weak_table_t 的定义在 objc-weak.h

1
2
3
4
5
6
7
8
9
10
/**
* The global weak references table. Stores object ids as keys,
* and weak_entry_t structs as their values.
*/
struct weak_table_t {
weak_entry_t *weak_entries;
size_t num_entries;
uintptr_t mask;
uintptr_t max_hash_displacement;
};

说明:

  • 一个指向 weak_entry_t 的指针
  • size_t(即 unsigned ) 类型的 num_entries ,用于描述 hash 表的长度
  • uintptr_t(即 unsigned long) 类型的 mask(掩码)
  • uintptr_t(即 unsigned long) 类型的 max_hash_displacement

结构体: weak_entry_t

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
struct weak_entry_t {
DisguisedPtr<objc_object> referent;
union {
struct {
weak_referrer_t *referrers;
uintptr_t out_of_line_ness : 2;
uintptr_t num_refs : PTR_MINUS_2;
uintptr_t mask;
uintptr_t max_hash_displacement;
};
struct {
// out_of_line_ness field is low bits of inline_referrers[1]
weak_referrer_t inline_referrers[WEAK_INLINE_COUNT];
};
};

bool out_of_line() {
return (out_of_line_ness == REFERRERS_OUT_OF_LINE);
}

weak_entry_t& operator=(const weak_entry_t& other) {
memcpy(this, &other, sizeof(other));
return *this;
}

weak_entry_t(objc_object *newReferent, objc_object **newReferrer)
: referent(newReferent)
{
inline_referrers[0] = newReferrer;
for (int i = 1; i < WEAK_INLINE_COUNT; i++) {
inline_referrers[i] = nil;
}
}
};

C++ 中,结构体是由关键词 struct 定义的一种数据类型。他的成员和基类默认为公有的(public)。由关键词 class 定义的成员和基类默认为私有的(private)。这是 C++结构体和类仅有的区别

类: DisguisedPtr

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
// DisguisedPtr<T> acts like pointer type T*, except the 
// stored value is disguised to hide it from tools like `leaks`.
// nil is disguised as itself so zero-filled memory works as expected,
// which means 0x80..00 is also disguised as itself but we don't care.
// Note that weak_entry_t knows about this encoding.
template <typename T>
class DisguisedPtr {
uintptr_t value; // unsigned long

static uintptr_t disguise(T* ptr) {
return -(uintptr_t)ptr;
}

static T* undisguise(uintptr_t val) {
return (T*)-val;
}

public:
DisguisedPtr() { }
DisguisedPtr(T* ptr)
: value(disguise(ptr)) { }
DisguisedPtr(const DisguisedPtr<T>& ptr)
: value(ptr.value) { }

DisguisedPtr<T>& operator = (T* rhs) {
value = disguise(rhs);
return *this;
}
DisguisedPtr<T>& operator = (const DisguisedPtr<T>& rhs) {
value = rhs.value;
return *this;
}

// 重载了一些指针的运算符
operator T* () const {
return undisguise(value);
}
T* operator -> () const {
return undisguise(value);
}
T& operator * () const {
return *undisguise(value);
}
T& operator [] (size_t i) const {
return undisguise(value)[i];
}

// pointer arithmetic operators omitted
// because we don't currently use them anywhere
};
1
2
3
4
// The address of a __weak variable.
// These pointers are stored disguised so memory analysis tools
// don't see lots of interior pointers from the weak table into objects.
typedef DisguisedPtr<objc_object *> weak_referrer_t;

小结

 

weak_entry_t 包含一个 DisguisedPtr<objc_object>,Disguised 是伪装的意思,根据注释可知,可以将 DisguisedPtr<T> 当成 T * 指针类型即可,在当前场景可看作是一个指向 objc_object 的指针类型

weak_referrer_tDisguisedPtr<objc_object *> 可以看成是 objc_object 指针的地址

接着是一个 unionout_of_line_nessinline_referrers[1] 共用了低 2 位,因为

1
2
3
4
5
6
// out_of_line_ness field overlaps with the low two bits of inline_referrers[1].
// inline_referrers[1] is a DisguisedPtr of a pointer-aligned address.
// The low two bits of a pointer-aligned DisguisedPtr will always be 0b00
// (disguised nil or 0x80..00) or 0b11 (any other address).
// Therefore out_of_line_ness == 0b10 is used to mark the out-of-line state.
#define REFERRERS_OUT_OF_LINE 2

注释说明了 DisuisedPtr 的低 2 位不是 0b00 就是 0b11,所以要表示 out-of-line 只能使用 out_of_line_ness == 0b10 (当 out_of_line_ness0b010b10 会分别得到 falsetrue )

num_refs30 (32位系统) / 62(64位系统)
maskmax_hash_displacementweak_table_t 中也有。

out_of_line() 方法在上面算是已经说过了

weak_entry_t& operator=(const weak_entry_t& other) {...} 重载运算符 =

The memcpy() function copies n bytes from memory area src
to memory area dest. The memory areas may not overlap.
Use memmove(3) if the memory areas do overlap.

从参数 other 所指的内存地址的起始位置开始拷贝 sizeof(other) 字节到 this 指针指向的当前对象的起始地址。

weak_entry_t(objc_object *newReferent, objc_object **newReferrer) : referent(newReferent)

: referent(newReferent) 是初始化列表,代表用参数 newReferent来初始化结构体中的 referent 属性。联合类型中的 inline_referrers[0] 接收参数 newReferrer ,并将剩下的 1,2,3 都置为 nil

函数: weak_register_no_lock

 

注释 [2] & [3]

1
2
3
4
/// Adds an (object, weak pointer) pair to the weak table.
/// 添加一个 (对象,弱引用指针)到 weak hash 表中
id weak_register_no_lock(weak_table_t *weak_table, id referent,
id *referrer, bool crashIfDeallocating);

具体实现如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/** 
* Registers a new (object, weak pointer) pair. Creates a new weak
* object entry if it does not exist.
*
* @param weak_table The global weak table.
* @param referent The object pointed to by the weak reference.
* @param referrer The weak pointer address.
*/
id
weak_register_no_lock(weak_table_t *weak_table, id referent_id,
id *referrer_id, bool crashIfDeallocating)
{
objc_object *referent = (objc_object *)referent_id;
objc_object **referrer = (objc_object **)referrer_id;

if (!referent || referent->isTaggedPointer()) return referent_id;

// ensure that the referenced object is viable
bool deallocating;
if (!referent->ISA()->hasCustomRR()) {
deallocating = referent->rootIsDeallocating();
}
else {
BOOL (*allowsWeakReference)(objc_object *, SEL) =
(BOOL(*)(objc_object *, SEL))
object_getMethodImplementation((id)referent,
SEL_allowsWeakReference);
if ((IMP)allowsWeakReference == _objc_msgForward) {
return nil;
}
deallocating =
! (*allowsWeakReference)(referent, SEL_allowsWeakReference);
}

if (deallocating) {
if (crashIfDeallocating) {
_objc_fatal("Cannot form weak reference to instance (%p) of "
"class %s. It is possible that this object was "
"over-released, or is in the process of deallocation.",
(void*)referent, object_getClassName((id)referent));
} else {
return nil;
}
}

// now remember it and where it is being stored
weak_entry_t *entry;
if ((entry = weak_entry_for_referent(weak_table, referent))) {
append_referrer(entry, referrer);
}
else {
weak_entry_t new_entry(referent, referrer);
weak_grow_maybe(weak_table);
weak_entry_insert(weak_table, &new_entry);
}

// Do not set *referrer. objc_storeWeak() requires that the
// value not change.

return referent_id;
}

弱引用属性 obj 在函数 weak_register_no_lock 中传递给行参 referent_id, 赋值给局部变量 referentlocation 传递给形参 referrer_id,赋值给局部变量 referrer

经过一些检查,比如是否允许弱引用,弱引用对象是否可用。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/** 
* Return the weak reference table entry for the given referent.
* If there is no entry for referent, return NULL.
* Performs a lookup.
*
* @param weak_table
* @param referent The object. Must not be nil.
*
* @return The table of weak referrers to this object.
*/
static weak_entry_t *
weak_entry_for_referent(weak_table_t *weak_table, objc_object *referent)
{
assert(referent);

weak_entry_t *weak_entries = weak_table->weak_entries;

if (!weak_entries) return nil;

size_t begin = hash_pointer(referent) & weak_table->mask;
size_t index = begin;
size_t hash_displacement = 0;
while (weak_table->weak_entries[index].referent != referent) {
index = (index+1) & weak_table->mask;
if (index == begin) bad_weak_table(weak_table->weak_entries);
hash_displacement++;
if (hash_displacement > weak_table->max_hash_displacement) {
return nil;
}
}

return &weak_table->weak_entries[index];
}

根据 referentkey ,在 weak_table 中通过遍历 weak_entries 数组,对referent 属性值进行比较的方式来查找元素,未找到,走 else

1
2
3
4
5
6
7
8
weak_entry_t(objc_object *newReferent, objc_object **newReferrer)
: referent(newReferent)
{
inline_referrers[0] = newReferrer;
for (int i = 1; i < WEAK_INLINE_COUNT; i++) {
inline_referrers[i] = nil;
}
}

执行 weak_entry_t 结构体的初始化

通过强转操作来伪装指针。接收 newReferrerreferrerinline_referrers[0] 在这里 *referrer 等于 nil 所以 inline_referres 数组元素全指向 nil,因为是无符号长整数,因此就是 0

函数: weak_grow_maybe

 

当弱引用的 hash 表的空间使用率达到 3/4 后,扩充 hash

1
2
3
4
5
6
7
8
9
10
// Grow the given zone's table of weak references if it is full.
static void weak_grow_maybe(weak_table_t *weak_table)
{
size_t old_size = TABLE_SIZE(weak_table);

// Grow if at least 3/4 full.
if (weak_table->num_entries >= old_size * 3 / 4) {
weak_resize(weak_table, old_size ? old_size*2 : 64);
}
}

函数: weak_entry_insert

 

添加元素到弱引用的 hash 表中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/** 
* Add new_entry to the object's table of weak references.
* Does not check whether the referent is already in the table.
*/
static void weak_entry_insert(weak_table_t *weak_table, weak_entry_t *new_entry)
{
weak_entry_t *weak_entries = weak_table->weak_entries;
assert(weak_entries != nil);

size_t begin = hash_pointer(new_entry->referent) & (weak_table->mask);
size_t index = begin;
size_t hash_displacement = 0;
while (weak_entries[index].referent != nil) {
index = (index+1) & weak_table->mask;
if (index == begin) bad_weak_table(weak_entries);
hash_displacement++;
}

weak_entries[index] = *new_entry;
weak_table->num_entries++;

if (hash_displacement > weak_table->max_hash_displacement) {
weak_table->max_hash_displacement = hash_displacement;
}
}

获取 new_entryreferent 属性,即弱引用的 obj 属性,以其地址的无符号长整数取相反数来做参数,通过移位与位移进行 hash 操作,通过 weak_table->mask(63 = 0b111111) 掩码保留 hash 操作后的低 6 位( 64 位系统),作为索引,接下来用 while (weak_entries[index].referent != nil) {...} ,解决 hash 碰撞的问题。然后添加到 hash 表中,修改表的长度

效果如上图所示,static id storeWeak(id *location, objc_object *newObj)locationnewObj 分别被保存到 weak_table_t 结构体的 referentinline_referrers 数组的首位。

查找 referent 是否存在的条件是

1
2
3
4
5
6
7
8
while (weak_table->weak_entries[index].referent != referent) {
index = (index+1) & weak_table->mask;
if (index == begin) bad_weak_table(weak_table->weak_entries);
hash_displacement++;
if (hash_displacement > weak_table->max_hash_displacement) {
return nil;
}
}

注释 [2] & [3]

进入 append_referrer 函数后

1
2
3
4
5
6
7
8
if (! entry->out_of_line()) {
// Try to insert inline.
for (size_t i = 0; i < WEAK_INLINE_COUNT; i++) {
if (entry->inline_referrers[i] == nil) {
entry->inline_referrers[i] = new_referrer;
return;
}
}

因为 entry-> out_of_line() 等于 false 会尝试添加到 (entry->inline_referrers 数组中。

取消 [2] 的注释,因为已经达到 4 个,所以在 obj5 时,会扩充。

1
2
3
4
5
6
7
8
9
10
11
// Couldn't insert inline. Allocate out of line.
weak_referrer_t *new_referrers = (weak_referrer_t *)
calloc(WEAK_INLINE_COUNT, sizeof(weak_referrer_t));
// This constructed table is invalid, but grow_refs_and_insert
// will fix it and rehash it.
for (size_t i = 0; i < WEAK_INLINE_COUNT; i++) {
new_referrers[i] = entry->inline_referrers[i];
}
entry->referrers = new_referrers;
entry->num_refs = WEAK_INLINE_COUNT;
entry->out_of_line_ness = REFERRERS_OUT_OF_LINE;

会设置 entry->out_of_line_nessREFERRERS_OUT_OF_LINE

结合注释

1
2
3
4
5
6
7
/**
* The internal structure stored in the weak references table.
* It maintains and stores
* a hash set of weak references pointing to an object.
* If out_of_line_ness != REFERRERS_OUT_OF_LINE then the set
* is instead a small inline array.
*/

可知当 weak 变量引用数量不多于 4 个时,会使用数组方式进行存储,而多于 4 个后会用 hash 表的方式进行存储。

函数: weak_unregister_no_lock

1
2
3
/// Removes an (object, weak pointer) pair from the weak table.
/// 从 weak hash 表中移除一个(对象,弱引用指针)
void weak_unregister_no_lock(weak_table_t *weak_table, id referent, id *referrer);

具体实现如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/** 
* Unregister an already-registered weak reference.
* This is used when referrer's storage is about to go away, but referent
* isn't dead yet. (Otherwise, zeroing referrer later would be a
* bad memory access.)
* Does nothing if referent/referrer is not a currently active weak reference.
* Does not zero referrer.
*
* FIXME currently requires old referent value to be passed in (lame)
* FIXME unregistration should be automatic if referrer is collected
*
* @param weak_table The global weak table.
* @param referent The object.
* @param referrer The weak reference.
*/
void
weak_unregister_no_lock(weak_table_t *weak_table, id referent_id,
id *referrer_id)
{
objc_object *referent = (objc_object *)referent_id;
objc_object **referrer = (objc_object **)referrer_id;

weak_entry_t *entry;

if (!referent) return;

if ((entry = weak_entry_for_referent(weak_table, referent))) {
remove_referrer(entry, referrer);
bool empty = true;
if (entry->out_of_line() && entry->num_refs != 0) {
empty = false;
}
else {
for (size_t i = 0; i < WEAK_INLINE_COUNT; i++) {
if (entry->inline_referrers[i]) {
empty = false;
break;
}
}
}

if (empty) {
weak_entry_remove(weak_table, entry);
}
}

// Do not set *referrer = nil. objc_storeWeak() requires that the
// value not change.
}

同样是使用 weak_entry_for_referent 函数查找弱引用是否存在

注释 [1] & [2] ,取消注释 [3]

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/** 
* Remove old_referrer from set of referrers, if it's present.
* Does not remove duplicates, because duplicates should not exist.
*
* @todo this is slow if old_referrer is not present. Is this ever the case?
*
* @param entry The entry holding the referrers.
* @param old_referrer The referrer to remove.
*/
static void remove_referrer(weak_entry_t *entry, objc_object **old_referrer)
{
if (! entry->out_of_line()) {
for (size_t i = 0; i < WEAK_INLINE_COUNT; i++) {
if (entry->inline_referrers[i] == old_referrer) {
entry->inline_referrers[i] = nil;
return;
}
}
...
objc_weak_error();
return;
}

size_t begin = w_hash_pointer(old_referrer) & (entry->mask);
size_t index = begin;
size_t hash_displacement = 0;
while (entry->referrers[index] != old_referrer) {
index = (index+1) & entry->mask;
if (index == begin) bad_weak_table(entry);
hash_displacement++;
if (hash_displacement > entry->max_hash_displacement) {
...
return;
}
}
entry->referrers[index] = nil;
entry->num_refs--;
}

移除时,是以 referrer 属性来比较,发现地址相同,将其置为 nil 来实现移除的效果。

函数: weak_clear_no_lock

1
2
3
/// Called on object destruction. Sets all remaining weak pointers to nil.
/// 在对象调用析构方法时,设置所有留下的弱引用指针为nil
void weak_clear_no_lock(weak_table_t *weak_table, id referent);

具体实现如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
/** 
* Called by dealloc; nils out all weak pointers that point to the
* provided object so that they can no longer be used.
*
* @param weak_table
* @param referent The object being deallocated.
*/
void
weak_clear_no_lock(weak_table_t *weak_table, id referent_id)
{
objc_object *referent = (objc_object *)referent_id;

weak_entry_t *entry = weak_entry_for_referent(weak_table, referent);
if (entry == nil) {
/// XXX shouldn't happen, but does with mismatched CF/objc
//printf("XXX no entry for clear deallocating %p\n", referent);
return;
}

// zero out references
weak_referrer_t *referrers;
size_t count;

if (entry->out_of_line()) {
referrers = entry->referrers;
count = TABLE_SIZE(entry);
}
else {
referrers = entry->inline_referrers;
count = WEAK_INLINE_COUNT;
}

for (size_t i = 0; i < count; ++i) {
objc_object **referrer = referrers[i];
if (referrer) {
if (*referrer == referent) {
*referrer = nil;
}
else if (*referrer) {
_objc_inform("__weak variable at %p holds %p instead of %p. "
"This is probably incorrect use of "
"objc_storeWeak() and objc_loadWeak(). "
"Break on objc_weak_error to debug.\n",
referrer, (void*)*referrer, (void*)referent);
objc_weak_error();
}
}
}

weak_entry_remove(weak_table, entry);
}

会被 dealloc 调用,根据注释和代码可知同样以 referentkey 遍历,然后依次将置为 nil,但是测试时,走的都是 if (entry == nil) 然后直接 return

总结

 

弱引用查找根据 referent 属性,首次会被存储到 weak_table_t 结构体 referentinline_referrers[0],当继续添加时,如果引用次数不大于 4 个保存在数组inline_referrers 中,当超过 4 个后以 hash 表的形式进行存储。移除时,根据 referrerinline_referrers 中移除。

参考

  1. int - What is size_t in C? - Stack Overflow
  2. wiki - C++类
  3. wiki - 位段
  4. Linux Programmer’s Manual memcpy
  5. inline bool objc_object::isTaggedPointer(); How does the function work?
  6. PHP哈希表碰撞攻击原理
  7. wiki - 掩码
  8. wiki - 哈希表
  9. When to use reinterpret_cast?
  10. OSObject
  11. c++ operator操作符的两种用法:重载和隐式类型转换,string转其他基本数据类型的简洁实现string_cast
  12. c++中冒号(:)和双冒号(::)的用法
  13. ARC 引用计数之weak