| Revision | d3f71ae711cebdeaff12989761f48bd4230e83d5 |
|---|---|
| Date | 2016-01-30 08:46:49 |
| Author | Linus Torvalds <torvalds@linu...> |
| Committer | Linus Torvalds |
Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason:
* 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
```diff
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
 		list_add_tail(&work->ordered_list, &wq->ordered_list);
 		spin_unlock_irqrestore(&wq->list_lock, flags);
 	}
-	queue_work(wq->normal_wq, &work->normal_work);
 	trace_btrfs_work_queued(work);
+	queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
```
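The hunk itself carries no rationale, so treat the following as an inference rather than the commit's own explanation: once `queue_work()` returns, the worker may already have run on another CPU and freed `work`, so tracing the item afterwards risks a use-after-free. A minimal sketch of the hazard and the fixed ordering:

```c
/* Inferred hazard: after queue_work() returns, the worker can execute
 * immediately and free @work, so any later dereference races with it.
 */
queue_work(wq->normal_wq, &work->normal_work);
trace_btrfs_work_queued(work);		/* may touch freed memory */

/* Fixed ordering: finish consuming @work before publishing it. */
trace_btrfs_work_queued(work);
queue_work(wq->normal_wq, &work->normal_work);
```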
```diff
@@ -182,6 +182,7 @@ static struct btrfs_lockdep_keyset {
 	{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
 	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
 	{ .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
+	{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
 	{ .id = 0, .name_stem = "tree" },
 };
 
```
```diff
@@ -1787,7 +1788,6 @@ static int cleaner_kthread(void *arg)
 	int again;
 	struct btrfs_trans_handle *trans;
 
-	set_freezable();
 	do {
 		again = 0;
 
```
```diff
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+	void *mem;
+
+	/*
+	 * The allocation size varies, observed numbers were < 4K up to 16K.
+	 * Using vmalloc unconditionally would be too heavy, we'll try
+	 * contiguous allocations first.
+	 */
+	if (bitmap_size <= PAGE_SIZE)
+		return kzalloc(bitmap_size, GFP_NOFS);
+
+	mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+	if (mem)
+		return mem;
+
 	return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
 			 PAGE_KERNEL);
 }
```
```diff
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
```
```diff
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
```
```diff
@@ -7116,21 +7116,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 	if (ret)
 		return ERR_PTR(ret);
 
-	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-			      ins.offset, ins.offset, ins.offset, 0);
-	if (IS_ERR(em)) {
-		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		return em;
-	}
-
+	/*
+	 * Create the ordered extent before the extent map. This is to avoid
+	 * races with the fast fsync path that would lead to it logging file
+	 * extent items that point to disk extents that were not yet written to.
+	 * The fast fsync path collects ordered extents into a local list and
+	 * then collects all the new extent maps, so we must create the ordered
+	 * extent first and make sure the fast fsync path collects any new
+	 * ordered extents after collecting new extent maps as well.
+	 * The fsync path simply can not rely on inode_dio_wait() because it
+	 * causes deadlock with AIO.
+	 */
 	ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
 					   ins.offset, ins.offset, 0);
 	if (ret) {
 		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		free_extent_map(em);
 		return ERR_PTR(ret);
 	}
 
+	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+			      ins.offset, ins.offset, ins.offset, 0);
+	if (IS_ERR(em)) {
+		struct btrfs_ordered_extent *oe;
+
+		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+		oe = btrfs_lookup_ordered_extent(inode, start);
+		ASSERT(oe);
+		if (WARN_ON(!oe))
+			return em;
+		set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+		set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+		btrfs_remove_ordered_extent(inode, oe);
+		/* Once for our lookup and once for the ordered extents tree. */
+		btrfs_put_ordered_extent(oe);
+		btrfs_put_ordered_extent(oe);
+	}
 	return em;
 }
 
```
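The double put in the new error path reflects a common kernel refcounting rule: `btrfs_lookup_ordered_extent()` returns its own reference on top of the one the ordered-extents tree already holds, so fully dropping the entry takes two puts. A minimal sketch of that discipline with hypothetical types:

```c
#include <linux/kref.h>
#include <linux/slab.h>

/* Hypothetical entry following the same two-reference discipline:
 * the containing tree owns one reference, each lookup takes another.
 */
struct entry {
	struct kref refs;
};

static void entry_release(struct kref *refs)
{
	kfree(container_of(refs, struct entry, refs));
}

static void entry_teardown(struct entry *e)
{
	/* ... unlink @e from its tree here ... */
	kref_put(&e->refs, entry_release);	/* drop the lookup's reference */
	kref_put(&e->refs, entry_release);	/* drop the tree's reference */
}
```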
```diff
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
 	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
 	    root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
 	    root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+	    root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
 		return 1;
 	return 0;
 }
```
```diff
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(mixed_backref),
```
```diff
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(raid56),
 	BTRFS_FEAT_ATTR_PTR(skinny_metadata),
 	BTRFS_FEAT_ATTR_PTR(no_holes),
+	BTRFS_FEAT_ATTR_PTR(free_space_tree),
 	NULL
 };
 
```
```diff
@@ -780,6 +782,39 @@ failure:
 	return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set)
+{
+	struct btrfs_fs_devices *fs_devs;
+	struct kobject *fsid_kobj;
+	u64 features;
+	int ret;
+
+	if (!fs_info)
+		return;
+
+	features = get_features(fs_info, set);
+	ASSERT(bit & supported_feature_masks[set]);
+
+	fs_devs = fs_info->fs_devices;
+	fsid_kobj = &fs_devs->fsid_kobj;
+
+	if (!fsid_kobj->state_initialized)
+		return;
+
+	/*
+	 * FIXME: this is too heavy to update just one value, ideally we'd like
+	 * to use sysfs_update_group but some refactoring is needed first.
+	 */
+	sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+	ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
```
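For reference, the `sysfs_update_group()` mentioned in the FIXME re-evaluates a group's `.is_visible()` callback in place instead of tearing down and recreating every attribute. A hedged sketch of the lighter variant, assuming `btrfs_feature_attr_group`'s visibility callback re-reads the current superblock flags:

```c
/* Hypothetical replacement for the remove/create pair above, once the
 * group's .is_visible re-checks the feature bits on each call:
 */
ret = sysfs_update_group(fsid_kobj, &btrfs_feature_attr_group);
```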
```diff
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
```
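The one-word fix matters because `BTRFS_FEAT_ATTR()` token-pastes the flag prefix onto the feature name; presumably it went unnoticed until `free_space_tree` above became the first user of the COMPAT_RO variant. A rough expansion sketch (inferred, not stated in the hunk):

```c
/* With the corrected prefix, the new compat_ro attribute resolves to the
 * real flag BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE; the old prefix would
 * have produced the nonexistent BTRFS_FEATURE_COMPAT_FREE_SPACE_TREE.
 */
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
```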
```diff
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
 				struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
```
```diff
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
 	struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-						GFP_NOFS);
+						GFP_KERNEL);
 
 	if (!fs_info)
 		return fs_info;
 	fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->fs_devices) {
 		kfree(fs_info);
 		return NULL;
 	}
 	fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->super_copy) {
 		kfree(fs_info->fs_devices);
 		kfree(fs_info);
```
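These self-tests run at module init in plain process context, so the GFP_NOFS restriction (which exists to stop an allocation from re-entering the filesystem via reclaim) buys nothing here; GFP_KERNEL is the idiomatic default. The same reasoning presumably applies to the rest of the GFP_KERNEL conversions in the hunks that follow. A one-line illustration:

```c
/* GFP_NOFS is for contexts reachable from fs reclaim (e.g. writeback).
 * Self-test code at module load is not, so the unrestricted flag is safe:
 */
void *buf = kzalloc(size, GFP_KERNEL);	/* may recurse into fs reclaim: fine here */
```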
```diff
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
 {
 	struct btrfs_block_group_cache *cache;
 
-	cache = kzalloc(sizeof(*cache), GFP_NOFS);
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
 	if (!cache)
 		return NULL;
 	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-					GFP_NOFS);
+					GFP_KERNEL);
 	if (!cache->free_space_ctl) {
 		kfree(cache);
 		return NULL;
```
```diff
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
 	 * test.
 	 */
 	for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
 		if (!page) {
 			test_msg("Failed to allocate test page\n");
 			ret = -ENOMEM;
```
```diff
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
 	 * |--- delalloc ---|
 	 * |--- search ---|
 	 */
-	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
 	start = 0;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
```
```diff
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
 		test_msg("Couldn't find the locked page\n");
 		goto out_bits;
 	}
-	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
```
```diff
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
 	 *
 	 * We are re-using our test_start from above since it works out well.
 	 */
-	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
```
```diff
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
 	}
 	ret = 0;
 out_bits:
-	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
 	if (locked_page)
 		page_cache_release(locked_page);
```
```diff
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
 
 	test_msg("Running extent buffer bitmap tests\n");
 
-	bitmap = kmalloc(len, GFP_NOFS);
+	bitmap = kmalloc(len, GFP_KERNEL);
 	if (!bitmap) {
 		test_msg("Couldn't allocate test bitmap\n");
 		return -ENOMEM;
```
```diff
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
 			       (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
 			       EXTENT_DELALLOC | EXTENT_DIRTY |
 			       EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
```
```diff
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
 			       BTRFS_MAX_EXTENT_SIZE+8191,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
```
```diff
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
 	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
```
```diff
@@ -1096,7 +1096,7 @@ out:
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 			 EXTENT_DIRTY | EXTENT_DELALLOC |
 			 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			 NULL, GFP_NOFS);
+			 NULL, GFP_KERNEL);
 	iput(inode);
 	btrfs_free_dummy_root(root);
 	return ret;
```
```diff
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 				     struct inode *inode,
 				     struct btrfs_path *path,
 				     struct list_head *logged_list,
-				     struct btrfs_log_ctx *ctx)
+				     struct btrfs_log_ctx *ctx,
+				     const u64 start,
+				     const u64 end)
 {
 	struct extent_map *em, *n;
 	struct list_head extents;
```
```diff
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	}
 
 	list_sort(NULL, &extents, extent_cmp);
-
+	/*
+	 * Collect any new ordered extents within the range. This is to
+	 * prevent logging file extent items without waiting for the disk
+	 * location they point to being written. We do this only to deal
+	 * with races against concurrent lockless direct IO writes.
+	 */
+	btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
 	while (!list_empty(&extents)) {
 		em = list_entry(extents.next, struct extent_map, list);
```
```diff
@@ -4701,7 +4709,7 @@ log_extents:
 		goto out_unlock;
 	}
 	ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-					&logged_list, ctx);
+					&logged_list, ctx, start, end);
 	if (ret) {
 		err = ret;
 		goto out_unlock;
```