Discussion:
[Cocci] [bug] exists do not work if file group is too big (>49)
Jerome Glisse
2018-05-16 19:16:34 UTC
Permalink
When operating on too many files the exists keyword no longer work. For
instance:

@exists@
expression E1;
@@
set_page_dirty(
+NULL,
E1)

Using linux kernel as playground and looking at at fs/f2fs/gc.c where
there is set_page_dirty() calls in nested blocks.

Works:

spatch --no-includes --sp-file test.spatch arch/powerpc/kvm/book3s_64_mmu_radix.c arch/powerpc/kvm/e500_mmu.c arch/s390/kvm/interrupt.c arch/x86/kvm/svm.c block/bio.c drivers/block/zram/zram_drv.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/drm_gem.c drivers/gpu/drm/exynos/exynos_drm_g2d.c drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/i915_gem_fence_reg.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/gpu/drm/radeon/radeon_ttm.c drivers/gpu/drm/ttm/ttm_tt.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/hfi1/user_pages.c drivers/infiniband/hw/qib/qib_user_pages.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/media/common/videobuf2/videobuf2-dma-contig.c drivers/media/common/videobuf2/videobuf2-dma-sg.c drivers/media/common/videobuf2/videobuf2-vmalloc.c drivers/misc/genwqe/card_utils.c drivers/misc/vmw_vmci/vmci_queue_pair.c drivers/mtd/devices/block2mtd.c drivers/platform/goldfish/goldfish_pipe.c drivers/sbus/char/oradax.c drivers/staging/lustre/lustre/llite/rw26.c drivers/staging/lustre/lustre/llite/vvp_io.c drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c drivers/vhost/vhost.c drivers/video/fbdev/core/fb_defio.c fs/9p/vfs_addr.c fs/afs/dir.c fs/afs/file.c --include fs/afs/internal.h fs/afs/write.c fs/aio.c fs/block_dev.c fs/btrfs/disk-io.c fs/btrfs/extent_io.c fs/btrfs/file.c fs/btrfs/inode.c fs/btrfs/ioctl.c fs/btrfs/relocation.c fs/buffer.c fs/ceph/addr.c fs/f2fs/gc.c > toto


Don't works:

spatch --no-includes --sp-file test.spatch arch/powerpc/kvm/book3s_64_mmu_radix.c arch/powerpc/kvm/e500_mmu.c arch/s390/kvm/interrupt.c arch/x86/kvm/svm.c block/bio.c drivers/block/zram/zram_drv.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/drm_gem.c drivers/gpu/drm/exynos/exynos_drm_g2d.c drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/i915_gem_fence_reg.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/gpu/drm/radeon/radeon_ttm.c drivers/gpu/drm/ttm/ttm_tt.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/hfi1/user_pages.c drivers/infiniband/hw/qib/qib_user_pages.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/media/common/videobuf2/videobuf2-dma-contig.c drivers/media/common/videobuf2/videobuf2-dma-sg.c drivers/media/common/videobuf2/videobuf2-vmalloc.c drivers/misc/genwqe/card_utils.c drivers/misc/vmw_vmci/vmci_queue_pair.c drivers/mtd/devices/block2mtd.c drivers/platform/goldfish/goldfish_pipe.c drivers/sbus/char/oradax.c drivers/staging/lustre/lustre/llite/rw26.c drivers/staging/lustre/lustre/llite/vvp_io.c drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c drivers/vhost/vhost.c drivers/video/fbdev/core/fb_defio.c fs/9p/vfs_addr.c fs/afs/dir.c fs/afs/file.c --include fs/afs/internal.h fs/afs/write.c fs/aio.c fs/block_dev.c fs/btrfs/disk-io.c fs/btrfs/extent_io.c fs/btrfs/file.c fs/btrfs/inode.c fs/btrfs/ioctl.c fs/btrfs/relocation.c fs/buffer.c fs/ceph/addr.c fs/cifs/cifssmb.c fs/f2fs/gc.c > toto


Exact same command line except the latter have 49 files before
fs/f2fs/gc.c while the former has 48 (and it works then). Note
that it seems that running the script multiple times even when
they are too many files will eventualy work.

Cheers,
Jérôme
Julia Lawall
2018-05-16 19:37:39 UTC
Permalink
Post by Jerome Glisse
When operating on too many files the exists keyword no longer work. For
@exists@
expression E1;
@@
set_page_dirty(
+NULL,
E1)
Using linux kernel as playground and looking at at fs/f2fs/gc.c where
there is set_page_dirty() calls in nested blocks.
spatch --no-includes --sp-file test.spatch arch/powerpc/kvm/book3s_64_mmu_radix.c arch/powerpc/kvm/e500_mmu.c arch/s390/kvm/interrupt.c arch/x86/kvm/svm.c block/bio.c drivers/block/zram/zram_drv.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/drm_gem.c drivers/gpu/drm/exynos/exynos_drm_g2d.c drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/i915_gem_fence_reg.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/gpu/drm/radeon/radeon_ttm.c drivers/gpu/drm/ttm/ttm_tt.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/hfi1/user_pages.c drivers/infiniband/hw/qib/qib_user_pages.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/media/common/videobuf2/videobuf2-dma-contig.c drivers/media/common/videobuf2/videobuf2-dma-sg.c drivers/media/common/videobuf2/videobuf2-vmalloc.c drivers/misc/genwqe/card_utils.c drivers/misc/vmw_vmci/vmci_queue_pair.c drivers/mtd/devices/block2mtd.c drivers/platform/goldfish/goldfish_pipe.c drivers/sbus/!
char/oradax.c drivers/staging/lustre/lustre/llite/rw26.c drivers/staging/lustre/lustre/llite/vvp_io.c drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c drivers/vhost/vhost.c drivers/video/fbdev/core/fb_defio.c fs/9p/vfs_addr.c fs/afs/dir.c fs/afs/file.c --include fs/afs/internal.h fs/afs/write.c fs/aio.c fs/block_dev.c fs/btrfs/disk-io.c fs/btrfs/extent_io.c fs/btrfs/file.c fs/btrfs/inode.c fs/btrfs/ioctl.c fs/btrfs/relocation.c fs/buffer.c fs/ceph/addr.c fs/f2fs/gc.c > toto
Post by Jerome Glisse
spatch --no-includes --sp-file test.spatch arch/powerpc/kvm/book3s_64_mmu_radix.c arch/powerpc/kvm/e500_mmu.c arch/s390/kvm/interrupt.c arch/x86/kvm/svm.c block/bio.c drivers/block/zram/zram_drv.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/drm_gem.c drivers/gpu/drm/exynos/exynos_drm_g2d.c drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/i915_gem_fence_reg.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/gpu/drm/radeon/radeon_ttm.c drivers/gpu/drm/ttm/ttm_tt.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/hfi1/user_pages.c drivers/infiniband/hw/qib/qib_user_pages.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/media/common/videobuf2/videobuf2-dma-contig.c drivers/media/common/videobuf2/videobuf2-dma-sg.c drivers/media/common/videobuf2/videobuf2-vmalloc.c drivers/misc/genwqe/card_utils.c drivers/misc/vmw_vmci/vmci_queue_pair.c drivers/mtd/devices/block2mtd.c drivers/platform/goldfish/goldfish_pipe.c drivers/sbus/!
char/oradax.c drivers/staging/lustre/lustre/llite/rw26.c drivers/staging/lustre/lustre/llite/vvp_io.c drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c drivers/vhost/vhost.c drivers/video/fbdev/core/fb_defio.c fs/9p/vfs_addr.c fs/afs/dir.c fs/afs/file.c --include fs/afs/internal.h fs/afs/write.c fs/aio.c fs/block_dev.c fs/btrfs/disk-io.c fs/btrfs/extent_io.c fs/btrfs/file.c fs/btrfs/inode.c fs/btrfs/ioctl.c fs/btrfs/relocation.c fs/buffer.c fs/ceph/addr.c fs/cifs/cifssmb.c fs/f2fs/gc.c > toto
Post by Jerome Glisse
Exact same command line except the latter have 49 files before
fs/f2fs/gc.c while the former has 48 (and it works then). Note
that it seems that running the script multiple times even when
they are too many files will eventualy work.
I'm surprised that the number of files would have an impact, unless by not
working you mean that it runs out of memory or doesn't terminate.

In general, you should only give one file name or one directory name to
Coccinelle. If you give multiple files, then they are all treated at
once, ie their CFGs are all in memory at the same time. If you give the
name of a directory containing the files, then it will work on one file at
a time, and you can give the option -j XXX to have it run XXX threads in
parallel. Giving multiple file names on the command line is only for the
case where information collected from one file is needed for the treatment
of another file.

If the set of files you want to work on doesn't correspond to the
directory structure, you can make a file with the names of the files to
treat one by one each separated by a blank line, and then give the command
line argument --file-groups filename.

Actually, exists is not relevant to your rule, because it has no ...
Exists means that there exists a path through the ... that matches the
pattern. Otherwise, all paths have to match the pattern.

julia
Post by Jerome Glisse
Cheers,
Jérôme
_______________________________________________
Cocci mailing list
https://systeme.lip6.fr/mailman/listinfo/cocci
Jerome Glisse
2018-05-16 19:54:17 UTC
Permalink
Post by Jerome Glisse
Post by Jerome Glisse
When operating on too many files the exists keyword no longer work. For
@exists@
expression E1;
@@
set_page_dirty(
+NULL,
E1)
Using linux kernel as playground and looking at at fs/f2fs/gc.c where
there is set_page_dirty() calls in nested blocks.
spatch --no-includes --sp-file test.spatch arch/powerpc/kvm/book3s_64_mmu_radix.c arch/powerpc/kvm/e500_mmu.c arch/s390/kvm/interrupt.c arch/x86/kvm/svm.c block/bio.c drivers/block/zram/zram_drv.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/drm_gem.c drivers/gpu/drm/exynos/exynos_drm_g2d.c drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/i915_gem_fence_reg.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/gpu/drm/radeon/radeon_ttm.c drivers/gpu/drm/ttm/ttm_tt.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/hfi1/user_pages.c drivers/infiniband/hw/qib/qib_user_pages.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/media/common/videobuf2/videobuf2-dma-contig.c drivers/media/common/videobuf2/videobuf2-dma-sg.c drivers/media/common/videobuf2/videobuf2-vmalloc.c drivers/misc/genwqe/card_utils.c drivers/misc/vmw_vmci/vmci_queue_pair.c drivers/mtd/devices/block2mtd.c drivers/platform/goldfish/goldfish_pipe.c drivers/sbus/char/orad
ax.c drivers/staging/lustre/lustre/llite/rw26.c drivers/staging/lustre/lustre/llite/vvp_io.c drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c drivers/vhost/vhost.c drivers/video/fbdev/core/fb_defio.c fs/9p/vfs_addr.c fs/afs/dir.c fs/afs/file.c --include fs/afs/internal.h fs/afs/write.c fs/aio.c fs/block_dev.c fs/btrfs/disk-io.c fs/btrfs/extent_io.c fs/btrfs/file.c fs/btrfs/inode.c fs/btrfs/ioctl.c fs/btrfs/relocation.c fs/buffer.c fs/ceph/addr.c fs/f2fs/gc.c > toto
Post by Jerome Glisse
spatch --no-includes --sp-file test.spatch arch/powerpc/kvm/book3s_64_mmu_radix.c arch/powerpc/kvm/e500_mmu.c arch/s390/kvm/interrupt.c arch/x86/kvm/svm.c block/bio.c drivers/block/zram/zram_drv.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/drm_gem.c drivers/gpu/drm/exynos/exynos_drm_g2d.c drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/i915_gem_fence_reg.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/gpu/drm/radeon/radeon_ttm.c drivers/gpu/drm/ttm/ttm_tt.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/hfi1/user_pages.c drivers/infiniband/hw/qib/qib_user_pages.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/media/common/videobuf2/videobuf2-dma-contig.c drivers/media/common/videobuf2/videobuf2-dma-sg.c drivers/media/common/videobuf2/videobuf2-vmalloc.c drivers/misc/genwqe/card_utils.c drivers/misc/vmw_vmci/vmci_queue_pair.c drivers/mtd/devices/block2mtd.c drivers/platform/goldfish/goldfish_pipe.c drivers/sbus/char/orad
ax.c drivers/staging/lustre/lustre/llite/rw26.c drivers/staging/lustre/lustre/llite/vvp_io.c drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c drivers/vhost/vhost.c drivers/video/fbdev/core/fb_defio.c fs/9p/vfs_addr.c fs/afs/dir.c fs/afs/file.c --include fs/afs/internal.h fs/afs/write.c fs/aio.c fs/block_dev.c fs/btrfs/disk-io.c fs/btrfs/extent_io.c fs/btrfs/file.c fs/btrfs/inode.c fs/btrfs/ioctl.c fs/btrfs/relocation.c fs/buffer.c fs/ceph/addr.c fs/cifs/cifssmb.c fs/f2fs/gc.c > toto
Post by Jerome Glisse
Exact same command line except the latter have 49 files before
fs/f2fs/gc.c while the former has 48 (and it works then). Note
that it seems that running the script multiple times even when
they are too many files will eventualy work.
I'm surprised that the number of files would have an impact, unless by not
working you mean that it runs out of memory or doesn't terminate.
Don't work == only set_page_dirty in non nested block are modified,
set_page_dirty in nested block are not. So it runs and terminate and
all files are processed (ie it is not a command line size limitation
in the shell).
Post by Jerome Glisse
In general, you should only give one file name or one directory name to
Coccinelle. If you give multiple files, then they are all treated at
once, ie their CFGs are all in memory at the same time. If you give the
name of a directory containing the files, then it will work on one file at
a time, and you can give the option -j XXX to have it run XXX threads in
parallel. Giving multiple file names on the command line is only for the
case where information collected from one file is needed for the treatment
of another file.
Saddly for modification i am doing i need to treat many files as one
as otherwise it does not work and even sadder for me is that this groups
can get pretty big.
Post by Jerome Glisse
If the set of files you want to work on doesn't correspond to the
directory structure, you can make a file with the names of the files to
treat one by one each separated by a blank line, and then give the command
line argument --file-groups filename.
This is _slower_ (by an order of magnitude) and also spit out a lot of
error messages (about trying to cd to directory and failing) than passing
all the files on the command line.
Post by Jerome Glisse
Actually, exists is not relevant to your rule, because it has no ...
Exists means that there exists a path through the ... that matches the
pattern. Otherwise, all paths have to match the pattern.
Oh i thought i would need it, nonetheless when there is too many files
the above snipet stop at first level blocks and never modify block with
deeper nesting.

Thank you for all your feedback :)

Cheers,
Jérôme
Julia Lawall
2018-05-16 20:02:08 UTC
Permalink
Post by Jerome Glisse
Post by Jerome Glisse
Post by Jerome Glisse
When operating on too many files the exists keyword no longer work. For
@exists@
expression E1;
@@
set_page_dirty(
+NULL,
E1)
Using linux kernel as playground and looking at at fs/f2fs/gc.c where
there is set_page_dirty() calls in nested blocks.
spatch --no-includes --sp-file test.spatch arch/powerpc/kvm/book3s_64_mmu_radix.c arch/powerpc/kvm/e500_mmu.c arch/s390/kvm/interrupt.c arch/x86/kvm/svm.c block/bio.c drivers/block/zram/zram_drv.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/drm_gem.c drivers/gpu/drm/exynos/exynos_drm_g2d.c drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/i915_gem_fence_reg.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/gpu/drm/radeon/radeon_ttm.c drivers/gpu/drm/ttm/ttm_tt.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/hfi1/user_pages.c drivers/infiniband/hw/qib/qib_user_pages.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/media/common/videobuf2/videobuf2-dma-contig.c drivers/media/common/videobuf2/videobuf2-dma-sg.c drivers/media/common/videobuf2/videobuf2-vmalloc.c drivers/misc/genwqe/card_utils.c drivers/misc/vmw_vmci/vmci_queue_pair.c drivers/mtd/devices/block2mtd.c drivers/platform/goldfish/goldfish_pipe.c drivers/s!
bus/char/orad
Post by Jerome Glisse
Post by Jerome Glisse
ax.c drivers/staging/lustre/lustre/llite/rw26.c drivers/staging/lustre/lustre/llite/vvp_io.c drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c drivers/vhost/vhost.c drivers/video/fbdev/core/fb_defio.c fs/9p/vfs_addr.c fs/afs/dir.c fs/afs/file.c --include fs/afs/internal.h fs/afs/write.c fs/aio.c fs/block_dev.c fs/btrfs/disk-io.c fs/btrfs/extent_io.c fs/btrfs/file.c fs/btrfs/inode.c fs/btrfs/ioctl.c fs/btrfs/relocation.c fs/buffer.c fs/ceph/addr.c fs/f2fs/gc.c > toto
Post by Jerome Glisse
spatch --no-includes --sp-file test.spatch arch/powerpc/kvm/book3s_64_mmu_radix.c arch/powerpc/kvm/e500_mmu.c arch/s390/kvm/interrupt.c arch/x86/kvm/svm.c block/bio.c drivers/block/zram/zram_drv.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/drm_gem.c drivers/gpu/drm/exynos/exynos_drm_g2d.c drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/i915_gem_fence_reg.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/gpu/drm/radeon/radeon_ttm.c drivers/gpu/drm/ttm/ttm_tt.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/hfi1/user_pages.c drivers/infiniband/hw/qib/qib_user_pages.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/media/common/videobuf2/videobuf2-dma-contig.c drivers/media/common/videobuf2/videobuf2-dma-sg.c drivers/media/common/videobuf2/videobuf2-vmalloc.c drivers/misc/genwqe/card_utils.c drivers/misc/vmw_vmci/vmci_queue_pair.c drivers/mtd/devices/block2mtd.c drivers/platform/goldfish/goldfish_pipe.c drivers/s!
bus/char/orad
Post by Jerome Glisse
Post by Jerome Glisse
ax.c drivers/staging/lustre/lustre/llite/rw26.c drivers/staging/lustre/lustre/llite/vvp_io.c drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c drivers/vhost/vhost.c drivers/video/fbdev/core/fb_defio.c fs/9p/vfs_addr.c fs/afs/dir.c fs/afs/file.c --include fs/afs/internal.h fs/afs/write.c fs/aio.c fs/block_dev.c fs/btrfs/disk-io.c fs/btrfs/extent_io.c fs/btrfs/file.c fs/btrfs/inode.c fs/btrfs/ioctl.c fs/btrfs/relocation.c fs/buffer.c fs/ceph/addr.c fs/cifs/cifssmb.c fs/f2fs/gc.c > toto
Post by Jerome Glisse
Exact same command line except the latter have 49 files before
fs/f2fs/gc.c while the former has 48 (and it works then). Note
that it seems that running the script multiple times even when
they are too many files will eventualy work.
I'm surprised that the number of files would have an impact, unless by not
working you mean that it runs out of memory or doesn't terminate.
Don't work == only set_page_dirty in non nested block are modified,
set_page_dirty in nested block are not. So it runs and terminate and
all files are processed (ie it is not a command line size limitation
in the shell).
Post by Jerome Glisse
In general, you should only give one file name or one directory name to
Coccinelle. If you give multiple files, then they are all treated at
once, ie their CFGs are all in memory at the same time. If you give the
name of a directory containing the files, then it will work on one file at
a time, and you can give the option -j XXX to have it run XXX threads in
parallel. Giving multiple file names on the command line is only for the
case where information collected from one file is needed for the treatment
of another file.
Saddly for modification i am doing i need to treat many files as one
as otherwise it does not work and even sadder for me is that this groups
can get pretty big.
Post by Jerome Glisse
If the set of files you want to work on doesn't correspond to the
directory structure, you can make a file with the names of the files to
treat one by one each separated by a blank line, and then give the command
line argument --file-groups filename.
This is _slower_ (by an order of magnitude) and also spit out a lot of
error messages (about trying to cd to directory and failing) than passing
all the files on the command line.
Post by Jerome Glisse
Actually, exists is not relevant to your rule, because it has no ...
Exists means that there exists a path through the ... that matches the
pattern. Otherwise, all paths have to match the pattern.
Oh i thought i would need it, nonetheless when there is too many files
the above snipet stop at first level blocks and never modify block with
deeper nesting.
Thank you for all your feedback :)
Sorry, but none of what you are saying makes any sense to me. Coccinele
works one function at a time. There is no reason why its behavior on
nested calls would be affected by the number of files. And no reason why
working on one file at a time would be slower than working on all of the
files at once. I'll have to try your command lines, but I probably can't
do that until tomorrow afternoon...

julia
Jerome Glisse
2018-05-16 20:15:59 UTC
Permalink
Post by Julia Lawall
Post by Jerome Glisse
Post by Jerome Glisse
Post by Jerome Glisse
When operating on too many files the exists keyword no longer work. For
@exists@
expression E1;
@@
set_page_dirty(
+NULL,
E1)
Using linux kernel as playground and looking at at fs/f2fs/gc.c where
there is set_page_dirty() calls in nested blocks.
spatch --no-includes --sp-file test.spatch arch/powerpc/kvm/book3s_64_mmu_radix.c arch/powerpc/kvm/e500_mmu.c arch/s390/kvm/interrupt.c arch/x86/kvm/svm.c block/bio.c drivers/block/zram/zram_drv.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/drm_gem.c drivers/gpu/drm/exynos/exynos_drm_g2d.c drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/i915_gem_fence_reg.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/gpu/drm/radeon/radeon_ttm.c drivers/gpu/drm/ttm/ttm_tt.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/hfi1/user_pages.c drivers/infiniband/hw/qib/qib_user_pages.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/media/common/videobuf2/videobuf2-dma-contig.c drivers/media/common/videobuf2/videobuf2-dma-sg.c drivers/media/common/videobuf2/videobuf2-vmalloc.c drivers/misc/genwqe/card_utils.c drivers/misc/vmw_vmci/vmci_queue_pair.c drivers/mtd/devices/block2mtd.c drivers/platform/goldfish/goldfish_pipe.c drivers/sbus/char/
orad
Post by Jerome Glisse
Post by Jerome Glisse
ax.c drivers/staging/lustre/lustre/llite/rw26.c drivers/staging/lustre/lustre/llite/vvp_io.c drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c drivers/vhost/vhost.c drivers/video/fbdev/core/fb_defio.c fs/9p/vfs_addr.c fs/afs/dir.c fs/afs/file.c --include fs/afs/internal.h fs/afs/write.c fs/aio.c fs/block_dev.c fs/btrfs/disk-io.c fs/btrfs/extent_io.c fs/btrfs/file.c fs/btrfs/inode.c fs/btrfs/ioctl.c fs/btrfs/relocation.c fs/buffer.c fs/ceph/addr.c fs/f2fs/gc.c > toto
Post by Jerome Glisse
spatch --no-includes --sp-file test.spatch arch/powerpc/kvm/book3s_64_mmu_radix.c arch/powerpc/kvm/e500_mmu.c arch/s390/kvm/interrupt.c arch/x86/kvm/svm.c block/bio.c drivers/block/zram/zram_drv.c drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c drivers/gpu/drm/drm_gem.c drivers/gpu/drm/exynos/exynos_drm_g2d.c drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/i915_gem_fence_reg.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/gpu/drm/radeon/radeon_ttm.c drivers/gpu/drm/ttm/ttm_tt.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/hfi1/user_pages.c drivers/infiniband/hw/qib/qib_user_pages.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/media/common/videobuf2/videobuf2-dma-contig.c drivers/media/common/videobuf2/videobuf2-dma-sg.c drivers/media/common/videobuf2/videobuf2-vmalloc.c drivers/misc/genwqe/card_utils.c drivers/misc/vmw_vmci/vmci_queue_pair.c drivers/mtd/devices/block2mtd.c drivers/platform/goldfish/goldfish_pipe.c drivers/sbus/char/
orad
Post by Jerome Glisse
Post by Jerome Glisse
ax.c drivers/staging/lustre/lustre/llite/rw26.c drivers/staging/lustre/lustre/llite/vvp_io.c drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c drivers/vhost/vhost.c drivers/video/fbdev/core/fb_defio.c fs/9p/vfs_addr.c fs/afs/dir.c fs/afs/file.c --include fs/afs/internal.h fs/afs/write.c fs/aio.c fs/block_dev.c fs/btrfs/disk-io.c fs/btrfs/extent_io.c fs/btrfs/file.c fs/btrfs/inode.c fs/btrfs/ioctl.c fs/btrfs/relocation.c fs/buffer.c fs/ceph/addr.c fs/cifs/cifssmb.c fs/f2fs/gc.c > toto
Post by Jerome Glisse
Exact same command line except the latter have 49 files before
fs/f2fs/gc.c while the former has 48 (and it works then). Note
that it seems that running the script multiple times even when
they are too many files will eventualy work.
I'm surprised that the number of files would have an impact, unless by not
working you mean that it runs out of memory or doesn't terminate.
Don't work == only set_page_dirty in non nested block are modified,
set_page_dirty in nested block are not. So it runs and terminate and
all files are processed (ie it is not a command line size limitation
in the shell).
Post by Jerome Glisse
In general, you should only give one file name or one directory name to
Coccinelle. If you give multiple files, then they are all treated at
once, ie their CFGs are all in memory at the same time. If you give the
name of a directory containing the files, then it will work on one file at
a time, and you can give the option -j XXX to have it run XXX threads in
parallel. Giving multiple file names on the command line is only for the
case where information collected from one file is needed for the treatment
of another file.
Saddly for modification i am doing i need to treat many files as one
as otherwise it does not work and even sadder for me is that this groups
can get pretty big.
Post by Jerome Glisse
If the set of files you want to work on doesn't correspond to the
directory structure, you can make a file with the names of the files to
treat one by one each separated by a blank line, and then give the command
line argument --file-groups filename.
This is _slower_ (by an order of magnitude) and also spit out a lot of
error messages (about trying to cd to directory and failing) than passing
all the files on the command line.
Post by Jerome Glisse
Actually, exists is not relevant to your rule, because it has no ...
Exists means that there exists a path through the ... that matches the
pattern. Otherwise, all paths have to match the pattern.
Oh i thought i would need it, nonetheless when there is too many files
the above snipet stop at first level blocks and never modify block with
deeper nesting.
Thank you for all your feedback :)
Sorry, but none of what you are saying makes any sense to me. Coccinele
works one function at a time. There is no reason why its behavior on
nested calls would be affected by the number of files. And no reason why
working on one file at a time would be slower than working on all of the
files at once. I'll have to try your command lines, but I probably can't
do that until tomorrow afternoon...
Yes this is weird bug i actually tested again and it only shows if
exists is use. If i remove exists like you adviced it seems to work
no matter how many files i pass as argument.

When the rules has the exists modifier then it only modify inside
nested block if i pass 48 files or less. If i pass 49 files or more
then nested block are not modified but first level block is.

My buest guess is some memory corruption somewhere or some variable
that is not reset properly.

I am on version 1.0.6 (fedora 27).

Note i am in no rush, i just wanted to report this as it is likely a
bug somewhere moreover i have a work around.

Cheers,
Jérôme
Jerome Glisse
2018-05-16 20:35:23 UTC
Permalink
spatch version 1.0.6 compiled with OCaml version 4.05.0
Flags passed to the configure script: --build=x86_64-redhat-linux-gnu --host=x86_64-redhat-linux-gnu --program-prefix= --disable-dependency-tracking --prefix=/usr --exec-prefix=/usr --bindir=/usr/bin --sbindir=/usr/sbin --sysconfdir=/etc --datadir=/usr/share --includedir=/usr/include --libdir=/usr/lib64 --libexecdir=/usr/libexec --localstatedir=/var --sharedstatedir=/var/lib --mandir=/usr/share/man --infodir=/usr/share/info --enable-release=yes --with-python=/usr/bin/python3 --with-menhir=/usr/bin/menhir
Python scripting support: yes
Syntax of regular expresssions: PCRE
I don't think this indicates whether it is bytecode or native code, and I
don't know what the fedora people offer.
My bytecode executable is 41M and my native code executable is 13M.
I am guessing native code:
-rwxr-xr-x. 1 root root 1.3K May 10 13:51 /usr/bin/spatch

Jérôme
Jerome Glisse
2018-05-17 14:50:25 UTC
Permalink
Note i am in no rush, i just wanted to report this as it is likely a bug somewhere
Thanks for your description of a strange software behaviour.
How often do you work with the specification “exists” in other SmPL scripts?
I need to review all of them but i think i can remove most of them
now that Julia explained that i do not need them if there is no ...
between first match and second match in my rules.
moreover i have a work around.
Is the other transformation approach the solution which is really desired?
My work around was to add function with nested block to an extra
group on which i run the same semantic patch again to take care of
nested block when the original group as two big.

This is a minor inconvenience now that i have found why my semantic
patch was not modifying nested block (took me a while to converge
on the number of files as root of the issue).

Cheers,
Jérôme
Jerome Glisse
2018-05-17 21:32:59 UTC
Permalink
In terms of the running time, I get a running time of 11 seconds with the
command line of 48 files and I get a running time of 22 seconds if I just
run spatch on the entire kernel. That seems slower, but if I use the
command line of 48 files, I get changes in 27 files, while if I run it on
the entire kernel, I get changes in 75 files. If I run it on the entire
kernel using 40 cores (-j 40), I get changes in 75 files in 1.7 seconds.
In this simple test case this would work but in my real spatch i have to
change one function at a time as otherwise there is conflict on header
update. Moreover i also have to use --recursive-headers if i don't do the
git grep to provide file list which is also one of the reasons why it
was much slower.
Ideally if header files could be updated --in-place even when there is
conflict i might first add proper #include as a first step to avoid the
recursive flag.
I'm not sure to understand any of the above. At the moment you have
--no-includes, so no headers are consulted. What do you need from header
files? Do you need type information? Do you need to change the files?
Do the changes in the header files interact with the changes in the .c
file?
If it just happens that there are also calls to set_page_dirty in the
header files that you want to update, then you can give the options
--no-includes --include-headers.
If the only thing you want from the header files is type information, then
you can say eg --recursive-includes --include-headers-for-types. That
would be much faster, because the transformation rules wouldn't be applied
over and over to the header files.
If you want to do both, the above options can be combined.
I need to update function prototype and one semantic patch might update
multiples function and those function might have function prototype in
same header files just couple lines of each others in which case this does
not work. This is an issue mostly when updating callback for which i do
not know the function name at time of writing the semantic patch.

The recursive is needed because in many intance the function prototype is
define in a header file that is not directly included by the c file but is
included through another include or a complex include chains.
int set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
then because inside include/linux/mm.h they are declare one after the
other --in-place will fails. This case is not the worse, i can split
this in 2 spatches and it would work. I can't do that when updating
callback which have same pattern ie 2 callback prototype declare one
after the other.
Do you actually need to automate this part?
I think I could be more helpful if I had an example that more competely
shows what you want to do.
Below is an actual example, i run it with:

spatch --dir . --include-headers -D grep --sp-file spatch

cat /tmp/unicorn-functions | while read line ; do git grep -l \
$line -- '*.[ch]' | sed -e "s/\(.\+\)\.h/--include \1.h/g" \
/tmp/unicorn-group ; echo "--include include/linux/fs.h" \
/tmp/unicorn-group ; echo "--include include/linux/mm.h" \
/tmp/unicorn-group ; cat /tmp/unicorn-group | tr '\n' ' ' ; \
echo ; done > /tmp/unicorn-groups

cat /tmp/unicorn-groups | while read line ; do spatch --in-place \
--no-includes --sp-file spatch $line ; done


spatch:

// First part of this file is only executed when -D grep argument is
// provided to spatch. It use to grep all function name that needs to
// be modified. Collected function name are written into a temporary
// file (/tmp/unicorn-functions) and git grep is use to find all the
// files that will be modified using second part of this semantic
// patch
virtual grep

// initialize file where we collect all function name (erase it)
@initialize:python depends on grep@
@@
file=open('/tmp/unicorn-functions', 'w')
file.close()

// match function name use as a callback
@G1 depends on grep@
identifier I1, FN;
@@
struct address_space_operations I1 = {..., .set_page_dirty = FN, ...};

@script:python depends on G1@
funcname << G1.FN;
@@
if funcname != "NULL":
file=open('/tmp/unicorn-functions', 'a')
file.write(funcname + '\n')
file.close()

// Also collect all function name that use a_ops->set_page_dirty()
@G2 depends on grep exists@
expression E1, E2;
identifier FN;
type T1;
@@
T1 FN(...) { ...
(
E1.a_ops->set_page_dirty(E2)
|
E1->a_ops->set_page_dirty(E2)
)
... }

@script:python depends on G2@
funcname << G2.FN;
@@
file=open('/tmp/unicorn-functions', 'a')
file.write(funcname + '\n')
print(funcname)
file.close()


// -------------------------------------------------------------------

// Below is the actual modification. We use the fn argument (provided
// by passing -D fn=value to spatch). To identify which function is
// being modified in this run. Semantic patch is run once per function
// because of conflict when updating multiple functions in one run.

// Update the address_space_operations structure (should happens only
// once with the first group of files)
@depends on !grep@
@@
struct address_space_operations { ... int (*set_page_dirty)(
+struct address_space *,
struct page *page); ... };

// Update caller of address_space_operation.set_page_dirty (pass NULL)
@depends on !grep@
expression E1, E2;
@@
E1.a_ops->set_page_dirty(
+MAPPING_NULL,
E2)

@depends on !grep@
expression E1, E2;
@@
E1->a_ops->set_page_dirty(
+MAPPING_NULL,
E2)

// Find function name use for the set_page_dirty callback (does not change
// anything, this just find the name)
@R1 depends on !grep exists@
identifier I1, virtual.fn;
@@
struct address_space_operations I1 = {..., .set_page_dirty = fn ,...};

// Add address_space argument to the function (set_page_dirty callback one)
@depends on R1@
type T1;
identifier I1;
identifier virtual.fn;
@@
int fn(
+struct address_space *__mapping,
T1 I1) { ... }

// Modify every place the function (use for set_page_dirty callback) is call
// to pass NULL as for the new address_space argument.
@depends on R1@
identifier virtual.fn;
expression E1;
@@
fn(
+MAPPING_NULL,
E1)

// Modify local variable use to store pointer to callback
@R2 depends on !grep exists@
expression E1;
identifier I1;
@@
int (*I1)(
+struct address_space *,
struct page *) = E1->a_ops->set_page_dirty;

@depends on R2 exists@
expression E1;
identifier R2.I1;
@@
-(*I1)(E1)
+I1(MAPPING_NULL, E1)
Jerome Glisse
2018-05-17 21:04:01 UTC
Permalink
Concretely, the problem is with LOCK_REQ, which is used as a typedef name
in cifs and as a enum constant in f2fs.
Would there be any chance of getting rid of the LOCK_REQ typedef?
Typedefs for structure types are discouraged by the Linux kernel coding
style.
julia
I will try to make patches for that, thought not anytime soon i am going
on PTO so not before i get back.

Thank you for debugging this further.

Cheers,
Jérôme

Continue reading on narkive:
Loading...