pure subroutine radix_sort_u32_helper(N, arr, buf)
    ! Least-significant-digit radix sort of arr(1:N) done as four stable
    ! counting-sort passes, one per digit, using buf(1:N) as scratch space.
    !
    !   N   : number of elements in arr and buf
    !   arr : keys to sort; holds the result of the fourth pass on return
    !   buf : work array of the same size; its incoming contents are never
    !         read, and it is overwritten by the first and third passes
    !
    ! Digits are extracted with ishft (a logical shift) and iand, so the
    ! bit pattern of each key is what gets ordered. The top digit is taken
    ! with a shift only and is not masked.
    ! NOTE(review): this assumes four digits of radix_bits_i32 bits span the
    ! whole 32-bit key and that radix_mask / radix_mask_i32 equal
    ! 2**radix_bits_i32 - 1; those constants are defined outside this
    ! routine -- confirm.
    integer(kind=int_index), intent(in) :: N
    integer(kind=int32), dimension(N), intent(inout) :: arr
    integer(kind=int32), dimension(N), intent(inout) :: buf

    integer(kind=int_index) :: idx
    integer(kind=int32) :: key
    integer :: bucket, d0, d1, d2, d3
    ! One counter per possible digit value, for each of the four digits.
    integer(kind=int_index), dimension(0:radix_mask) :: pos0, pos1, pos2, pos3

    pos0 = 0
    pos1 = 0
    pos2 = 0
    pos3 = 0

    ! Build all four digit histograms in a single sweep over the input.
    histogram: do idx = 1, N
        key = arr(idx)
        d0 = iand(key, radix_mask_i32)
        d1 = iand(ishft(key, -radix_bits_i32), radix_mask_i32)
        d2 = iand(ishft(key, -2*radix_bits_i32), radix_mask_i32)
        d3 = ishft(key, -3*radix_bits_i32)
        pos0(d0) = pos0(d0) + 1
        pos1(d1) = pos1(d1) + 1
        pos2(d2) = pos2(d2) + 1
        pos3(d3) = pos3(d3) + 1
    end do histogram

    ! Inclusive prefix sums: each counter becomes the last (1-based) output
    ! slot belonging to its bucket.
    prefix: do bucket = 1, radix_mask
        pos0(bucket) = pos0(bucket) + pos0(bucket - 1)
        pos1(bucket) = pos1(bucket) + pos1(bucket - 1)
        pos2(bucket) = pos2(bucket) + pos2(bucket - 1)
        pos3(bucket) = pos3(bucket) + pos3(bucket - 1)
    end do prefix

    ! Each scatter walks its source from the end and fills every bucket from
    ! its last slot downwards, so elements with equal digits keep their
    ! relative order.

    ! Pass 1: lowest digit, arr -> buf.
    pass0: do idx = N, 1, -1
        key = arr(idx)
        d0 = iand(key, radix_mask_i32)
        buf(pos0(d0)) = key
        pos0(d0) = pos0(d0) - 1
    end do pass0

    ! Pass 2: second digit, buf -> arr.
    pass1: do idx = N, 1, -1
        key = buf(idx)
        d1 = iand(ishft(key, -radix_bits_i32), radix_mask_i32)
        arr(pos1(d1)) = key
        pos1(d1) = pos1(d1) - 1
    end do pass1

    ! Pass 3: third digit, arr -> buf.
    pass2: do idx = N, 1, -1
        key = arr(idx)
        d2 = iand(ishft(key, -2*radix_bits_i32), radix_mask_i32)
        buf(pos2(d2)) = key
        pos2(d2) = pos2(d2) - 1
    end do pass2

    ! Pass 4: highest digit, buf -> arr; the final ordering ends up in arr.
    pass3: do idx = N, 1, -1
        key = buf(idx)
        d3 = ishft(key, -3*radix_bits_i32)
        arr(pos3(d3)) = key
        pos3(d3) = pos3(d3) - 1
    end do pass3
end subroutine radix_sort_u32_helper