pure subroutine radix_sort_u64_helper(N, arr, buffer)
    ! Sorts the N keys of `arr` in place using eight counting-sort passes,
    ! one per digit of `radix_bits_i64` bits, starting with the least
    ! significant digit. Digits are obtained with logical shifts (`ishft`
    ! with a negative count), so the keys are ordered by their raw bit
    ! pattern, i.e. as unsigned values.
    !
    ! Arguments:
    !   N      - number of keys held in `arr` and `buffer`.
    !   arr    - keys to sort; holds the sorted keys on return.
    !   buffer - scratch space of the same size. Its incoming contents are
    !            never read; on return it holds the output of the
    !            second-to-last pass.
    integer(kind=int_index), intent(in) :: N
    integer(kind=int64), dimension(N), intent(inout) :: arr
    integer(kind=int64), dimension(N), intent(inout) :: buffer

    ! Index of the most significant digit (digits are numbered from 0).
    integer(kind=int64), parameter :: top_digit = 7

    integer(kind=int_index) :: pos, slot
    integer(kind=int64) :: key, digit, pass, shift
    ! counts(d, p): occurrences of digit value d in digit position p, later
    ! turned into the last output slot reserved for that digit value.
    integer(kind=int_index), dimension(0:radix_mask, 0:top_digit) :: counts

    counts(:, :) = 0

    ! Build the histograms of all eight digit positions in a single sweep.
    do pos = 1, N
        key = arr(pos)
        do pass = 0, top_digit - 1
            digit = iand(ishft(key, -pass*radix_bits_i64), radix_mask_i64)
            counts(digit, pass) = counts(digit, pass) + 1
        end do
        ! The top digit is used unmasked.
        ! NOTE(review): this presumes (top_digit + 1)*radix_bits_i64 equals
        ! the key width, so the logical shift alone leaves a value inside
        ! 0:radix_mask - confirm against the constant definitions.
        digit = ishft(key, -top_digit*radix_bits_i64)
        counts(digit, top_digit) = counts(digit, top_digit) + 1
    end do

    ! Inclusive prefix sums: counts(d, p) becomes the index of the last
    ! output slot belonging to digit value d in pass p.
    do digit = 1, radix_mask
        counts(digit, :) = counts(digit, :) + counts(digit - 1, :)
    end do

    ! Scatter passes, handled in pairs so that the data ping-pongs
    ! arr -> buffer -> arr and ends up in `arr` after the final pass.
    ! Each pass walks its source from the back while filling every digit's
    ! slots from the back, which keeps keys with equal digits in their
    ! existing relative order.
    do pass = 0, top_digit - 1, 2
        ! Even digit position: arr -> buffer.
        shift = -pass*radix_bits_i64
        do pos = N, 1, -1
            key = arr(pos)
            digit = iand(ishft(key, shift), radix_mask_i64)
            slot = counts(digit, pass)
            buffer(slot) = key
            counts(digit, pass) = slot - 1
        end do

        ! Odd digit position: buffer -> arr.
        shift = shift - radix_bits_i64
        if (pass + 1 < top_digit) then
            do pos = N, 1, -1
                key = buffer(pos)
                digit = iand(ishft(key, shift), radix_mask_i64)
                slot = counts(digit, pass + 1)
                arr(slot) = key
                counts(digit, pass + 1) = slot - 1
            end do
        else
            ! Final pass: the top digit is taken unmasked, matching the
            ! way it was counted above.
            do pos = N, 1, -1
                key = buffer(pos)
                digit = ishft(key, shift)
                slot = counts(digit, top_digit)
                arr(slot) = key
                counts(digit, top_digit) = slot - 1
            end do
        end if
    end do
end subroutine radix_sort_u64_helper