mqc_xyz_reader.f90 – metalquicha

XYZ molecular geometry file reader
This file depends on

Help
Source Code

!! XYZ molecular geometry file reader
module mqc_xyz_reader
   !! Provides functions to parse standard XYZ format files containing
   !! atomic coordinates and element symbols for molecular structures.
   use pic_types, only: dp
   use mqc_geometry, only: geometry_type
   use mqc_error, only: error_t, ERROR_IO, ERROR_PARSE
   implicit none
   private

   public :: read_xyz_file    !! Read XYZ file from disk
   public :: read_xyz_string  !! Parse XYZ data from string
   public :: split_lines      !! Split text into lines (for testing)

   ! Constants
   integer, parameter :: MAX_ELEMENT_SYMBOL_LEN = 4  !! Maximum element symbol length

contains

   subroutine read_xyz_file(filename, geom, error)
      !! Read molecular geometry from XYZ format file
      !!
      !! Parses standard XYZ files with format:
      !! Line 1: Number of atoms
      !! Line 2: Comment/title line
      !! Lines 3+: Element X Y Z (coordinates in Angstrom)
      character(len=*), intent(in) :: filename  !! Path to XYZ file
      type(geometry_type), intent(out) :: geom  !! Parsed molecular geometry
      type(error_t), intent(out) :: error       !! Error handling

      integer :: unit      !! File unit number
      integer :: io_stat   !! I/O operation status
      integer :: file_size  !! File size in bytes
      logical :: file_exists  !! Whether file exists on disk
      character(len=:), allocatable :: file_contents  !! Full file content buffer

      ! Check if file exists
      inquire (file=filename, exist=file_exists, size=file_size)
      if (.not. file_exists) then
         call error%set(ERROR_IO, "XYZ file not found: "//trim(filename))
         return
      end if

      ! Allocate buffer for entire file
      allocate (character(len=file_size) :: file_contents)

      ! Open and read entire file as stream
      open (newunit=unit, file=filename, status='old', action='read', &
            access='stream', form='unformatted', iostat=io_stat)
      if (io_stat /= 0) then
         call error%set(ERROR_IO, "Error opening file: "//trim(filename))
         return
      end if

      read (unit, iostat=io_stat) file_contents
      close (unit)

      if (io_stat /= 0) then
         call error%set(ERROR_IO, "Error reading file: "//trim(filename))
         return
      end if

      ! Parse the contents
      call read_xyz_string(file_contents, geom, error)
      if (error%has_error()) then
         call error%add_context("mqc_xyz_reader:read_xyz_file")
         return
      end if

   end subroutine read_xyz_file

   pure subroutine read_xyz_string(xyz_string, geom, error)
      !! Parse molecular geometry from XYZ format string
      character(len=*), intent(in) :: xyz_string
      type(geometry_type), intent(out) :: geom
      type(error_t), intent(out) :: error

      character(len=:), allocatable :: lines(:)
      integer :: nlines, iatom, io_stat
      character(len=256) :: element
      real(dp) :: x, y, z

      ! Split into lines
      call split_lines(xyz_string, lines, nlines)

      if (nlines < 2) then
         call error%set(ERROR_PARSE, "XYZ file must have at least 2 lines (natoms + comment)")
         return
      end if

      ! Read number of atoms from first line
      read (lines(1), *, iostat=io_stat) geom%natoms
      if (io_stat /= 0) then
         call error%set(ERROR_PARSE, "Failed to read number of atoms from first line")
         return
      end if

      if (geom%natoms < 0) then
         call error%set(ERROR_PARSE, "Number of atoms must be non-negative")
         return
      end if

      ! Store comment line
      geom%comment = trim(adjustl(lines(2)))

      ! Check we have enough lines
      if (nlines < 2 + geom%natoms) then
         call error%set(ERROR_PARSE, "XYZ file has insufficient lines: expected "// &
                        trim(int_to_string(2 + geom%natoms))//", got "// &
                        trim(int_to_string(nlines)))
         return
      end if

      ! Allocate arrays
      allocate (character(len=MAX_ELEMENT_SYMBOL_LEN) :: geom%elements(geom%natoms))
      allocate (geom%coords(3, geom%natoms))

      ! Read atom data
      do iatom = 1, geom%natoms
         read (lines(2 + iatom), *, iostat=io_stat) element, x, y, z
         if (io_stat /= 0) then
            call error%set(ERROR_PARSE, "Failed to parse atom data on line "// &
                           trim(int_to_string(2 + iatom))//": '"// &
                           trim(lines(2 + iatom))//"'")
            return
         end if

         geom%elements(iatom) = trim(adjustl(element))
         geom%coords(1, iatom) = x
         geom%coords(2, iatom) = y
         geom%coords(3, iatom) = z
      end do

   end subroutine read_xyz_string

   pure function int_to_string(i) result(str)
      !! Convert integer to string
      integer, intent(in) :: i
      character(len=:), allocatable :: str
      character(len=20) :: buffer

      write (buffer, '(I0)') i
      str = trim(adjustl(buffer))
   end function int_to_string

   pure subroutine split_lines(text, lines, nlines)
      !! Split input text into lines based on CR, LF, or CRLF line endings
      !! Trailing newlines do not create empty lines
      character(len=*), intent(in) :: text
      character(len=:), allocatable, intent(out) :: lines(:)
      integer, intent(out) :: nlines

      integer :: i, line_start, line_end, max_line_len
      character(len=:), allocatable :: temp_lines(:)

      if (len(text) == 0) then
         nlines = 0
         allocate (character(len=1) :: lines(0))
         return
      end if

      ! Pass 1: Count lines and find maximum line length
      nlines = 0
      max_line_len = 0
      line_start = 1
      i = 1

      do while (i <= len(text))
         ! Check for line ending
         if (text(i:i) == achar(13)) then  ! CR
            ! Check for CRLF
            if (i < len(text) .and. text(i + 1:i + 1) == achar(10)) then
               line_end = i - 1
               i = i + 2  ! Skip both CR and LF
            else
               line_end = i - 1
               i = i + 1
            end if
            nlines = nlines + 1
            max_line_len = max(max_line_len, line_end - line_start + 1)
            line_start = i
         else if (text(i:i) == achar(10)) then  ! LF
            line_end = i - 1
            nlines = nlines + 1
            max_line_len = max(max_line_len, line_end - line_start + 1)
            i = i + 1
            line_start = i
         else
            i = i + 1
         end if
      end do

      ! Handle last line if text doesn't end with newline
      if (line_start <= len(text)) then
         nlines = nlines + 1
         max_line_len = max(max_line_len, len(text) - line_start + 1)
      end if

      ! Handle empty text or ensure at least length 1
      if (max_line_len == 0) max_line_len = 1

      ! Allocate output array
      allocate (character(len=max_line_len) :: temp_lines(nlines))

      ! Pass 2: Extract lines
      nlines = 0
      line_start = 1
      i = 1

      do while (i <= len(text))
         ! Check for line ending
         if (text(i:i) == achar(13)) then  ! CR
            ! Check for CRLF
            if (i < len(text) .and. text(i + 1:i + 1) == achar(10)) then
               line_end = i - 1
               i = i + 2
            else
               line_end = i - 1
               i = i + 1
            end if
            nlines = nlines + 1
            temp_lines(nlines) = ""  ! Initialize line before copying
            if (line_end >= line_start) then
               ! Intel compiler workaround: use character-by-character copy
               block
                  integer :: j, line_len
                  line_len = line_end - line_start + 1
                  do j = 1, line_len
                     temp_lines(nlines) (j:j) = text(line_start + j - 1:line_start + j - 1)
                  end do
               end block
            end if
            line_start = i
         else if (text(i:i) == achar(10)) then  ! LF
            line_end = i - 1
            nlines = nlines + 1
            temp_lines(nlines) = ""  ! Initialize line before copying
            if (line_end >= line_start) then
               ! Intel compiler workaround: use character-by-character copy
               block
                  integer :: j, line_len
                  line_len = line_end - line_start + 1
                  do j = 1, line_len
                     temp_lines(nlines) (j:j) = text(line_start + j - 1:line_start + j - 1)
                  end do
               end block
            end if
            i = i + 1
            line_start = i
         else
            i = i + 1
         end if
      end do

      ! Handle last line if text doesn't end with newline
      if (line_start <= len(text)) then
         nlines = nlines + 1
         temp_lines(nlines) = ""  ! Initialize line before copying
         ! Intel compiler workaround: use character-by-character copy
         block
            integer :: j, line_len
            line_len = len(text) - line_start + 1
            do j = 1, line_len
               temp_lines(nlines) (j:j) = text(line_start + j - 1:line_start + j - 1)
            end do
         end block
      end if

      ! Copy to output (use explicit loop for Intel compiler compatibility)
      allocate (character(len=max_line_len) :: lines(nlines))
      block
         integer :: iline
         do iline = 1, nlines
            lines(iline) = temp_lines(iline)
         end do
      end block

   end subroutine split_lines

end module mqc_xyz_reader
mqc_xyz_reader.f90 Source File

Contents

Modules

Source Code

This file depends on

Files dependent on this one

Source Code