mqc_basis_reader.f90 Source File

This file contains basis set reader routines for basis sets


This file depends on

sourcefile~~mqc_basis_reader.f90~~EfferentGraph sourcefile~mqc_basis_reader.f90 mqc_basis_reader.f90 sourcefile~mqc_basis_file_reader.f90 mqc_basis_file_reader.f90 sourcefile~mqc_basis_reader.f90->sourcefile~mqc_basis_file_reader.f90 sourcefile~mqc_cgto.f90 mqc_cgto.f90 sourcefile~mqc_basis_reader.f90->sourcefile~mqc_cgto.f90 sourcefile~mqc_error.f90 mqc_error.f90 sourcefile~mqc_basis_reader.f90->sourcefile~mqc_error.f90 sourcefile~mqc_basis_file_reader.f90->sourcefile~mqc_error.f90

Source Code

!! This file contains basis set reader routines for basis sets
module mqc_basis_reader
   !! Gaussian basis set parser and molecular basis construction
   !!
   !! Provides utilities for parsing Gaussian-type orbital basis sets
   !! from text files and building molecular basis sets for quantum calculations.
   use mqc_cgto, only: cgto_type, atomic_basis_type, molecular_basis_type
   use mqc_basis_file_reader, only: strings_equal
   use mqc_error, only: error_t, ERROR_PARSE
   use pic_types, only: dp
   implicit none
   private

   public :: classify_line        !! Determine basis file line type
   public :: parse_element_basis  !! Parse basis for single element
   public :: build_molecular_basis  !! Build complete molecular basis
   public :: ang_mom_char_to_int  !! Convert angular momentum character to integer
   public :: ang_mom_int_to_char  !! Convert angular momentum integer to character

   ! Basis file line classification constants
   integer, parameter, public :: LINE_UNKNOWN = 0   !! Unrecognized line type
   integer, parameter, public :: LINE_ATOM = 1      !! Element specification line
   integer, parameter, public :: LINE_SHELL = 2     !! Shell definition line
   integer, parameter, public :: LINE_FUNCTION = 3  !! Basis function coefficient line

contains

   pure function ang_mom_char_to_int(ang_mom_char) result(ang_mom)
      !! Convert angular momentum character to integer
      !!
      !! Standard mapping: S=0, P=1, D=2, F=3, G=4, H=5, I=6
      !! Special case: L=-1 (combined S+P shell, requires splitting)
      character(len=1), intent(in) :: ang_mom_char  !! Angular momentum symbol
      integer :: ang_mom  !! Corresponding integer value

      select case (ang_mom_char)
      case ('S')
         ang_mom = 0
      case ('P')
         ang_mom = 1
      case ('D')
         ang_mom = 2
      case ('F')
         ang_mom = 3
      case ('G')
         ang_mom = 4
      case ('H')
         ang_mom = 5
      case ('I')
         ang_mom = 6
      case ('L')
         ang_mom = -1  ! Special case: L shells are split into S+P
      case default
         ang_mom = -1
      end select
   end function ang_mom_char_to_int

   pure function ang_mom_int_to_char(ang_mom) result(ang_mom_char)
      !! Convert angular momentum integer to character
      !!
      !! Inverse mapping: 0=S, 1=P, 2=D, 3=F, 4=G, 5=H, 6=I
      !! Returns '?' for invalid input values.
      integer, intent(in) :: ang_mom  !! Angular momentum quantum number
      character(len=1) :: ang_mom_char  !! Corresponding symbol character

      select case (ang_mom)
      case (0)
         ang_mom_char = 'S'
      case (1)
         ang_mom_char = 'P'
      case (2)
         ang_mom_char = 'D'
      case (3)
         ang_mom_char = 'F'
      case (4)
         ang_mom_char = 'G'
      case (5)
         ang_mom_char = 'H'
      case (6)
         ang_mom_char = 'I'
      case default
         ang_mom_char = '?'
      end select
   end function ang_mom_int_to_char

   pure function classify_line(line) result(line_type)
      !! Classify a line from a gamess formatted basis set file
      character(len=*), intent(in) :: line
      integer :: line_type

      character(len=:), allocatable :: line_trim

      line_trim = trim(adjustl(line))

      if (is_blank_or_control(line_trim)) then
         line_type = LINE_UNKNOWN
      else if (is_function_line(line_trim)) then
         line_type = LINE_FUNCTION
      else if (is_shell_header(line_trim)) then
         line_type = LINE_SHELL
      else
         line_type = LINE_ATOM
      end if

   end function classify_line

   pure function is_blank_or_control(line) result(res)
      !! Check if a line is blank or a control line (starts with '$')
      character(len=*), intent(in) :: line
      logical :: res
      integer :: trimmed_len

      trimmed_len = len_trim(line)

      if (trimmed_len == 0) then
         res = .true.
      else
         res = (line(1:1) == '$')
      end if
   end function is_blank_or_control

   pure function is_function_line(line) result(res)
      !! Check if a line is a function coefficient line (starts with a number)
      character(len=*), intent(in) :: line
      logical :: res
      character(len=1) :: first_char

      if (len_trim(line) == 0) then
         res = .false.
         return
      end if

      first_char = line(1:1)
      res = (first_char >= '0' .and. first_char <= '9')
   end function is_function_line

   pure function is_shell_header(line) result(res)
      !! Check if a line is a shell header line (starts with S, P, D, F, G, H, I, or L)
      character(len=*), intent(in) :: line
      logical :: res
      character(len=1) :: first_char
      integer :: ios, dummy

      res = .false.
      if (len_trim(line) == 0) return

      first_char = line(1:1)

      if (.not. any(first_char == ['S', 'P', 'D', 'F', 'G', 'H', 'I', 'L'])) return

      read (line(2:), *, iostat=ios) dummy
      res = (ios == 0)

   end function is_shell_header

   pure subroutine parse_element_basis(basis_string, element_name, atom_basis, error)
      !! Parse basis set for a specific element from a GAMESS formatted basis string
      character(len=*), intent(in) :: basis_string
      character(len=*), intent(in) :: element_name
      type(atomic_basis_type), intent(out) :: atom_basis
      type(error_t), intent(out) :: error

      integer :: nshells

      ! Pass 1: Find the element and count its shells
      call count_shells_for_element(basis_string, element_name, nshells, error)
      if (error%has_error()) then
         call error%add_context("mqc_basis_reader:parse_element_basis")
         return
      end if

      if (nshells == 0) then
         call error%set(ERROR_PARSE, "Element "//trim(element_name)//" not found in basis file")
         return
      end if

      ! ! Allocate shells
      atom_basis%element = trim(element_name)
      call atom_basis%allocate_shells(nshells)

      ! ! Pass 2: Parse and fill shell data
      call fill_element_basis(basis_string, element_name, atom_basis, error)

   end subroutine parse_element_basis

   pure subroutine count_shells_for_element(basis_string, element_name, nshells, error)
      !! Count the number of shells for a specific element in a GAMESS formatted basis string,
      character(len=*), intent(in) :: basis_string
      character(len=*), intent(in) :: element_name
      integer, intent(out) :: nshells
      type(error_t), intent(out) :: error

      integer :: line_start, line_end, line_type
      character(len=256) :: line
      logical :: in_target_element, found_element
      character(len=1) :: ang_mom

      nshells = 0
      in_target_element = .false.
      found_element = .false.
      line_start = 1

      do while (line_start <= len(basis_string))
         call get_next_line(basis_string, line_start, line, line_end)
         if (line_end == 0) exit

         line = adjustl(line)
         line_type = classify_line(line)

         select case (line_type)
         case (LINE_ATOM)
            ! Check if this is our target element
            if (strings_equal(line, element_name)) then
               in_target_element = .true.
               found_element = .true.
            else
               ! Different element - stop counting if we were in target
               if (in_target_element) exit
               in_target_element = .false.
            end if

         case (LINE_SHELL)
            if (in_target_element) then
               ! Extract angular momentum
               line = adjustl(line)
               ang_mom = line(1:1)

               ! L shells become 2 shells (S + P)
               if (ang_mom == 'L') then
                  nshells = nshells + 2
               else
                  nshells = nshells + 1
               end if
            end if

         case (LINE_UNKNOWN)
            ! Skip blank lines and comments
            continue
         case default
            ! Skip any other line types (e.g., LINE_FUNCTION)
            continue
         end select

         line_start = line_end
      end do

      ! Check if we found the element at all
      if (.not. found_element) then
         call error%set(ERROR_PARSE, "Element not found in basis string: "//trim(element_name))
      end if

   end subroutine count_shells_for_element

   pure subroutine get_next_line(string, line_start, line, line_end)
      !! Extract the next line from a string starting at line_start
      character(len=*), intent(in) :: string
      integer, intent(in) :: line_start
      character(len=*), intent(out) :: line
      integer, intent(out) :: line_end

      integer :: newline_pos

      if (line_start > len(string)) then
         line = ''
         line_end = 0
         return
      end if

      newline_pos = index(string(line_start:), new_line('a'))

      if (newline_pos == 0) then
         ! Last line (no newline at end)
         line = string(line_start:)
         line_end = len(string) + 1
      else
         line = string(line_start:line_start + newline_pos - 2)
         line_end = line_start + newline_pos
      end if

   end subroutine get_next_line

   pure subroutine parse_shell_header(line, ang_mom, nfunc, stat)
      !! Parse shell header line (e.g., "S 2" or "L 3")
      character(len=*), intent(in) :: line
      character(len=1), intent(out) :: ang_mom
      integer, intent(out) :: nfunc
      integer, intent(out) :: stat

      character(len=256) :: line_trim

      line_trim = adjustl(line)
      ang_mom = line_trim(1:1)

      ! Read the number of functions
      read (line_trim(2:), *, iostat=stat) nfunc

   end subroutine parse_shell_header

   pure subroutine parse_function_line(line, func_num, exponent, coeff_s, coeff_p, has_p, stat)
      !! Parse function line (e.g., "1 1.0 2.0" or "1 1.0 2.0 3.0" for L shells)
      character(len=*), intent(in) :: line
      integer, intent(out) :: func_num
      real(dp), intent(out) :: exponent
      real(dp), intent(out) :: coeff_s
      real(dp), intent(out), optional :: coeff_p
      logical, intent(out) :: has_p
      integer, intent(out) :: stat

      real(dp) :: temp_p

      has_p = .false.

      ! Try to read 4 values (func_num, exponent, coeff_s, coeff_p)
      read (line, *, iostat=stat) func_num, exponent, coeff_s, temp_p

      if (stat == 0) then
         ! Successfully read 4 values - this is an L shell
         has_p = .true.
         if (present(coeff_p)) coeff_p = temp_p
      else
         ! Try reading just 3 values (func_num, exponent, coeff_s)
         read (line, *, iostat=stat) func_num, exponent, coeff_s
      end if

   end subroutine parse_function_line

   pure subroutine fill_element_basis(basis_string, element_name, atom_basis, error)
      !! Fill in the shell data for a specific element from a GAMESS formatted basis string
      character(len=*), intent(in) :: basis_string
      character(len=*), intent(in) :: element_name
      type(atomic_basis_type), intent(inout) :: atom_basis
      type(error_t), intent(out) :: error

      integer :: line_start, line_end, line_type
      character(len=256) :: line
      logical :: in_data_block, in_target_element
      character(len=1) :: ang_mom
      integer :: nfunc, func_num, ishell, ifunc
      real(dp) :: exponent, coeff_s, coeff_p
      logical :: has_p

      ! L shell handling: we split into two shells, need to track both
      logical :: reading_l_shell
      integer :: l_shell_s_idx, l_shell_p_idx
      integer :: stat

      in_data_block = .false.
      in_target_element = .false.
      ishell = 0
      reading_l_shell = .false.

      line_start = 1
      do while (line_start <= len(basis_string))
         call get_next_line(basis_string, line_start, line, line_end)
         if (line_end == 0) exit

         line = adjustl(line)
         line_type = classify_line(line)

         select case (line_type)
            ! case (LINE_UNKNOWN)
            !   if (index(line, '$DATA') > 0) then
            !     in_data_block = .true.
            !   else if (index(line, '$END') > 0) then
            !     exit
            !   end if

         case (LINE_ATOM)
            if (strings_equal(line, element_name)) then
               in_target_element = .true.
            else
               if (in_target_element) exit
               in_target_element = .false.
            end if

         case (LINE_SHELL)
            if (in_target_element) then
               ! Parse shell header
               call parse_shell_header(line, ang_mom, nfunc, stat)
               if (stat /= 0) then
                  call error%set(ERROR_PARSE, "Failed to parse shell header: "//trim(line))
                  return
               end if

               if (ang_mom == 'L') then
                  ! L shell: create two shells (S and P)
                  reading_l_shell = .true.

                  ishell = ishell + 1
                  l_shell_s_idx = ishell
                  atom_basis%shells(ishell)%ang_mom = 0  ! S
                  call atom_basis%shells(ishell)%allocate_arrays(nfunc)

                  ishell = ishell + 1
                  l_shell_p_idx = ishell
                  atom_basis%shells(ishell)%ang_mom = 1  ! P
                  call atom_basis%shells(ishell)%allocate_arrays(nfunc)

                  ifunc = 0  ! Reset function counter
               else
                  ! Regular shell
                  reading_l_shell = .false.
                  ishell = ishell + 1

                  ! Set angular momentum (S=0, P=1, D=2, F=3, G=4, H=5, I=6)
                  atom_basis%shells(ishell)%ang_mom = ang_mom_char_to_int(ang_mom)

                  call atom_basis%shells(ishell)%allocate_arrays(nfunc)
                  ifunc = 0
               end if
            end if

         case (LINE_FUNCTION)
            if (in_target_element) then
               call parse_function_line(line, func_num, exponent, coeff_s, coeff_p, has_p, stat)
               if (stat /= 0) then
                  call error%set(ERROR_PARSE, "Failed to parse function line: "//trim(line))
                  return
               end if

               ifunc = ifunc + 1

               if (reading_l_shell) then
               if (.not. has_p) then
                  call error%set(ERROR_PARSE, "L shell requires both S and P coefficients")
                  return
               end if
               ! Store in both S and P shells
               atom_basis%shells(l_shell_s_idx)%exponents(ifunc) = exponent
               atom_basis%shells(l_shell_s_idx)%coefficients(ifunc) = coeff_s

               atom_basis%shells(l_shell_p_idx)%exponents(ifunc) = exponent
               atom_basis%shells(l_shell_p_idx)%coefficients(ifunc) = coeff_p
               else
               ! Store in current shell
               atom_basis%shells(ishell)%exponents(ifunc) = exponent
               atom_basis%shells(ishell)%coefficients(ifunc) = coeff_s
               end if
            end if

         case default
            ! Skip unknown line types (e.g., LINE_UNKNOWN, blank lines, comments)
            continue
         end select

         line_start = line_end
      end do

   end subroutine fill_element_basis

   pure subroutine find_unique_strings(input_array, unique_array, nunique)
      !! Find unique strings in an array
      !! Returns array of unique strings and count
      character(len=*), intent(in) :: input_array(:)
      character(len=:), allocatable, intent(out) :: unique_array(:)
      integer, intent(out) :: nunique

      integer :: i, j, n
      logical :: is_unique
      character(len=len(input_array)), allocatable :: temp_unique(:)

      n = size(input_array)
      allocate (temp_unique(n))  ! Max possible size
      nunique = 0

      do i = 1, n
         is_unique = .true.

         ! Check if we've already seen this string
         do j = 1, nunique
            if (strings_equal(input_array(i), temp_unique(j))) then
               is_unique = .false.
               exit
            end if
         end do

         if (is_unique) then
            nunique = nunique + 1
            temp_unique(nunique) = input_array(i)
         end if
      end do

      ! Allocate output array with exact size and copy
      allocate (character(len=len(input_array)) :: unique_array(nunique))
      unique_array = temp_unique(1:nunique)

   end subroutine find_unique_strings

   pure subroutine copy_atomic_basis(source, dest)
      !! Deep copy of atomic basis data from source to dest
      type(atomic_basis_type), intent(in) :: source
      type(atomic_basis_type), intent(out) :: dest
      integer :: ishell

      dest%element = source%element
      call dest%allocate_shells(source%nshells)

      do ishell = 1, source%nshells
         dest%shells(ishell)%ang_mom = source%shells(ishell)%ang_mom
         call dest%shells(ishell)%allocate_arrays(source%shells(ishell)%nfunc)
         dest%shells(ishell)%exponents = source%shells(ishell)%exponents
         dest%shells(ishell)%coefficients = source%shells(ishell)%coefficients
      end do

   end subroutine copy_atomic_basis

   subroutine build_molecular_basis(basis_string, element_names, mol_basis, error)
      !! Build molecular basis from geometry and basis file
      !! Only parses unique elements, then copies basis data to atoms
      character(len=*), intent(in) :: basis_string
      character(len=*), intent(in) :: element_names(:)  !! Element for each atom in geometry order
      type(molecular_basis_type), intent(out) :: mol_basis
      type(error_t), intent(out) :: error

      integer :: iatom, natoms, iunique, nunique
      character(len=:), allocatable :: unique_elements(:)
      type(atomic_basis_type), allocatable :: unique_bases(:)
      integer :: match_idx

      match_idx = 0
      natoms = size(element_names)

      ! Find unique elements
      call find_unique_strings(element_names, unique_elements, nunique)

      print *, "Found ", nunique, " unique elements out of ", natoms, " atoms"

      ! Allocate for unique bases
      allocate (unique_bases(nunique))

      ! Parse basis for each unique element
      do iunique = 1, nunique
         print *, "Parsing basis for: ", trim(unique_elements(iunique))
         call parse_element_basis(basis_string, unique_elements(iunique), &
                                  unique_bases(iunique), error)
         if (error%has_error()) then
            ! Prepend context to error message
            call error%add_context("mqc_basis_reader:read_basis_from_string")
            call error%set(ERROR_PARSE, "Failed to parse basis for element "// &
                           trim(unique_elements(iunique))//": "//error%get_message())
            return
         end if
      end do

      ! Allocate molecular basis and assign to each atom
      call mol_basis%allocate_elements(natoms)

      do iatom = 1, natoms
         ! Find which unique element this atom corresponds to
         do iunique = 1, nunique
            if (strings_equal(element_names(iatom), unique_elements(iunique))) then
               match_idx = iunique
               exit
            end if
         end do

         ! Copy the basis data
         call copy_atomic_basis(unique_bases(match_idx), mol_basis%elements(iatom))
      end do

      ! Clean up
      do iunique = 1, nunique
         call unique_bases(iunique)%destroy()
      end do

   end subroutine build_molecular_basis

end module mqc_basis_reader