Parser for MQC section-based input files (.mqc format)
!! Parser for MQC section-based input files (.mqc format) module mqc_config_parser !! Parses section-based input files with %section...end blocks !! This is the new format generated by mqc_prep.py use pic_types, only: dp, int32, int64 use mqc_method_types, only: method_type_from_string, METHOD_TYPE_GFN2, METHOD_TYPE_UNKNOWN use mqc_calc_types, only: calc_type_from_string, CALC_TYPE_ENERGY, CALC_TYPE_UNKNOWN use mqc_geometry, only: geometry_type use mqc_error, only: error_t, ERROR_IO, ERROR_PARSE, ERROR_VALIDATION use mqc_calculation_defaults, only: DEFAULT_DISPLACEMENT, DEFAULT_TEMPERATURE, & DEFAULT_PRESSURE, DEFAULT_AIMD_DT, & DEFAULT_AIMD_NSTEPS, DEFAULT_AIMD_TEMPERATURE, & DEFAULT_AIMD_OUTPUT_FREQ, DEFAULT_SCF_CONV, & DEFAULT_CPCM_NANG, DEFAULT_CPCM_RSCALE, & DEFAULT_FRAG_LEVEL, DEFAULT_MAX_INTERSECTION use mqc_physical_fragment, only: bond_t implicit none private public :: mqc_config_t, read_mqc_file public :: input_fragment_t, bond_t, molecule_t ! Maximum line length for reading (shared with submodules) integer, parameter, public :: MAX_LINE_LEN = 1024 ! Utility functions shared with submodules public :: strip_comment, skip_to_end, parse_method_string type :: input_fragment_t !! Input fragment definition with charge, multiplicity, and atom indices !! This is the parsed representation from the input file, not the computational fragment integer :: charge = 0 integer :: multiplicity = 1 integer, allocatable :: indices(:) !! Atom indices in this fragment contains procedure :: destroy => input_fragment_destroy end type input_fragment_t type :: molecule_t !! Single molecule definition with structure, geometry, fragments, and connectivity character(len=:), allocatable :: name !! Optional molecule name ! Structure information integer :: charge = 0 integer :: multiplicity = 1 ! Geometry type(geometry_type) :: geometry ! Fragments integer :: nfrag = 0 type(input_fragment_t), allocatable :: fragments(:) ! Connectivity integer :: nbonds = 0 integer :: nbroken = 0 type(bond_t), allocatable :: bonds(:) contains procedure :: destroy => molecule_destroy end type molecule_t type :: mqc_config_t !! Complete configuration from .mqc file ! Schema information character(len=:), allocatable :: schema_name character(len=:), allocatable :: schema_version integer :: index_base = 0 !! 0-based or 1-based indexing character(len=:), allocatable :: units !! angstrom or bohr ! Model information integer(int32) :: method = METHOD_TYPE_GFN2 character(len=:), allocatable :: basis character(len=:), allocatable :: aux_basis ! XTB solvation settings character(len=:), allocatable :: solvent !! Solvent name (e.g., "water", "ethanol") or empty for gas phase character(len=:), allocatable :: solvation_model !! Solvation model: "alpb" (default), "gbsa", or "cpcm" logical :: use_cds = .true. !! Include non-polar CDS terms in solvation (not for CPCM) logical :: use_shift = .true. !! Include solution state shift in solvation (not for CPCM) ! CPCM-specific settings real(dp) :: dielectric = -1.0_dp !! Direct dielectric constant (-1 = use solvent lookup) integer :: cpcm_nang = DEFAULT_CPCM_NANG !! Number of angular grid points for CPCM cavity real(dp) :: cpcm_rscale = DEFAULT_CPCM_RSCALE !! Radii scaling factor for CPCM cavity ! Driver information integer(int32) :: calc_type = CALC_TYPE_ENERGY ! Multiple molecules support integer :: nmol = 0 !! Number of molecules (0 = single molecule mode for backward compatibility) type(molecule_t), allocatable :: molecules(:) !! Array of molecules (if nmol > 0) ! Single molecule fields (backward compatibility - used if nmol == 0) ! Structure information integer :: charge = 0 integer :: multiplicity = 1 ! Geometry type(geometry_type) :: geometry ! Fragments integer :: nfrag = 0 type(input_fragment_t), allocatable :: fragments(:) ! Connectivity integer :: nbonds = 0 integer :: nbroken = 0 type(bond_t), allocatable :: bonds(:) ! SCF settings integer :: scf_maxiter = 300 !! Using 300 (parser-specific, different from DEFAULT_SCF_MAXITER) real(dp) :: scf_tolerance = DEFAULT_SCF_CONV ! Hessian settings real(dp) :: hessian_displacement = DEFAULT_DISPLACEMENT !! Finite difference displacement (Bohr) real(dp) :: hessian_temperature = DEFAULT_TEMPERATURE !! Temperature for thermochemistry (K) real(dp) :: hessian_pressure = DEFAULT_PRESSURE !! Pressure for thermochemistry (atm) ! AIMD settings real(dp) :: aimd_dt = DEFAULT_AIMD_DT !! Timestep (femtoseconds) integer :: aimd_nsteps = DEFAULT_AIMD_NSTEPS !! Number of MD steps (0 = no AIMD) real(dp) :: aimd_initial_temperature = DEFAULT_AIMD_TEMPERATURE !! Initial temperature for velocity init (K) integer :: aimd_output_frequency = DEFAULT_AIMD_OUTPUT_FREQ !! Write output every N steps ! Fragmentation settings character(len=:), allocatable :: frag_method !! MBE, etc. integer :: frag_level = DEFAULT_FRAG_LEVEL logical :: allow_overlapping_fragments = .false. integer :: max_intersection_level = DEFAULT_MAX_INTERSECTION !! Maximum k-way intersection depth for GMBE character(len=:), allocatable :: embedding character(len=:), allocatable :: cutoff_method character(len=:), allocatable :: distance_metric real(dp), allocatable :: fragment_cutoffs(:) !! Distance cutoffs indexed by n-mer level (2=dimer, 3=trimer, etc.) ! Logger settings (kept for compatibility) character(len=:), allocatable :: log_level ! Output control logical :: skip_json_output = .false. !! Skip JSON output for large calculations contains procedure :: destroy => config_destroy end type mqc_config_t ! Interfaces for submodule procedures interface ! Basic section parsers (mqc_config_parser_basic_sections) module subroutine parse_schema_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_schema_section module subroutine parse_model_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_model_section module subroutine parse_driver_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_driver_section module subroutine parse_scf_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_scf_section module subroutine parse_xtb_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_xtb_section module subroutine parse_system_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_system_section ! Structure & geometry parsers (mqc_config_parser_structure) module subroutine parse_structure_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_structure_section module subroutine parse_geometry_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_geometry_section module subroutine parse_structure_generic(unit, charge, multiplicity, error) implicit none integer, intent(in) :: unit integer, intent(inout) :: charge, multiplicity type(error_t), intent(out) :: error end subroutine parse_structure_generic module subroutine parse_geometry_generic(unit, geom, error) use mqc_geometry, only: geometry_type implicit none integer, intent(in) :: unit type(geometry_type), intent(inout) :: geom type(error_t), intent(out) :: error end subroutine parse_geometry_generic ! Fragments & connectivity parsers (mqc_config_parser_fragments) module subroutine parse_fragments_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_fragments_section module subroutine parse_connectivity_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_connectivity_section module subroutine parse_fragments_generic(unit, nfrag, fragments, error) implicit none integer, intent(in) :: unit integer, intent(inout) :: nfrag type(input_fragment_t), allocatable, intent(inout) :: fragments(:) type(error_t), intent(out) :: error end subroutine parse_fragments_generic module subroutine parse_connectivity_generic(unit, nbonds, nbroken, bonds, error) use mqc_physical_fragment, only: bond_t implicit none integer, intent(in) :: unit integer, intent(inout) :: nbonds, nbroken type(bond_t), allocatable, intent(inout) :: bonds(:) type(error_t), intent(out) :: error end subroutine parse_connectivity_generic ! Calculation settings parsers (mqc_config_parser_calc_settings) module subroutine parse_hessian_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_hessian_section module subroutine parse_aimd_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_aimd_section module subroutine parse_fragmentation_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_fragmentation_section ! Molecules section parser (mqc_config_parser_molecules) module subroutine parse_molecules_section(unit, config, error) implicit none integer, intent(in) :: unit type(mqc_config_t), intent(inout) :: config type(error_t), intent(out) :: error end subroutine parse_molecules_section end interface contains pure function strip_comment(line) result(stripped) !! Remove comments (! or #) from a line and trim result character(len=*), intent(in) :: line character(len=:), allocatable :: stripped integer :: comment_pos ! Find first occurrence of ! or # comment_pos = index(line, '!') if (comment_pos == 0) comment_pos = index(line, '#') if (comment_pos > 0) then ! Comment found - take everything before it stripped = trim(adjustl(line(1:comment_pos - 1))) else ! No comment - use full line stripped = trim(adjustl(line)) end if end function strip_comment subroutine skip_to_end(unit, error) !! Skip lines until 'end' marker is found integer, intent(in) :: unit type(error_t), intent(out) :: error character(len=MAX_LINE_LEN) :: line integer :: io_stat do read (unit, '(A)', iostat=io_stat) line if (io_stat /= 0) then call error%set(ERROR_IO, "Unexpected end of file while skipping section") return end if line = adjustl(line) if (trim(strip_comment(line)) == 'end') exit end do end subroutine skip_to_end function parse_method_string(method_str) result(method_type) !! Parse method string from input file (e.g., "XTB-GFN1" -> gfn1) character(len=*), intent(in) :: method_str integer(int32) :: method_type character(len=:), allocatable :: lower_str, method_part integer :: dash_pos, i ! Convert to lowercase allocate (character(len=len_trim(method_str)) :: lower_str) lower_str = trim(adjustl(method_str)) do i = 1, len(lower_str) if (lower_str(i:i) >= 'A' .and. lower_str(i:i) <= 'Z') then lower_str(i:i) = achar(iachar(lower_str(i:i)) + 32) end if end do ! Handle "XTB-GFN1" format -> extract "gfn1" if (index(lower_str, 'xtb') > 0) then dash_pos = index(lower_str, '-') if (dash_pos > 0) then method_part = lower_str(dash_pos + 1:) else method_part = lower_str end if else method_part = lower_str end if method_type = method_type_from_string(method_part) end function parse_method_string subroutine read_mqc_file(filename, config, error) !! Read and parse a .mqc format input file character(len=*), intent(in) :: filename type(mqc_config_t), intent(out) :: config type(error_t), intent(out) :: error integer :: unit, io_stat character(len=MAX_LINE_LEN) :: line logical :: file_exists type(error_t) :: parse_error inquire (file=filename, exist=file_exists) if (.not. file_exists) then call error%set(ERROR_IO, "Input file not found: "//trim(filename)) return end if open (newunit=unit, file=filename, status='old', action='read', iostat=io_stat) if (io_stat /= 0) then call error%set(ERROR_IO, "Error opening input file: "//trim(filename)) return end if ! Set defaults config%log_level = "info" ! Read file line by line and dispatch to section parsers do read (unit, '(A)', iostat=io_stat) line if (io_stat /= 0) exit line = adjustl(line) if (len_trim(line) == 0) cycle if (line(1:1) == '#' .or. line(1:1) == '!') cycle ! Check for section start if (line(1:1) == '%') then select case (trim(line)) case ('%schema') call parse_schema_section(unit, config, parse_error) case ('%model') call parse_model_section(unit, config, parse_error) case ('%driver') call parse_driver_section(unit, config, parse_error) case ('%structure') call parse_structure_section(unit, config, parse_error) case ('%geometry') call parse_geometry_section(unit, config, parse_error) case ('%fragments') call parse_fragments_section(unit, config, parse_error) case ('%connectivity') call parse_connectivity_section(unit, config, parse_error) case ('%scf') call parse_scf_section(unit, config, parse_error) case ('%xtb') call parse_xtb_section(unit, config, parse_error) case ('%hessian') call parse_hessian_section(unit, config, parse_error) case ('%aimd') call parse_aimd_section(unit, config, parse_error) case ('%fragmentation') call parse_fragmentation_section(unit, config, parse_error) case ('%system') call parse_system_section(unit, config, parse_error) case ('%molecules') call parse_molecules_section(unit, config, parse_error) case default ! Skip unknown sections call skip_to_end(unit, parse_error) end select if (parse_error%has_error()) then error = parse_error call error%add_context("mqc_config_parser:read_mqc_file") close (unit) return end if end if end do close (unit) ! Validate required fields if (.not. allocated(config%schema_name)) then call error%set(ERROR_VALIDATION, "Missing required section: %schema") return end if ! Validate geometry: required for single-molecule mode, not for multi-molecule mode if (config%nmol == 0) then ! Single molecule mode: require top-level geometry if (.not. allocated(config%geometry%coords) .or. config%geometry%natoms == 0) then call error%set(ERROR_VALIDATION, "Missing required section: %geometry") return end if else ! Multi-molecule mode: each molecule must have geometry (validated during parsing) ! No additional validation needed here end if end subroutine read_mqc_file subroutine molecule_destroy(this) !! Clean up allocated memory in molecule_t class(molecule_t), intent(inout) :: this integer :: i if (allocated(this%name)) deallocate (this%name) call this%geometry%destroy() if (allocated(this%fragments)) then do i = 1, size(this%fragments) call this%fragments(i)%destroy() end do deallocate (this%fragments) end if if (allocated(this%bonds)) deallocate (this%bonds) end subroutine molecule_destroy subroutine config_destroy(this) !! Clean up allocated memory in mqc_config_t class(mqc_config_t), intent(inout) :: this integer :: i if (allocated(this%schema_name)) deallocate (this%schema_name) if (allocated(this%schema_version)) deallocate (this%schema_version) if (allocated(this%units)) deallocate (this%units) if (allocated(this%basis)) deallocate (this%basis) if (allocated(this%aux_basis)) deallocate (this%aux_basis) if (allocated(this%log_level)) deallocate (this%log_level) if (allocated(this%frag_method)) deallocate (this%frag_method) if (allocated(this%embedding)) deallocate (this%embedding) if (allocated(this%cutoff_method)) deallocate (this%cutoff_method) if (allocated(this%distance_metric)) deallocate (this%distance_metric) if (allocated(this%fragment_cutoffs)) deallocate (this%fragment_cutoffs) call this%geometry%destroy() if (allocated(this%fragments)) then do i = 1, size(this%fragments) call this%fragments(i)%destroy() end do deallocate (this%fragments) end if if (allocated(this%bonds)) deallocate (this%bonds) ! Clean up molecules array (multi-molecule mode) if (allocated(this%molecules)) then do i = 1, size(this%molecules) call this%molecules(i)%destroy() end do deallocate (this%molecules) end if end subroutine config_destroy subroutine input_fragment_destroy(this) !! Clean up allocated memory in input_fragment_t class(input_fragment_t), intent(inout) :: this if (allocated(this%indices)) deallocate (this%indices) end subroutine input_fragment_destroy end module mqc_config_parser