diff --git a/src/fpm.f90 b/src/fpm.f90 index 561fb4e5e9..03211ae594 100644 --- a/src/fpm.f90 +++ b/src/fpm.f90 @@ -9,6 +9,7 @@ module fpm use fpm_dependency, only : new_dependency_tree use fpm_filesystem, only: is_dir, join_path, list_files, exists, & basename, filewrite, mkdir, run, os_delete_dir +use fpm_lock, only: fpm_lock_acquire, fpm_lock_release use fpm_model, only: fpm_model_t, srcfile_t, show_model, fortran_features_t, & FPM_SCOPE_UNKNOWN, FPM_SCOPE_LIB, FPM_SCOPE_DEP, & FPM_SCOPE_APP, FPM_SCOPE_EXAMPLE, FPM_SCOPE_TEST @@ -436,6 +437,11 @@ subroutine cmd_build(settings) integer :: i +call fpm_lock_acquire(error) +if (allocated(error)) then + call fpm_stop(1, '*cmd_build* Lock error: '//error%message) +end if + call get_package_data(package, "fpm.toml", error, apply_defaults=.true.) if (allocated(error)) then call fpm_stop(1,'*cmd_build* Package error: '//error%message) @@ -467,6 +473,11 @@ subroutine cmd_build(settings) call build_package(targets,model,verbose=settings%verbose) endif +call fpm_lock_release(error) +if (allocated(error)) then + call fpm_stop(1, '*cmd_build* Lock error: '//error%message) +end if + end subroutine cmd_build subroutine cmd_run(settings,test) @@ -487,6 +498,11 @@ subroutine cmd_run(settings,test) integer, allocatable :: stat(:),target_ID(:) character(len=:),allocatable :: line + call fpm_lock_acquire(error) + if (allocated(error)) then + call fpm_stop(1, '*cmd_run* Lock error: '//error%message) + end if + call get_package_data(package, "fpm.toml", error, apply_defaults=.true.) if (allocated(error)) then call fpm_stop(1, '*cmd_run* Package error: '//error%message) @@ -616,6 +632,11 @@ subroutine cmd_run(settings,test) end if + call fpm_lock_release(error) + if (allocated(error)) then + call fpm_stop(1, '*cmd_run* Lock error: '//error%message) + end if + contains subroutine compact_list_all() @@ -684,6 +705,11 @@ subroutine cmd_clean(settings) type(fpm_global_settings) :: global_settings type(error_t), allocatable :: error + call fpm_lock_acquire(error) + if (allocated(error)) then + call fpm_stop(1, '*cmd_clean* Lock error: '//error%message) + end if + ! Clear registry cache if (settings%registry_cache) then call get_global_settings(global_settings, error) @@ -708,6 +734,11 @@ subroutine cmd_clean(settings) else write (stdout, '(A)') "fpm: No build directory found." end if + + call fpm_lock_release(error) + if (allocated(error)) then + call fpm_stop(1, '*cmd_clean* Lock error: '//error%message) + end if end subroutine cmd_clean !> Sort executables by namelist ID, and trim unused values diff --git a/src/fpm/cmd/export.f90 b/src/fpm/cmd/export.f90 index d2ec0dbaf1..a3885f8ce4 100644 --- a/src/fpm/cmd/export.f90 +++ b/src/fpm/cmd/export.f90 @@ -3,6 +3,7 @@ module fpm_cmd_export use fpm_dependency, only : dependency_tree_t, new_dependency_tree use fpm_error, only : error_t, fpm_stop use fpm_filesystem, only : join_path + use fpm_lock, only : fpm_lock_acquire, fpm_lock_release use fpm_manifest, only : package_config_t, get_package_data use fpm_toml, only: name_is_json use fpm_model, only: fpm_model_t @@ -25,6 +26,9 @@ subroutine cmd_export(settings) integer :: ii character(len=:), allocatable :: filename + call fpm_lock_acquire(error) + call handle_error(error) + if (len_trim(settings%dump_manifest)<=0 .and. & len_trim(settings%dump_model)<=0 .and. & len_trim(settings%dump_dependencies)<=0) then @@ -69,6 +73,9 @@ subroutine cmd_export(settings) call handle_error(error) end if + call fpm_lock_release(error) + call handle_error(error) + end subroutine cmd_export !> Error handling for this command diff --git a/src/fpm/cmd/install.f90 b/src/fpm/cmd/install.f90 index 559cd81b55..26654749b6 100644 --- a/src/fpm/cmd/install.f90 +++ b/src/fpm/cmd/install.f90 @@ -5,6 +5,7 @@ module fpm_cmd_install use fpm_command_line, only : fpm_install_settings use fpm_error, only : error_t, fatal_error, fpm_stop use fpm_filesystem, only : join_path, list_files + use fpm_lock, only : fpm_lock_acquire, fpm_lock_release use fpm_installer, only : installer_t, new_installer use fpm_manifest, only : package_config_t, get_package_data use fpm_model, only : fpm_model_t, FPM_SCOPE_APP, FPM_SCOPE_TEST @@ -32,6 +33,9 @@ subroutine cmd_install(settings) logical :: installable integer :: ntargets + call fpm_lock_acquire(error) + call handle_error(error) + call get_package_data(package, "fpm.toml", error, apply_defaults=.true.) call handle_error(error) @@ -85,6 +89,9 @@ subroutine cmd_install(settings) end if + call fpm_lock_release(error) + call handle_error(error) + end subroutine cmd_install subroutine install_info(unit, verbose, targets, ntargets) diff --git a/src/fpm/cmd/publish.f90 b/src/fpm/cmd/publish.f90 index 225e83f923..1ceb28d28a 100644 --- a/src/fpm/cmd/publish.f90 +++ b/src/fpm/cmd/publish.f90 @@ -4,6 +4,7 @@ !> The token can be obtained from the registry website. It can be used as `fpm publish --token `. module fpm_cmd_publish use fpm_command_line, only: fpm_publish_settings + use fpm_lock, only : fpm_lock_acquire, fpm_lock_release use fpm_manifest, only: package_config_t, get_package_data use fpm_model, only: fpm_model_t use fpm_error, only: error_t, fpm_stop @@ -35,6 +36,11 @@ subroutine cmd_publish(settings) type(downloader_t) :: downloader integer :: i + call fpm_lock_acquire(error) + if (allocated(error)) then + call fpm_stop(1, '*cmd_publish* Lock error: '//error%message) + end if + ! Get package data to determine package version. call get_package_data(package, 'fpm.toml', error, apply_defaults=.true.) if (allocated(error)) call fpm_stop(1, '*cmd_build* Package error: '//error%message) @@ -106,6 +112,12 @@ subroutine cmd_publish(settings) call downloader%upload_form(official_registry_base_url//'/packages', upload_data, settings%verbose, error) call delete_file(tmp_file) if (allocated(error)) call fpm_stop(1, '*cmd_publish* Upload error: '//error%message) + + call fpm_lock_release(error) + if (allocated(error)) then + call fpm_stop(1, '*cmd_publish* Lock error: '//error%message) + end if + end subroutine print_upload_data(upload_data) diff --git a/src/fpm/cmd/update.f90 b/src/fpm/cmd/update.f90 index c1f09e07c6..1d9dc90e58 100644 --- a/src/fpm/cmd/update.f90 +++ b/src/fpm/cmd/update.f90 @@ -3,6 +3,7 @@ module fpm_cmd_update use fpm_dependency, only : dependency_tree_t, new_dependency_tree use fpm_error, only : error_t, fpm_stop use fpm_filesystem, only : exists, mkdir, join_path, delete_file, filewrite + use fpm_lock, only : fpm_lock_acquire, fpm_lock_release use fpm_manifest, only : package_config_t, get_package_data use fpm_toml, only: name_is_json implicit none @@ -22,6 +23,9 @@ subroutine cmd_update(settings) integer :: ii character(len=:), allocatable :: cache + call fpm_lock_acquire(error) + call handle_error(error) + call get_package_data(package, "fpm.toml", error, apply_defaults=.true.) call handle_error(error) @@ -63,6 +67,9 @@ subroutine cmd_update(settings) call handle_error(error) end if + call fpm_lock_release(error) + call handle_error(error) + end subroutine cmd_update !> Error handling for this command diff --git a/src/fpm_lock.f90 b/src/fpm_lock.f90 new file mode 100644 index 0000000000..9a5cf8d3f9 --- /dev/null +++ b/src/fpm_lock.f90 @@ -0,0 +1,273 @@ +!> Lock package directories before working on them. +!> +!># Synopsis +!> +!> Use the functions [[fpm_lock_acquire]] and [[fpm_lock_release]] to "lock" a +!> `fpm` package directory to prevent issues when multiple `fpm` process want +!> to work on the same package at the same time. Here's an example of how this +!> module is used in the rest of the codebase: +!> +!>```fortran +!> !> Entry point for the update subcommand +!>subroutine cmd_update(settings) +!> type(error_t), allocatable :: error +!> fpm_lock_acquire(error) +!> ! Do things here +!> fpm_lock_release(error) +!>end subroutine cmd_update +!>``` +!> +!># Background +!> +!> This module exists to fix a buggy behavior that exists in many package +!> managers (however, most users never experience issues with it). +!> +!> The buggy behaviors is that when many `fpm` processes try to work on the same +!> package at the same time the different processes sort of step on one another +!> an it leads to problems, for instance two processes might try to compile the +!> same file at the same time. +!> +!> Also see this issue: +!> [https://github.com/fortran-lang/fpm/issues/957](https://github.com/fortran-lang/fpm/issues/957) +!> for some +!> more details. +!> +!> What we need is for an `fpm` process \(A\) to see if another `fpm` process +!> \(B\) is already working on a package, and if so, wait for \(B\) to finish +!> that work before \(A\) steps in. The way we do this is with so-called +!> *lock-files*. Basically \(B\) creates a special file named +!> `.fpm-package-lock` in the package directory so that \(A\) will see that this +!> file exists and wait for it to be deleted by \(B\), when that is done it +!> means that the package directory is free, and so \(A\) now creates +!> `.fpm-package-lock` itself and does it's thing, after \(A\) is done it +!> deletes the lock-file again. +!> +!> That's pretty much the gist of it. It's complicated somewhat by the fact that +!> we need to consider certain rare cases (what if the program crashes and +!> leaves the lock-file behind for instance). Also, the lock-file operations +!> have to be what's called "atomic". For instance, consider this non-atomic way +!> of creating a lock-file: (in pseudocode) +!>``` +!>1) if file_exists('.fpm-package-lock') then +!> wait_for_file_to_be_deleted('.fpm-package-lock') +!>2) create_file('.fpm-package-lock') +!>3) do_something() +!>4) delete_file('.fpm-package-lock') +!>``` +!> The problem with this code is that `.fpm-packge-lock` may be created by some +!> other process after the check on line (1), but before line (2) has executed, +!> and then it's not very clear what will happen, both processes might think +!> that they are have acquired a lock on the package directory. A better piece +!> of code could be: +!>``` +!>error = create_file('.fpm-package-lock') +!>if error == ALREADY_EXISTS then +!> create_this_file_again_after_deletion('.fpm-package-lock') +!>do_something() +!>delete_file('.fpm-package-lock') +!>``` + +! IMPLEMENTATION NOTES(@emmabastas) +! +! There are many ways to lock a directory, and the approach we're using here +! is maybe the simplest: If `.fpm-package-lock` exists in the package directory +! then the directory is locked, if we manage to create `.fpm-package-lock` then +! we have the lock. +! +! The problem with this approach is that if `fpm` doesn't terminate normally, or +! maybe if there's some bug we might leave the lock-file behind, and later fpm +! process might wait indefinitely for the package to be unlocked with no way +! of knowing that the file was left behind by accident. +! +! The approach taken here is to simply print a warning/info message to the user +! about the lock-file, and that they can remove it manually if they suspect it's +! been left behind by accident, this is how `git` does it. +! +! (RANT STARTS HERE) +! +! A common attempt at improving this situation might be to write the PID + +! process start time into the lock-file, that way other processes can verify +! that the lock-file wasn't left behind on accident. However this adds quite a +! bit of complexity to the code, and it is very difficult / maybe even +! impossible to do without race-conditions: There is no atomic way to tell the +! OS to: +! > Open this file for reading with a shared lock in case it exists and +! > if it doesn't exists then create the file with for writing with an +! > exclusive lock. +! Even if we we're able to do it there is a conceptual flaw: Distributed file +! systems. If the package lives on another machine that you're accessing through +! something like NFS then you'll end up writing your PID to a file living on +! another machine that's not running the process, and so processes on that +! machine can't know whether the lock is valid or not. +! +! Now we might turn to actual OS file-locking primitives such as `fcntl` on UNIX +! and `LockFile` on Windows. This is again a step-up in complexity, and I don't +! know about `LockFile` but `fcntl` is fraught with problems: +! https://chris.improbable.org/2010/12/16/everything-you-never-wanted-to-know-about-file-locking/ +! +! My conclusion is that anything more advanced than the current implementation +! might just not be worth it, but I'm happy to be proven wrong! :-) + +module fpm_lock + +use :: fpm_error, only : error_t, fatal_error +use :: fpm_os, only : get_current_directory +use :: fpm_filesystem, only : join_path, delete_file +use, intrinsic :: iso_fortran_env, only : stderr => error_unit +use iso_c_binding, only : c_funptr, c_funloc + + +implicit none +private +public :: fpm_lock_acquire, fpm_lock_acquire_noblock, fpm_lock_release + +interface + ! atexit is a standard C90 function. + subroutine atexit(fptr) bind(c, name='atexit') + import c_funptr + type(c_funptr), value :: fptr + end subroutine atexit +end interface + +contains + +! This routine is called when fpm terminates normally and is used to remove +! the .fpm-package-lock in case we created it. +! +! Of note is that this only works when fpm is terminated "normally", meaning +! if a user manually kills the process this function won't be called. +subroutine atexit_cleanup() + type(error_t), allocatable :: error + call fpm_lock_release(error) + ! If there is an error there isn't all that much for us to do, we're exiting + ! the program after all. +end subroutine atexit_cleanup + +!> Like [[fpm_lock_acquire]] but it some other process already has a lock it +!> returns immediately instead of waiting indefinitely. +subroutine fpm_lock_acquire_noblock(error, success) + !> Error handling + type(error_t), allocatable, intent(out) :: error + + !> `.true.` if a package lock was acquired, `.false.` otherwise. + logical, optional, intent(out) :: success + + ! unit for open lock-file. + integer :: lock_unit + + ! Error status and message. + integer :: iostat + character(len=256) :: iomsg + + open(file='.fpm-package-lock', & + action='read', & + status='new', & + newunit=lock_unit, & + iostat=iostat, & + iomsg=iomsg) + + ! If there was an error we asume it's because the lock-file already exists + ! (but there could be other reasons too) + if (iostat /= 0) then + if (present(success)) success = .false. + return + end if + + ! At this point we have the lock. + + ! Close the unit without removing the lock-file + close(unit=lock_unit, & + status='keep', & + iostat=iostat) + + if (iostat /= 0) then + if (present(success)) success = .false. + call fatal_error(error, "Error closing lock-file") + return + end if + + if (present(success)) success = .true. + + ! Setup the atexit handler + !call atexit(c_funloc(atexit_cleanup)) +end subroutine fpm_lock_acquire_noblock + +!> Try to acquire a lock on the current package directory. If some other process +!> already has a lock this function blocks until it can get the lock. +!> @note +!> You cannot use this function multiple times without calling +!> [[fpm_lock_release]] first. +!> @endnote +subroutine fpm_lock_acquire(error) + !> Error handling + type(error_t), allocatable, intent(out) :: error + + logical :: got_lock + + character(len=:), allocatable :: cwd + character(len=:), allocatable :: lockfile_path + + call get_current_directory(cwd, error) + if (allocated(error)) return + + lockfile_path = join_path(cwd, '.fpm-pakage-lock') + + call fpm_lock_acquire_noblock(error, success=got_lock) + if (allocated(error)) return + + if (.not. got_lock) then + write(stderr, *) "Warning: file "//lockfile_path//" exists." + write(stderr, *) "" + write(stderr, *) "Another process seams to be working on this package" + write(stderr, *) "already and this process will wait for .fpm-package-lock" + write(stderr, *) "to be removed before proceeding. If you think that a" + write(stderr, *) "previous process crashed/terminated without removing" + write(stderr, *) ".fpm-package-lock then you can try removing it manually." + write(stderr, *) + write(stderr, *) "If the problem persists then please file a bug report" + write(stderr, *) "at https://github.com/fortran-lang/fpm/issues" + end if + + do while (.not. got_lock) + call sleep(1) ! not very sophisticated but it works :-) + call fpm_lock_acquire_noblock(error, success=got_lock) + if (allocated(error)) return + end do +end subroutine fpm_lock_acquire + +!> Release a lock on the current package directory +!> @note +!> You can only release a lock if you acquired it with [[fpm_lock_acquire]] +!> first. +!> @endnote +subroutine fpm_lock_release(error) + !> Error handling + type(error_t), allocatable, intent(out) :: error + + integer :: unit + integer :: iostat + character(len=256) :: iomsg + + open(file='.fpm-package-lock', & + action='read', & + status='old', & + newunit=unit, & + iostat=iostat, & + iomsg=iomsg) + + if (iostat /= 0) then + call fatal_error(error, "Error opening lock-file for deletion: "//iomsg) + return + end if + + close(unit=unit, & + status='delete', & + iostat=iostat) + + if (iostat /= 0) then + call fatal_error(error, "Error closing lock-file") + return + end if +end subroutine fpm_lock_release + +end module fpm_lock diff --git a/test/fpm_test/main.f90 b/test/fpm_test/main.f90 index d272761f93..895d8a9aef 100644 --- a/test/fpm_test/main.f90 +++ b/test/fpm_test/main.f90 @@ -6,6 +6,7 @@ program fpm_testing use test_compiler, only : collect_compiler use test_manifest, only : collect_manifest use test_filesystem, only : collect_filesystem + use test_lock, only : collect_lock use test_source_parsing, only : collect_source_parsing use test_module_dependencies, only : collect_module_dependencies use test_package_dependencies, only : collect_package_dependencies @@ -27,6 +28,7 @@ program fpm_testing & new_testsuite("fpm_toml", collect_toml), & & new_testsuite("fpm_manifest", collect_manifest), & & new_testsuite("fpm_filesystem", collect_filesystem), & + & new_testsuite("fpm_lock", collect_lock), & & new_testsuite("fpm_source_parsing", collect_source_parsing), & & new_testsuite("fpm_module_dependencies", collect_module_dependencies), & & new_testsuite("fpm_package_dependencies", collect_package_dependencies), & diff --git a/test/fpm_test/test_lock.f90 b/test/fpm_test/test_lock.f90 new file mode 100644 index 0000000000..864c3da034 --- /dev/null +++ b/test/fpm_test/test_lock.f90 @@ -0,0 +1,218 @@ +module test_lock + + use testsuite, only : new_unittest, unittest_t, test_failed + use fpm_error, only : error_t, fatal_error + use fpm_filesystem, only : run, exists + use fpm_lock, only : fpm_lock_acquire, fpm_lock_acquire_noblock, & + fpm_lock_release + + implicit none + private + public :: collect_lock + +contains + + !> Collect unit tests. + subroutine collect_lock(tests) + + !> Unit tests to collect. + type(unittest_t), allocatable, intent(out) :: tests(:) + + tests = [ & + & new_unittest('acquire-leaves-lockfile', acquire_leaves_lockfile), & + & new_unittest('aquire-release-leaves-nothing', acquire_release_leaves_nothing), & + & new_unittest('acquire-release-acquire-release', acquire_release_acquire_release), & + & new_unittest('double-acquire', double_acquire), & + & new_unittest('release', release, should_fail=.true.), & + & new_unittest('acquire-release-release', acquire_release_release, should_fail=.true.), & + & new_unittest('acquire-existing-lockfile-valid', acquire_existing_lockfile_valid), & + & new_unittest('acquire-blocks', acquire_blocks), & + & new_unittest('release-rouge-remove', release_rouge_remove) & + ] + end subroutine collect_lock + + !> Setup before each unittest + subroutine setup() + call run ('rm -f .fpm-package-lock') + end subroutine setup + + !> Cleanup after each unit test + subroutine cleanup() + call run ('touch .fpm-package-lock') + end subroutine cleanup + + !> Helper function to acquire a lock, and if that fails an error is raised. + subroutine acquire_lock(error) + type(error_t), allocatable, intent(out) :: error + logical :: success + + call fpm_lock_acquire_noblock(error, success) + if (allocated(error)) return + if (.not. success) then + call test_failed(error, "lock-file acquire failed") + end if + end subroutine acquire_lock + + !> A simple fpm_lock_acquire_noblock creates a lock-file. + subroutine acquire_leaves_lockfile(error) + type(error_t), allocatable, intent(out) :: error + logical :: success + + call setup() + + call acquire_lock(error) + if (allocated(error)) return + if (.not. exists('.fpm-package-lock')) then + call test_failed(error, "lock-file wasn't created") + end if + + call cleanup() + + end subroutine acquire_leaves_lockfile + + !> fpm_lock_release removes the lock-file. + subroutine acquire_release_leaves_nothing(error) + type(error_t), allocatable, intent(out) :: error + logical :: success + + call setup() + + call acquire_lock(error) + if (allocated(error)) return + + call fpm_lock_release(error) + if (allocated(error)) return + if (exists('.fpm-package-lock')) then + call test_failed(error, "lock-file wasn't removed") + end if + + call cleanup() + end subroutine acquire_release_leaves_nothing + + !> subsequent locks and releases work. + subroutine acquire_release_acquire_release (error) + type(error_t), allocatable, intent(out) :: error + + call setup() + + call acquire_lock(error) + if (allocated(error)) return + + call fpm_lock_release(error) + if (allocated(error)) return + + call acquire_lock(error) + if (allocated(error)) return + + call fpm_lock_release(error) + if (allocated(error)) return + + call cleanup() + end subroutine acquire_release_acquire_release + + !> Double acquire should not work the second time. + subroutine double_acquire(error) + type(error_t), allocatable, intent(out) :: error + logical :: success + + call setup() + + call fpm_lock_acquire_noblock(error) + if (allocated(error)) return + + call fpm_lock_acquire_noblock(error, success) + if (allocated(error)) return + + if (success) then + call test_failed(error, "Expected lock to not succeed.") + end if + + call cleanup() + end subroutine double_acquire + + !> Release without acquire should cause an error. + subroutine release(error) + type(error_t), allocatable, intent(out) :: error + + call setup() + + call fpm_lock_release(error) + + call cleanup() + end subroutine release + + !> One release to much should cause and error + subroutine acquire_release_release(error) + type(error_t), allocatable, intent(out) :: error + + call setup() + + call fpm_lock_acquire_noblock(error) + call fpm_lock_release(error) + call fpm_lock_release(error) + end subroutine acquire_release_release + + !> If a lock-file already exists then we shoudln't acquire a lock. + subroutine acquire_existing_lockfile_valid(error) + type(error_t), allocatable, intent(out) :: error + logical :: success + + call setup() + + ! Some other process acquires a lock. + call run('touch .fpm-package-lock') + + ! We expect this to not succeed, (but no errors should be raised). + call fpm_lock_acquire_noblock(error, success=success) + if (allocated(error)) return + if (success) then + call test_failed(error, "Expected package lock to fail") + end if + + call cleanup() + end subroutine acquire_existing_lockfile_valid + + !> A blocking acquire should resume when the lock-file is deleted. + subroutine acquire_blocks(error) + type(error_t), allocatable, intent(out) :: error + + call setup() + + ! Some other process acquires a lock to work on the package briefly. + call run('touch .fpm-package-lock') + call run('sleep 0.5 && rm .fpm-package-lock &') + + ! Our blocking acquire should wait for a bit and then go through + call fpm_lock_acquire(error) + if (allocated(error)) return + + call cleanup() + end subroutine + + !> If some other process removes our lock-file then fpm_lock_release should + !> give an error. + subroutine release_rouge_remove(error) + type(error_t), allocatable, intent(out) :: error + type(error_t), allocatable :: dummy_error + logical success + + call setup() + + call fpm_lock_acquire_noblock(error, success) + if (allocated(error)) return + if (.not. success) then + call test_failed(error, "lock-file acquire failed") + end if + + ! Some rouge process removes the lock-file + call run('rm .fpm-package-lock') + + call fpm_lock_release(dummy_error) + if (.not. allocated(dummy_error)) then + call test_failed(error, & + "Expected fpm_lock_release to fail, but it succeeded") + end if + + call cleanup() + end subroutine +end module test_lock