From a93a435bca563632142822b91253dd37f15be75a Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Tue, 14 Apr 2026 14:16:58 +1000 Subject: [PATCH] Memory and performance improvements - Matrix class: packed/symmetric storage, attach-to-mmap, calloc allocator - LM solver + Anderson acceleration CLI - Static build fixes (glibc 2.34+ pthread) - Matrix test suite --- dynadjust/CMakeLists.txt | 56 +- .../dynadjust/dnaadjust/dnaadjust-multi.cpp | 13 +- .../dynadjust/dnaadjust/dnaadjust-stage.cpp | 118 +- dynadjust/dynadjust/dnaadjust/dnaadjust.cpp | 1507 +++++++++++++---- dynadjust/dynadjust/dnaadjust/dnaadjust.hpp | 152 +- .../dnaadjustwrapper/dnaadjustprogress.cpp | 25 +- .../dnaadjustwrapper/dnaadjustwrapper.cpp | 190 ++- .../include/config/dnaoptions-interface.hpp | 17 + dynadjust/include/config/dnaoptions.hpp | 25 +- .../functions/dnatemplatematrixfuncs.hpp | 4 +- .../include/math/dnamatrix_contiguous.cpp | 791 ++++++++- .../include/math/dnamatrix_contiguous.hpp | 183 +- dynadjust/include/memory/dnafile_mapping.cpp | 78 +- dynadjust/include/memory/dnafile_mapping.hpp | 14 +- tests/test_matrix.cpp | 945 +++++++++++ 15 files changed, 3546 insertions(+), 572 deletions(-) diff --git a/dynadjust/CMakeLists.txt b/dynadjust/CMakeLists.txt index c6d523264..b3bdb844a 100644 --- a/dynadjust/CMakeLists.txt +++ b/dynadjust/CMakeLists.txt @@ -195,7 +195,7 @@ else() endif() message(STATUS "Found XercesC (${XercesC_INCLUDE_DIRS} ${XercesC_LIBRARIES})") -# Static builds need ICU for Xerces transcoding (glibc iconv can't dlopen gconv modules in static binaries) +# Static builds need ICU for Xerces transcoding (glibc iconv can't dlopen gconv modules in static binaries). 
if(BUILD_STATIC AND UNIX AND NOT APPLE) find_package(ICU COMPONENTS uc data) if(ICU_FOUND) @@ -1132,6 +1132,53 @@ if (BUILD_TESTING) add_test (NAME test-urban-phased-network COMMAND $ urban.phased.adj urban.phased.adj.expected --skip-to-marker "M Station 1" -t 0.001) add_test (NAME test-urban-thread-network COMMAND $ urban_mt.phased-mt.adj urban_mt.phased-mt.adj.expected --skip-to-marker "M Station 1" -t 0.01 -v) + # ........................................................................ + # LM / Trust-Region solver tests + # ........................................................................ + + # LM-1. GNSS simultaneous with LM — should match GN results + add_test (NAME import-gnss-lm COMMAND $ -n gnss_lm gnss-network.stn gnss-network.msr -r GDA2020 --flag-unused-stations --quiet) + add_test (NAME geoid-gnss-lm COMMAND $ gnss_lm -g gnss-network-geoid.gsb --export-dna-geo) + add_test (NAME reftran-gnss-lm COMMAND $ gnss_lm --export-dna --export-xml) + add_test (NAME adjust-gnss-lm COMMAND $ gnss_lm --output-adj-msr --scale-normals-to-unity --lm-enabled) + add_test (NAME test-gnss-lm-vs-expected COMMAND $ gnss_lm.simult.adj gnss.simult.adj.expected --skip-headers 52 -t 0.001) + + # LM-2. Urban phased with LM — should match GN results + add_test (NAME import-urban-lm COMMAND $ -n urban_lm urban-network.stn urban-network.msr --flag-unused-stations --quiet) + add_test (NAME geoid-urban-lm COMMAND $ urban_lm -g urban-network-geoid.gsb --export-dna-geo) + add_test (NAME segment-urban-lm COMMAND $ urban_lm --min 50 --max 150) + add_test (NAME adjust-urban-lm COMMAND $ urban_lm --output-adj-msr --phased --lm-enabled) + add_test (NAME test-urban-lm-vs-expected COMMAND $ urban_lm.phased.adj urban.phased.adj.expected --skip-to-marker "M Station 1" -t 0.001) + + # LM-3. 
Urban multi-threaded phased with LM — should match GN results + add_test (NAME import-urban-mt-lm COMMAND $ -n urban_mt_lm urban-network.stn urban-network.msr --quiet) + add_test (NAME reftran-urban-mt-lm COMMAND $ urban_mt_lm -r gda2020) + add_test (NAME geoid-urban-mt-lm COMMAND $ urban_mt_lm -g urban-network-geoid.gsb) + add_test (NAME segment-urban-mt-lm COMMAND $ urban_mt_lm --min 50 --max 85) + add_test (NAME adjust-urban-mt-lm COMMAND $ urban_mt_lm --output-adj-msr --multi --lm-enabled) + add_test (NAME test-urban-mt-lm-vs-expected COMMAND $ urban_mt_lm.phased-mt.adj urban_mt.phased-mt.adj.expected --skip-to-marker "M Station 1" -t 0.01) + + # LM-4. DSG distance-only simultaneous — multi-iteration stress test + add_test (NAME import-dsg-lm COMMAND $ -n dsg_lm dsg.stn dsg.msr --flag-unused-stations --quiet) + add_test (NAME adjust-dsg-lm COMMAND $ dsg_lm --output-adj-msr --max-iterations 50 --lm-enabled) + + # LM-5. DSG distance-only simultaneous — GN baseline for comparison + add_test (NAME import-dsg-gn COMMAND $ -n dsg_gn dsg.stn dsg.msr --flag-unused-stations --quiet) + add_test (NAME adjust-dsg-gn COMMAND $ dsg_gn --output-adj-msr --max-iterations 50) + add_test (NAME test-dsg-lm-vs-gn COMMAND $ dsg_lm.simult.adj dsg_gn.simult.adj --skip-headers 52 -t 0.01) + + # LM-6. DSG distance-only phased with LM + add_test (NAME import-dsg-ph-lm COMMAND $ -n dsg_ph_lm dsg.stn dsg.msr --flag-unused-stations --quiet) + add_test (NAME segment-dsg-ph-lm COMMAND $ dsg_ph_lm --min 10 --max 30) + add_test (NAME adjust-dsg-ph-lm COMMAND $ dsg_ph_lm --output-adj-msr --phased --max-iterations 50 --lm-enabled) + + # LM-7. 
GNSS simultaneous with large initial lambda (robustness check) + add_test (NAME import-gnss-lm-large-lambda COMMAND $ -n gnss_lm_ll gnss-network.stn gnss-network.msr -r GDA2020 --flag-unused-stations --quiet) + add_test (NAME geoid-gnss-lm-large-lambda COMMAND $ gnss_lm_ll -g gnss-network-geoid.gsb --export-dna-geo) + add_test (NAME reftran-gnss-lm-large-lambda COMMAND $ gnss_lm_ll --export-dna --export-xml) + add_test (NAME adjust-gnss-lm-large-lambda COMMAND $ gnss_lm_ll --output-adj-msr --scale-normals-to-unity --lm-enabled --lm-lambda-init 10.0) + add_test (NAME test-gnss-lm-large-lambda COMMAND $ gnss_lm_ll.simult.adj gnss.simult.adj.expected --skip-headers 52 -t 0.001) + # 8. Source tag preservation in --export-xml (issue #317) # Import XML data with tags and verify they are preserved in export add_test (NAME import-source-test COMMAND $ -n srctest source-test-stn.xml source-test-msr.xml --export-xml -r GDA2020) @@ -1998,6 +2045,13 @@ if (BUILD_TESTING) set_tests_properties(test-urban-phased-network PROPERTIES DEPENDS adjust-urban-network) set_tests_properties(test-urban-thread-network PROPERTIES DEPENDS adjust-urban-network-thread-01) + # LM test dependencies + set_tests_properties(test-gnss-lm-vs-expected PROPERTIES DEPENDS adjust-gnss-lm) + set_tests_properties(test-urban-lm-vs-expected PROPERTIES DEPENDS adjust-urban-lm) + set_tests_properties(test-urban-mt-lm-vs-expected PROPERTIES DEPENDS adjust-urban-mt-lm) + set_tests_properties(test-dsg-lm-vs-gn PROPERTIES DEPENDS "adjust-dsg-lm;adjust-dsg-gn") + set_tests_properties(test-gnss-lm-large-lambda PROPERTIES DEPENDS adjust-gnss-lm-large-lambda) + set_tests_properties(ref-itrf-pmm-06 PROPERTIES DEPENDS ref-itrf-pmm-05) #set_tests_properties(ref-itrf-pmm-07 PROPERTIES DEPENDS ref-itrf-pmm-06) diff --git a/dynadjust/dynadjust/dnaadjust/dnaadjust-multi.cpp b/dynadjust/dynadjust/dnaadjust/dnaadjust-multi.cpp index 7e01b3372..c9d5a96f8 100644 --- a/dynadjust/dynadjust/dnaadjust/dnaadjust-multi.cpp +++ 
b/dynadjust/dynadjust/dnaadjust/dnaadjust-multi.cpp @@ -91,6 +91,12 @@ bool prepareAdjustmentExceptionThrown(const std::vector& pre // void dna_adjust::AdjustPhasedMultiThread() { + if (projectSettings_.a.lm_enabled) + { + AdjustPhasedMultiThreadLM(); + return; + } + initialiseIteration(); std::string corr_msg; @@ -295,7 +301,7 @@ void dna_adjust::SolveMT(bool COMPUTE_INVERSE, const UINT32& block) { // Compute inverse of normals (aposteriori variance matrix) // (AT * V-1 * A)-1 - FormInverseVarianceMatrix(&(v_normalsR_.at(block)), false); + FormInverseVarianceMatrix(&(v_normalsR_.at(block)), false, true); } // compute weighted "measured minus computed" @@ -304,7 +310,10 @@ void dna_adjust::SolveMT(bool COMPUTE_INVERSE, const UINT32& block) // Solve corrections from normal equations v_correctionsR_.at(block).redim(v_designR_.at(block).columns(), 1); - v_correctionsR_.at(block).multiply(v_normalsR_.at(block), "N", At_Vinv_m, "N"); + if (v_normalsR_.at(block).is_symmetric()) + v_correctionsR_.at(block).multiply_sym(v_normalsR_.at(block), At_Vinv_m); + else + v_correctionsR_.at(block).multiply(v_normalsR_.at(block), "N", At_Vinv_m, "N"); // debug output? 
if (projectSettings_.g.verbose > 3) diff --git a/dynadjust/dynadjust/dnaadjust/dnaadjust-stage.cpp b/dynadjust/dynadjust/dnaadjust/dnaadjust-stage.cpp index e0e1175d9..c5e18e255 100644 --- a/dynadjust/dynadjust/dnaadjust/dnaadjust-stage.cpp +++ b/dynadjust/dynadjust/dnaadjust/dnaadjust-stage.cpp @@ -173,7 +173,7 @@ void dna_adjust::DeserialiseBlockFromMappedFile(const UINT32& block, const int f break; case sf_normals_r: addr = normalsR_map_.GetBlockRegionAddr(block); - v_normalsR_.at(block).ReadMappedFileRegion(addr); + v_normalsR_.at(block).AttachMappedFileRegion(addr); break; case sf_atvinv: v_AtVinv_.at(block).allocate(); @@ -183,47 +183,47 @@ void dna_adjust::DeserialiseBlockFromMappedFile(const UINT32& block, const int f break; case sf_meas_minus_comp: addr = measMinusComp_map_.GetBlockRegionAddr(block); - v_measMinusComp_.at(block).ReadMappedFileRegion(addr); + v_measMinusComp_.at(block).AttachMappedFileRegion(addr); break; case sf_estimated_stns: addr = estimatedStations_map_.GetBlockRegionAddr(block); - v_estimatedStations_.at(block).ReadMappedFileRegion(addr); + v_estimatedStations_.at(block).AttachMappedFileRegion(addr); break; case sf_original_stns: addr = originalStations_map_.GetBlockRegionAddr(block); - v_originalStations_.at(block).ReadMappedFileRegion(addr); + v_originalStations_.at(block).AttachMappedFileRegion(addr); break; case sf_rigorous_stns: addr = rigorousStations_map_.GetBlockRegionAddr(block); - v_rigorousStations_.at(block).ReadMappedFileRegion(addr); + v_rigorousStations_.at(block).AttachMappedFileRegion(addr); break; case sf_junction_vars: addr = junctionVariances_map_.GetBlockRegionAddr(block); - v_junctionVariances_.at(block).ReadMappedFileRegion(addr); + v_junctionVariances_.at(block).AttachMappedFileRegion(addr); break; case sf_junction_vars_f: addr = junctionVariancesFwd_map_.GetBlockRegionAddr(block); - v_junctionVariancesFwd_.at(block).ReadMappedFileRegion(addr); + v_junctionVariancesFwd_.at(block).AttachMappedFileRegion(addr); 
break; case sf_junction_ests_f: addr = junctionEstimatesFwd_map_.GetBlockRegionAddr(block); - v_junctionEstimatesFwd_.at(block).ReadMappedFileRegion(addr); + v_junctionEstimatesFwd_.at(block).AttachMappedFileRegion(addr); break; case sf_junction_ests_r: addr = junctionEstimatesRev_map_.GetBlockRegionAddr(block); - v_junctionEstimatesRev_.at(block).ReadMappedFileRegion(addr); + v_junctionEstimatesRev_.at(block).AttachMappedFileRegion(addr); break; case sf_rigorous_vars: addr = rigorousVariances_map_.GetBlockRegionAddr(block); - v_rigorousVariances_.at(block).ReadMappedFileRegion(addr); + v_rigorousVariances_.at(block).AttachMappedFileRegion(addr); break; case sf_prec_adj_msrs: addr = precAdjMsrs_map_.GetBlockRegionAddr(block); - v_precAdjMsrsFull_.at(block).ReadMappedFileRegion(addr); + v_precAdjMsrsFull_.at(block).AttachMappedFileRegion(addr); break; case sf_corrections: addr = corrections_map_.GetBlockRegionAddr(block); - v_corrections_.at(block).ReadMappedFileRegion(addr); + v_corrections_.at(block).AttachMappedFileRegion(addr); if (v_blockMeta_.at(block)._blockLast) v_correctionsR_.at(block).allocate(); @@ -240,8 +240,8 @@ void dna_adjust::SerialiseBlockToMappedFile(const UINT32& block, const int file_ { // serialise all. 
That is, call this function again, but with // arguments for all files - SerialiseBlockToMappedFile(block, 16, - sf_normals, sf_normals_r, sf_atvinv, sf_design, sf_meas_minus_comp, + SerialiseBlockToMappedFile(block, 12, + sf_normals_r, sf_meas_minus_comp, sf_estimated_stns, sf_original_stns, sf_rigorous_stns, sf_junction_vars, sf_junction_vars_f, sf_junction_ests_f, sf_junction_ests_r, sf_rigorous_vars, sf_prec_adj_msrs, sf_corrections); @@ -257,16 +257,10 @@ void dna_adjust::SerialiseBlockToMappedFile(const UINT32& block, const int file_ { switch (va_arg(vlist, int)) { - case sf_normals: - break; case sf_normals_r: addr = normalsR_map_.GetBlockRegionAddr(block); v_normalsR_.at(block).WriteMappedFileRegion(addr); break; - case sf_atvinv: - break; - case sf_design: - break; case sf_meas_minus_comp: addr = measMinusComp_map_.GetBlockRegionAddr(block); v_measMinusComp_.at(block).WriteMappedFileRegion(addr); @@ -395,7 +389,11 @@ void dna_adjust::OpenStageFileStreams(const int file_count, ...) 
} std::stringstream ss; - ss << projectSettings_.g.output_folder << FOLDER_SLASH << projectSettings_.g.network_name << "-"; + if (projectSettings_.a.stage_path.empty()) + ss << projectSettings_.g.output_folder; + else + ss << projectSettings_.a.stage_path; + ss << FOLDER_SLASH << projectSettings_.g.network_name << "-"; std::string filePath(ss.str()); v_stageFileStreams_.clear(); @@ -851,6 +849,46 @@ void dna_adjust::OffloadBlockToMappedFile(const UINT32& block) // Unload block matrix data from memory UnloadBlock(block); + + // Advise kernel that mapped pages for this block are no longer needed, + // freeing page cache for upcoming blocks + AdviseBlockDontNeed(block); +} + + +void dna_adjust::AdviseBlockDontNeed(const UINT32& block) +{ + using advice = boost::interprocess::mapped_region::advice_types; + normalsR_map_.AdviseRegion(block, advice::advice_dontneed); + measMinusComp_map_.AdviseRegion(block, advice::advice_dontneed); + estimatedStations_map_.AdviseRegion(block, advice::advice_dontneed); + originalStations_map_.AdviseRegion(block, advice::advice_dontneed); + rigorousStations_map_.AdviseRegion(block, advice::advice_dontneed); + junctionVariances_map_.AdviseRegion(block, advice::advice_dontneed); + junctionVariancesFwd_map_.AdviseRegion(block, advice::advice_dontneed); + junctionEstimatesFwd_map_.AdviseRegion(block, advice::advice_dontneed); + junctionEstimatesRev_map_.AdviseRegion(block, advice::advice_dontneed); + rigorousVariances_map_.AdviseRegion(block, advice::advice_dontneed); + precAdjMsrs_map_.AdviseRegion(block, advice::advice_dontneed); + corrections_map_.AdviseRegion(block, advice::advice_dontneed); +} + + +void dna_adjust::AdviseBlockWillNeed(const UINT32& block) +{ + using advice = boost::interprocess::mapped_region::advice_types; + normalsR_map_.AdviseRegion(block, advice::advice_willneed); + measMinusComp_map_.AdviseRegion(block, advice::advice_willneed); + estimatedStations_map_.AdviseRegion(block, advice::advice_willneed); + 
originalStations_map_.AdviseRegion(block, advice::advice_willneed); + rigorousStations_map_.AdviseRegion(block, advice::advice_willneed); + junctionVariances_map_.AdviseRegion(block, advice::advice_willneed); + junctionVariancesFwd_map_.AdviseRegion(block, advice::advice_willneed); + junctionEstimatesFwd_map_.AdviseRegion(block, advice::advice_willneed); + junctionEstimatesRev_map_.AdviseRegion(block, advice::advice_willneed); + rigorousVariances_map_.AdviseRegion(block, advice::advice_willneed); + precAdjMsrs_map_.AdviseRegion(block, advice::advice_willneed); + corrections_map_.AdviseRegion(block, advice::advice_willneed); } @@ -896,7 +934,7 @@ void dna_adjust::UnloadBlock(const UINT32& block, const int file_count, ...) if (file_count == 0) { // deserialise all - UnloadBlock(block, 16, + UnloadBlock(block, 15, sf_normals, sf_normals_r, sf_atvinv, sf_design, sf_meas_minus_comp, sf_estimated_stns, sf_original_stns, sf_rigorous_stns, sf_junction_vars, sf_junction_vars_f, sf_junction_ests_f, sf_junction_ests_r, @@ -907,58 +945,60 @@ void dna_adjust::UnloadBlock(const UINT32& block, const int file_count, ...) va_list vlist; va_start(vlist, file_count); - // Unload block matrix data from memory + // Unload block matrix data from memory by freeing buffers. + // Use deallocate() instead of explicit destructor calls to keep + // the matrix objects in a valid state for later reuse. 
for (UINT16 file(0); file combineAdjustmentQueue; concurrent_queue prepareAdjustmentQueue; std::exception_ptr fwd_error, rev_error, cmb_error, prep_error; +void dna_adjust::SetMaxBlasThreads(int n) { dynadjust::math::set_max_blas_threads(n); } +int dna_adjust::GetMaxBlasThreads() { return dynadjust::math::get_max_blas_threads(); } + dna_adjust::dna_adjust() : isPreparing_(false) , isAdjusting_(false) @@ -42,6 +45,7 @@ dna_adjust::dna_adjust() , isAdjustmentQuestionable_(false) , blockCount_(1) , currentBlock_(0) + , lastBlockElapsedMs_(0) , total_time_(0) , adjustStatus_(ADJUST_SUCCESS) , currentIteration_(0) @@ -172,8 +176,15 @@ UINT32& dna_adjust::incrementIteration() return ++currentIteration_; } -void dna_adjust::initialiseIteration(const UINT32& iteration) -{ +void dna_adjust::decrementIteration() +{ + std::lock_guard lock(current_iterationMutex); + if (currentIteration_ > 0) + --currentIteration_; +} + +void dna_adjust::initialiseIteration(const UINT32& iteration) +{ std::lock_guard lock(current_iterationMutex); currentIteration_ = iteration; } @@ -240,6 +251,7 @@ void dna_adjust::PrepareAdjustment(const project_settings& projectSettings) isPreparing_ = true; isAdjusting_ = true; isCombining_ = false; + rebuildingDesign_ = false; isFirstTimeAdjustment_ = true; projectSettings_ = projectSettings; @@ -528,14 +540,14 @@ void dna_adjust::UpdateAdjustment(bool iterate) // Hence, only design and msr-comp are required so as to compute stats if (!iterate) continue; - + switch (projectSettings_.a.adjust_mode) { case PhasedMode: case Phased_Block_1Mode: v_normals_.at(block).zero(); UpdateNormals(block, false); - + if (projectSettings_.a.multi_thread) { v_estimatedStationsR_.at(block) = v_rigorousStations_.at(block); @@ -545,9 +557,9 @@ void dna_adjust::UpdateAdjustment(bool iterate) } // Back up normals. 
This copy contains the contributions from all - // apriori measurement variances, excluding parameter station + // apriori measurement variances, excluding parameter station // variances and junction station variances - v_normalsR_.at(block) = v_normals_.at(block); + v_normalsR_.at(block) = v_normals_.at(block); AddConstraintStationstoNormalsForward(block); break; @@ -742,17 +754,17 @@ void dna_adjust::PrepareStationandVarianceMatrices(const UINT32& block) // resize rigorous coordinate estimate array v_rigorousStations_.at(block).redim(v_unknownsCount_.at(block), 1); - - // resize rigorous variances - v_rigorousVariances_.at(block).redim(v_unknownsCount_.at(block), v_unknownsCount_.at(block)); + + if (projectSettings_.a.stage || projectSettings_.a.multi_thread) + v_rigorousVariances_.at(block).redim_packed(v_unknownsCount_.at(block)); // resize junction variances - v_junctionVariances_.at(block).redim(j, j); + v_junctionVariances_.at(block).redim_packed(j); // resize junction station coordinate estimate arrays if (!v_blockMeta_.at(block)._blockLast && !v_blockMeta_.at(block)._blockIsolated) { - v_junctionVariancesFwd_.at(block).redim(j, j); + v_junctionVariancesFwd_.at(block).redim_packed(j); v_junctionEstimatesFwd_.at(block).redim(j, 1); v_junctionEstimatesRev_.at(block+1).redim(j, 1); } @@ -860,11 +872,10 @@ void dna_adjust::PrepareDesignAndMsrMnsCmpMatrices(const UINT32& block) // Simultaneous, phased (multithreaded and block1) adjustments // Redim all matrices - v_normals_.at(block).redim(v_unknownsCount_.at(block), v_unknownsCount_.at(block)); + v_normals_.at(block).redim_packed(v_unknownsCount_.at(block)); v_design_.at(block).redim(v_measurementCount_.at(block), v_unknownsCount_.at(block)); v_corrections_.at(block).redim(v_unknownsCount_.at(block), 1); - v_precAdjMsrsFull_.at(block).redim(v_measurementVarianceCount_.at(block), 1); // All blocks in v_correctionsR_ are used for multi thread mode, but only the last is used // in single thread mode @@ -897,9 
+908,7 @@ void dna_adjust::PrepareDesignAndMsrMnsCmpMatricesStage(const UINT32& block) if (projectSettings_.a.recreate_stage_files || !bms_meta_.reduced) { // Redim NormalsR - v_normalsR_.at(block).redim( - v_unknownsCount_.at(block), - v_unknownsCount_.at(block)); + v_normalsR_.at(block).redim_packed(v_unknownsCount_.at(block)); // Redimension the corrections matrices v_corrections_.at(block).redim(v_unknownsCount_.at(block), 1); @@ -930,7 +939,41 @@ void dna_adjust::PrepareDesignAndMsrMnsCmpMatricesStage(const UINT32& block) // Creating new memory mapped files? Form the design matrices FillDesignNormalMeasurementsMatrices(true, block, false); } - + + +void dna_adjust::RebuildDesignAndAtVinv(const UINT32& block) +{ + UINT32 pseudoMsrElemCount(0); + + if (!v_blockMeta_.at(block)._blockIsolated) + { + UINT32 pseudoMsrCount = static_cast(v_JSL_.at(block).size()); + if (!v_blockMeta_.at(block)._blockFirst) + pseudoMsrCount += static_cast(v_JSL_.at(block-1).size()); + pseudoMsrElemCount = pseudoMsrCount * 3; + } + + v_design_.at(block).redim(v_measurementCount_.at(block), v_unknownsCount_.at(block)); + v_AtVinv_.at(block).redim( + v_unknownsCount_.at(block), + v_measurementCount_.at(block) + pseudoMsrElemCount); + + if (pseudoMsrElemCount > 0) + v_AtVinv_.at(block).shrink(0, pseudoMsrElemCount); + + assert(v_design_.at(block).getbuffer() != nullptr && "RebuildDesignAtVinv: design null after redim"); + assert(v_AtVinv_.at(block).getbuffer() != nullptr && "RebuildDesignAtVinv: AtVinv null after redim"); + assert(v_estimatedStations_.at(block).getbuffer() != nullptr && "RebuildDesignAtVinv: estimatedStations null"); + assert(v_measMinusComp_.at(block).getbuffer() != nullptr && "RebuildDesignAtVinv: measMinusComp null"); + + rebuildingDesign_ = true; + FillDesignNormalMeasurementsMatrices(true, block, false, true); + rebuildingDesign_ = false; + + assert(v_design_.at(block).getbuffer() != nullptr && "RebuildDesignAtVinv: design null after Fill"); + 
assert(v_AtVinv_.at(block).getbuffer() != nullptr && "RebuildDesignAtVinv: AtVinv null after Fill"); +} + // Re-form At * V-1 for next block using estimated junction parameter station variances // nextBlock = currentBlock+1 @@ -1440,7 +1483,7 @@ void dna_adjust::AddMsrtoNormalsCoVar3(const UINT32& design_row, const UINT32& s normals->elementadd(stn3+row, stn1+col, AtVinv->get(stn3+row, design_row) * design->get(design_row, stn1+col)); - + // 2-3 normals->elementadd(stn2+row, stn3+col, AtVinv->get(stn2+row, design_row) * design->get(design_row, stn3+col)); @@ -2382,9 +2425,20 @@ void dna_adjust::CloseOutputFiles() void dna_adjust::AdjustSimultaneous() { + if (projectSettings_.a.lm_enabled) + { + AdjustSimultaneousLM(); + return; + } + adjustStatus_ = ADJUST_SUCCESS; initialiseIteration(); - + + // Reset oscillation diagnostics + corrPrev_.clear(); + stnOscCount_.clear(); + oscHistory_.clear(); + std::ostringstream ss; std::string corr_msg; std::chrono::milliseconds elapsed_time(std::chrono::milliseconds(0)); @@ -2430,6 +2484,7 @@ void dna_adjust::AdjustSimultaneous() // Compute and print largest correction maxCorr_ = v_corrections_.at(0).compute_maximum_value(); OutputLargestCorrection(corr_msg); + UpdateIterationDiagnostics(); // update data for messages iterationCorrections_.add_message(corr_msg); @@ -2491,7 +2546,7 @@ void dna_adjust::ValidateandFinaliseAdjustment(cpu_timer& tot_time) fabs(maxCorr_) > projectSettings_.a.iteration_threshold) adjustStatus_ = ADJUST_MAX_ITERATIONS_EXCEEDED; - // Back up simultaneous rigorous variance estimates (for serialising + // Back up simultaneous rigorous variance estimates (for serialising // to disk at SerialiseAdjustedVarianceMatrices() ), so that executing adjust // in report-results mode has access to the latest variance estimates switch (projectSettings_.a.adjust_mode) @@ -2524,15 +2579,30 @@ void dna_adjust::PrintAdjustmentTime(cpu_timer& time, _TIMER_TYPE_ timerType) void dna_adjust::AdjustPhased() { + if 
(projectSettings_.a.lm_enabled) + { + AdjustPhasedLM(); + return; + } + initialiseIteration(); + // Reset oscillation diagnostics + corrPrev_.clear(); + stnOscCount_.clear(); + oscHistory_.clear(); + + // Initialise Anderson acceleration if enabled + if (projectSettings_.a.aa_enabled) + InitialiseAndersonAcceleration(); + std::string corr_msg; std::ostringstream ss; UINT32 i; bool iterate(true); cpu_timer it_time, tot_time; - + // do until convergence criteria is met for (i=0; iPrintIteration(incrementIteration()); - + it_time.start(); + // Snapshot coordinates before iteration for Anderson acceleration + if (projectSettings_.a.aa_enabled) + ExtractStationCoordinates(aa_x_snapshot_); + AdjustPhasedForward(); if (IsCancelled()) break; @@ -2569,6 +2643,11 @@ void dna_adjust::AdjustPhased() // Calculate and print largest adjustment correction and station ID OutputLargestCorrection(corr_msg); + UpdateIterationDiagnostics(); + + // Apply Anderson acceleration (mixes last m iterates to accelerate convergence) + if (projectSettings_.a.aa_enabled) + ApplyAndersonAcceleration(i); iterationCorrections_.add_message(corr_msg); iterationTimes_.add_message(FormatElapsedTime(it_time.elapsed().wall.count() / 1.0e9)); @@ -2584,7 +2663,7 @@ void dna_adjust::AdjustPhased() // Similar to PrepareAdjustment, UpdateAdjustment prepares every block // in the network so that forward and reverse adjustments can commence // at the same time. 
- UpdateAdjustment(iterate); + UpdateAdjustment(iterate); if (IsCancelled()) break; @@ -2593,7 +2672,7 @@ void dna_adjust::AdjustPhased() { // Compute network statistics ComputeStatisticsOnIteration(); - + // Print statistics summary to adj file printer_->PrintStatistics(false); } @@ -2699,9 +2778,10 @@ void dna_adjust::AdjustPhasedForward() UINT32 currentBlock(0); - // For staged adjustments, load the first block from mapped memory file + // For staged adjustments, prefetch and load the first block from mapped memory file if (projectSettings_.a.stage) { + AdviseBlockWillNeed(currentBlock); DeserialiseBlockFromMappedFile(currentBlock); RebuildNormals(currentBlock, __forward__, true, true); if (IsCancelled()) @@ -2739,16 +2819,24 @@ void dna_adjust::AdjustPhasedForward() // At this point, whether first iteration or not, if currentBlock is the first block, // the normals will have been initialised. For all later blocks, the normals will contain // the contribution of junction station coordinates and variances from preceding blocks. - // In either case, the block is ready for adjustment. Junction station coordinates and + // In either case, the block is ready for adjustment. Junction station coordinates and // variances are carried forward below + assert(v_design_.at(currentBlock).getbuffer() != nullptr && "AdjFwd: design null before SolveTry"); + assert(v_AtVinv_.at(currentBlock).getbuffer() != nullptr && "AdjFwd: AtVinv null before SolveTry"); + assert(v_normals_.at(currentBlock).getbuffer() != nullptr && "AdjFwd: normals null before SolveTry"); + assert(v_measMinusComp_.at(currentBlock).getbuffer() != nullptr && "AdjFwd: measMinusComp null before SolveTry"); + // Least Squares Solution - SolveTry(true, currentBlock); + if (projectSettings_.a.lm_enabled) + SolveLMTry(true, currentBlock, true); + else + SolveTry(true, currentBlock); // Does the user want to print adjusted measurements // on each iteration? 
- if (projectSettings_.o._adj_msr_iteration || - projectSettings_.o._adj_stn_iteration || + if (projectSettings_.o._adj_msr_iteration || + projectSettings_.o._adj_stn_iteration || projectSettings_.o._cmp_msr_iteration) adj_file << " done." << std::endl; @@ -2770,21 +2858,21 @@ void dna_adjust::AdjustPhasedForward() // OK, now shrink matrices back to normal size ShrinkForwardMatrices(currentBlock); - // Carry the estimated junction station coordinates and variances + // Carry the estimated junction station coordinates and variances // to the next block for applicable blocks only. For staged - // Deserialise the next block from mapped files and rebuild + // Deserialise the next block from mapped files and rebuild // normals CarryForwardJunctions(currentBlock, currentBlock+1); // For staged adjustments, write to disk and unload matrix data if (projectSettings_.a.stage) - // Don't offload last block since the reverse adjustment + // Don't offload last block since the reverse adjustment // will need this block if (currentBlock < (blockCount_ - 1)) OffloadBlockToMappedFile(currentBlock); } } - + void dna_adjust::PurgeMatricesFromDisk() { @@ -2889,8 +2977,7 @@ void dna_adjust::PrepareAdjustmentBlock(const UINT32 block, const UINT32 thread_ v_normalsR_.at(block) = v_normals_.at(block); if (projectSettings_.a.multi_thread) { - v_normalsRC_.at(block).redim( - v_normalsR_.at(block).rows(), v_normalsR_.at(block).columns()); + v_normalsRC_.at(block).redim_packed(v_normalsR_.at(block).rows()); } switch (projectSettings_.a.adjust_mode) @@ -2944,6 +3031,8 @@ void dna_adjust::ShrinkForwardMatrices(const UINT32 currentBlock) { pseudoMsrElemCount = static_cast(v_JSL_.at(currentBlock-1).size() * 3); + assert(v_measMinusComp_.at(currentBlock).getbuffer() != nullptr && "ShrinkFwd: measMinusComp null"); + assert(v_AtVinv_.at(currentBlock).getbuffer() != nullptr && "ShrinkFwd: AtVinv null"); v_measMinusComp_.at(currentBlock).shrink(pseudoMsrElemCount, 0); 
v_AtVinv_.at(currentBlock).shrink(0, pseudoMsrElemCount); } @@ -2978,7 +3067,9 @@ void dna_adjust::UpdateEstimatesForward(const UINT32 currentBlock) // Now copy 'estimated' coordinates to 'rigorous' for comparison on the next iteration v_rigorousStations_.at(currentBlock) = v_estimatedStations_.at(currentBlock); + assert(v_normals_.at(currentBlock).getbuffer() != nullptr && "UpdateEstFwd: normals null before copy to rigorousVariances"); v_rigorousVariances_.at(currentBlock) = v_normals_.at(currentBlock); + assert(v_rigorousVariances_.at(currentBlock).getbuffer() != nullptr && "UpdateEstFwd: rigorousVariances null after copy"); // Temporarily hold corrections for last block. These corrections will be restored // in UpdateEstimatesFinal() @@ -3028,9 +3119,10 @@ void dna_adjust::CarryForwardJunctions(const UINT32 thisBlock, const UINT32 next if (v_blockMeta_.at(nextBlock)._blockIsolated) return; - // For staged adjustments, load block info for nextBlock + // For staged adjustments, prefetch and load block info for nextBlock if (projectSettings_.a.stage) { + AdviseBlockWillNeed(nextBlock); DeserialiseBlockFromMappedFile(nextBlock); RebuildNormals(nextBlock, __forward__, true, true); } @@ -3065,7 +3157,7 @@ bool dna_adjust::PrepareAdjustmentReverse(const UINT32 currentBlock, bool MT_Rev // OK. 
currentBlock is the last block, and this is the commencement of // a reverse run, so reset coordinates - matrix_2d* estimatedStations(&v_estimatedStations_.at(currentBlock)); + matrix_2d* estimatedStations(&v_estimatedStations_.at(currentBlock)); if (MT_ReverseOrCombine) { @@ -3073,15 +3165,23 @@ bool dna_adjust::PrepareAdjustmentReverse(const UINT32 currentBlock, bool MT_Rev } else { - // Restore back up copy of normals for reverse adjustment in + // Restore back up copy of normals for reverse adjustment in // single thread mode + assert(v_normalsR_.at(currentBlock).getbuffer() != nullptr && "PrepareAdjReverse: normalsR null before copy"); + assert(v_normalsR_.at(currentBlock).rows() > 0 && "PrepareAdjReverse: normalsR has 0 rows"); v_normals_.at(currentBlock) = v_normalsR_.at(currentBlock); + assert(v_normals_.at(currentBlock).getbuffer() != nullptr && "PrepareAdjReverse: normals null after copy"); } + assert(v_originalStations_.at(currentBlock).getbuffer() != nullptr && "PrepareAdjReverse: originalStations null"); // Reset coordinates to original values *estimatedStations = v_originalStations_.at(currentBlock); - + + assert(v_design_.at(currentBlock).rows() > 0 && "PrepareAdjReverse: design has 0 rows"); + assert(v_AtVinv_.at(currentBlock).rows() > 0 && "PrepareAdjReverse: AtVinv has 0 rows"); + assert(v_measMinusComp_.at(currentBlock).rows() > 0 && "PrepareAdjReverse: measMinusComp has 0 rows"); + AddConstraintStationstoNormalsReverse(currentBlock, MT_ReverseOrCombine); return true; @@ -3421,19 +3521,27 @@ void dna_adjust::AdjustPhasedReverseCombine() if (projectSettings_.g.verbose > 3) debug_file << "In isolation" << std::endl; + assert(v_design_.at(currentBlock).getbuffer() != nullptr && "design null before SolveTry (reverse combine)"); + assert(v_AtVinv_.at(currentBlock).getbuffer() != nullptr && "AtVinv null before SolveTry (reverse combine)"); + assert(v_normals_.at(currentBlock).getbuffer() != nullptr && "normals null before SolveTry (reverse combine)"); 
+ assert(v_measMinusComp_.at(currentBlock).getbuffer() != nullptr && "measMinusComp null before SolveTry (reverse combine)"); + // Backup normals prior to inversion for re-use in combination // adjustment... only if a combination is required BackupNormals(currentBlock, false); // Least Squares Solution - SolveTry(true, currentBlock); + if (projectSettings_.a.lm_enabled) + SolveLMTry(true, currentBlock, true); + else + SolveTry(true, currentBlock); if (projectSettings_.o._adj_stn_iteration) adj_file << " done." << std::endl; // Add corrections to estimates. // If --output-adj-iter-stat argument is supplied or currentBlock - // is the first block, then compute geographic coordinates, recompute + // is the first block, then compute geographic coordinates, recompute // meas-minus-computed vector, then print statistics. // Output of stats also prints largest correction. // If --output-adj-iter-stn argument is supplied, print station coords @@ -3441,16 +3549,16 @@ void dna_adjust::AdjustPhasedReverseCombine() // Debug and diagnose (if required) debug_BlockInformation(currentBlock, "(reverse, in isolation)"); - + // Carry the estimated junction station coordinates and variances to the next block and // combine. CarryReverseEstimates will return false if currentBlock is the first block - // or an isolated block. If estimates can be carried, CarryReverseEstimates updates + // or an isolated block. If estimates can be carried, CarryReverseEstimates updates // geographic coords (if req'd), recomputes meas-minus-comp vector and updates normals for // the next block. 
if (CarryReverseJunctions(currentBlock, currentBlock-1, false)) - { + { ////////////////////////////////////////////////////////////////////////// - // Combination Adjustment + // Combination Adjustment // // Perform combination on all blocks except the first and last as they will // be rigorous from reverse and forward adjustments respectively @@ -3458,19 +3566,22 @@ void dna_adjust::AdjustPhasedReverseCombine() // First, carry the junction station estimates and variances of the next block // (obtained during the forward pass). These were copied to v_junctionEstimatesFwd_ // and v_junctionVariances_ in CarryStnEstimatesandVariancesForward(..) during - // the forward pass. + // the forward pass. if (PrepareAdjustmentCombine(currentBlock, pseudomsrJSLCount, false)) { isCombining_ = true; if (projectSettings_.g.verbose > 3) debug_file << "Rigorous" << std::endl; - + if (projectSettings_.o._adj_stn_iteration) if (!v_blockMeta_.at(currentBlock)._blockFirst) adj_file << std::endl << std::left << "Adjusting block " << currentBlock+1 << " (reverse, rigorous)... "; // Least Squares Solution - SolveTry(true, currentBlock); + if (projectSettings_.a.lm_enabled) + SolveLMTry(true, currentBlock, true); + else + SolveTry(true, currentBlock); if (projectSettings_.o._adj_stn_iteration) adj_file << " done." 
<< std::endl; @@ -3505,7 +3616,7 @@ void dna_adjust::AdjustPhasedReverseCombine() } // for (UINT32 block=0; blockgetbuffer() != nullptr && "UpdateEstFinal: AtVinv null"); + assert(measMinusComp->getbuffer() != nullptr && "UpdateEstFinal: measMinusComp null"); + assert(aposterioriVariances->getbuffer() != nullptr && "UpdateEstFinal: aposterioriVariances null"); if (v_blockMeta_.at(currentBlock)._blockFirst) { @@ -3762,13 +3883,16 @@ bool dna_adjust::CarryReverseJunctions(const UINT32 currentBlock, const UINT32 n v_normals_.at(nextBlock) = v_normalsR_.at(nextBlock); } - // For staged adjustments, load blocks info for nextBlock + // For staged adjustments, prefetch and load blocks info for nextBlock if (projectSettings_.a.stage) + { + AdviseBlockWillNeed(nextBlock); DeserialiseBlockFromMappedFile(nextBlock); + } // Next, update estimates for next block to original estimates *estimatedStationsNext = v_originalStations_.at(nextBlock); - + // For staged adjustments, rebuild design and AtVinv if (projectSettings_.a.stage) RebuildNormals(nextBlock, __reverse__, false, false); @@ -3792,25 +3916,25 @@ bool dna_adjust::CarryReverseJunctions(const UINT32 currentBlock, const UINT32 n // go through each of the measurements in the binary measurements file and formulate partial derivatives -void dna_adjust::FillDesignNormalMeasurementsMatrices(bool buildnewMatrices, const UINT32& block, bool MT_ReverseOrCombine) +void dna_adjust::FillDesignNormalMeasurementsMatrices(bool buildnewMatrices, const UINT32& block, bool MT_ReverseOrCombine, bool skipNormals) { UINT32 design_row(0); - + it_vUINT32 _it_block_msr; - it_vmsr_t _it_msr; + it_vmsr_t _it_msr; for (_it_block_msr=v_CML_.at(block).begin(); _it_block_msr!=v_CML_.at(block).end(); ++_it_block_msr) { if (InitialiseandValidateMsrPointer(_it_block_msr, _it_msr)) continue; - // When a target direction is found, continue to next element. + // When a target direction is found, continue to next element. 
if (_it_msr->measType == 'D') if (_it_msr->vectorCount2 < 1) continue; // Build AtVinv, Normals and Meas minus Comp vectors - UpdateDesignNormalMeasMatrices(&_it_msr, design_row, buildnewMatrices, block, MT_ReverseOrCombine); + UpdateDesignNormalMeasMatrices(&_it_msr, design_row, buildnewMatrices, block, MT_ReverseOrCombine, skipNormals); } } @@ -3819,6 +3943,9 @@ void dna_adjust::FillDesignNormalMeasurementsMatrices(bool buildnewMatrices, con // pre-adjustment value, otherwise back up the current value. bool dna_adjust::InitialiseMeasurement(pit_vmsr_t _it_msr, bool buildnewMatrices) { + if (rebuildingDesign_) + return false; + if (buildnewMatrices) { // Was this measurement reduced from previous @@ -3839,7 +3966,7 @@ bool dna_adjust::InitialiseMeasurement(pit_vmsr_t _it_msr, bool buildnewMatrices } -void dna_adjust::UpdateDesignNormalMeasMatrices(pit_vmsr_t _it_msr, UINT32& design_row, bool buildnewMatrices, const UINT32& block, bool MT_ReverseOrCombine) +void dna_adjust::UpdateDesignNormalMeasMatrices(pit_vmsr_t _it_msr, UINT32& design_row, bool buildnewMatrices, const UINT32& block, bool MT_ReverseOrCombine, bool skipNormals) { std::stringstream ss; @@ -3855,101 +3982,101 @@ void dna_adjust::UpdateDesignNormalMeasMatrices(pit_vmsr_t _it_msr, UINT32& desi design = &v_designR_.at(block); AtVinv = &v_AtVinvR_.at(block); measMinusComp = &v_measMinusCompR_.at(block); - } + } switch ((*_it_msr)->measType) { case 'A': // Horizontal angle UpdateDesignNormalMeasMatrices_A(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; case 'B': // Geodetic azimuth case 'K': // Astronomic azimuth - // Note: UpdateDesignNormalMeasMatrices_BK reduces K measurements to the geodetic reference frame, + // Note: UpdateDesignNormalMeasMatrices_BK reduces K measurements to the geodetic reference frame, // after which 
UpdateDesignNormalMeasMatrices_BK treats K measurements as B measurements upon forming // design matrix elements. UpdateDesignNormalMeasMatrices_BK(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; case 'C': // Chord dist UpdateDesignNormalMeasMatrices_C(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; - case 'D': // Direction set + case 'D': // Direction set UpdateDesignNormalMeasMatrices_D(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; case 'E': // Ellipsoid arc UpdateDesignNormalMeasMatrices_E(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; case 'G': // GPS Baseline UpdateDesignNormalMeasMatrices_G(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; case 'H': // Orthometric height // Note: UpdateDesignNormalMeasMatrices_H reduces term1 to ellipsoid height, after which // UpdateDesignNormalMeasMatrices_HR is used to form design elements. 
UpdateDesignNormalMeasMatrices_H(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; case 'I': // Astronomic latitude // Note: UpdateDesignNormalMeasMatrices_I reduces term1 to geodetic latitude, after which // UpdateDesignNormalMeasMatrices_IP is used to form design elements. UpdateDesignNormalMeasMatrices_I(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; case 'J': // Astronomic longitude // Note: UpdateDesignNormalMeasMatrices_J reduces term1 to geodetic longitude, after which // UpdateDesignNormalMeasMatrices_JQ is used to form design elements. UpdateDesignNormalMeasMatrices_J(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); - break; + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); + break; case 'L': // Level difference UpdateDesignNormalMeasMatrices_L(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); - break; + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); + break; case 'M': // MSL arc UpdateDesignNormalMeasMatrices_M(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); - break; + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); + break; case 'P': // Geodetic latitude // Note: UpdateDesignNormalMeasMatrices_P archives the raw measurement, after which // UpdateDesignNormalMeasMatrices_IP is used to form design elements. 
UpdateDesignNormalMeasMatrices_P(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); - break; + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); + break; case 'Q': // Geodetic longitude // Note: UpdateDesignNormalMeasMatrices_Q archives the raw measurement, after which // UpdateDesignNormalMeasMatrices_JQ is used to form design elements. UpdateDesignNormalMeasMatrices_Q(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); - break; + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); + break; case 'R': // Ellipsoidal height // Note: UpdateDesignNormalMeasMatrices_R archives the raw measurement, after which // UpdateDesignNormalMeasMatrices_HR is used to form design elements. UpdateDesignNormalMeasMatrices_R(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; case 'S': // Slope distance UpdateDesignNormalMeasMatrices_S(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; case 'V': // Zenith distance UpdateDesignNormalMeasMatrices_V(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); - break; + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); + break; case 'X': // GPS Baseline cluster UpdateDesignNormalMeasMatrices_X(_it_msr, design_row, block, - measMinusComp, estimatedStations, design, AtVinv, buildnewMatrices); - break; + measMinusComp, estimatedStations, design, AtVinv, buildnewMatrices, skipNormals); + break; case 'Y': // GPS Point cluster 
UpdateDesignNormalMeasMatrices_Y(_it_msr, design_row, block, - measMinusComp, estimatedStations, design, AtVinv, buildnewMatrices); - break; + measMinusComp, estimatedStations, design, AtVinv, buildnewMatrices, skipNormals); + break; case 'Z': // Vertical angle UpdateDesignNormalMeasMatrices_Z(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); break; default: ss << "UpdateDesignNormalMeasMatrices(): Unknown measurement type - '" << @@ -4656,8 +4783,8 @@ void dna_adjust::AddMsrtoMeasMinusComp(pit_vmsr_t _it_msr, const UINT32& design_ void dna_adjust::UpdateDesignNormalMeasMatrices_A(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); UINT32 stn2(GetBlkMatrixElemStn2(block, _it_msr)); @@ -4683,11 +4810,11 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_A(pit_vmsr_t _it_msr, UINT32& de &direction12, &direction13, &local_12e, &local_12n, &local_13e, &local_13n)); - // Initialise measurement. No need to test if no further calculations are + // Initialise measurement. No need to test if no further calculations are // required (as in stage mode), as this is done later (below) InitialiseMeasurement(_it_msr, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !rebuildingDesign_) { // deflections available? 
if (fabs(stn1_it->verticalDef) > E4_SEC_DEFLECTION || fabs(stn1_it->meridianDef) > E4_SEC_DEFLECTION) @@ -4806,7 +4933,7 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_A(pit_vmsr_t _it_msr, UINT32& de UpdateAtVinv(_it_msr, stn1, stn2, stn3, design_row, design, AtVinv, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_A(stn1, stn2, stn3, design_row, normals, design, AtVinv); else @@ -4815,8 +4942,8 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_A(pit_vmsr_t _it_msr, UINT32& de void dna_adjust::UpdateDesignNormalMeasMatrices_BK(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); UINT32 stn2(GetBlkMatrixElemStn2(block, _it_msr)); @@ -4838,11 +4965,11 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_BK(pit_vmsr_t _it_msr, UINT32& d stn1_it->currentLongitude, &local_12e, &local_12n)); - // Initialise measurement. No need to test if no further calculations are + // Initialise measurement. No need to test if no further calculations are // required (as in stage mode), as this is done later (below) InitialiseMeasurement(_it_msr, buildnewMatrices); - - if (buildnewMatrices) + + if (buildnewMatrices && !rebuildingDesign_) { // deflections available? 
if ((*_it_msr)->measType == 'K' && // Astro @@ -4910,19 +5037,19 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_BK(pit_vmsr_t _it_msr, UINT32& d // Update AtVinv based on new design matrix elements UpdateAtVinv(_it_msr, stn1, stn2, 0, design_row, design, AtVinv, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_BCEKLMSVZ(stn1, stn2, design_row, normals, design, AtVinv); else design_row++; } - + void dna_adjust::UpdateDesignNormalMeasMatrices_C(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { - // Initialise measurement and test if no further calculations are + // Initialise measurement and test if no further calculations are // required (as in stage mode) if (InitialiseMeasurement(_it_msr, buildnewMatrices)) return; @@ -4933,13 +5060,13 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_C(pit_vmsr_t _it_msr, UINT32& de // Now call UpdateDesignNormalMeasMatrices_CEM UpdateDesignNormalMeasMatrices_CEM(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); } void dna_adjust::UpdateDesignNormalMeasMatrices_CEM(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { UINT32 stn1(GetBlkMatrixElemStn1(block, 
_it_msr)); UINT32 stn2(GetBlkMatrixElemStn2(block, _it_msr)); @@ -4975,17 +5102,17 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_CEM(pit_vmsr_t _it_msr, UINT32& // Update AtVinv based on new design matrix elements UpdateAtVinv(_it_msr, stn1, stn2, 0, design_row, design, AtVinv, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_BCEKLMSVZ(stn1, stn2, design_row, normals, design, AtVinv); else design_row++; } - + void dna_adjust::UpdateDesignNormalMeasMatrices_D(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { it_vmsr_t _it_msr_first(*_it_msr); UINT32 design_row_begin(design_row); @@ -5034,7 +5161,7 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_D(pit_vmsr_t _it_msr, UINT32& de it_angle->station3 = (*_it_msr)->station2; - if (buildnewMatrices) + if (buildnewMatrices && !rebuildingDesign_) { // Was this measurement reduced from previous // adjustment, which was serialised to disk? 
@@ -5067,7 +5194,7 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_D(pit_vmsr_t _it_msr, UINT32& de UpdateDesignNormalMeasMatrices_A(&it_angle, design_row, block, measMinusComp, estimatedStations, 0, design, AtVinv, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !rebuildingDesign_) { // Update derived angle, corrected for deflection of vertical (*_it_msr)->scale1 = it_angle->term1; @@ -5138,14 +5265,14 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_D(pit_vmsr_t _it_msr, UINT32& de it_angle++; } - if (buildnewMatrices) + if (buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_D(block, _it_msr_first, design_row_begin, normals, design, AtVinv); } void dna_adjust::UpdateDesignNormalMeasMatrices_E(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { // Initialise measurement and test if no further calculations are // required (as in stage mode) @@ -5181,7 +5308,7 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_E(pit_vmsr_t _it_msr, UINT32& de // Now that the ellipsoid arc has been reduced to a chord, call UpdateDesignNormalMeasMatrices_CEM UpdateDesignNormalMeasMatrices_CEM(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); } void dna_adjust::UpdateDesignMeasMatrices_GX(pit_vmsr_t _it_msr, UINT32& design_row, @@ -5255,8 +5382,8 @@ void dna_adjust::UpdateDesignMeasMatrices_GX(pit_vmsr_t _it_msr, UINT32& design_ } void dna_adjust::UpdateDesignNormalMeasMatrices_G(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* 
measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { it_vmsr_t _it_msr_first(*_it_msr); UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); @@ -5278,7 +5405,7 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_G(pit_vmsr_t _it_msr, UINT32& de // For first time run or staged adjustment mode - if (buildnewMatrices || projectSettings_.a.stage) + if (buildnewMatrices || projectSettings_.a.stage || skipNormals) { // Load GPS Variance matrix. For the first run, scaling is applied // and the variances are written to the binary measurements list. @@ -5287,23 +5414,23 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_G(pit_vmsr_t _it_msr, UINT32& de if (buildnewMatrices && projectSettings_.a.stage) return; - + // Build At * V-1 AtVinv->replace(stn1, design_row_begin, var_cart * -1); AtVinv->replace(stn2, design_row_begin, var_cart); - if (buildnewMatrices && !projectSettings_.a.stage) + if (buildnewMatrices && !projectSettings_.a.stage && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_G(stn1, stn2, design_row_begin, normals, AtVinv); } - + design_row++; } - + void dna_adjust::UpdateDesignNormalMeasMatrices_M(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { // Initialise measurement and test if no further calculations are // required (as in stage mode) @@ -5328,22 +5455,22 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_M(pit_vmsr_t _it_msr, UINT32& de // Now that the MSL arc has been reduced 
to a chord, call UpdateDesignNormalMeasMatrices_CEM UpdateDesignNormalMeasMatrices_CEM(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); } // Like zenith distances and vertical angles, the relationship between slope distances and the // coordinates of p1 and p2 requires a little extra work to take into consideration instrument // and target height. For the measurements-minus-computed vector, the "computed" distance -// is the true distance between the instrument and target, and so must take into consideration +// is the true distance between the instrument and target, and so must take into consideration // instrument and target heights. However, the dX, dY, dZ components for the partial // derivatives represent the true geometric difference between the two stations (not // instrument and target). void dna_adjust::UpdateDesignNormalMeasMatrices_S(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { // preAdjMeas is used to store original measured MSL arc distance - if (buildnewMatrices) + if (buildnewMatrices && !rebuildingDesign_) { // This is the first time adjust has run, so // back up the raw measurement @@ -5353,7 +5480,7 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_S(pit_vmsr_t _it_msr, UINT32& de return; } - UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); + UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); UINT32 stn2(GetBlkMatrixElemStn2(block, _it_msr)); it_vstn_t_const stn1_it(bstBinaryRecords_.begin() + (*_it_msr)->station1); @@ -5361,7 +5488,7 @@ void 
dna_adjust::UpdateDesignNormalMeasMatrices_S(pit_vmsr_t _it_msr, UINT32& de // compute dX, dY, dZ for instrument height (ih) and target height (th) double dXih, dYih, dZih, dXth, dYth, dZth; CartesianElementsFromInstrumentHeight((*_it_msr)->term3, // instrument height - &dXih, &dYih, &dZih, + &dXih, &dYih, &dZih, stn1_it->currentLatitude, stn1_it->currentLongitude); CartesianElementsFromInstrumentHeight((*_it_msr)->term4, // target height @@ -5389,7 +5516,7 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_S(pit_vmsr_t _it_msr, UINT32& de // Update AtVinv based on new design matrix elements UpdateAtVinv(_it_msr, stn1, stn2, 0, design_row, design, AtVinv, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_BCEKLMSVZ(stn1, stn2, design_row, normals, design, AtVinv); else @@ -5406,10 +5533,10 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_S(pit_vmsr_t _it_msr, UINT32& de // partial derivatives represent the true geometric difference between the two stations (not // instrument and target). void dna_adjust::UpdateDesignNormalMeasMatrices_V(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { - UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); + UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); UINT32 stn2(GetBlkMatrixElemStn2(block, _it_msr)); it_vstn_t_const stn1_it(bstBinaryRecords_.begin() + (*_it_msr)->station1); @@ -5417,11 +5544,11 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_V(pit_vmsr_t _it_msr, UINT32& de double local_12e, local_12n, local_12up; - // Initialise measurement. 
No need to test if no further calculations are + // Initialise measurement. No need to test if no further calculations are // required (as in stage mode), as this is done later (below) InitialiseMeasurement(_it_msr, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !rebuildingDesign_) { // deflections available? if (fabs(stn1_it->verticalDef) > E4_SEC_DEFLECTION || fabs(stn1_it->meridianDef) > E4_SEC_DEFLECTION) @@ -5497,7 +5624,7 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_V(pit_vmsr_t _it_msr, UINT32& de // Update AtVinv based on new design matrix elements UpdateAtVinv(_it_msr, stn1, stn2, 0, design_row, design, AtVinv, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_BCEKLMSVZ(stn1, stn2, design_row, normals, design, AtVinv); else @@ -5510,15 +5637,15 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_V(pit_vmsr_t _it_msr, UINT32& de // Like zenith distances, the relationship between slope distances and the // coordinates of p1 and p2 requires a little extra work to take into consideration instrument // and target height. For the measurements-minus-computed vector, the "computed" distance -// is the true distance between the instrument and target, and so must take into consideration -// instrument and target heights. However, the dX, dY, dZ components for the partial -// derivatives represent the true geometric difference between the two stations (not +// is the true distance between the instrument and target, and so must take into consideration +// instrument and target heights. However, the dX, dY, dZ components for the partial +// derivatives represent the true geometric difference between the two stations (not // instrument and target). 
void dna_adjust::UpdateDesignNormalMeasMatrices_Z(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { - UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); + UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); UINT32 stn2(GetBlkMatrixElemStn2(block, _it_msr)); it_vstn_t_const stn1_it(bstBinaryRecords_.begin() + (*_it_msr)->station1); @@ -5526,11 +5653,11 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_Z(pit_vmsr_t _it_msr, UINT32& de double local_12e, local_12n, local_12up; - // Initialise measurement. No need to test if no further calculations are + // Initialise measurement. No need to test if no further calculations are // required (as in stage mode), as this is done later (below) InitialiseMeasurement(_it_msr, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !rebuildingDesign_) { // deflections available? if (fabs(stn1_it->verticalDef) > E4_SEC_DEFLECTION || fabs(stn1_it->meridianDef) > E4_SEC_DEFLECTION) @@ -5606,21 +5733,21 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_Z(pit_vmsr_t _it_msr, UINT32& de // Update AtVinv based on new design matrix elements UpdateAtVinv(_it_msr, stn1, stn2, 0, design_row, design, AtVinv, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_BCEKLMSVZ(stn1, stn2, design_row, normals, design, AtVinv); else design_row++; } - + // Orthometric height difference. -// This function requires geoid-ellipsoid separation in order to reduce -// to ellipsoidal height difference counterpart. +// This function requires geoid-ellipsoid separation in order to reduce +// to ellipsoidal height difference counterpart. 
// Hence, run geoid with -f, -s and -n options void dna_adjust::UpdateDesignNormalMeasMatrices_L(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); UINT32 stn2(GetBlkMatrixElemStn2(block, _it_msr)); @@ -5645,11 +5772,11 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_L(pit_vmsr_t _it_msr, UINT32& de &h1, &h2, &nu1, &nu2, &Zn1, &Zn2, datum_.GetEllipsoidRef())); - // Initialise measurement. No need to test if no further calculations are + // Initialise measurement. No need to test if no further calculations are // required (as in stage mode), as this is done later (below) InitialiseMeasurement(_it_msr, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !rebuildingDesign_) { // N value available? 
if (fabs(stn1_it->geoidSep) > PRECISION_1E4 || @@ -5680,7 +5807,7 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_L(pit_vmsr_t _it_msr, UINT32& de // Update AtVinv based on new design matrix elements UpdateAtVinv(_it_msr, stn1, stn2, 0, design_row, design, AtVinv, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_BCEKLMSVZ(stn1, stn2, design_row, normals, design, AtVinv); else @@ -5688,17 +5815,17 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_L(pit_vmsr_t _it_msr, UINT32& de } void dna_adjust::UpdateDesignNormalMeasMatrices_I(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { - // Initialise measurement. No need to test if no further calculations are + // Initialise measurement. No need to test if no further calculations are // required (as in stage mode), as this is done later (below) InitialiseMeasurement(_it_msr, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !rebuildingDesign_) { it_vstn_t_const stn1_it(bstBinaryRecords_.begin() + (*_it_msr)->station1); - + // deflections available? 
if (fabs(stn1_it->meridianDef) > E4_SEC_DEFLECTION) { @@ -5713,22 +5840,22 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_I(pit_vmsr_t _it_msr, UINT32& de } UpdateDesignNormalMeasMatrices_IP(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); } void dna_adjust::UpdateDesignNormalMeasMatrices_J(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { - // Initialise measurement. No need to test if no further calculations are + // Initialise measurement. No need to test if no further calculations are // required (as in stage mode), as this is done later (below) InitialiseMeasurement(_it_msr, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !rebuildingDesign_) { it_vstn_t_const stn1_it(bstBinaryRecords_.begin() + (*_it_msr)->station1); - + // deflections available? 
if (fabs(stn1_it->verticalDef) > E4_SEC_DEFLECTION) { @@ -5744,27 +5871,27 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_J(pit_vmsr_t _it_msr, UINT32& de } UpdateDesignNormalMeasMatrices_JQ(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); } void dna_adjust::UpdateDesignNormalMeasMatrices_P(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { - // Initialise measurement and test if no further calculations are + // Initialise measurement and test if no further calculations are // required (as in stage mode) if (InitialiseMeasurement(_it_msr, buildnewMatrices)) return; UpdateDesignNormalMeasMatrices_IP(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); } - + void dna_adjust::UpdateDesignNormalMeasMatrices_IP(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); @@ -5810,31 +5937,31 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_IP(pit_vmsr_t _it_msr, UINT32& d // Update AtVinv based on new design matrix elements UpdateAtVinv(_it_msr, stn1, 0, 0, design_row, design, AtVinv, buildnewMatrices); - if 
(buildnewMatrices) + if (buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_HIJPQR(stn1, design_row, normals, design, AtVinv); else design_row++; } - + void dna_adjust::UpdateDesignNormalMeasMatrices_Q(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { - // Initialise measurement and test if no further calculations are + // Initialise measurement and test if no further calculations are // required (as in stage mode) if (InitialiseMeasurement(_it_msr, buildnewMatrices)) return; UpdateDesignNormalMeasMatrices_JQ(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); } - + void dna_adjust::UpdateDesignNormalMeasMatrices_JQ(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); @@ -5862,26 +5989,26 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_JQ(pit_vmsr_t _it_msr, UINT32& d // Update AtVinv based on new design matrix elements UpdateAtVinv(_it_msr, stn1, 0, 0, design_row, design, AtVinv, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_HIJPQR(stn1, design_row, normals, design, AtVinv); else 
design_row++; } - + void dna_adjust::UpdateDesignNormalMeasMatrices_H(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { - // Initialise measurement. No need to test if no further calculations are + // Initialise measurement. No need to test if no further calculations are // required (as in stage mode), as this is done later (below) InitialiseMeasurement(_it_msr, buildnewMatrices); - if (buildnewMatrices) + if (buildnewMatrices && !rebuildingDesign_) { it_vstn_t_const stn1_it(bstBinaryRecords_.begin() + (*_it_msr)->station1); - + // N value available? if (fabs(stn1_it->geoidSep) > PRECISION_1E4) { @@ -5895,13 +6022,13 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_H(pit_vmsr_t _it_msr, UINT32& de } UpdateDesignNormalMeasMatrices_HR(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); } void dna_adjust::UpdateDesignNormalMeasMatrices_HR(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { UINT32 stn1(GetBlkMatrixElemStn1(block, _it_msr)); @@ -5935,31 +6062,31 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_HR(pit_vmsr_t _it_msr, UINT32& d // Update AtVinv based on new design matrix elements UpdateAtVinv(_it_msr, stn1, 0, 0, design_row, design, AtVinv, buildnewMatrices); - if (buildnewMatrices) + if 
(buildnewMatrices && !skipNormals) // Add weighted measurement contributions to normal matrix UpdateNormals_HIJPQR(stn1, design_row, normals, design, AtVinv); else design_row++; } - + void dna_adjust::UpdateDesignNormalMeasMatrices_R(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* normals, matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { - // Initialise measurement and test if no further calculations are + // Initialise measurement and test if no further calculations are // required (as in stage mode) if (InitialiseMeasurement(_it_msr, buildnewMatrices)) return; UpdateDesignNormalMeasMatrices_HR(_it_msr, design_row, block, - measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices); + measMinusComp, estimatedStations, normals, design, AtVinv, buildnewMatrices, skipNormals); } - + void dna_adjust::UpdateDesignNormalMeasMatrices_X(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { it_vmsr_t _it_msr_first(*_it_msr); @@ -5996,7 +6123,7 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_X(pit_vmsr_t _it_msr, UINT32& de matrix_2d var_cart(baseline_count * 3, baseline_count * 3); - if (buildnewMatrices || projectSettings_.a.stage) + if (buildnewMatrices || projectSettings_.a.stage || skipNormals) { // Load apriori variance matrix, and assign to binary measurement // If required, apply scalars @@ -6006,10 +6133,10 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_X(pit_vmsr_t _it_msr, UINT32& de return; } - // If this method is called 
via PrepareAdjustment() and the adjustment + // If this method is called via PrepareAdjustment() and the adjustment // mode is staged, then don't update the AtVinv matrix. This will be // done during an adjustment via AdjustPhasedForward(). - if (!buildnewMatrices && !projectSettings_.a.stage) + if (!buildnewMatrices && !projectSettings_.a.stage && !skipNormals) return; UINT32 cluster_cov; @@ -6068,11 +6195,11 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_X(pit_vmsr_t _it_msr, UINT32& de _it_msr_temp += 3; } - if (projectSettings_.a.stage || !buildnewMatrices) + if (projectSettings_.a.stage || !buildnewMatrices || skipNormals) return; - + _it_msr_temp = _it_msr_first; - + matrix_2d tmp0(3, 3); // Build At * V-1 * A variances @@ -6151,8 +6278,8 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_X(pit_vmsr_t _it_msr, UINT32& de void dna_adjust::UpdateDesignNormalMeasMatrices_Y(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, - matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices) + matrix_2d* measMinusComp, matrix_2d* estimatedStations, + matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, bool skipNormals) { it_vmsr_t _it_msr_first(*_it_msr); it_vmsr_t tmp_msr; @@ -6365,23 +6492,23 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_Y(pit_vmsr_t _it_msr, UINT32& de matrix_2d var_cart(point_count * 3, point_count * 3); - if (buildnewMatrices || projectSettings_.a.stage) + if (buildnewMatrices || projectSettings_.a.stage || skipNormals) { // Load apriori variance matrix, and assign to binary measurement - // If required, propagate to cartesian reference frame and apply + // If required, propagate to cartesian reference frame and apply // scalars LoadVarianceMatrix_Y(_it_msr_first, &var_cart, coordType); - + // If preparing for a stage adjustment, return // Normals will be built for each block as needed if (buildnewMatrices && projectSettings_.a.stage) return; } - - // If this 
method is called via PrepareAdjustment() and the adjustment + + // If this method is called via PrepareAdjustment() and the adjustment // mode is staged, then don't update the AtVinv matrix. This will be // done during an adjustment via AdjustPhasedForward(). - if (!buildnewMatrices && !projectSettings_.a.stage) + if (!buildnewMatrices && !projectSettings_.a.stage && !skipNormals) return; UINT32 cluster_cov; @@ -6424,9 +6551,9 @@ void dna_adjust::UpdateDesignNormalMeasMatrices_Y(pit_vmsr_t _it_msr, UINT32& de design_row_begin += 3; } - if (projectSettings_.a.stage || !buildnewMatrices) + if (projectSettings_.a.stage || !buildnewMatrices || skipNormals) return; - + _it_msr_temp = _it_msr_first; // Add to At * V-1 * A @@ -6489,6 +6616,13 @@ void dna_adjust::SolveTry(bool COMPUTE_INVERSE, const UINT32& block) void dna_adjust::Solve(bool COMPUTE_INVERSE, const UINT32& block) { + assert(v_design_.at(block).rows() > 0 && "Solve: design has 0 rows"); + assert(v_design_.at(block).columns() > 0 && "Solve: design has 0 columns"); + assert(v_AtVinv_.at(block).rows() > 0 && "Solve: AtVinv has 0 rows"); + assert(v_AtVinv_.at(block).columns() > 0 && "Solve: AtVinv has 0 columns"); + assert(v_normals_.at(block).rows() > 0 && "Solve: normals has 0 rows"); + assert(v_measMinusComp_.at(block).rows() > 0 && "Solve: measMinusComp has 0 rows"); + // debug matrices if required debug_SolutionInformation(block); @@ -6508,33 +6642,24 @@ void dna_adjust::Solve(bool COMPUTE_INVERSE, const UINT32& block) // the normal matrix before inversion and subsequently reversing the effect. // - // 1. Create scalar matrix - matrix_2d *S = nullptr, *SN = nullptr; + // 1. 
Scale normals to unity using diagonal scaling + UINT32 n = v_normals_.at(block).rows(); + std::vector s_diag; if (projectSettings_.a.scale_normals_to_unity) { - S = new matrix_2d(v_normals_.at(block).rows(), v_normals_.at(block).rows()); - SN = new matrix_2d(v_normals_.at(block).rows(), v_normals_.at(block).rows()); - for (UINT32 i(0); iput(i, i, sqrt(v_normals_.at(block).get(i, i))); - // 2. Scale Normals to reduce the diagonal elements of Normals to unity - //SN->multiply(*S, v_normals_.at(block)); - SN->multiply(*S, "N", v_normals_.at(block), "N"); - - //v_normals_.at(block).multiply(*SN, *S); - v_normals_.at(block).multiply(*SN, "N", *S, "N"); + s_diag.resize(n); + for (UINT32 i(0); imultiply(*S, v_normals_.at(block)); - SN->multiply(*S, "N", v_normals_.at(block), "N"); - - //v_normals_.at(block).multiply(*SN, *S); - v_normals_.at(block).multiply(*SN, "N", *S, "N"); - - delete S; - delete SN; + v_normals_.at(block).scale_symmetric_diagonal(s_diag.data()); } ////////////////// } @@ -6574,7 +6692,10 @@ void dna_adjust::Solve(bool COMPUTE_INVERSE, const UINT32& block) // Solve corrections from normal equations v_corrections_.at(block).redim(v_design_.at(block).columns(), 1); - v_corrections_.at(block).multiply(v_normals_.at(block), "N", At_Vinv_m, "N"); + if (v_normals_.at(block).is_symmetric()) + v_corrections_.at(block).multiply_sym(v_normals_.at(block), At_Vinv_m); + else + v_corrections_.at(block).multiply(v_normals_.at(block), "N", At_Vinv_m, "N"); if (projectSettings_.g.verbose > 0) { @@ -6627,60 +6748,663 @@ void dna_adjust::Solve(bool COMPUTE_INVERSE, const UINT32& block) } -void dna_adjust::DeSerialiseAdjustedVarianceMatrices() +// ============================================================================ +// Levenberg-Marquardt / Trust-Region methods +// ============================================================================ + +void dna_adjust::InitialiseLMState() { - // No need to facilitate serialising if network adjustment is in stage, - // as 
this will already be taken care of - if (projectSettings_.a.stage) - return; + lm_lambda_ = projectSettings_.a.lm_lambda_init; + chiSquaredPrev_ = 0.0; - // Create file streams and memory map regions - OpenStageFileStreams(2, sf_rigorous_vars, sf_prec_adj_msrs); - ReserveBlockMapRegions(2, sf_rigorous_vars, sf_prec_adj_msrs); + // Resize backup/gradient/station-backup vectors to match block count + v_normalsBackup_.resize(blockCount_); + v_gradient_.resize(blockCount_); + v_estimatedStationsBackup_.resize(blockCount_); +} - for (UINT32 block=0; block 0 && "SolveLM: design has 0 rows"); + assert(v_design_.at(block).columns() > 0 && "SolveLM: design has 0 columns"); + assert(v_AtVinv_.at(block).rows() > 0 && "SolveLM: AtVinv has 0 rows"); + assert(v_AtVinv_.at(block).columns() > 0 && "SolveLM: AtVinv has 0 columns"); + assert(v_normals_.at(block).rows() > 0 && "SolveLM: normals has 0 rows"); + assert(v_measMinusComp_.at(block).rows() > 0 && "SolveLM: measMinusComp has 0 rows"); + + debug_SolutionInformation(block); + + // 1. Compute gradient: g = At * V^-1 * (meas - comp) + UINT32 n = v_normals_.at(block).rows(); + v_gradient_.at(block).redim(n, 1); + v_gradient_.at(block).multiply(v_AtVinv_.at(block), "N", v_measMinusComp_.at(block), "N"); + + // Diagonal scaling factors (function-scope so both factor and solve steps can use them) + std::vector s_diag; + bool useScaling = projectSettings_.a.scale_normals_to_unity; + + if (COMPUTE_FACTOR) { - // Set Region offsets - SetRegionOffsets(block, 2, sf_rigorous_vars, sf_prec_adj_msrs); + // 2a. Backup pristine normals (before augmentation) + v_normalsBackup_.at(block) = v_normals_.at(block); - // Prepare mapping regions - PrepareMemoryMapRegions(block, 2, sf_rigorous_vars, sf_prec_adj_msrs); - } + // 2b. 
Apply optional diagonal scaling + if (useScaling) + { + s_diag.resize(n); + for (UINT32 i(0); i < n; ++i) + s_diag[i] = 1.0 / sqrt(v_normals_.at(block).get(i, i)); + v_normals_.at(block).scale_symmetric_diagonal(s_diag.data()); + } + + // 2c. Augment diagonal with identity damping: N_ii += lambda + // Identity damping (N + lambda*I) is used instead of Marquardt scaling + // (N + lambda*diag(N)) because the latter amplifies damping by the diagonal + // magnitude, which can be excessively aggressive for GPS networks where + // diagonal elements (~1e6) dwarf the smallest eigenvalues (~1e3). + // When scaling is active, we add lambda/N_ii to the scaled diagonal so + // that the effective damping in the original (unscaled) space is lambda*I. + for (UINT32 i(0); i < n; ++i) + { + double diag_val = useScaling ? (1.0 / v_normalsBackup_.at(block).get(i, i)) : 1.0; + v_normals_.at(block).elementadd(i, i, lm_lambda_ * diag_val); + } - - try { - // Set memory mapped file regions - // NOTE - previously created files must exist and match the - // dimensions of the current matrix sizes - SetMapRegions(2, sf_rigorous_vars, sf_prec_adj_msrs); - - } - catch (boost::interprocess::interprocess_exception& e){ - std::stringstream ss; - ss << "DeSerialiseAdjustedVarianceMatrices() terminated while creating memory map" << std::endl; - ss << " regions from mtx file. Details:\n " << e.what() << std::endl; - adj_file << std::endl << "- Error: " << ss.str() << std::endl; - SignalExceptionAdjustment(ss.str(), 0); + // 2d. Cholesky factor (dpotrf only, no inverse) + // No clearupper() needed — dpotrf(uplo='L') reads only the lower triangle, + // and packed storage has no upper triangle to clear. + v_normals_.at(block).cholesky_factor(false); + + // 2f. 
Scale gradient into scaled space for solve: g_scaled = S * g + if (useScaling) + { + for (UINT32 i(0); i < n; ++i) + *v_gradient_.at(block).getbuffer(i, 0) *= s_diag[i]; + } } - // Now load up matrix from .mtx file - for (UINT32 block=0; block g = g_scaled / S = g_scaled * sqrt(N_ii) + for (UINT32 i(0); i < n; ++i) + *v_gradient_.at(block).getbuffer(i, 0) /= s_diag[i]; } - // Close rigorous variances stream - f_rigorousVariances_.close(); - f_precAdjMsrs_.close(); -} - + // At this point, v_gradient_ and v_corrections_ are both in original (unscaled) space, + // so ComputePredictedReduction() will compute g^T * delta + lambda * delta^T * D * delta + // consistently. -void dna_adjust::SerialiseAdjustedVarianceMatrices() -{ - // No need to facilitate serialising if network adjustment is in stage, - // as this will already be taken care of + // 5. Compute inverse of undamped normals (for phased junction carry-forward). + // In phased mode, after solving with LM damping the code needs N^-1 in + // v_normals_ for the junction station variance carry-forward. Restore + // the pristine (undamped) normals from backup, then invert. 
+ if (COMPUTE_INVERSE) + { + v_normals_.at(block) = v_normalsBackup_.at(block); + if (useScaling) + { + // Recompute scaling from restored normals + s_diag.resize(n); + for (UINT32 i(0); i < n; ++i) + s_diag[i] = 1.0 / sqrt(v_normals_.at(block).get(i, i)); + v_normals_.at(block).scale_symmetric_diagonal(s_diag.data()); + } + + FormInverseVarianceMatrix(&v_normals_.at(block), false, false); + + if (useScaling) + v_normals_.at(block).scale_symmetric_diagonal(s_diag.data()); + } + + if (projectSettings_.g.verbose > 0) + { + if (projectSettings_.a.multi_thread) + dbg_file_mutex.lock(); + + debug_file << "Block " << block + 1 << " (LM, lambda=" << lm_lambda_ << ")" << std::endl; + debug_file << "Corrections " << std::fixed << std::setprecision(16) << v_corrections_.at(block) << std::endl; + debug_file.flush(); + + if (projectSettings_.a.multi_thread) + dbg_file_mutex.unlock(); + } +} + +void dna_adjust::SolveLMTry(bool COMPUTE_FACTOR, const UINT32& block, bool COMPUTE_INVERSE) +{ + try { + SolveLM(COMPUTE_FACTOR, block, COMPUTE_INVERSE); + } + catch (const std::runtime_error& e) { + debug_SolutionInformation(block); + SignalExceptionAdjustment(e.what(), block); + } +} + +double dna_adjust::ComputePredictedReduction(const UINT32& block) +{ + // Predicted reduction = g^T * delta + lambda * delta^T * delta + // Uses identity damping (D = I), consistent with the augmentation + // in SolveLM which adds lambda*I to the normals. 
+ double gTd = v_gradient_.at(block).dot(v_corrections_.at(block)); + double dTd = 0.0; + UINT32 n = v_corrections_.at(block).rows(); + for (UINT32 i = 0; i < n; ++i) + { + double d_i = v_corrections_.at(block).get(i, 0); + dTd += d_i * d_i; + } + return gTd + lm_lambda_ * dTd; +} + +double dna_adjust::ComputeNetworkPredictedReduction() +{ + double total = 0.0; + for (UINT32 block = 0; block < blockCount_; ++block) + total += ComputePredictedReduction(block); + return total; +} + + +// ----------------------------------------------------------------------- +// Anderson acceleration for phased Gauss-Newton +// ----------------------------------------------------------------------- + +void dna_adjust::InitialiseAndersonAcceleration() +{ + UINT32 m = projectSettings_.a.aa_depth; + UINT32 n = static_cast(bstBinaryRecords_.size()) * 3; + + aa_G_hist_.resize(m); + aa_F_hist_.resize(m); + for (UINT32 i = 0; i < m; ++i) + { + aa_G_hist_[i].resize(n, 0.0); + aa_F_hist_[i].resize(n, 0.0); + } + aa_x_snapshot_.resize(n, 0.0); + aa_hist_count_ = 0; + aa_hist_pos_ = 0; +} + +void dna_adjust::ExtractStationCoordinates(std::vector& coords) +{ + UINT32 totalStations = static_cast(bstBinaryRecords_.size()); + coords.resize(totalStations * 3); + + // Use a visited flag to avoid double-writing junction stations + std::vector visited(totalStations, false); + + for (UINT32 block = 0; block < blockCount_; ++block) + { + const auto& stnList = v_parameterStationList_.at(block); + const auto& estMat = v_estimatedStations_.at(block); + + for (UINT32 s = 0; s < stnList.size(); ++s) + { + UINT32 bstIdx = stnList[s]; + if (visited[bstIdx]) + continue; + visited[bstIdx] = true; + + UINT32 row = s * 3; + UINT32 ci = bstIdx * 3; + coords[ci] = estMat.get(row, 0); + coords[ci + 1] = estMat.get(row + 1, 0); + coords[ci + 2] = estMat.get(row + 2, 0); + } + } +} + +void dna_adjust::ScatterStationCoordinates(const std::vector& coords) +{ + for (UINT32 block = 0; block < blockCount_; ++block) + { + 
const auto& stnList = v_parameterStationList_.at(block); + auto& estMat = v_estimatedStations_.at(block); + + for (UINT32 s = 0; s < stnList.size(); ++s) + { + UINT32 bstIdx = stnList[s]; + UINT32 row = s * 3; + UINT32 ci = bstIdx * 3; + estMat.put(row, 0, coords[ci]); + estMat.put(row + 1, 0, coords[ci + 1]); + estMat.put(row + 2, 0, coords[ci + 2]); + } + } +} + +void dna_adjust::ApplyAndersonAcceleration(UINT32 iteration) +{ + UINT32 m = projectSettings_.a.aa_depth; + UINT32 n = static_cast(aa_x_snapshot_.size()); + + // Extract g_k = G(x_k) — post-iteration coordinates + std::vector g_k(n); + ExtractStationCoordinates(g_k); + + // Compute f_k = g_k - x_k (the fixed-point residual) + std::vector f_k(n); + for (UINT32 i = 0; i < n; ++i) + f_k[i] = g_k[i] - aa_x_snapshot_[i]; + + // Store in ring buffer + aa_G_hist_[aa_hist_pos_] = g_k; + aa_F_hist_[aa_hist_pos_] = f_k; + aa_hist_pos_ = (aa_hist_pos_ + 1) % m; + if (aa_hist_count_ < m) + aa_hist_count_++; + + // Need at least 2 history entries to mix + if (aa_hist_count_ < 2) + return; + + // m_k = number of difference columns + UINT32 m_k = aa_hist_count_ - 1; + + // Build ΔF columns: df[j] = f_k - f_{k-j-1} + // The most recent entry is at (aa_hist_pos_ - 1) % m + // The entry before that is at (aa_hist_pos_ - 2) % m, etc. 
+ UINT32 curr = (aa_hist_pos_ + m - 1) % m; + + // Compute Gram matrix (ΔF^T ΔF) and RHS (ΔF^T f_k) — accumulate over stations + std::vector gram(m_k * m_k, 0.0); + std::vector rhs(m_k, 0.0); + + for (UINT32 j = 0; j < m_k; ++j) + { + UINT32 prev_j = (curr + m - j - 1) % m; + for (UINT32 k = j; k < m_k; ++k) + { + UINT32 prev_k = (curr + m - k - 1) % m; + double dot = 0.0; + for (UINT32 i = 0; i < n; ++i) + { + double df_j = f_k[i] - aa_F_hist_[prev_j][i]; + double df_k = f_k[i] - aa_F_hist_[prev_k][i]; + dot += df_j * df_k; + } + gram[j * m_k + k] = dot; + gram[k * m_k + j] = dot; // symmetric + } + // RHS: ΔF^T f_k + double dot = 0.0; + for (UINT32 i = 0; i < n; ++i) + { + double df_j = f_k[i] - aa_F_hist_[prev_j][i]; + dot += df_j * f_k[i]; + } + rhs[j] = dot; + } + + // Solve Gram * gamma = rhs via Cholesky (in-place) + // If ill-conditioned, skip acceleration + std::vector gamma(rhs); + for (UINT32 j = 0; j < m_k; ++j) + { + double sum = gram[j * m_k + j]; + for (UINT32 k = 0; k < j; ++k) + sum -= gram[j * m_k + k] * gram[j * m_k + k]; + if (sum <= 1e-30) + return; // ill-conditioned, skip acceleration + gram[j * m_k + j] = std::sqrt(sum); + for (UINT32 i = j + 1; i < m_k; ++i) + { + sum = gram[i * m_k + j]; + for (UINT32 k = 0; k < j; ++k) + sum -= gram[i * m_k + k] * gram[j * m_k + k]; + gram[i * m_k + j] = sum / gram[j * m_k + j]; + } + } + // Forward substitution: L y = rhs + for (UINT32 j = 0; j < m_k; ++j) + { + for (UINT32 k = 0; k < j; ++k) + gamma[j] -= gram[j * m_k + k] * gamma[k]; + gamma[j] /= gram[j * m_k + j]; + } + // Back substitution: L^T gamma = y + for (int j = static_cast(m_k) - 1; j >= 0; --j) + { + for (UINT32 k = static_cast(j) + 1; k < m_k; ++k) + gamma[j] -= gram[k * m_k + j] * gamma[k]; + gamma[j] /= gram[j * m_k + j]; + } + + // Compute mixed iterate: x_{k+1} = g_k - ΔG * gamma + // where ΔG[j] = g_k - g_{k-j-1} + std::vector x_mixed(g_k); + for (UINT32 j = 0; j < m_k; ++j) + { + UINT32 prev_j = (curr + m - j - 1) % m; + for (UINT32 
i = 0; i < n; ++i) + x_mixed[i] -= gamma[j] * (g_k[i] - aa_G_hist_[prev_j][i]); + } + + // Safety check: reject if mixed correction is larger than un-mixed + double norm_unmixed = 0.0, norm_mixed = 0.0; + for (UINT32 i = 0; i < n; ++i) + { + double d_unmixed = g_k[i] - aa_x_snapshot_[i]; + double d_mixed = x_mixed[i] - aa_x_snapshot_[i]; + norm_unmixed += d_unmixed * d_unmixed; + norm_mixed += d_mixed * d_mixed; + } + + if (norm_mixed > norm_unmixed) + return; // reject — keep un-mixed iterate + + // Accept: scatter mixed coordinates back + ScatterStationCoordinates(x_mixed); + + // Update maxCorr_ based on mixed correction + double maxCorrMixed = 0.0; + for (UINT32 i = 0; i < n; ++i) + { + double corr = std::fabs(x_mixed[i] - aa_x_snapshot_[i]); + if (corr > maxCorrMixed) + maxCorrMixed = corr; + } + maxCorr_ = (maxCorr_ > 0.0 ? maxCorrMixed : -maxCorrMixed); +} + +void dna_adjust::AdjustSimultaneousLM() +{ + adjustStatus_ = ADJUST_SUCCESS; + initialiseIteration(); + InitialiseLMState(); + + // Reset oscillation diagnostics + corrPrev_.clear(); + stnOscCount_.clear(); + oscHistory_.clear(); + + std::ostringstream ss; + std::string corr_msg; + cpu_timer it_time, tot_time; + + UINT32 iterCount = 0; + + while (iterCount < projectSettings_.a.max_iterations) + { + if (IsCancelled()) + break; + + isIterationComplete_ = false; + + blockLargeCorr_ = 0; + largestCorr_ = 0.0; + + if (projectSettings_.o._cmp_msr_iteration) + printer_->PrintCompMeasurements(0, "a-priori"); + + ss.str(""); + it_time.start(); + + // Solve with LM damping. + // Always recompute the Cholesky factor because the LM solver restores + // the pristine (unfactored) normals from backup after each step, + // and lambda may have changed between iterations. 
+ SolveLMTry(true, 0); + + // Print iteration to adj file + printer_->PrintIteration(incrementIteration()); + PrintAdjustmentTime(it_time, iteration_time); + + maxCorr_ = v_corrections_.at(0).compute_maximum_value(); + OutputLargestCorrection(corr_msg); + UpdateIterationDiagnostics(); + + // Apply corrections + v_estimatedStations_.at(0).add(v_corrections_.at(0)); + + // Update geographic coordinates if needed + if (v_msrTally_.at(0).ContainsNonGPS()) + UpdateGeographicCoords(); + + if (projectSettings_.g.verbose > 0) + { + debug_file << "LM iteration " << CurrentIteration() + << ": lambda=" << lm_lambda_ + << ", maxCorr=" << maxCorr_ + << std::endl; + debug_file.flush(); + } + + // Reduce damping each iteration — the damping serves as regularisation + // for early iterations and decays to zero (pure GN) as convergence nears + lm_lambda_ /= projectSettings_.a.lm_gamma_down; + + iterationCorrections_.add_message(corr_msg); + iterationQueue_.push_and_notify(CurrentIteration()); + isIterationComplete_ = true; + + // Convergence check + bool iterate = !IsCancelled() && fabs(maxCorr_) > projectSettings_.a.iteration_threshold; + if (!iterate) + break; + + // Per-iteration optional output + if (projectSettings_.o._adj_stat_iteration) + { + ComputeStatisticsOnIteration(); + printer_->PrintStatistics(false); + } + if (projectSettings_.o._adj_msr_iteration) + ComputeandPrintAdjMsrOnIteration(); + if (projectSettings_.o._adj_stn_iteration) + printer_->PrintBlockStations(adj_file, 0, &v_estimatedStations_.at(0), &v_normals_.at(0), + false, !v_msrTally_.at(0).ContainsNonGPS(), !v_msrTally_.at(0).ContainsNonGPS(), true, false); + + iterCount++; + + // Rebuild normals for next iteration + bool lastIteration = (iterCount >= projectSettings_.a.max_iterations); + + // Restore normals from backup before UpdateAdjustment rebuilds + v_normals_.at(0) = v_normalsBackup_.at(0); + UpdateAdjustment(!lastIteration); + } + + // Final: restore undamped N and compute full inverse for statistics 
+ v_normals_.at(0) = v_normalsBackup_.at(0); + + // Apply same scaling as original Solve() if needed + UINT32 n = v_normals_.at(0).rows(); + std::vector s_diag; + if (projectSettings_.a.scale_normals_to_unity) + { + s_diag.resize(n); + for (UINT32 i(0); i < n; ++i) + s_diag[i] = 1.0 / sqrt(v_normals_.at(0).get(i, i)); + v_normals_.at(0).scale_symmetric_diagonal(s_diag.data()); + } + + // No clearupper() — FormInverseVarianceMatrix/cholesky_inverse handles + // both packed and full matrices directly (dpotrf(L) reads only the lower triangle). + FormInverseVarianceMatrix(&v_normals_.at(0), false, false); + + if (projectSettings_.a.scale_normals_to_unity) + v_normals_.at(0).scale_symmetric_diagonal(s_diag.data()); + + ValidateandFinaliseAdjustment(tot_time); +} + + +void dna_adjust::AdjustPhasedLM() +{ + initialiseIteration(); + InitialiseLMState(); + + std::string corr_msg; + bool iterate(true); + + cpu_timer it_time, tot_time; + + UINT32 iterCount = 0; + + while (iterCount < projectSettings_.a.max_iterations) + { + if (IsCancelled()) + break; + + isIterationComplete_ = false; + + blockLargeCorr_ = 0; + largestCorr_ = 0.0; + maxCorr_ = 0.0; + + potentialOutlierCount_ = 0; + chiSquaredStage_ = 0.; + measurementParams_ = 0; + + // Print the iteration # to adj file + printer_->PrintIteration(incrementIteration()); + + it_time.start(); + + AdjustPhasedForward(); + if (IsCancelled()) + break; + + AdjustPhasedReverseCombine(); + if (IsCancelled()) + break; + + // Calculate and print total time + PrintAdjustmentTime(it_time, iteration_time); + + // Calculate and print largest adjustment correction and station ID + OutputLargestCorrection(corr_msg); + UpdateIterationDiagnostics(); + + if (projectSettings_.g.verbose > 0) + { + debug_file << "LM phased iteration " << CurrentIteration() + << ": lambda=" << lm_lambda_ + << ", maxCorr=" << maxCorr_ + << std::endl; + debug_file.flush(); + } + + // Reduce damping each iteration — the damping serves as regularisation + // for early 
iterations and decays to zero (pure GN) as convergence nears + lm_lambda_ /= projectSettings_.a.lm_gamma_down; + + iterationCorrections_.add_message(corr_msg); + iterationQueue_.push_and_notify(CurrentIteration()); + isIterationComplete_ = true; + + // Continue iterating? + iterate = !IsCancelled() && fabs(maxCorr_) > projectSettings_.a.iteration_threshold; + if (!iterate) + break; + + // Update normals and measured-computed matrices for the next iteration + UpdateAdjustment(iterate); + if (IsCancelled()) + break; + + if (projectSettings_.o._adj_stat_iteration) + { + ComputeStatisticsOnIteration(); + printer_->PrintStatistics(false); + } + if (projectSettings_.o._adj_msr_iteration) + ComputeandPrintAdjMsrOnIteration(); + + iterCount++; + } + + chiSquared_ = chiSquaredStage_; + + ValidateandFinaliseAdjustment(tot_time); +} + + +void dna_adjust::AdjustPhasedMultiThreadLM() +{ + // The LM phased solver currently runs single-threaded. Temporarily clear + // multi_thread so that UpdateEstimatesFinal, UpdateAdjustment, and other + // helpers use the regular (non-MT) matrices consistently. Without this, + // grow/shrink operations during junction carry-forward target different + // matrix sets, leaving stale pseudo-measurements in the AtVinv and + // measMinusComp matrices after the reverse sweep. 
+ bool savedMultiThread = projectSettings_.a.multi_thread; + projectSettings_.a.multi_thread = false; + AdjustPhasedLM(); + projectSettings_.a.multi_thread = savedMultiThread; +} + + +void dna_adjust::DeSerialiseAdjustedVarianceMatrices() +{ + // No need to facilitate serialising if network adjustment is in stage, + // as this will already be taken care of + if (projectSettings_.a.stage) + return; + + // Create file streams and memory map regions + OpenStageFileStreams(2, sf_rigorous_vars, sf_prec_adj_msrs); + ReserveBlockMapRegions(2, sf_rigorous_vars, sf_prec_adj_msrs); + + for (UINT32 block=0; blocksecond.cx; + double py = prevIt->second.cy; + double pz = prevIt->second.cz; + double magPrev = sqrt(px*px + py*py + pz*pz); + + // Update stored corrections + prevIt->second = {cx, cy, cz}; + + // Skip tiny corrections (sub-millimetre) + if (magCurr < 0.001 && magPrev < 0.001) + { + stnOscCount_[stnIdx] = 0; + continue; + } + + // Dot product and cosine of angle between correction vectors + double dot = cx*px + cy*py + cz*pz; + double denom = magCurr * magPrev; + double cosAngle = (denom > 1e-30) ? dot / denom : 0.0; + + // Magnitude ratio + double ratio = (magPrev > 1e-30) ? 
magCurr / magPrev : 0.0; + + // Oscillation: anti-parallel (cos < -0.5), similar magnitude (ratio 0.3–3.0) + if (cosAngle < -0.5 && ratio > 0.3 && ratio < 3.0) + stnOscCount_[stnIdx]++; + else + stnOscCount_[stnIdx] = 0; + + if (stnOscCount_[stnIdx] >= 2) + { + // Convert to local (e, n, up) + matrix_2d cart(3, 1), local(3, 1); + cart.put(0, 0, cx); + cart.put(1, 0, cy); + cart.put(2, 0, cz); + Rotate_CartLocal(cart, &local, + bstBinaryRecords_.at(stnIdx).currentLatitude, + bstBinaryRecords_.at(stnIdx).currentLongitude); + + double localMag = sqrt(local.get(0,0)*local.get(0,0) + + local.get(1,0)*local.get(1,0) + local.get(2,0)*local.get(2,0)); + + auto hit = oscHistory_.find(stnIdx); + if (hit == oscHistory_.end()) + { + OscillationRecord rec; + rec.stnBstIdx = stnIdx; + rec.firstIteration = CurrentIteration(); + rec.lastIteration = CurrentIteration(); + rec.maxCycles = stnOscCount_[stnIdx]; + rec.firstMag = localMag; + rec.lastMag = localMag; + rec.lastE = local.get(0, 0); + rec.lastN = local.get(1, 0); + rec.lastUp = local.get(2, 0); + oscHistory_[stnIdx] = rec; + } + else + { + hit->second.lastIteration = CurrentIteration(); + hit->second.maxCycles = stnOscCount_[stnIdx]; + hit->second.lastMag = localMag; + hit->second.lastE = local.get(0, 0); + hit->second.lastN = local.get(1, 0); + hit->second.lastUp = local.get(2, 0); + } + } + } + + // For staged adjustments, unload corrections + if (projectSettings_.a.stage) + UnloadBlock(block, 1, sf_corrections); + } +} + +void dna_adjust::PrintOscillationSummary() +{ + if (oscHistory_.empty()) + return; + + // Collect records with meaningful magnitude, sort descending + std::vector sorted; + for (auto& kv : oscHistory_) + { + double peak = std::max(kv.second.firstMag, kv.second.lastMag); + if (peak >= 0.1) + sorted.push_back(&kv.second); + } + + if (sorted.empty()) + return; + + std::sort(sorted.begin(), sorted.end(), + [](const OscillationRecord* a, const OscillationRecord* b) { + return std::max(a->firstMag, 
a->lastMag) > std::max(b->firstMag, b->lastMag); + }); + + // Cap at 20 stations + size_t limit = std::min(sorted.size(), static_cast(20)); + + std::cout << std::endl; + std::cout << "+ Oscillating stations detected (" << sorted.size() << " total, showing top " + << limit << "):" << std::endl; + + for (size_t i = 0; i < limit; ++i) + { + const auto* rec = sorted[i]; + + // Classify direction + double horizMag = sqrt(rec->lastE * rec->lastE + rec->lastN * rec->lastN); + double vertMag = fabs(rec->lastUp); + std::string direction; + if (vertMag < 0.01 * horizMag) + direction = "horizontal"; + else if (horizMag < 0.01 * vertMag) + direction = "vertical"; + else + direction = "3D"; + + std::cout << " - " << bstBinaryRecords_.at(rec->stnBstIdx).stationName + << std::fixed << std::setprecision(1) + << " — " << rec->firstMag << "m to " << rec->lastMag << "m" + << ", " << direction + << ", " << rec->maxCycles << " cycles" + << " (iterations " << rec->firstIteration << "-" << rec->lastIteration << ")" + << std::endl; + } +} + void dna_adjust::ComputePrecisionAdjMsrs(const UINT32& block /*= 0*/) { if (projectSettings_.a.report_mode) @@ -7352,6 +8235,7 @@ void dna_adjust::ComputePrecisionAdjMsrs(const UINT32& block /*= 0*/) // A*V-1*At, where: // - A is design matrix // - V is the inverse of the normals (i.e. precision of estimates) + v_precAdjMsrsFull_.at(block).redim(v_measurementVarianceCount_.at(block), 1); v_precAdjMsrsFull_.at(block).zero(); UINT32 design_row(0); @@ -8031,19 +8915,19 @@ void dna_adjust::ComputeChiSquare_D(it_vmsr_t& _it_msr, UINT32& measurement_inde } -void dna_adjust::FormInverseVarianceMatrix(matrix_2d* vmat, bool LOWER_IS_CLEARED) +void dna_adjust::FormInverseVarianceMatrix(matrix_2d* vmat, bool LOWER_IS_CLEARED, bool mark_symmetric) { if (vmat->rows() == 1) { vmat->put(0, 0, 1./vmat->get(0, 0)); return; } - + // As of version 3.2.0, force all inversions to use MKL. This change // is enforced for two reasons: // 1. 
Sweep, Gaussian inverse and numerical recipes cholesky - // all require matrix data to be stored in row wise fashion, upper - // triangle only, whereas contiguous matrix class stores matrix data + // all require matrix data to be stored in row wise fashion, upper + // triangle only, whereas contiguous matrix class stores matrix data // in column wise fashion, lower triangle. // 2. Sweep, Gaussian never really offered a stable solution. // The following switch is kept in case future development warrants @@ -8053,8 +8937,8 @@ void dna_adjust::FormInverseVarianceMatrix(matrix_2d* vmat, bool LOWER_IS_CLEARE // TODO: All functions which load variance matrix from binary files // store the data in upper triangular form. This could be changed to - // lower triangular form, thus alleviating the need to pass - // LOWER_IS_CLEARED. That is, force all operations to use a lower + // lower triangular form, thus alleviating the need to pass + // LOWER_IS_CLEARED. That is, force all operations to use a lower // triangular matrix. Not sure if this would create an efficiency or not. switch (projectSettings_.a.inverse_method_msr) { @@ -8066,8 +8950,7 @@ void dna_adjust::FormInverseVarianceMatrix(matrix_2d* vmat, bool LOWER_IS_CLEARE // break; case Cholesky_mkl: default: - // Inversion using Intel MKL - vmat->cholesky_inverse(LOWER_IS_CLEARED); + vmat->cholesky_inverse(LOWER_IS_CLEARED, mark_symmetric); break; // choleskyinverse broke once the storage order of the matrix buffer was // changed from row-wise to column wise. 
diff --git a/dynadjust/dynadjust/dnaadjust/dnaadjust.hpp b/dynadjust/dynadjust/dnaadjust/dnaadjust.hpp index c3c098131..f5f9be546 100644 --- a/dynadjust/dynadjust/dnaadjust/dnaadjust.hpp +++ b/dynadjust/dynadjust/dnaadjust/dnaadjust.hpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -289,16 +290,24 @@ class dna_adjust { // void PrintEstimatedStationCoordinatestoDNAXML_Y(const std::string& msrFile, // INPUT_FILE_TYPE t); + void PrintOscillationSummary(); void CloseOutputFiles(); void UpdateBinaryFiles(); UINT32 CurrentIteration() const; UINT32& incrementIteration(); + void decrementIteration(); void initialiseIteration(const UINT32& iteration = 0); inline UINT32 CurrentBlock() const { return currentBlock_; }; - inline void SetcurrentBlock(const UINT32 b) { currentBlock_ = b; }; + inline void SetcurrentBlock(const UINT32 b) { + auto now = std::chrono::steady_clock::now(); + if (b != currentBlock_ && blockStartTime_.time_since_epoch().count() > 0) + lastBlockElapsedMs_ = std::chrono::duration_cast(now - blockStartTime_).count(); + currentBlock_ = b; + blockStartTime_ = now; + }; inline void SetmaxCorr(const double c) { @@ -312,6 +321,15 @@ class dna_adjust { } inline bool processingForward() { return forward_; } inline bool processingCombine() { return isCombining_; } + inline int64_t LastBlockElapsedMs() const { return lastBlockElapsedMs_; } + inline UINT32 CurrentBlockStationCount() const { + if (currentBlock_ < v_blockStationsMap_.size()) + return static_cast(v_blockStationsMap_.at(currentBlock_).size()); + return 0; + } + static void SetMaxBlasThreads(int n); + static int GetMaxBlasThreads(); + inline int GetDegreesOfFreedom() const { return degreesofFreedom_; } inline UINT32 GetMeasurementCount() const { return measurementParams_; } inline UINT32 GetUnknownsCount() const { return unknownParams_; } @@ -387,12 +405,15 @@ class dna_adjust { bool isAdjusting_; bool isCombining_; bool forward_; + bool rebuildingDesign_; bool 
isFirstTimeAdjustment_; bool isIterationComplete_; bool isAdjustmentQuestionable_; UINT32 blockCount_; UINT32 currentBlock_; + std::chrono::steady_clock::time_point blockStartTime_; + int64_t lastBlockElapsedMs_; std::chrono::milliseconds total_time_; _ADJUST_STATUS_ adjustStatus_; @@ -506,6 +527,8 @@ class dna_adjust { void DeserialiseBlockFromMappedFile(const UINT32& block, const int count = 0, ...); void UnloadBlock(const UINT32& block, const int file_count = 0, ...); + void AdviseBlockDontNeed(const UINT32& block); + void AdviseBlockWillNeed(const UINT32& block); void PurgeMatricesFromDisk(); // Helpers @@ -541,7 +564,8 @@ class dna_adjust { void UpdateDesignNormalMeasMatrices(pit_vmsr_t _it_msr, UINT32& design_row, bool buildnewMatrices, const UINT32& block, - bool MT_ReverseOrCombine); + bool MT_ReverseOrCombine, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_A(pit_vmsr_t _it_msr, UINT32& design_row, @@ -549,40 +573,45 @@ class dna_adjust { matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_BK(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_C(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_CEM( pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, 
matrix_2d* design, matrix_2d* AtVinv, - bool buildnewMatrices); + bool buildnewMatrices, bool skipNormals = false); void UpdateDesignNormalMeasMatrices_D(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_E(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignMeasMatrices_GX(pit_vmsr_t _it_msr, UINT32& design_row, matrix_2d* measMinusComp, matrix_2d* estimatedStations, @@ -594,113 +623,130 @@ class dna_adjust { matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_H(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_HR(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_I(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + 
matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_IP(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_J(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_JQ(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_L(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_M(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_P(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_Q(pit_vmsr_t 
_it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_R(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_S(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_V(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_X( pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_Y( pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, - matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* design, matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateDesignNormalMeasMatrices_Z(pit_vmsr_t _it_msr, UINT32& design_row, const UINT32& block, matrix_2d* measMinusComp, matrix_2d* estimatedStations, matrix_2d* normals, 
matrix_2d* design, - matrix_2d* AtVinv, bool buildnewMatrices); + matrix_2d* AtVinv, bool buildnewMatrices, + bool skipNormals = false); void UpdateIgnoredMeasurements(pit_vmsr_t _it_msr, bool storeOriginalMeasurement); @@ -834,7 +880,9 @@ class dna_adjust { void PrepareDesignAndMsrMnsCmpMatricesStage(const UINT32& block); void FillDesignNormalMeasurementsMatrices(bool buildnewMatrices, const UINT32& block, - bool MT_ReverseOrCombine); + bool MT_ReverseOrCombine, + bool skipNormals = false); + void RebuildDesignAndAtVinv(const UINT32& block); // void RecomputeMeasurementsCommonJunctions(const UINT32& nextBlock, const // UINT32& thisBlock, const UINT32& prevBlock); @@ -936,7 +984,7 @@ class dna_adjust { matrix_2d* measMinusComp); void - FormInverseVarianceMatrix(matrix_2d* vmat, bool LOWER_IS_CLEARED = false); + FormInverseVarianceMatrix(matrix_2d* vmat, bool LOWER_IS_CLEARED = false, bool mark_symmetric = false); void FormInverseGPSVarianceMatrix(const it_vmsr_t& _it_msr, matrix_2d* vmat); bool @@ -1213,6 +1261,56 @@ class dna_adjust { v_mat_2d v_corrections_; // vector of residuals matrices v_mat_2d v_correctionsR_; // vector of residuals matrices + // ---------------------------------------------- + // Levenberg-Marquardt / Trust-Region state + double lm_lambda_; // current damping parameter + double chiSquaredPrev_; // chi-squared before step + v_mat_2d v_normalsBackup_; // pristine N per block (before lambda*D augmentation) + v_mat_2d v_gradient_; // g = AtV^-1 r per block + v_mat_2d v_estimatedStationsBackup_; // x_k snapshot for rollback + std::vector> v_scaleDiag_; // saved scaling vectors per block + + void InitialiseLMState(); + void SolveLM(bool COMPUTE_FACTOR, const UINT32& block, bool COMPUTE_INVERSE = false); + void SolveLMTry(bool COMPUTE_FACTOR, const UINT32& block, bool COMPUTE_INVERSE = false); + double ComputePredictedReduction(const UINT32& block); + double ComputeNetworkPredictedReduction(); + void AdjustSimultaneousLM(); + void 
AdjustPhasedLM(); + void AdjustPhasedMultiThreadLM(); + + // ---------------------------------------------- + // Anderson acceleration state + std::vector> aa_G_hist_; // ring buffer of g_k (post-iteration coords) + std::vector> aa_F_hist_; // ring buffer of f_k (residuals) + std::vector aa_x_snapshot_; // x_k saved before iteration + UINT32 aa_hist_count_; // entries stored so far + UINT32 aa_hist_pos_; // current write position in ring buffer + + void InitialiseAndersonAcceleration(); + void ExtractStationCoordinates(std::vector& coords); + void ScatterStationCoordinates(const std::vector& coords); + void ApplyAndersonAcceleration(UINT32 iteration); + + // ---------------------------------------------- + // Iteration diagnostics — oscillation detection + struct StationCorrRecord { + double cx, cy, cz; // Cartesian corrections + }; + struct OscillationRecord { + UINT32 stnBstIdx; // BST record index + UINT32 firstIteration; + UINT32 lastIteration; + UINT32 maxCycles; + double firstMag; + double lastMag; + double lastE, lastN, lastUp; + }; + std::map corrPrev_; // keyed by BST station index + std::map stnOscCount_; // keyed by BST station index + std::map oscHistory_; // keyed by BST station index + void UpdateIterationDiagnostics(); + // ---------------------------------------------- // Adjustment functions and variables for staged adjustment diff --git a/dynadjust/dynadjust/dnaadjustwrapper/dnaadjustprogress.cpp b/dynadjust/dynadjust/dnaadjustwrapper/dnaadjustprogress.cpp index 7365c622d..e97b27f52 100644 --- a/dynadjust/dynadjust/dnaadjustwrapper/dnaadjustprogress.cpp +++ b/dynadjust/dynadjust/dnaadjustwrapper/dnaadjustprogress.cpp @@ -21,6 +21,7 @@ #include #include +#include /// \cond #include @@ -366,24 +367,28 @@ void dna_adjust_progress_thread::processAdjustment() // print new block to screen when adjusting only if (block != currentBlock && _dnaAdj->IsAdjusting()) - { + { ss.str(""); ss << " Iteration " << std::right << std::setw(2) << std::fixed << 
std::setprecision(0) << _dnaAdj->CurrentIteration(); if (_p->a.multi_thread && !_dnaAdj->processingCombine()) ss << std::left << std::setw(13) << ", adjusting..."; else - ss << ", block " << std::left << std::setw(6) << std::fixed << std::setprecision(0) << _dnaAdj->CurrentBlock() + 1; - - sst.str(""); - if (first_time) { - sst << std::setw(PROGRESS_ADJ_BLOCK_28) << std::left << " "; - first_time = false; + ss << ", block " << std::left << std::setw(4) << std::fixed << std::setprecision(0) << _dnaAdj->CurrentBlock() + 1; + UINT32 stnCount = _dnaAdj->CurrentBlockStationCount(); + if (stnCount > 0) + { + int max_t = dna_adjust::GetMaxBlasThreads(); + ss << " (" << std::right << std::setw(5) << stnCount << " stns, " << std::setw(2) << max_t << "T)"; + } + int64_t elapsedMs = _dnaAdj->LastBlockElapsedMs(); + if (elapsedMs > 0) + ss << " " << std::fixed << std::setprecision(1) << (elapsedMs / 1000.0) << "s"; } - - sst << PROGRESS_BACKSPACE_28 << std::setw(PROGRESS_ADJ_BLOCK_28) << std::left << ss.str(); - coutMessage(sst.str()); + ss << std::endl; + coutMessage(ss.str()); + first_time = true; currentBlock = block; } diff --git a/dynadjust/dynadjust/dnaadjustwrapper/dnaadjustwrapper.cpp b/dynadjust/dynadjust/dnaadjustwrapper/dnaadjustwrapper.cpp index 48920196d..40b47f12a 100644 --- a/dynadjust/dynadjust/dnaadjustwrapper/dnaadjustwrapper.cpp +++ b/dynadjust/dynadjust/dnaadjustwrapper/dnaadjustwrapper.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,7 @@ /// \endcond #include +#include #include #include #include @@ -60,6 +62,14 @@ using namespace dynadjust::iostreams; extern bool running; extern std::mutex cout_mutex; +static dna_adjust* g_netAdjust = nullptr; + +void sigint_handler(int) +{ + if (g_netAdjust) + g_netAdjust->CancelAdjustment(); +} + using namespace dynadjust; using namespace dynadjust::epsg; @@ -199,75 +209,75 @@ void DeserialiseVarianceMatrices(dna_adjust* netAdjust, const project_settings* void 
GenerateStatistics(dna_adjust* netAdjust, const project_settings* p) { // Generate statistics - // Don't produce statistics only for block 1 only adjustments - if (p->a.adjust_mode != Phased_Block_1Mode) + if (!p->g.quiet) { - if (!p->g.quiet) + std::cout << "+ Generating statistics..."; + std::cout.flush(); + } + netAdjust->GenerateStatistics(); + if (!p->g.quiet) + { + std::cout << " done." << std::endl; + + // Don't print detailed statistics for block 1 only adjustments + if (p->a.adjust_mode == Phased_Block_1Mode) + return; + + std::cout << "+ Adjustment results:" << std::endl << std::endl; + std::cout << "+" << OUTPUTLINE << std::endl; + std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Number of unknown parameters" << std::fixed << std::setprecision(0) << netAdjust->GetUnknownsCount(); + if (netAdjust->GetAllFixed()) + std::cout << " (All stations held constrained)"; + std::cout << std::endl; + + std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Number of measurements" << std::fixed << std::setprecision(0) << netAdjust->GetMeasurementCount(); + + if (netAdjust->GetPotentialOutlierCount() > 0) { - std::cout << "+ Generating statistics..."; - std::cout.flush(); + std::cout << " (" << netAdjust->GetPotentialOutlierCount() << " potential outlier"; + if (netAdjust->GetPotentialOutlierCount() > 1) + std::cout << "s"; + std::cout << ")"; } - netAdjust->GenerateStatistics(); - if (!p->g.quiet) - { - std::cout << " done." 
<< std::endl; - - std::cout << "+ Adjustment results:" << std::endl << std::endl; - std::cout << "+" << OUTPUTLINE << std::endl; - std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Number of unknown parameters" << std::fixed << std::setprecision(0) << netAdjust->GetUnknownsCount(); - if (netAdjust->GetAllFixed()) - std::cout << " (All stations held constrained)"; - std::cout << std::endl; + std::cout << std::endl; + std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Degrees of freedom" << std::fixed << std::setprecision(0) << netAdjust->GetDegreesOfFreedom() << std::endl; + std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Chi squared" << std::fixed << std::setprecision(2) << netAdjust->GetChiSquared() << std::endl; + std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Rigorous sigma zero" << std::fixed << std::setprecision(3) << netAdjust->GetSigmaZero() << std::endl; + std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Global (Pelzer) Reliability" << std::fixed << std::setw(8) << std::setprecision(3) << netAdjust->GetGlobalPelzerRel() << "(excludes non redundant measurements)" << std::endl << std::endl; - std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Number of measurements" << std::fixed << std::setprecision(0) << netAdjust->GetMeasurementCount(); - - if (netAdjust->GetPotentialOutlierCount() > 0) - { - std::cout << " (" << netAdjust->GetPotentialOutlierCount() << " potential outlier"; - if (netAdjust->GetPotentialOutlierCount() > 1) - std::cout << "s"; - std::cout << ")"; - } - std::cout << std::endl; - std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Degrees of freedom" << std::fixed << std::setprecision(0) << netAdjust->GetDegreesOfFreedom() << std::endl; - std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Chi squared" << std::fixed << std::setprecision(2) << netAdjust->GetChiSquared() << std::endl; - std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Rigorous sigma zero" << std::fixed << 
std::setprecision(3) << netAdjust->GetSigmaZero() << std::endl; - std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Global (Pelzer) Reliability" << std::fixed << std::setw(8) << std::setprecision(3) << netAdjust->GetGlobalPelzerRel() << "(excludes non redundant measurements)" << std::endl << std::endl; - - std::stringstream ss(""); - ss << std::left << " Chi-Square test (" << std::setprecision(1) << std::fixed << p->a.confidence_interval << "%)"; - std::cout << std::setw(PRINT_VAR_PAD) << std::left << ss.str(); - ss.str(""); - ss << std::fixed << std::setprecision(3) << - netAdjust->GetChiSquaredLowerLimit() << " < " << - netAdjust->GetSigmaZero() << " < " << - netAdjust->GetChiSquaredUpperLimit(); - std::cout << std::setw(CHISQRLIMITS) << std::left << ss.str(); - ss.str(""); - - if (netAdjust->GetDegreesOfFreedom() < 1) - ss << "NO REDUNDANCY"; - else + std::stringstream ss(""); + ss << std::left << " Chi-Square test (" << std::setprecision(1) << std::fixed << p->a.confidence_interval << "%)"; + std::cout << std::setw(PRINT_VAR_PAD) << std::left << ss.str(); + ss.str(""); + ss << std::fixed << std::setprecision(3) << + netAdjust->GetChiSquaredLowerLimit() << " < " << + netAdjust->GetSigmaZero() << " < " << + netAdjust->GetChiSquaredUpperLimit(); + std::cout << std::setw(CHISQRLIMITS) << std::left << ss.str(); + ss.str(""); + + if (netAdjust->GetDegreesOfFreedom() < 1) + ss << "NO REDUNDANCY"; + else + { + ss << "*** "; + switch (netAdjust->GetTestResult()) { - ss << "*** "; - switch (netAdjust->GetTestResult()) - { - case test_stat_pass: - ss << "PASSED"; // within upper and lower - break; - case test_stat_warning: - ss << "WARNING"; // less than lower limit - break; - case test_stat_fail: - ss << "FAILED"; // greater than upper limit - break; - } - ss << " ***"; + case test_stat_pass: + ss << "PASSED"; // within upper and lower + break; + case test_stat_warning: + ss << "WARNING"; // less than lower limit + break; + case test_stat_fail: + ss << "FAILED"; // 
greater than upper limit + break; } - - std::cout << std::setw(PASS_FAIL) << std::right << ss.str() << std::endl; - std::cout << "+" << OUTPUTLINE << std::endl << std::endl; + ss << " ***"; } + + std::cout << std::setw(PASS_FAIL) << std::right << ss.str() << std::endl; + std::cout << "+" << OUTPUTLINE << std::endl << std::endl; } else std::cout << std::endl; @@ -617,6 +627,15 @@ int ParseCommandLineOptions(const int& argc, char* argv[], const boost::program_ // p.a.inverse_method_msr = p.a.inverse_method_lsq; if (vm.count(SCALE_NORMAL_UNITY)) p.a.scale_normals_to_unity = 1; + if (vm.count(LM_ENABLED)) + p.a.lm_enabled = true; + if (vm.count(AA_ENABLED)) + p.a.aa_enabled = true; + if (p.a.aa_enabled && p.a.lm_enabled) + { + std::cout << "Warning: --anderson-acceleration is for Gauss-Newton only; disabled because --lm-enabled is set." << std::endl; + p.a.aa_enabled = false; + } if (vm.count(OUTPUT_ADJ_MSR_TSTAT)) p.o._adj_msr_tstat = 1; if (vm.count(OUTPUT_ADJ_MSR_DBID)) @@ -861,6 +880,31 @@ int main(int argc, char* argv[]) "Type b uncertainties to be added to each computed uncertainty. arg is a comma delimited string that provides 1D, 2D or 3D uncertainties in the local reference frame (e.g. \"up\" or \"e,n\" or \"e,n,up\").") (TYPE_B_FILE, boost::program_options::value(&p.a.type_b_file), "Type b uncertainties file name. Full path to a file containing Type b uncertainties to be added to the computed uncertainty for specific sites.") + (LM_ENABLED, + "Enable Levenberg-Marquardt / trust-region solver.") + (LM_LAMBDA_INIT, boost::program_options::value(&p.a.lm_lambda_init), + (std::string("Initial LM damping parameter. Default is ")+ + StringFromT(p.a.lm_lambda_init)+std::string(".")).c_str()) + (LM_ETA_GOOD, boost::program_options::value(&p.a.lm_eta_good), + (std::string("Good-step threshold for lambda reduction. 
Default is ")+ + StringFromT(p.a.lm_eta_good)+std::string(".")).c_str()) + (LM_ETA_ACCEPT, boost::program_options::value(&p.a.lm_eta_accept), + (std::string("Minimum acceptable gain ratio. Default is ")+ + StringFromT(p.a.lm_eta_accept)+std::string(".")).c_str()) + (LM_GAMMA_UP, boost::program_options::value(&p.a.lm_gamma_up), + (std::string("Lambda increase factor on step rejection. Default is ")+ + StringFromT(p.a.lm_gamma_up)+std::string(".")).c_str()) + (LM_GAMMA_DOWN, boost::program_options::value(&p.a.lm_gamma_down), + (std::string("Lambda decrease factor on good step. Default is ")+ + StringFromT(p.a.lm_gamma_down)+std::string(".")).c_str()) + (LM_MAX_REJECTS, boost::program_options::value(&p.a.lm_max_rejects), + (std::string("Maximum consecutive LM rejections per iteration. Default is ")+ + StringFromT(p.a.lm_max_rejects)+std::string(".")).c_str()) + (AA_ENABLED, + "Enable Anderson acceleration for phased Gauss-Newton adjustment.") + (AA_DEPTH, boost::program_options::value(&p.a.aa_depth), + (std::string("Anderson acceleration history depth. Default is ")+ + StringFromT(p.a.aa_depth)+std::string(".")).c_str()) ; staged_adj_options.add_options() @@ -868,6 +912,10 @@ int main(int argc, char* argv[]) "Recreate memory mapped files.") (PURGE_STAGE_FILES, "Purge memory mapped files from disk upon adjustment completion.") + (STAGE_PATH, boost::program_options::value(&p.a.stage_path), + "Directory for memory mapped stage files. Default is the output folder.") + (MAX_THREADS, boost::program_options::value(&p.a.max_threads), + "Maximum number of BLAS threads for linear algebra operations. 
Default is 0 (auto).") ; output_options.add_options() @@ -1166,6 +1214,12 @@ int main(int argc, char* argv[]) if (p.a.scale_normals_to_unity) std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Scale normals to unity: " << "yes" << std::endl; + if (p.a.lm_enabled) + { + std::cout << std::setw(PRINT_VAR_PAD) << std::left << " LM solver: " << "enabled" << std::endl; + std::cout << std::setw(PRINT_VAR_PAD) << std::left << " LM lambda init: " << p.a.lm_lambda_init << std::endl; + std::cout << std::setw(PRINT_VAR_PAD) << std::left << " LM eta good/accept: " << p.a.lm_eta_good << " / " << p.a.lm_eta_accept << std::endl; + } if (!p.a.station_constraints.empty()) { std::cout << std::setw(PRINT_VAR_PAD) << std::left << " Station constraints: " << p.a.station_constraints << std::endl; @@ -1328,7 +1382,12 @@ int main(int argc, char* argv[]) try { running = true; - int nthreads_la = init_linear_algebra_threads(); + // Install SIGINT handler for graceful cancellation + g_netAdjust = &netAdjust; + std::signal(SIGINT, sigint_handler); + + int nthreads_la = init_linear_algebra_threads(p.a.max_threads); + dna_adjust::SetMaxBlasThreads(nthreads_la); std::thread progress(dna_adjust_progress_thread(&netAdjust, &p)); // Do adjustment using linear algebra threads @@ -1355,7 +1414,10 @@ int main(int argc, char* argv[]) PrintSummaryMessage(&netAdjust, &p, &elapsed_time); if (netAdjust.GetStatus() > ADJUST_THRESHOLD_EXCEEDED) + { + netAdjust.PrintOscillationSummary(); return ADJUST_SUCCESS; + } // Generate statistics GenerateStatistics(&netAdjust, &p); @@ -1402,6 +1464,8 @@ int main(int argc, char* argv[]) return EXIT_FAILURE; } + netAdjust.PrintOscillationSummary(); + if (!p.g.quiet) std::cout << std::endl << "+ Open " << leafStr(p.o._adj_file) << " to view the adjustment details." 
<< std::endl << std::endl; diff --git a/dynadjust/include/config/dnaoptions-interface.hpp b/dynadjust/include/config/dnaoptions-interface.hpp index d80b81615..4d3826a5e 100644 --- a/dynadjust/include/config/dnaoptions-interface.hpp +++ b/dynadjust/include/config/dnaoptions-interface.hpp @@ -151,8 +151,25 @@ const char* const LSQ_INVERSE_METHOD = "inversion-method"; const char* const SCALE_NORMAL_UNITY = "scale-normals-to-unity"; const char* const PURGE_STAGE_FILES = "purge-stage-files"; const char* const RECREATE_STAGE_FILES = "create-stage-files"; +const char* const STAGE_PATH = "stage-path"; const char* const UPDATE_ORIGINAL_STN_FILE = "update-orig-stn-file"; +// Levenberg-Marquardt / Trust-Region options +const char* const LM_ENABLED = "lm-enabled"; +const char* const LM_LAMBDA_INIT = "lm-lambda-init"; +const char* const LM_ETA_GOOD = "lm-eta-good"; +const char* const LM_ETA_ACCEPT = "lm-eta-accept"; +const char* const LM_GAMMA_UP = "lm-gamma-up"; +const char* const LM_GAMMA_DOWN = "lm-gamma-down"; +const char* const LM_MAX_REJECTS = "lm-max-rejects"; + +// Anderson acceleration options +const char* const AA_ENABLED = "anderson-acceleration"; +const char* const AA_DEPTH = "aa-depth"; + +// Threading options +const char* const MAX_THREADS = "max-threads"; + const char* const SEG_MIN_INNER_STNS = "min-inner-stns"; const char* const SEG_THRESHOLD_STNS = "max-block-stns"; const char* const SEG_STARTING_STN = "starting-stns"; diff --git a/dynadjust/include/config/dnaoptions.hpp b/dynadjust/include/config/dnaoptions.hpp index 77e21eeae..46ce79572 100644 --- a/dynadjust/include/config/dnaoptions.hpp +++ b/dynadjust/include/config/dnaoptions.hpp @@ -430,7 +430,13 @@ struct adjust_settings : private boost::equality_comparable { , iteration_threshold((float)0.0005), free_std_dev(10.0), fixed_std_dev(PRECISION_1E6), station_constraints("") , map_file(""), bst_file(""), bms_file(""), seg_file(""), comments("") , command_line_arguments("") - , type_b_global (""), type_b_file 
("") {} + , type_b_global (""), type_b_file ("") + , lm_enabled(false), lm_lambda_init(1e-3) + , lm_eta_good(0.75), lm_eta_accept(0.25) + , lm_gamma_up(2.0), lm_gamma_down(3.0) + , lm_max_rejects(10) + , aa_enabled(false), aa_depth(5) + , max_threads(0) {} private: // Disallow use of compiler generated equality operator. @@ -471,6 +477,7 @@ struct adjust_settings : private boost::equality_comparable { UINT16 scale_normals_to_unity; // Scale normals to unity prior to inversion bool purge_stage_files; // Purge memory mapped files from disk upon adjustment completion. UINT16 recreate_stage_files; // Recreate memory mapped files. + std::string stage_path; // Directory for memory mapped stage files (default: output_folder). float iteration_threshold; // Convergence limit double free_std_dev; // SD for free stations double fixed_std_dev; // SD for fixed stations @@ -483,6 +490,22 @@ struct adjust_settings : private boost::equality_comparable { std::string command_line_arguments; std::string type_b_global; // Comma delimited string containing Type b uncertainties to be applied to all uncertainties computed from an adjustment std::string type_b_file; // File path to Type b uncertainties to be applied to specific site uncertainties computed from an adjustment + + // Levenberg-Marquardt / Trust-Region settings + bool lm_enabled; // Enable LM/trust-region solver + double lm_lambda_init; // Initial LM damping parameter + double lm_eta_good; // Good-step threshold for lambda reduction + double lm_eta_accept; // Minimum acceptable gain ratio + double lm_gamma_up; // Lambda increase factor on reject/mediocre + double lm_gamma_down; // Lambda decrease factor on good step + UINT32 lm_max_rejects; // Max consecutive rejections per iteration + + // Anderson acceleration settings + bool aa_enabled; // Enable Anderson acceleration for phased GN + UINT32 aa_depth; // Anderson acceleration history depth (default 5) + + // Threading + int max_threads; // Maximum BLAS threads (0 = auto) }; 
// datum and geoid settings diff --git a/dynadjust/include/functions/dnatemplatematrixfuncs.hpp b/dynadjust/include/functions/dnatemplatematrixfuncs.hpp index 05e0df853..092c7d896 100644 --- a/dynadjust/include/functions/dnatemplatematrixfuncs.hpp +++ b/dynadjust/include/functions/dnatemplatematrixfuncs.hpp @@ -56,7 +56,6 @@ void GetDirectionsVarianceMatrix(msr_t_Iterator begin, matrix_2d* vmat) UINT32 a, angle_count(bmsRecord->vectorCount2 - 1); // number of directions excluding the RO UINT32 skip(0), ignored(bmsRecord->vectorCount1 - bmsRecord->vectorCount2); - vmat->zero(); vmat->redim(angle_count, angle_count); bmsRecord++; @@ -90,8 +89,7 @@ void GetGPSVarianceMatrix(const msr_t_Iterator begin, matrix_2d* vmat) { msr_t_Iterator bmsRecord(begin); UINT32 variance_dim(bmsRecord->vectorCount1 * 3), covariance_dim, cov; - vmat->zero(); - vmat->redim(variance_dim, variance_dim); + vmat->redim(variance_dim, variance_dim); for (UINT32 var(0), cov_elem; var #include +#include #include #include #include @@ -29,6 +31,22 @@ namespace dynadjust { namespace math { +// Maximum BLAS thread count, set via set_max_blas_threads(). +static int g_max_blas_threads = 0; + +void set_max_blas_threads(int n) { g_max_blas_threads = n; } +int get_max_blas_threads() { return g_max_blas_threads; } + +// Compute BLAS thread count scaled proportionally to g_max_blas_threads. +// Tiers: max, max/2, max/4, 1 — selected by matrix dimension n. 
+static inline int blas_threads_for(lapack_int n) { + int max_t = g_max_blas_threads; + if (max_t <= 1) return 1; + if (n > 8000) return max_t; + if (n > 4000) return std::max(1, max_t / 2); + if (n > 2000) return std::max(1, max_t / 4); + return 1; +} std::ostream& operator<<(std::ostream& os, const matrix_2d& rhs) { if (os.iword(0) == binary) { @@ -44,18 +62,25 @@ std::ostream& operator<<(std::ostream& os, const matrix_2d& rhs) { os.write(reinterpret_cast(&rhs._mem_rows), sizeof(UINT32)); os.write(reinterpret_cast(&rhs._mem_cols), sizeof(UINT32)); + // Alignment padding — keeps data region at 8-byte aligned offset (24 bytes) + // Must match WriteMappedFileRegion / ReadMappedFileRegion / AttachMappedFileRegion layout + const UINT32 pad = 0; + os.write(reinterpret_cast(&pad), sizeof(UINT32)); + UINT32 c, r; switch (rhs._matrixType) { case mtx_lower: - // output lower triangular part of a square matrix if (rhs._mem_rows != rhs._mem_cols) throw std::runtime_error("matrix_2d operator<< (): Matrix is not square."); - // print each column - for (c = 0; c < rhs._mem_cols; ++c) - os.write(reinterpret_cast(rhs.getelementref(c, c)), (rhs._mem_rows - c) * sizeof(double)); - + if (rhs._packed) { + os.write(reinterpret_cast(rhs._buffer), + matrix_2d::packed_size(rhs._mem_rows) * sizeof(double)); + } else { + for (c = 0; c < rhs._mem_cols; ++c) + os.write(reinterpret_cast(rhs.getelementref(c, c)), (rhs._mem_rows - c) * sizeof(double)); + } break; case mtx_sparse: break; case mtx_full: @@ -108,7 +133,7 @@ void out_of_memory_handler() { } matrix_2d::matrix_2d() - : _mem_cols(0), _mem_rows(0), _cols(0), _rows(0), _buffer(0), _maxvalCol(0), _maxvalRow(0), _matrixType(mtx_full) { + : _mem_cols(0), _mem_rows(0), _cols(0), _rows(0), _buffer(0), _owns_buffer(true), _maxvalCol(0), _maxvalRow(0), _matrixType(mtx_full), _symmetric(false), _packed(false) { std::set_new_handler(out_of_memory_handler); // if this class were to be modified to use templates, each @@ -125,9 +150,12 @@ 
matrix_2d::matrix_2d(const UINT32& rows, const UINT32& columns) _cols(columns), _rows(rows), _buffer(0), + _owns_buffer(true), _maxvalCol(0), _maxvalRow(0), - _matrixType(mtx_full) { + _matrixType(mtx_full), + _symmetric(false), + _packed(false) { std::set_new_handler(out_of_memory_handler); allocate(_rows, _cols); @@ -140,9 +168,12 @@ matrix_2d::matrix_2d(const UINT32& rows, const UINT32& columns, const double dat _cols(columns), _rows(rows), _buffer(0), + _owns_buffer(true), _maxvalCol(0), _maxvalRow(0), - _matrixType(matrix_type) { + _matrixType(matrix_type), + _symmetric(false), + _packed(false) { std::set_new_handler(out_of_memory_handler); std::stringstream ss; @@ -197,17 +228,26 @@ matrix_2d::matrix_2d(const matrix_2d& newmat) _cols(newmat.columns()), _rows(newmat.rows()), _buffer(0), + _owns_buffer(true), _maxvalCol(newmat.maxvalueCol()), _maxvalRow(newmat.maxvalueRow()), - _matrixType(newmat.matrixType()) { + _matrixType(newmat.matrixType()), + _symmetric(newmat._symmetric), + _packed(newmat._packed) { std::set_new_handler(out_of_memory_handler); - allocate(_mem_rows, _mem_cols); - - const double* ptr = newmat.getbuffer(); - - // copy buffer - memcpy(_buffer, ptr, newmat.buffersize()); + if (_packed) { + std::size_t ps = packed_size(_mem_rows); + _buffer = static_cast(std::malloc(ps * sizeof(double))); + if (!_buffer) throw NetMemoryException("Insufficient memory for packed matrix copy."); + memcpy(_buffer, newmat.getbuffer(), ps * sizeof(double)); + } else { + // Allocate without zeroing — memcpy immediately overwrites entire buffer + std::size_t total_size = static_cast(_mem_rows) * _mem_cols; + _buffer = static_cast(std::malloc(total_size * sizeof(double))); + if (!_buffer) throw NetMemoryException("Insufficient memory for matrix copy."); + memcpy(_buffer, newmat.getbuffer(), total_size * sizeof(double)); + } } matrix_2d::~matrix_2d() { @@ -216,14 +256,21 @@ matrix_2d::~matrix_2d() { } std::size_t matrix_2d::get_size() { + // 8 UINT32s: matrixType, 
rows, cols, mem_rows, mem_cols, _pad, maxvalRow, maxvalCol + // The padding UINT32 ensures the data region starts at an 8-byte aligned offset (24 bytes) + // from the region base, enabling in-place mmap buffer attachment. size_t size = - (7 * sizeof(UINT32)); // UINT32 _matrixType, _mem_cols, _mem_rows, _cols, _rows, _maxvalRow, _maxvalCol + (8 * sizeof(UINT32)); switch (_matrixType) { case mtx_lower: size += sumOfConsecutiveIntegers(_mem_rows) * sizeof(double); break; case mtx_sparse: break; case mtx_full: - default: size += buffersize(); + default: + if (_packed) + size += packed_size(_mem_rows) * sizeof(double); + else + size += buffersize(); } return size; } @@ -241,7 +288,6 @@ void matrix_2d::ReadMappedFileRegion(void* addr) { switch (_matrixType) { case mtx_sparse: - // _mem_cols and _mem_rows equal _cols and _rows _mem_rows = _rows; _mem_cols = _cols; break; @@ -250,10 +296,21 @@ void matrix_2d::ReadMappedFileRegion(void* addr) { default: _mem_rows = *data_U++; _mem_cols = *data_U++; + ++data_U; // skip alignment padding UINT32 break; } - allocate(_mem_rows, _mem_cols); + if (_matrixType == mtx_lower && _mem_rows == _mem_cols) { + deallocate(); + _packed = true; + _symmetric = true; + std::size_t ps = packed_size(_mem_rows); + _buffer = static_cast(std::calloc(ps, sizeof(double))); + if (!_buffer) throw NetMemoryException("Insufficient memory for packed matrix read."); + } else { + _packed = false; + allocate(_mem_rows, _mem_cols); + } double* data_d; int* data_i; @@ -292,15 +349,21 @@ void matrix_2d::ReadMappedFileRegion(void* addr) { return; break; case mtx_lower: + assert(_mem_rows == _mem_cols && "ReadMappedFileRegion(mtx_lower): matrix must be square"); data_d = reinterpret_cast(data_U); - // read each column - for (c = 0; c < _mem_cols; ++c) { - memcpy(getelementref(c, c), data_d, (_mem_rows - c) * sizeof(double)); - data_d += (_mem_rows - c); + if (_packed) { + std::size_t ps = packed_size(_mem_rows); + memcpy(_buffer, data_d, ps * sizeof(double)); + 
data_d += ps; + } else { + for (c = 0; c < _mem_cols; ++c) { + memcpy(getbuffer(c, c), data_d, (_mem_rows - c) * sizeof(double)); + data_d += (_mem_rows - c); + } } - fillupper(); + _symmetric = true; break; case mtx_full: default: @@ -342,9 +405,32 @@ void matrix_2d::WriteMappedFileRegion(void* addr) { default: *data_U++ = _mem_rows; *data_U++ = _mem_cols; + *data_U++ = 0; // alignment padding break; } + // In-place mmap: data is already in the region, only update footer + if (!_owns_buffer) { + // Calculate footer position by skipping past data + double* data_d; + switch (_matrixType) { + case mtx_lower: + if (_packed) + data_d = reinterpret_cast(data_U) + packed_size(_mem_rows); + else + data_d = reinterpret_cast(data_U) + sumOfConsecutiveIntegers(_mem_rows); + break; + case mtx_full: + default: + data_d = reinterpret_cast(data_U) + static_cast(_mem_rows) * _mem_cols; + break; + } + PUINT32 footer = reinterpret_cast(data_d); + *footer++ = _maxvalRow; + *footer = _maxvalCol; + return; + } + double* data_d; int* data_i; @@ -384,10 +470,15 @@ void matrix_2d::WriteMappedFileRegion(void* addr) { case mtx_lower: data_d = reinterpret_cast(data_U); - // print each column - for (c = 0; c < _mem_cols; ++c) { - memcpy(data_d, getbuffer(c, c), (_mem_rows - c) * sizeof(double)); - data_d += (_mem_rows - c); + if (_packed) { + std::size_t ps = packed_size(_mem_rows); + memcpy(data_d, _buffer, ps * sizeof(double)); + data_d += ps; + } else { + for (c = 0; c < _mem_cols; ++c) { + memcpy(data_d, getbuffer(c, c), (_mem_rows - c) * sizeof(double)); + data_d += (_mem_rows - c); + } } break; case mtx_full: @@ -408,16 +499,111 @@ void matrix_2d::WriteMappedFileRegion(void* addr) { *data_U = _maxvalCol; } -void matrix_2d::allocate() { allocate(_mem_rows, _mem_cols); } -// creates memory for desired "memory size", not matrix dimensions -void matrix_2d::allocate(const UINT32& rows, const UINT32& columns) { - //_method_ = "allocate"; +void matrix_2d::AttachMappedFileRegion(void* addr) { 
+ // Read header metadata — same layout as ReadMappedFileRegion + PUINT32 data_U = reinterpret_cast(addr); + _matrixType = *data_U++; + _rows = *data_U++; + _cols = *data_U++; + + switch (_matrixType) { + case mtx_sparse: + _mem_rows = _rows; + _mem_cols = _cols; + // Sparse cannot use in-place — fall back to copy path + ReadMappedFileRegion(addr); + return; + case mtx_lower: + case mtx_full: + default: + _mem_rows = *data_U++; + _mem_cols = *data_U++; + ++data_U; // skip alignment padding + break; + } + // Release any existing buffer deallocate(); - // an exception will be thrown by out_of_memory_handler - // if memory cannot be allocated + // Point _buffer directly at the data region in the mmap + _buffer = reinterpret_cast(data_U); + _owns_buffer = false; + + // Set packed/symmetric flags for lower-triangular matrices + if (_matrixType == mtx_lower && _mem_rows == _mem_cols) { + _packed = true; + _symmetric = true; + } else { + _packed = false; + } + + // Read footer (maxvalRow, maxvalCol) from after the data + double* data_d; + switch (_matrixType) { + case mtx_lower: + data_d = _buffer + packed_size(_mem_rows); + break; + case mtx_full: + default: + data_d = _buffer + static_cast(_mem_rows) * _mem_cols; + break; + } + + PUINT32 footer = reinterpret_cast(data_d); + _maxvalRow = *footer++; + _maxvalCol = *footer; +} + + +void matrix_2d::DetachMappedFileRegion(void* addr) { + if (_owns_buffer || _buffer == nullptr) + return; + + // Write updated footer (maxvalRow, maxvalCol) to the mmap region. + // Header: 6 UINT32s (matrixType, rows, cols, mem_rows, mem_cols, pad) = 24 bytes. + // Skip past data to find the footer position. 
+ double* data_d; + switch (_matrixType) { + case mtx_lower: + data_d = _buffer + packed_size(_mem_rows); + break; + case mtx_full: + default: + data_d = _buffer + static_cast(_mem_rows) * _mem_cols; + break; + } + + PUINT32 footer = reinterpret_cast(data_d); + *footer++ = _maxvalRow; + *footer = _maxvalCol; + + // Detach without freeing the mmap memory + _buffer = nullptr; + _owns_buffer = true; +} + + +void matrix_2d::allocate() { + if (_matrixType == mtx_lower && _mem_rows == _mem_cols && _mem_rows > 0) { + deallocate(); + _packed = true; + _symmetric = true; + std::size_t ps = packed_size(_mem_rows); + _buffer = static_cast(std::calloc(ps, sizeof(double))); + if (!_buffer) { + std::stringstream ss; + ss << "Insufficient memory for a packed " << _mem_rows << " x " << _mem_rows << " matrix."; + throw NetMemoryException(ss.str()); + } + return; + } + allocate(_mem_rows, _mem_cols); +} + +// creates memory for desired "memory size", not matrix dimensions +void matrix_2d::allocate(const UINT32& rows, const UINT32& columns) { + deallocate(); buy(rows, columns, &_buffer); } @@ -429,26 +615,27 @@ void matrix_2d::buy(const UINT32& rows, const UINT32& columns, double** mem_spac __row__ = rows; __col__ = columns; - // an exception will be thrown by out_of_memory_handler - // if memory cannot be allocated + // calloc returns zero-initialised memory. For large allocations glibc + // satisfies calloc via mmap, whose anonymous pages are already zero-filled + // by the kernel — so calloc skips the redundant memset that was previously + // triggering page-fault storms (97 % of CPU time on the NSW benchmark). 
std::size_t total_size = static_cast(rows) * static_cast(columns); - (*mem_space) = new double[total_size]; + (*mem_space) = static_cast(std::calloc(total_size, sizeof(double))); if ((*mem_space) == nullptr) { std::stringstream ss; ss << "Insufficient memory for a " << rows << " x " << columns << " matrix."; throw NetMemoryException(ss.str()); } - - // Initialize memory to zero to prevent uninitialized values - std::memset((*mem_space), 0, total_size * sizeof(double)); } void matrix_2d::deallocate() { if (_buffer != nullptr) { - delete[] _buffer; + if (_owns_buffer) + std::free(_buffer); _buffer = nullptr; } + _owns_buffer = true; } matrix_2d matrix_2d::submatrix(const UINT32& row_begin, const UINT32& col_begin, const UINT32& rows, @@ -514,9 +701,14 @@ void matrix_2d::submatrix(const UINT32& row_begin, const UINT32& col_begin, matr } void matrix_2d::redim(const UINT32& rows, const UINT32& columns) { + if (_packed) { + deallocate(); + _packed = false; + } + _symmetric = false; // if new matrix size is smaller than or equal to the previous // matrix size, then simply change dimensions and return - if (rows <= _mem_rows && columns <= _mem_cols) { + if (_buffer != nullptr && rows <= _mem_rows && columns <= _mem_cols) { // Zero out the unused portions when reusing buffer // Zero partial columns (rows beyond new row count) for (UINT32 col = 0; col < columns && col < _mem_cols; ++col) { @@ -550,23 +742,46 @@ void matrix_2d::redim(const UINT32& rows, const UINT32& columns) { buy(rows, columns, &new_buffer); // Copy old data to new buffer if there was any + bool old_owns = _owns_buffer; if (old_buffer != nullptr && old_rows > 0 && old_cols > 0) { for (UINT32 col = 0; col < old_cols && col < columns; ++col) { for (UINT32 row = 0; row < old_rows && row < rows; ++row) { new_buffer[col * rows + row] = old_buffer[col * old_rows + row]; } } - // Delete old buffer - delete[] old_buffer; + if (old_owns) + std::free(old_buffer); } _buffer = new_buffer; - + _owns_buffer = true; _rows 
= _mem_rows = rows; _cols = _mem_cols = columns; } +void matrix_2d::redim_packed(const UINT32& n) { + std::size_t ps = packed_size(n); + + if (_buffer != nullptr && _packed && n <= _mem_rows) { + std::memset(_buffer, 0, ps * sizeof(double)); + _rows = _cols = _mem_rows = _mem_cols = n; + return; + } + + deallocate(); + _rows = _cols = _mem_rows = _mem_cols = n; + _packed = true; + _symmetric = true; + _matrixType = mtx_lower; + _buffer = static_cast(std::calloc(ps, sizeof(double))); + if (!_buffer) { + std::stringstream ss; + ss << "Insufficient memory for a packed " << n << " x " << n << " matrix."; + throw NetMemoryException(ss.str()); + } +} + void matrix_2d::shrink(const UINT32& rows, const UINT32& columns) { if (rows > _rows || columns > _cols) { std::stringstream ss; @@ -641,21 +856,53 @@ void matrix_2d::copybuffer(const UINT32& rowstart, const UINT32& columnstart, co } } -void matrix_2d::copyelements(const UINT32& row_dest, const UINT32& column_dest, const matrix_2d& src, - const UINT32& row_src, const UINT32& column_src, const UINT32& rows, - const UINT32& columns) { +void matrix_2d::copyelements_generic(const UINT32& row_dest, const UINT32& column_dest, const matrix_2d& src, + const UINT32& row_src, const UINT32& column_src, const UINT32& rows, + const UINT32& columns) { + assert(_buffer != nullptr && src._buffer != nullptr); + assert(row_dest + rows <= _mem_rows && "copyelements_generic(): dest row overflow"); + assert(column_dest + columns <= _mem_cols && "copyelements_generic(): dest col overflow"); + assert(row_src + rows <= src._mem_rows && "copyelements_generic(): src row overflow"); + assert(column_src + columns <= src._mem_cols && "copyelements_generic(): src col overflow"); + // Fast path: packed source → dense dest (avoids per-element get/put overhead) + if (src._packed && !_packed) { + for (UINT32 c = 0; c < columns; ++c) { + double* d = _buffer + static_cast(column_dest + c) * _mem_rows + row_dest; + UINT32 sc = column_src + c; + for (UINT32 r 
= 0; r < rows; ++r) { + UINT32 sr = row_src + r, sj = sc; + if (sr < sj) std::swap(sr, sj); + d[r] = src._buffer[packed_index(src._rows, sr, sj)]; + } + } + return; + } + // Fast path: dense source → packed dest + if (_packed && !src._packed && !src._symmetric) { + for (UINT32 c = 0; c < columns; ++c) { + const double* s = src._buffer + static_cast(column_src + c) * src._mem_rows + row_src; + UINT32 dc = column_dest + c; + for (UINT32 r = 0; r < rows; ++r) { + UINT32 dr = row_dest + r; + if (dr >= dc) + _buffer[packed_index(_rows, dr, dc)] = s[r]; + } + } + return; + } + // Fallback for remaining packed/symmetric combinations + if (src._symmetric || src._packed || _packed) { + for (UINT32 r = 0; r < rows; ++r) + for (UINT32 c = 0; c < columns; ++c) + put(row_dest + r, column_dest + c, src.get(row_src + r, column_src + c)); + return; + } UINT32 cd(0), cs(0), colend_dest(column_dest + columns); for (cd = column_dest, cs = column_src; cd < colend_dest; ++cd, ++cs) memcpy(getelementref(row_dest, cd), src.getbuffer(row_src, cs), static_cast(rows) * sizeof(double)); } -void matrix_2d::copyelements(const UINT32& row_dest, const UINT32& column_dest, const matrix_2d* src, - const UINT32& row_src, const UINT32& column_src, const UINT32& rows, - const UINT32& columns) { - copyelements(row_dest, column_dest, *src, row_src, column_src, rows, columns); -} - void matrix_2d::sweep(UINT32 k1, UINT32 k2) { double eps(1.0e-8), d; UINT32 i, j, k, it; @@ -705,16 +952,67 @@ matrix_2d matrix_2d::sweepinverse() { return *this; } -matrix_2d matrix_2d::cholesky_inverse(bool LOWER_IS_CLEARED /*=false*/) { +matrix_2d matrix_2d::cholesky_inverse(bool LOWER_IS_CLEARED /*=false*/, bool mark_symmetric /*=false*/) { if (_rows < 1) return *this; if (_rows != _cols) throw std::runtime_error("cholesky_inverse(): Matrix is not square."); + assert(_buffer != nullptr && "cholesky_inverse(): null buffer"); + assert(_mem_rows >= _rows && "cholesky_inverse(): mem_rows < rows"); + assert(_mem_cols >= _cols 
&& "cholesky_inverse(): mem_cols < cols"); + assert(!(mark_symmetric && LOWER_IS_CLEARED) && + "cholesky_inverse(): mark_symmetric with LOWER_IS_CLEARED not supported"); + + if (_packed) { + // Unpack to full format for fast blocked dpotrf/dpotri, then repack. + // dpptrf/dpptri are element-by-element and orders of magnitude slower. + lapack_int n = _rows; + std::size_t full_size = static_cast(n) * n; + + // Reuse thread-local workspace to avoid repeated allocation/deallocation. + // dpotrf/dpotri with uplo='L' only read the lower triangle, so no + // memset is needed — the unpack loop sets all lower-triangle elements. + static thread_local std::vector chol_workspace; + if (chol_workspace.size() < full_size) + chol_workspace.resize(full_size); + double* full = chol_workspace.data(); + + for (UINT32 j = 0; j < _rows; ++j) + for (UINT32 i = j; i < _rows; ++i) + full[static_cast(j) * n + i] = _buffer[packed_index(_rows, i, j)]; + + int blas_threads = blas_threads_for(n); +#if defined(USE_MKL) || defined(__MKL__) + mkl_set_num_threads(blas_threads); +#elif !defined(__APPLE__) + openblas_set_num_threads(blas_threads); +#endif + + char uplo = LOWER_TRIANGLE; + lapack_int info, lda = n; + LAPACK_FUNC(dpotrf)(&uplo, &n, full, &lda, &info); + if (info != 0) throw MatrixInversionFailure("Matrix inversion failed, the matrix is singular."); + LAPACK_FUNC(dpotri)(&uplo, &n, full, &lda, &info); + if (info != 0) throw MatrixInversionFailure("Matrix inversion failed, the matrix is singular."); + + for (UINT32 j = 0; j < _rows; ++j) + for (UINT32 i = j; i < _rows; ++i) + _buffer[packed_index(_rows, i, j)] = full[static_cast(j) * n + i]; + return *this; + } + char uplo(LOWER_TRIANGLE); if (LOWER_IS_CLEARED) uplo = UPPER_TRIANGLE; lapack_int info, n = _rows; lapack_int lda = _mem_rows; + int blas_threads = blas_threads_for(n); +#if defined(USE_MKL) || defined(__MKL__) + mkl_set_num_threads(blas_threads); +#elif !defined(__APPLE__) + openblas_set_num_threads(blas_threads); +#endif + 
// Perform Cholesky factorisation LAPACK_FUNC(dpotrf)(&uplo, &n, _buffer, &lda, &info); @@ -727,23 +1025,195 @@ matrix_2d matrix_2d::cholesky_inverse(bool LOWER_IS_CLEARED /*=false*/) { if (info != 0) throw MatrixInversionFailure("Matrix inversion failed, the matrix is singular."); - if (LOWER_IS_CLEARED) + if (mark_symmetric) { + _symmetric = true; + } else if (LOWER_IS_CLEARED) { filllower(); - else + } else { fillupper(); + } + + return *this; +} + +matrix_2d matrix_2d::cholesky_factor(bool LOWER_IS_CLEARED /*=false*/) { + if (_rows < 1) return *this; + if (_rows != _cols) throw std::runtime_error("cholesky_factor(): Matrix is not square."); + + assert(_buffer != nullptr && "cholesky_factor(): null buffer"); + assert(_mem_rows >= _rows && "cholesky_factor(): mem_rows < rows"); + assert(_mem_cols >= _cols && "cholesky_factor(): mem_cols < cols"); + + if (_packed) { + // Unpack to full format for dpotrf (same approach as cholesky_inverse). + // The factor L is stored back in packed format. 
+ lapack_int n = _rows; + std::size_t full_size = static_cast(n) * n; + + // Reuse thread-local workspace; no memset needed (dpotrf reads lower triangle only) + static thread_local std::vector chol_workspace; + if (chol_workspace.size() < full_size) + chol_workspace.resize(full_size); + double* full = chol_workspace.data(); + + for (UINT32 j = 0; j < _rows; ++j) + for (UINT32 i = j; i < _rows; ++i) + full[static_cast(j) * n + i] = _buffer[packed_index(_rows, i, j)]; + + int blas_threads = blas_threads_for(n); +#if defined(USE_MKL) || defined(__MKL__) + mkl_set_num_threads(blas_threads); +#elif !defined(__APPLE__) + openblas_set_num_threads(blas_threads); +#endif + + char uplo = LOWER_TRIANGLE; + lapack_int info, lda = n; + LAPACK_FUNC(dpotrf)(&uplo, &n, full, &lda, &info); + if (info != 0) throw MatrixInversionFailure("Cholesky factorisation failed, the matrix is singular."); + + // Store the L factor back in packed format + for (UINT32 j = 0; j < _rows; ++j) + for (UINT32 i = j; i < _rows; ++i) + _buffer[packed_index(_rows, i, j)] = full[static_cast(j) * n + i]; + return *this; + } + + char uplo(LOWER_TRIANGLE); + if (LOWER_IS_CLEARED) uplo = UPPER_TRIANGLE; + + lapack_int info, n = _rows; + lapack_int lda = _mem_rows; + + int blas_threads = blas_threads_for(n); +#if defined(USE_MKL) || defined(__MKL__) + mkl_set_num_threads(blas_threads); +#elif !defined(__APPLE__) + openblas_set_num_threads(blas_threads); +#endif + + LAPACK_FUNC(dpotrf)(&uplo, &n, _buffer, &lda, &info); + + if (info != 0) + throw MatrixInversionFailure("Cholesky factorisation failed, the matrix is singular."); return *this; } +void matrix_2d::cholesky_solve(matrix_2d& rhs, bool LOWER_IS_CLEARED /*=false*/) { + assert(_rows == _cols && "cholesky_solve(): factor matrix is not square"); + assert(_rows == rhs._rows && "cholesky_solve(): dimension mismatch"); + assert(_buffer != nullptr && "cholesky_solve(): null factor buffer"); + assert(rhs._buffer != nullptr && "cholesky_solve(): null rhs 
buffer"); + + if (_packed) { + // Unpack the L factor to full format for dpotrs. + lapack_int n = _rows; + std::size_t full_size = static_cast(n) * n; + + // Reuse thread-local workspace; no memset needed (dpotrs reads lower triangle only) + static thread_local std::vector chol_workspace; + if (chol_workspace.size() < full_size) + chol_workspace.resize(full_size); + double* full = chol_workspace.data(); + + for (UINT32 j = 0; j < _rows; ++j) + for (UINT32 i = j; i < _rows; ++i) + full[static_cast(j) * n + i] = _buffer[packed_index(_rows, i, j)]; + + char uplo = LOWER_TRIANGLE; + lapack_int nrhs = rhs._cols; + lapack_int lda = n; + lapack_int ldb = rhs._mem_rows; + lapack_int info; + + LAPACK_FUNC(dpotrs)(&uplo, &n, &nrhs, full, &lda, rhs._buffer, &ldb, &info); + + if (info != 0) + throw MatrixInversionFailure("Cholesky solve failed."); + return; + } + + char uplo(LOWER_TRIANGLE); + if (LOWER_IS_CLEARED) uplo = UPPER_TRIANGLE; + + lapack_int n = _rows; + lapack_int nrhs = rhs._cols; + lapack_int lda = _mem_rows; + lapack_int ldb = rhs._mem_rows; + lapack_int info; + + LAPACK_FUNC(dpotrs)(&uplo, &n, &nrhs, _buffer, &lda, rhs._buffer, &ldb, &info); + + if (info != 0) + throw MatrixInversionFailure("Cholesky solve failed."); +} + +double matrix_2d::dot(const matrix_2d& other) const { + assert(_cols == 1 && other._cols == 1 && "dot(): both matrices must be column vectors"); + assert(_rows == other._rows && "dot(): dimension mismatch"); + assert(_buffer != nullptr && other._buffer != nullptr && "dot(): null buffer"); + + double result = 0.0; + for (UINT32 i = 0; i < _rows; ++i) + result += get(i, 0) * other.get(i, 0); + return result; +} + matrix_2d matrix_2d::scale(const double& scalar) { + if (_packed) { + std::size_t ps = packed_size(_rows); + for (std::size_t k = 0; k < ps; ++k) + _buffer[k] *= scalar; + return *this; + } UINT32 i, j; for (i = 0; i < _rows; ++i) for (j = 0; j < _cols; ++j) *getelementref(i, j) *= scalar; return *this; } -void 
matrix_2d::blockadd(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, - const UINT32& row_src, const UINT32& col_src, const UINT32& rows, const UINT32& cols) { +void matrix_2d::scale_symmetric_diagonal(const double* diag) { + assert(_symmetric && "scale_symmetric_diagonal(): matrix must be symmetric"); + if (_packed) { + for (UINT32 j = 0; j < _rows; ++j) + for (UINT32 i = j; i < _rows; ++i) + _buffer[packed_index(_rows, i, j)] *= diag[i] * diag[j]; + return; + } + for (UINT32 j = 0; j < _rows; ++j) + for (UINT32 i = j; i < _rows; ++i) { + double s = diag[i] * diag[j]; + *getelementref(i, j) *= s; + } +} + +void matrix_2d::blockadd_generic(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, + const UINT32& row_src, const UINT32& col_src, const UINT32& rows, const UINT32& cols) { + // Fast path: dense source → packed dest (avoids per-element get/elementadd overhead) + if (_packed && !mat_src._packed && !mat_src._symmetric) { + for (UINT32 c = 0; c < cols; ++c) { + const double* s = mat_src._buffer + static_cast(col_src + c) * mat_src._mem_rows + row_src; + UINT32 dc = col_dest + c; + for (UINT32 r = 0; r < rows; ++r) { + UINT32 dr = row_dest + r; + if (dr >= dc) + _buffer[packed_index(_rows, dr, dc)] += s[r]; + } + } + return; + } + // Fast path: both dense (non-packed, non-symmetric) — use direct buffer arithmetic + if (!_packed && !mat_src._packed && !mat_src._symmetric) { + for (UINT32 c = 0; c < cols; ++c) { + double* d = _buffer + static_cast(col_dest + c) * _mem_rows + row_dest; + const double* s = mat_src._buffer + static_cast(col_src + c) * mat_src._mem_rows + row_src; + for (UINT32 r = 0; r < rows; ++r) + d[r] += s[r]; + } + return; + } + // Fallback: generic per-element path UINT32 i_dest, j_dest, i_src, j_src; UINT32 i_dest_end(row_dest + rows), j_dest_end(col_dest + cols); @@ -752,9 +1222,8 @@ void matrix_2d::blockadd(const UINT32& row_dest, const UINT32& col_dest, const m elementadd(i_dest, j_dest, 
mat_src.get(i_src, j_src)); } -// Same as blockadd, but adds transpose. mat_src must be square. -void matrix_2d::blockTadd(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, - const UINT32& row_src, const UINT32& col_src, const UINT32& rows, const UINT32& cols) { +void matrix_2d::blockTadd_generic(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, + const UINT32& row_src, const UINT32& col_src, const UINT32& rows, const UINT32& cols) { UINT32 i_dest, j_dest, i_src, j_src; UINT32 i_dest_end(row_dest + rows), j_dest_end(col_dest + cols); @@ -773,79 +1242,153 @@ void matrix_2d::blocksubtract(const UINT32& row_dest, const UINT32& col_dest, co elementsubtract(i_dest, j_dest, mat_src.get(i_src, j_src)); } -// clearlower() void matrix_2d::clearlower() { + assert(_buffer != nullptr); + assert(!_packed && "clearlower(): not valid for packed storage"); // Sets lower triangle elements to zero UINT32 col, row; - for (row = 1, col = 0; col < _mem_cols; ++col, ++row) + for (row = 1, col = 0; col < _mem_cols && row < _mem_rows; ++col, ++row) memset(getelementref(row, col), 0, (static_cast(_mem_rows) - row) * sizeof(double)); } -// clearupper() void matrix_2d::clearupper() { + assert(!_packed && "clearupper(): not valid for packed storage"); // Sets upper triangle elements to zero - UINT32 col, row; - for (row = 0; row < _rows; ++row) - for (col = row + 1; col < _cols; ++col) put(row, col, 0.0); + // Column-major: for each column, zero the rows above the diagonal + for (UINT32 col = 1; col < _cols; ++col) + memset(_buffer + col * _mem_rows, 0, col * sizeof(double)); } -// filllower() void matrix_2d::filllower() { - // copies upper triangle to lower triangle - UINT32 column, row; - for (row = 1; row < _rows; row++) - for (column = 0; column < row; column++) put(row, column, get(column, row)); + assert(_buffer != nullptr); + assert(!_packed && "filllower(): not valid for packed storage"); + assert(_rows == _cols && "filllower(): matrix 
must be square"); + // Copy upper triangle to lower: A(row,col) = A(col,row) for row > col + // Column-major: A(r,c) = _buffer[c * _mem_rows + r] + // Outer loop over destination columns so writes are sequential in memory. + const std::size_t mr = _mem_rows; + constexpr UINT32 BLK = 64; + for (UINT32 cb = 0; cb < _cols; cb += BLK) { + UINT32 ce = std::min(cb + BLK, _cols); + for (UINT32 rb = ce; rb < _rows; rb += BLK) { + UINT32 re = std::min(rb + BLK, _rows); + for (UINT32 col = cb; col < ce; col++) { + double* dst = _buffer + col * mr; + for (UINT32 row = rb; row < re; row++) + dst[row] = _buffer[row * mr + col]; + } + } + } + // Diagonal blocks: row > col within the same block + for (UINT32 cb = 0; cb < _cols; cb += BLK) { + UINT32 ce = std::min(cb + BLK, _cols); + for (UINT32 col = cb; col < ce; col++) { + double* dst = _buffer + col * mr; + for (UINT32 row = col + 1; row < ce; row++) + dst[row] = _buffer[row * mr + col]; + } + } } -// fillupper() void matrix_2d::fillupper() { - // copies lower triangle to upper triangle - UINT32 column, row; - for (row = 1; row < _rows; row++) - for (column = 0; column < row; column++) put(column, row, get(row, column)); + assert(_buffer != nullptr); + assert(!_packed && "fillupper(): not valid for packed storage"); + assert(_rows == _cols && "fillupper(): matrix must be square"); + // Copy lower triangle to upper: A(r,c) = A(c,r) for r < c + // Column-major: A(r,c) = _buffer[c * _mem_rows + r] + // + // Outer loop over destination columns so writes are sequential in memory. + // Reads are strided (one element per source column) but write-combining + // and store buffers make sequential writes much faster than sequential reads + // for large matrices. 
+ const std::size_t mr = _mem_rows; + constexpr UINT32 BLK = 64; + for (UINT32 cb = 0; cb < _cols; cb += BLK) { + UINT32 ce = std::min(cb + BLK, _cols); + for (UINT32 rb = 0; rb < ce; rb += BLK) { + UINT32 re = std::min(rb + BLK, ce); + for (UINT32 col = cb; col < ce; col++) { + double* dst = _buffer + col * mr; + UINT32 r0 = rb; + UINT32 r1 = std::min(re, col); + for (UINT32 row = r0; row < r1; row++) + dst[row] = _buffer[row * mr + col]; + } + } + } } -// zero() -void matrix_2d::zero() { memset(_buffer, 0, buffersize()); } +void matrix_2d::zero() { + assert(_buffer != nullptr && "zero(): null buffer"); + if (_packed) + memset(_buffer, 0, packed_size(_mem_rows) * sizeof(double)); + else + memset(_buffer, 0, buffersize()); +} // zero() void matrix_2d::zero(const UINT32& row_begin, const UINT32& col_begin, const UINT32& rows, const UINT32& columns) { + assert(_buffer != nullptr && "zero(sub): null buffer"); + assert(row_begin + rows <= _mem_rows && "zero(sub): row overflow"); + assert(col_begin + columns <= _mem_cols && "zero(sub): col overflow"); UINT32 col(0), col_end(col_begin + columns); for (col = col_begin; col < col_end; ++col) memset(getelementref(row_begin, col), 0, rows * sizeof(double)); } matrix_2d& matrix_2d::operator=(const matrix_2d& rhs) { - // Overloaded assignment operator if (this == &rhs) return *this; - // If rhs data can fit within limits of this matrix, copy - // and return. 
Otherwise, allocate new memory + if (rhs._packed) { + std::size_t ps = packed_size(rhs._rows); + if (_packed && _mem_rows >= rhs._rows) { + _rows = _cols = rhs._rows; + memcpy(_buffer, rhs._buffer, ps * sizeof(double)); + } else { + deallocate(); + _rows = _cols = _mem_rows = _mem_cols = rhs._rows; + _buffer = static_cast(std::malloc(ps * sizeof(double))); + if (!_buffer) throw NetMemoryException("Insufficient memory for packed matrix assignment."); + memcpy(_buffer, rhs._buffer, ps * sizeof(double)); + } + _packed = true; + _symmetric = true; + _matrixType = rhs._matrixType; + _maxvalCol = rhs.maxvalueCol(); + _maxvalRow = rhs.maxvalueRow(); + return *this; + } + + // rhs is not packed + if (_packed) { + deallocate(); + _packed = false; + _mem_rows = _mem_cols = 0; + } + if (_mem_rows >= rhs.rows() && _mem_cols >= rhs.columns()) { - // don't change _mem_rows or _mem_cols. Simply update - // visible dimensions and copy buffer _rows = rhs.rows(); _cols = rhs.columns(); copybuffer(_rows, _cols, rhs); - _maxvalCol = rhs.maxvalueCol(); // col of max value - _maxvalRow = rhs.maxvalueRow(); // row of max value + _maxvalCol = rhs.maxvalueCol(); + _maxvalRow = rhs.maxvalueRow(); + _symmetric = rhs._symmetric; return *this; } - // Okay, rhs is larger, so allocate new memory. Call free - // memory first before changing row and column dimensions! 
deallocate(); - _mem_rows = rhs.memRows(); // change memory limits + _mem_rows = rhs.memRows(); _mem_cols = rhs.memColumns(); - _rows = rhs.rows(); // change matrix dimensions + _rows = rhs.rows(); _cols = rhs.columns(); allocate(_mem_rows, _mem_cols); copybuffer(_rows, _cols, rhs); - _maxvalCol = rhs.maxvalueCol(); // col of max value - _maxvalRow = rhs.maxvalueRow(); // row of max value + _maxvalCol = rhs.maxvalueCol(); + _maxvalRow = rhs.maxvalueRow(); + _symmetric = rhs._symmetric; return *this; } @@ -875,6 +1418,10 @@ matrix_2d matrix_2d::add(const matrix_2d& rhs) { // multiplies this matrix by rhs and stores the result in a new matrix // Uses Intel MKL dgemm matrix_2d matrix_2d::multiply(const char* lhs_trans, const matrix_2d& rhs, const char* rhs_trans) { + assert(!_symmetric && "multiply(dgemm): LHS is symmetric — use multiply_sym instead"); + assert(!rhs._symmetric && "multiply(dgemm): RHS is symmetric — upper triangle is unpopulated"); + assert(_buffer != nullptr && rhs._buffer != nullptr); + matrix_2d m(_rows, rhs.columns()); const double one = 1.0; @@ -914,6 +1461,10 @@ matrix_2d matrix_2d::multiply(const char* lhs_trans, const matrix_2d& rhs, const // Uses Intel MKL dgemm matrix_2d matrix_2d::multiply(const matrix_2d& lhs, const char* lhs_trans, const matrix_2d& rhs, const char* rhs_trans) { + assert(!lhs._symmetric && "multiply(dgemm): LHS is symmetric — use multiply_sym instead"); + assert(!rhs._symmetric && "multiply(dgemm): RHS is symmetric — upper triangle is unpopulated"); + assert(lhs._buffer != nullptr && rhs._buffer != nullptr && _buffer != nullptr); + const double one = 1.0; const double zero = 0.0; @@ -947,6 +1498,48 @@ matrix_2d::multiply(const matrix_2d& lhs, const char* lhs_trans, const matrix_2d return *this; } // Multiply() +// C (this) = sym_lhs * rhs, using dsymm or dspmv (packed) +matrix_2d matrix_2d::multiply_sym(const matrix_2d& sym_lhs, const matrix_2d& rhs) { + lapack_int m = sym_lhs.rows(); + lapack_int n = rhs.columns(); + + 
assert(sym_lhs._symmetric && "multiply_sym(): LHS must be marked symmetric"); + assert(sym_lhs._buffer != nullptr && "multiply_sym(): LHS null buffer"); + assert(rhs._buffer != nullptr && "multiply_sym(): RHS null buffer"); + assert(_buffer != nullptr && "multiply_sym(): result null buffer"); + assert(_buffer != sym_lhs._buffer && _buffer != rhs._buffer && + "multiply_sym(): result must not alias inputs"); + + if (sym_lhs.columns() != sym_lhs.rows()) + throw std::runtime_error("multiply_sym(): LHS matrix is not square."); + if (static_cast(rhs.rows()) != m) + throw std::runtime_error("multiply_sym(): Matrix dimensions are incompatible."); + if (_rows != static_cast(m) || _cols != static_cast(n)) + throw std::runtime_error("multiply_sym(): Result matrix dimensions are incompatible."); + + if (sym_lhs._packed) { + for (lapack_int j = 0; j < n; ++j) { + BLAS_FUNC(dspmv)(CblasColMajor, CblasLower, + m, 1.0, sym_lhs._buffer, + rhs._buffer + j * rhs._mem_rows, 1, + 0.0, _buffer + j * _mem_rows, 1); + } + return *this; + } + + assert(sym_lhs.memRows() >= sym_lhs.rows() && "multiply_sym(): LHS LDA < M"); + assert(rhs.memRows() >= rhs.rows() && "multiply_sym(): RHS LDA < M"); + assert(_mem_rows >= _rows && "multiply_sym(): result LDA < M"); + + BLAS_FUNC(dsymm)(CblasColMajor, CblasLeft, CblasLower, + m, n, 1.0, + sym_lhs.getbuffer(), sym_lhs.memRows(), + rhs.getbuffer(), rhs.memRows(), + 0.0, _buffer, _mem_rows); + + return *this; +} + // Transpose() matrix_2d matrix_2d::transpose(const matrix_2d& matA) { if ((matA.columns() != _rows) || (matA.rows() != _cols)) @@ -967,9 +1560,19 @@ matrix_2d matrix_2d::transpose() { return m; } // Transpose() -// computes and retains the maximum value in the matrix double matrix_2d::compute_maximum_value() { _maxvalCol = _maxvalRow = 0; + if (_packed) { + for (UINT32 j = 0; j < _rows; ++j) { + for (UINT32 i = j; i < _rows; ++i) { + if (fabs(get(i, j)) > fabs(get(_maxvalRow, _maxvalCol))) { + _maxvalRow = i; + _maxvalCol = j; + } + } + } + 
return get(_maxvalRow, _maxvalCol); + } UINT32 col, row; for (row = 0; row < _rows; ++row) { for (col = 0; col < _cols; col++) { diff --git a/dynadjust/include/math/dnamatrix_contiguous.hpp b/dynadjust/include/math/dnamatrix_contiguous.hpp index be4d79c85..127e32045 100644 --- a/dynadjust/include/math/dnamatrix_contiguous.hpp +++ b/dynadjust/include/math/dnamatrix_contiguous.hpp @@ -23,6 +23,7 @@ #define DNAMATRIX_CONTIGUOUS_H_ /// \cond +#include #include /// \endcond @@ -165,14 +166,26 @@ static_assert(sizeof(lapack_int) == 4, "LP64 interface requires 32-bit integers" extern "C" { void LAPACK_FUNC(dpotrf)(const char* uplo, const lapack_int* n, double* a, const lapack_int* lda, lapack_int* info); void LAPACK_FUNC(dpotri)(const char* uplo, const lapack_int* n, double* a, const lapack_int* lda, lapack_int* info); +void LAPACK_FUNC(dpptrf)(const char* uplo, const lapack_int* n, double* ap, lapack_int* info); +void LAPACK_FUNC(dpptri)(const char* uplo, const lapack_int* n, double* ap, lapack_int* info); void LAPACK_FUNC(dsytrf)(const char* uplo, const lapack_int* n, double* a, const lapack_int* lda, lapack_int* ipiv, double* work, const lapack_int* lwork, lapack_int* info); void LAPACK_FUNC(dsytri)(const char* uplo, const lapack_int* n, double* a, const lapack_int* lda, const lapack_int* ipiv, double* work, lapack_int* info); +void LAPACK_FUNC(dpotrs)(const char* uplo, const lapack_int* n, const lapack_int* nrhs, + const double* a, const lapack_int* lda, double* b, const lapack_int* ldb, lapack_int* info); void BLAS_FUNC(dgemm)(const enum CBLAS_ORDER ORDER, const enum CBLAS_TRANSPOSE TRANSA, const enum CBLAS_TRANSPOSE TRANSB, const lapack_int M, const lapack_int N, const lapack_int K, const double ALPHA, const double* A, const lapack_int LDA, const double* B, const lapack_int LDB, const double BETA, double* C, const lapack_int LDC); +void BLAS_FUNC(dsymm)(const enum CBLAS_ORDER ORDER, const enum CBLAS_SIDE SIDE, const enum CBLAS_UPLO UPLO, + const lapack_int M, const 
lapack_int N, + const double ALPHA, const double* A, const lapack_int LDA, const double* B, const lapack_int LDB, + const double BETA, double* C, const lapack_int LDC); +void BLAS_FUNC(dspmv)(const enum CBLAS_ORDER ORDER, const enum CBLAS_UPLO UPLO, + const lapack_int N, const double ALPHA, const double* AP, + const double* X, const lapack_int INCX, + const double BETA, double* Y, const lapack_int INCY); } #endif @@ -187,6 +200,10 @@ class MatrixInversionFailure : public std::runtime_error { using std::runtime_error::runtime_error; }; +// Set/get the maximum BLAS thread count used by matrix operations. +void set_max_blas_threads(int n); +int get_max_blas_threads(); + class matrix_2d; typedef std::vector v_mat_2d, *pv_mat_2d; typedef v_mat_2d::iterator _it_v_mat_2d; @@ -220,9 +237,23 @@ class matrix_2d : public new_handler_support { // element retrieval // see DNAMATRIX_ROW_WISE inline double& get(const UINT32& row, const UINT32& column) const { + assert(_buffer != nullptr); + assert(row < _mem_rows && "get(): row out of bounds"); + assert(column < _mem_cols && "get(): column out of bounds"); + if (_packed) { + UINT32 i = row, j = column; + if (i < j) { i = column; j = row; } + return _buffer[packed_index(_rows, i, j)]; + } + if (_symmetric && row < column) + return DNAMATRIX_ELEMENT(_buffer, _mem_rows, _mem_cols, column, row); return DNAMATRIX_ELEMENT(_buffer, _mem_rows, _mem_cols, row, column); } inline double* getbuffer(const UINT32& row, const UINT32& column) const { + assert(_buffer != nullptr); + assert(!_packed && "getbuffer(row,col): not valid for packed storage"); + assert(row < _mem_rows && "getbuffer(): row out of bounds"); + assert(column < _mem_cols && "getbuffer(): column out of bounds"); return _buffer + DNAMATRIX_INDEX(_mem_rows, _mem_cols, row, column); } @@ -236,9 +267,29 @@ class matrix_2d : public new_handler_support { inline UINT32 maxvalueCol() const { return _maxvalCol; } inline double* getelementref(const UINT32& row, const UINT32& column) 
const { + assert(_buffer != nullptr); + assert(row < _mem_rows && "getelementref(): row out of bounds"); + assert(column < _mem_cols && "getelementref(): column out of bounds"); + if (_packed) { + UINT32 i = row, j = column; + if (i < j) { i = column; j = row; } + return &_buffer[packed_index(_rows, i, j)]; + } + if (_symmetric && row < column) + return &(DNAMATRIX_ELEMENT(_buffer, _mem_rows, _mem_cols, column, row)); return &(DNAMATRIX_ELEMENT(_buffer, _mem_rows, _mem_cols, row, column)); } inline double* getelementref(const UINT32& row, const UINT32& column) { + assert(_buffer != nullptr); + assert(row < _mem_rows && "getelementref(): row out of bounds"); + assert(column < _mem_cols && "getelementref(): column out of bounds"); + if (_packed) { + UINT32 i = row, j = column; + if (i < j) { i = column; j = row; } + return &_buffer[packed_index(_rows, i, j)]; + } + if (_symmetric && row < column) + return &(DNAMATRIX_ELEMENT(_buffer, _mem_rows, _mem_cols, column, row)); return &(DNAMATRIX_ELEMENT(_buffer, _mem_rows, _mem_cols, row, column)); } @@ -250,34 +301,124 @@ class matrix_2d : public new_handler_support { inline void maxvalueCol(const UINT32& c) { _maxvalCol = c; } inline void put(const UINT32& row, const UINT32& column, const double& value) { + assert(_buffer != nullptr); + assert(row < _mem_rows && "put(): row out of bounds"); + assert(column < _mem_cols && "put(): column out of bounds"); + if (_packed) { + UINT32 i = row, j = column; + if (i < j) { i = column; j = row; } + _buffer[packed_index(_rows, i, j)] = value; + return; + } DNAMATRIX_ELEMENT(_buffer, _mem_rows, _mem_cols, row, column) = value; } inline UINT32 matrixType() const { return _matrixType; } inline void matrixType(const UINT32 t) { _matrixType = t; } + inline bool is_symmetric() const { return _symmetric; } + inline void set_symmetric(bool s) { + assert((!s || _rows == _cols) && "set_symmetric(true): matrix must be square"); + _symmetric = s; + } + + inline bool is_packed() const { return 
_packed; } + + static inline std::size_t packed_index(UINT32 n, UINT32 i, UINT32 j) { + return static_cast(j) * n - static_cast(j) * (j - 1) / 2 + (i - j); + } + + static inline std::size_t packed_size(UINT32 n) { + return static_cast(n) * (n + 1) / 2; + } + // Matrix functions - void copyelements(const UINT32& row_dest, const UINT32& column_dest, const matrix_2d& src, const UINT32& row_src, - const UINT32& column_src, const UINT32& rows, const UINT32& columns); - void copyelements(const UINT32& row_dest, const UINT32& column_dest, const matrix_2d* src, const UINT32& row_src, - const UINT32& column_src, const UINT32& rows, const UINT32& columns); + inline void copyelements(const UINT32& row_dest, const UINT32& column_dest, const matrix_2d& src, + const UINT32& row_src, const UINT32& column_src, + const UINT32& rows, const UINT32& columns) { + assert(_buffer != nullptr && src._buffer != nullptr); + assert(row_dest + rows <= _mem_rows && "copyelements(): dest row overflow"); + assert(column_dest + columns <= _mem_cols && "copyelements(): dest col overflow"); + assert(row_src + rows <= src._mem_rows && "copyelements(): src row overflow"); + assert(column_src + columns <= src._mem_cols && "copyelements(): src col overflow"); + if (rows == 3 && columns == 3 && !src._symmetric && !src._packed && !_packed) { + for (UINT32 c = 0; c < 3; ++c) { + double* dst = _buffer + static_cast(column_dest + c) * _mem_rows + row_dest; + const double* s = src._buffer + static_cast(column_src + c) * src._mem_rows + row_src; + dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; + } + return; + } + copyelements_generic(row_dest, column_dest, src, row_src, column_src, rows, columns); + } + void copyelements_generic(const UINT32& row_dest, const UINT32& column_dest, const matrix_2d& src, + const UINT32& row_src, const UINT32& column_src, + const UINT32& rows, const UINT32& columns); + inline void copyelements(const UINT32& row_dest, const UINT32& column_dest, const matrix_2d* src, + const UINT32& 
row_src, const UINT32& column_src, + const UINT32& rows, const UINT32& columns) { + copyelements(row_dest, column_dest, *src, row_src, column_src, rows, columns); + } inline void elementadd(const UINT32& row, const UINT32& column, const double& increment) { + assert(row < _rows && column < _cols && "elementadd(): out of bounds"); + if (_packed && row < column) return; *getelementref(row, column) += increment; } inline void elementsubtract(const UINT32& row, const UINT32& column, const double& decrement) { + assert(row < _rows && column < _cols && "elementsubtract(): out of bounds"); + if (_packed && row < column) return; *getelementref(row, column) -= decrement; } inline void elementmultiply(const UINT32& row, const UINT32& column, const double& scale) { + assert(row < _rows && column < _cols && "elementmultiply(): out of bounds"); *getelementref(row, column) *= scale; } - void blockadd(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, const UINT32& row_src, - const UINT32& col_src, const UINT32& rows, const UINT32& cols); - void blockTadd(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, const UINT32& row_src, - const UINT32& col_src, const UINT32& rows, const UINT32& cols); + inline void blockadd(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, + const UINT32& row_src, const UINT32& col_src, const UINT32& rows, const UINT32& cols) { + assert(_buffer != nullptr && mat_src._buffer != nullptr); + assert(row_dest + rows <= _mem_rows && "blockadd(): dest row overflow"); + assert(col_dest + cols <= _mem_cols && "blockadd(): dest col overflow"); + assert(row_src + rows <= mat_src._mem_rows && "blockadd(): src row overflow"); + assert(col_src + cols <= mat_src._mem_cols && "blockadd(): src col overflow"); + if (rows == 3 && cols == 3 && !mat_src._symmetric && !mat_src._packed && !_packed) { + for (UINT32 c = 0; c < 3; ++c) { + double* dst = _buffer + static_cast(col_dest + c) * _mem_rows + row_dest; 
+ const double* src = mat_src._buffer + static_cast(col_src + c) * mat_src._mem_rows + row_src; + dst[0] += src[0]; dst[1] += src[1]; dst[2] += src[2]; + } + return; + } + blockadd_generic(row_dest, col_dest, mat_src, row_src, col_src, rows, cols); + } + void blockadd_generic(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, + const UINT32& row_src, const UINT32& col_src, const UINT32& rows, const UINT32& cols); + + inline void blockTadd(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, + const UINT32& row_src, const UINT32& col_src, const UINT32& rows, const UINT32& cols) { + assert(_buffer != nullptr && mat_src._buffer != nullptr); + assert(row_dest + rows <= _mem_rows && "blockTadd(): dest row overflow"); + assert(col_dest + cols <= _mem_cols && "blockTadd(): dest col overflow"); + assert(row_src + cols <= mat_src._mem_rows && "blockTadd(): src row overflow (transposed)"); + assert(col_src + rows <= mat_src._mem_cols && "blockTadd(): src col overflow (transposed)"); + if (rows == 3 && cols == 3 && !_packed) { + for (UINT32 c = 0; c < 3; ++c) { + double* dst = _buffer + static_cast(col_dest + c) * _mem_rows + row_dest; + const std::size_t smr = mat_src._mem_rows; + const double* src_r0 = mat_src._buffer + static_cast(row_src) * smr + col_src + c; + dst[0] += src_r0[0]; + dst[1] += src_r0[smr]; + dst[2] += src_r0[2 * smr]; + } + return; + } + blockTadd_generic(row_dest, col_dest, mat_src, row_src, col_src, rows, cols); + } + void blockTadd_generic(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, + const UINT32& row_src, const UINT32& col_src, const UINT32& rows, const UINT32& cols); void blocksubtract(const UINT32& row_dest, const UINT32& col_dest, const matrix_2d& mat_src, const UINT32& row_src, const UINT32& col_src, const UINT32& rows, const UINT32& cols); @@ -287,13 +428,19 @@ class matrix_2d : public new_handler_support { matrix_2d multiply(const char* lhs_trans, const matrix_2d& rhs, 
const char* rhs_trans); // multiplication matrix_2d multiply(const matrix_2d& lhs, const char* lhs_trans, const matrix_2d& rhs, const char* rhs_trans); // multiplication + // C = sym_lhs * rhs using dsymm (sym_lhs must be symmetric, lower triangle populated) + matrix_2d multiply_sym(const matrix_2d& sym_lhs, const matrix_2d& rhs); matrix_2d sweepinverse(); // Sweep inverse (good for rotation matrices) - matrix_2d cholesky_inverse(bool LOWER_IS_CLEARED = false); // Cholesky inverse + matrix_2d cholesky_inverse(bool LOWER_IS_CLEARED = false, bool mark_symmetric = false); // Cholesky inverse + matrix_2d cholesky_factor(bool LOWER_IS_CLEARED = false); // Cholesky factor only (dpotrf), no inverse + void cholesky_solve(matrix_2d& rhs, bool LOWER_IS_CLEARED = false); // Solve from pre-factored matrix (dpotrs) + double dot(const matrix_2d& other) const; // Inner product of two column vectors matrix_2d transpose(const matrix_2d&); // Transpose matrix_2d transpose(); // '' matrix_2d scale(const double& scalar); // scale + void scale_symmetric_diagonal(const double* diag); // overloaded operators // equality @@ -347,6 +494,7 @@ class matrix_2d : public new_handler_support<matrix_2d> { void setsize(const UINT32& rows, const UINT32& columns); // sets matrix size to rows * columns only (buffer not allocated any memory) void redim(const UINT32& rows, const UINT32& columns); // redimensions matrix to rows * columns + void redim_packed(const UINT32& n); // redimensions to packed symmetric n×n void replace(const UINT32& rowstart, const UINT32& columnstart, const matrix_2d& newmat); void replace(const UINT32& rowstart, const UINT32& columnstart, const UINT32& rows, const UINT32& columns, const matrix_2d& newmat); @@ -369,6 +517,14 @@ class matrix_2d : public new_handler_support<matrix_2d> { // Writing to memory mapped file void WriteMappedFileRegion(void* addr); + // In-place mmap: attach _buffer directly to mmap data region (no memcpy) + void AttachMappedFileRegion(void* addr); + + // In-place mmap:
write footer back, detach _buffer + void DetachMappedFileRegion(void* addr); + + inline bool owns_buffer() const { return _owns_buffer; } + + // debug #ifdef _MSDEBUG void trace(const std::string& comment, const std::string& format) const; @@ -376,12 +532,14 @@ const UINT32& row_begin, const UINT32& col_begin, const UINT32& rows, const UINT32& columns) const; #endif + void deallocate(); + private: inline std::size_t buffersize() const { + if (_packed) + return packed_size(_mem_rows) * sizeof(double); return static_cast<std::size_t>(_mem_rows) * static_cast<std::size_t>(_mem_cols) * sizeof(double); } - - void deallocate(); void buy(const UINT32& rows, const UINT32& columns, double** mem_space); void copybuffer(const UINT32& rows, const UINT32& columns, const matrix_2d& oldmat); void copybuffer(const UINT32& rowstart, const UINT32& columnstart, const UINT32& rows, const UINT32& columns, @@ -395,11 +553,14 @@ class matrix_2d : public new_handler_support<matrix_2d> { UINT32 _cols; // number of actual cols UINT32 _rows; // number of actual rows double* _buffer; // matrix buffer elements + bool _owns_buffer; // true if _buffer is heap-allocated (false when attached to mmap) UINT32 _maxvalCol; // col of max value UINT32 _maxvalRow; // row of max value UINT32 _matrixType; // full, upper/lower, sparse + bool _symmetric; // only lower triangle populated (upper is mirror) + bool _packed; // packed column-major lower-triangle storage (n*(n+1)/2 elements) }; } // namespace math diff --git a/dynadjust/include/memory/dnafile_mapping.cpp b/dynadjust/include/memory/dnafile_mapping.cpp index 1a301b315..675e3c600 100644 --- a/dynadjust/include/memory/dnafile_mapping.cpp +++ b/dynadjust/include/memory/dnafile_mapping.cpp @@ -22,7 +22,13 @@ #include <include/memory/dnafile_mapping.hpp> -namespace dynadjust { +#ifdef __linux__ +#include <sys/mman.h> +#include <unistd.h> +#include <cstdint> +#endif + +namespace dynadjust { namespace memory { //block_map_t::block_map_t() @@ -72,15 +78,33 @@ void
block_map_t::MapRegion(FileMapPtr file_map_ptr) { region_ptr_.reset( new boost::interprocess::mapped_region( - *file_map_ptr, - boost::interprocess::read_write, - region_offset_, + *file_map_ptr, + boost::interprocess::read_write, + region_offset_, data_size_ ) ); } +void block_map_t::AdviseSequential() { + if (region_ptr_) + region_ptr_->advise(boost::interprocess::mapped_region::advice_sequential); +} + + +void block_map_t::AdviseDontNeed() { + if (region_ptr_) + region_ptr_->advise(boost::interprocess::mapped_region::advice_dontneed); +} + + +void block_map_t::AdviseWillNeed() { + if (region_ptr_) + region_ptr_->advise(boost::interprocess::mapped_region::advice_willneed); +} + + // class to hold addresses and sizes for all matrices // in a vector of segmented blocks vmat_file_map::vmat_file_map() @@ -129,10 +153,54 @@ void vmat_file_map::CreateFileMap() } -void vmat_file_map::MapRegion(const UINT32 block) +void vmat_file_map::MapRegion(const UINT32 block) { vblockMapRegions_.at(block).MapRegion(file_map_ptr_); } + + +void vmat_file_map::AdviseRegion(const UINT32 block, boost::interprocess::mapped_region::advice_types advice) +{ + auto& region = vblockMapRegions_.at(block); +#ifdef __linux__ + // Boost's mapped_region::advise calls madvise() without + // page-aligning the address. Stage regions are packed + // end-to-end so only the first region's address is page- + // aligned by chance; madvise on the rest fails silently + // with EINVAL. Round start up and end down to whole + // pages, then call madvise directly. For DONTNEED also + // flush dirty pages asynchronously so the kernel can + // evict them promptly. 
+ if (region.region_ptr_ && region.data_size_ > 0) { + const std::size_t page_size = + static_cast<std::size_t>(sysconf(_SC_PAGESIZE)); + auto base = reinterpret_cast<std::uintptr_t>( + region.region_ptr_->get_address()); + std::uintptr_t aligned_start = + (base + page_size - 1) & ~(page_size - 1); + std::uintptr_t aligned_end = + (base + region.data_size_) & ~(page_size - 1); + if (aligned_end > aligned_start) { + void* addr = reinterpret_cast<void*>(aligned_start); + std::size_t len = aligned_end - aligned_start; + int madv = MADV_NORMAL; + switch (advice) { + case boost::interprocess::mapped_region::advice_sequential: + madv = MADV_SEQUENTIAL; break; + case boost::interprocess::mapped_region::advice_willneed: + madv = MADV_WILLNEED; break; + case boost::interprocess::mapped_region::advice_dontneed: + msync(addr, len, MS_ASYNC); + madv = MADV_DONTNEED; break; + default: break; + } + madvise(addr, len, madv); + return; + } + } +#endif + region.region_ptr_->advise(advice); +} } // namespace memory diff --git a/dynadjust/include/memory/dnafile_mapping.hpp b/dynadjust/include/memory/dnafile_mapping.hpp index ff33a8eb2..abd08859e 100644 --- a/dynadjust/include/memory/dnafile_mapping.hpp +++ b/dynadjust/include/memory/dnafile_mapping.hpp @@ -64,12 +64,17 @@ class block_map_t inline size_t GetDataSize() const { return data_size_; } inline size_t GetRegionOffset() const { return region_offset_; } inline size_t GetCumulativeRegionOffset() const { return region_offset_ + data_size_; } - + inline void SetDataSize(const size_t& size) { data_size_ = size; } inline void SetRegionOffset(const size_t& size) { region_offset_ = size; } - + void MapRegion(FileMapPtr file_map_ptr); + // Memory advisory hints for mapped regions + void AdviseSequential(); + void AdviseDontNeed(); + void AdviseWillNeed(); + size_t data_size_; // Size of this matrix.
size_t region_offset_; // Offset from the beginning of the region MapRegPtr region_ptr_; // shared pointer to the region @@ -92,10 +97,11 @@ class vmat_file_map { void setnewFilePath(const std::string& filePath, bool remove_mapped_file); void CreateFileMap(); void MapRegion(const UINT32 block); + void AdviseRegion(const UINT32 block, boost::interprocess::mapped_region::advice_types advice); inline FileMapPtr getFileMapPtr() const { return file_map_ptr_; } - inline void* GetBlockRegionAddr(const UINT32 block) const { - return vblockMapRegions_.at(block).region_ptr_->get_address(); + inline void* GetBlockRegionAddr(const UINT32 block) const { + return vblockMapRegions_.at(block).region_ptr_->get_address(); } vmat_file_map(const vmat_file_map&); // prevent copying diff --git a/tests/test_matrix.cpp b/tests/test_matrix.cpp index c96d32dfa..ae7169cfd 100644 --- a/tests/test_matrix.cpp +++ b/tests/test_matrix.cpp @@ -22,6 +22,7 @@ #define TESTING_MAIN #include +#include #include #include "math/dnamatrix_contiguous.hpp" @@ -587,3 +588,947 @@ TEST_CASE("Cholesky inverse with LOWER_IS_CLEARED", "[matrix_2d]") { REQUIRE(abs(inverse.get(0, 2) - inverse.get(2, 0)) < 1e-10); REQUIRE(abs(inverse.get(1, 2) - inverse.get(2, 1)) < 1e-10); } + +// ============================================================================ +// Symmetric matrix support +// ============================================================================ + +TEST_CASE("Symmetric flag default is false", "[matrix_2d][symmetric]") { + matrix_2d mat(3, 3); + REQUIRE(mat.is_symmetric() == false); +} + +TEST_CASE("Symmetric flag set and get", "[matrix_2d][symmetric]") { + matrix_2d mat(3, 3); + mat.set_symmetric(true); + REQUIRE(mat.is_symmetric() == true); + mat.set_symmetric(false); + REQUIRE(mat.is_symmetric() == false); +} + +TEST_CASE("Symmetric flag propagates through copy constructor", "[matrix_2d][symmetric]") { + matrix_2d mat(3, 3); + mat.put(0, 0, 1.0); mat.put(1, 0, 2.0); mat.put(1, 1, 3.0); + 
mat.put(2, 0, 4.0); mat.put(2, 1, 5.0); mat.put(2, 2, 6.0); + mat.set_symmetric(true); + + matrix_2d copy(mat); + REQUIRE(copy.is_symmetric() == true); + REQUIRE(copy.get(0, 1) == 2.0); // reflects from lower +} + +TEST_CASE("Symmetric flag propagates through operator=", "[matrix_2d][symmetric]") { + matrix_2d mat(3, 3); + mat.put(0, 0, 1.0); mat.put(1, 0, 2.0); mat.put(1, 1, 3.0); + mat.set_symmetric(true); + + matrix_2d other(3, 3); + other = mat; + REQUIRE(other.is_symmetric() == true); + REQUIRE(other.get(0, 1) == 2.0); +} + +TEST_CASE("Symmetric get reflects upper to lower", "[matrix_2d][symmetric]") { + // Populate only the lower triangle + diagonal + matrix_2d mat(4, 4); + mat.put(0, 0, 1.0); + mat.put(1, 0, 2.0); mat.put(1, 1, 3.0); + mat.put(2, 0, 4.0); mat.put(2, 1, 5.0); mat.put(2, 2, 6.0); + mat.put(3, 0, 7.0); mat.put(3, 1, 8.0); mat.put(3, 2, 9.0); mat.put(3, 3, 10.0); + mat.set_symmetric(true); + + // Diagonal + REQUIRE(mat.get(0, 0) == 1.0); + REQUIRE(mat.get(1, 1) == 3.0); + REQUIRE(mat.get(2, 2) == 6.0); + REQUIRE(mat.get(3, 3) == 10.0); + + // Lower triangle (direct) + REQUIRE(mat.get(1, 0) == 2.0); + REQUIRE(mat.get(2, 0) == 4.0); + REQUIRE(mat.get(3, 2) == 9.0); + + // Upper triangle (reflected from lower) + REQUIRE(mat.get(0, 1) == 2.0); + REQUIRE(mat.get(0, 2) == 4.0); + REQUIRE(mat.get(0, 3) == 7.0); + REQUIRE(mat.get(1, 2) == 5.0); + REQUIRE(mat.get(1, 3) == 8.0); + REQUIRE(mat.get(2, 3) == 9.0); +} + +TEST_CASE("Symmetric getelementref reflects upper to lower", "[matrix_2d][symmetric]") { + matrix_2d mat(3, 3); + mat.put(0, 0, 10.0); + mat.put(1, 0, 20.0); mat.put(1, 1, 30.0); + mat.put(2, 0, 40.0); mat.put(2, 1, 50.0); mat.put(2, 2, 60.0); + mat.set_symmetric(true); + + // const version + const matrix_2d& cmat = mat; + REQUIRE(*cmat.getelementref(0, 2) == 40.0); // reflects (0,2) -> (2,0) + REQUIRE(*cmat.getelementref(2, 0) == 40.0); // direct + + // non-const version + REQUIRE(*mat.getelementref(1, 2) == 50.0); // reflects (1,2) -> (2,1) 
+} + +TEST_CASE("Symmetric redim clears flag", "[matrix_2d][symmetric]") { + matrix_2d mat(3, 3); + mat.set_symmetric(true); + mat.redim(4, 4); + REQUIRE(mat.is_symmetric() == false); +} + +TEST_CASE("Cholesky inverse with mark_symmetric", "[matrix_2d][symmetric]") { + matrix_2d mat(3, 3); + mat.put(0, 0, 4.0); + mat.put(0, 1, -1.0); + mat.put(0, 2, -1.0); + mat.put(1, 0, -1.0); + mat.put(1, 1, 3.0); + mat.put(1, 2, -1.0); + mat.put(2, 0, -1.0); + mat.put(2, 1, -1.0); + mat.put(2, 2, 2.0); + + matrix_2d inverse = mat.cholesky_inverse(false, true); + + REQUIRE(inverse.is_symmetric() == true); + // Values should match the non-symmetric cholesky inverse + REQUIRE(abs(inverse.get(0, 0) - 0.384615) < 0.0001); + REQUIRE(abs(inverse.get(1, 1) - 0.538462) < 0.0001); + REQUIRE(abs(inverse.get(2, 2) - 0.846154) < 0.0001); + // Upper triangle via symmetric reflection + REQUIRE(abs(inverse.get(0, 1) - 0.230769) < 0.0001); + REQUIRE(abs(inverse.get(0, 2) - 0.307692) < 0.0001); + REQUIRE(abs(inverse.get(1, 2) - 0.384615) < 0.0001); + // Symmetry + REQUIRE(abs(inverse.get(0, 1) - inverse.get(1, 0)) < 1e-10); + REQUIRE(abs(inverse.get(0, 2) - inverse.get(2, 0)) < 1e-10); + REQUIRE(abs(inverse.get(1, 2) - inverse.get(2, 1)) < 1e-10); +} + +TEST_CASE("Cholesky mark_symmetric matches full inverse", "[matrix_2d][symmetric]") { + // Larger 5x5 SPD matrix + matrix_2d full(5, 5); + matrix_2d sym(5, 5); + double spd[] = { + 10, 1, 2, 0, 1, + 1, 8, 1, 2, 0, + 2, 1, 7, 1, 1, + 0, 2, 1, 6, 1, + 1, 0, 1, 1, 5 + }; + for (UINT32 i = 0; i < 5; ++i) + for (UINT32 j = 0; j < 5; ++j) { + full.put(i, j, spd[i * 5 + j]); + sym.put(i, j, spd[i * 5 + j]); + } + + full.cholesky_inverse(false, false); // full inverse with fillupper + sym.cholesky_inverse(false, true); // symmetric inverse + + REQUIRE(sym.is_symmetric() == true); + REQUIRE(full.is_symmetric() == false); + + // All elements should match + for (UINT32 i = 0; i < 5; ++i) + for (UINT32 j = 0; j < 5; ++j) + REQUIRE(abs(sym.get(i, j) - 
full.get(i, j)) < 1e-12); +} + +TEST_CASE("multiply_sym matches dgemm multiply", "[matrix_2d][symmetric]") { + // Create a symmetric 4x4 matrix and a 4x2 rhs + matrix_2d A(4, 4); + double spd[] = { + 5, 1, 2, 0, + 1, 4, 1, 1, + 2, 1, 6, 1, + 0, 1, 1, 3 + }; + for (UINT32 i = 0; i < 4; ++i) + for (UINT32 j = 0; j < 4; ++j) + A.put(i, j, spd[i * 4 + j]); + + matrix_2d B(4, 2); + B.put(0, 0, 1.0); B.put(0, 1, 5.0); + B.put(1, 0, 2.0); B.put(1, 1, 6.0); + B.put(2, 0, 3.0); B.put(2, 1, 7.0); + B.put(3, 0, 4.0); B.put(3, 1, 8.0); + + // dgemm result + matrix_2d C_full(4, 2); + C_full.multiply(A, "N", B, "N"); + + // dsymm result — only lower triangle in A_sym + matrix_2d A_sym(4, 4); + for (UINT32 i = 0; i < 4; ++i) + for (UINT32 j = 0; j <= i; ++j) + A_sym.put(i, j, spd[i * 4 + j]); + A_sym.set_symmetric(true); + + matrix_2d C_sym(4, 2); + C_sym.multiply_sym(A_sym, B); + + for (UINT32 i = 0; i < 4; ++i) + for (UINT32 j = 0; j < 2; ++j) + REQUIRE(abs(C_sym.get(i, j) - C_full.get(i, j)) < 1e-12); +} + +TEST_CASE("multiply_sym with column vector", "[matrix_2d][symmetric]") { + // This mirrors the Solve() hot path: N_inv * At_Vinv_m + matrix_2d N(3, 3); + N.put(0, 0, 4.0); + N.put(0, 1, -1.0); + N.put(0, 2, -1.0); + N.put(1, 0, -1.0); + N.put(1, 1, 3.0); + N.put(1, 2, -1.0); + N.put(2, 0, -1.0); + N.put(2, 1, -1.0); + N.put(2, 2, 2.0); + + // Full inverse + matrix_2d N_full(N); + N_full.cholesky_inverse(false, false); + + // Symmetric inverse + matrix_2d N_sym(N); + N_sym.cholesky_inverse(false, true); + + matrix_2d rhs(3, 1); + rhs.put(0, 0, 1.0); + rhs.put(1, 0, 2.0); + rhs.put(2, 0, 3.0); + + // dgemm + matrix_2d result_full(3, 1); + result_full.multiply(N_full, "N", rhs, "N"); + + // dsymm + matrix_2d result_sym(3, 1); + result_sym.multiply_sym(N_sym, rhs); + + for (UINT32 i = 0; i < 3; ++i) + REQUIRE(abs(result_sym.get(i, 0) - result_full.get(i, 0)) < 1e-12); +} + +TEST_CASE("Symmetric copyelements 3x3 from lower triangle", "[matrix_2d][symmetric]") { + matrix_2d 
src(6, 6); + // Fill lower triangle only + for (UINT32 i = 0; i < 6; ++i) + for (UINT32 j = 0; j <= i; ++j) + src.put(i, j, static_cast(i * 10 + j)); + src.set_symmetric(true); + + matrix_2d dst(3, 3); + + // Copy from lower triangle region (row >= col) — should work directly + dst.copyelements(0, 0, src, 3, 0, 3, 3); + REQUIRE(dst.get(0, 0) == 30.0); + REQUIRE(dst.get(1, 0) == 40.0); + REQUIRE(dst.get(2, 0) == 50.0); + REQUIRE(dst.get(0, 1) == 31.0); + REQUIRE(dst.get(1, 1) == 41.0); + REQUIRE(dst.get(2, 1) == 51.0); + REQUIRE(dst.get(0, 2) == 32.0); + REQUIRE(dst.get(1, 2) == 42.0); + REQUIRE(dst.get(2, 2) == 52.0); +} + +TEST_CASE("Symmetric copyelements 3x3 from upper triangle", "[matrix_2d][symmetric]") { + matrix_2d src(6, 6); + // Fill lower triangle only: src(i,j) = i*10+j for j<=i + for (UINT32 i = 0; i < 6; ++i) + for (UINT32 j = 0; j <= i; ++j) + src.put(i, j, static_cast(i * 10 + j)); + src.set_symmetric(true); + + matrix_2d dst(3, 3); + + // Copy from upper triangle region (row_src=0, col_src=3) + // dst(r,c) = src.get(r, 3+c) which reflects to src(3+c, r) = (3+c)*10 + r + dst.copyelements(0, 0, src, 0, 3, 3, 3); + REQUIRE(dst.get(0, 0) == 30.0); // src.get(0,3) = src(3,0) = 30 + REQUIRE(dst.get(1, 0) == 31.0); // src.get(1,3) = src(3,1) = 31 + REQUIRE(dst.get(2, 0) == 32.0); // src.get(2,3) = src(3,2) = 32 + REQUIRE(dst.get(0, 1) == 40.0); // src.get(0,4) = src(4,0) = 40 + REQUIRE(dst.get(1, 1) == 41.0); // src.get(1,4) = src(4,1) = 41 + REQUIRE(dst.get(2, 1) == 42.0); // src.get(2,4) = src(4,2) = 42 + REQUIRE(dst.get(0, 2) == 50.0); // src.get(0,5) = src(5,0) = 50 + REQUIRE(dst.get(1, 2) == 51.0); // src.get(1,5) = src(5,1) = 51 + REQUIRE(dst.get(2, 2) == 52.0); // src.get(2,5) = src(5,2) = 52 +} + +TEST_CASE("Symmetric blockadd 3x3 from upper triangle", "[matrix_2d][symmetric]") { + matrix_2d src(6, 6); + for (UINT32 i = 0; i < 6; ++i) + for (UINT32 j = 0; j <= i; ++j) + src.put(i, j, static_cast(i * 10 + j)); + src.set_symmetric(true); + + 
matrix_2d dst(3, 3); + // Pre-fill with 1.0 to verify addition + for (UINT32 i = 0; i < 3; ++i) + for (UINT32 j = 0; j < 3; ++j) + dst.put(i, j, 1.0); + + // blockadd from upper triangle (row_src=0, col_src=3) + dst.blockadd(0, 0, src, 0, 3, 3, 3); + + // dst(r,c) = 1.0 + src.get(r, 3+c) + REQUIRE(dst.get(0, 0) == 31.0); // 1 + 30 + REQUIRE(dst.get(1, 0) == 32.0); // 1 + 31 + REQUIRE(dst.get(2, 0) == 33.0); // 1 + 32 + REQUIRE(dst.get(0, 1) == 41.0); // 1 + 40 + REQUIRE(dst.get(1, 1) == 42.0); // 1 + 41 + REQUIRE(dst.get(2, 1) == 43.0); // 1 + 42 + REQUIRE(dst.get(0, 2) == 51.0); // 1 + 50 + REQUIRE(dst.get(1, 2) == 52.0); // 1 + 51 + REQUIRE(dst.get(2, 2) == 53.0); // 1 + 52 +} + +TEST_CASE("Symmetric copyelements generic from upper triangle", "[matrix_2d][symmetric]") { + matrix_2d src(8, 8); + for (UINT32 i = 0; i < 8; ++i) + for (UINT32 j = 0; j <= i; ++j) + src.put(i, j, static_cast(i * 10 + j)); + src.set_symmetric(true); + + // Copy a 4x4 block from upper triangle (row=0, col=4) + matrix_2d dst(4, 4); + dst.copyelements(0, 0, src, 0, 4, 4, 4); + + // Verify via get() which handles reflection + for (UINT32 i = 0; i < 4; ++i) + for (UINT32 j = 0; j < 4; ++j) + REQUIRE(abs(dst.get(i, j) - src.get(i, 4 + j)) < 1e-15); +} + +TEST_CASE("End-to-end: cholesky mark_symmetric then multiply_sym", "[matrix_2d][symmetric]") { + // Simulate the full Solve() path + matrix_2d N(3, 3); + N.put(0, 0, 4.0); N.put(0, 1, -1.0); N.put(0, 2, -1.0); + N.put(1, 0, -1.0); N.put(1, 1, 3.0); N.put(1, 2, -1.0); + N.put(2, 0, -1.0); N.put(2, 1, -1.0); N.put(2, 2, 2.0); + + matrix_2d N_ref(N); + + // Reference: full inverse + dgemm + N_ref.cholesky_inverse(false, false); + + matrix_2d rhs(3, 1); + rhs.put(0, 0, 10.0); + rhs.put(1, 0, 20.0); + rhs.put(2, 0, 30.0); + + matrix_2d ref_result(3, 1); + ref_result.multiply(N_ref, "N", rhs, "N"); + + // Test: symmetric inverse + dsymm + N.cholesky_inverse(false, true); + REQUIRE(N.is_symmetric() == true); + + matrix_2d sym_result(3, 1); + 
sym_result.multiply_sym(N, rhs); + + for (UINT32 i = 0; i < 3; ++i) + REQUIRE(abs(sym_result.get(i, 0) - ref_result.get(i, 0)) < 1e-12); +} + +// ============================================================================ +// Packed symmetric matrix storage +// ============================================================================ + +TEST_CASE("Packed allocation has correct buffer size", "[matrix_2d][packed]") { + matrix_2d mat; + mat.redim_packed(4); + REQUIRE(mat.rows() == 4); + REQUIRE(mat.columns() == 4); + REQUIRE(mat.is_packed() == true); + REQUIRE(mat.is_symmetric() == true); + // n*(n+1)/2 = 10 elements + REQUIRE(mat.memRows() == 4); +} + +TEST_CASE("Packed element access: put and get reflect symmetry", "[matrix_2d][packed]") { + matrix_2d mat; + mat.redim_packed(4); + + mat.put(0, 0, 1.0); + mat.put(1, 0, 2.0); mat.put(1, 1, 3.0); + mat.put(2, 0, 4.0); mat.put(2, 1, 5.0); mat.put(2, 2, 6.0); + mat.put(3, 0, 7.0); mat.put(3, 1, 8.0); mat.put(3, 2, 9.0); mat.put(3, 3, 10.0); + + // Diagonal + REQUIRE(mat.get(0, 0) == 1.0); + REQUIRE(mat.get(1, 1) == 3.0); + REQUIRE(mat.get(2, 2) == 6.0); + REQUIRE(mat.get(3, 3) == 10.0); + + // Lower triangle (direct) + REQUIRE(mat.get(1, 0) == 2.0); + REQUIRE(mat.get(2, 0) == 4.0); + REQUIRE(mat.get(3, 2) == 9.0); + + // Upper triangle (reflected from lower) + REQUIRE(mat.get(0, 1) == 2.0); + REQUIRE(mat.get(0, 2) == 4.0); + REQUIRE(mat.get(0, 3) == 7.0); + REQUIRE(mat.get(1, 2) == 5.0); + REQUIRE(mat.get(1, 3) == 8.0); + REQUIRE(mat.get(2, 3) == 9.0); +} + +TEST_CASE("Packed put via upper triangle maps to lower", "[matrix_2d][packed]") { + matrix_2d mat; + mat.redim_packed(3); + + mat.put(0, 1, 42.0); + REQUIRE(mat.get(1, 0) == 42.0); + REQUIRE(mat.get(0, 1) == 42.0); +} + +TEST_CASE("Packed elementadd skips upper triangle", "[matrix_2d][packed]") { + matrix_2d mat; + mat.redim_packed(3); + + mat.elementadd(1, 0, 10.0); + mat.elementadd(0, 1, 5.0); // skipped — upper triangle + REQUIRE(mat.get(1, 0) == 10.0); + 
REQUIRE(mat.get(0, 1) == 10.0); + + mat.elementadd(1, 0, 3.0); // lower triangle — accumulates + REQUIRE(mat.get(1, 0) == 13.0); +} + +TEST_CASE("Packed cholesky_inverse matches full inverse", "[matrix_2d][packed]") { + matrix_2d full(3, 3); + full.put(0, 0, 4.0); full.put(0, 1, -1.0); full.put(0, 2, -1.0); + full.put(1, 0, -1.0); full.put(1, 1, 3.0); full.put(1, 2, -1.0); + full.put(2, 0, -1.0); full.put(2, 1, -1.0); full.put(2, 2, 2.0); + + matrix_2d packed; + packed.redim_packed(3); + packed.put(0, 0, 4.0); + packed.put(1, 0, -1.0); packed.put(1, 1, 3.0); + packed.put(2, 0, -1.0); packed.put(2, 1, -1.0); packed.put(2, 2, 2.0); + + full.cholesky_inverse(); + packed.cholesky_inverse(); + + REQUIRE(packed.is_packed() == true); + for (UINT32 i = 0; i < 3; ++i) + for (UINT32 j = 0; j < 3; ++j) + REQUIRE(abs(packed.get(i, j) - full.get(i, j)) < 1e-10); +} + +TEST_CASE("Packed cholesky_inverse throws on indefinite matrix", "[matrix_2d][packed]") { + matrix_2d mat; + mat.redim_packed(2); + mat.put(0, 0, 2.0); + mat.put(1, 0, 1.0); + mat.put(1, 1, -1.0); + + bool caught = false; + try { mat.cholesky_inverse(); } + catch (const MatrixInversionFailure&) { caught = true; } + REQUIRE(caught); +} + +TEST_CASE("Packed multiply_sym with vector matches full result", "[matrix_2d][packed]") { + matrix_2d full(3, 3); + full.put(0, 0, 4.0); full.put(0, 1, -1.0); full.put(0, 2, -1.0); + full.put(1, 0, -1.0); full.put(1, 1, 3.0); full.put(1, 2, -1.0); + full.put(2, 0, -1.0); full.put(2, 1, -1.0); full.put(2, 2, 2.0); + + matrix_2d packed; + packed.redim_packed(3); + packed.put(0, 0, 4.0); + packed.put(1, 0, -1.0); packed.put(1, 1, 3.0); + packed.put(2, 0, -1.0); packed.put(2, 1, -1.0); packed.put(2, 2, 2.0); + + matrix_2d rhs(3, 1); + rhs.put(0, 0, 1.0); rhs.put(1, 0, 2.0); rhs.put(2, 0, 3.0); + + matrix_2d result_full(3, 1); + result_full.multiply(full, "N", rhs, "N"); + + matrix_2d result_packed(3, 1); + result_packed.multiply_sym(packed, rhs); + + for (UINT32 i = 0; i < 3; ++i) + 
REQUIRE(abs(result_packed.get(i, 0) - result_full.get(i, 0)) < 1e-12); +} + +TEST_CASE("Packed multiply_sym with multi-column rhs", "[matrix_2d][packed]") { + matrix_2d packed; + packed.redim_packed(4); + double spd[] = {5, 1, 2, 0, 1, 4, 1, 1, 2, 1, 6, 1, 0, 1, 1, 3}; + for (UINT32 i = 0; i < 4; ++i) + for (UINT32 j = 0; j <= i; ++j) + packed.put(i, j, spd[i * 4 + j]); + + matrix_2d full(4, 4); + for (UINT32 i = 0; i < 4; ++i) + for (UINT32 j = 0; j < 4; ++j) + full.put(i, j, spd[i * 4 + j]); + + matrix_2d B(4, 2); + B.put(0, 0, 1.0); B.put(0, 1, 5.0); + B.put(1, 0, 2.0); B.put(1, 1, 6.0); + B.put(2, 0, 3.0); B.put(2, 1, 7.0); + B.put(3, 0, 4.0); B.put(3, 1, 8.0); + + matrix_2d C_full(4, 2); + C_full.multiply(full, "N", B, "N"); + + matrix_2d C_packed(4, 2); + C_packed.multiply_sym(packed, B); + + for (UINT32 i = 0; i < 4; ++i) + for (UINT32 j = 0; j < 2; ++j) + REQUIRE(abs(C_packed.get(i, j) - C_full.get(i, j)) < 1e-12); +} + +TEST_CASE("Packed scale_symmetric_diagonal", "[matrix_2d][packed]") { + matrix_2d packed; + packed.redim_packed(3); + packed.put(0, 0, 4.0); + packed.put(1, 0, -1.0); packed.put(1, 1, 3.0); + packed.put(2, 0, -1.0); packed.put(2, 1, -1.0); packed.put(2, 2, 2.0); + + matrix_2d full(3, 3); + full.put(0, 0, 4.0); full.put(0, 1, -1.0); full.put(0, 2, -1.0); + full.put(1, 0, -1.0); full.put(1, 1, 3.0); full.put(1, 2, -1.0); + full.put(2, 0, -1.0); full.put(2, 1, -1.0); full.put(2, 2, 2.0); + full.set_symmetric(true); + + double diag[] = {2.0, 3.0, 0.5}; + packed.scale_symmetric_diagonal(diag); + full.scale_symmetric_diagonal(diag); + + for (UINT32 i = 0; i < 3; ++i) + for (UINT32 j = 0; j < 3; ++j) + REQUIRE(abs(packed.get(i, j) - full.get(i, j)) < 1e-12); +} + +TEST_CASE("Packed blockadd diagonal block", "[matrix_2d][packed]") { + matrix_2d packed; + packed.redim_packed(4); + + // Diagonal blocks only write lower triangle via elementadd + // (mirrors how AddMsrtoNormalsVar works) + packed.elementadd(1, 1, 10.0); + packed.elementadd(2, 1, 20.0); 
+ packed.elementadd(2, 2, 40.0); + + REQUIRE(packed.get(1, 1) == 10.0); + REQUIRE(packed.get(2, 1) == 20.0); + REQUIRE(packed.get(1, 2) == 20.0); + REQUIRE(packed.get(2, 2) == 40.0); + REQUIRE(packed.get(0, 0) == 0.0); +} + +TEST_CASE("Packed submatrix extraction", "[matrix_2d][packed]") { + matrix_2d packed; + packed.redim_packed(4); + packed.put(0, 0, 1.0); + packed.put(1, 0, 2.0); packed.put(1, 1, 3.0); + packed.put(2, 0, 4.0); packed.put(2, 1, 5.0); packed.put(2, 2, 6.0); + packed.put(3, 0, 7.0); packed.put(3, 1, 8.0); packed.put(3, 2, 9.0); packed.put(3, 3, 10.0); + + matrix_2d sub = packed.submatrix(1, 1, 2, 2); + REQUIRE(sub.rows() == 2); + REQUIRE(sub.columns() == 2); + REQUIRE(sub.get(0, 0) == 3.0); + REQUIRE(sub.get(0, 1) == 5.0); + REQUIRE(sub.get(1, 0) == 5.0); + REQUIRE(sub.get(1, 1) == 6.0); +} + +TEST_CASE("Packed copy constructor", "[matrix_2d][packed]") { + matrix_2d orig; + orig.redim_packed(3); + orig.put(0, 0, 1.0); + orig.put(1, 0, 2.0); orig.put(1, 1, 3.0); + orig.put(2, 0, 4.0); orig.put(2, 1, 5.0); orig.put(2, 2, 6.0); + + matrix_2d copy(orig); + REQUIRE(copy.is_packed() == true); + REQUIRE(copy.is_symmetric() == true); + for (UINT32 i = 0; i < 3; ++i) + for (UINT32 j = 0; j < 3; ++j) + REQUIRE(copy.get(i, j) == orig.get(i, j)); +} + +TEST_CASE("Packed operator= packed-to-packed", "[matrix_2d][packed]") { + matrix_2d a; + a.redim_packed(3); + a.put(0, 0, 10.0); a.put(1, 0, 20.0); a.put(1, 1, 30.0); + a.put(2, 0, 40.0); a.put(2, 1, 50.0); a.put(2, 2, 60.0); + + matrix_2d b; + b.redim_packed(2); + b = a; + REQUIRE(b.is_packed() == true); + REQUIRE(b.rows() == 3); + for (UINT32 i = 0; i < 3; ++i) + for (UINT32 j = 0; j < 3; ++j) + REQUIRE(b.get(i, j) == a.get(i, j)); +} + +TEST_CASE("Packed operator= full-to-packed preserves full", "[matrix_2d][packed]") { + matrix_2d full(3, 3); + full.put(0, 0, 1.0); full.put(0, 1, 2.0); full.put(0, 2, 3.0); + full.put(1, 0, 4.0); full.put(1, 1, 5.0); full.put(1, 2, 6.0); + full.put(2, 0, 7.0); full.put(2, 1, 
8.0); full.put(2, 2, 9.0); + + matrix_2d packed; + packed.redim_packed(3); + packed = full; + // Assigning a non-packed to a packed should convert to non-packed + REQUIRE(packed.is_packed() == false); + for (UINT32 i = 0; i < 3; ++i) + for (UINT32 j = 0; j < 3; ++j) + REQUIRE(packed.get(i, j) == full.get(i, j)); +} + +TEST_CASE("Packed redim_packed reuses buffer when shrinking", "[matrix_2d][packed]") { + matrix_2d mat; + mat.redim_packed(5); + mat.put(0, 0, 99.0); + + mat.redim_packed(3); + REQUIRE(mat.rows() == 3); + REQUIRE(mat.is_packed() == true); + REQUIRE(mat.get(0, 0) == 0.0); +} + +TEST_CASE("Packed zero", "[matrix_2d][packed]") { + matrix_2d mat; + mat.redim_packed(3); + mat.put(0, 0, 1.0); mat.put(1, 0, 2.0); mat.put(2, 2, 3.0); + mat.zero(); + for (UINT32 i = 0; i < 3; ++i) + for (UINT32 j = 0; j < 3; ++j) + REQUIRE(mat.get(i, j) == 0.0); +} + +TEST_CASE("Packed scale", "[matrix_2d][packed]") { + matrix_2d mat; + mat.redim_packed(3); + mat.put(0, 0, 2.0); + mat.put(1, 0, 3.0); mat.put(1, 1, 4.0); + mat.put(2, 0, 5.0); mat.put(2, 1, 6.0); mat.put(2, 2, 7.0); + mat.scale(2.0); + + REQUIRE(mat.get(0, 0) == 4.0); + REQUIRE(mat.get(1, 0) == 6.0); + REQUIRE(mat.get(2, 2) == 14.0); + REQUIRE(mat.get(0, 1) == 6.0); // reflected +} + +TEST_CASE("Packed end-to-end: cholesky then multiply_sym", "[matrix_2d][packed]") { + matrix_2d full(3, 3); + full.put(0, 0, 4.0); full.put(0, 1, -1.0); full.put(0, 2, -1.0); + full.put(1, 0, -1.0); full.put(1, 1, 3.0); full.put(1, 2, -1.0); + full.put(2, 0, -1.0); full.put(2, 1, -1.0); full.put(2, 2, 2.0); + + matrix_2d packed; + packed.redim_packed(3); + packed.put(0, 0, 4.0); + packed.put(1, 0, -1.0); packed.put(1, 1, 3.0); + packed.put(2, 0, -1.0); packed.put(2, 1, -1.0); packed.put(2, 2, 2.0); + + // Reference + full.cholesky_inverse(false, false); + matrix_2d rhs(3, 1); + rhs.put(0, 0, 10.0); rhs.put(1, 0, 20.0); rhs.put(2, 0, 30.0); + matrix_2d ref_result(3, 1); + ref_result.multiply(full, "N", rhs, "N"); + + // Packed path 
+ packed.cholesky_inverse(); + matrix_2d packed_result(3, 1); + packed_result.multiply_sym(packed, rhs); + + for (UINT32 i = 0; i < 3; ++i) + REQUIRE(abs(packed_result.get(i, 0) - ref_result.get(i, 0)) < 1e-12); +} + +TEST_CASE("Packed copyelements from packed source", "[matrix_2d][packed]") { + matrix_2d packed; + packed.redim_packed(6); + for (UINT32 i = 0; i < 6; ++i) + for (UINT32 j = 0; j <= i; ++j) + packed.put(i, j, static_cast(i * 10 + j)); + + matrix_2d dst(3, 3); + dst.copyelements(0, 0, packed, 3, 0, 3, 3); + + REQUIRE(dst.get(0, 0) == 30.0); + REQUIRE(dst.get(1, 0) == 40.0); + REQUIRE(dst.get(2, 0) == 50.0); + REQUIRE(dst.get(0, 1) == 31.0); + REQUIRE(dst.get(1, 1) == 41.0); + REQUIRE(dst.get(2, 1) == 51.0); +} + +TEST_CASE("Packed 5x5 cholesky_inverse matches full", "[matrix_2d][packed]") { + double spd[] = { + 10, 1, 2, 0, 1, + 1, 8, 1, 2, 0, + 2, 1, 7, 1, 1, + 0, 2, 1, 6, 1, + 1, 0, 1, 1, 5 + }; + + matrix_2d full(5, 5); + matrix_2d packed; + packed.redim_packed(5); + + for (UINT32 i = 0; i < 5; ++i) + for (UINT32 j = 0; j < 5; ++j) { + full.put(i, j, spd[i * 5 + j]); + if (j <= i) + packed.put(i, j, spd[i * 5 + j]); + } + + full.cholesky_inverse(false, false); + packed.cholesky_inverse(); + + for (UINT32 i = 0; i < 5; ++i) + for (UINT32 j = 0; j < 5; ++j) + REQUIRE(abs(packed.get(i, j) - full.get(i, j)) < 1e-12); +} + +// ================================================================ +// In-place mmap buffer tests (AttachMappedFileRegion / DetachMappedFileRegion) +// ================================================================ + +TEST_CASE("In-place mmap round-trip: full matrix", "[matrix_2d][mmap]") { + // Create a 4x3 full matrix + matrix_2d orig(4, 3); + for (UINT32 r = 0; r < 4; ++r) + for (UINT32 c = 0; c < 3; ++c) + orig.put(r, c, (r + 1) * 10.0 + c); + orig.compute_maximum_value(); + + // Allocate a buffer to simulate an mmap region + std::size_t region_size = orig.get_size(); + std::vector region(region_size, 0); + void* addr = 
region.data(); + + // Write the matrix to the "mmap" region + orig.WriteMappedFileRegion(addr); + + // Attach in-place — should point _buffer at the data in the region + matrix_2d attached; + attached.AttachMappedFileRegion(addr); + + REQUIRE(!attached.owns_buffer()); + REQUIRE(attached.rows() == 4); + REQUIRE(attached.columns() == 3); + REQUIRE(attached.matrixType() == mtx_full); + + // Verify all elements match + for (UINT32 r = 0; r < 4; ++r) + for (UINT32 c = 0; c < 3; ++c) + REQUIRE(attached.get(r, c) == orig.get(r, c)); + + // Modify in-place (writes directly to region) + attached.put(2, 1, 999.0); + REQUIRE(attached.get(2, 1) == 999.0); + + // Detach + attached.DetachMappedFileRegion(addr); + REQUIRE(attached.empty()); + REQUIRE(attached.owns_buffer()); + + // Read back with copy-based Read to verify data persisted + matrix_2d readback; + readback.ReadMappedFileRegion(addr); + REQUIRE(readback.get(2, 1) == 999.0); + REQUIRE(readback.get(0, 0) == 10.0); +} + +TEST_CASE("In-place mmap round-trip: packed lower-triangular", "[matrix_2d][mmap][packed]") { + // Create a 4x4 packed lower-triangular matrix + matrix_2d orig; + orig.redim_packed(4); + for (UINT32 i = 0; i < 4; ++i) + for (UINT32 j = 0; j <= i; ++j) + orig.put(i, j, (i + 1) * 10.0 + j); + orig.compute_maximum_value(); + + std::size_t region_size = orig.get_size(); + std::vector region(region_size, 0); + void* addr = region.data(); + + orig.WriteMappedFileRegion(addr); + + matrix_2d attached; + attached.AttachMappedFileRegion(addr); + + REQUIRE(!attached.owns_buffer()); + REQUIRE(attached.is_packed()); + REQUIRE(attached.is_symmetric()); + REQUIRE(attached.rows() == 4); + + // Verify all elements + for (UINT32 i = 0; i < 4; ++i) + for (UINT32 j = 0; j <= i; ++j) + REQUIRE(attached.get(i, j) == orig.get(i, j)); + + // Symmetry: upper = lower + REQUIRE(attached.get(0, 1) == attached.get(1, 0)); + + // Modify in-place + attached.put(3, 0, -7.5); + + // WriteMappedFileRegion on in-place buffer should only 
write footer
+ attached.WriteMappedFileRegion(addr);
+
+ // deallocate detaches without freeing
+ attached.deallocate();
+ REQUIRE(attached.empty());
+ REQUIRE(attached.owns_buffer());
+
+ // Read back to verify
+ matrix_2d readback;
+ readback.ReadMappedFileRegion(addr);
+ REQUIRE(readback.get(3, 0) == -7.5);
+ REQUIRE(readback.get(1, 0) == 20.0);
+}
+
+TEST_CASE("In-place mmap: deallocate does not crash", "[matrix_2d][mmap]") {
+ matrix_2d orig(3, 3);
+ for (UINT32 i = 0; i < 9; ++i)
+ orig.put(i / 3, i % 3, i + 1.0);
+
+ std::size_t region_size = orig.get_size();
+ std::vector<char> region(region_size, 0);
+ void* addr = region.data();
+ orig.WriteMappedFileRegion(addr);
+
+ matrix_2d attached;
+ attached.AttachMappedFileRegion(addr);
+ REQUIRE(!attached.owns_buffer());
+
+ // deallocate should not crash (no delete[] on mmap pointer)
+ attached.deallocate();
+ REQUIRE(attached.empty());
+ REQUIRE(attached.owns_buffer());
+
+ // Re-attach and verify data still intact
+ attached.AttachMappedFileRegion(addr);
+ REQUIRE(attached.get(1, 1) == 5.0);
+ attached.deallocate();
+}
+
+TEST_CASE("In-place mmap: copy constructor makes owned copy", "[matrix_2d][mmap]") {
+ matrix_2d orig(2, 2);
+ orig.put(0, 0, 1.0); orig.put(0, 1, 2.0);
+ orig.put(1, 0, 3.0); orig.put(1, 1, 4.0);
+
+ std::size_t region_size = orig.get_size();
+ std::vector<char> region(region_size, 0);
+ void* addr = region.data();
+ orig.WriteMappedFileRegion(addr);
+
+ matrix_2d attached;
+ attached.AttachMappedFileRegion(addr);
+
+ // Copy constructor should create an owned copy
+ matrix_2d copy(attached);
+ REQUIRE(copy.owns_buffer());
+ REQUIRE(copy.get(1, 1) == 4.0);
+
+ // Modifying copy should not affect mmap
+ copy.put(1, 1, 99.0);
+ REQUIRE(attached.get(1, 1) == 4.0);
+
+ attached.deallocate();
+}
+
+TEST_CASE("In-place mmap: operator= into owned buffer", "[matrix_2d][mmap]") {
+ matrix_2d orig(3, 2);
+ for (UINT32 r = 0; r < 3; ++r)
+ for (UINT32 c = 0; c < 2; ++c)
+ orig.put(r, c, r * 2.0 + c);
+
+ 
std::size_t region_size = orig.get_size();
+ std::vector<char> region(region_size, 0);
+ void* addr = region.data();
+ orig.WriteMappedFileRegion(addr);
+
+ matrix_2d attached;
+ attached.AttachMappedFileRegion(addr);
+
+ // Assign to a pre-allocated owned matrix of same size
+ matrix_2d dest(3, 2);
+ dest = attached;
+ REQUIRE(dest.owns_buffer());
+ REQUIRE(dest.get(2, 1) == 5.0);
+
+ attached.deallocate();
+}
+
+TEST_CASE("In-place mmap: region size alignment", "[matrix_2d][mmap]") {
+ // Verify that get_size() returns 8-byte aligned sizes for proper mmap alignment
+ matrix_2d full(7, 5);
+ REQUIRE(full.get_size() % 8 == 0);
+
+ matrix_2d packed;
+ packed.redim_packed(11);
+ REQUIRE(packed.get_size() % 8 == 0);
+
+ matrix_2d col_vec(100, 1);
+ REQUIRE(col_vec.get_size() % 8 == 0);
+}
+
+TEST_CASE("operator<< binary size matches get_size for mmap regions", "[matrix_2d][mmap]") {
+ // Verify that operator<< writes exactly get_size() bytes in binary mode.
+ // A mismatch here causes mmap region offsets to be wrong for all blocks
+ // after the first, leading to corrupted header reads. 
+
+ auto check_stream_size = [](matrix_2d& m, const std::string& label) {
+ std::ostringstream oss;
+ oss.iword(0) = binary;
+ oss << m;
+ std::size_t stream_bytes = oss.str().size();
+ std::size_t region_bytes = m.get_size();
+ REQUIRE(stream_bytes == region_bytes);
+ };
+
+ // Full matrix (column vector — the case that crashed)
+ matrix_2d col_vec(50, 1);
+ for (UINT32 r = 0; r < 50; ++r)
+ col_vec.put(r, 0, static_cast<double>(r));
+ check_stream_size(col_vec, "column vector");
+
+ // Full matrix
+ matrix_2d full(7, 5);
+ for (UINT32 r = 0; r < 7; ++r)
+ for (UINT32 c = 0; c < 5; ++c)
+ full.put(r, c, static_cast<double>(r * 10 + c));
+ check_stream_size(full, "full 7x5");
+
+ // Packed lower-triangular
+ matrix_2d packed;
+ packed.redim_packed(6);
+ for (UINT32 r = 0; r < 6; ++r)
+ for (UINT32 c = 0; c <= r; ++c)
+ packed.put(r, c, static_cast<double>(r * 10 + c));
+ check_stream_size(packed, "packed 6x6");
+
+ // Full matrix with slack (redim + shrink, simulating staged adjustment)
+ matrix_2d with_slack;
+ with_slack.redim(100, 1);
+ with_slack.shrink(30, 0); // _rows=70, _mem_rows=100
+ for (UINT32 r = 0; r < 70; ++r)
+ with_slack.put(r, 0, static_cast<double>(r));
+ check_stream_size(with_slack, "column vector with slack");
+}