foedus_code/log__gleaner__impl_8cpp_source.html

 /*

  * Copyright (c) 2014-2015, Hewlett-Packard Development Company, LP.

  * This program is free software; you can redistribute it and/or modify it

  * under the terms of the GNU General Public License as published by the Free

  * Software Foundation; either version 2 of the License, or (at your option)

  * any later version.

  *

  * This program is distributed in the hope that it will be useful, but WITHOUT

  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for

  * more details. You should have received a copy of the GNU General Public

  * License along with this program; if not, write to the Free Software

  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  *

  * HP designates this particular file as subject to the "Classpath" exception

  * as provided by HP in the LICENSE.txt file that accompanied this code.

  */

 #include "foedus/snapshot/log_gleaner_impl.hpp"


 #include <glog/logging.h>


 #include <algorithm>

 #include <chrono>

 #include <map>

 #include <ostream>

 #include <sstream>

 #include <string>

 #include <utility>

 #include <vector>


 #include "foedus/engine.hpp"

 #include "foedus/engine_options.hpp"

 #include "foedus/error_stack_batch.hpp"

 #include "foedus/debugging/stop_watch.hpp"

 #include "foedus/log/common_log_types.hpp"

 #include "foedus/memory/memory_id.hpp"

 #include "foedus/snapshot/log_gleaner_resource.hpp"

 #include "foedus/snapshot/log_mapper_impl.hpp"

 #include "foedus/snapshot/log_reducer_impl.hpp"

 #include "foedus/snapshot/snapshot.hpp"

 #include "foedus/snapshot/snapshot_manager.hpp"

 #include "foedus/snapshot/snapshot_manager_pimpl.hpp"

 #include "foedus/soc/soc_manager.hpp"

 #include "foedus/storage/composer.hpp"

 #include "foedus/storage/partitioner.hpp"

 #include "foedus/storage/storage_manager.hpp"

 #include "foedus/thread/stoppable_thread_impl.hpp"


 namespace foedus {

 namespace snapshot {


 /**
  * Builds a gleaner for one snapshot run.
  * @param engine forwarded to the LogGleanerRef base class.
  * @param gleaner_resource stored in gleaner_resource_; the pointer is kept as given.
  * @param new_snapshot used to initialize new_snapshot_.
  */
 LogGleaner::LogGleaner(
   Engine* engine, LogGleanerResource* gleaner_resource, const Snapshot& new_snapshot)
   : LogGleanerRef(engine), gleaner_resource_(gleaner_resource), new_snapshot_(new_snapshot) {}


 /**
  * Asks every mapper/reducer to stop and polls until they have all exited.
  *
  * Does nothing (besides a verbose log) when everything has already exited. Otherwise sets
  * the cancelled_ flag in the control block and polls in 10ms steps until either all have
  * exited or an error is reported.
  * @return kRetOk when polling ends normally, kErrorCodeSnapshotExitTimeout when more than
  * 3000 polling sleeps elapsed.
  */
 ErrorStack LogGleaner::cancel_reducers_mappers() {
   if (is_all_exitted()) {
     VLOG(0) << "All mappers/reducers have already exitted. " << *this;
     return kRetOk;
   }

   LOG(INFO) << "Requesting all mappers/reducers threads to stop.. " << *this;
   control_block_->cancelled_ = true;

   // 3000 sleeps of 10ms each before we give up.
   const uint32_t kMaxPolls = 3000U;
   const std::chrono::milliseconds kPollInterval(10);
   for (uint32_t polls = 1U; !(is_all_exitted() || is_error()); ++polls) {
     std::this_thread::sleep_for(kPollInterval);
     if (polls > kMaxPolls) {
       return ERROR_STACK(kErrorCodeSnapshotExitTimeout);
     }
   }
   return kRetOk;
 }


 void LogGleaner::clear_all() {

   control_block_->clear_counts();

   control_block_->cur_snapshot_ = new_snapshot_;

   uint16_t node_count = engine_->get_options().thread_.group_count_;

   for (uint16_t node = 0; node < node_count; ++node) {

     LogReducerRef reducer(engine_, node);

     reducer.clear();

   }

   partitioner_metadata_[0].data_offset_ = 0;

   ASSERT_ND(partitioner_metadata_[0].data_size_

     == engine_->get_options().storage_.partitioner_data_memory_mb_ * (1ULL << 20));

   for (storage::StorageId i = 1; i <= new_snapshot_.max_storage_id_; ++i) {

     partitioner_metadata_[i].clear_counts();

   }

 }


 /**
  * Designs partitions for storages 1 through new_snapshot_.max_storage_id_.
  *
  * Currently runs everything in one call on the calling thread (easier to debug), but goes
  * through design_partitions_run() so that the range can later be split among threads.
  * @return whatever design_partitions_run() stored as its result.
  */
 ErrorStack LogGleaner::design_partitions() {
   const storage::StorageId first_storage_id = 1U;
   const storage::StorageId storage_count = new_snapshot_.max_storage_id_;
   ErrorStack outcome;
   design_partitions_run(first_storage_id, storage_count, &outcome);
   return outcome;
 }


 /**
  * Designs partitions for the storages in [from, from + count).
  *
  * Storages that do not exist are skipped. The loop stops at the first storage whose
  * partition design fails.
  * @param from first storage ID to process.
  * @param count number of consecutive storage IDs to process.
  * @param result receives the outcome: kRetOk on success, otherwise the first error
  * encountered (fileset initialization, partition design, or fileset uninitialization).
  */
 void LogGleaner::design_partitions_run(
   storage::StorageId from,
   storage::StorageId count,
   ErrorStack* result) {
   *result = kRetOk;
   LOG(INFO) << "Determining partitions for Storage-" << from << " to " << (from + count - 1) << ".";

   // working memory while designing. we auto-extend if the partitioner requests so.
   memory::AlignedMemory work_memory;
   work_memory.alloc(1U << 21, 1U << 12, memory::AlignedMemory::kNumaAllocOnnode, 0);

   // To read from snapshot pages while designing. This must be thread-private. So we instantiate
   // for each design_partitions_run()
   cache::SnapshotFileSet fileset(engine_);
   *result = fileset.initialize();
   if (result->is_error()) {
     LOG(ERROR) << "fileset.initialize() failed!" << *result;
     return;
   }
   UninitializeGuard fileset_guard(&fileset, UninitializeGuard::kWarnIfUninitializeError);

   storage::StorageManager* stm = engine_->get_storage_manager();
   for (storage::StorageId id = from; id < from + count; ++id) {
     if (!stm->get_storage(id)->exists()) {
       continue;
     }
     storage::Partitioner partitioner(engine_, id);
     storage::Partitioner::DesignPartitionArguments args = { &work_memory, &fileset };
     ErrorStack ret = partitioner.design_partition(args);
     if (ret.is_error()) {
       LOG(ERROR) << "Error while determining partitions for storage-" << id << ":" << ret;
       *result = ret;
       break;
     }
   }

   // Uninitialize explicitly so that its error can be reported, but never let a successful
   // uninitialize() wipe out an error already recorded by the loop above.
   ErrorStack uninit_result = fileset.uninitialize();
   if (uninit_result.is_error()) {
     LOG(ERROR) << "fileset.uninitialize() failed!" << uninit_result;
     if (!result->is_error()) {
       *result = uninit_result;
     }
   }
   if (result->is_error()) {
     return;
   }
   work_memory.release_block();
   LOG(INFO) << "Determined partitions for Storage-" << from << " to " << (from + count - 1) << ".";
 }


 /**
  * Main routine of the log gleaner. Runs four logged steps in order:
  *  1. design partitions for all storages,
  *  2. set gleaning_, wake up the snapshot children and wait for the mappers/reducers,
  *  3. combine reducer outputs into root pages (only when all completed without error),
  *  4. request remaining mappers/reducers to stop.
  *
  * Note that a mapper/reducer error detected in step 3 is only logged here; this method
  * still goes on to step 4 and returns kRetOk unless one of the CHECK_ERROR calls fails.
  * NOTE(review): CHECK_ERROR presumably returns the error to the caller immediately, in
  * which case a failure in step 1 or step 3 skips step 4 - confirm against error_stack.hpp.
  */
 ErrorStack LogGleaner::execute() {

   LOG(INFO) << "Gleaner starts running: snapshot_id=" << get_snapshot_id();

   // reset counters, reducers and partitioner metadata left over from a previous run.
   clear_all();


   LOG(INFO) << "Gleaner Step 1: Design partitions for all storages...";

   debugging::StopWatch watch1;

   // Another approach is to delay this step until some mapper really needs it so that we can

   // skip partition-designing for storages that weren't modified.

   // However, it requires synchronization in mapper/reducer and this step is anyway fast enough.

   // So, we so far simply design partitions for all of them.

   CHECK_ERROR(design_partitions());

   watch1.stop();

   LOG(INFO) << "Gleaner Step 1: Ended in " << watch1.elapsed_sec() << "s";


   LOG(INFO) << "Gleaner Step 2: Run mappers/reducers...";

   debugging::StopWatch watch2;

   // Request each node's snapshot manager to launch mappers/reducers threads

   // gleaning_ is raised before the wakeup call below and lowered once the wait loop ends.
   control_block_->gleaning_ = true;

   engine_->get_soc_manager()->get_shared_memory_repo()->get_global_memory_anchors()->

     snapshot_manager_memory_->wakeup_snapshot_children();


   // then, wait until all mappers/reducers are done

   // the loop also ends as soon as an error is reported, even if not all have completed.
   SPINLOCK_WHILE(!is_error() && !is_all_completed()) {

     // snapshot is an infrequent operation, doesn't have to wake up immediately.

     // just sleep for a while

     std::this_thread::sleep_for(std::chrono::milliseconds(10));

   }


   control_block_->gleaning_ = false;

   watch2.stop();

   LOG(INFO) << "Gleaner Step 2: Ended in " << watch2.elapsed_sec() << "s";


   LOG(INFO) << "Gleaner Step 3: Combine outputs from reducers (root page info)..." << *this;

   debugging::StopWatch watch3;

   // root pages are constructed only in the last branch; the first two branches just log.
   if (is_error()) {

     LOG(ERROR) << "Some mapper/reducer got an error. " << *this;

   } else if (!is_all_completed()) {

     LOG(WARNING) << "gleaner stopped without completion. cancelled? " << *this;

   } else {

     LOG(INFO) << "All mappers/reducers successfully done. Now on to the final phase." << *this;

     CHECK_ERROR(construct_root_pages());

   }

   watch3.stop();

   LOG(INFO) << "Gleaner Step 3: Ended in " << watch3.elapsed_sec() << "s";


   LOG(INFO) << "Gleaner Step 4: Uninitializing...";

   // returns immediately if everything already exited; otherwise sets cancelled_ and waits.
   CHECK_ERROR(cancel_reducers_mappers());

   ASSERT_ND(is_error() || is_all_exitted());

   LOG(INFO) << "Gleaner ends";

   return kRetOk;

 }


 /**
  * Combines the root-info pages emitted by all reducers into one new root page per storage.
  *
  * Each reducer exposes an array of root-info pages; the code relies on each array being
  * sorted by storage ID (asserted in debug builds). The arrays are merged in the manner of
  * a merge-sort: repeatedly pick the smallest storage ID at the cursors, hand all pages for
  * that storage to its Composer, record the resulting root page pointer in
  * new_root_page_pointers_, and advance the cursors that pointed to that storage.
  *
  * @return kRetOk on success, or the first error from the snapshot file set, the snapshot
  * writer, or a composer (propagated through CHECK_ERROR).
  */
 ErrorStack LogGleaner::construct_root_pages() {
   ASSERT_ND(new_root_page_pointers_.size() == 0);
   debugging::StopWatch stop_watch;

   const uint16_t count = control_block_->reducers_count_;
   std::vector<const storage::Page*> tmp_array(count, nullptr);
   // Per-reducer positions/sizes count storages. StorageId is 32-bit, so a 16-bit counter
   // could silently truncate the size and end the merge early; use 32 bits here.
   std::vector<uint32_t> cursors(count, 0);
   std::vector<uint32_t> buffer_sizes;
   std::vector<const storage::Page*> buffers;
   buffer_sizes.reserve(count);
   buffers.reserve(count);
   for (uint16_t i = 0; i < count; ++i) {
     LogReducerRef reducer(engine_, i);
     buffer_sizes.push_back(reducer.get_total_storage_count());
     buffers.push_back(reducer.get_root_info_pages());
   }

   // composers read snapshot files.
   cache::SnapshotFileSet fileset(engine_);
   CHECK_ERROR(fileset.initialize());
   UninitializeGuard fileset_guard(&fileset, UninitializeGuard::kWarnIfUninitializeError);

   // composers need SnapshotWriter to write out to.
   SnapshotWriter snapshot_writer(
     engine_,
     0,
     get_snapshot_id(),
     &gleaner_resource_->writer_pool_memory_,
     &gleaner_resource_->writer_intermediate_memory_,
     true);  // we append to the node-0 snapshot file.
   CHECK_ERROR(snapshot_writer.open());

   storage::StorageId prev_storage_id = 0;
   // each reducer's root-info-page must be sorted by storage_id, so we do kind of merge-sort here.
   while (true) {
     // determine which storage to process by finding the smallest storage_id.
     // 0 doubles as "nothing found yet"; the loops in this file start storage IDs at 1.
     storage::StorageId min_storage_id = 0;
     for (uint16_t i = 0; i < count; ++i) {
       if (cursors[i] == buffer_sizes[i]) {
         continue;  // this reducer's pages are exhausted
       }
       const storage::Page* root_info_page = buffers[i] + cursors[i];
       storage::StorageId storage_id = root_info_page->get_header().storage_id_;
       ASSERT_ND(storage_id > prev_storage_id);
       if (min_storage_id == 0) {
         min_storage_id = storage_id;
       } else {
         min_storage_id = std::min(min_storage_id, storage_id);
       }
     }

     if (min_storage_id == 0) {
       break;  // all reducers' all root info pages processed
     }

     // fill tmp_array with every reducer's page for the chosen storage
     uint16_t input_count = 0;
     for (uint16_t i = 0; i < count; ++i) {
       if (cursors[i] == buffer_sizes[i]) {
         continue;
       }
       const storage::Page* root_info_page = buffers[i] + cursors[i];
       storage::StorageId storage_id = root_info_page->get_header().storage_id_;
       if (storage_id == min_storage_id) {
         tmp_array[input_count] = root_info_page;
         ++input_count;
       }
     }
     ASSERT_ND(input_count > 0);

     storage::Composer composer(engine_, min_storage_id);
     storage::SnapshotPagePointer new_root_page_pointer;
     storage::Composer::ConstructRootArguments args = {
       &snapshot_writer,
       &fileset,
       &tmp_array[0],
       input_count,
       gleaner_resource_,
       &new_root_page_pointer};
     CHECK_ERROR(composer.construct_root(args));
     ASSERT_ND(new_root_page_pointer > 0);
     ASSERT_ND(new_root_page_pointers_.find(min_storage_id) == new_root_page_pointers_.end());
     new_root_page_pointers_.insert(std::pair<storage::StorageId, storage::SnapshotPagePointer>(
       min_storage_id, new_root_page_pointer));

     // done for this storage. advance cursors
     prev_storage_id = min_storage_id;
     for (uint16_t i = 0; i < count; ++i) {
       if (cursors[i] == buffer_sizes[i]) {
         continue;
       }
       const storage::Page* root_info_page = buffers[i] + cursors[i];
       storage::StorageId storage_id = root_info_page->get_header().storage_id_;
       if (storage_id == min_storage_id) {
         ++cursors[i];
       }
     }
   }

   snapshot_writer.close();
   CHECK_ERROR(fileset.uninitialize());

   stop_watch.stop();
   LOG(INFO) << "constructed root pages for " << new_root_page_pointers_.size()
     << " storages. in " << stop_watch.elapsed_ms() << "ms. "<< *this;
   return kRetOk;
 }


 /**
  * Renders this gleaner as text.
  * @return the same text that operator<< writes for this object.
  */
 std::string LogGleaner::to_string() const {
   std::ostringstream rendered;
   rendered << *this;
   return rendered.str();
 }

 /**
  * Writes an XML-like dump of the gleaner: the snapshot followed by the completed, completed
  * mapper, error, and exit counters read from the control block.
  * @param o stream to write to.
  * @param v gleaner to dump.
  * @return o, to allow chaining.
  */
 std::ostream& operator<<(std::ostream& o, const LogGleaner& v) {
   const auto& counters = *v.control_block_;
   o << "<LogGleaner>";
   o << v.new_snapshot_;
   o << "<completed_count_>" << counters.completed_count_ << "</completed_count_>";
   o << "<completed_mapper_count_>" << counters.completed_mapper_count_
     << "</completed_mapper_count_>";
   o << "<error_count_>" << counters.error_count_ << "</error_count_>";
   o << "<exit_count_>" << counters.exit_count_ << "</exit_count_>";
   o << "</LogGleaner>";
   return o;
 }


 }  // namespace snapshot

 }  // namespace foedus

stoppable_thread_impl.hpp

foedus::snapshot::LogGleanerRef::partitioner_metadata_
storage::PartitionerMetadata * partitioner_metadata_
Definition: log_gleaner_ref.hpp:62

storage_manager.hpp

snapshot_manager.hpp

foedus::memory::AlignedMemory::kNumaAllocOnnode
numa_alloc_onnode() and numa_free().
Definition: aligned_memory.hpp:83

foedus::snapshot::LogGleaner::LogGleaner
LogGleaner()=delete

foedus::Engine::get_storage_manager
storage::StorageManager * get_storage_manager() const
See Storage Manager.
Definition: engine.cpp:60

foedus::UninitializeGuard::kWarnIfUninitializeError
Automatically calls if uninitialize() wasn't called when it gets out of scope, and just complains whe...
Definition: initializable.hpp:243

ERROR_STACK
#define ERROR_STACK(e)
Instantiates ErrorStack with the given foedus::error_code, creating an error stack with the current f...
Definition: error_stack.hpp:480

foedus::storage::StorageId
uint32_t StorageId
Unique ID for storage.
Definition: storage_id.hpp:55

foedus
Root package of FOEDUS (Fast Optimistic Engine for Data Unification Services).
Definition: assert_nd.hpp:44

snapshot.hpp

composer.hpp

foedus::soc::SharedMemoryRepo::get_global_memory_anchors
GlobalMemoryAnchors * get_global_memory_anchors()
Definition: shared_memory_repo.hpp:514

foedus::snapshot::LogGleaner
A log-gleaner, which constructs a new set of snapshot files during snapshotting.
Definition: log_gleaner_impl.hpp:104

engine.hpp

foedus::snapshot::LogGleanerResource
Local resource for the log gleaner, which runs only in the master node.
Definition: log_gleaner_resource.hpp:34

foedus::ErrorStack
Brings error stacktrace information as return value of functions.
Definition: error_stack.hpp:81

foedus::storage::PartitionerMetadata::clear_counts
void clear_counts()
Definition: partitioner.hpp:221

stop_watch.hpp

foedus::Attachable< LogGleanerControlBlock >::engine_
Engine * engine_
Most attachable object stores an engine pointer (local engine), so we define it here.
Definition: attachable.hpp:107

log_reducer_impl.hpp

memory_id.hpp
Definitions of IDs in this package and a few related constant values.

foedus::cache::SnapshotFileSet
Holds a set of read-only file objects for snapshot files.
Definition: snapshot_file_set.hpp:52

log_mapper_impl.hpp

common_log_types.hpp
Declares common log types used in all packages.

foedus::Engine::get_options
const EngineOptions & get_options() const
Definition: engine.cpp:39

partitioner.hpp

foedus::snapshot::LogGleanerControlBlock::exit_count_
std::atomic< uint16_t > exit_count_
count of mappers/reducers that have exitted.
Definition: snapshot_manager_pimpl.hpp:109

foedus::storage::StorageOptions::partitioner_data_memory_mb_
uint32_t partitioner_data_memory_mb_
Size in MB of a shared memory buffer allocated for all partitioners during log gleaning.
Definition: storage_options.hpp:51

snapshot_manager_pimpl.hpp

engine_options.hpp

foedus::snapshot::Snapshot::max_storage_id_
storage::StorageId max_storage_id_
Largest storage ID as of starting to take the snapshot.
Definition: snapshot.hpp:58

foedus::snapshot::LogReducerRef
A remote view of LogReducer from all engines.
Definition: log_reducer_ref.hpp:40

foedus::snapshot::LogGleanerControlBlock::error_count_
std::atomic< uint16_t > error_count_
count of mappers/reducers that have exitted with some error.
Definition: snapshot_manager_pimpl.hpp:103

foedus::snapshot::LogGleanerRef::is_error
bool is_error() const
Definition: log_gleaner_ref.cpp:71

foedus::Attachable< LogGleanerControlBlock >::control_block_
LogGleanerControlBlock * control_block_
The shared data on shared memory that has been initialized in some SOC or master engine.
Definition: attachable.hpp:111

foedus::EngineOptions::storage_
storage::StorageOptions storage_
Definition: engine_options.hpp:145

error_stack_batch.hpp

foedus::storage::SnapshotPagePointer
uint64_t SnapshotPagePointer
Page ID of a snapshot page.
Definition: storage_id.hpp:79

foedus::UninitializeGuard
Calls Initializable::uninitialize() automatically when it gets out of scope.
Definition: initializable.hpp:220

SPINLOCK_WHILE
#define SPINLOCK_WHILE(x)
A macro to busy-wait (spinlock) with occasional pause.
Definition: assorted_func.hpp:256

foedus::snapshot::LogGleanerRef::is_all_completed
bool is_all_completed() const
Definition: log_gleaner_ref.cpp:61

foedus::snapshot::LogGleanerRef::get_snapshot_id
SnapshotId get_snapshot_id() const
Definition: log_gleaner_ref.cpp:76

foedus::Engine
Database engine object that holds all resources and provides APIs.
Definition: engine.hpp:109

foedus::debugging::StopWatch::stop
uint64_t stop()
Take another current time tick.
Definition: stop_watch.cpp:35

foedus::snapshot::LogGleaner::to_string
std::string to_string() const
Definition: log_gleaner_impl.cpp:307

log_gleaner_impl.hpp

foedus::snapshot::Snapshot
Represents one snapshot that converts all logs from base epoch to valid_until epoch into snapshot fil...
Definition: snapshot.hpp:37

foedus::snapshot::LogGleanerResource::writer_pool_memory_
memory::AlignedMemory writer_pool_memory_
Definition: log_gleaner_resource.hpp:66

foedus::thread::ThreadOptions::group_count_
uint16_t group_count_
Number of ThreadGroup in the engine.
Definition: thread_options.hpp:43

foedus::storage::PartitionerMetadata::data_offset_
uint32_t data_offset_
Relative offset from the beginning of partitioner data block that points to variable-sized partitione...
Definition: partitioner.hpp:242

foedus::snapshot::LogGleaner::execute
ErrorStack execute()
Main routine of log gleaner.
Definition: log_gleaner_impl.cpp:148

foedus::snapshot::LogGleanerResource::writer_intermediate_memory_
memory::AlignedMemory writer_intermediate_memory_
Definition: log_gleaner_resource.hpp:67

foedus::snapshot::LogGleanerRef::is_all_exitted
bool is_all_exitted() const
Definition: log_gleaner_ref.cpp:57

foedus::EngineOptions::thread_
thread::ThreadOptions thread_
Definition: engine_options.hpp:146

foedus::kErrorCodeSnapshotExitTimeout
0x0603 : "SNAPSHT: Snapshot mappers/reducers take too long time to respond to exit request...
Definition: error_code.hpp:163

CHECK_ERROR
#define CHECK_ERROR(x)
This macro calls x and checks its returned value.
Definition: error_stack.hpp:517

foedus::kRetOk
const ErrorStack kRetOk
Normal return value for no-error case.
Definition: error_stack.hpp:251

foedus::debugging::StopWatch::elapsed_sec
double elapsed_sec() const
Definition: stop_watch.hpp:51

foedus::Engine::get_soc_manager
soc::SocManager * get_soc_manager() const
See SOC and IPC.
Definition: engine.cpp:59

foedus::snapshot::LogGleanerRef
A remote view of LogGleaner from all engines.
Definition: log_gleaner_ref.hpp:36

foedus::snapshot::LogGleanerControlBlock::completed_count_
std::atomic< uint16_t > completed_count_
count of mappers/reducers that have completed processing the current epoch.
Definition: snapshot_manager_pimpl.hpp:90

log_gleaner_resource.hpp

ASSERT_ND
#define ASSERT_ND(x)
A warning-free wrapper macro of assert() that has no performance effect in release mode even when 'x'...
Definition: assert_nd.hpp:72

foedus::debugging::StopWatch
A high-resolution stop watch.
Definition: stop_watch.hpp:30

soc_manager.hpp

foedus::snapshot::operator<<
std::ostream & operator<<(std::ostream &o, const SortedBuffer &v)
Definition: log_buffer.cpp:32

foedus::soc::SocManager::get_shared_memory_repo
SharedMemoryRepo * get_shared_memory_repo()
Returns the shared memories maintained across SOCs.
Definition: soc_manager.cpp:38

foedus::snapshot::LogGleanerControlBlock::completed_mapper_count_
std::atomic< uint16_t > completed_mapper_count_
We also have a separate count for mappers only to know if all mappers are done.
Definition: snapshot_manager_pimpl.hpp:97