diff --git a/.gitignore b/.gitignore index cf3940e5..3f2ce7f8 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,4 @@ tags .DS_Store # End of https://www.gitignore.io/api/vim,git,sbt,java,maven,scala,intellij +B-Raft/src/.github/workflows/ci-test.yml diff --git a/B-Raft/.gitignore b/B-Raft/.gitignore new file mode 100644 index 00000000..a7c5d0ea --- /dev/null +++ b/B-Raft/.gitignore @@ -0,0 +1,4 @@ +experiments/resources/ +src/null/ +target/ +experiments/.idea/ diff --git a/B-Raft/README.md b/B-Raft/README.md new file mode 100644 index 00000000..02bad5d5 --- /dev/null +++ b/B-Raft/README.md @@ -0,0 +1,15 @@ +# [A Byzantine Fault-Tolerant Raft Algorithm Combined with Schnorr Signature (B-Raft), [1]](https://ieeexplore.ieee.org/document/9377376) +## Implemented in Scala for IN4391 Distributed Systems (2021/22 Q3) +Group 10: Julian Biesheuvel, Riley Jense & Pepijn te Marvelde + +## General Information +This repo contains the code for the project of the Distributed Systems course. It is an implementation of the B-Raft with Schnorr Signature [[1]](#bibliography), based on a prototype Raft implementation by Max Bundscherer [[2]](#bibliography). Akka Actors are used for communication between nodes. + +## File Structure +Source code for the Raft/B-Raft implementation is found in [`src` (link)](src). Experiments, including result dataset and plotting code, are found in [`experiments` (link)](experiments). More info on each folder is found in their respective `README.md`'s. + + + +### Bibliography +1. S. Tian, Y. Liu, Y. Zhang and Y. Zhao, "A Byzantine Fault-Tolerant Raft Algorithm Combined with Schnorr Signature," 2021 15th International Conference on Ubiquitous Information Management and Communication (IMCOM), 2021, pp. 1-5, doi: 10.1109/IMCOM51814.2021.9377376. +2. M.
Bundscherer, "(Prototype) Raft Consensus Algorithm in Scala," Github repository, 2020, https://github.com/maxbundscherer/prototype-scala-raft \ No newline at end of file diff --git a/B-Raft/experiments/DS Graphs.ipynb b/B-Raft/experiments/DS Graphs.ipynb new file mode 100644 index 00000000..a6537a11 --- /dev/null +++ b/B-Raft/experiments/DS Graphs.ipynb @@ -0,0 +1,567 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "aa69db8d", + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from tqdm import tqdm\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "def purge_conf(conf_dict):\n", + " keep_keys = ['raftTypeStr', 'nodes']\n", + " return {k:v for k,v in conf_dict.items() if k in keep_keys}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 37, + "outputs": [], + "source": [ + "def parse_line_election(message, timestamp, curr_dict, node_name, num_nodes):\n", + " append = False\n", + " # Start of election: \"Change behavior from 'LEADER' to 'SLEEP'\"\n", + " if \"Change behavior from 'LEADER' to 'SLEEP'\" in message:\n", + " curr_dict['election_start'] = timestamp\n", + " curr_dict['messages'] = [message]\n", + " elif \"Change behavior from 'CANDIDATE' to 'LEADER'\" in message and curr_dict.get('election_start'):\n", + " curr_dict['election_end'] = timestamp\n", + " curr_dict['messages'] = curr_dict['messages'] + [message]\n", + " append = True\n", + " elif \"Received message\" in message:\n", + " if curr_dict.get('election_start'):\n", + " curr_dict['messages'] = curr_dict.get('messages', []) + [message]\n", + "\n", + " return curr_dict, append\n", + "\n", + "def parse_line_appenddata(message, timestamp, curr_dict, node_name, num_nodes):\n", + " append = False\n", 
+ " if \"SEND APR\" in message:\n", + " if not curr_dict.get('appenddata_start'):\n", + " curr_dict['appenddata_start'] = timestamp\n", + " if \"FOLLOWER WRITING DATA\" in message:\n", + " curr_dict['appenddata_end'] = timestamp\n", + " # (data = Map(x -> 5, y -> 4, z -> 3, q -> 2))\n", + " data_map_str = message.split(', (')[1][11:-2]\n", + " entries = data_map_str.split(', ')\n", + "\n", + " curr_node_entry_set = curr_dict['written_data'].get(node_name, set())\n", + " curr_node_entry_set.update(entries)\n", + " curr_dict['written_data'][node_name] = curr_node_entry_set\n", + "\n", + " # if all followers have written all data set append to True\n", + " num_appended = 0\n", + " for data in curr_dict['written_data'].values():\n", + " if len(data) == 4:\n", + " num_appended += 1\n", + " if num_appended + 1 == (num_nodes // 2) + 1:\n", + " # print(f\"Finish, nodes written = {num_appended}: {message = }\")\n", + " curr_dict['appenddata_majority'] = timestamp\n", + " if \"[VERIFY APPEND DATA], Consistent\" in message:\n", + " curr_dict.pop('written_data')\n", + " append = True\n", + "\n", + " return curr_dict, append\n", + "\n", + "def load_logfile(file, exp_type='election') -> list:\n", + " with open(file, 'r') as f:\n", + " logs = f.readlines()\n", + "\n", + " log_start_idx = -1\n", + " # find start of actual logs and parse config:\n", + " conf = {}\n", + " for i, log in enumerate(logs):\n", + " if 'Starting Main with Config' in log:\n", + " log_start_idx = i\n", + " conf = {x.split('=')[0]: x.split('=')[1] for x in log[:-2].split(\"Config:\")[-1].split(',')}\n", + " break\n", + "\n", + " logs = logs[log_start_idx:]\n", + " conf = purge_conf(conf)\n", + " conf['file'] = file\n", + " conf['nodes'] = int(conf['nodes'])\n", + "\n", + " if exp_type == 'appenddata':\n", + " conf['written_data'] = {}\n", + " parse_line_func = parse_line_appenddata\n", + " else:\n", + " parse_line_func = parse_line_election\n", + "\n", + " results = []\n", + " curr_dict = conf.copy()\n", + " 
for log in logs:\n", + " # split message, indices: 0=timestamp, 1=loglevel, 2=akka actor string, 3=message\n", + " timestamp, _, node_name, message = log.split('\\t')\n", + " timestamp = timestamp[1:-1]\n", + " node_name = node_name.split('/')[-1][:-1]\n", + " curr_dict, append = parse_line_func(message, timestamp, curr_dict, node_name, conf['nodes'])\n", + " if append:\n", + " # curr_dict.pop('written_data', None)\n", + " results.append(curr_dict)\n", + " curr_dict = conf.copy()\n", + "\n", + " return results" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 33, + "outputs": [ + { + "data": { + "text/plain": "[{'raftTypeStr': 'BRaft',\n 'nodes': 21,\n 'file': 'resources/output/append_data_1/run1_raftType=BRaft_crashIntervalHeartbeats=10000_nodes=21.log',\n 'written_data': {'BraftNodeActor-3': {'q -> 2',\n 'x -> 5',\n 'y -> 4',\n 'z -> 3'},\n 'BraftNodeActor-10': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-19': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-20': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-15': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-18': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-6': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-9': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-16': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-4': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-5': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-12': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-7': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-1': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-0': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-8': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-13': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-2': {'q 
-> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-17': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-14': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'},\n 'BraftNodeActor-11': {'q -> 2', 'x -> 5', 'y -> 4', 'z -> 3'}},\n 'appenddata_start': '2022-04-13 20:46:48,780',\n 'appenddata_end': '2022-04-13 20:47:13,107',\n 'appenddata_majority': '2022-04-13 20:47:13,107'}]" + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "load_logfile('resources/output/append_data_1/run1_raftType=BRaft_crashIntervalHeartbeats=10000_nodes=21.log', 'appenddata')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 64, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\ptema\\AppData\\Local\\Temp\\ipykernel_15576\\3542859334.py:10: FutureWarning: casting timedelta64[ns] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.\n", + " append_df['duration_ms'] = (append_df.end - append_df.start).astype(np.int64) / int(1e6)\n", + "C:\\Users\\ptema\\AppData\\Local\\Temp\\ipykernel_15576\\3542859334.py:11: FutureWarning: casting timedelta64[ns] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.\n", + " append_df['duration_majority_ms'] = (append_df.majority - append_df.start).astype(np.int64) / int(1e6)\n" + ] + }, + { + "data": { + "text/plain": " raftTypeStr nodes file \\\n0 BRaft 12 resources/output\\append_data_1\\run10_raftType=... \n1 BRaft 16 resources/output\\append_data_1\\run10_raftType=... \n2 BRaft 18 resources/output\\append_data_1\\run10_raftType=... \n3 BRaft 21 resources/output\\append_data_1\\run10_raftType=... \n4 BRaft 3 resources/output\\append_data_1\\run10_raftType=... \n.. ... ... ... \n491 Raft 21 resources/output\\append_data_2\\run4_raftType=R... 
\n492 Raft 3 resources/output\\append_data_2\\run4_raftType=R... \n493 Raft 5 resources/output\\append_data_2\\run4_raftType=R... \n494 Raft 7 resources/output\\append_data_2\\run4_raftType=R... \n495 Raft 9 resources/output\\append_data_2\\run4_raftType=R... \n\n appenddata_start appenddata_end \\\n0 2022-04-13 22:17:24,913 2022-04-13 22:17:30,267 \n1 2022-04-13 22:22:43,684 2022-04-13 22:22:51,915 \n2 2022-04-13 22:23:03,264 2022-04-13 22:23:13,604 \n3 2022-04-13 22:23:26,520 2022-04-13 22:23:39,048 \n4 2022-04-13 22:16:18,810 2022-04-13 22:16:20,527 \n.. ... ... \n491 2022-04-14 08:33:48,784 2022-04-14 08:33:50,935 \n492 2022-04-14 08:31:57,141 2022-04-14 08:31:59,787 \n493 2022-04-14 08:32:11,079 2022-04-14 08:32:11,941 \n494 2022-04-14 08:32:25,042 2022-04-14 08:32:25,556 \n495 2022-04-14 08:32:38,988 2022-04-14 08:32:40,613 \n\n appenddata_majority start end \\\n0 2022-04-13 22:17:30,210 2022-04-13 22:17:24.913 2022-04-13 22:17:30.267 \n1 2022-04-13 22:22:50,916 2022-04-13 22:22:43.684 2022-04-13 22:22:51.915 \n2 2022-04-13 22:23:12,794 2022-04-13 22:23:03.264 2022-04-13 22:23:13.604 \n3 2022-04-13 22:23:38,150 2022-04-13 22:23:26.520 2022-04-13 22:23:39.048 \n4 2022-04-13 22:16:20,481 2022-04-13 22:16:18.810 2022-04-13 22:16:20.527 \n.. ... ... ... \n491 2022-04-14 08:33:48,898 2022-04-14 08:33:48.784 2022-04-14 08:33:50.935 \n492 2022-04-14 08:31:59,787 2022-04-14 08:31:57.141 2022-04-14 08:31:59.787 \n493 2022-04-14 08:32:11,940 2022-04-14 08:32:11.079 2022-04-14 08:32:11.941 \n494 2022-04-14 08:32:25,555 2022-04-14 08:32:25.042 2022-04-14 08:32:25.556 \n495 2022-04-14 08:32:39,607 2022-04-14 08:32:38.988 2022-04-14 08:32:40.613 \n\n majority duration_ms duration_majority_ms \\\n0 2022-04-13 22:17:30.210 5354.0 5297.0 \n1 2022-04-13 22:22:50.916 8231.0 7232.0 \n2 2022-04-13 22:23:12.794 10340.0 9530.0 \n3 2022-04-13 22:23:38.150 12528.0 11630.0 \n4 2022-04-13 22:16:20.481 1717.0 1671.0 \n.. ... ... ... 
\n491 2022-04-14 08:33:48.898 2151.0 114.0 \n492 2022-04-14 08:31:59.787 2646.0 2646.0 \n493 2022-04-14 08:32:11.940 862.0 861.0 \n494 2022-04-14 08:32:25.555 514.0 513.0 \n495 2022-04-14 08:32:39.607 1625.0 619.0 \n\n duration_majority_s duration_s \n0 5.297 5.354 \n1 7.232 8.231 \n2 9.530 10.340 \n3 11.630 12.528 \n4 1.671 1.717 \n.. ... ... \n491 0.114 2.151 \n492 2.646 2.646 \n493 0.861 0.862 \n494 0.513 0.514 \n495 0.619 1.625 \n\n[496 rows x 13 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
raftTypeStrnodesfileappenddata_startappenddata_endappenddata_majoritystartendmajorityduration_msduration_majority_msduration_majority_sduration_s
0BRaft12resources/output\\append_data_1\\run10_raftType=...2022-04-13 22:17:24,9132022-04-13 22:17:30,2672022-04-13 22:17:30,2102022-04-13 22:17:24.9132022-04-13 22:17:30.2672022-04-13 22:17:30.2105354.05297.05.2975.354
1BRaft16resources/output\\append_data_1\\run10_raftType=...2022-04-13 22:22:43,6842022-04-13 22:22:51,9152022-04-13 22:22:50,9162022-04-13 22:22:43.6842022-04-13 22:22:51.9152022-04-13 22:22:50.9168231.07232.07.2328.231
2BRaft18resources/output\\append_data_1\\run10_raftType=...2022-04-13 22:23:03,2642022-04-13 22:23:13,6042022-04-13 22:23:12,7942022-04-13 22:23:03.2642022-04-13 22:23:13.6042022-04-13 22:23:12.79410340.09530.09.53010.340
3BRaft21resources/output\\append_data_1\\run10_raftType=...2022-04-13 22:23:26,5202022-04-13 22:23:39,0482022-04-13 22:23:38,1502022-04-13 22:23:26.5202022-04-13 22:23:39.0482022-04-13 22:23:38.15012528.011630.011.63012.528
4BRaft3resources/output\\append_data_1\\run10_raftType=...2022-04-13 22:16:18,8102022-04-13 22:16:20,5272022-04-13 22:16:20,4812022-04-13 22:16:18.8102022-04-13 22:16:20.5272022-04-13 22:16:20.4811717.01671.01.6711.717
..........................................
491Raft21resources/output\\append_data_2\\run4_raftType=R...2022-04-14 08:33:48,7842022-04-14 08:33:50,9352022-04-14 08:33:48,8982022-04-14 08:33:48.7842022-04-14 08:33:50.9352022-04-14 08:33:48.8982151.0114.00.1142.151
492Raft3resources/output\\append_data_2\\run4_raftType=R...2022-04-14 08:31:57,1412022-04-14 08:31:59,7872022-04-14 08:31:59,7872022-04-14 08:31:57.1412022-04-14 08:31:59.7872022-04-14 08:31:59.7872646.02646.02.6462.646
493Raft5resources/output\\append_data_2\\run4_raftType=R...2022-04-14 08:32:11,0792022-04-14 08:32:11,9412022-04-14 08:32:11,9402022-04-14 08:32:11.0792022-04-14 08:32:11.9412022-04-14 08:32:11.940862.0861.00.8610.862
494Raft7resources/output\\append_data_2\\run4_raftType=R...2022-04-14 08:32:25,0422022-04-14 08:32:25,5562022-04-14 08:32:25,5552022-04-14 08:32:25.0422022-04-14 08:32:25.5562022-04-14 08:32:25.555514.0513.00.5130.514
495Raft9resources/output\\append_data_2\\run4_raftType=R...2022-04-14 08:32:38,9882022-04-14 08:32:40,6132022-04-14 08:32:39,6072022-04-14 08:32:38.9882022-04-14 08:32:40.6132022-04-14 08:32:39.6071625.0619.00.6191.625
\n

496 rows × 13 columns

\n
" + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# appenddata_results = []\n", + "# for file in glob.glob('resources/output/append_data*/*.log'):\n", + "# appenddata_results.extend(load_logfile(file, 'appenddata'))\n", + "#\n", + "# append_df = pd.json_normalize(appenddata_results)\n", + "# append_df['start'] = pd.to_datetime(append_df.appenddata_start)\n", + "# append_df['end'] = pd.to_datetime(append_df.appenddata_end)\n", + "# append_df['majority'] = pd.to_datetime(append_df.appenddata_majority)\n", + "#\n", + "# append_df['duration_ms'] = (append_df.end - append_df.start).astype(np.int64) / int(1e6)\n", + "# append_df['duration_majority_ms'] = (append_df.majority - append_df.start).astype(np.int64) / int(1e6)\n", + "# append_df['duration_majority_s'] = append_df.duration_majority_ms / 1000\n", + "# append_df['duration_s'] = append_df.duration_ms / 1000\n", + "# append_df.to_json('appenddata_results.json.gz', compression='gzip')\n", + "# append_df\n", + "\n", + "append_df = pd.read_json('appenddata_results.json.gz', compression='gzip')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 51, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 1, sharex='col', figsize=(10, 15))\n", + "axs = axs.flatten()\n", + "plot_box(append_df, ax=axs[0], y='duration_s', logy=False)\n", + "plot_line(append_df, ax=axs[1], y='duration_s', logy=False)\n", + "\n", + "for ax in axs:\n", + " ax.set_xlabel(\"Number of nodes\")\n", + " ax.set_ylabel(\"Duration (seconds) from append data to consistent state\")\n", + " ax.legend(title='Algorithm')\n", + "\n", + "# fig.savefig('appenddata_duration.png', dpi=250)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 63, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(1, 2, figsize=(15, 10))\n", + "\n", + "# append_df['majority'] =\n", + "append_df['rank'] = append_df['nodes'].rank(method='dense') - 1\n", + "sns.lineplot(data=append_df[append_df.raftTypeStr == 'BRaft'], x='nodes', y='duration_s',ax=axs[0], label='Time until consistent')\n", + "sns.lineplot(data=append_df[append_df.raftTypeStr == 'BRaft'], x='nodes', y='duration_majority_s',ax=axs[0], label='Time until majority has replicated')\n", + "axs[0].set_title('BRaft')\n", + "sns.lineplot(data=append_df[append_df.raftTypeStr == 'Raft'], x='nodes', y='duration_s',ax=axs[1], label='Time until consistent')\n", + "sns.lineplot(data=append_df[append_df.raftTypeStr == 'Raft'], x='nodes', y='duration_majority_s',ax=axs[1], label='Time until majority has replicated')\n", + "axs[1].set_title('Raft')\n", + "\n", + "for ax in axs:\n", + " ax.set_xlabel(\"Number of nodes\")\n", + " ax.set_ylabel(\"Duration (seconds) from append data\")\n", + " # ax.legend(title='Algorithm')\n", + " ax.grid()\n", + "# fig.savefig('appenddata_time_to_majority.png', dpi=250)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [ + { + "data": { + "text/plain": "(2520, 11)" + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_json('election_results.json.gz', compression='infer')\n", + "df['duration_s'] = df.duration_ms / 1000\n", + "df.shape" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 215, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(1,1, figsize=(10,6))\n", + "sns.scatterplot(data=df, x='num_messages', y='duration_s', hue='nodes', style='raftTypeStr', s=10)\n", + "ax.set_xscale('log')\n", + "ax.set_yscale('log')\n", + "ax.grid()\n", + "# ax.get_legend().remove()\n", + "ax.set_xlabel(\"Number of messages\")\n", + "ax.set_ylabel(\"Duration (seconds)\")\n", + "fig.savefig('scatter.png', dpi=250)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 23, + "outputs": [], + "source": [ + "def plot_box(data, ax=None, save=False, fig=None, y='duration_s', logy=True):\n", + " if not ax:\n", + " fig, ax = plt.subplots(1,1,figsize=(20,10))\n", + " sns.boxplot(data=data, x='nodes', y=y, hue='raftTypeStr', ax=ax)\n", + " if logy:\n", + " ax.set_yscale('log')\n", + " ax.grid()\n", + " # ax.set_ylim([0, 30000])\n", + "\n", + " if fig and save:\n", + " fig.savefig('results_combined_box', dpi=250)\n", + " return fig, ax" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [], + "source": [ + "# fig, axs = plt.subplots(1,2, sharey=True, figsize=(10,5))\n", + "def plot_line(data, ax, y='duration_s', logy=True):\n", + " data['rank'] = data['nodes'].rank(method='dense') - 1\n", + " sns.lineplot(data=data, x='rank', y=y, hue='raftTypeStr',ax=ax)\n", + " if logy:\n", + " ax.set_yscale('log')\n", + " ax.grid()\n", + " return fig, ax" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 1, sharex='col', figsize=(10, 15))\n", + "\n", + "plot_box(df, ax=axs[0])\n", + "plot_line(df, ax=axs[1])\n", + "\n", + "for ax in axs:\n", + " ax.set_xlabel(\"Number of nodes\")\n", + " ax.set_ylabel(\"Duration (seconds)\")\n", + " ax.legend(title='Algorithm')\n", + "\n", + "fig.savefig('election_duration.png', dpi=250)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 68, + "outputs": [ + { + "data": { + "text/plain": "140.0" + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape[0] / 9 / 2" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 228, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 1, sharex='col', figsize=(10, 15))\n", + "\n", + "plot_box(df, ax=axs[0], y='num_messages')\n", + "plot_line(df, ax=axs[1], y='num_messages')\n", + "\n", + "for ax in axs:\n", + " ax.set_xlabel(\"Number of nodes\")\n", + " ax.set_ylabel(\"Number of sent messages\")\n", + " ax.legend(title='Algorithm')\n", + "\n", + "fig.savefig('election_messages.png', dpi=250)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# fig, axs = plt.subplots(1,2, sharey=True, figsize=(10,5))\n", + "def plot_line_v1(data, ax=None, save=False, fig=None):\n", + " if not ax:\n", + " fig, axs = plt.subplots(1,2, sharey=True, figsize=(10,5))\n", + " else:\n", + " axs = ax\n", + "\n", + " for i, (algo, group) in enumerate(data.groupby('raftTypeStr')):\n", + " ax = axs if type(axs) != np.ndarray else axs[i]\n", + " agged = group.groupby('nodes').duration_ms.agg([np.mean, np.std])\n", + " agged.plot(kind='line', y='mean', ax=ax, label=algo)\n", + " ax.fill_between(agged.index, agged['mean'] - agged['std'], agged['mean'] + agged['std'], alpha = 0.1)\n", + "\n", + " # ax.set_yscale('log')\n", + " ax.set_xlabel('Number of Nodes')\n", + " ax.set_ylabel('Average time taken in ms')\n", + " ax.grid()\n", + " if fig and save:\n", + " fig.savefig('election_time', dpi=250)\n", + " return fig, ax" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(1,1)\n", + "for i, (algo, group) in enumerate(df.groupby('raftTypeStr')):\n", + " group.groupby('nodes').duration_ms.agg([np.mean, np.std]).plot(kind='line', y='mean', yerr='std', ax=ax, label=algo)\n", + "\n", + 
"ax.set_yscale('log')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "for i in range(3):\n", + " print(i+1)\n", + " c = df[df.file.apply(lambda k: f'election_timer_{i+1}' in k)].groupby(['raftTypeStr', 'nodes', 'run']).count()\n", + " display(c[c==9].dropna())" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/B-Raft/experiments/README.md b/B-Raft/experiments/README.md new file mode 100644 index 00000000..1ba971ac --- /dev/null +++ b/B-Raft/experiments/README.md @@ -0,0 +1,46 @@ +# BRaft Experiments +To quantitatively assess the quality of our BRaft implementation run our cluster under different circumstances with different configurations. + +This is done in run_experiments.py. +At the bottom of this file one can specify what experiments to run and how many times to replicate these. + +The script then creates the different configurations and call BRaft Main with them, this starts our cluster. Logs are captured as they contain an accurate view of our system at any time. These logs are then parsed to collect metrics and draw conclusions. + +## Configuring experiments +Through the `experiment_config` dict all parameters in BRaft's [`application.conf` (link)](../src_/main/resources/application.conf) raftPrototype key can be controlled. 
These are: + +```editorconfig +raftPrototype { + raftType="BRaft" + + electionTimerIntervalMin=3 + + electionTimerIntervalMax=4 + + heartbeatTimerInterval=1 + + nodes=21 + + crashIntervalHeartbeats=1000000 + + sleepDowntime=8 + + maxTerm=9999 +} +``` + +### example +By setting +```python + num_replications = 4 + experiment_config = { + "raftType": ["Raft", "BRaft"], + "nodes": np.linspace(start=3, stop=21, num=9, dtype=int), + } +``` +in `run_experiments.py` 4 runs of experiments are done with every combination of nodes and raftType specified. This particular config will thus result in 2 * 9 * 4 = 72 output logs. + + +## Log parsing and plotting +Can be found in the [Plotting notebook](DS%20Graphs.ipynb) + diff --git a/B-Raft/experiments/__init__.py b/B-Raft/experiments/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/B-Raft/experiments/appenddata_results.json.gz b/B-Raft/experiments/appenddata_results.json.gz new file mode 100644 index 00000000..a4732e0e Binary files /dev/null and b/B-Raft/experiments/appenddata_results.json.gz differ diff --git a/B-Raft/experiments/election_results.json.gz b/B-Raft/experiments/election_results.json.gz new file mode 100644 index 00000000..f31af9b3 Binary files /dev/null and b/B-Raft/experiments/election_results.json.gz differ diff --git a/B-Raft/experiments/julian_run.txt b/B-Raft/experiments/julian_run.txt new file mode 100644 index 00000000..22c60aea --- /dev/null +++ b/B-Raft/experiments/julian_run.txt @@ -0,0 +1 @@ +C:\Users\Jbies\.jdks\azul-13.0.10\bin\java.exe "-javaagent:C:\Program Files\JetBrains\IntelliJ IDEA 2021.3.2\lib\idea_rt.jar=51173:C:\Program Files\JetBrains\IntelliJ IDEA 2021.3.2\bin" -Dfile.encoding=UTF-8 -classpath "C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\target\scala-2.13\classes;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted
Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\ch\qos\logback\logback-classic\1.2.3\logback-classic-1.2.3.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\ch\qos\logback\logback-core\1.2.3\logback-core-1.2.3.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\com\github\oshi\oshi-core\5.7.5\oshi-core-5.7.5.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\com\typesafe\akka\akka-actor-typed_2.13\2.6.0\akka-actor-typed_2.13-2.6.0.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\com\typesafe\akka\akka-actor_2.13\2.6.0\akka-actor_2.13-2.6.0.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\com\typesafe\akka\akka-slf4j_2.13\2.6.0\akka-slf4j_2.13-2.6.0.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\com\typesafe\config\1.4.1\config-1.4.1.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\io\kamon\kamon-apm-reporter_2.13\2.5.1\kamon-apm-reporter_2.13-2.5.1.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\io\kamon\kamon-bundle_2.13\2.5.1\kamon-bundle_2.13-2.5.1.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\io\kamon\kamon-core_2.13\2.5.1\kamon-core_2.13-2.5.1.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted 
Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\net\java\dev\jna\jna-platform\5.8.0\jna-platform-5.8.0.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\net\java\dev\jna\jna\5.8.0\jna-5.8.0.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\org\scala-lang\modules\scala-java8-compat_2.13\0.9.0\scala-java8-compat_2.13-0.9.0.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\org\scala-lang\scala-library\2.13.1\scala-library-2.13.1.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\org\scala-lang\scala-reflect\2.13.1\scala-reflect-2.13.1.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\org\scalactic\scalactic_2.13\3.0.8\scalactic_2.13-3.0.8.jar;C:\Users\Jbies\OneDrive\Masters\CSE\Q3\Distibuted Sytems\prototype-scala-raft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\org\slf4j\slf4j-api\1.7.30\slf4j-api-1.7.30.jar" de.maxbundscherer.scala.raft.Main \ No newline at end of file diff --git a/B-Raft/experiments/run_experiments.py b/B-Raft/experiments/run_experiments.py new file mode 100644 index 00000000..a18e9ab8 --- /dev/null +++ b/B-Raft/experiments/run_experiments.py @@ -0,0 +1,138 @@ +import atexit +import itertools +import logging +import os +import subprocess +import time + +import numpy as np +from tqdm import tqdm + +with open("run_main_pepijn.txt", "r") as f: + run_main_command = f.read() + +CURR_POPEN_PROCESS = None + + +def run_experiments( + experiment_config, + exp_dir=None, + max_elections=10, + num_replications=10, + follow_raft_log=True, +): + global CURR_POPEN_PROCESS + if not exp_dir: + exp_dir = 
def cleanup():
    """Make sure the most recently started experiment process is gone at exit.

    Registered with ``atexit`` right below. Reads the module-level
    ``CURR_POPEN_PROCESS`` handle; when it is unset (falsy) there is nothing
    to clean up. Otherwise the process is given ``timeout_sec`` seconds to
    exit on its own and is killed if it is still running after that.
    """
    timeout_sec = 5
    p = CURR_POPEN_PROCESS
    if not p:
        return
    try:
        # wait() returns as soon as the child exits, so interpreter shutdown
        # is not held up by polling in fixed one-second steps.
        p.wait(timeout=timeout_sec)
    except subprocess.TimeoutExpired:
        # Still alive after the grace period: force it down.
        p.kill()

    logging.info("Killed all processes")


atexit.register(cleanup)

if __name__ == "__main__":
    # Specify the list of values to use here, see src/main/resources/application.conf for possible variables
    # Every combination of the values set here will be run 'num_replications' times
    num_replications = 4
    experiment_config = {
        "raftType": ["Raft", "BRaft"],
        "crashIntervalHeartbeats": [10000],
        "nodes": np.linspace(start=3, stop=21, num=9, dtype=int),
    }

    logging.getLogger().setLevel("INFO")

    # output experiment directory, replace with name of experiment(group)
    experiment_dir = "resources/output/append_data_2"

    # set to true to see Raft logging in console of this process
    follow_raft_log = False

    # actually run the experiments
    run_experiments(
        experiment_config,
        exp_dir=experiment_dir,
        max_elections=10,
        num_replications=num_replications,
        follow_raft_log=follow_raft_log,
    )
C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\target\scala-2.13\classes;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\ch\qos\logback\logback-classic\1.2.3\logback-classic-1.2.3.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\org\slf4j\slf4j-api\1.7.30\slf4j-api-1.7.30.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\org\scalactic\scalactic_2.13\3.0.8\scalactic_2.13-3.0.8.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\org\scala-lang\scala-reflect\2.13.1\scala-reflect-2.13.1.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\org\scala-lang\scala-library\2.13.1\scala-library-2.13.1.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\org\scala-lang\modules\scala-java8-compat_2.13\0.9.0\scala-java8-compat_2.13-0.9.0.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\net\java\dev\jna\jna\5.8.0\jna-5.8.0.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\net\java\dev\jna\jna-platform\5.8.0\jna-platform-5.8.0.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\io\kamon\kamon-core_2.13\2.5.1\kamon-core_2.13-2.5.1.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\io\kamon\kamon-bundle_2.13\2.5.1\kamon-bundle_2.13-2.5.1.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\io\kamon\kamon-apm-reporter_2.13\2.5.1\kamon-apm-reporter_2.13-2.5.1.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\com
\typesafe\config\1.4.1\config-1.4.1.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\com\typesafe\akka\akka-slf4j_2.13\2.6.0\akka-slf4j_2.13-2.6.0.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\com\typesafe\akka\akka-actor_2.13\2.6.0\akka-actor_2.13-2.6.0.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\com\typesafe\akka\akka-actor-typed_2.13\2.6.0\akka-actor-typed_2.13-2.6.0.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\com\github\oshi\oshi-core\5.7.5\oshi-core-5.7.5.jar;C:\Users\ptema\Documents\uni\y4\IN4391\lab\AkkaRaft\null\Coursier\cache\v1\https\repo1.maven.org\maven2\ch\qos\logback\logback-core\1.2.3\logback-core-1.2.3.jar de.maxbundscherer.scala.raft.Main \ No newline at end of file diff --git a/B-Raft/src/.gitignore b/B-Raft/src/.gitignore new file mode 100644 index 00000000..c80514b0 --- /dev/null +++ b/B-Raft/src/.gitignore @@ -0,0 +1,9 @@ +../target/ +.idea/ +*.class +*.log +.DS_Store +.java-version +.idea +src/null/ +experiments/resources/ diff --git a/B-Raft/src/LICENSE b/B-Raft/src/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/B-Raft/src/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/B-Raft/src/README.md b/B-Raft/src/README.md new file mode 100644 index 00000000..82895f6a --- /dev/null +++ b/B-Raft/src/README.md @@ -0,0 +1,402 @@ +# (Prototype) Raft Consensus Algorithm in Scala + +**Protoype [Raft Consensus](https://raft.github.io/raft.pdf) Algorithm in Scala** + +![](src/docImg/logos.png) + +Tested on ``macOs 10.15.2`` with ``openjdk64-11.0.2`` and ``sbt 1.3.3`` + +[![shields.io](http://img.shields.io/badge/license-Apache2-blue.svg)](http://www.apache.org/licenses/LICENSE-2.0.txt) +![](https://github.com/maxbundscherer/prototype-scala-raft/workflows/CI%20Test/badge.svg) + +Test line-coverage: 88,11% ([12-30-2019](src/docImg/test-report-12-30-2019.zip)) + +Author: [Maximilian Bundscherer](https://bundscherer-online.de) + +## Let's get started + +- [sbt](https://www.scala-sbt.org/) and [openjdk64-11.0.2](https://jdk.java.net/archive/) are required to build and run project + +- Run with: ``sbt run`` (see ***What happens in normal run?*** below) +- Test with: ``sbt test`` (or see ci-tests in GitHub-Actions-CI-Pipeline) (see ***What happens in test run?*** below) +- Generate test-coverage-html-report with: ``sbt jacoco`` + +### Used dependencies + +- [akka actors](https://doc.akka.io/docs/akka/current/actors.html): Actor model implementation (Scala/Java). +- [scalactic](http://www.scalactic.org/): Test kit for Scala. 
+- [sbt-jacoco](https://github.com/sbt/sbt-jacoco): SBT plugin for generating coverage-reports. + +### What is implemented? + +- RaftNode as Finite-state machine (**FSM**) with **key-value storage** + + - ``(Uninitialized)``: Not initialized + - ``Follower`` (Default behavior): Waiting for heartbeats from leader-node with hashCode from data. If local stored data's hashCode is not equal to leader-node data's hashCode, the node will synchronize with leader-node. If there is no heartbeat from leader-node in configured randomized interval received, the node will change to candidate-behavior. + - ``Candidate``: The candidate requests votes from all followers and votes for himself. If he wins the election in configured interval, he will become the leader. If not, he will become follower again. For winning the election the node requires the majority of votes. + - ``Leader``: The leader is sending continuous heartbeats to all followers with hashCode from his stored data. The leader is the only node that is allowed to write data. + - ``(Sleep)``: Is used for simulating leader-crashes (triggered by crashIntervalHeartbeats in normal run or by SimulateLeaderCrash in test run). In this behavior, the node does not respond to non-debug-messages. After configured downtime, the node changes to follower-behavior. 
+ +![](src/docImg/raftFsm.png) + +#### Configuration + +There are two configurations: + +- ``./src/main/resources/application.conf`` used for normal run +- ``./src/test/resources/application.conf`` used for test run + +``` +akka { + + # Log Level (DEBUG, INFO, WARNING, ERROR) + loglevel = "DEBUG" + +} + +raftPrototype { + + # Election Timer Min (Seconds) + electionTimerIntervalMin = 2 + + # Election Timer Max (Seconds) + electionTimerIntervalMax = 3 + + # Heartbeat Timer Interval (Seconds) + heartbeatTimerInterval = 1 + + # Raft Nodes (Amount) + nodes = 5 + + # Crash Interval (auto simulate crash after some heartbeats in LEADER behavior) + crashIntervalHeartbeats = 10 + + # Sleep downtime (Seconds) (after simulated crash in SLEEP behavior) + sleepDowntime = 8 + +} +``` + +### What happens in normal run? + +All nodes start in follower behavior (some of them will change their behavior to candidate) and elect the first leader. + +After some (configured) heartbeats, the leader simulates a crash and "sleeps" for the configured downtime. The next leader is then elected. + +This happens again and again and again... until you stop the program or the earth is going to overheat. 😉 + +Data exchange (write data through leader to followers) will be tested in test run (see below). + +### What happens in test run? + +1. Leader election (after init nodes) +2. Write data through leader to followers (first write data to leader and replicate data to followers) +3. Get back data from all nodes (all nodes should have same data) +4. Simulate leader crash (triggered in test) +5. New leader election (old leader is gone) +6. Write data through leader to followers (first write data to leader and replicate data to followers) +7. 
Get back data from all nodes (all nodes should have same data) + + +The ***integration-test*** is well documented - it is self-explanatory: + +- ``./src/test/scala/de/maxbundscherer/scala/raft/RaftServiceTest.scala`` + +## Exciting (scala) stuff + +Concurrent programming in Scala is usually done with akka actors. Akka actors is an actor model implementation for Scala and Java. Akka is developed/maintained by [Lightbend](https://www.lightbend.com/) (formerly called Typesafe). + +The program and business logic is divided into separate actors. Each of these actors has its own state (own protected memory) and can only communicate with other actors by immutable messages. + +![](src/docImg/ActorModel.png) + +([Image source](https://blog.scottlogic.com/2014/08/15/using-akka-and-scala-to-render-a-mandelbrot-set.html)) + +The ``RaftNodeActor`` has the following state implemented: + +```scala +/** + * Internal (mutable) actor state + * @param neighbours Vector with another actors + * @param electionTimer Cancellable for timer (used in FOLLOWER and CANDIDATE behavior) + * @param heartbeatTimer Cancellable for timer (used in LEADER behavior) + * @param alreadyVoted Boolean (has already voted in FOLLOWER behavior) + * @param voteCounter Int (counter in CANDIDATE behavior) + * @param majority Int (calculated majority - set up in init) + * @param heartbeatCounter Int (auto simulate crash after some heartbeats in LEADER behavior) + * @param data Map (String->String) (used in FOLLOWER and LEADER behavior) + * @param lastHashCode Int (last hashcode from data) (used in FOLLOWER and LEADER behavior) + */ + case class NodeState( + var neighbours : Vector[ActorRef] = Vector.empty, + var electionTimer : Option[Cancellable] = None, + var heartbeatTimer : Option[Cancellable] = None, + var alreadyVoted : Boolean = false, + var voteCounter : Int = 0, + var majority : Int = -1, + var heartbeatCounter : Int = 0, + var data : Map[String, String] = Map.empty, + var lastHashCode : Int = -1, + ) 
+``` + +### Akka Actors Example + +```scala +package de.maxbundscherer.scala.raft.examples + +import akka.actor.{Actor, ActorLogging} + +class SimpleActor extends Actor with ActorLogging { + + override def receive: Receive = { + + case data: String => + + sender ! data + "-pong" + + case any: Any => + + log.error(s"Got unhandled message '$any'") + + } + +} +``` + +In this example, you can see a very simple akka actor: The actor is waiting for string-messages and replies with a new string (``!`` is used for [fire-and-forget-pattern](https://doc.akka.io/docs/akka/current/typed/interaction-patterns.html#fire-and-forget) / use ``?`` to use [ask-pattern](https://doc.akka.io/docs/akka/current/typed/interaction-patterns.html#request-response-with-ask-from-outside-an-actor) instead). + +Non-string-messages are displayed by an error-logger. + +### Raft nodes as akka actors + +In this project, raft nodes are implemented as an akka actor (``RaftNodeActor``) with finite-state machine (FSM) behavior (see description and image above). + +#### Finite-state machine (FSM) in akka + +You can define multiple behaviors in an akka actor - see example: + +```scala +package de.maxbundscherer.scala.raft.examples + +import akka.actor.{Actor, ActorLogging} + +object SimpleFSMActor { + + //Initialize message/command + case class Initialize(state: Int) + +} + +class SimpleFSMActor extends Actor with ActorLogging { + + import SimpleFSMActor._ + + //Actor mutable state + private var state = -1 + + //Initialized behavior + def initialized: Receive = { + + case any: Any => log.info(s"Got message '$any'") + + } + + //Default behavior + override def receive: Receive = { + + case Initialize(newState) => + + state = newState + context.become(initialized) + + case any: Any => log.error(s"Not initialized '$any'") + + } + +} +``` + +#### Service-Layer + +Classic akka actors are not type-safe. To "simulate" type safety, the service-layer (``RaftService``) was implemented. 
The service-layer is also used to spawn & initialize actors and to supervise the actor system - see examples: + +- Spawn akka actor: +```scala +actorSystem.actorOf(props = RaftNodeActor.props, name = "myRaftNode") +``` + +- Ask (type safety non-blocking request): +```scala +def ping(): Future[Pong] = { + ( actorRef ? Ping() ).asInstanceOf[Future[Pong]] +} +``` + +#### Aggregates + +The object (read-only-singleton) ``RaftAggregate`` includes all necessary classes and objects (actor messages) for ``RaftService``, ``RaftNodeActor`` and ``RaftScheduler``. + +#### Trait ``Configuration`` + +Scala traits are very similar to Java's interfaces. Traits can also include implementation. Normal classes can be extended (inheritance) by multiple traits, but only extend from one abstract class. Traits support multiple inheritance. + +In this project the trait ``Configuration`` with internal object (read-only-singleton) ``Config`` is used to pass user-config to program. + +The user-config is defined in the file ``application.conf`` and is loaded by a config-factory (see project dependencies). + +#### Trait ``RaftScheduler`` + +The trait ``RaftScheduler`` is used to control raft-nodes timers in ``RaftNodeActor`` with the following function-calls: + +- ``def stopElectionTimer()``: Used to stop electionTimer. This timer informs about "heartbeat-timeout" (``SchedulerTrigger.ElectionTimeout``) in FOLLOWER behavior and about "election-timeout" (``SchedulerTrigger.ElectionTimeout``) in CANDIDATE behavior. +- ``def restartElectionTimer()``: Used to stop and start electionTimer. +- ``def stopHeartbeatTimer()``: Used to stop heartbeatTimer. This timer informs about "send-heartbeat to all followers" (``SchedulerTrigger.Heartbeat``) in LEADER behavior. +- ``def restartHeartbeatTimer()``: Used to stop and start heartbeatTimer. +- ``def scheduleAwake()``: Used to trigger awakening automatically after downtime in SLEEP behavior (``SchedulerTrigger.Awake``). 
Awakening means: The node changes to follower-behavior. + +Timers are controlled by ``changeBehavior`` and ``followerBehavior`` in ``RaftNodeActor`` to stop and start timers dependent on the nodes' behavior: + +```scala +/** + * Before change of behavior + */ +val newBehavior: Receive = toBehavior match { + + [...] + + case BehaviorEnum.FOLLOWER => + restartElectionTimer() + stopHeartbeatTimer() + followerBehavior + + case BehaviorEnum.CANDIDATE => + restartElectionTimer() + stopHeartbeatTimer() + candidateBehavior + + [...] + +} +``` + +```scala +/** + * After change of behavior + */ +toBehavior match { + + [...] + + case BehaviorEnum.SLEEP => scheduleAwake() + + [...] + +} +``` + +```scala +/** + * In followerBehavior + */ +case Heartbeat(lastHashCode) => + + [...] + + restartElectionTimer() +``` + +#### Service Configurator Pattern + +The program architecture is based on the [Service Configurator Pattern](https://www.usenix.org/legacy/publications/library/proceedings/coots97/full_papers/jain/jain.pdf). + +The actor system & the services are started and configured in ... + +- ... object ``Main`` for normal run. +- ... trait ``BaseServiceTest`` for test run. + +## Scala compared to Go + +- Data-types in Scala and Go are strong, static, inferred and structural typed. +- Scala intends to multicore architectures and brings functional programming & object oriented programming together. To improve code quality, you should not mix both concepts. +- Go intends to multicore architectures, too, and is an alternative to the programming language C. +- Learning Scala is time-consuming and sometimes quite involved because of the necessity to be familiar with the concept of functional programming and the huge amounts of complex concepts in the basic-language implemented. +- Learning Go is not so time-consuming, because Go is built on easy & familiar concepts (for example the concept of object oriented programming). 
+- Scala is usually running in the Java-virtual-machine (JVM) and can interact with Java-libraries. Compiling Scala [native](https://github.com/scala-native/scala-native) is possible, but unusual. +- Go is running native (is not compiled to byte-code) and can interact with C-libraries. + +### Go concurrency + +The language provides multiple possibilities: + +- Concurrent execution ([goroutines](https://golangbot.com/goroutines/)) +- Synchronization and messaging ([channels](https://www.geeksforgeeks.org/channel-in-golang/) - very similar to akka actors - Buffered Channels - FIFO) +- Multi-way concurrent control ([select](https://gobyexample.com/select)) +- Low level blocking primitives ([locks/sync](https://golang.org/pkg/sync/)) + +### Scala concurrency + +In Scala ``ExecutionContext`` (default is ``ExecutionContext.global``) is responsible for executing computations. The default ``ExecutionContext`` is a global static thread pool and is based on [Java's Fork/Join](https://docs.oracle.com/javase/tutorial/essential/concurrency/forkjoin.html). For example, you can set: + +- ``scala.concurrent.context.minThreads`` +- ``scala.concurrent.context.maxThreads`` + +You can also use multiple ``ExecutionContext``s in your application (or server-cluster). + +Concurrent programming in Scala is usually done with akka actors. See "Exciting (scala) stuff" above. + +You can also use: + +- Scala Futures + +```scala +val future = Future { + getData() +} + +future.onComplete { + case Success(data) => println(s"Got $data") + case Failure(exception) => println(s"Got failure $exception") +} +``` + +- Threads and Thread Pools from Java (unusually) + +```scala +// This is unusually. Better use akka actors. 
+ +class ExampleProcessor extends Thread { + override def run() { + while(true) { + val examples = Examples.getExamples() + examples.foreach{ example => + process(example) + } + } + } +} + +//Start new thread +val thread = new ExampleProcessor() +thread.start() + +//Wait for finishing +thread.join() +``` + +### My personal opinion: + +- Scala is more empowering and you need less code. +- Go runs faster and very efficiently but sometimes feels repetitive and mechanical. +- Scala is used for high-level cloud-applications (for example [Apache Spark](https://spark.apache.org/)). +- Go is used for low-level applications to make high-level-applications possible (for example [Docker](https://www.docker.com/)). +- **Comparing both languages is quite inconclusive because of their different fields of application.** + +## Prospects + +- This implementation is a prototype and should not be used in production. +- You can use [akka cluster](https://doc.akka.io/docs/akka/current/cluster-usage.html) to run this implementation over a network on different machines. You have to modify the ``RaftService`` to spawn actors in cluster. +- Do not use Java serializer in production. It is slow and not secure. Use [Protobuf](https://github.com/protocolbuffers/protobuf) instead. 
// sbt build definition for the Raft / B-Raft prototype.
name := "prototype-scala-raft"
version := "0.1"
scalaVersion := "2.13.1"

// Versions shared by more than one artifact
val akkaVersion = "2.6.0"
val scalaTestVersion = "3.0.8"
val kamonVersion = "2.5.1"

// Akka Actors
libraryDependencies ++= Seq(
  "com.typesafe.akka" %% "akka-actor-typed" % akkaVersion,
  "com.typesafe.akka" %% "akka-testkit" % akkaVersion % Test
)

// ScalaTest
libraryDependencies ++= Seq(
  "org.scalactic" %% "scalactic" % scalaTestVersion,
  "org.scalatest" %% "scalatest" % scalaTestVersion % Test
)

// Disable buffered logs in test
Test / logBuffered := false
// Run suites sequentially
Test / parallelExecution := false

// Kamon (Akka telemetry)
libraryDependencies ++= Seq(
  "io.kamon" %% "kamon-bundle" % kamonVersion,
  "io.kamon" %% "kamon-apm-reporter" % kamonVersion
)

// Config Factory
libraryDependencies += "com.typesafe" % "config" % "1.4.0"

// Logger
libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.2.3"
diff --git a/B-Raft/src/project/plugins.sbt b/B-Raft/src/project/plugins.sbt
new file mode 100644
index 00000000..731a40b0
--- /dev/null
+++ b/B-Raft/src/project/plugins.sbt
@@ -0,0 +1 @@
+addSbtPlugin("com.github.sbt" % "sbt-jacoco" % "3.1.0")
\ No newline at end of file
diff --git a/B-Raft/src/src/main/resources/application.conf b/B-Raft/src/src/main/resources/application.conf
new file mode 100644
index 00000000..7dd70db9
--- /dev/null
+++ b/B-Raft/src/src/main/resources/application.conf
@@ -0,0 +1,51 @@
+akka {
+
+  # Log Level (DEBUG, INFO, WARNING, ERROR)
+  loglevel = "INFO"
+}
+
+kamon {
+  environment.service = "Braft"
+  apm.api-key = ""
+  apm.api-key = ${?KAMON_API_KEY}
+  instrumentation.akka.filters {
+    actors.track {
+      includes = [ "Braft/raftSystem/**"]
+    }
+
+    dispatchers {
+      includes = [ "Braft/**" ]
+    }
+
+    routers {
+      includes = [ "Braft/**" ]
+    }
+  }
+}
+
+raftPrototype {
+  # Each setting is a default followed by an optional ${?name} override; the key on both lines must match exactly
+  raftType="BRaft"
+  raftType=${?raftType}
+
+  electionTimerIntervalMin=3
+  electionTimerIntervalMin=${?electionTimerIntervalMin}
+
+  electionTimerIntervalMax=4
+  electionTimerIntervalMax=${?electionTimerIntervalMax}
+
+  heartbeatTimerInterval=1
+  heartbeatTimerInterval=${?heartbeatTimerInterval}
+
+  nodes=21
+  nodes=${?nodes}
+
+  crashIntervalHeartbeats=1000000
+  crashIntervalHeartbeats=${?crashIntervalHeartbeats}
+
+  sleepDowntime=8
+  sleepDowntime=${?sleepDowntime}
+
+  maxTerm=9999
+  maxTerm=${?maxTerm}
+}
\ No newline at end of file
diff --git a/B-Raft/src/src/main/resources/logback.xml b/B-Raft/src/src/main/resources/logback.xml
new file mode 100644
index 00000000..2fdf6e9b
--- /dev/null
+++ b/B-Raft/src/src/main/resources/logback.xml
@@ -0,0 +1,22 @@
+
+
+
+
+
+ [%date{ISO8601}]\t[%level]\t[%X{akkaSource}]\t- %msg%n
+
+
+
+
+ ${LOGFILE}}
+
+ [%date{ISO8601}]\t[%level]\t[%X{akkaSource}]\t- %msg%n
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/Main.scala
b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/Main.scala
new file mode 100644
index 00000000..e566f999
--- /dev/null
+++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/Main.scala
@@ -0,0 +1,99 @@
+package de.maxbundscherer.scala.raft
+
+import akka.actor.ActorSystem
+import akka.event.LoggingAdapter
+import akka.util.Timeout
+import de.maxbundscherer.scala.raft.aggregates.Aggregate.ActualData
+import de.maxbundscherer.scala.raft.services.RaftService
+import kamon.Kamon
+import de.maxbundscherer.scala.raft.utils.Configuration
+
+import scala.concurrent.ExecutionContextExecutor
+import scala.concurrent.duration._
+
+
+/**
+ * Experiment entry point. Starts either a Raft or a B-Raft service (chosen by `Config.raftTypeStr`),
+ * appends four key/value pairs, polls until `checkFinished` accepts the reported data,
+ * then terminates the service and exits the JVM.
+ */
+object Main extends App with Configuration {
+  /**
+   * Decide whether the reported data is consistent.
+   *
+   * @param data data sets returned by `evaluateActualData`
+   * @return true when all data sets share one hash code and the first one holds exactly 4 entries
+   */
+  def checkFinished(data: Vector[ActualData]): Boolean = {
+    log.info("Checking if data is consistent")
+    val uniqueHashCodes: Vector[Int] = data.map(_.data.hashCode()).distinct
+    uniqueHashCodes.size == 1 && data.head.data.size == 4
+  }
+
+  import de.maxbundscherer.scala.raft.services._
+//  Kamon.init()
+  private implicit val actorSystem: ActorSystem = ActorSystem("raftSystem")
+  private implicit val executionContext: ExecutionContextExecutor = actorSystem.dispatcher
+  private implicit val timeout: Timeout = 15.seconds
+  private val log: LoggingAdapter = actorSystem.log
+  log.warning(s"Starting Main with Config:$Config")
+
+  // No nice inheritance here unfortunately (in the interest of time...)
+  if (Config.raftTypeStr == "Raft") {
+    val raftService = new RaftService(numberNodes = Config.nodes)
+
+    Thread.sleep(8000)
+
+    raftService.appendData("x", "5")
+    raftService.appendData("y", "4")
+    raftService.appendData("z", "3")
+    raftService.appendData("q", "2")
+
+    Thread.sleep(5000)
+    try {
+      var data: Vector[ActualData] = raftService.evaluateActualData
+      while(!checkFinished(data)) {
+        Thread.sleep(10000)
+        data = raftService.evaluateActualData
+      }
+      log.info(s"[VERIFY APPEND DATA], Consistent")
+    }
+    catch {
+      // NonFatal keeps interrupts and fatal JVM errors from being logged as a timeout
+      case scala.util.control.NonFatal(_) => log.info(s"[UNABLE TO VERIFY DUE TO TIMEOUT]")
+    }
+
+    log.warning("Press [Enter] to terminate actorSystem")
+//    scala.io.StdIn.readLine()
+    raftService.terminate()
+  } else {
+    val raftService = new BRaftService(numberNodes = Config.nodes)
+
+    Thread.sleep(8000)
+
+    raftService.appendBRaftData("x", "5")
+    raftService.appendBRaftData("y", "4")
+    raftService.appendBRaftData("z", "3")
+    raftService.appendBRaftData("q", "2")
+
+    Thread.sleep(5000)
+    try {
+      var data: Vector[ActualData] = raftService.evaluateActualData
+      while(!checkFinished(data)) {
+        Thread.sleep(10000)
+        data = raftService.evaluateActualData
+      }
+      log.info(s"[VERIFY APPEND DATA], Consistent")
+    }
+    catch {
+      // NonFatal keeps interrupts and fatal JVM errors from being logged as a timeout
+      case scala.util.control.NonFatal(_) => log.info(s"[UNABLE TO VERIFY DUE TO TIMEOUT]")
+    }
+
+    log.warning("Press [Enter] to terminate actorSystem")
+//    scala.io.StdIn.readLine()
+    raftService.terminate()
+  }
+  Kamon.stop()
+  System.exit(0)
+}
\ No newline at end of file
diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/actors/BRaftNodeActor.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/actors/BRaftNodeActor.scala
new file mode 100644
index 00000000..fabf2cb7
--- /dev/null
+++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/actors/BRaftNodeActor.scala
@@ -0,0 +1,824 @@
+package de.maxbundscherer.scala.raft.actors
+
+import akka.actor.{Actor, ActorLogging, ActorRef}
+import de.maxbundscherer.scala.raft.aggregates.Aggregate.BehaviorEnum.BehaviorEnum
+import
de.maxbundscherer.scala.raft.aggregates.Aggregate._
+import de.maxbundscherer.scala.raft.aggregates.BRaftAggregate.GrantVote.GrantVoteSigned
+import de.maxbundscherer.scala.raft.aggregates.BRaftAggregate.LogEntry
+import de.maxbundscherer.scala.raft.schnorr.Schnorr.{string_sign, string_verify}
+import de.maxbundscherer.scala.raft.utils.{Configuration, Hasher, RaftScheduler}
+
+import scala.collection.mutable.ArrayBuffer
+import scala.concurrent.ExecutionContext
+
+// TODO verify/sign more (or all?) messages?
+object BRaftNodeActor {
+
+  import akka.actor.Props
+
+  val prefix: String = "BraftNodeActor"
+
+  def props()(implicit executionContext: ExecutionContext): Props = Props(new BRaftNodeActor())
+
+  /**
+   * Internal (mutable) actor state
+   *
+   */
+
+  /**
+   *
+   * @param lastHashCode           BigInt (incremental hash of the last entry added to entryLog, -1 before the first entry)
+   * @param entryLog               ArrayBuffer[LogEntry] to keep track of all entries, in the order they arrive, and whether they
+   *                               are committed yet
+   * @param appendEntryResponseMap Map (Int ->Set[String]) that links LogEntry indices to a set of nodes that have already
+   *                               written that log entry. Used to control when an entry is committed by the node.
+   * @param publicKey              PublicKey used by other nodes to verify messages
+   * @param privateKey             Private Key used by this node to sign messages
+   * @param hasher                 Hasher (sha256)
+   * @param publicKeyStorage       Map(String -> BigInt) stores the public keys of all other nodes, used for verifying messages
+   *                               were not tampered with.
+   * @param term                   current Leader term
+   * @param byzantineActor         optional ActorRef; not read or written by this actor in this file (TODO: confirm purpose)
+   * @param behaviour              Current behaviour this node is exhibiting (one of :
+   *                               UNINITIALIZED, FOLLOWER, CANDIDATE, LEADER, SLEEP)
+   * @param forceIamNotConsistent  boolean to force an IamNotConsistent RPC to be sent.
+   *                               Used when a majority of nodes committed a LogEntry, but this node
+   *                               has not received it yet.
+   * @param voteRequestResponses   Map to track the Votes of each node in a Leader election (only for Leader state)
+   *                               Maps the node ID to GrantVoteSigned object
+   */
+  case class BRaftNodeState(
+                             var lastHashCode: BigInt = -1,
+                             var entryLog: ArrayBuffer[LogEntry] = ArrayBuffer(),
+                             var appendEntryResponseMap: Map[Int, Set[String]] = Map.empty,
+                             var publicKey: BigInt = -1,
+                             var privateKey: BigInt = -1,
+                             var hasher: Hasher = Hasher(),
+                             var publicKeyStorage: Map[String, BigInt] = Map.empty,
+                             var term: Int = 0,
+                             var byzantineActor: Option[ActorRef] = None,
+                             var behaviour: BehaviorEnum = BehaviorEnum.UNINITIALIZED,
+                             var forceIamNotConsistent: Boolean = false,
+                             var voteRequestResponses: Map[String, GrantVoteSigned] = Map.empty,
+                             var lastHeartBeatTimestamp: Long = 0
+                           ) extends NodeState
+
+}
+
+/**
+ * ------------------
+ * --- BRaft Node ----
+ * ------------------
+ *
+ * # 5 Behaviors (Finite-state machine / FSM)
+ *
+ * !!! SEE PROJECT README !!!
+ *
+ * - (UNINITIALIZED)
+ * - FOLLOWER (Default - after init)
+ * - CANDIDATE (after election timeout)
+ * - LEADER
+ * - (SLEEP) (after simulated crash in LEADER)
+ */
+class BRaftNodeActor()(implicit val executionContext: ExecutionContext)
+  extends Actor
+    with ActorLogging
+    with RaftScheduler
+    with Configuration {
+
+  import BRaftNodeActor._
+  import de.maxbundscherer.scala.raft.aggregates.BRaftAggregate._
+
+  val notHandled = new Object
+
+  val notHandledFun: Any => Object = (_: Any) => notHandled
+
+  override def aroundReceive(receive: Receive, msg: Any): Unit = {
+    log.info(s"Received message:${msg.getClass.toGenericString},${this.state.behaviour},${this.state.term}")
+    super.aroundReceive(receive, msg)
+  }
+
+  /**
+   * Mutable actor state
+   */
+  override val state: BRaftNodeState = BRaftNodeState()
+
+  log.info("Actor online (uninitialized)")
+
+  /**
+   * Uninitialized behavior
+   */
+  override def receive: Receive = {
+
+    case InitActor(neighbours, keyPair, clientPublicKey, keys) =>
+      log.info("Initializing actor")
+
+      state.neighbours = neighbours
+      state.majority = ((neighbours.size + 1) / 2) + 1
+      state.privateKey = keyPair._1
+      state.publicKey = keyPair._2
+      state.publicKeyStorage = keys
+      state.publicKeyStorage += (CLIENT_NAME -> clientPublicKey)
+
+      log.debug(s"State: ${state.publicKeyStorage.toString()}")
+      log.debug(s"pubkey: ${state.publicKey}, keypair: ${keyPair}")
+
+      changeBehavior(
+        fromBehavior = BehaviorEnum.UNINITIALIZED,
+        toBehavior = BehaviorEnum.FOLLOWER,
+        loggerMessage = s"Got ${state.neighbours.size} neighbours (majority=${state.majority})"
+      )
+
+    case any: Any => log.error(s"Node is not initialized but got message $any")
+
+  }
+
+  def verify_votes(term: Int, voteRequestResponses: Map[String, GrantVoteSigned]): Boolean = {
+    // For each voteRequestReponse verify signature (votes from nodes without a stored key are invalid), and count number of granted votes
+    val validVotes = voteRequestResponses.filter(entry => {
+      val node: String = entry._1
+      val grantVoteSigned: GrantVoteSigned = entry._2
+      state.publicKeyStorage.get(node).exists(key => string_verify(key, grantVoteSigned.vote.toString, grantVoteSigned.signature))
+    })
+
+    validVotes.count(entry => entry._2.granted) >= state.majority && term > state.term
+  }
+
+  /**
+   * Raft FOLLOWER
+   */
+  def followerBehavior: Receive = {
+
+    case _: GrantVoteSigned => //Ignore message
+
+    case InitiateLeaderElection =>
+
+      changeBehavior(fromBehavior = BehaviorEnum.FOLLOWER,
+        toBehavior = BehaviorEnum.CANDIDATE,
+        loggerMessage = "No heartbeat from leader (InitiateLeaderElection case")
+
+    case SchedulerTrigger.ElectionTimeout =>
+
+      changeBehavior(fromBehavior = BehaviorEnum.FOLLOWER,
+        toBehavior = BehaviorEnum.CANDIDATE,
+        loggerMessage = "No heartbeat from leader, ElectionTimeOut")
+
+    case BroadcastTerm(term, voteRequestResponses) =>
+      if (verify_votes(term, voteRequestResponses)) {
+        log.info(s"Updated term from: ${state.term}, to new term: ${term}, got enough valid votes")
+        state.term = term
+
+        restartElectionTimer()
+      } else {
+        log.info(s"Node ${sender().path.name} says it is the new leader, but not enough votes are valid or granted, or term was lower")
+        changeBehavior(fromBehavior = BehaviorEnum.FOLLOWER,
+          toBehavior = BehaviorEnum.CANDIDATE,
+          loggerMessage = "Leader did not get enough valid votes")
+      }
+
+
+    case BroadcastKey(actorID, publicKey) =>
+
+      state.publicKeyStorage.get(actorID) match {
+        case Some(_) => // Already in the map
+        case None =>
+          state.publicKeyStorage += (actorID -> publicKey) // Not in the map yet, therefore add it
+          log.debug(s"I received a BroadcastKey from: ${sender.path.name} and append it to my storage")
+      }
+
+
+    case SimulateLeaderCrash => sender ! IamNotTheLeader(actorName = self.path.name)
+
+    case WhoIsLeader => sender ! IamNotTheLeader(actorName = self.path.name)
+
+    case _: AppendData => sender ! IamNotTheLeader(actorName = self.path.name)
+
+    case GetActualData =>
+
+      log.info(s"Returning actual data: ${state.data}")
+      sender ! ActualData(data = state.data)
+
+    case GetState =>
+
+      log.info("I have sent my state")
+      sender ! MyStateIs(state)
+
+    case Heartbeat(lastHashCode, publicKeysStorage, term) =>
+
+      // TODO : we have to take care of the scenario when a leader crash is initiated and the leader changes behavior from leader
+      // to sleep. In this period a new leader is elected and the publicKeyStorage and term will be updated accordingly. Only, as
+      // as the node is asleep it does not receive these updates. Therefore we need to make sure that this node will become eventually
+      // consistent i.e. has for the current term the right termID and publicKeyStorage to prevent inconsistent states.
+      //
+      // Do we want to send the publicKeyStorage everytime in a heartbeat? Does seem a bit cumbersome.
+      // Alternatively, we can change the behavior for a sleeping node as soon as it wakes up? But it is actually unaware who the
+      // new leader is therefore we can't ask or send IAmInconsistent to the leader. We have to wait for a heartbeat.
+
+      log.debug(s"Got heartbeat from (${sender().path.name}), own entrylog: ${state.entryLog}")
+      log.debug(s"Got heartbeat from (${sender().path.name}), own data: ${state.data}")
+
+      // One case in which this is called is when we had a sleeper node that just woken up and is unaware of the new
+      // term and key storage that were sent by the new leader.
+      val hashCodeEqual = lastHashCode.equals(state.lastHashCode)
+      val termEqual = this.state.term == term
+      val pubKeyStorageEqual = this.state.publicKeyStorage.equals(publicKeysStorage)
+//      val waitingForAERs = this.state.entryLog.count(logEntry => !logEntry.committed) > 0
+
+      if (!hashCodeEqual || !pubKeyStorageEqual || this.state.forceIamNotConsistent || !termEqual
+//          || waitingForAERs
+      ) {
+        var reason = ""
+        if (!hashCodeEqual) reason += s"HashCode was not equal (was ${state.lastHashCode} expected $lastHashCode),"
+        if (!termEqual) reason += s"Term was not equal(was ${this.state.term} expected ${term}), "
+        if (!pubKeyStorageEqual) reason += s"PubKeyStorage was not equal(was ${this.state.publicKeyStorage.keys} expected ${publicKeysStorage.keys}), "
+        if (this.state.forceIamNotConsistent) reason += s"Forcing Inconsistent to update values"
+        this.state.forceIamNotConsistent = false
+//        if (waitingForAERs) reason += s"Waiting for Append Entries Responses"
+        log.info(s"I am not consistent - request data from leader (reason: $reason)")
+        sender ! IamNotConsistent
+      }
+
+      restartElectionTimer()
+
+    /**
+     * AppendEntries happens here, but only to the log because we don't know if enough nodes have written it.
+     */
+    case OverrideData(entryLog: ArrayBuffer[LogEntry], publicKeysStorage, term) =>
+      log.info(s"Received OverrideData: entryLog: $entryLog, term: $term")
+      this.state.publicKeyStorage = publicKeysStorage
+      this.state.term = term
+
+      val startIndex: Int = if (state.entryLog.nonEmpty) state.entryLog.length else 0
+      // Only replay log if we got new entries
+      if (entryLog.nonEmpty && startIndex < entryLog.length) {
+        log.debug(s"Overriding data from index $startIndex, received log len: ${entryLog.length}, " +
+          s"entries: ${entryLog.toString()} \nslice: ${entryLog.slice(startIndex, entryLog.length)}")
+        // Replay all entries from latest index we have logged upwards
+        entryLog.slice(startIndex, entryLog.length).foreach(entry => handleReceivedEntry(entry))
+        log.debug(s"finished replaying, own log: ${state.entryLog.toString()}")
+        log.info(s"Follower is writing data (newHashCode = ${state.entryLog.last.hash})")
+      }
+
+    case RequestVote(term) =>
+
+      val heartBeatPeriodPassed = state.lastHeartBeatTimestamp < (System.currentTimeMillis() - electionTimeout)
+      log.info(s"Incoming VR from: ${sender.path.name}, state.alreadyVoted should be false," +
+        s"is ${state.alreadyVoted}, heartBeatPeriodPassed=$heartBeatPeriodPassed +, state.electionTimer.isDefined " +
+        s"should be true, is ${state.electionTimer.isDefined}, received term is $term, own term is ${state.term}")
+
+      if (!state.alreadyVoted && term > state.term && heartBeatPeriodPassed) {
+        log.info(s"I voted for actor: ${sender.path.name}")
+        // We send the along with the signature, also the public key to the leader so that
+        // he can store all the public keys of all the actors that are currently participating
+        // in this term.
+        log.info(s"Granting vote to ${sender().path.name}")
+        val vote = Vote(granted = true, from = this.self.path.name, to = sender().path.name, term = term)
+        sender ! GrantVoteSigned(string_sign(this.state.privateKey, vote.toString), granted = true, vote)
+        state.alreadyVoted = true
+      } else {
+        log.info(s"I did not vote granted for actor: ${sender.path.name}")
+        val vote = Vote(granted = false, from = this.self.path.name, to = sender().path.name, term = state.term + 1)
+        sender ! GrantVoteSigned(string_sign(this.state.privateKey, vote.toString), granted = false, vote)
+      }
+
+    case appendEntriesResponse: AppendEntriesResponse =>
+      handleAppendEntriesResponse(appendEntriesResponse, sender().path.name)
+
+
+    case any: Any =>
+
+      log.warning(s"Got unhandled message in followerBehavior '${any.getClass.getSimpleName}' from (${sender().path.name})")
+
+  }
+
+  /**
+   * Raft CANDIDATE
+   */
+  def candidateBehavior: Receive = {
+
+    case GetActualData =>
+
+      log.info(s"Returning actual data: ${state.data}")
+      sender ! ActualData(data = state.data)
+
+    case appendEntriesResponse: AppendEntriesResponse =>
+      handleAppendEntriesResponse(appendEntriesResponse, sender().path.name)
+
+    case BroadcastKey(_, _) => // Ignore this case here, a new node can not join during an election
+
+    case GetState =>
+
+      log.info("I have sent my state")
+      sender ! MyStateIs(state)
+
+    case SchedulerTrigger.ElectionTimeout =>
+
+      state.term = state.term - 1
+
+      changeBehavior(
+        fromBehavior = BehaviorEnum.CANDIDATE,
+        toBehavior = BehaviorEnum.FOLLOWER,
+        loggerMessage = s"Not enough votes (${state.voteCounter}/${state.majority})")
+
+    case BroadcastTerm(_, _) => //Ignore
+
+    case Heartbeat(_, _, term) =>
+
+    //      if (state.term < term) {
+    //        state.term = state.term - 1
+    //
+    //        changeBehavior(
+    //          fromBehavior = BehaviorEnum.CANDIDATE,
+    //          toBehavior = BehaviorEnum.FOLLOWER,
+    //          loggerMessage = s"Not enough votes (${state.voteCounter}/${state.majority})")
+    //      }
+
+    case _: RequestVote =>
+      val vote = Vote(granted = false, from = this.self.path.name, to = sender().path.name, term = this.state.term + 1)
+      sender ! GrantVoteSigned(string_sign(this.state.privateKey, vote.toString), granted = false, vote)
+
+    case SimulateLeaderCrash => sender ! IamNotTheLeader(actorName = self.path.name)
+
+    case WhoIsLeader => sender ! IamNotTheLeader(actorName = self.path.name)
+
+    case _: AppendData => sender ! IamNotTheLeader(actorName = self.path.name)
+
+    case grantVote: GrantVoteSigned =>
+      if (grantVote.granted && state.publicKeyStorage.get(sender().path.name).exists(key => string_verify(key, grantVote.vote.toString, grantVote.signature))) {
+        state.voteCounter = state.voteCounter + 1
+
+        log.debug(s"Got vote ${state.voteCounter}/${state.majority} from (${sender().path.name})")
+        state.voteRequestResponses += sender().path.name -> grantVote
+        if (state.voteCounter >= state.majority) {
+
+          log.info(s"The new term is ${state.term}")
+
+          changeBehavior(
+            fromBehavior = BehaviorEnum.CANDIDATE,
+            toBehavior = BehaviorEnum.LEADER,
+            loggerMessage = s"Become leader - enough votes (${state.voteCounter}/${state.majority})"
+          )
+
+          // TODO : as the leader is elected we want to broadcast all the publicKeys it has received during the
+          // leader election of the nodes that have participated.
+
+
+          log.info("Became leader, broadcasting term to all neighbours")
+          state.neighbours.foreach({ neighbour =>
+            neighbour ! BroadcastTerm(state.term, state.voteRequestResponses)
+          })
+
+        }
+      }
+
+    case any: Any =>
+      log.warning(s"Got unhandled message in candidateBehavior '${any.getClass.getSimpleName}' from (${sender().path.name})")
+  }
+
+  /**
+   * Raft LEADER
+   */
+  def leaderBehavior: Receive = {
+    case msg: Any =>
+      leaderMatch(msg)
+  }
+
+  /**
+   * Default leader behavior
+   *
+   * @param msg received message
+   */
+  def leaderMatch(msg: Any): Unit = msg match {
+    case BroadcastKey(actorID, publicKey) =>
+      log.info("BroadCastKeys")
+      state.publicKeyStorage.get(actorID) match {
+        case Some(_) => // Already in the map
+        case None => state.publicKeyStorage += (actorID -> publicKey) // Not in the map yet, therefore add it
+      }
+
+    case BroadcastTerm(term, voteRequestResponses) =>
+      if (verify_votes(term, voteRequestResponses)) {
+        changeBehavior(fromBehavior = BehaviorEnum.LEADER,
+          toBehavior = BehaviorEnum.FOLLOWER,
+          loggerMessage = "there is a new leader with higher term and majority of the votes, stepping down")
+      }
+
+    case SchedulerTrigger.Heartbeat =>
+
+      log.info(s"My state is: ${state.behaviour}, and I am going to send a heartbeat! (counter = ${state.heartbeatCounter})")
+
+      state.neighbours.foreach(neighbour => neighbour ! Heartbeat(lastHashCode = state.lastHashCode, state.publicKeyStorage, state.term))
+
+      state.heartbeatCounter = state.heartbeatCounter + 1
+
+      if (state.heartbeatCounter >= Config.crashIntervalHeartbeats) {
+        changeBehavior(
+          fromBehavior = BehaviorEnum.LEADER,
+          toBehavior = BehaviorEnum.SLEEP,
+          loggerMessage = s"Simulated test crash (crashIntervalHeartbeats) - sleep ${Config.sleepDowntime} seconds now"
+        )
+      }
+
+    case BroadcastTerm => //Ignore
+
+    case _: GrantVoteSigned => //Ignore message
+
+    case GrantVote => //Ignore message
+
+    case _: RequestVote => //Ignore message
+
+    case appendEntriesResponse: AppendEntriesResponse =>
+      handleAppendEntriesResponse(appendEntriesResponse, sender().path.name)
+
+    case SimulateLeaderCrash =>
+
+      sender ! LeaderIsSimulatingCrash(actorName = self.path.name)
+
+      changeBehavior(
+        fromBehavior = BehaviorEnum.LEADER,
+        toBehavior = BehaviorEnum.SLEEP,
+        loggerMessage = s"Simulated test crash (externalTrigger) - sleep ${Config.sleepDowntime} seconds now"
+      )
+
+    case WhoIsLeader =>
+
+      sender ! IamTheLeader(actorName = self.path.name)
+
+    case AppendData(key, value, signature) =>
+      log.info(s"Appending new data in Leader Node: ($key -> $value)")
+
+      // Create the hash and find the relevant public key
+      val newHash = createIncrementalHash(key, value, state.lastHashCode)
+      val publicKey = state.publicKeyStorage(CLIENT_NAME)
+
+      // Check if the signature is correct with verify
+      if (!string_verify(publicKey, s"$key,$value", signature)) {
+        log.warning("Signature from client was invalid. Ignoring this message")
+        sender ! WriteResponse(actorName = self.path.name, success = false, reason = "Signature from client was Invalid")
+      } else {
+        sender ! WriteResponse(actorName = self.path.name, success = true, "Write successful in leader")
+        handleReceivedEntry(LogEntry(key, value, newHash, signature))
+        log.info(s"[SEND APR], ${System.currentTimeMillis()}, (key/value = $key->$value), (newHashCode = ${state.lastHashCode}), Leader is writing data")
+
+        // AppendEntriesResponse:
+        // TODO what to do here, entry is not committed/replicated yet
+        //        sender ! WriteSuccess(actorName = self.path.name)
+      }
+
+    case GetActualData =>
+
+      log.info(s"Returning actual data: ${state.data}")
+      sender ! ActualData(data = state.data)
+
+    case GetState =>
+
+      log.info("I have sent my state")
+      sender ! MyStateIs(state)
+
+    case BroadcastKey(actorID, publicKey) =>
+
+      // First add the public key of the new node in the map, if it isn't already. Of course
+      // it is not always the case that an inconsistent node is a new node (such as a sleeper)
+
+      // Check if the key is already in the store, if not we add it and broadcast the new key to all the followers as a heartbeat
+
+      state.publicKeyStorage.get(actorID) match {
+        case Some(_) => // Hooray we already have this actor in our storage
+        case None => // This actor is unknown to us as of now, we are broadcasting the new key
+
+          state.publicKeyStorage += (actorID -> publicKey) // Not in the map yet, therefore add it
+          log.debug(s"I received a BroadcastKey from: ${sender.path.name} and append it to my storage")
+
+          val followers = state.neighbours.filter(neighbour => neighbour != sender)
+
+          followers.foreach(follower => {
+            follower ! BroadcastKey(actorID, publicKey)
+          })
+      }
+
+    case IamNotConsistent =>
+      // AppendEntries
+      log.info(s"Leader received IamNotConsistent from ${sender().path.name}")
+      sender ! OverrideData(data = state.entryLog, publicKeysStorage = this.state.publicKeyStorage, term = this.state.term)
+
+    case Heartbeat(_, _, _) =>
+      if (sender().path.name != this.self.path.name) {
+        log.debug(s"Received heartbeat from ${sender().path.name}")
+      }
+
+    case BecomeByzantine =>
+      changeBehavior(
+        fromBehavior = BehaviorEnum.LEADER,
+        toBehavior = BehaviorEnum.BYZANTINELEADER,
+        loggerMessage = s"Received BecomeByzantine RPC, transitioning to Byzantine Leader"
+      )
+
+    case any: Any =>
+      log.warning(s"Got unhandled message in leaderBehavior '${any.getClass.getSimpleName}' from (${sender().path.name})")
+  }
+
+  /**
+   * Byzantine leader behavior
+   *
+   * @param msg received message
+   */
+  def byzantineLeaderMatch(msg: Any): Unit = msg match {
+
+    case AppendData(key, value, signature) =>
+      log.info(s"Appending new data in Byzantine Leader Node: ($key -> $value)")
+
+      // Create the hash and find the relevant public key
+      val newHash = createIncrementalHash(key, value, state.lastHashCode)
+      val publicKey = state.publicKeyStorage(CLIENT_NAME)
+
+      handleReceivedEntry(LogEntry(key, value, newHash, signature))
+      log.info(s"Byzantine Leader is writing data ($key->$value) (newHashCode = ${newHash})\n " +
+        s"Waiting for enough nodes to write before committing")
+
+      // AppendEntriesResponse:
+      // TODO what to do here, entry is not committed/replicated yet
+      //        sender ! WriteSuccess(actorName = self.path.name)
+      sender ! WriteResponse(actorName = self.path.name, success = true, "Write successful in leader")
+
+    case any =>
+      leaderMatch(any)
+  }
+
+  /**
+   * Byzantine Raft LEADER
+   */
+  def byzantineLeaderBehavior: Receive = {
+    case msg: Any =>
+      log.debug("ByzantineBehavior")
+      byzantineLeaderMatch(msg)
+  }
+
+  /**
+   * Sleep behavior
+   */
+  def sleepBehavior: Receive = {
+
+    case GetActualData =>
+
+      log.info(s"Returning actual data: ${state.data}")
+      sender ! ActualData(data = state.data)
+
+    case SchedulerTrigger.Awake =>
+
+      changeBehavior(fromBehavior = BehaviorEnum.SLEEP,
+        toBehavior = BehaviorEnum.FOLLOWER,
+        loggerMessage = s"Awake after ${Config.sleepDowntime} seconds downtime")
+
+    case SimulateLeaderCrash => sender ! IamNotTheLeader(actorName = self.path.name)
+
+    case WhoIsLeader => sender ! IamNotTheLeader(actorName = self.path.name)
+
+    case _: AppendData => sender ! IamNotTheLeader(actorName = self.path.name)
+
+    case appendEntriesResponse: AppendEntriesResponse =>
+      handleAppendEntriesResponse(appendEntriesResponse, sender.path.name)
+
+  }
+
+  /**
+   * Store a newly received entry until we are confident more
+   * than half the nodes have written it.
+   */
+  private def handleReceivedEntry(logEntry: LogEntry): Unit = {
+    log.info(s"Entry received: $logEntry")
+    if (addEntryToLog(logEntry)) {
+      val idx = state.entryLog.length - 1
+      val s = this.state.appendEntryResponseMap.getOrElse(idx, Set.empty)
+      this.state.appendEntryResponseMap += (idx -> s)
+
+      // Broadcast AppendEntriesResponse to all neighbours
+      val appendEntriesResponse = AppendEntriesResponse(idx, state.entryLog.last.hash)
+      appendEntriesResponse.sign(state.privateKey)
+
+      log.debug(s"Signing entry at ${appendEntriesResponse.index} with hash ${appendEntriesResponse.hash}, " +
+        s"pubkey: ${state.publicKey}, private key: ${state.privateKey}: " +
+        s"signature: ${appendEntriesResponse.signature}")
+
+      this.state.neighbours.foreach(node => node ! appendEntriesResponse)
+    } else {
+      // TODO inconsistent hash, but how to handle?
+    }
+  }
+
+  /**
+   * Handle an AppendEntriesResponse received from another node. This message states
+   * that the sender has written the entry to their log.
+   *
+   * @param appendEntriesResponse Message sent by the node to notify the cluster that it has written an entry
+   * @param senderName            Name of the node that sent this AppendEntriesResponse
+   */
+  private def handleAppendEntriesResponse(appendEntriesResponse: AppendEntriesResponse, senderName: String): Unit = {
+    // verify signature
+    val index = appendEntriesResponse.index
+    val hash = appendEntriesResponse.hash
+    val signature = appendEntriesResponse.signature
+    // We only care about this message if
+    // 1. we have not received the message to write this entry
+    // 2. or we have not committed this entry ourselves.
+    if (state.entryLog.length <= index || (!state.entryLog(index).committed)) {
+      if (state.publicKeyStorage.get(senderName).exists(key => string_verify(key, appendEntriesResponse.toString, signature))) {
+
+        log.debug(s"Signature verified, updating uncommittedEntries at index $index to include $senderName")
+        var newCommittedNeighboursSet: Set[String] = state.appendEntryResponseMap.getOrElse(index, Set.empty)
+        newCommittedNeighboursSet += senderName
+        state.appendEntryResponseMap += (index -> newCommittedNeighboursSet)
+
+        // Check if we need to write an uncommitted entry to data
+        if (newCommittedNeighboursSet.size >= state.majority && this.state.entryLog.size > index && hash == this.state.entryLog(index).hash) {
+          // Check if we have 'old' entries that also need to be committed (as they inductively got enough
+          // append entries responses)
+          val indicesToCommit = Range(0, index+1).filter(i => state.entryLog.size > i && !state.entryLog(i).committed)
+          log.info(s"enough nodes committed entry/entries at index $indicesToCommit, I am also committing this entry")
+
+          indicesToCommit.foreach(f = commitEntry)
+        }
+      } else {
+        log.warning(s"Signature from $senderName was invalid, ignoring msg")
+        log.debug(s"AppendEntriesResponse at $index from $senderName, verifiedMessage = false, " +
+          s"pubkey of sender:\t ${state.publicKeyStorage.getOrElse(senderName, "unknown")}, with msg: \t${appendEntriesResponse.toString}, " +
+          s"signature:\t $signature")
+      }
+    } else {
+      log.debug(s"Entry at $index already committed")
+    }
+  }
+
+  /**
+   * Write a LogEntry from entryLog to state.data.
+   * Called when a majority of nodes has written an entry
+   *
+   * @param entryLogIndex Index of the entry to write.
+   */
+  private def commitEntry(entryLogIndex: Int): Unit = {
+    if (state.entryLog.length <= entryLogIndex) {
+      // inconsistent, we don't have access to the leader here so we wait for
+      // a heartbeat and then respond with IamNotConsistent.
+      log.warning("Trying to commit log which is not in my entryLog, forcing IamNotConsistent")
+      this.state.forceIamNotConsistent = true
+    } else {
+      val entry = state.entryLog(entryLogIndex)
+      state.data += (entry.key -> entry.value)
+      log.info(s"[FOLLOWER WRITING DATA], ${System.currentTimeMillis()}, (data = ${state.data}), (newHashCode = ${state.lastHashCode}), The follower is writing the new data")
+      entry.committed = true
+    }
+  }
+
+  private def clientEntryToString(key: String, value: String) = {
+    s"$key,$value"
+  }
+
+  /**
+   * Write a LogEntry to the entryLog, also check the hash to verify the order.
+   *
+   * @param logEntry to write
+   * @return whether the write was successful
+   */
+  private def addEntryToLog(logEntry: LogEntry): Boolean = {
+    val (key, value, expectedHash, clientSignature) = (logEntry.key, logEntry.value, logEntry.hash, logEntry.clientSignature)
+
+    // Verify this message actually came from the client
+    if (string_verify(state.publicKeyStorage(CLIENT_NAME), clientEntryToString(key, value), clientSignature)) {
+
+      log.debug(s"addEntryToLog(key=$key,value=$value), prevhash=${state.lastHashCode}")
+      val hashCode = createIncrementalHash(key, value, state.lastHashCode)
+      if (hashCode == expectedHash) {
+        this.state.lastHashCode = hashCode
+        state.entryLog.addOne(LogEntry(key, value, hashCode, clientSignature))
+        log.debug(s"addEntryToLog finished successfuly, new hash=${state.lastHashCode}")
+        true
+      } else {
+        log.debug(s"Inconsistent hash in addEntryToLog, own computed hash=\n:$hashCode, but received hash was:" +
+          s"\n:$expectedHash, \n:${hashCode - expectedHash}")
+        false
+      }
+    } else {
+      log.warning(s"Received LogEntry with invalid client signature (was $clientSignature), discarding")
+      false
+    }
+  }
+
+  /**
+   * Create the incremental hash used to verify data lineage.
+   * It's composed of the key and value of the current entry, and the hash of the
+   * previous entry. This ensures that if two nodes have equal hashes
+   * at index i, then all entries before index i were equal.
+   *
+   * @param key      key
+   * @param value    value
+   * @param lastHash previous incremental hash
+   * @return hash
+   */
+  private def createIncrementalHash(key: String, value: String, lastHash: BigInt): BigInt = {
+    val hashstr = s"key=$key value=$value lasthash=$lastHash"
+    log.debug(s"Hashing: $hashstr")
+    val hash = this.state.hasher.hash(hashstr)
+    log.debug(s"create Incremental Hash: $hash")
+    hash
+  }
+
+  /**
+   * Change actor behavior
+   *
+   * @param fromBehavior  Behavior
+   * @param toBehavior    Behavior
+   * @param loggerMessage String (logging)
+   */
+  private def changeBehavior(fromBehavior: BehaviorEnum,
+                             toBehavior: BehaviorEnum,
+                             loggerMessage: String): Unit = {
+
+    log.info(s"Change behavior from '$fromBehavior' to '$toBehavior' ($loggerMessage)")
+
+    /**
+     * Before change of behavior
+     */
+    val newBehavior: Receive = toBehavior match {
+
+      case BehaviorEnum.FOLLOWER =>
+        restartElectionTimer()
+        stopHeartbeatTimer()
+        followerBehavior
+
+      case BehaviorEnum.CANDIDATE =>
+        restartElectionTimer()
+        stopHeartbeatTimer()
+        candidateBehavior
+
+      case BehaviorEnum.LEADER =>
+        stopElectionTimer()
+        restartHeartbeatTimer()
+        leaderBehavior
+
+      case BehaviorEnum.BYZANTINELEADER =>
+        stopElectionTimer()
+        restartHeartbeatTimer()
+        byzantineLeaderBehavior
+
+      case BehaviorEnum.SLEEP =>
+        stopElectionTimer()
+        stopHeartbeatTimer()
+        sleepBehavior
+
+      case _ =>
+        stopElectionTimer()
+        stopHeartbeatTimer()
+        receive
+
+    }
+
+    /**
+     * Change of behavior
+     */
+    context.become(newBehavior)
+
+    /**
+     * After change of behavior
+     */
+    toBehavior match {
+
+      case BehaviorEnum.FOLLOWER =>
+
+        state.alreadyVoted = false
+        state.behaviour = BehaviorEnum.FOLLOWER
+
+      case BehaviorEnum.CANDIDATE =>
+
+        state.voteCounter = 0
+
+        // Send a VR to all neighbours and itself with increased termID
+        // If the candidate becomes a leader the term should increased by one, as the leader election has ended
+        // Also the followers should be informed about this new term
+        state.term = state.term + 1
+        state.neighbours.foreach(neighbour => neighbour ! RequestVote(state.term))
+        val vote = Vote(granted = true, this.self.path.name, this.self.path.name, state.term)
+        self ! GrantVoteSigned(string_sign(this.state.privateKey, vote.toString), granted = true, vote)
+
+        state.behaviour = BehaviorEnum.CANDIDATE
+
+      case BehaviorEnum.LEADER =>
+
+        state.heartbeatCounter = 0
+        state.behaviour = BehaviorEnum.LEADER
+
+      case BehaviorEnum.BYZANTINELEADER =>
+        log.info("Becoming Byzantine Leader")
+        state.heartbeatCounter = 0
+        state.behaviour = BehaviorEnum.BYZANTINELEADER
+
+      case BehaviorEnum.SLEEP =>
+
+        scheduleAwake()
+        state.behaviour = BehaviorEnum.SLEEP
+
+      case _ =>
+
+        state.behaviour = BehaviorEnum.UNINITIALIZED
+    }
+
+  }
+}
\ No newline at end of file
diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/actors/RaftNodeActor.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/actors/RaftNodeActor.scala
new file mode 100644
index 00000000..732ab61e
--- /dev/null
+++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/actors/RaftNodeActor.scala
@@ -0,0 +1,345 @@
+package de.maxbundscherer.scala.raft.actors
+
+import akka.actor.{Actor, ActorLogging}
+import de.maxbundscherer.scala.raft.aggregates.Aggregate.BehaviorEnum.BehaviorEnum
+import de.maxbundscherer.scala.raft.aggregates.Aggregate._
+import de.maxbundscherer.scala.raft.aggregates.RaftAggregate._
+import de.maxbundscherer.scala.raft.utils.{Configuration, RaftScheduler}
+
+import scala.concurrent.ExecutionContext
+
+object RaftNodeActor {
+
+  import akka.actor.Props
+
+  val prefix: String = "raftNodeActor"
+  def props()(implicit executionContext: ExecutionContext): Props = Props(new RaftNodeActor())
+
+  /**
+   * Internal (mutable) actor state
+   * @param lastHashCode Int (last hashcode from data) (used in FOLLOWER and LEADER behavior)
+   */
+  case class RaftNodeState(
+ var
lastHashCode : Int = -1, + ) extends NodeState +} + +/** + * ------------------ + * --- Raft Node ---- + * ------------------ + * + * # 5 Behaviors (Finite-state machine / FSM) + * + * !!! SEE PROJECT README !!! + * + * - (UNINITIALIZED) + * - FOLLOWER (Default - after init) + * - CANDIDATE (after election timeout) + * - LEADER + * - (SLEEP) (after simulated crash in LEADER) + */ +class RaftNodeActor()(implicit val executionContext: ExecutionContext) + extends Actor + with ActorLogging + with RaftScheduler + with Configuration { + + import RaftNodeActor._ + + + /** + * Mutable actor state + */ + override val state: RaftNodeState = RaftNodeState() + + val notHandled = new Object + + val notHandledFun: Any => Object = (_: Any) => notHandled + + override def aroundReceive(receive: Receive, msg: Any): Unit = { + log.info(s"${System.currentTimeMillis()},Received message:${msg.getClass.toGenericString},,") + super.aroundReceive(receive, msg) + } + + log.info("Actor online (uninitialized)") + + /** + * Change actor behavior + * @param fromBehavior Behavior + * @param toBehavior Behavior + * @param loggerMessage String (logging) + */ + private def changeBehavior(fromBehavior: BehaviorEnum, + toBehavior: BehaviorEnum, + loggerMessage: String): Unit = { + + log.info(s"${System.currentTimeMillis()},Change behavior from '$fromBehavior' to '$toBehavior' ($loggerMessage)") + + /** + * Before change of behavior + */ + val newBehavior: Receive = toBehavior match { + + case BehaviorEnum.FOLLOWER => + restartElectionTimer() + stopHeartbeatTimer() + followerBehavior + + case BehaviorEnum.CANDIDATE => + restartElectionTimer() + stopHeartbeatTimer() + candidateBehavior + + case BehaviorEnum.LEADER => + stopElectionTimer() + restartHeartbeatTimer() + leaderBehavior + + case BehaviorEnum.SLEEP => + stopElectionTimer() + stopHeartbeatTimer() + sleepBehavior + + case _ => + stopElectionTimer() + stopHeartbeatTimer() + receive + + } + + /** + * Change of behavior + */ + 
context.become(newBehavior) + + /** + * After change of behavior + */ + toBehavior match { + + case BehaviorEnum.FOLLOWER => + + state.alreadyVoted = false + + case BehaviorEnum.CANDIDATE => + + state.voteCounter = 0 + state.neighbours.foreach(neighbour => neighbour ! RequestVote) + self ! GrantVote + + case BehaviorEnum.LEADER => + + state.heartbeatCounter = 0 + + case BehaviorEnum.SLEEP => + + scheduleAwake() + + case _ => + + } + + } + + /** + * Uninitialized behavior + */ + override def receive: Receive = { + + case InitActor(neighbours) => + + state.neighbours = neighbours + state.majority = ( (neighbours.size + 1) / 2 ) + 1 + + changeBehavior( + fromBehavior = BehaviorEnum.UNINITIALIZED, + toBehavior = BehaviorEnum.FOLLOWER, + loggerMessage = s"Got ${state.neighbours.size} neighbours (majority=${state.majority})" + ) + + case _: Any => log.error("Node is not initialized") + + } + + /** + * Raft FOLLOWER + */ + def followerBehavior: Receive = { + + case SchedulerTrigger.ElectionTimeout => + + changeBehavior(fromBehavior = BehaviorEnum.FOLLOWER, + toBehavior = BehaviorEnum.CANDIDATE, + loggerMessage = "No heartbeat from leader") + + case SimulateLeaderCrash => sender ! IamNotTheLeader(actorName = self.path.name) + + case WhoIsLeader => sender ! IamNotTheLeader(actorName = self.path.name) + + case _: AppendData => sender ! IamNotTheLeader(actorName = self.path.name) + + case GetActualData => + + log.info(s"Returning actual data ${state.data}") + sender ! ActualData(data = state.data) + + case Heartbeat(lastHashCode) => + + log.debug(s"Got heartbeat from (${sender().path.name})") + + if(! lastHashCode.equals(state.lastHashCode)) { + + log.info("I am not consistent - request data from leader") + sender ! 
IamNotConsistent + } + + restartElectionTimer() + + case OverrideData(newData) => + + state.data = newData + state.lastHashCode = state.data.hashCode() + + log.info(s"[FOLLOWER WRITING DATA], ${System.currentTimeMillis()}, (data = ${state.data}), (newHashCode = ${state.lastHashCode}), The follower is writing the new data") + + + //log.info(s"Follower is writing data (newHashCode = ${state.lastHashCode})") + + case RequestVote => + + if(!state.alreadyVoted) { + sender ! GrantVote + state.alreadyVoted = true + } + + case any: Any => + + log.warning(s"Got unhandled message in followerBehavior '${any.getClass.getSimpleName}' from (${sender().path.name})") + + } + + /** + * Raft CANDIDATE + */ + def candidateBehavior: Receive = { + + case SchedulerTrigger.ElectionTimeout => + + changeBehavior( + fromBehavior = BehaviorEnum.CANDIDATE, + toBehavior = BehaviorEnum.FOLLOWER, + loggerMessage = s"Not enough votes (${state.voteCounter}/${state.majority})") + + case _: Heartbeat => //Ignore message + + case RequestVote => //Ignore message + + case SimulateLeaderCrash => sender ! IamNotTheLeader(actorName = self.path.name) + + case WhoIsLeader => sender ! IamNotTheLeader(actorName = self.path.name) + + case _: AppendData => sender ! IamNotTheLeader(actorName = self.path.name) + + case GrantVote => + + state.voteCounter = state.voteCounter + 1 + + log.debug(s"Got vote ${state.voteCounter}/${state.majority} from (${sender().path.name})") + + if (state.voteCounter >= state.majority) { + + changeBehavior( + fromBehavior = BehaviorEnum.CANDIDATE, + toBehavior = BehaviorEnum.LEADER, + loggerMessage = s"Become leader - enough votes (${state.voteCounter}/${state.majority})" + ) + + } + + case any: Any => + + log.warning(s"Got unhandled message in candidateBehavior '${any.getClass.getSimpleName}' from (${sender().path.name})") + + } + + /** + * Raft LEADER + */ + def leaderBehavior: Receive = { + + case SchedulerTrigger.Heartbeat => + + state.neighbours.foreach(neighbour => neighbour ! 
Heartbeat(lastHashCode = state.lastHashCode)) + + state.heartbeatCounter = state.heartbeatCounter + 1 + + if (state.heartbeatCounter >= Config.crashIntervalHeartbeats) { + changeBehavior( + fromBehavior = BehaviorEnum.LEADER, + toBehavior = BehaviorEnum.SLEEP, + loggerMessage = s"Simulated test crash (crashIntervalHeartbeats) - sleep ${Config.sleepDowntime} seconds now" + ) + } + + case GrantVote => //Ignore message + + case RequestVote => //Ignore message + + case SimulateLeaderCrash => + + sender ! LeaderIsSimulatingCrash(actorName = self.path.name) + + changeBehavior( + fromBehavior = BehaviorEnum.LEADER, + toBehavior = BehaviorEnum.SLEEP, + loggerMessage = s"Simulated test crash (externalTrigger) - sleep ${Config.sleepDowntime} seconds now" + ) + + case WhoIsLeader => + + sender ! IamTheLeader(actorName = self.path.name) + + case AppendData(key, value) => + + state.data = state.data + (key -> value) + state.lastHashCode = state.data.hashCode() + + log.info(s"[SEND APR], ${System.currentTimeMillis()}, (key/value = $key->$value), (newHashCode = ${state.lastHashCode}), Leader is writing data") + + sender ! WriteSuccess(actorName = self.path.name) + + case GetActualData => + log.info(s"Returning actual data ${state.data}") + sender ! ActualData(data = state.data) + + case IamNotConsistent => + + sender ! OverrideData(data = state.data) + + case any: Any => + + log.warning(s"Got unhandled message in leaderBehavior '${any.getClass.getSimpleName}' from (${sender().path.name})") + + } + + /** + * Sleep behavior + */ + def sleepBehavior: Receive = { + + case SchedulerTrigger.Awake => + + changeBehavior(fromBehavior = BehaviorEnum.SLEEP, + toBehavior = BehaviorEnum.FOLLOWER, + loggerMessage = s"Awake after ${Config.sleepDowntime} seconds downtime") + + case SimulateLeaderCrash => sender ! IamNotTheLeader(actorName = self.path.name) + + case WhoIsLeader => sender ! IamNotTheLeader(actorName = self.path.name) + + case _: AppendData => sender ! 
IamNotTheLeader(actorName = self.path.name) + + } + +} \ No newline at end of file diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/aggregates/Aggregate.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/aggregates/Aggregate.scala new file mode 100644 index 00000000..361da38b --- /dev/null +++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/aggregates/Aggregate.scala @@ -0,0 +1,61 @@ +package de.maxbundscherer.scala.raft.aggregates + +import akka.actor.Cancellable + + +object Aggregate { + import akka.actor.ActorRef + + trait Request + trait Response + + /** + * Internal (mutable) actor state + * @param neighbours Vector with another actors + * @param electionTimer Cancellable for timer (used in FOLLOWER and CANDIDATE behavior) + * @param heartbeatTimer Cancellable for timer (used in LEADER behavior) + * @param alreadyVoted Boolean (has already voted in FOLLOWER behavior) + * @param voteCounter Int (counter in CANDIDATE behavior) + * @param majority Int (calculated majority - set up in init) + * @param heartbeatCounter Int (auto simulate crash after some heartbeats in LEADER behavior) + * @param data Map (String->String) (used in FOLLOWER and LEADER behavior) + */ + class NodeState( + var neighbours : Vector[ActorRef] = Vector.empty, + var electionTimer : Option[Cancellable] = None, + var heartbeatTimer : Option[Cancellable] = None, + var alreadyVoted : Boolean = false, + var voteCounter : Int = 0, + var majority : Int = -1, + var heartbeatCounter : Int = 0, + var data : Map[String, String] = Map.empty, + ) + + case class GetActualData(data: Map[String, String]) extends Request + case class ActualData(data: Map[String, String]) extends Response + + object WhoIsLeader extends Request + case class IamTheLeader(actorName: String) extends Response + case class IamNotTheLeader(actorName: String) extends Response + + object GetState extends Request + + object SimulateLeaderCrash extends Request + case class 
LeaderIsSimulatingCrash(actorName: String) extends Response + + object InitiateLeaderElection + extends Response + + //FSM States (RaftNodeActor) + object BehaviorEnum extends Enumeration { + type BehaviorEnum = Value + val UNINITIALIZED, FOLLOWER, CANDIDATE, LEADER, SLEEP, BYZANTINELEADER = Value + } + + //Used by RaftScheduler + object SchedulerTrigger { + object ElectionTimeout + object Heartbeat + object Awake + } +} diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/aggregates/BRaftAggregate.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/aggregates/BRaftAggregate.scala new file mode 100644 index 00000000..12610a39 --- /dev/null +++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/aggregates/BRaftAggregate.scala @@ -0,0 +1,68 @@ +package de.maxbundscherer.scala.raft.aggregates + +import de.maxbundscherer.scala.raft.actors.BRaftNodeActor.BRaftNodeState +import de.maxbundscherer.scala.raft.aggregates.Aggregate.{Request, Response} +import de.maxbundscherer.scala.raft.aggregates.BRaftAggregate.GrantVote.GrantVoteSigned +import de.maxbundscherer.scala.raft.schnorr.Schnorr.string_sign + +import scala.collection.mutable.ArrayBuffer + +object BRaftAggregate { + import akka.actor.ActorRef + + + case class InitActor(neighbours: Vector[ActorRef], keyPair: (BigInt, BigInt), clientPublicKey: BigInt, keys: Map[String, BigInt]) extends Request + + case class RequestVote(term: Int) extends Request + + object GrantVote { + sealed trait Response + final case class GrantVoteSigned(signature: BigInt, granted: Boolean, vote: Vote) extends Response + } + +// object GrantVote extends Response + case class RejectMessage(reason: String) + case class Heartbeat(lastHashCode: BigInt, publicKeysStorage: Map[String, BigInt], term: Int) extends Request + case class IamNotConsistent() extends Response + + case class OverrideData(data: ArrayBuffer[LogEntry], publicKeysStorage: Map[String, BigInt], term: Int) extends Request + case class 
AppendEntriesResponse(index: Int, hash: BigInt) { + var signature: BigInt = -1 + override def toString: String = { + s"hash=$hash,index=$index" + } + + def sign(privateKey: BigInt): Unit = { + signature = string_sign(privateKey, this.toString) + } + } + + case class BecomeByzantine() extends Request + + + case class AppendData(key: String, + value: String, + signature: BigInt) extends Request + case class WriteResponse(actorName: String, success: Boolean, reason: String) extends Response + + case class BroadcastKey(actorID: String, publicKeys: BigInt) extends Request + case class BroadcastTerm(term: Int, voteRequestResponses: Map[String, GrantVoteSigned]) extends Request + case class BroadcastVotesToClient() + + case class LogEntry(key: String, value: String, hash: BigInt, clientSignature: BigInt) { + var committed: Boolean = false + override def toString: String = { + s"$key->$value, hash:$hash" + } + } + + case class Vote(granted: Boolean, from: String, to:String, term:Int) { + override def toString: String = { + s"$from $granted vote $to in $term" + } + } + + case class MyStateIs(state: BRaftNodeState) extends Response + + val CLIENT_NAME: String = "client" +} \ No newline at end of file diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/aggregates/RaftAggregate.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/aggregates/RaftAggregate.scala new file mode 100644 index 00000000..302c03e4 --- /dev/null +++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/aggregates/RaftAggregate.scala @@ -0,0 +1,22 @@ +package de.maxbundscherer.scala.raft.aggregates + +import de.maxbundscherer.scala.raft.aggregates.Aggregate.Request +import de.maxbundscherer.scala.raft.aggregates.Aggregate.Response + +object RaftAggregate { + import akka.actor.ActorRef + + + case class InitActor(neighbours: Vector[ActorRef]) extends Request + + object RequestVote extends Request + object GrantVote extends Response + + case class Heartbeat(lastHashCode: Int) 
extends Request + object IamNotConsistent extends Response + case class OverrideData(data: Map[String, String]) extends Request + + case class AppendData(key: String, value: String) extends Request + case class WriteSuccess(actorName: String) extends Response + +} \ No newline at end of file diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/examples/SimpleActor.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/examples/SimpleActor.scala new file mode 100644 index 00000000..c37c5c3e --- /dev/null +++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/examples/SimpleActor.scala @@ -0,0 +1,19 @@ +package de.maxbundscherer.scala.raft.examples + +import akka.actor.{Actor, ActorLogging} + +class SimpleActor extends Actor with ActorLogging { + + override def receive: Receive = { + + case data: String => + + sender ! data + "-pong" + + case any: Any => + + log.error(s"Got unhandled message '$any'") + + } + +} \ No newline at end of file diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/examples/SimpleFSMActor.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/examples/SimpleFSMActor.scala new file mode 100644 index 00000000..5e552076 --- /dev/null +++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/examples/SimpleFSMActor.scala @@ -0,0 +1,38 @@ +package de.maxbundscherer.scala.raft.examples + +import akka.actor.{Actor, ActorLogging} + +object SimpleFSMActor { + + //Initialize message/command + case class Initialize(state: Int) + +} + +class SimpleFSMActor extends Actor with ActorLogging { + + import SimpleFSMActor._ + + //Actor mutable state + private var state = -1 + + //Initialized behavior + def initialized: Receive = { + + case any: Any => log.info(s"Got message '$any'") + + } + + //Default behavior + override def receive: Receive = { + + case Initialize(newState) => + + state = newState + context.become(initialized) + + case any: Any => log.error(s"Not initialized '$any'") + + } + +} \ No 
/**
 * Schnorr signing and verification helpers built on the curve arithmetic in
 * `SchnorrMath`. The tagged hashes ("BIP0340/aux", "BIP0340/nonce",
 * "BIP0340/challenge") follow the BIP-340 scheme.
 *
 * Failure convention (kept for backward compatibility): key generation and
 * signing return `null` on invalid input; verification returns `false`.
 */
object Schnorr {
  def main(args: Array[String]): Unit = {
    test_vectors()
  }

  private val hasher: Hasher = Hasher()
  val logger: Logger = LoggerFactory.getLogger(this.toString)

  /**
   * Generates a public key and private key.
   *
   * The private key is drawn uniformly from [1, N-1] using a cryptographically
   * secure random source; candidates outside that range are rejected and
   * redrawn, so the returned public key is never `null`.
   *
   * @return the generated keys as pair (sk, pk)
   */
  def generateKeypair(): (BigInt, BigInt) = {
    val secureRandom = new Random(new java.security.SecureRandom())
    var sk = BigInt(32 * 8, secureRandom)
    while (sk <= 0 || sk >= N) {
      sk = BigInt(32 * 8, secureRandom)
    }
    (sk, generatePublicKey(sk))
  }

  /**
   * Generates a public key using a private key.
   *
   * @param sk as the 32-byte private key
   * @return the x-coordinate of sk*G, or `null` when sk is null or outside [1, N-1]
   */
  def generatePublicKey(sk: BigInt): BigInt = {
    // Same validity range as `sign`: zero, negative and >= N are all invalid.
    if (sk == null || sk <= 0 || sk >= N) return null
    val p = point_mul(Some(G), sk)
    assert(p.isDefined)
    p.get.x
  }

  /**
   * Sign some big integer using a private key and auxiliary data.
   *
   * @param sk  as the 32-byte private key
   * @param msg as the 32-byte big integer message
   * @param aux as the 32-byte random auxiliary data
   * @return the 64-byte signature (r.x || s) as big integer, or `null` when the
   *         key is out of range, the derived nonce is zero, or the fresh
   *         signature does not verify
   */
  def sign(sk: BigInt, msg: BigInt, aux: BigInt = r_big()): BigInt = {
    if (sk <= 0 || sk >= N) return null
    val p: Point = point_mul(Some(G), sk).get
    // Use the key variant whose public point has an even y-coordinate.
    val d = if (even_y(Some(p))) sk else N - sk
    val t = d ^ hashtag("BIP0340/aux", big2bytes(aux))
    val k_0 = hashtag("BIP0340/nonce", concat(t, p.x, msg)).mod(N)
    if (k_0 <= 0) return null
    val r = point_mul(Some(G), k_0).get
    val k = if (even_y(Some(r))) k_0 else N - k_0
    val e = hashtag("BIP0340/challenge", concat(r.x, p.x, msg)).mod(N)
    val signature = BigInt(1, concat(r.x, compute_sign(k, e, d).mod(N)))
    // Self-check: never hand out a signature that does not verify.
    if (!verify(p.x, msg, signature)) return null
    signature
  }

  /**
   * Verify some big integer using a public key and a signature.
   *
   * Rejects (returns `false` for) null arguments, a negative public key,
   * negative signatures, signatures wider than 64 bytes, r >= P and s >= N.
   *
   * @param pk  as the 32-byte public key
   * @param msg as the 32-byte big integer message
   * @param sig as the 64-byte signature to use
   * @return true if the verification passed
   */
  def verify(pk: BigInt, msg: BigInt, sig: BigInt): Boolean = {
    // `sign` may return null; treat that (and any other null) as "not verified".
    if (pk == null || msg == null || sig == null) return false
    if (pk.signum < 0) return false
    // big2bytes would silently drop high-order bytes of an oversized value.
    if (sig.signum < 0 || sig.bitLength > 64 * 8) return false

    val p = lift(pk)
    val sigBytes = big2bytes(sig, 64)
    val s_l = bytes2int(sigBytes.slice(0, 32))
    val s_r = bytes2int(sigBytes.slice(32, 64))
    // Both bounds are exclusive: r must be < P and s must be < N.
    if (p.isEmpty || s_l >= P || s_r >= N) return false

    val e = hashtag("BIP0340/challenge", concat(s_l, p.get.x, msg)).mod(N)
    val r = point_add(point_mul(Some(G), s_r), point_mul(p, N - e))
    !is_infinite(r) && even_y(r) && r.get.x == s_l
  }

  /**
   * Sign some string using a private key and auxiliary data.
   * The string is hashed with the shared `Hasher` before signing.
   *
   * @param sk  as the 32-byte private key
   * @param str as the message string
   * @param aux as the 32-byte random auxiliary data
   * @return the newly computed 64-byte signature as big integer (see `sign`)
   */
  implicit def string_sign(sk: BigInt, str: String, aux: BigInt = r_big()): BigInt = {
    sign(sk, hasher.hash(str), aux)
  }

  /**
   * Verify some string using a public key and a signature.
   * The string is hashed with the shared `Hasher` before verification.
   *
   * @param pk  as the 32-byte public key
   * @param str as the message string
   * @param sig as the 64-byte signature to use
   * @return true if the verification passed
   */
  implicit def string_verify(pk: BigInt, str: String, sig: BigInt): Boolean = {
    verify(pk, hasher.hash(str), sig)
  }
}
/**
 * Elliptic-curve arithmetic and tagged hashing used by the Schnorr signature code.
 *
 * Points are `Option[Point]`; `None` represents the point at infinity.
 */
object SchnorrMath {
  // Curve parameters: field prime P, group order N, generator G = (X, Y).
  // Kept as `var` so existing code that references them keeps compiling.
  var P: BigInt = hex2big("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F")
  var N: BigInt = hex2big("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141")
  var X: BigInt = hex2big("79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798")
  var Y: BigInt = hex2big("483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8")
  var G: Point = Point(X, Y)

  /**
   * Generate a 32-byte hash with SHA256(SHA256(tag) || SHA256(tag) || msg)
   *
   * @param tag to be used to hash with
   * @param msg to be encoded
   * @return the sha256 digest as a non-negative big integer
   */
  def hashtag(tag: String, msg: Array[Byte]): BigInt = {
    val sha256 = MessageDigest.getInstance("SHA-256")
    // digest() resets the instance, so it can be reused for the second hash.
    val tagHash = sha256.digest(tag.getBytes(Charset.forName("UTF-8")))
    BigInt(1, sha256.digest(tagHash ++ tagHash ++ msg))
  }

  /**
   * Computes the scalar part of the signature: (k + e * d) mod N.
   * Each operand is first reduced to its unsigned 32-byte representation.
   *
   * @param k The nonce
   * @param e The challenge hash
   * @param d The private key
   * @return the resulting scalar
   */
  def compute_sign(k: BigInt, e: BigInt, d: BigInt): BigInt = {
    val product = (BigInt(1, big2bytes(e)) * BigInt(1, big2bytes(d))).mod(N)
    (product + BigInt(1, big2bytes(k))).mod(N)
  }

  /**
   * Determine whether the point lays at infinity
   *
   * @param p as the point to evaluate
   * @return true if at infinity
   */
  def is_infinite(p: Option[Point]): Boolean = p.isEmpty

  /**
   * Determine whether the y-coordinate is even
   *
   * @param p as the point to evaluate; must not be the point at infinity
   *          (an assertion fails otherwise)
   * @return true if y is even
   */
  def even_y(p: Option[Point]): Boolean = {
    assert(!is_infinite(p))
    (p.get.y % 2) == 0
  }

  /**
   * Performs elliptic curve group operation on two points
   *
   * @param p1 as the first point
   * @param p2 as the second point
   * @return the newly computed point (`None` for the point at infinity)
   */
  def point_add(p1: Option[Point], p2: Option[Point]): Option[Point] = {
    if (p1.isEmpty) return p2
    if (p2.isEmpty) return p1
    val (x1, x2) = (p1.get.x, p2.get.x)
    val (y1, y2) = (p1.get.y, p2.get.y)
    // Same x but different y: the points are inverses of each other.
    if (x1 == x2 && y1 != y2) return None

    // Slope of the tangent (doubling) or of the chord (distinct points).
    // Modular inverses are computed as a^(P-2) mod P.
    val lam: BigInt =
      if (x1 == x2) ((x1 * x1 * 3) * (y1 * 2).modPow(P - 2, P)).mod(P)
      else ((y2 - y1) * (x2 - x1).modPow(P - 2, P)).mod(P)

    val x3 = (lam * lam - x1 - x2).mod(P)
    Some(Point(x3, ((lam * (x1 - x3)).mod(P) - y1).mod(P)))
  }

  /**
   * Performs elliptic curve point multiplication using some scalar
   * (double-and-add over the 256 low-order bits of the scalar).
   *
   * @param p the point to multiply
   * @param n the scalar as big integer; only bits 0..255 are used
   * @return the resulting point after repeated addition
   */
  def point_mul(p: Option[Point], n: BigInt): Option[Point] = {
    var q: Option[Point] = p
    var r: Option[Point] = None
    // Scalars are 256 bits wide, so exactly 256 double-and-add steps are needed.
    for (i <- 0 until 256) {
      if (n.testBit(i)) r = point_add(r, q)
      q = point_add(q, q)
    }
    r
  }

  /**
   * Finds an even y-coordinate for some x if it exists
   *
   * @param x the x-coordinate to consider; must satisfy 0 <= x < P
   * @return the point with even y if found
   */
  def lift(x: BigInt): Option[Point] = {
    if (x.signum < 0 || x >= P) return None
    val sq = (x.modPow(3, P) + 7).mod(P)
    // P is congruent to 3 mod 4, so (P + 1) / 4 is an exact integer and
    // sq^((P+1)/4) is a square root of sq whenever one exists.
    val y = sq.modPow((P + 1) / 4, P)
    if (sq != y.modPow(2, P)) return None
    if (y.testBit(0)) Some(Point(x, P - y)) else Some(Point(x, y))
  }
}
/** A point on the curve in affine coordinates. */
case class Point(x: BigInt, y: BigInt)

/**
 * Conversion helpers (hex / bytes / big integers), randomness helpers and a
 * small self-test driver for the Schnorr implementation.
 */
object SchnorrUtil {
  // One shared, cryptographically secure generator (SecureRandom is thread-safe).
  private lazy val secureRandom: Random = new Random(new java.security.SecureRandom())

  /**
   * Runs the Schnorr sign and verification functions for some test vectors.
   *
   * First verifies every row of `vectors.csv` (loaded from the classpath) and
   * prints the outcome next to the expected result; then runs 10 randomised
   * sign/verify round trips for both the big-integer and the string API.
   *
   * @return always true; results are reported on stdout only
   */
  def test_vectors(): Boolean = {
    println("Secret Key, Public Key, Auxiliary, Message, Signature, Verification")
    val path = getClass.getClassLoader.getResource("vectors.csv").getPath.replace("%20", " ")
    val bufferedSource = Source.fromFile(path)
    try {
      for (line <- bufferedSource.getLines().drop(1)) {
        val cols = line.split(",").map(_.trim)
        // index secret_key public_key aux_rand message signature verification result comment
        println(s"> SEC ${cols(1)}\n> PUB ${cols(2)}\n> AUX ${cols(3)}\n> MSG ${cols(4)}\n> SIG ${cols(5)}")
        val verified = verify(hex2big(cols(2)), hex2big(cols(4)), hex2big(cols(5)))
        println(s"Expected ${cols(6).toLowerCase()} and got $verified  \n")
      }
    } finally {
      // Release the file handle even when a row fails to parse.
      bufferedSource.close()
    }

    val n = 10
    var failures = 0
    // Exactly n rounds with two checks each, matching the "x / 2n" report below.
    for (_ <- 1 to n) {
      val msg = r_big()
      val str = Random.nextString(32)
      val (sk, pk) = generateKeypair()
      if (!verify(pk, msg, sign(sk, msg))) failures += 1
      if (!string_verify(pk, str, string_sign(sk, str))) failures += 1
    }
    println("Successful for " + (2 * n - failures) + "/" + (2 * n) + " randomised vector sets")
    true
  }

  /**
   * Provide the random generator used for auxiliary signing data.
   *
   * Backed by `java.security.SecureRandom` rather than a clock-derived seed,
   * so two calls within the same millisecond no longer yield identical output.
   *
   * @return the random object
   */
  def get_seed(): Random = secureRandom

  /**
   * Generate a random 32-byte (256-bit) non-negative big integer.
   *
   * @return the random big integer
   */
  def r_big(): BigInt = {
    BigInt(32 * 8, get_seed())
  }

  /**
   * Convert a hex string to a non-negative big integer.
   *
   * @param hex as the hex string (no "0x" prefix, no sign); odd lengths are
   *            allowed and the empty string yields 0
   * @return the big integer
   * @throws NumberFormatException if `hex` contains a non-hex character
   */
  def hex2big(hex: String): BigInt = {
    if (hex.isEmpty) BigInt(0)
    else if (hex.head == '-' || hex.head == '+') throw new NumberFormatException(s"Signed hex is not supported: $hex")
    else BigInt(hex, 16)
  }

  /**
   * Convert a big integer to a hex string
   *
   * @param x    as the big integer
   * @param size as the byte length
   * @return "0x" followed by two lowercase hex digits per byte
   */
  def big2hex(x: BigInt, size: Int = 32): String = {
    "0x" + big2bytes(x, size).map("%02x" format _).mkString
  }

  /**
   * Convert a big integer to a fixed-width big-endian byte array.
   *
   * Values whose encoding is longer than `size` keep only their low-order
   * `size` bytes; shorter encodings are left-padded with zero bytes.
   *
   * @param x    as the big integer
   * @param size as the byte length
   * @return an array of exactly `size` bytes
   */
  def big2bytes(x: BigInt, size: Int = 32): Array[Byte] = {
    val bytes = x.toByteArray
    if (bytes.length == size) bytes
    else if (bytes.length > size) bytes.slice(bytes.length - size, bytes.length)
    else {
      val buffer = new Array[Byte](size)
      Array.copy(bytes, 0, buffer, buffer.length - bytes.length, bytes.length)
      buffer
    }
  }

  /**
   * Interpret a byte array as an unsigned big-endian big integer
   *
   * @param x as the byte array
   * @return the unsigned big integer
   */
  def bytes2int(x: Array[Byte]): BigInt = {
    BigInt(1, x)
  }

  /**
   * Concatenate the 32-byte arrays of two big integers
   *
   * @param x1 the first big integer
   * @param x2 the second big integer
   * @return the 64-byte concatenation
   */
  def concat(x1: BigInt, x2: BigInt): Array[Byte] = {
    big2bytes(x1) ++ big2bytes(x2)
  }

  /**
   * Concatenate the 32-byte arrays of three big integers
   *
   * @param x1 the first big integer
   * @param x2 the second big integer
   * @param x3 the third big integer
   * @return the 96-byte concatenation
   */
  def concat(x1: BigInt, x2: BigInt, x3: BigInt): Array[Byte] = {
    big2bytes(x1) ++ big2bytes(x2) ++ big2bytes(x3)
  }
}
b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/services/BRaftService.scala @@ -0,0 +1,178 @@ +package de.maxbundscherer.scala.raft.services + +import akka.pattern.ask +import akka.actor.{ActorRef, ActorSystem} +import akka.util.Timeout +import de.maxbundscherer.scala.raft.actors.BRaftNodeActor.BRaftNodeState +import de.maxbundscherer.scala.raft.aggregates.Aggregate.{ActualData, GetActualData, GetState, IamNotTheLeader, IamTheLeader, InitiateLeaderElection, LeaderIsSimulatingCrash, SimulateLeaderCrash, WhoIsLeader} +import de.maxbundscherer.scala.raft.schnorr.Schnorr.{generateKeypair, string_sign} + +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.Await + +// Messages from client should be signed (Implement in RaftService) +class BRaftService(numberNodes: Int)(implicit actorSystem: ActorSystem, + timeout: Timeout, + executionContext: ExecutionContext) { + + import de.maxbundscherer.scala.raft.actors.BRaftNodeActor + import de.maxbundscherer.scala.raft.aggregates.BRaftAggregate._ + // Protected to give access in tests + private val (privateKey, publicKey): (BigInt, BigInt) = generateKeypair() + + /** + * Declare and start up nodes + */ + final val nodes: Map[Int, ActorRef] = + (0 until numberNodes) + .map(i => { + i -> actorSystem.actorOf(props = BRaftNodeActor.props, + name = s"${BRaftNodeActor.prefix}-$i") + }) + .toMap + + /** + * Init nodes with neighbors and keypair + */ + var keys: Map[(Int, String), (BigInt, BigInt)] = nodes.map(node => ((node._1, node._2.path.name), generateKeypair())) + + var pKeys : Map[String, BigInt] = keys.map(x => (x._1._2, x._2._2)) + + nodes.toList.sortBy(entry => entry._1).zip(keys.toList.sortBy(entry => entry._1._1)).foreach(pair => { + val node = pair._1 + val keypair = pair._2 + print(s"creating node $node with keypair $keypair") + node._2 ! InitActor(nodes.filter(_._1 != node._1).values.toVector, keypair._2, this.publicKey, pKeys) + }) + + /** + * Ask each node: Are you the leader? 
(Waiting for result - blocking)
+   * @return Vector with Either [Left = IamTheLeader, Right = IamNotTheLeader]
+   */
+  def evaluateActualLeaders: Vector[Either[IamTheLeader, IamNotTheLeader]] = {
+
+    nodes.map(node => {
+
+      val awaitedResult = Await.result(node._2 ? WhoIsLeader, timeout.duration)
+
+      // Any other reply type ends in a scala.MatchError
+      awaitedResult match {
+        case msg: IamTheLeader    => Left(msg)
+        case msg: IamNotTheLeader => Right(msg)
+      }
+
+    }).toVector
+
+  }
+
+  /**
+   * Return current leader (the first node, in map iteration order, that answers IamTheLeader - blocking)
+   * @return (node index, actor reference) of that leader
+   * @throws NoSuchElementException if no node reports itself as leader
+   */
+  def getLeader: (Int, ActorRef) = {
+    // find stops asking once a leader has answered; filter(...).head queried every node
+    // and failed with an uninformative "head of empty map" when there was none.
+    nodes.find(node => {
+      Await.result(node._2 ? WhoIsLeader, timeout.duration).isInstanceOf[IamTheLeader]
+    }).getOrElse(throw new NoSuchElementException("No node reported itself as leader"))
+  }
+
+  /**
+   * Ask each node for its state (blocking).
+   * Nodes that reply with anything other than MyStateIs are left out of the result.
+   * @return Map from node index to the state that node reported
+   */
+  def getNodeStates: Map[Int, BRaftNodeState] = {
+
+    var nodeStates: Map[Int, BRaftNodeState] = Map.empty
+
+    nodes.foreach(node => {
+
+      val awaitedResult = Await.result(node._2 ? GetState, timeout.duration)
+
+      awaitedResult match {
+        case MyStateIs(state) => nodeStates += (node._1 -> state)
+        case _ => //Ignore everything else
+      }
+    })
+
+    nodeStates
+  }
+
+  /**
+   * Send SimulateLeaderCrash to each node (Leader is confirming - Waiting for result - blocking)
+   * @return Vector with Either [Left = LeaderIsSimulatingCrash, Right = IamNotTheLeader]
+   */
+  def simulateLeaderCrash(): Vector[Either[LeaderIsSimulatingCrash, IamNotTheLeader]] = {
+
+    nodes.map(node => {
+
+      val awaitedResult = Await.result(node._2 ? SimulateLeaderCrash, timeout.duration)
+      awaitedResult match {
+        case msg: LeaderIsSimulatingCrash => Left(msg)
+        case msg: IamNotTheLeader         => Right(msg)
+      }
+
+    }).toVector
+
+  }
+
+  /**
+   * Tell the given actor to initiate a leader election (fire-and-forget, no reply is awaited)
+   * @param actor node that should start the election
+   */
+  def simulateByzantineActorBecomingLeader(actor: ActorRef): Unit = {
+    actor !
InitiateLeaderElection
+  }
+
+
+  /**
+   * Append data signed with this service's own private key (blocking)
+   * @param key String
+   * @param value String
+   * @return Vector with Either [Left = WriteResponse, Right = IamNotTheLeader]
+   */
+  def appendData(key: String, value: String): Vector[Either[WriteResponse, IamNotTheLeader]] = {
+    this.appendData(key, value, this.privateKey)
+  }
+
+  /**
+   * Append data (only leader is allowed to write data - synchronized by heartbeat from leader with followers - blocking)
+   * The request is sent to every node; the signature covers the string "key,value".
+   * @param key String
+   * @param value String
+   * @param ownPrivateKey private key used to sign the request
+   * @return Vector with Either [Left = WriteResponse, Right = IamNotTheLeader]
+   */
+  def appendData(key: String, value: String, ownPrivateKey: BigInt): Vector[Either[WriteResponse, IamNotTheLeader]] = {
+    val signature = string_sign(ownPrivateKey, s"$key,$value")
+
+    nodes.map(node => {
+      val awaitedResult = Await.result(node._2 ? AppendData(key = key, value = value, signature=signature), timeout.duration)
+      awaitedResult match {
+        case msg: WriteResponse => {
+          if (!msg.success) {
+            println(s"Appenddata unsuccesful in leader, reason: ${msg.reason}")
+          }
+          Left(msg)
+        }
+        case msg: IamNotTheLeader => Right(msg)
+        // case IamNotConsistent => null // TODO: do not return null
+      }
+    }).toVector
+  }
+
+  /**
+   * Send a signed AppendData request to the current leader only (fire-and-forget, no reply is awaited)
+   * @param key String
+   * @param value String
+   */
+  def appendBRaftData(key: String, value: String): Unit = {
+    val currentLeader = getLeader
+
+    // Previously this evaluated appendData(key, value) - a blocking broadcast to all nodes -
+    // and then sent the resulting Vector of replies to the leader as if it were a message.
+    currentLeader._2 ! AppendData(key = key, value = value, signature = string_sign(this.privateKey, s"$key,$value"))
+  }
+
+
+  /**
+   * Ask each node: Provide your actual data (blocking)
+   * @return Vector with ActualData
+   */
+  def evaluateActualData: Vector[ActualData] = {
+    nodes.map(node => {
+
+      Await.result(node._2 ?
GetActualData, timeout.duration).asInstanceOf[ActualData]
+
+    }).toVector
+
+  }
+
+  /**
+   * Terminates actor system
+   * @return Future that yields true once actorSystem.terminate() has completed
+   */
+  def terminate(): Future[Boolean] = actorSystem.terminate().map(_ => true)
+
+}
diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/services/RaftService.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/services/RaftService.scala
new file mode 100644
index 00000000..a7255b59
--- /dev/null
+++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/services/RaftService.scala
@@ -0,0 +1,112 @@
+package de.maxbundscherer.scala.raft.services
+
+import akka.actor.{ActorRef, ActorSystem}
+import akka.pattern.ask
+import akka.util.Timeout
+import de.maxbundscherer.scala.raft.aggregates.Aggregate.{ActualData, GetActualData, IamNotTheLeader, IamTheLeader, LeaderIsSimulatingCrash, SimulateLeaderCrash, WhoIsLeader}
+
+import scala.concurrent.{Await, ExecutionContext, Future}
+
+/**
+ * Service facade for plain Raft: creates numberNodes RaftNodeActor instances in the implicit
+ * actor system, tells each one about its neighbors and offers blocking, ask-based helpers.
+ */
+class RaftService(numberNodes: Int)(implicit actorSystem: ActorSystem,
+                                     timeout: Timeout,
+                                     executionContext: ExecutionContext) {
+
+  import de.maxbundscherer.scala.raft.actors.RaftNodeActor
+  import de.maxbundscherer.scala.raft.aggregates.RaftAggregate._
+
+  /**
+   * Declare and start up nodes (keyed by index, actor name = prefix-index)
+   */
+  final val nodes: Map[Int, ActorRef] =
+    (0 until numberNodes)
+      .map(i => {
+        i -> actorSystem.actorOf(props = RaftNodeActor.props,
+                                 name = s"${RaftNodeActor.prefix}-$i")
+      })
+      .toMap
+
+  /**
+   * Init nodes (each node with neighbors)
+   * The neighbor vector of a node holds every other node, never the node itself.
+   */
+  nodes.foreach(node =>
+    node._2 ! InitActor(nodes.filter(_._1 != node._1).values.toVector))
+
+  /**
+   * Ask each node: Are you the leader? (Waiting for result - blocking)
+   * @return Vector with Either [Left = IamTheLeader, Right = IamNotTheLeader]
+   */
+  def evaluateActualLeaders: Vector[Either[IamTheLeader, IamNotTheLeader]] = {
+
+    nodes.map(node => {
+
+      val awaitedResult = Await.result(node._2 ?
WhoIsLeader, timeout.duration)
+
+      // Only these two reply types are handled; any other reply ends in a scala.MatchError
+      awaitedResult match {
+        case msg: IamTheLeader    => Left(msg)
+        case msg: IamNotTheLeader => Right(msg)
+      }
+
+    }).toVector
+
+  }
+
+  /**
+   * Send SimulateLeaderCrash to each node (Leader is confirming - Waiting for result - blocking)
+   * @return Vector with Either [Left = LeaderIsSimulatingCrash, Right = IamNotTheLeader]
+   */
+  def simulateLeaderCrash(): Vector[Either[LeaderIsSimulatingCrash, IamNotTheLeader]] = {
+
+    nodes.map(node => {
+
+      val awaitedResult = Await.result(node._2 ? SimulateLeaderCrash, timeout.duration)
+
+      // Only these two reply types are handled; any other reply ends in a scala.MatchError
+      awaitedResult match {
+        case msg: LeaderIsSimulatingCrash => Left(msg)
+        case msg: IamNotTheLeader         => Right(msg)
+      }
+
+    }).toVector
+
+  }
+
+  /**
+   * Append data (only leader is allowed to write data - synchronized by heartbeat from leader with followers - blocking)
+   * The request is sent to every node, one after the other, waiting for each reply.
+   * @param key String
+   * @param value String
+   * @return Vector with Either [Left = WriteSuccess, Right = IamNotTheLeader]
+   */
+  def appendData(key: String, value: String): Vector[Either[WriteSuccess, IamNotTheLeader]] = {
+
+    nodes.map(node => {
+
+      val awaitedResult = Await.result(node._2 ? AppendData(key = key, value = value), timeout.duration)
+
+      // Only these two reply types are handled; any other reply ends in a scala.MatchError
+      awaitedResult match {
+        case msg: WriteSuccess    => Left(msg)
+        case msg: IamNotTheLeader => Right(msg)
+      }
+
+    }).toVector
+
+  }
+
+  /**
+   * Ask each node: Provide your actual data (blocking)
+   * @return Vector with ActualData (one entry per node; the reply is cast without a type check)
+   */
+  def evaluateActualData: Vector[ActualData] = {
+
+    nodes.map(node => {
+
+      Await.result(node._2 ?
GetActualData, timeout.duration).asInstanceOf[ActualData]
+
+    }).toVector
+
+  }
+
+  /**
+   * Terminates actor system
+   * @return Future that yields true once actorSystem.terminate() has completed
+   */
+  def terminate(): Future[Boolean] = actorSystem.terminate().map(_ => true)
+
+}
diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/utils/Configuration.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/utils/Configuration.scala
new file mode 100644
index 00000000..85ba89f0
--- /dev/null
+++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/utils/Configuration.scala
@@ -0,0 +1,44 @@
+package de.maxbundscherer.scala.raft.utils
+
+import de.maxbundscherer.scala.raft.services.{BRaftService, RaftService}
+
+import akka.japi.Util
+import com.typesafe.config.ConfigList
+
+/**
+ * Mix-in that exposes the values of the "raftPrototype" config section as Config.*
+ */
+trait Configuration {
+
+  object Config {
+
+    import com.typesafe.config.ConfigFactory
+
+    private val raftPrototypeConfig = ConfigFactory.load().getConfig("raftPrototype")
+
+    //Election Timer Min (Seconds)
+    val electionTimerIntervalMin: Int = raftPrototypeConfig.getInt("electionTimerIntervalMin")
+
+    //Election Timer Max (Seconds)
+    val electionTimerIntervalMax: Int = raftPrototypeConfig.getInt("electionTimerIntervalMax")
+
+    //Heartbeat Timer Interval (Seconds)
+    val heartbeatTimerInterval: Int = raftPrototypeConfig.getInt("heartbeatTimerInterval")
+
+    //Raft Nodes (Amount)
+    val nodes : Int = raftPrototypeConfig.getInt("nodes")
+
+    //Crash Interval (auto simulate crash after some heartbeats in LEADER behavior)
+    val crashIntervalHeartbeats: Int = raftPrototypeConfig.getInt("crashIntervalHeartbeats")
+
+    // Sleep downtime (Seconds) (after simulated crash in SLEEP behavior)
+    val sleepDowntime: Int = raftPrototypeConfig.getInt("sleepDowntime")
+
+    // Value of the "raftType" key
+    val raftTypeStr: String = raftPrototypeConfig.getString("raftType")
+
+    // Value of the "maxTerm" key
+    val maxTerm: Int = raftPrototypeConfig.getInt("maxTerm")
+
+    /**
+     * All settings as one comma-separated list of name=value pairs
+     */
+    override def toString: String = {
+      // The first segment used to end without a comma, gluing the maxTerm value to the
+      // next name ("maxTerm=5electionTimerIntervalMin=2").
+      s"sleepDowntime=$sleepDowntime,raftTypeStr=$raftTypeStr,maxTerm=$maxTerm," +
+      s"electionTimerIntervalMin=$electionTimerIntervalMin,electionTimerIntervalMax=$electionTimerIntervalMax," +
+      s"heartbeatTimerInterval=$heartbeatTimerInterval,nodes=$nodes,crashIntervalHeartbeats=$crashIntervalHeartbeats"
+    }
+  }
+}
\ No newline at end of file
diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/utils/Hasher.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/utils/Hasher.scala
new file mode 100644
index 00000000..2c3a5f44
--- /dev/null
+++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/utils/Hasher.scala
@@ -0,0 +1,26 @@
+package de.maxbundscherer.scala.raft.utils
+
+import de.maxbundscherer.scala.raft.schnorr.SchnorrUtil.hex2big
+import org.slf4j.{Logger, LoggerFactory}
+
+import java.nio.charset.Charset
+import java.security.MessageDigest
+
+/**
+ * SHA-256 hashing of strings to big integers
+ */
+class Hasher {
+  val UTF_8: Charset = Charset.forName("UTF-8")
+  // Named after the class: this.toString includes the identity hash, which gave every
+  // Hasher instance its own, differently named logger.
+  val logger: Logger = LoggerFactory.getLogger(getClass)
+
+  /**
+   * Hash a string with SHA-256
+   * @param toHash the string; hashed over its UTF-8 bytes
+   * @return the digest, converted through its hex form by hex2big
+   */
+  def hash(toHash: String): BigInt = {
+    val messageDigest: MessageDigest = MessageDigest.getInstance("SHA-256")
+    val hex: String = messageDigest.digest(toHash.getBytes(UTF_8)).map("%02x".format(_)).mkString
+    val res = hex2big(hex)
+//    logger.debug(s"hasher Hashing '$toHash', result: $res")
+    res
+  }
+}
+
+object Hasher {
+  def apply(): Hasher = {
+    new Hasher()
+  }
+}
\ No newline at end of file
diff --git a/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/utils/RaftScheduler.scala b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/utils/RaftScheduler.scala
new file mode 100644
index 00000000..77914720
--- /dev/null
+++ b/B-Raft/src/src/main/scala/de/maxbundscherer/scala/raft/utils/RaftScheduler.scala
@@ -0,0 +1,93 @@
+package de.maxbundscherer.scala.raft.utils
+
+import akka.actor.Actor
+import de.maxbundscherer.scala.raft.aggregates.Aggregate.NodeState
+
+/**
+ * RaftScheduler
+ */
+trait RaftScheduler extends Actor with Configuration {
+
+  import de.maxbundscherer.scala.raft.actors.BRaftNodeActor.BRaftNodeState
+  import
de.maxbundscherer.scala.raft.aggregates.Aggregate.SchedulerTrigger
+  import scala.concurrent.ExecutionContext
+  import scala.concurrent.duration._
+
+  /*
+   * Set up by actor
+   */
+  val state: NodeState
+  implicit val executionContext: ExecutionContext
+
+  //Set electionTimeout randomized in [electionTimerIntervalMin, electionTimerIntervalMax] (result in milliseconds)
+  def electionTimeout : Int = {
+    val minMillis = Config.electionTimerIntervalMin * 1000
+    val maxMillis = Config.electionTimerIntervalMax * 1000
+    // The old formula added nextInt(max * 1000) to the minimum, i.e. it covered [min, min + max).
+    // nextInt's bound is exclusive, hence the + 1; max(.., 0) tolerates max < min in the config.
+    minMillis + scala.util.Random.nextInt(math.max(maxMillis - minMillis, 0) + 1)
+  }
+
+  //Set heartbeat to fixed interval (milliseconds)
+  private val heartbeatTimeout: Int = Config.heartbeatTimerInterval * 1000
+
+  /**
+   * Stop electionTimer (no-op if none is running)
+   */
+  def stopElectionTimer(): Unit = {
+
+    state.electionTimer.foreach(_.cancel())
+    state.electionTimer = None
+
+  }
+
+  /**
+   * Start electionTimer (if already running = stop timer)
+   * electionTimeout is a def, so initialDelay and delay are two independent random draws.
+   */
+  def restartElectionTimer(): Unit = {
+
+    stopElectionTimer()
+
+    state.electionTimer = Some(
+      context.system.scheduler.scheduleWithFixedDelay(
+        initialDelay = electionTimeout.millis,
+        delay = electionTimeout.millis,
+        receiver = self,
+        message = SchedulerTrigger.ElectionTimeout
+      ))
+
+  }
+
+  /**
+   * Stop heartbeatTimer (no-op if none is running)
+   */
+  def stopHeartbeatTimer(): Unit = {
+
+    state.heartbeatTimer.foreach(_.cancel())
+    state.heartbeatTimer = None
+
+  }
+
+  /**
+   * Start heartbeatTimer (if already running = stop timer)
+   */
+  def restartHeartbeatTimer(): Unit = {
+
+    stopHeartbeatTimer()
+
+    state.heartbeatTimer = Some(
+      context.system.scheduler.scheduleWithFixedDelay(
+        initialDelay = heartbeatTimeout.millis,
+        delay = heartbeatTimeout.millis,
+        receiver = self,
+        message = SchedulerTrigger.Heartbeat
+      ))
+  }
+
+  /**
+   * Schedule a single SchedulerTrigger.Awake message to self after Config.sleepDowntime seconds
+   * (the returned Cancellable is not kept, so this wake-up cannot be cancelled from here)
+   */
+  def scheduleAwake(): Unit =
+    context.system.scheduler.scheduleOnce(delay = Config.sleepDowntime.seconds,
+                                          receiver = self,
+                                          message = SchedulerTrigger.Awake)
+
+}
diff --git
a/B-Raft/src/src/test/resources/application.conf b/B-Raft/src/src/test/resources/application.conf
new file mode 100644
index 00000000..9bbc307a
--- /dev/null
+++ b/B-Raft/src/src/test/resources/application.conf
@@ -0,0 +1,29 @@
+akka {
+
+  # Log Level (DEBUG, INFO, WARNING, ERROR)
+  loglevel = "INFO"
+
+}
+
+# NOTE(review): Configuration also reads raftPrototype.raftType and raftPrototype.maxTerm,
+# which are not set here - presumably they come from another application.conf on the
+# classpath; confirm, otherwise loading Config fails in tests.
+raftPrototype {
+
+  # Election Timer Min (Seconds)
+  electionTimerIntervalMin = 2
+
+  # Election Timer Max (Seconds)
+  electionTimerIntervalMax = 3
+
+  # Heartbeat Timer Interval (Seconds)
+  heartbeatTimerInterval = 2
+
+  # Raft Nodes (Amount)
+  nodes = 5
+
+  # Crash Interval (auto simulate crash after some heartbeats in LEADER behavior)
+  # Set to high number in test (ignore crash interval in testMode)
+  crashIntervalHeartbeats = 9999999
+
+  # Sleep downtime (Seconds) (after simulated crash in SLEEP behavior)
+  sleepDowntime = 8
+
+}
\ No newline at end of file
diff --git a/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/BRaftServiceTest.scala b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/BRaftServiceTest.scala
new file mode 100644
index 00000000..b0dbcc91
--- /dev/null
+++ b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/BRaftServiceTest.scala
@@ -0,0 +1,146 @@
+package de.maxbundscherer.scala.raft.BRaft
+
+import de.maxbundscherer.scala.raft.BRaft.utils.BaseServiceTest
+import de.maxbundscherer.scala.raft.aggregates.Aggregate._
+import de.maxbundscherer.scala.raft.utils.Configuration
+
+/**
+ * End-to-end scenario for BRaftService. The cases depend on each other through the
+ * variables below and on the state the earlier cases left in the cluster.
+ */
+class BRaftServiceTest extends BaseServiceTest with Configuration {
+
+  import de.maxbundscherer.scala.raft.aggregates.BRaftAggregate._
+
+  "RaftService" should {
+
+    // Name of the leader found by the first case; later cases compare against it
+    var temporaryFirstLeaderName: String = ""
+    // Data the test has appended so far, i.e. what every node is expected to hold
+    var temporaryData: Map[String, String] = Map.empty
+    // Not read anywhere in the cases visible here
+    var lastHash: BigInt = -1
+
+    "elect only one leader" in {
+
+      freezeTest(seconds = Config.nodes * Config.electionTimerIntervalMax, loggerMessage = "Waiting for first leader election")
+
+      val data: Vector[Either[IamTheLeader, IamNotTheLeader]] =
raftService.evaluateActualLeaders
+
+      // NOTE: .head throws NoSuchElementException when no node answered as leader,
+      // before the count assertions below get a chance to report it.
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      // Remembered so that later cases can compare against the first leader
+      temporaryFirstLeaderName = localLeaderName
+
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+    "append data in leader node" in {
+      println("append data in leader node")
+      val newDataKey = "key1"
+      val newDataValue = "val1"
+
+      // Track the expected content for the synchronization check further down
+      temporaryData = temporaryData + (newDataKey->newDataValue)
+
+      val data: Vector[Either[WriteResponse, IamNotTheLeader]] = raftService.appendData(key = newDataKey, value = newDataValue)
+
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      // The write must have been answered by the leader elected in the first case
+      localLeaderName shouldBe temporaryFirstLeaderName
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+    "has all nodes synchronized with new data" in {
+
+      freezeTest(seconds = Config.nodes * Config.heartbeatTimerInterval * 2, loggerMessage = "Waiting for sync data")
+
+      val data: Vector[ActualData] = raftService.evaluateActualData
+      log.info(s"Data: $data")
+      // Nodes are compared through the hashCode of the data they report
+      val uniqueHashCodes: Vector[Int] = data.map(_.data.hashCode()).distinct
+
+      uniqueHashCodes.size shouldBe 1
+      uniqueHashCodes.head shouldBe temporaryData.hashCode()
+    }
+
+    "reject invalid client appendData message" in {
+      val newDataKey = "key"
+      val newDataValue = "key"
+
+      // Signed with an arbitrary key instead of the service's own private key.
+      // temporaryData is deliberately left untouched: this write is expected to fail.
+      val data: Vector[Either[WriteResponse, IamNotTheLeader]] = raftService.appendData(key = newDataKey, value = newDataValue, BigInt(101001))
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+      data.filter(_.isLeft).head match {
+        case Left(left) => left.success shouldBe false
+        case _ => fail()
+      }
+    }
+
+    "simulate leader crash" in {
+
+      val data: Vector[Either[LeaderIsSimulatingCrash,
IamNotTheLeader]] = raftService.simulateLeaderCrash()
+
+      // NOTE: .head throws NoSuchElementException when no node confirmed the crash
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      // The node confirming the crash must be the leader found in the first case
+      localLeaderName shouldBe temporaryFirstLeaderName
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+
+    "elect new leader after leader crash" in {
+
+      freezeTest(seconds = Config.nodes * Config.electionTimerIntervalMax, loggerMessage = "Waiting for second leader election")
+
+      val data: Vector[Either[IamTheLeader, IamNotTheLeader]] = raftService.evaluateActualLeaders
+
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      // A different node has to have taken over
+      localLeaderName should not be temporaryFirstLeaderName
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+    "append new data in leader node" in {
+      println("append new data in leader node")
+      val newDataKey = "key2"
+      val newDataValue = "val2"
+
+      // Track the expected content for the second synchronization check
+      temporaryData = temporaryData + (newDataKey->newDataValue)
+      val data: Vector[Either[WriteResponse, IamNotTheLeader]] = raftService.appendData(key = newDataKey, value = newDataValue)
+
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      localLeaderName should not be temporaryFirstLeaderName
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+    "has all nodes synchronized with new data again" in {
+
+      freezeTest(seconds = Config.nodes * Config.heartbeatTimerInterval, loggerMessage = "Waiting for sync data")
+
+      val data: Vector[ActualData] = raftService.evaluateActualData
+
+      // Nodes are compared through the hashCode of the data they report
+      val uniqueHashCodes: Vector[Int] = data.map(_.data.hashCode()).distinct
+
+      uniqueHashCodes.size shouldBe 1
+      uniqueHashCodes.head shouldBe
temporaryData.hashCode()
+    }
+
+    "terminate actor system" in {
+
+      // The mapped Future is the test result (AsyncWordSpec)
+      raftService.terminate().map(response => response shouldBe true)
+    }
+
+  }
+
+}
\ No newline at end of file
diff --git a/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/BroadcastTest.scala b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/BroadcastTest.scala
new file mode 100644
index 00000000..3d36fcc7
--- /dev/null
+++ b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/BroadcastTest.scala
@@ -0,0 +1,49 @@
+package de.maxbundscherer.scala.raft.BRaft
+
+import de.maxbundscherer.scala.raft.BRaft.utils.BaseServiceTest
+import de.maxbundscherer.scala.raft.actors.BRaftNodeActor.BRaftNodeState
+import de.maxbundscherer.scala.raft.utils.Configuration
+
+/**
+ * Checks that all nodes report the same term shortly after start-up.
+ */
+class BroadcastTest extends BaseServiceTest with Configuration {
+
+  "Broadcast" should {
+
+    "terms after after one election should be the same" in {
+
+      // Fixed 3 s wait before the states are sampled
+      Thread.sleep(3000)
+
+      val nodeStates : Map[Int, BRaftNodeState] = raftService.getNodeStates
+
+      val terms : Iterable[Int] = nodeStates.map(x => x._2.term)
+
+      // forall is vacuously true for an empty collection, so terms.head is only evaluated when states exist
+      terms.forall(_ == terms.head) shouldBe(true)
+    }
+
+//    "elect new leader after leader crash" in {
+//
+//      Thread.sleep(3000)
+//
+//
+//      val data: Vector[Either[IamTheLeader, IamNotTheLeader]] = raftService.evaluateActualLeaders
+//
+//      val localLeaderName = data.filter(_.isLeft).head match {
+//        case Left(left) => left.actorName
+//        case _ => ""
+//      }
+//
+//      data.count(_.isLeft) shouldBe 1 //Only on leader
+//      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+//
+//      val nodeStates : Map[Int, NodeState] = raftService.getNodeStates
+//
+//      val terms : Iterable[Int] = nodeStates.map(x => x._2.term)
+//
+//      terms.toList.distinct.length shouldBe(1)
+//    }
+
+    "terminate actor system" in {
+
+      raftService.terminate().map(response => response shouldBe true)
+    }
+  }
+}
\ No newline at end of file
diff --git
a/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/ByzantineLeaderTest.scala b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/ByzantineLeaderTest.scala
new file mode 100644
index 00000000..cf09d8e7
--- /dev/null
+++ b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/ByzantineLeaderTest.scala
@@ -0,0 +1,35 @@
+package de.maxbundscherer.scala.raft.BRaft
+
+import de.maxbundscherer.scala.raft.BRaft.utils.BaseServiceTest
+import de.maxbundscherer.scala.raft.aggregates.Aggregate.{BehaviorEnum, IamNotTheLeader}
+import de.maxbundscherer.scala.raft.aggregates.BRaftAggregate.{BecomeByzantine, WriteResponse}
+import de.maxbundscherer.scala.raft.utils.Configuration
+
+class ByzantineLeaderTest extends BaseServiceTest with Configuration {
+  // Test Byzantine Behavior
+  // Runs while the suite is constructed, i.e. before any test case is executed
+  freezeTest(10, "Waiting for startup")
+  "start byzantine behavior" in {
+    val (leaderID, leaderNodeRef) = raftService.getLeader
+    leaderNodeRef ! BecomeByzantine
+    Thread.sleep(3000)
+    // One snapshot for both checks, so that they describe the same moment
+    val nodeStates = raftService.getNodeStates
+    val leaderState = nodeStates(leaderID)
+
+    val leaders = nodeStates.filter(tuple => {
+      tuple._2.behaviour == BehaviorEnum.BYZANTINELEADER || tuple._2.behaviour == BehaviorEnum.LEADER
+    }).keys
+
+    log.info(s"leaders: $leaders")
+    // NOTE(review): the assertions below use values captured before this sleep; presumably it
+    // only gives the byzantine leader time to run before the next case - confirm.
+    Thread.sleep(8000)
+    leaders.size shouldBe 1
+    leaderState.behaviour shouldBe BehaviorEnum.BYZANTINELEADER
+  }
+
+  "byzantine leader adds entry to log by itself (not received from client)" in {
+    // AppendData signed with an arbitrary key instead of the service's own private key
+    val data: Vector[Either[WriteResponse, IamNotTheLeader]] = raftService.appendData("testkey", "testvalue", 999999999)
+    Thread.sleep(3000)
+    // One reply per node; follows the configured cluster size instead of a hard-coded 5
+    data.size shouldBe Config.nodes
+  }
+}
diff --git a/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/FaultyLeaderElection.scala b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/FaultyLeaderElection.scala
new file mode 100644
index 00000000..f7647b00
--- /dev/null
+++
b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/FaultyLeaderElection.scala
@@ -0,0 +1,93 @@
+package de.maxbundscherer.scala.raft.BRaft
+
+import akka.actor.ActorRef
+import de.maxbundscherer.scala.raft.BRaft.utils.BaseServiceTest
+import de.maxbundscherer.scala.raft.actors.BRaftNodeActor.BRaftNodeState
+import de.maxbundscherer.scala.raft.aggregates.Aggregate._
+import de.maxbundscherer.scala.raft.utils.Configuration
+
+/**
+ * Scenario in which a non-leader node is told to start an election while a leader exists.
+ * The cases depend on each other through temporaryFirstLeaderName and the cluster state.
+ */
+class FaultyLeaderElection extends BaseServiceTest with Configuration {
+
+  "Election" should {
+
+    // Name of the leader found by the first case; later cases compare against it
+    var temporaryFirstLeaderName: String = ""
+
+    "elect only one leader" in {
+
+      freezeTest(seconds = Config.nodes * Config.electionTimerIntervalMax, loggerMessage = "Waiting for first leader election")
+
+      val data: Vector[Either[IamTheLeader, IamNotTheLeader]] = raftService.evaluateActualLeaders
+
+      // NOTE: .head throws NoSuchElementException when no node answered as leader
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      temporaryFirstLeaderName = localLeaderName
+
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+    "has node become candidate" in {
+
+      val nodeStates : Map[Int, BRaftNodeState] = raftService.getNodeStates
+
+      // Every node except the leader
+      val nodeFollowerStates = nodeStates.filter(x => x._2.behaviour != BehaviorEnum.LEADER)
+
+      // Pick the first non-leader node; only its index is needed
+      val (nodeActorID, _) : (Int, BRaftNodeState) = nodeFollowerStates.head
+
+      val actor : Option[ActorRef] = raftService.nodes.get(nodeActorID)
+
+      actor match {
+        case Some(actor) => actor !
InitiateLeaderElection
+        // Fail the test with a message instead of dying with a scala.MatchError
+        case None => fail(s"No actor registered for node $nodeActorID")
+      }
+
+
+      // Sampled right after the message was sent, without any waiting in between
+      val nodeStates_after : Map[Int, BRaftNodeState] = raftService.getNodeStates
+
+      val state = nodeStates_after.get(nodeActorID)
+
+      state match {
+        case Some(state) => state.behaviour.shouldBe(BehaviorEnum.CANDIDATE)
+        // getNodeStates leaves out nodes that did not reply with their state
+        case None => fail(s"Node $nodeActorID did not report a state")
+      }
+    }
+
+    "simulate leader crash" in {
+
+      val data: Vector[Either[LeaderIsSimulatingCrash, IamNotTheLeader]] = raftService.simulateLeaderCrash()
+
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      // The node confirming the crash must be the leader found in the first case
+      localLeaderName shouldBe temporaryFirstLeaderName
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+
+    "elect new leader after leader crash" in {
+
+      freezeTest(seconds = Config.nodes * Config.electionTimerIntervalMax, loggerMessage = "Waiting for second leader election")
+
+      val data: Vector[Either[IamTheLeader, IamNotTheLeader]] = raftService.evaluateActualLeaders
+
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      // A different node has to have taken over
+      localLeaderName should not be temporaryFirstLeaderName
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+    "terminate actor system" in {
+
+      raftService.terminate().map(response => response shouldBe true)
+    }
+  }
+}
diff --git a/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/utils/BaseServiceTest.scala b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/utils/BaseServiceTest.scala
new file mode 100644
index 00000000..0aa61b0e
--- /dev/null
+++ b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/BRaft/utils/BaseServiceTest.scala
@@ -0,0 +1,40 @@
+package de.maxbundscherer.scala.raft.BRaft.utils
+
+import de.maxbundscherer.scala.raft.services.{BRaftService, RaftService}
+import akka.event.LoggingAdapter
+import de.maxbundscherer.scala.raft.utils.Configuration
+import
org.scalatest.{AsyncWordSpec, Matchers}
+
+/**
+ * Holds the single actor system and BRaftService shared by every suite that mixes in
+ * the BaseServiceTest trait.
+ * NOTE(review): several suites end with "terminate actor system"; since this object is a
+ * singleton, suites running later in the same JVM would presumably get a terminated system - confirm.
+ */
+object BaseServiceTest extends Configuration {
+
+  import akka.actor.ActorSystem
+  import akka.util.Timeout
+  import scala.concurrent.ExecutionContextExecutor
+  import scala.concurrent.duration._
+
+  private implicit val actorSystem: ActorSystem = ActorSystem("testSystem")
+  private implicit val executionContext: ExecutionContextExecutor = actorSystem.dispatcher
+  // Upper bound for every blocking ask issued through the service
+  private implicit val timeout: Timeout = 15.seconds
+  private lazy val log: LoggingAdapter = actorSystem.log
+
+  // Lazy: the nodes are only created when the first suite touches raftService
+  private lazy val raftService = new BRaftService(numberNodes = Config.nodes)
+
+}
+
+trait BaseServiceTest extends AsyncWordSpec with Matchers {
+
+  val log: LoggingAdapter = BaseServiceTest.log
+  val raftService: BRaftService = BaseServiceTest.raftService
+
+  /**
+   * Freeze test (actorSystem is still working)
+   * Blocks the calling thread with Thread.sleep.
+   *
+   * @param seconds Int
+   * @param loggerMessage String
+   */
+  def freezeTest(seconds: Int, loggerMessage: String): Unit = {
+    log.warning(s"Test is in sleepMode for $seconds seconds ($loggerMessage)")
+    // Long arithmetic: seconds * 1000 as Int overflows for large values
+    Thread.sleep(seconds * 1000L)
+    log.warning(s"Test continues")
+  }
+}
\ No newline at end of file
diff --git a/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/Raft/RaftServiceTest.scala b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/Raft/RaftServiceTest.scala
new file mode 100644
index 00000000..f1319694
--- /dev/null
+++ b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/Raft/RaftServiceTest.scala
@@ -0,0 +1,146 @@
+package de.maxbundscherer.scala.raft.Raft
+
+import de.maxbundscherer.scala.raft.Raft.utils.BaseServiceTest
+import de.maxbundscherer.scala.raft.aggregates.Aggregate._
+import de.maxbundscherer.scala.raft.utils.Configuration
+
+class RaftServiceTest extends BaseServiceTest with Configuration {
+
+  import de.maxbundscherer.scala.raft.aggregates.RaftAggregate._
+
+  /**
+   * Freeze test (actorSystem is still working)
+   * @param seconds Int
+   * @param loggerMessage String
+   */
+  private def freezeTest(seconds: Int,
loggerMessage: String): Unit = {
+
+    log.warning(s"Test is in sleepMode for $seconds seconds ($loggerMessage)")
+    // Long arithmetic: seconds * 1000 as Int overflows for large values
+    Thread.sleep(seconds * 1000L)
+    log.warning(s"Test continues")
+
+  }
+
+  "RaftService" should {
+
+    // Name of the leader found by the first case; later cases compare against it
+    var temporaryFirstLeaderName: String = ""
+    // Data the test has appended so far, i.e. what every node is expected to hold
+    var temporaryData: Map[String, String] = Map.empty
+
+    "elect only one leader" in {
+
+      freezeTest(seconds = Config.nodes * Config.electionTimerIntervalMax, loggerMessage = "Waiting for first leader election")
+
+      val data: Vector[Either[IamTheLeader, IamNotTheLeader]] = raftService.evaluateActualLeaders
+
+      // NOTE: .head throws NoSuchElementException when no node answered as leader
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      temporaryFirstLeaderName = localLeaderName
+
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+    "append data in leader node" in {
+
+      val newDataKey = "key1"
+      val newDataValue = "val1"
+
+      // Track the expected content for the synchronization check below
+      temporaryData = temporaryData + (newDataKey->newDataValue)
+
+      val data: Vector[Either[WriteSuccess, IamNotTheLeader]] = raftService.appendData(key = newDataKey, value = newDataValue)
+
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      // The write must have been answered by the leader elected in the first case
+      localLeaderName shouldBe temporaryFirstLeaderName
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+    "has all nodes synchronized with new data" in {
+
+      freezeTest(seconds = Config.nodes * Config.heartbeatTimerInterval, loggerMessage = "Waiting for sync data")
+
+      val data: Vector[ActualData] = raftService.evaluateActualData
+
+      // Nodes are compared through the hashCode of the data they report
+      val uniqueHashCodes: Vector[Int] = data.map(_.data.hashCode()).distinct
+
+      uniqueHashCodes.size shouldBe 1
+      uniqueHashCodes.head shouldBe temporaryData.hashCode()
+    }
+
+    "simulate leader crash" in {
+
+      val data: Vector[Either[LeaderIsSimulatingCrash, IamNotTheLeader]] =
raftService.simulateLeaderCrash()
+
+      // NOTE: .head throws NoSuchElementException when no node confirmed the crash
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      // The node confirming the crash must be the leader found in the first case
+      localLeaderName shouldBe temporaryFirstLeaderName
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+
+    "elect new leader after leader crash" in {
+
+      freezeTest(seconds = Config.nodes * Config.electionTimerIntervalMax, loggerMessage = "Waiting for second leader election")
+
+      val data: Vector[Either[IamTheLeader, IamNotTheLeader]] = raftService.evaluateActualLeaders
+
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      // A different node has to have taken over
+      localLeaderName should not be temporaryFirstLeaderName
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+    "append new data in leader node" in {
+
+      val newDataKey = "key2"
+      val newDataValue = "val2"
+
+      // Track the expected content for the second synchronization check
+      temporaryData = temporaryData + (newDataKey->newDataValue)
+
+      val data: Vector[Either[WriteSuccess, IamNotTheLeader]] = raftService.appendData(key = newDataKey, value = newDataValue)
+
+      val localLeaderName = data.filter(_.isLeft).head match {
+        case Left(left) => left.actorName
+        case _ => ""
+      }
+
+      localLeaderName should not be temporaryFirstLeaderName
+      data.count(_.isLeft) shouldBe 1 //Only one leader
+      data.count(_.isRight) shouldBe ( Config.nodes - 1 ) //Other nodes shouldBe follower
+    }
+
+    "has all nodes synchronized with new data again" in {
+
+      freezeTest(seconds = Config.nodes * Config.heartbeatTimerInterval, loggerMessage = "Waiting for sync data")
+
+      val data: Vector[ActualData] = raftService.evaluateActualData
+
+      // Nodes are compared through the hashCode of the data they report
+      val uniqueHashCodes: Vector[Int] = data.map(_.data.hashCode()).distinct
+
+      uniqueHashCodes.size shouldBe 1
+      uniqueHashCodes.head shouldBe temporaryData.hashCode()
+    }
+
+    "terminate actor system" in {
+
+
raftService.terminate().map(response => response shouldBe true)
+    }
+
+  }
+
+}
\ No newline at end of file
diff --git a/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/Raft/utils/BaseServiceTest.scala b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/Raft/utils/BaseServiceTest.scala
new file mode 100644
index 00000000..c4e95ed4
--- /dev/null
+++ b/B-Raft/src/src/test/scala/de/maxbundscherer/scala/raft/Raft/utils/BaseServiceTest.scala
@@ -0,0 +1,29 @@
+package de.maxbundscherer.scala.raft.Raft.utils
+
+import de.maxbundscherer.scala.raft.services.RaftService
+import akka.event.LoggingAdapter
+import de.maxbundscherer.scala.raft.utils.Configuration
+import org.scalatest.{AsyncWordSpec, Matchers}
+
+/**
+ * Holds the single actor system and RaftService shared by every suite that mixes in
+ * the BaseServiceTest trait of this package.
+ */
+object BaseServiceTest extends Configuration {
+
+  import akka.actor.ActorSystem
+  import akka.util.Timeout
+  import scala.concurrent.ExecutionContextExecutor
+  import scala.concurrent.duration._
+
+  private implicit val actorSystem: ActorSystem = ActorSystem("testSystem")
+  private implicit val executionContext: ExecutionContextExecutor = actorSystem.dispatcher
+  // Upper bound for every blocking ask issued through the service
+  private implicit val timeout: Timeout = 15.seconds
+  private lazy val log: LoggingAdapter = actorSystem.log
+
+  // Lazy: the nodes are only created when the first suite touches raftService
+  private lazy val raftService = new RaftService(numberNodes = Config.nodes)
+
+}
+
+/**
+ * Base trait for the plain-Raft suites: exposes the shared logger and service.
+ */
+trait BaseServiceTest extends AsyncWordSpec with Matchers {
+
+  val log: LoggingAdapter = BaseServiceTest.log
+  val raftService: RaftService = BaseServiceTest.raftService
+
+}
\ No newline at end of file