diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index 27559952c..50be84534 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -185,8 +185,9 @@ become: yes tasks: - name: Install and configure tuneD - import_role: + include_role: name: tuned + tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" - hosts: freeipa_server # Done here as it might be providing DNS @@ -216,31 +217,27 @@ become: yes tags: firewalld tasks: - - import_role: + - include_role: name: firewalld + tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' else 'main.yml' }}" - hosts: fail2ban gather_facts: false become: yes tags: fail2ban tasks: - - import_role: + - include_role: name: fail2ban + tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" - name: Setup podman gather_facts: false hosts: podman tags: podman tasks: - - import_role: - name: podman - tasks_from: prereqs.yml - tags: prereqs - - - import_role: + - include_role: name: podman - tasks_from: config.yml - tags: config + tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" - hosts: update gather_facts: false @@ -314,10 +311,13 @@ - hosts: ansible_init gather_facts: yes become: yes + environment: "{{ appliances_environment_vars }}" tags: linux_ansible_init tasks: - - include_role: + - name: Install ansible-init + include_role: name: azimuth_cloud.image_utils.linux_ansible_init + when: appliances_mode == 'build' - hosts: k3s:&builder become: yes diff --git a/ansible/extras.yml b/ansible/extras.yml index c7cacb877..ce50bf3c8 100644 --- a/ansible/extras.yml +++ b/ansible/extras.yml @@ -32,6 +32,7 @@ hosts: eessi tags: eessi become: true + environment: "{{ appliances_environment_vars }}" gather_facts: false tasks: - name: Install and configure EESSI diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index 0b4335b14..906386ae8 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -171,11 
+171,9 @@ when: "'openondemand' in group_names" - name: slurm exporter - import_role: + include_role: name: slurm_exporter - tasks_from: install - vars: - slurm_exporter_state: stopped + tasks_from: install.yml when: "'slurm_exporter' in group_names" - name: Install alertmanager diff --git a/ansible/filesystems.yml b/ansible/filesystems.yml index 4665c0f8f..47f665c9b 100644 --- a/ansible/filesystems.yml +++ b/ansible/filesystems.yml @@ -20,6 +20,7 @@ - name: Setup Manila share mounts hosts: manila become: true + environment: "{{ appliances_environment_vars }}" tags: manila tasks: - include_role: diff --git a/ansible/iam.yml b/ansible/iam.yml index 857b8f840..f30fa70fe 100644 --- a/ansible/iam.yml +++ b/ansible/iam.yml @@ -17,6 +17,7 @@ - freeipa - freeipa_client gather_facts: yes + environment: "{{ appliances_environment_vars }}" become: yes tasks: - name: Install FreeIPA client diff --git a/ansible/monitoring.yml b/ansible/monitoring.yml index e97946212..9094cc0da 100644 --- a/ansible/monitoring.yml +++ b/ansible/monitoring.yml @@ -17,6 +17,7 @@ - name: Setup slurm stats hosts: slurm_stats tags: slurm_stats + environment: "{{ appliances_environment_vars }}" tasks: - include_role: name: slurm_stats @@ -25,14 +26,16 @@ hosts: filebeat tags: filebeat tasks: - - import_role: + - include_role: name: filebeat + tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' else 'main.yml' }}" - name: Deploy node_exporter hosts: node_exporter tags: node_exporter tasks: - - import_role: name=cloudalchemy.node_exporter + - import_role: + name: cloudalchemy.node_exporter - name: Deploy OpenOndemand exporter hosts: openondemand @@ -49,9 +52,12 @@ hosts: control become: true tags: slurm_exporter + environment: "{{ appliances_environment_vars }}" tasks: - - import_role: + - include_role: name: slurm_exporter + tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" + - name: Setup core monitoring software hosts: prometheus @@ -75,6 +81,7 @@ - 
name: Deploy grafana hosts: grafana tags: grafana + environment: "{{ appliances_environment_vars }}" tasks: - assert: that: vault_grafana_admin_password is defined diff --git a/ansible/portal.yml b/ansible/portal.yml index 2aa646ae9..39d51ee89 100644 --- a/ansible/portal.yml +++ b/ansible/portal.yml @@ -3,6 +3,7 @@ - openondemand - openondemand_server become: yes + environment: "{{ appliances_environment_vars }}" gather_facts: yes # TODO tasks: - import_role: @@ -14,6 +15,7 @@ - openondemand - openondemand_desktop become: yes + environment: "{{ appliances_environment_vars }}" gather_facts: yes tasks: - import_role: @@ -25,6 +27,7 @@ - openondemand - openondemand_jupyter become: yes + environment: "{{ appliances_environment_vars }}" gather_facts: yes tasks: - import_role: diff --git a/ansible/roles/fail2ban/tasks/configure.yml b/ansible/roles/fail2ban/tasks/configure.yml new file mode 100644 index 000000000..e4951f726 --- /dev/null +++ b/ansible/roles/fail2ban/tasks/configure.yml @@ -0,0 +1,15 @@ +--- +- name: Create config + template: + dest: /etc/fail2ban/jail.local + src: jail.local.j2 + notify: Restart fail2ban + +- name: flush handlers + meta: flush_handlers + +- name: Ensure fail2ban running even if no config change + service: + name: fail2ban + state: started + enabled: true diff --git a/ansible/roles/fail2ban/tasks/install.yml b/ansible/roles/fail2ban/tasks/install.yml new file mode 100644 index 000000000..65f3bfef2 --- /dev/null +++ b/ansible/roles/fail2ban/tasks/install.yml @@ -0,0 +1,11 @@ +--- +- name: Install EPEL repo + package: + name: epel-release + +- name: Install fail2ban packages + package: + name: + - fail2ban-server + - fail2ban-firewalld + state: present diff --git a/ansible/roles/fail2ban/tasks/main.yml b/ansible/roles/fail2ban/tasks/main.yml index 244a2edf9..410e9436d 100644 --- a/ansible/roles/fail2ban/tasks/main.yml +++ b/ansible/roles/fail2ban/tasks/main.yml @@ -1,26 +1,4 @@ --- -- name: Install EPEL repo - package: - name: epel-release 
-- name: Install fail2ban packages - package: - name: - - fail2ban-server - - fail2ban-firewalld - state: present - -- name: Create config - template: - dest: /etc/fail2ban/jail.local - src: jail.local.j2 - notify: Restart fail2ban - -- name: flush handlers - meta: flush_handlers - -- name: Ensure fail2ban running even if no config change - service: - name: fail2ban - state: started - enabled: true +- import_tasks: install.yml +- import_tasks: configure.yml diff --git a/ansible/roles/filebeat/defaults/main.yml b/ansible/roles/filebeat/defaults/main.yml index 4b4220a69..bdd02a2b7 100644 --- a/ansible/roles/filebeat/defaults/main.yml +++ b/ansible/roles/filebeat/defaults/main.yml @@ -1,6 +1,8 @@ --- #filebeat_config_path: undefined # REQUIRED. Path to filebeat.yml configuration file template +filebeat_debug: false + +# Note all the below can only be set/changed using the install.yml task file: filebeat_podman_user: "{{ ansible_user }}" # User that runs the filebeat container filebeat_version: 7.12.1 # latest usable with opensearch - see https://opensearch.org/docs/2.4/tools/index/#compatibility-matrix-for-beats -filebeat_debug: false diff --git a/ansible/roles/podman/tasks/config.yml b/ansible/roles/podman/tasks/configure.yml similarity index 100% rename from ansible/roles/podman/tasks/config.yml rename to ansible/roles/podman/tasks/configure.yml diff --git a/ansible/roles/podman/tasks/prereqs.yml b/ansible/roles/podman/tasks/install.yml similarity index 100% rename from ansible/roles/podman/tasks/prereqs.yml rename to ansible/roles/podman/tasks/install.yml diff --git a/ansible/roles/podman/tasks/main.yml b/ansible/roles/podman/tasks/main.yml new file mode 100644 index 000000000..2b65e84b4 --- /dev/null +++ b/ansible/roles/podman/tasks/main.yml @@ -0,0 +1,2 @@ +- import_tasks: install.yml +- import_tasks: configure.yml diff --git a/ansible/roles/proxy/README.md b/ansible/roles/proxy/README.md index 6d51fd9d4..1f6933b85 100644 --- a/ansible/roles/proxy/README.md +++ 
b/ansible/roles/proxy/README.md @@ -7,5 +7,6 @@ Define http/s proxy configuration. - `proxy_http_proxy`: Required. Address of http proxy. E.g. "http://10.1.0.28:3128" for a Squid proxy on default port. - `proxy_https_proxy`: Optional. Address of https proxy. Default is `{{ proxy_http_proxy }}`. - `proxy_no_proxy_extra`: Optional. List of additional addresses not to proxy. Will be combined with default list which includes `inventory_hostname` (for hostnames) and `ansible_host` (for host IPs) for all Ansible hosts. -- `proxy_dnf`: Optional bool. Whether to configure yum/dnf proxying through `proxy_http_proxy`. Default `true`. -- `proxy_systemd`: Optional bool. Whether to give processes started by systemd the above http, https and no_proxy configuration. **NB** Running services will need restarting if this is changed. Default `true`. +- `proxy_plays_only`: Optional bool. Whether to configure proxying only for Ansible plays (via the [environment](https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_environment.html) keyword). Default `false` which means proxy configuration is written to the host and available to all users. If `true` this role must be run in the playbook for proxying to be available as it sets host facts. +- `proxy_dnf`: Optional bool. Whether to configure yum/dnf proxying through `proxy_http_proxy`. Default `true` unless `proxy_plays_only` is `true`. +- `proxy_systemd`: Optional bool. Whether to give processes started by systemd the above http, https and no_proxy configuration. **NB** Running services will need restarting if this is changed. Default `true` unless `proxy_plays_only` is `true`. 
diff --git a/ansible/roles/proxy/defaults/main.yml b/ansible/roles/proxy/defaults/main.yml index fd2b079ec..0a0b252ee 100644 --- a/ansible/roles/proxy/defaults/main.yml +++ b/ansible/roles/proxy/defaults/main.yml @@ -3,5 +3,6 @@ proxy_https_proxy: "{{ proxy_http_proxy }}" proxy_no_proxy_defaults: "{{ ['localhost', '127.0.0.1'] + groups['all'] + hostvars.values() | map(attribute='ansible_host') }}" proxy_no_proxy_extras: [] proxy_no_proxy: "{{ (proxy_no_proxy_defaults + proxy_no_proxy_extras) | unique | sort | join(',') }}" -proxy_dnf: true -proxy_systemd: true +proxy_plays_only: false +proxy_dnf: "{{ not proxy_plays_only }}" +proxy_systemd: "{{ not proxy_plays_only }}" diff --git a/ansible/roles/proxy/tasks/main.yml b/ansible/roles/proxy/tasks/main.yml index 70a7eca67..3df3fb2e8 100644 --- a/ansible/roles/proxy/tasks/main.yml +++ b/ansible/roles/proxy/tasks/main.yml @@ -9,6 +9,7 @@ state: present regexp: "{{ item.key }}=.*" line: "{{ item.key }}={{ item.value }}" + when: not proxy_plays_only | bool loop: - key: http_proxy value: "{{ proxy_http_proxy }}" @@ -63,3 +64,13 @@ - name: Reset connection to get new /etc/environment meta: reset_connection # NB: conditionals not supported + +- name: Add proxy vars to appliances_environment_vars + set_fact: + appliances_environment_vars: "{{ appliances_environment_vars | combine(_proxy_vars) }}" + vars: + _proxy_vars: + http_proxy: "{{ proxy_http_proxy }}" + https_proxy: "{{ proxy_https_proxy }}" + no_proxy: "{{ proxy_no_proxy }}" + when: proxy_plays_only | bool diff --git a/ansible/roles/slurm_exporter/tasks/configure.yml b/ansible/roles/slurm_exporter/tasks/configure.yml new file mode 100644 index 000000000..e511be02b --- /dev/null +++ b/ansible/roles/slurm_exporter/tasks/configure.yml @@ -0,0 +1,7 @@ +- name: Ensure slurm exporter state + systemd: + name: prometheus-slurm-exporter + state: "{{ slurm_exporter_state }}" + enabled: true + when: + - not ansible_check_mode diff --git 
a/ansible/roles/slurm_exporter/tasks/install.yml b/ansible/roles/slurm_exporter/tasks/install.yml index 49ee57fef..cba7aa95b 100644 --- a/ansible/roles/slurm_exporter/tasks/install.yml +++ b/ansible/roles/slurm_exporter/tasks/install.yml @@ -6,10 +6,3 @@ - meta: flush_handlers -- name: Ensure slurm exporter state - systemd: - name: prometheus-slurm-exporter - state: "{{ slurm_exporter_state }}" - enabled: true - when: - - not ansible_check_mode diff --git a/ansible/roles/slurm_exporter/tasks/main.yml b/ansible/roles/slurm_exporter/tasks/main.yml index 52b260f07..0171113a1 100644 --- a/ansible/roles/slurm_exporter/tasks/main.yml +++ b/ansible/roles/slurm_exporter/tasks/main.yml @@ -1,2 +1,3 @@ --- - import_tasks: install.yml +- import_tasks: configure.yml diff --git a/ansible/roles/squid/README.md b/ansible/roles/squid/README.md index e514c3605..112d4816d 100644 --- a/ansible/roles/squid/README.md +++ b/ansible/roles/squid/README.md @@ -20,20 +20,15 @@ Where noted these map to squid parameters of the same name without the `squid_` - `squid_maximum_object_size_in_memory`: Optional str. Upper size limit for objects in memory cache, default '64 MB'. See squid parameter. - `squid_maximum_object_size`: Optional str. Upper size limit for objects in disk cache, default '200 MB'. See squid parameter. - `squid_http_port`: Optional str. Socket addresses to listen for client requests, default '3128'. See squid parameter. -- `squid_acls`: Optional str, can be multiline. Define access lists. Default `acl anywhere src all`, i.e. rely on OpenStack security groups (or other firewall if deployed). See squid parameter `acl`. NB: The default template also defines acls for `SSL_ports` and `Safe_ports` as is common practice. -- `squid_http_access`: Optional str, can be multiline. Allow/deny access based on access lists. Default: +- `squid_acls`: Optional list of strs. Define access lists. Default: `['acl anywhere src all']`, i.e. 
allow connection from anywhere, relying on OpenStack security groups (or other firewall if deployed). See squid parameter `acl`. NB: The default template also defines acls for `SSL_ports` and `Safe_ports` as is common practice. +- `squid_http_access`: Optional str, can be multiline. Allow/deny access based on access lists. The default will: + - Deny requests to certain unsafe ports (see `squid.conf.j2`) + - Deny CONNECT to other than secure SSL ports + - Only allow cachemgr access from localhost + - Allow access for all ACLs defined in `squid_acls` + - Allow access for localhost + - Deny all other access - # Deny requests to certain unsafe ports - http_access deny !Safe_ports - # Deny CONNECT to other than secure SSL ports - http_access deny CONNECT !SSL_ports - # Only allow cachemgr access from localhost - http_access allow localhost manager - http_access deny manager - # Rules allowing http access - http_access allow anywhere - http_access allow localhost - # Finally deny all other access to this proxy - http_access deny all - See squid parameter. + +- `squid_auth_param`: Optional str, can be multiline. Parameters for authentication schemes. Default empty string. diff --git a/ansible/roles/squid/defaults/main.yml b/ansible/roles/squid/defaults/main.yml index 7457bdccf..b358286db 100644 --- a/ansible/roles/squid/defaults/main.yml +++ b/ansible/roles/squid/defaults/main.yml @@ -8,7 +8,8 @@ squid_cache_disk: "{{ undef(hint='squid_cache_disk (in MB) required, e.g. 
\"1024 squid_maximum_object_size_in_memory: '64 MB' squid_maximum_object_size: '200 MB' squid_http_port: 3128 -squid_acls: acl anywhere src all # rely on openstack security groups +squid_acls: + - acl anywhere src all # rely on openstack security groups squid_http_access: | # Deny requests to certain unsafe ports http_access deny !Safe_ports @@ -18,7 +19,10 @@ squid_http_access: | http_access allow localhost manager http_access deny manager # Rules allowing http access - http_access allow anywhere + {% for acl in squid_acls %} + http_access allow {{ (acl | split)[1] }} + {% endfor %} http_access allow localhost # Finally deny all other access to this proxy http_access deny all +squid_auth_param: '' diff --git a/ansible/roles/squid/templates/squid.conf.j2 b/ansible/roles/squid/templates/squid.conf.j2 index b6d10e7dc..9d7b0ad95 100644 --- a/ansible/roles/squid/templates/squid.conf.j2 +++ b/ansible/roles/squid/templates/squid.conf.j2 @@ -4,8 +4,14 @@ # - https://github.com/drosskopp/squid-cache/blob/main/squid.conf # +# Configure authentication parameters +# NB: required before ACL definitions using them +{{ squid_auth_param }} + # Define ACLs: -{{ squid_acls }} +{% for acl in squid_acls %} +{{ acl }} +{% endfor %} acl SSL_ports port 443 acl Safe_ports port 80 # http diff --git a/ansible/slurm.yml b/ansible/slurm.yml index d1bb93a9f..8cf2d3ee2 100644 --- a/ansible/slurm.yml +++ b/ansible/slurm.yml @@ -12,6 +12,7 @@ - name: Setup slurm-driven rebuild hosts: rebuild:!builder become: yes + environment: "{{ appliances_environment_vars }}" tags: - rebuild - openhpc diff --git a/docs/experimental/isolated-clusters.md b/docs/experimental/isolated-clusters.md new file mode 100644 index 000000000..202fb3b09 --- /dev/null +++ b/docs/experimental/isolated-clusters.md @@ -0,0 +1,164 @@ +# Isolated Clusters + +This document explains how to create clusters which do not have outbound internet +access by default. 
+ +The approach is to: +- Create a squid proxy with basic authentication and add a user. +- Configure the appliance to set proxy environment variables via Ansible's +  [remote environment support](https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_environment.html). + +This means that proxy environment variables are not present on the hosts at all +and are only injected when running Ansible, meaning the basic authentication +credentials are not exposed to cluster users. + +## Deploying Squid using the appliance +If an external squid is not available, one can be deployed by the cluster on a +dual-homed host. See [docs/networks.md#proxies](../networks.md#proxies) for +guidance, but note a separate host should be used rather than a Slurm node, to +avoid users on that node getting direct access. + +If the deploy host is RockyLinux, this could be used as the squid host by adding +it to inventory: + +```ini +# environments/$ENV/inventory/squid +[squid] +# configure squid on deploy host +localhost ansible_host=10.20.0.121 ansible_connection=local +``` + +The IP address should be the deploy host's IP on the cluster network and is used +later to define the proxy address. Other connection variables (e.g. `ansible_user`) +could be set if required. + +## Using Squid with basic authentication + +First create usernames/passwords on the squid host (tested on RockyLinux 8.9): + +```shell +SQUID_USER=rocky +dnf install -y httpd-tools +htpasswd -c /etc/squid/passwords $SQUID_USER # enter password at prompt +sudo chown squid /etc/squid/passwords +sudo chmod u=rw,go= /etc/squid/passwords +``` + +This can be tested by running: +``` +/usr/lib64/squid/basic_ncsa_auth /etc/squid/passwords +``` + +and entering `$SQUID_USER PASSWORD`, which should respond `OK`. 
+ +If using the appliance to deploy squid, override the default `squid` +configuration to use basic auth: + +```yaml +# environments/$ENV/inventory/group_vars/all/squid.yml: +squid_acls: + - acl ncsa_users proxy_auth REQUIRED +squid_auth_param: | + auth_param basic program /usr/lib64/squid/basic_ncsa_auth /etc/squid/passwords + auth_param basic children 5 + auth_param basic credentialsttl 1 minute +``` + +See the [squid docs](https://wiki.squid-cache.org/ConfigExamples/Authenticate/Ncsa) for more information. + +## Proxy Configuration + +Configure the appliance to configure proxying on all cluster nodes: + +```ini +# environments/.stackhpc/inventory/groups: +... +[proxy:children] +cluster +... +``` + +Now configure the appliance to set proxy variables via remote environment +rather than by writing it to the host, and provide the basic authentication +credentials: + +```yaml +#environments/$ENV/inventory/group_vars/all/proxy.yml: +proxy_basic_user: $SQUID_USER +proxy_basic_password: "{{ vault_proxy_basic_password }}" +proxy_plays_only: true +``` + +```yaml +#environments/$ENV/inventory/group_vars/all/vault_proxy.yml: +vault_proxy_basic_password: $SECRET +``` +This latter file should be vault-encrypted. + +If using an appliance-deployed squid then the other [proxy role variables](../../ansible/roles/proxy/README.md) +will be automatically constructed (see environments/common/inventory/group_vars/all/proxy.yml). +You may need to override `proxy_http_address` if the hostname of the squid node +is not resolvable by the cluster. This is typically the case if squid is deployed +to the deploy host, in which case the IP address may be specified instead using +the above example inventory as: + +``` +proxy_http_address: "{{ hostvars[groups['squid'] | first].ansible_host }}" +``` + +If using an external squid, at a minimum set `proxy_http_address`. 
You may +also need to set `proxy_http_port` or any other [proxy role's variables](../../ansible/roles/proxy/README.md) +if the calculated parameters are not appropriate. + +## Image build + +TODO: probably not currently functional! + +## EESSI + +Although EESSI will install with the above configuration, as there is no +outbound internet access except for Ansible tasks, making it functional will +require [configuring a proxy for CVMFS](https://multixscale.github.io/cvmfs-tutorial-hpc-best-practices/access/proxy/#client-system-configuration). + +## Isolation Using Security Group Rules + +The below shows the security groups/rules (as displayed by Horizon ) which can +be used to "isolate" a cluster when using a network which has a subnet gateway +provided by a router to an external network. It therefore also indicates what +access is required for a different networking configuration. + +Security group `isolated`: + + # allow outbound DNS + ALLOW IPv4 53/tcp to 0.0.0.0/0 + ALLOW IPv4 53/udp to 0.0.0.0/0 + + # allow everything within the cluster: + ALLOW IPv4 from isolated + ALLOW IPv4 to isolated + + # allow hosts to reach metadata server (e.g. for cloud-init keys): + ALLOW IPv4 80/tcp to 169.254.169.254/32 + + # allow hosts to reach squid proxy: + ALLOW IPv4 3128/tcp to 10.179.2.123/32 + +Security group `isolated-ssh-https` allows inbound ssh and https (for OpenOndemand): + + ALLOW IPv4 443/tcp from 0.0.0.0/0 + ALLOW IPv4 22/tcp from 0.0.0.0/0 + + +Then OpenTofu is configured as: + + + login_security_groups = [ + "isolated", # allow all in-cluster services + "isolated-ssh-https", # access via ssh and ondemand + ] + nonlogin_security_groups = [ + "isolated" + ] + +Note that DNS is required (and is configured by the cloud when the subnet has +a gateway) because name resolution happens on the hosts, not on the proxy. 
diff --git a/environments/.stackhpc/inventory/extra_groups b/environments/.stackhpc/inventory/extra_groups index 3c1ebe225..86bb04f11 100644 --- a/environments/.stackhpc/inventory/extra_groups +++ b/environments/.stackhpc/inventory/extra_groups @@ -35,6 +35,12 @@ builder # Install squid into fat image builder +[squid] +# DO NOT MERGE: +# configure squid on deploy host (steveb-dev) using stackhpc-dev network +localhost ansible_host=10.20.0.121 ansible_connection=local +# Note you could add any other connection vars e.g. ansible_user you need here + [sssd:children] # Install sssd into fat image builder @@ -47,3 +53,8 @@ cluster [compute_init:children] compute + +# DO NOT MERGE: +# turn on proxying for the cluster +[proxy:children] +cluster diff --git a/environments/.stackhpc/inventory/group_vars/all/proxy.yml b/environments/.stackhpc/inventory/group_vars/all/proxy.yml new file mode 100644 index 000000000..7c6703510 --- /dev/null +++ b/environments/.stackhpc/inventory/group_vars/all/proxy.yml @@ -0,0 +1,6 @@ +# DO NOT MERGE: +# proxy is steveb-dev on sussex-slurm-data +proxy_basic_user: rocky +proxy_basic_password: "{{ vault_proxy_basic_password }}" +proxy_http_address: "{{ hostvars[groups['squid'] | first].ansible_host }}" +proxy_plays_only: true diff --git a/environments/.stackhpc/inventory/group_vars/all/squid.yml b/environments/.stackhpc/inventory/group_vars/all/squid.yml new file mode 100644 index 000000000..f21c0ac6a --- /dev/null +++ b/environments/.stackhpc/inventory/group_vars/all/squid.yml @@ -0,0 +1,9 @@ +# DO NOT COMMIT: +squid_cache_disk: 1024 # MB +squid_cache_mem: "50 MB" +squid_acls: + - acl ncsa_users proxy_auth REQUIRED +squid_auth_param: | + auth_param basic program /usr/lib64/squid/basic_ncsa_auth /etc/squid/passwords + auth_param basic children 5 + auth_param basic credentialsttl 1 minute diff --git a/environments/.stackhpc/tofu/main.tf b/environments/.stackhpc/tofu/main.tf index 8d78401bf..adb550e3a 100644 --- 
a/environments/.stackhpc/tofu/main.tf +++ b/environments/.stackhpc/tofu/main.tf @@ -99,4 +99,11 @@ module "cluster" { state_volume_type = var.state_volume_type home_volume_type = var.home_volume_type + login_security_groups = [ + "isolated", # allow all in-cluster services + "isolated-ssh-https", # access via ssh and ondemand + ] + nonlogin_security_groups = [ + "isolated" + ] } diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 1af2b36e5..c6ddaccdb 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -8,6 +8,7 @@ appliances_cockpit_state: absent # RHEL cockpit installed but not enabled in gen #appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform appliances_mode: configure appliances_pulp_url: https://ark.stackhpc.com +appliances_environment_vars: {} # Address(ip/dns) for internal communication between services. This is # normally traffic you do no want to expose to users. 
diff --git a/environments/common/inventory/group_vars/all/grafana.yml b/environments/common/inventory/group_vars/all/grafana.yml index b03d16f37..63232ccec 100644 --- a/environments/common/inventory/group_vars/all/grafana.yml +++ b/environments/common/inventory/group_vars/all/grafana.yml @@ -22,13 +22,14 @@ grafana_serve_from_sub_path: "{{ groups['openondemand'] | count > 0 }}" grafana_dashboards_default: # node exporter slurm: - - dashboard_id: 13427 - replacements: - - placeholder: DS_PROMETHEUS - replacement: prometheus - - placeholder: DS_ELASTICSEARCH - replacement: slurmstats - revision_id: 1 + # DO NOT MERGE: + # - dashboard_id: 13427 + # replacements: + # - placeholder: DS_PROMETHEUS + # replacement: prometheus + # - placeholder: DS_ELASTICSEARCH + # replacement: slurmstats + # revision_id: 1 # openhpc slurm: - dashboard_file: openhpc-slurm.json replacements: @@ -44,11 +45,12 @@ grafana_dashboards_default: - placeholder: DS_SLURMSTATS replacement: slurmstats # slurm exporter - - dashboard_id: 4323 - replacements: - - placeholder: DS_PROMETHEUS - replacement: prometheus - revision_id: 3 + # DO NOT MERGE: + # - dashboard_id: 4323 + # replacements: + # - placeholder: DS_PROMETHEUS + # replacement: prometheus + # revision_id: 3 grafana_dashboards: "{{ grafana_dashboards_default + (openondemand_dashboard if groups.get('openondemand') else []) }}" diff --git a/environments/common/inventory/group_vars/all/proxy.yml b/environments/common/inventory/group_vars/all/proxy.yml index d606ee1d9..dfe27a536 100644 --- a/environments/common/inventory/group_vars/all/proxy.yml +++ b/environments/common/inventory/group_vars/all/proxy.yml @@ -1,2 +1,17 @@ -# default proxy address to first squid api address port 3128 if squid group non-empty, else empty string to avoid breaking hostvars -proxy_http_proxy: "{{ 'http://' + hostvars[groups['squid'].0].api_address + ':' + (squid_http_port | string) if groups['squid'] else '' }}" +# default proxy address to hostname of first squid 
host, port 3128 +# if squid group non-empty, else empty string to avoid breaking hostvars + +# override these to provide basic_auth: +proxy_basic_user: '' +proxy_basic_password: '' + +# some indirection to make this logic easier: +proxy_http_address: "{{ hostvars[groups['squid'].0].api_address }}" +proxy_http_port: "{{ squid_http_port }}" +proxy_basic_auth: "{{ proxy_basic_user }}:{{ proxy_basic_password }}@" +proxy_http_proxy_default: >- + {{ 'http://' + + (proxy_basic_auth if proxy_basic_password != '' else '') + + proxy_http_address + ':' + (proxy_http_port | string) + }} +proxy_http_proxy: "{{ proxy_http_proxy_default if groups['squid'] else '' }}" diff --git a/environments/common/inventory/group_vars/builder/defaults.yml b/environments/common/inventory/group_vars/builder/defaults.yml index dae4edd9a..46bf7f3d7 100644 --- a/environments/common/inventory/group_vars/builder/defaults.yml +++ b/environments/common/inventory/group_vars/builder/defaults.yml @@ -24,4 +24,5 @@ tuned_started: false tuned_enabled: false sssd_started: false sssd_enabled: false +slurm_exporter_state: stopped appliances_mode: build