From 720f818a7512d4831749943e7555b256ddae4478 Mon Sep 17 00:00:00 2001
From: "Daniel L. Parton"
Date: Mon, 3 Aug 2015 17:56:53 -0400
Subject: [PATCH 01/10] Created and document CLI for residue renumbering

---
 docs/cli_docs.rst                           | 23 ++++++++++++++++
 ensembler/cli.py                            |  2 +-
 ensembler/cli_commands/__init__.py          |  4 ++-
 ensembler/cli_commands/general.py           |  1 +
 ensembler/cli_commands/refine_explicit.py   |  1 +
 ensembler/cli_commands/renumber_residues.py | 30 +++++++++++++++++++++
 ensembler/initproject.py                    |  1 +
 ensembler/tools/renumber_residues.py        |  2 ++
 8 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 ensembler/cli_commands/renumber_residues.py

diff --git a/docs/cli_docs.rst b/docs/cli_docs.rst
index 72d225c..ba80c57 100644
--- a/docs/cli_docs.rst
+++ b/docs/cli_docs.rst
@@ -41,3 +41,26 @@
 PDB-format coordinate files in the directory ``templates/structures-resolved``.
 Each structure should be named XXX.pdb, where XXX matches the identifier in the fasta file.
 The residues in the coordinate files should also match the sequences in the fasta file.
+
+Additional Tools
+================
+
+Ensembler includes a ``tools`` submodule, which allows the user to conduct
+various useful tasks which are not considered core pipeline functions. The
+use-cases for many of these tools are quite specific, so they may not be
+applicable to every project, and should also be used with caution.
+
+Residue renumbering according to UniProt sequence coordinates
+-------------------------------------------------------------
+
+::
+
+    $ ensembler renumber_residues --target EGFR_HUMAN_D0
+
+The given target ID must begin with a UniProt mnemonic, e.g. "EGFR_HUMAN".
+This will output two files in the ``models/[target_id]`` directory:
+``topol-renumbered-implicit.pdb`` and ``topol-renumbered-explicit.pdb``.
+The coordinates are simply copied from the first example found for each of
+``refined-implicit.pdb.gz`` and ``refined-explicit.pdb.gz``. The residue
+numbers are renumbered according to the canonical isoform sequence coordinates
+in the UniProt entry.
diff --git a/ensembler/cli.py b/ensembler/cli.py
index 76e15b6..49dce8d 100644
--- a/ensembler/cli.py
+++ b/ensembler/cli.py
@@ -42,4 +42,4 @@ def main():
     if not command_dispatched and args['--help']:
         print('\n'.join([ensembler.cli_commands.general.helpstring_header,
                          ensembler.cli_commands.general.ensembler_helpstring]))
-    pass
\ No newline at end of file
+    pass
diff --git a/ensembler/cli_commands/__init__.py b/ensembler/cli_commands/__init__.py
index 1ebf6a4..e121d43 100644
--- a/ensembler/cli_commands/__init__.py
+++ b/ensembler/cli_commands/__init__.py
@@ -12,6 +12,7 @@
     'refine_explicit',
     'package_models',
     'quickmodel',
+    'renumber_residues',
 ]
 
 from . import general
@@ -27,4 +28,5 @@
 from . import solvate
 from . import refine_explicit
 from . import package_models
-from . import quickmodel
\ No newline at end of file
+from . import quickmodel
+from . import renumber_residues
\ No newline at end of file
diff --git a/ensembler/cli_commands/general.py b/ensembler/cli_commands/general.py
index 7724c41..01fd8c4 100644
--- a/ensembler/cli_commands/general.py
+++ b/ensembler/cli_commands/general.py
@@ -43,6 +43,7 @@
       [--template_pdbids ] [--template_chainids ]
      [--template_uniprot_query ] [--template_seqid_cutoff ] [--no-loopmodel]
      [--package_for_fah] [--nfahclones ] [--structure_dirs ]
+  ensembler renumber_residues [-h | --help] [--target ] [-v | --verbose]
 
 Commands:
   init                          Initialize a new Ensembler project
diff --git a/ensembler/cli_commands/refine_explicit.py b/ensembler/cli_commands/refine_explicit.py
index 5143ae5..2aeeeb6 100644
--- a/ensembler/cli_commands/refine_explicit.py
+++ b/ensembler/cli_commands/refine_explicit.py
@@ -75,6 +75,7 @@
 helpstring = '\n\n'.join([helpstring_header, '\n\n'.join(helpstring_unique_options), '\n\n'.join(helpstring_nonunique_options)])
 docopt_helpstring = '\n\n'.join(helpstring_unique_options)
 
+
 def dispatch(args):
     if args['--targetsfile']:
         with open(args['--targetsfile'], 'r') as targetsfile:
diff --git a/ensembler/cli_commands/renumber_residues.py b/ensembler/cli_commands/renumber_residues.py
new file mode 100644
index 0000000..2019c54
--- /dev/null
+++ b/ensembler/cli_commands/renumber_residues.py
@@ -0,0 +1,30 @@
+from ensembler.tools.renumber_residues import RenumberResidues
+
+helpstring_header = """Renumber residues using the canonical UniProt sequence coordinates.
+Target IDs must start with the UniProt mnemonic, e.g. 'ABL1_HUMAN'
+"""
+
+helpstring_unique_options = [
+    """\
+  --target   ID for target to work, e.g. 'ABL1_HUMAN_D0'""",
+]
+
+helpstring_nonunique_options = [
+    """\
+  -v --verbose """,
+]
+
+helpstring = '\n\n'.join([helpstring_header, '\n\n'.join(helpstring_unique_options), '\n\n'.join(helpstring_nonunique_options)])
+docopt_helpstring = '\n\n'.join(helpstring_unique_options)
+
+
+def dispatch(args):
+    if args['--verbose']:
+        log_level = 'debug'
+    else:
+        log_level = 'info'
+
+    RenumberResidues(
+        targetid=args['--target'],
+        log_level=log_level
+    )
diff --git a/ensembler/initproject.py b/ensembler/initproject.py
index 013f2ab..c11080b 100644
--- a/ensembler/initproject.py
+++ b/ensembler/initproject.py
@@ -11,6 +11,7 @@
 from Bio.SeqRecord import SeqRecord
 
 import ensembler
+import ensembler.version
 import ensembler.targetexplorer
 import ensembler.uniprot
 import ensembler.pdb
diff --git a/ensembler/tools/renumber_residues.py b/ensembler/tools/renumber_residues.py
index a4c9aed..b624ffe 100644
--- a/ensembler/tools/renumber_residues.py
+++ b/ensembler/tools/renumber_residues.py
@@ -19,6 +19,8 @@ def __init__(self, targetid, project_dir='.', log_level=None):
         set_loglevel(log_level)
         self.targetid = targetid
         self.models_target_dir = os.path.join(default_project_dirnames.models, self.targetid)
+        if not os.path.exists(self.models_target_dir):
+            raise Exception('Model "{}" not found'.format(self.targetid))
         self.project_dir = project_dir
         self.uniprot_mnemonic = '_'.join(self.targetid.split('_')[0:2])
         self._get_models()

From 59aa9c7d07bd3754dec41915f446699ae218b3b1 Mon Sep 17 00:00:00 2001
From: "Daniel L. Parton"
Date: Mon, 3 Aug 2015 23:11:33 -0400
Subject: [PATCH 02/10] Refactored package_for_fah

---
 ensembler/cli_commands/__init__.py       |   2 +-
 ensembler/cli_commands/package_models.py |   2 +-
 ensembler/packaging.py                   | 551 ++++++++++++-----------
 ensembler/tests/integrationtest_utils.py |   4 +-
 ensembler/tests/test_packaging.py        |  47 ++
 ensembler/utils.py                       |  16 +-
 setup.py                                 |  11 +-
 7 files changed, 356 insertions(+), 277 deletions(-)
 create mode 100644 ensembler/tests/test_packaging.py

diff --git a/ensembler/cli_commands/__init__.py b/ensembler/cli_commands/__init__.py
index e121d43..54b9806 100644
--- a/ensembler/cli_commands/__init__.py
+++ b/ensembler/cli_commands/__init__.py
@@ -29,4 +29,4 @@
 from . import refine_explicit
 from . import package_models
 from . import quickmodel
-from . import renumber_residues
\ No newline at end of file
+from . import renumber_residues
diff --git a/ensembler/cli_commands/package_models.py b/ensembler/cli_commands/package_models.py
index 7e03132..1fe3c8d 100644
--- a/ensembler/cli_commands/package_models.py
+++ b/ensembler/cli_commands/package_models.py
@@ -109,5 +109,5 @@ def dispatch(args):
         template_seqid_cutoff=template_seqid_cutoff,
         nclones=n_fah_clones,
         archive=archive,
-        verbose=args['--verbose'],
+        loglevel=loglevel,
     )
\ No newline at end of file
diff --git a/ensembler/packaging.py b/ensembler/packaging.py
index 2e44c09..7d51d5a 100644
--- a/ensembler/packaging.py
+++ b/ensembler/packaging.py
@@ -1,16 +1,23 @@
 import os
 import subprocess
-import numpy as np
-import ensembler
-from ensembler.core import mpistate, logger
+from ensembler.core import mpistate, logger, default_project_dirnames
+from ensembler.core import get_targets_and_templates, select_templates_by_seqid_cutoff
+from ensembler.utils import set_loglevel, read_file_contents_gz_or_not
+from ensembler.refinement import auto_select_openmm_platform
 import simtk.unit as unit
 import simtk.openmm as openmm
+
+fah_projects_dir = os.path.join(default_project_dirnames.packaged_models, 'fah-projects')
+
 
 def package_for_fah(process_only_these_targets=None,
-                    process_only_these_templates=None, template_seqid_cutoff=None,
-                    verbose=False, nclones=1, archive=False):
-    '''Create the input files and directory structure necessary to start a Folding@Home project.
+                    process_only_these_templates=None,
+                    template_seqid_cutoff=None,
+                    nclones=1, archive=False,
+                    openmm_platform=None,
+                    loglevel=None):
+    """
+    Create the input files and directory structure necessary to start a Folding@Home project.
 
     MPI-enabled.
 
     Parameters
     ----------
     archive : Bool
         A .tgz compressed archive will be created for each individual RUN directory.
- ''' - models_dir = ensembler.core.default_project_dirnames.models - packaged_models_dir = ensembler.core.default_project_dirnames.packaged_models - projects_dir = os.path.join(packaged_models_dir, 'fah-projects') + """ + set_loglevel(loglevel) + if mpistate.rank == 0: - if not os.path.exists(projects_dir): - os.mkdir(projects_dir) + if not os.path.exists(fah_projects_dir): + os.mkdir(fah_projects_dir) mpistate.comm.Barrier() - targets, templates_resolved_seq = ensembler.core.get_targets_and_templates() - - if process_only_these_templates: - selected_template_indices = [i for i, seq in enumerate(templates_resolved_seq) if seq.id in process_only_these_templates] - else: - selected_template_indices = range(len(templates_resolved_seq)) - - def generateRun(run): - """ - Build Folding@Home RUN and CLONE subdirectories from (possibly compressed) OpenMM serialized XML files. - - ARGUMENTS - - run (int) - run index - """ - - if verbose: print("Building RUN %d" % run) - - try: - import os, shutil - import gzip - - # Determine directory and pathnames. - rundir = os.path.join(project_dir, 'RUN%d' % run) - template_filename = os.path.join(rundir, 'template.txt') - seqid_filename = os.path.join(rundir, 'sequence-identity.txt') - system_filename = os.path.join(rundir, 'system.xml') - integrator_filename = os.path.join(rundir, 'integrator.xml') - protein_structure_filename = os.path.join(rundir, 'protein.pdb') - system_structure_filename = os.path.join(rundir, 'system.pdb') - final_state_filename = os.path.join(rundir, 'state%d.xml' % (nclones - 1)) - protein_structure_gz_filename_source = os.path.join(source_dir, 'implicit-refined.pdb.gz') - system_structure_gz_filename_source = os.path.join(source_dir, 'explicit-refined.pdb.gz') - - # Return if this directory has already been set up. - if os.path.exists(rundir): - if os.path.exists(template_filename)\ - and os.path.exists(seqid_filename)\ - and os.path.exists(system_filename)\ - and os.path.exists(integrator_filename)\ - and os.path.exists(protein_structure_filename)\ - and os.path.exists(system_structure_filename)\ - and os.path.exists(final_state_filename): - return - else: - # Construct run directory if it does not exist. - if not os.path.exists(rundir): - os.makedirs(rundir) - - # Write template information. - [filepath, template_name] = os.path.split(source_dir) - with open(template_filename, 'w') as outfile: - outfile.write(template_name + '\n') - - # Write the protein and system structure pdbs - with gzip.open(protein_structure_gz_filename_source) as protein_structure_file_source: - with open(protein_structure_filename, 'w') as protein_structure_file: - protein_structure_file.write(protein_structure_file_source.read()) - - with gzip.open(system_structure_gz_filename_source) as system_structure_file_source: - with open(system_structure_filename, 'w') as system_structure_file: - system_structure_file.write(system_structure_file_source.read()) - - # Read system, integrator, and state. 
- def readFileContents(filename): - fullpath = os.path.join(source_dir, filename) - - if os.path.exists(fullpath): - infile = open(fullpath, 'r') - elif os.path.exists(fullpath+'.gz'): - infile = gzip.open(fullpath+'.gz', 'r') - else: - import ipdb; ipdb.set_trace() - raise IOError('File %s not found' % filename) - - contents = infile.read() - infile.close() - return contents - - def writeFileContents(filepath, contents): - with open(filepath, 'w') as outfile: - outfile.write(contents) - - system = openmm.XmlSerializer.deserialize(readFileContents('explicit-system.xml')) - state = openmm.XmlSerializer.deserialize(readFileContents('explicit-state.xml')) - - # Substitute default box vectors. - box_vectors = state.getPeriodicBoxVectors() - system.setDefaultPeriodicBoxVectors(*box_vectors) - - # Write sequence identity. - contents = readFileContents('sequence-identity.txt') - writeFileContents(seqid_filename, contents) - - # Integrator settings. - constraint_tolerance = 1.0e-5 - timestep = 2.0 * unit.femtoseconds - collision_rate = 1.0 / unit.picosecond - temperature = 300.0 * unit.kelvin - - # Create new integrator to use. - integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep) - - # TODO: Make sure MonteCarloBarostat temperature matches set temperature. - - # Serialize System. - writeFileContents(system_filename, openmm.XmlSerializer.serialize(system)) - - # Serialize Integrator - writeFileContents(integrator_filename, openmm.XmlSerializer.serialize(integrator)) - - # Create Context so we can randomize velocities. - platform = openmm.Platform.getPlatformByName('Reference') - context = openmm.Context(system, integrator, platform) - context.setPositions(state.getPositions()) - context.setVelocities(state.getVelocities()) - box_vectors = state.getPeriodicBoxVectors() - context.setPeriodicBoxVectors(*box_vectors) - - # Create clones with different random initial velocities. - for clone_index in range(nclones): - state_filename = os.path.join(rundir, 'state%d.xml' % clone_index) - if os.path.exists(state_filename): - continue - context.setVelocitiesToTemperature(temperature) - state = context.getState(getPositions=True, getVelocities=True, getForces=True, getEnergy=True, getParameters=True, enforcePeriodicBox=True) - writeFileContents(state_filename, openmm.XmlSerializer.serialize(state)) - - # Clean up. - del context, integrator, state, system - - except Exception as e: - import traceback - print(traceback.format_exc()) - print(str(e)) - - return - - - def archiveRun(): - archive_filename = os.path.join(project_dir, 'RUN%d.tgz' % run_index) - run_dir = os.path.join(project_dir, 'RUN%d' % run_index) - subprocess.call(['tar', 'zcf', archive_filename, run_dir]) + targets, templates_resolved_seq = get_targets_and_templates() + if not openmm_platform: + openmm_platform = auto_select_openmm_platform() for target in targets: + if process_only_these_targets and (target.id not in process_only_these_targets): + continue - # Process only specified targets if directed. 
- if process_only_these_targets and (target.id not in process_only_these_targets): continue + target_project_dir = os.path.join(fah_projects_dir, target.id) - models_target_dir = os.path.join(models_dir, target.id) - if mpistate.rank == 0: - if not os.path.exists(models_target_dir): continue + models_target_dir = os.path.join(default_project_dirnames.models, target.id) + if not os.path.exists(models_target_dir): + continue mpistate.comm.Barrier() + sorted_valid_templates = [] + if mpistate.rank == 0: - print("-------------------------------------------------------------------------") - print("Building FAH OpenMM project for target %s" % target.id) - print("-------------------------------------------------------------------------") - - # ======== - # Build a list of valid templates - # ======== - - # Process all templates. - if verbose: print("Building list of valid templates...") - valid_templates = list() - - if template_seqid_cutoff: - process_only_these_templates = ensembler.core.select_templates_by_seqid_cutoff(target.id, seqid_cutoff=template_seqid_cutoff) - selected_template_indices = [i for i, seq in enumerate(templates_resolved_seq) if seq.id in process_only_these_templates] - - ntemplates_selected = len(selected_template_indices) - - for template_index in range(mpistate.rank, ntemplates_selected, mpistate.size): - template = templates_resolved_seq[selected_template_indices[template_index]] - # Check to make sure all files needed are present. - is_valid = True - filenames = ['explicit-system.xml', 'explicit-state.xml', 'explicit-integrator.xml'] - for filename in filenames: - fullpath = os.path.join(models_target_dir, template.id, filename) - if not (os.path.exists(fullpath) or os.path.exists(fullpath+'.gz')): - is_valid = False - # Exclude those that are not unique by clustering. - unique_by_clustering = os.path.exists(os.path.join(models_target_dir, template.id, 'unique_by_clustering')) - if not unique_by_clustering: - is_valid = False - - # Append if valid. 
- if is_valid: - valid_templates.append(template) - - nvalid = len(valid_templates) - if verbose: print("%d valid unique initial starting conditions found" % nvalid) - - # ======== - # Sort by sequence identity - # ======== - - if verbose: print("Sorting templates in order of decreasing sequence identity...") - sequence_identities = np.zeros([nvalid], np.float32) - for (template_index, template) in enumerate(valid_templates): - filename = os.path.join(models_target_dir, template.id, 'sequence-identity.txt') - with open(filename, 'r') as infile: - contents = infile.readline().strip() - sequence_identity = float(contents) - sequence_identities[template_index] = sequence_identity - sorted_indices = np.argsort(-sequence_identities) - valid_templates = [ valid_templates[index] for index in sorted_indices ] - if verbose: - print("Sorted") - print(sequence_identities[sorted_indices]) - - # ======== - # Create project directory - # ======== - - project_dir = os.path.join(projects_dir, target.id) - if mpistate.rank == 0: - if not os.path.exists(project_dir): - os.makedirs(project_dir) + logger.info('-------------------------------------------------------------------------') + logger.info('Building FAH OpenMM project for target {}'.format(target.id)) + logger.info('-------------------------------------------------------------------------') - mpistate.comm.Barrier() + valid_templates = get_valid_templates_for_target( + target, + templates_resolved_seq, + process_only_these_templates, + template_seqid_cutoff + ) - # ======== - # Build runs in parallel - # ======== + sorted_valid_templates = sort_valid_templates_by_seqid( + target, + valid_templates + ) - if verbose: print("Building RUNs in parallel...") - for run_index in range(mpistate.rank, len(valid_templates), mpistate.size): - print("-------------------------------------------------------------------------") - print("Building RUN for template %s" % valid_templates[run_index].id) - print("-------------------------------------------------------------------------") + create_target_project_dir(target) + + sorted_valid_templates = mpistate.comm.bcast(sorted_valid_templates, root=0) + + logger.debug("Building RUNs in parallel...") + + for run_index in range(mpistate.rank, len(sorted_valid_templates), mpistate.size): + logger.info('-------------------------------------------------------------------------') + logger.info('Building RUN for template {}'.format(sorted_valid_templates[run_index].id)) + logger.info('-------------------------------------------------------------------------') + + template = sorted_valid_templates[run_index] + + source_dir = os.path.join(models_target_dir, template.id) + generate_fah_run( + target_project_dir, + template, + source_dir, + run_index, + nclones, + openmm_platform, + ) - source_dir = os.path.join(models_target_dir, valid_templates[run_index].id) - generateRun(run_index) if archive: - archiveRun() - - # TODO - get this working - - # if mpistate.rank == 0: - # - # # ======== - # # Metadata - # # ======== - # - # import sys - # import yaml - # import ensembler.version - # import simtk.openmm.version - # datestamp = ensembler.core.get_utcnow_formatted() - # - # meta_filepath = os.path.join(models_target_dir, 'meta.yaml') - # with open(meta_filepath) as meta_file: - # metadata = yaml.load(meta_file, Loader=ensembler.core.YamlLoader) - # - # metadata['package_for_fah'] = { - # 'target_id': target.id, - # 'datestamp': datestamp, - # 'python_version': sys.version.split('|')[0].strip(), - # 'python_full_version': 
ensembler.core.literal_str(sys.version), - # 'ensembler_version': ensembler.version.short_version, - # 'ensembler_commit': ensembler.version.git_revision, - # 'biopython_version': Bio.__version__, - # 'openmm_version': simtk.openmm.version.short_version, - # 'openmm_commit': simtk.openmm.version.git_revision - # } - # - # meta_filepath = os.path.join(project_dir, 'meta.yaml') - # metadata = ensembler.core.ProjectMetadata(metadata) - # metadata.write(meta_filepath) + archive_fah_run(target, run_index) mpistate.comm.Barrier() if mpistate.rank == 0: print('Done.') +filenames_necessary_for_fah_packaging = [ + 'unique_by_clustering', + 'explicit-system.xml', + 'explicit-state.xml', + 'explicit-integrator.xml', +] + + +def get_valid_templates_for_target(target, + templates_resolved_seq, + process_only_these_templates, + template_seqid_cutoff + ): + logger.debug("Building list of valid templates...") + models_target_dir = os.path.join(default_project_dirnames.models, target.id) + if template_seqid_cutoff: + selected_templates = select_templates_by_seqid_cutoff( + target.id, seqid_cutoff=template_seqid_cutoff + ) + elif process_only_these_templates: + selected_templates = [ + seq_obj for seq_obj in templates_resolved_seq + if seq_obj.id in process_only_these_templates + ] + else: + selected_templates = templates_resolved_seq + + valid_templates = [] + + for template in selected_templates: + # Check to make sure all files needed are present. + for filename in filenames_necessary_for_fah_packaging: + fullpath = os.path.join(models_target_dir, template.id, filename) + if not (os.path.exists(fullpath) or os.path.exists(fullpath+'.gz')): + continue + valid_templates.append(template) + + logger.debug('{} valid unique initial starting conditions found'.format(len(valid_templates))) + + return valid_templates + + +def sort_valid_templates_by_seqid(target, valid_templates): + logger.debug("Sorting templates in order of decreasing sequence identity...") + models_target_dir = os.path.join(default_project_dirnames.models, target.id) + + seqids = [] + + for template in valid_templates: + seqids.append(get_seqid_for_model(models_target_dir, template)) + + sorted_valid_templates_and_seqids = sorted( + zip(valid_templates, seqids), + reverse=True, + key=lambda x: x[1] + ) + + sorted_valid_templates = zip(*sorted_valid_templates_and_seqids)[0] + return sorted_valid_templates + + +def get_seqid_for_model(models_target_dir, template): + seqid_filename = os.path.join(models_target_dir, template.id, 'sequence-identity.txt') + with open(seqid_filename, 'r') as infile: + seqid = float(infile.readline().strip()) + return seqid + + +def create_target_project_dir(target): + target_project_dir = os.path.join(fah_projects_dir, target.id) + if not os.path.exists(target_project_dir): + os.makedirs(target_project_dir) + + +def generate_fah_run(target_project_dir, + template, + source_dir, + run_index, + nclones, + openmm_platform, + ): + """ + Build Folding@Home RUN and CLONE subdirectories from (possibly compressed) OpenMM serialized XML files. + + ARGUMENTS + + run (int) - run index + """ + logger.debug("Building RUN %d" % run_index) + + try: + # Determine directory and pathnames. 
+ run_dir = os.path.join(target_project_dir, 'RUN%d' % run_index) + run_template_id_filepath = os.path.join(run_dir, 'template.txt') + run_seqid_filepath = os.path.join(run_dir, 'sequence-identity.txt') + run_system_filepath = os.path.join(run_dir, 'system.xml') + run_integrator_filepath = os.path.join(run_dir, 'integrator.xml') + run_protein_structure_filepath = os.path.join(run_dir, 'protein.pdb') + run_system_structure_filepath = os.path.join(run_dir, 'system.pdb') + run_final_state_filepath = os.path.join(run_dir, 'state%d.xml' % (nclones - 1)) + source_seqid_filepath = os.path.join(source_dir, 'sequence-identity.txt') + source_protein_structure_filepath = os.path.join(source_dir, 'implicit-refined.pdb.gz') + source_system_structure_filepath = os.path.join(source_dir, 'explicit-refined.pdb.gz') + source_openmm_system_filepath = os.path.join(source_dir, 'explicit-system.xml') + source_openmm_state_filepath = os.path.join(source_dir, 'explicit-state.xml') + + # Return if this directory has already been set up. + if os.path.exists(run_dir): + if ( + os.path.exists(run_template_id_filepath) + and os.path.exists(run_seqid_filepath) + and os.path.exists(run_system_filepath) + and os.path.exists(run_integrator_filepath) + and os.path.exists(run_protein_structure_filepath) + and os.path.exists(run_system_structure_filepath) + and os.path.exists(run_final_state_filepath) + ): + return + else: + # Construct run directory if it does not exist. + if not os.path.exists(run_dir): + os.makedirs(run_dir) + + # Write template ID + with open(run_template_id_filepath, 'w') as outfile: + outfile.write(template.id + '\n') + + # Write the protein and system structure pdbs + + with open(run_protein_structure_filepath, 'w') as protein_structure_file: + protein_structure_file.write( + read_file_contents_gz_or_not(source_protein_structure_filepath) + ) + + with open(run_system_structure_filepath, 'w') as system_structure_file: + system_structure_file.write( + read_file_contents_gz_or_not(source_system_structure_filepath) + ) + + system = openmm.XmlSerializer.deserialize( + read_file_contents_gz_or_not(source_openmm_system_filepath) + ) + state = openmm.XmlSerializer.deserialize( + read_file_contents_gz_or_not(source_openmm_state_filepath) + ) + + # Substitute default box vectors. + box_vectors = state.getPeriodicBoxVectors() + system.setDefaultPeriodicBoxVectors(*box_vectors) + + # Write sequence identity. + with open(run_seqid_filepath, 'w') as run_seqid_file: + run_seqid_file.write(read_file_contents_gz_or_not(source_seqid_filepath)) + + # Integrator settings. + constraint_tolerance = 1.0e-5 + timestep = 2.0 * unit.femtoseconds + collision_rate = 1.0 / unit.picosecond + temperature = 300.0 * unit.kelvin + + # Create new integrator to use. + integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep) + + # TODO: Make sure MonteCarloBarostat temperature matches set temperature. + + # Serialize System. + with open(run_system_filepath, 'w') as run_system_file: + run_system_file.write(openmm.XmlSerializer.serialize(system)) + + # Serialize Integrator + with open(run_integrator_filepath, 'w') as run_integrator_file: + run_integrator_file.write(openmm.XmlSerializer.serialize(integrator)) + + # Create Context so we can randomize velocities. 
+ platform = openmm.Platform.getPlatformByName(openmm_platform) + context = openmm.Context(system, integrator, platform) + context.setPositions(state.getPositions()) + context.setVelocities(state.getVelocities()) + box_vectors = state.getPeriodicBoxVectors() + context.setPeriodicBoxVectors(*box_vectors) + + # Create clones with different random initial velocities. + for clone_index in range(nclones): + state_filename = os.path.join(run_dir, 'state%d.xml' % clone_index) + if os.path.exists(state_filename): + continue + context.setVelocitiesToTemperature(temperature) + state = context.getState( + getPositions=True, + getVelocities=True, + getForces=True, + getEnergy=True, + getParameters=True, + enforcePeriodicBox=True + ) + with open(state_filename, 'w') as state_file: + state_file.write(openmm.XmlSerializer.serialize(state)) + + except Exception as e: + import traceback + print(traceback.format_exc()) + print(str(e)) + + return + + +def archive_fah_run(target, run_index): + project_target_dir = os.path.join(fah_projects_dir, target.id) + archive_filename = os.path.join(project_target_dir, 'RUN%d.tgz' % run_index) + run_dir = os.path.join(project_target_dir, 'RUN%d' % run_index) + subprocess.call(['tar', 'zcf', archive_filename, run_dir]) + + def package_for_transfer(process_only_these_targets=None): - raise Exception('Not implemented yet.') \ No newline at end of file + raise Exception('Not implemented yet.') diff --git a/ensembler/tests/integrationtest_utils.py b/ensembler/tests/integrationtest_utils.py index 2e93e66..2fc49b7 100644 --- a/ensembler/tests/integrationtest_utils.py +++ b/ensembler/tests/integrationtest_utils.py @@ -37,7 +37,9 @@ def init(self): def targets(self): self.init() distutils.dir_util.copy_tree( - get_installed_resource_filename(os.path.join('example_project', default_project_dirnames.targets)), + get_installed_resource_filename( + os.path.join('example_project', default_project_dirnames.targets) + ), os.path.join(self.project_dir, default_project_dirnames.targets) ) diff --git a/ensembler/tests/test_packaging.py b/ensembler/tests/test_packaging.py new file mode 100644 index 0000000..1fc54c3 --- /dev/null +++ b/ensembler/tests/test_packaging.py @@ -0,0 +1,47 @@ +import os +from nose.plugins.attrib import attr +from ensembler.packaging import package_for_fah +from ensembler.core import default_project_dirnames +from ensembler.tests.integrationtest_utils import integrationtest_context + + +@attr('unit') +def test_package_for_fah(): + with integrationtest_context(set_up_project_stage='refined_explicit'): + package_for_fah( + process_only_these_targets=['EGFR_HUMAN_D0'], + process_only_these_templates=[ + 'KC1D_HUMAN_D0_4HNF_A', + 'KC1D_HUMAN_D0_4KB8_D' + ] + ) + packaged_project_base_path = os.path.join( + default_project_dirnames.packaged_models, + 'fah-projects', + 'EGFR_HUMAN_D0' + ) + assert os.path.exists(packaged_project_base_path) + assert os.path.exists(os.path.join( + packaged_project_base_path, + 'RUN0' + )) + assert os.path.exists(os.path.join( + packaged_project_base_path, + 'RUN1' + )) + run_filenames = [ + 'template.txt', + 'system.pdb', + 'protein.pdb', + 'sequence-identity.txt', + 'system.xml', + 'integrator.xml', + 'state0.xml', + ] + for run_id in range(2): + for run_filename in run_filenames: + assert os.path.exists(os.path.join( + packaged_project_base_path, + 'RUN{}'.format(run_id), + run_filename + )) diff --git a/ensembler/utils.py b/ensembler/utils.py index 226f2cb..e50fbfa 100644 --- a/ensembler/utils.py +++ b/ensembler/utils.py @@ -1,5 +1,6 @@ 
import contextlib import os +import gzip import logging import functools import shutil @@ -114,4 +115,17 @@ def wrapper(*args, **kwargs): def set_arg_with_default(arg, default_arg): if arg is None: arg = default_arg - return arg \ No newline at end of file + return arg + + +def read_file_contents_gz_or_not(base_filepath): + if os.path.exists(base_filepath): + with open(base_filepath) as infile: + contents = infile.read() + elif os.path.exists(base_filepath+'.gz'): + with gzip.open(base_filepath+'.gz') as infile: + contents = infile.read() + else: + raise IOError('File {} not found'.format(base_filepath)) + + return contents diff --git a/setup.py b/setup.py index 333eb48..87ca3c6 100644 --- a/setup.py +++ b/setup.py @@ -4,17 +4,20 @@ ########################## VERSION = "1.0.3" -ISRELEASED = True +ISRELEASED = False __version__ = VERSION ########################## + def read_readme(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() + ########################## # Function for determining current git commit ########################## + def git_version(): # Return the git revision as a string # copied from numpy setup.py @@ -41,11 +44,14 @@ def _minimal_ext_cmd(cmd): return GIT_REVISION + ########################## # Function for writing version.py (this will be copied to the install directory) ########################## ensembler_version_filepath = 'ensembler/version.py' + + def write_version_py(filename=ensembler_version_filepath): cnt = """# THIS FILE IS GENERATED FROM ENSEMBLER SETUP.PY short_version = '%(version)s' @@ -77,10 +83,12 @@ def write_version_py(filename=ensembler_version_filepath): finally: a.close() + ########################## # Find package data ########################## + def find_package_data(): package_data = [] basepath = os.path.join('ensembler', 'tests') @@ -96,6 +104,7 @@ def find_package_data(): package_data.append(filepath) return package_data + ########################## # Setup ########################## From 3c41111d3f570b86de4200b056595dcb27856e7d Mon Sep 17 00:00:00 2001 From: "Daniel L. Parton" Date: Mon, 3 Aug 2015 23:17:11 -0400 Subject: [PATCH 03/10] Refactored package_for_fah --- ensembler/packaging.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ensembler/packaging.py b/ensembler/packaging.py index 7d51d5a..f4c3eb8 100644 --- a/ensembler/packaging.py +++ b/ensembler/packaging.py @@ -102,6 +102,7 @@ def package_for_fah(process_only_these_targets=None, filenames_necessary_for_fah_packaging = [ 'unique_by_clustering', + 'sequence-identity.txt', 'explicit-system.xml', 'explicit-state.xml', 'explicit-integrator.xml', From e7cfc47140df40ab00b9022427c5fbe4fa88d5f2 Mon Sep 17 00:00:00 2001 From: "Daniel L. Parton" Date: Mon, 3 Aug 2015 23:22:29 -0400 Subject: [PATCH 04/10] Refactored package_for_fah --- ensembler/packaging.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ensembler/packaging.py b/ensembler/packaging.py index f4c3eb8..284675d 100644 --- a/ensembler/packaging.py +++ b/ensembler/packaging.py @@ -132,11 +132,17 @@ def get_valid_templates_for_target(target, for template in selected_templates: # Check to make sure all files needed are present. 
+            not_valid = False
         for filename in filenames_necessary_for_fah_packaging:
             fullpath = os.path.join(models_target_dir, template.id, filename)
             if not (os.path.exists(fullpath) or os.path.exists(fullpath+'.gz')):
-                continue
-        valid_templates.append(template)
+                not_valid = True
+                break
+
+        if not_valid:
+            continue
+        else:
+            valid_templates.append(template)
 
     logger.debug('{} valid unique initial starting conditions found'.format(len(valid_templates)))

From f4378b9c04b1821878f6fe55f3c6ed8506f2d3cd Mon Sep 17 00:00:00 2001
From: "Daniel L. Parton"
Date: Tue, 4 Aug 2015 18:11:26 -0400
Subject: [PATCH 05/10] package_for_fah now uses renumbered topologies if available, and also now only outputs a single system and integrator file for each target

---
 ensembler/cli_commands/general.py        |   2 +-
 ensembler/cli_commands/package_models.py |   6 +-
 ensembler/packaging.py                   | 153 +++++++++++++++++------
 ensembler/tests/integrationtest_utils.py |   2 +-
 ensembler/tests/test_packaging.py        |  14 ++-
 ensembler/tools/renumber_residues.py     |   4 +-
 ensembler/utils.py                       |  10 +-
 7 files changed, 140 insertions(+), 51 deletions(-)

diff --git a/ensembler/cli_commands/general.py b/ensembler/cli_commands/general.py
index 01fd8c4..cbf2922 100644
--- a/ensembler/cli_commands/general.py
+++ b/ensembler/cli_commands/general.py
@@ -36,7 +36,7 @@
      [--api_params ] [-v | --verbose]
   ensembler package_models [-h | --help] [--package_for ] [--targets ] [--targetsfile ] [--templates