Skip to content

Feature: rich compiler prompts when coding in modern editors #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions .github/workflows/codespell.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Codespell configuration is within pyproject.toml
---
name: Codespell

on: [push]
#on:
# push:
# branches: [main]
# pull_request:
# branches: [main]

permissions:
contents: read

jobs:
codespell:
name: Check for spelling errors
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v4
- name: Codespell
uses: codespell-project/actions-codespell@v2
with:
ignore_words_list: Crate,crate,fo,tre

formatting:
permissions:
contents: read # for actions/checkout to fetch code
pull-requests: write # for marocchino/sticky-pull-request-comment to create or update PR comment
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- uses: mamba-org/setup-micromamba@v1
with:
environment-name: black
create-args: -c bioconda -c conda-forge black
cache-environment: false

- name: Check formatting
shell: bash -el {0}
run: black --check --diff .
32 changes: 32 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
name: pytest

on: [push]

jobs:
testing:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.11", "3.13"]

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install lib from source
shell: bash -el {0}
run: |
python -m pip install --upgrade pip pytest pandas
python -m pip install .

- name: Test local
env:
CI: true
#PYTHONTRACEMALLOC: 10
shell: bash -el {0}
run: |
python -m pytest -v --show-capture=stderr \
tests.py
6 changes: 3 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ Records can be accessed through this list or by id. ``GenomeDiff`` is iterable a
::

>>> from genomediff import *
>>> document = GenomeDiff.read(open('MyDiff.gd', 'r', encoding='utf-8'))
>>> document = GenomeDiff.read("MyDiff.gd")
>>> document.metadata
{'GENOME_DIFF': '1.0', 'AUTHOR': ''}
{'GENOME_DIFF': ['1.0'], 'AUTHOR': ['']}
>>> document.mutations[0]
Record('SNP', 1, [191], new_seq='A', seq_id='NC_000913', snp_type='intergenic', position=12346)
>>> document.mutations[0].parent_ids
[191]
>>> document[191]
Record('RA', 191, None, tot_cov='46/42', new_base='A', insert_position=0, ref_base='G', seq_id='NC_000913', quality=252.9, position=12345)
>>> document.mutations[0].parents
>>> document.records.parents_of[0]
[Record('RA', 191, None, tot_cov='46/42', new_base='A', insert_position=0, ref_base='G', seq_id='NC_000913', quality=252.9, position=12345)]
>>> document.write(open('NewDiff.gd', 'w', encoding='utf-8'))
123 changes: 64 additions & 59 deletions src/genomediff/__init__.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,78 @@
import itertools
from genomediff.parser import GenomeDiffParser
from genomediff.records import Metadata
# -*- coding: utf-8 -*-
from pathlib import Path
from typing import TextIO

from .parser import Metadata, MetadataContainer, load_json_ref_cov, parse
from .records import RecordCollection

class GenomeDiff(object):

def __init__(self):
    """Create an empty document with no metadata and no records."""
    # Lookup table from record id to record, used by item access.
    self._index = {}
    # Metadata entries keyed by name.
    self.metadata = {}
    # One list per record family; all three start out empty.
    self.mutations, self.evidence, self.validation = [], [], []
class GenomeDiff:
def __init__(
    self,
    metadata: MetadataContainer,
    records: RecordCollection,
    comments: "dict[int, str]|None" = None,
):
    """Bind the metadata, records and comment lines that make up a document.

    Args:
        metadata: Container exposed through the ``metadata`` property.
        records: Collection exposed through the ``records`` property.
        comments: Comment text keyed by an integer index. The mapping is
            stored by reference, so entries the caller adds after
            construction are visible on the instance. ``None`` (the
            default) gives the instance its own empty dict.
    """
    self._metadata = metadata
    self._records = records
    # Compare against None instead of testing truthiness: ``read`` passes an
    # empty dict and only fills it while parsing, so an empty mapping must be
    # kept as-is. Replacing it with a fresh dict would drop every comment.
    self._comments = {} if comments is None else comments

@classmethod
def read(cls, fsock):
gd = GenomeDiff()
@property
def metadata(self):
    """Read-only access to the metadata object stored on this document."""
    stored = self._metadata
    return stored

@property
def records(self):
    """Read-only access to the record collection stored on this document."""
    stored = self._records
    return stored

for record in GenomeDiffParser(document=gd, fsock=fsock):
@classmethod
def read(cls, gdfile: "str|Path|TextIO"):
records = RecordCollection.new()
comments: dict[int, str] = {}
if hasattr(gdfile, "read"):
metadata = MetadataContainer(getattr(gdfile, "name", ""))
fsock: TextIO = gdfile # type: ignore[assignment]
else:
metadata = MetadataContainer(gdfile)
fsock = open(gdfile, "r")
self = cls(metadata, records, comments)
for i, record in parse(fsock):
if isinstance(record, Metadata):
gd.metadata[record.name] = record.value
metadata.set(record.name, record.value)
elif isinstance(record, str):
comments[i] = record
else:
if len(record.type) == 3:
gd.mutations.append(record)
if len(record.type) == 2:
gd.evidence.append(record)
if len(record.type) == 4:
gd.validation.append(record)
gd._index[record.id] = record
return gd

def __getitem__(self, item):
    """Return the record registered under id ``item``.

    The lookup goes straight to the internal ``_index`` mapping, so a
    missing id raises whatever that mapping raises (``KeyError`` for a dict).
    """
    index = self._index
    return index[item]
record.document = self
records.set(record)
if not hasattr(gdfile, "read"):
fsock.close()
return self

def write(self, fsock):
#Print our own version line first and ignore any in the metadata
print("#=GENOME_DIFF\t1.0", file = fsock)
for k, v in self.metadata.items():
if k != "GENOME_DIFF":
print("#={}\t{}".format(k, v), file = fsock)
for record in itertools.chain(self.mutations, self.evidence, self.validation):
print(str(record), file = fsock)
print("#=GENOME_DIFF\t1.0", file=fsock)
for l in self.metadata.lines:
if l.name != "GENOME_DIFF":
print(l, file=fsock)
for record in self.records:
print(str(record), file=fsock)

def __len__(self):
    """Return the combined number of mutation, evidence and validation records."""
    groups = (self.mutations, self.evidence, self.validation)
    return sum(len(group) for group in groups)
@property
def cov_summary(self):
    """Return what ``load_json_ref_cov`` yields for ``metadata.output``.

    Raises:
        AttributeError: If ``metadata.output`` is falsy (for example empty
            or ``None``), i.e. no output location is recorded.
    """
    outdir = self.metadata.output
    if not outdir:
        raise AttributeError("No output directory found in metadata")
    return load_json_ref_cov(outdir)

def __iter__(self):
    """Iterate over all mutations, then all evidence, then all validation records."""
    groups = (self.mutations, self.evidence, self.validation)
    return itertools.chain.from_iterable(groups)
@property
def mutations(self):
    """List of the items in ``records.mutation``.

    A new list is built on every access, so adding to or removing from the
    returned list leaves ``records`` untouched.
    """
    return [*self.records.mutation]

def __str__(self):
    """Render the document as three labelled sections, one record per line.

    Returns:
        The headers ``MUTATIONS:``, ``EVIDENCE:`` and ``VALIDATION:``, each
        followed by the ``str()`` of every record in that group, joined by
        newlines. An empty group contributes an empty line after its header.
    """
    sections = (
        ("MUTATIONS:", self.mutations),
        ("EVIDENCE:", self.evidence),
        ("VALIDATION:", self.validation),
    )
    parts = []
    for header, group in sections:
        parts.append(header)
        # Convert every record explicitly: str.join rejects non-string items,
        # and the validation group was previously joined without conversion.
        parts.append('\n'.join(str(record) for record in group))
    return '\n'.join(parts)
@property
def evidence(self):
    """List of the items in ``records.evidence``.

    A new list is built on every access, so adding to or removing from the
    returned list leaves ``records`` untouched.
    """
    return [*self.records.evidence]

@property
def validation(self):
    """List of the items in ``records.validation``.

    A new list is built on every access, so adding to or removing from the
    returned list leaves ``records`` untouched.
    """
    return [*self.records.validation]

def remove(self,*args, mut_type=None):
    '''
    Drop mutations that satisfy the given conditions; the counterpart of
    gdtools REMOVE for genomediff objects.

    Input: any number of conditions, e.g. 'gene_name==rrlA','frequency>=0.9'.
    They are passed unchanged to each record's ``satisfies`` method.
    If mut_type is given, only mutations of that type are candidates for
    removal; all other mutations are kept without being tested.
    Output: nothing is returned. self.mutations is rebound to a new list
    holding, in their original order, the mutations that were not removed.
    '''
    self.mutations = [
        rec
        for rec in self.mutations
        # Keep the record when its type is out of scope (checked first, so
        # ``satisfies`` is not called for it) or when it fails the conditions.
        if (mut_type is not None and mut_type != rec.type)
        or not rec.satisfies(*args)
    ]
def __getitem__(self, key):
    """Delegate subscript access to ``records``, returning ``records[key]``."""
    collection = self.records
    return collection[key]
Loading