-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
203 lines (203 loc) · 6.09 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# Configuration file to set arguments for GCsnap.
# To change an argument, edit the `value:` entry of that argument.
# E.g. change `value: 1` to `value: 2`.
# ---------------------------------------
#
# Output directory name. Per the help text, the literal `default` means the
# name of the input file is used.
out-label:
  value: 'default'
  type: 'str'
  help: >-
    Name of output directory. If default, name of the input file.
# Folder for temporary mmseqs files.
# NOTE(review): 'None' is a YAML string, not a YAML null; presumably the
# consumer maps it to "unset" - confirm before replacing it with `null`.
tmp-mmseqs-folder:
  value: 'None'
  type: 'str'
  help: >-
    The temporary folder to store mmseqs files. May be changed so that
    intermediary mmseqs files are saved somewhere else than the automatic
    'out-label' directory.
# Folder in which downloaded assemblies are stored.
# NOTE(review): 'None' is a YAML string, not a YAML null; presumably the
# consumer treats it as "not set" - confirm.
assemblies-data-folder:
  value: 'None'
  type: 'str'
  help: >-
    Folder to store the downloaded assemblies. If not set, the assemblies
    will be stored in the data directory in the GCsnap repository folder.
# Maximum age (in days) of assembly files before they are fetched again;
# default is 14.
assemblies-data-update-age:
  value: 14
  type: 'int'
  help: >-
    Age of the assembly files in days until they are downloaded again.
# Boolean switch, off by default: collect genomic contexts without comparing
# them.
collect-only:
  value: false
  type: 'bool'
  help: >-
    Boolean statement to make GCsnap collect genomic contexts only, without
    comparing them.
# Number of cores to use; default is 4.
n-cpu:
  value: 4
  type: 'int'
  help: >-
    Number of cores to use.
# Patterns used to pick clusters from a clans map. `nargs: '+'` is kept as the
# string "+".
# NOTE(review): 'None' is a YAML string, not a YAML null; presumably the
# consumer maps it to "unset" - confirm.
clans-patterns:
  value: 'None'
  type: 'str'
  help: >-
    Patterns to identify the clusters to analyse. They will be used to select
    the individual clusters in the clans map to analyse.
  nargs: '+'
# Clans file used for the advanced interactive output. The help text now
# refers to the option by the name it has in this file
# (`operon-cluster-advanced`).
# NOTE(review): 'None' is a YAML string, not a YAML null; presumably the
# consumer maps it to "unset" - confirm.
clans-file:
  value: 'None'
  type: 'str'
  help: >-
    Used only for advanced interactive output representation (Clans file if
    the input is a clans file and --operon-cluster-advanced is set to True).
# User e-mail address, possibly needed for NCBI database access.
# NOTE(review): 'None' is a YAML string, not a YAML null; presumably the
# consumer maps it to "unset" - confirm.
ncbi-user-email:
  value: 'None'
  type: 'str'
  help: >-
    Email address of the user. May be required to access NCBI databases and
    is not used for anything else.
# NCBI API key. Keep real keys out of version control.
# NOTE(review): 'None' is a YAML string, not a YAML null; presumably the
# consumer maps it to "unset" - confirm.
ncbi-api-key:
  value: 'None'
  type: 'str'
  help: >-
    The key for NCBI API, which allows for up to 10 queries per second to
    NCBI databases. Shall be obtained after obtaining an NCBI account.
# Count of flanking sequences taken on the 5' side; default is 4.
n-flanking5:
  value: 4
  type: 'int'
  help: >-
    Number of flanking sequences to take on 5' end.
# Count of flanking sequences taken on the 3' side; default is 4.
n-flanking3:
  value: 4
  type: 'int'
  help: >-
    Number of flanking sequences to take on 3' end.
# Boolean switch, on by default: exclude partial operon/genomic_context blocks.
exclude-partial:
  value: true
  type: 'bool'
  help: >-
    Exclude partial operon/genomic_context blocks. If turned off, partial
    cases will still be ignored to get the most common genomic features.
# E-value threshold for treating two sequences as homologous; default 0.001.
max-evalue:
  value: 0.001
  type: 'float'
  help: >-
    Max e-value at which two sequences are considered to be homologous.
    Required to define protein families.
# Artificial distance assigned to sequence pairs that fail the max-evalue
# threshold; default is 10.
default-base:
  value: 10
  type: 'int'
  help: >-
    Artificial distance value for two sequences that do not match with an
    E-value better than --max-evalue.
# Coverage threshold for treating two sequences as homologous; default 0.7.
min-coverage:
  value: 0.7
  type: 'float'
  help: >-
    Minimum coverage of target and subject a match needs to be so that two
    sequences are considered to be homologous. Required to define protein
    families.
# Iteration count for the all-against-all searches; default is 1.
num-iterations:
  value: 1
  type: 'int'
  help: >-
    Number of iterations for all-against-all searches. Required to define
    protein families.
# Path to the MMseqs executable, for installs outside the Conda environment.
# NOTE(review): 'None' is a YAML string, not a YAML null; presumably the
# consumer maps it to "unset" - confirm.
mmseqs-executable-path:
  value: 'None'
  type: 'str'
  help: >-
    Path of MMseqs executable (i.e., mmseqs.bat) if not installed in Conda
    environment.
# Boolean switch, on by default: fetch PDB information for family
# representatives.
get-pdb:
  value: true
  type: 'bool'
  help: >-
    Get PDB information for representatives of the families found.
# Boolean switch, on by default: look up functional annotations for family
# representatives.
get-functional-annotations:
  value: true
  type: 'bool'
  help: >-
    Find functional annotations for representatives of the families found.
# Boolean switch, off by default: enable the advanced operon clustering mode.
operon-cluster-advanced:
  value: false
  type: 'bool'
  help: >-
    Boolean statement to use the operon clustering advanced mode using PacMAP.
# Upper family-frequency bound for advanced operon clustering; default is 20.
max-family-freq:
  value: 20
  type: 'int'
  help: >-
    Maximum frequency of a family in the set of genomic contexts found to be
    considered for advanced operon clustering.
# Lower family-frequency bound for advanced operon clustering; default is 2.
min-family-freq:
  value: 2
  type: 'int'
  help: >-
    Minimum frequency of a family in the set of genomic contexts found to be
    considered for advanced operon clustering.
# Minimum family frequency within a conserved genomic context type; default 30.
# NOTE(review): the key spells "accross" (sic). It is kept unchanged because
# renaming it would change the option name consumers look up.
min-family-freq-accross-contexts:
  value: 30
  type: 'int'
  help: >-
    Minimum frequency of a family in a conserved genomic context type to be
    considered as a member.
# Cap on the number of most populated operon/genomic_context block types;
# default is 30.
n-max-operons:
  value: 30
  type: 'int'
  help: >-
    Maximum number of top most populated operon/genomic_context block types.
# Boolean switch, on by default: get and map taxonomy information.
get-taxonomy:
  value: true
  type: 'bool'
  help: >-
    Boolean statement to get and map taxonomy information.
# Boolean switch, off by default: find sequence features in the flanking genes.
annotate-TM:
  value: false
  type: 'bool'
  help: >-
    Boolean statement to find sequence features in the flanking genes.
# Method for finding transmembrane segments; default is `uniprot`, restricted
# to the listed choices.
annotation-TM-mode:
  value: 'uniprot'
  type: 'str'
  help: >-
    Method to use to find transmembrane segments.
  choices:
    - 'phobius'
    - 'tmhmm'
    - 'uniprot'
# File holding pre-computed transmembrane features.
# NOTE(review): 'None' is a YAML string, not a YAML null; presumably the
# consumer maps it to "unset" - confirm.
annotation-TM-file:
  value: 'None'
  type: 'str'
  help: >-
    File with pre-computed transmembrane features. Only use when the targets
    correspond to a single project (no multiple fasta or text files).
# Boolean switch, on by default: produce the interactive html output.
interactive:
  value: true
  type: 'bool'
  help: >-
    Boolean statement to make the interactive html output.
# Matplotlib color map name used for the syntenic blocks; default `Spectral`.
genomic-context-cmap:
  value: 'Spectral'
  type: 'str'
  help: >-
    Color map (as of matplotlib) to assign colors to and plot the syntenic
    blocks.
# Genomic context legend mode; default is `species`, restricted to the listed
# choices.
gc-legend-mode:
  value: 'species'
  type: 'str'
  help: >-
    Mode of the genomic context legend.
  choices:
    - 'species'
    - 'ncbi_code'
# File format of the core figures; default is `png`, restricted to the listed
# choices.
out-format:
  value: 'png'
  type: 'str'
  help: >-
    Output format of the core figures.
  choices:
    - 'png'
    - 'svg'
    - 'pdf'
# Co-occurrence threshold for connecting two genes in the graphs; default 0.3.
min-coocc:
  value: 0.3
  type: 'float'
  help: >-
    Minimum maximum co-occurrence of two genes to be connected in the graphs.
# Input phylogenetic tree.
# NOTE(review): 'None' is a YAML string, not a YAML null; presumably the
# consumer maps it to "unset" - confirm.
in-tree:
  value: 'None'
  type: 'str'
  help: >-
    Input phylogenetic tree. Only use when the targets correspond to a single
    project (no multiple fasta or text files).
# Format of the input phylogenetic tree; default is `newick`, restricted to
# the listed choices.
in-tree-format:
  value: 'newick'
  type: 'str'
  help: >-
    Format of the input phylogenetic tree.
  choices:
    - 'newick'
    - 'nexus'
    - 'phyloxml'
    - 'phyloxml-strict'
    - 'phyloxml-extended'
    - 'phyloxml-complete'
# Ordering of the genomic contexts; default is `taxonomy`, restricted to the
# listed choices. The last choice contains a space and is quoted as-is.
sort-mode:
  value: 'taxonomy'
  type: 'str'
  help: >-
    Mode to sort the genomic contexts.
  choices:
    - 'taxonomy'
    - 'as_input'
    - 'tree'
    - 'operon'
    - 'operon cluster'
# Boolean switch, off by default: let a CLI value overwrite the value stored
# in the config file.
overwrite-config:
  value: false
  type: 'bool'
  help: >-
    Overwrite the argument value in config file with CLI value.
# Boolean switch, off by default: time all GCsnap steps and export the result
# as a csv file.
timing:
  value: false
  type: 'bool'
  help: >-
    Measure time of all GCsnap steps and export it as csv file.