Skip to content
This repository was archived by the owner on Nov 17, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,5 @@ bower_components
.bower-cache
.bower-registry
.bower-tmp

cell-lines
103 changes: 103 additions & 0 deletions scripts/convert_cell_catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import json
import os


def sort_media_data(media_list):
    """Collect the movie entries of a media list for the page front matter.

    Args:
        media_list: Iterable of media dicts. Entries whose ``"type"`` is
            ``"movie"`` must provide ``"link"`` and ``"caption"``; all other
            entries are ignored. ``None`` or an empty list means "no media".

    Returns:
        A dict ``{"videos": [...]}`` where every item is
        ``{"video": <link>, "caption": <double-quoted caption>}``.
    """
    videos = []
    for media in media_list or []:
        # Entries without a "type" key are simply not movies.
        if media.get("type") != "movie":
            continue
        # The caption is emitted as a double-quoted YAML scalar. json.dumps
        # produces the same '"text"' form as plain wrapping for ordinary
        # captions, but also escapes embedded quotes, backslashes and newlines
        # that would otherwise corrupt the front matter.
        quoted_caption = json.dumps(str(media["caption"]), ensure_ascii=False)
        videos.append({"video": media["link"], "caption": quoted_caption})
    return {"videos": videos}

def check_status(status):
    """Translate a catalog status value into the front-matter status string.

    Args:
        status: Raw status value taken from a catalog entry.

    Returns:
        ``"in progress"`` for ``"In Progress"``, ``"released"`` for ``"Yes"``,
        and ``"no status found"`` for any other value.
    """
    if status == "In Progress":
        return "in progress"
    if status == "Yes":
        return "released"
    # Comparing values never raises KeyError, so the fallback has to be an
    # explicit return; otherwise unknown statuses come back as None.
    return "no status found"


# Convert every entry of the exported catalog into
# ./cell-lines/<cell line name>/index.md with YAML front matter.

# JSON is UTF-8; do not depend on the platform's default encoding.
with open("./data/cell_line_catalog.json", "r", encoding="utf-8") as f:
    data = cell_line_catalog = json.load(f)

for cell_line in data:
    # gene data
    gene_symbols = cell_line["Main_gene_symbol"]
    # Validate before indexing: an empty list would otherwise raise IndexError.
    if not gene_symbols:
        print(f"No gene symbol found for {cell_line['cell_line_id']}")
        continue
    if len(gene_symbols) > 1:
        print(f"Multiple gene symbols found for {gene_symbols}")
        continue
    gene_symbol = gene_symbols[0]

    # Disabled: writer for ./genes/<symbol>.md. The lookups it needs are kept
    # here so the block can be re-enabled as a unit.
    # protein = cell_line["Main_protein"]
    # gene_id = cell_line["Main_gene_id"]
    # gene_name = cell_line["Main_gene_name"]
    # isoform = cell_line["Main_isoforms"]
    # structure = cell_line["Main_structure"]
    # print(f"{gene_symbol} {protein} {gene_id} {gene_name} {isoform} {structure}")
    # with open(f"./genes/{gene_symbol.lower()}.md", "w") as f:
    #     f.write("---\n")
    #     f.write("templateKey: gene-name\n")
    #     f.write(f"symbol: {gene_symbol}\n")
    #     f.write(f"name: {gene_name}\n")
    #     f.write(f"protein: {protein}\n")
    #     f.write(f"structure: {structure}\n")
    #     if len(isoform) > 0:
    #         f.write(f"isoforms:\n")
    #         for iso in isoform:
    #             gene_name = iso["gene_name"]
    #             gene_isoforms = iso["gene_isoforms"]
    #             f.write(f"  - name: {gene_name}\n")
    #             if len(gene_isoforms) > 0:
    #                 f.write(f"    ids:\n")
    #                 for key in gene_isoforms:
    #                     f.write(f"      - {key}\n")
    #     f.write("---")

    # Main cell line data
    directory = "./cell-lines"
    # The numeric id is the second "-"-separated field of the catalog id.
    cell_line_id = int(cell_line["cell_line_id"].split("-")[1])
    clone_number = cell_line["clone_number"]
    if clone_number:
        cell_line_name = f"AICS-{cell_line_id}-{clone_number}"
    else:
        cell_line_name = f"AICS-{cell_line_id}-in-progress"
    path = f"{directory}/{cell_line_name}"
    # Creates the output root and the per-line folder in one call and is a
    # no-op when they already exist.
    os.makedirs(path, exist_ok=True)
    new_media_data = sort_media_data(cell_line["Main_media"])

    # Build the whole front matter before opening the file, so a missing key
    # cannot leave a truncated index.md behind.
    front_matter = [
        "---\n",
        "templateKey: cell-line\n",
        f"cell_line_id: {cell_line_id}\n",
        f"status: {check_status(cell_line['status'])}\n",
    ]
    # handle parental line thumbnail image
    if cell_line_id == 13:
        front_matter.append(f"thumbnail_image: aics-{cell_line_id}.jpg\n")
    elif cell_line_id == 75:
        front_matter.append(
            "thumbnail_image: 20181023_m02_001_s13_cl85_cropped_scalebar20_withinset_rgb.jpg\n"
        )
    front_matter += [
        f"clone_number: {clone_number}\n",
        f"allele_count: {cell_line['alleleCount']}\n",
        "parental_line: 0\n",
        f"gene: {gene_symbol}\n",
        "tag_location:\n",
        f"  - {cell_line['Main_terminal_tagged']}\n",
        "fluorescent_tag:\n",
        f"  - {cell_line['Main_fluorescent_tag']}\n",
        f"order_link: {cell_line['Main_order_link']}\n",
        f"cofa: {cell_line['Main_cofa']}\n",
        # In progress cell lines (AICS 70 and 122) have only limited data,
        # check the keys before accessing
        f"donor_plasmid: {cell_line.get('Main_donor_plasmid', '')}\n",
        f"eu_hpsc_reg: {cell_line.get('Main_eu_hpsc_reg', '')}\n",
        "images_and_videos:\n",
        "  videos:\n",
    ]
    for video in new_media_data["videos"]:
        # "caption" must line up with "video" so both keys belong to the same
        # list item.
        front_matter.append(f"    - video: {video['video']}\n")
        front_matter.append(f"      caption: {video['caption']}\n")
    front_matter.append("---")

    with open(f"{path}/index.md", "w", encoding="utf-8") as out:
        out.writelines(front_matter)