Skip to content

Commit

Permalink
codebase cleaned.
Browse files Browse the repository at this point in the history
  • Loading branch information
sarthakchittawar committed Nov 6, 2023
1 parent e643833 commit c181684
Show file tree
Hide file tree
Showing 27 changed files with 41 additions and 492,327 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
## Instructions for Setup

1. `cd mashqa_data`
2. `python format.py`
3. `cd ..; python dbGen.py`

## Instructions to run
1. `cd App; npx expo start`
2. `python backend.py`

## Future work
1. Dockerise the project so that it can run from any IP address without requiring manual configuration changes.
15 changes: 2 additions & 13 deletions dbGen.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
from langchain.document_loaders import JSONLoader,TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# med_loader = JSONLoader(
# jq_schema='.[]',
# file_path='qna/qna_train_webmd_squad_v2_full.json',
# text_content=False
# )

med_loader=TextLoader('sentences_train_webmd_squad_v2_full.txt')
med_loader=TextLoader('mashqa_data/sentences.txt')

med_data = med_loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 50)
Expand All @@ -25,10 +19,5 @@
encode_kwargs=encode_kwargs
)

vectorstore = Chroma.from_documents(documents=all_splits, embedding=hf, persist_directory='./chroma_db_train')
vectorstore = Chroma.from_documents(documents=all_splits, embedding=hf, persist_directory='./med_db')
vectorstore.persist()

question = 'What are some tips to handle bipolar disorder?'
docs = vectorstore.similarity_search(question)
print(len(docs))
print(docs)
60 changes: 0 additions & 60 deletions gpt.py

This file was deleted.

93 changes: 0 additions & 93 deletions mashqa_data/format.ipynb

This file was deleted.

26 changes: 26 additions & 0 deletions mashqa_data/format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os
import json

# Extract the sentences from a JSON data file and append them to sentences.txt
def make_json(path):
    """Append every sentence of the JSON file at ``path`` to ``sentences.txt``.

    The JSON document must contain a top-level ``data`` list. Each entry of
    that list has a ``paragraphs`` list, and each paragraph has a
    ``sent_list`` holding its sentences as strings. Every sentence is written
    on its own line, and each paragraph is followed by two extra newlines.

    Output goes to ``sentences.txt`` in the current working directory, opened
    in append mode, so successive calls accumulate in the same file.

    Args:
        path: Path of the JSON file to read.

    Raises:
        KeyError: If ``data``, ``paragraphs`` or ``sent_list`` is missing.
    """
    with open(path, 'r') as source:
        entries = json.load(source)['data']

    # Append rather than truncate: the caller invokes this once per input
    # file and expects all of them to end up in the same output file.
    with open('sentences.txt', 'a') as out:
        for entry in entries:
            for paragraph in entry['paragraphs']:
                out.writelines(
                    sentence + '\n' for sentence in paragraph['sent_list']
                )
                # Gap that separates this paragraph from the next one.
                out.write('\n\n')

# Convert every JSON file in the current directory, skipping files whose
# name starts with "new". os.listdir() returns entries in arbitrary order,
# so sort them to make the order of sentences.txt reproducible across runs.
for filename in sorted(os.listdir('.')):
    if filename.endswith('.json') and not filename.startswith('new'):
        make_json(filename)
        print('done {}'.format(filename))
1 change: 0 additions & 1 deletion mashqa_data/new_test_webmd_squad_v2_consec.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/new_test_webmd_squad_v2_full.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/new_train_webmd_squad_v2_consec.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/new_train_webmd_squad_v2_full.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/new_val_webmd_squad_v2_consec.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/new_val_webmd_squad_v2_full.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/qna_test_webmd_squad_v2_consec.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/qna_test_webmd_squad_v2_full.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/qna_train_webmd_squad_v2_consec.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/qna_train_webmd_squad_v2_full.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/qna_val_webmd_squad_v2_consec.json

This file was deleted.

1 change: 0 additions & 1 deletion mashqa_data/qna_val_webmd_squad_v2_full.json

This file was deleted.

Loading

0 comments on commit c181684

Please sign in to comment.