-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocessDBtoCSV
More file actions
33 lines (26 loc) · 1.19 KB
/
processDBtoCSV
File metadata and controls
33 lines (26 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import csv
import os

from pymongo import MongoClient
# Export every document of sample_mflix.javaTestDataset to helpersSimplified.csv,
# one CSV row per document.

# Connection string: taken from the MONGODB_URI environment variable when it is
# set (and non-empty); otherwise the original hard-coded URI is used so existing
# runs keep working.
# NOTE(review): the fallback literal embeds credentials in source control --
# prefer exporting MONGODB_URI and removing the literal.
MONGODB_URI = os.environ.get('MONGODB_URI') or 'mongodb+srv://mbillahmim:[email protected]/'

# Column order of the generated CSV file
CSV_FIELDS = ['id', 'repo', 'method', 'docstring', 'path', 'helpers']

# Connect to MongoDB; the context manager closes the client (and its
# connections) once the export finishes or fails.
with MongoClient(MONGODB_URI) as client:
    db = client.sample_mflix
    # Access the collection
    javaTestDataset = db.javaTestDataset

    # Open the CSV file in write mode. newline='' is what the csv module
    # expects; the explicit utf-8 encoding keeps non-ASCII source text from
    # failing on platforms whose default encoding is not UTF-8.
    with open('helpersSimplified.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=CSV_FIELDS)
        # Write the header (field names)
        writer.writeheader()

        # Iterate over documents in the collection
        for item in javaTestDataset.find():
            # `or []` covers both a missing field and an explicit null, which
            # would otherwise make ' '.join(None) raise TypeError.
            docstring = ' '.join(item.get('docstring_tokens') or [])

            # Keep only helpers that carry a non-empty MethodBody; a missing
            # or null helpers field is treated as an empty list.
            helpers = [
                helper
                for helper in (item.get('helpers') or [])
                if helper.get('MethodBody')
            ]

            # Write data to CSV (helpers is written as the list's str() form,
            # unchanged from the original output format)
            writer.writerow({
                'id': item.get('_id'),
                'repo': item.get('repo'),
                'method': item.get('code'),
                'docstring': docstring,
                'path': item.get('path'),
                'helpers': helpers,
            })