-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchain.py
74 lines (62 loc) · 2.49 KB
/
chain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import random
import json
import os
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from nltk.tokenize import RegexpTokenizer
# Module-wide tokenizer shared by preprocess(); the pattern r'\w+' matches
# runs of word characters, so each such run becomes one token.
tokenizer = RegexpTokenizer(r'\w+')
def preprocess(text):
    """Normalize raw text into lowercase tokens separated by single spaces.

    The text is tokenized with the module-level ``tokenizer``; the tokens
    are joined with one space and the result is lowercased.
    """
    tokens = tokenizer.tokenize(text)
    joined = " ".join(tokens)
    return joined.lower()
def GetPairs(text, phraseLen):
    """Split text into consecutive ``[phrase, following phrase]`` pairs.

    The input is broken into words, padded with ``phraseLen`` ``"*END*"``
    markers, and scanned with a sliding window that advances one word at a
    time. Each pair holds ``phraseLen`` words joined by spaces, followed by
    the ``phraseLen`` words that come directly after them.

    Args:
        text: A string (split on whitespace) or a list of words. A list is
            copied first, so the caller's object is never modified.
        phraseLen: Number of words in each phrase.

    Returns:
        A list of two-element lists ``[phrase, following_phrase]``.

    Raises:
        ValueError: If ``text`` is neither a string nor a list.
    """
    # isinstance() also accepts str/list subclasses, which the previous
    # exact type comparison rejected.
    if isinstance(text, str):
        words = text.split()
    elif isinstance(text, list):
        words = list(text)
    else:
        raise ValueError(f"Unsupported input type: {type(text).__name__}")

    # Padding gives the last real words a successor phrase to point at.
    words += ["*END*"] * phraseLen

    return [
        [
            " ".join(words[start:start + phraseLen]),
            " ".join(words[start + phraseLen:start + 2 * phraseLen]),
        ]
        for start in range(len(words) - 2 * phraseLen + 1)
    ]
def GenChains(text, phraseLen):
    """Build a transition table from each phrase to its distinct followers.

    The pairs produced by ``GetPairs(text, phraseLen)`` are folded into a
    dict: every key is a phrase, and its value is the list of phrases seen
    right after it, in first-seen order and without duplicates.
    """
    transitions = {}
    for phrase, follower in GetPairs(text, phraseLen):
        followers = transitions.setdefault(phrase, [])
        if follower not in followers:
            followers.append(follower)
    return transitions
def GenText(chains, limit=-1):
    """Generate text by randomly walking a phrase transition table.

    A random key of ``chains`` seeds the walk; the seed itself is not
    emitted, only the phrases reached from it. At each step a random
    successor of the current phrase is drawn. The walk stops when the drawn
    phrase contains ``"*END*"``, when ``limit`` phrases have been emitted,
    or when the current phrase has no successors to draw from.

    Args:
        chains: Dict mapping a phrase to a list of possible next phrases.
        limit: Maximum number of phrases to emit. Values <= 0 mean
            "no limit" (the walk then ends only on ``"*END*"`` or a phrase
            without successors).

    Returns:
        The emitted phrases joined by single spaces; ``""`` when nothing
        was emitted or ``chains`` is empty.
    """
    # random.choice() raises IndexError on an empty sequence.
    if not chains:
        return ""

    words = []
    current = random.choice(list(chains))
    while True:
        # A missing key or an empty successor list ends the walk instead of
        # raising KeyError / ValueError.
        successors = chains.get(current)
        if not successors:
            break
        current = random.choice(successors)
        if "*END*" in current or (limit > 0 and len(words) >= limit):
            break
        words.append(current)
    return " ".join(words)
def SaveChains(artist, chainName, chains):
    """Serialize a transition table to ``chains/{artist}_{chainName}.txt``.

    Args:
        artist: Artist name used as the first part of the file name.
        chainName: Chain name used as the second part of the file name.
        chains: JSON-serializable transition table to write.
    """
    # open(..., 'w') does not create parent directories, so make sure the
    # target directory exists before writing.
    os.makedirs('chains', exist_ok=True)
    with open(f'chains/{artist}_{chainName}.txt', 'w', encoding="utf8") as outfile:
        json.dump(chains, outfile)
def LoadChains(artist, chainName):
    """Load a transition table from ``chains/{artist}_{chainName}.txt``.

    If the file does not exist yet, it is first created from the
    module-level ``chains`` table via ``SaveChains`` and then read back.

    Args:
        artist: Artist name used as the first part of the file name.
        chainName: Chain name used as the second part of the file name.

    Returns:
        The table decoded from the JSON file.
    """
    path = f'chains/{artist}_{chainName}.txt'
    # Check the real cache file; the previous check looked at the literal
    # 'filename.txt', so an existing cache was never recognized.
    if not os.path.isfile(path):
        SaveChains(artist, chainName, chains)
    with open(path, 'r', encoding="utf8") as infile:
        return json.load(infile)
def SendPunch():
    """Generate one piece of text from the configured artist's chain.

    Loads the table selected by the module-level ``artist`` and
    ``chainName`` settings and generates at most ``textLen`` phrases.
    """
    loadedChains = LoadChains(artist, chainName)
    return GenText(loadedChains, textLen)
# Generator configuration, read as globals by SendPunch().
artist = "Oxxxymiron"
chainName = "general"
textLen = 64

# Read the whole corpus; the context manager closes the file handle as soon
# as the text is in memory instead of leaving it open.
with open("battle-mc/Oxxxymiron.txt", "r", encoding="utf8") as _corpusFile:
    InputText = _corpusFile.read()

# Single-word transition table built from the normalized corpus; LoadChains()
# writes this table to disk when it has to create the chain file.
chains = GenChains(f"{preprocess(InputText)} *END*", 1)