34 lines
1.1 KiB
Python
34 lines
1.1 KiB
Python
import re
|
|
import math
|
|
|
|
|
|
# Words which might lead to an inaccurate translation. Each key is the word
# to look for; its value is the wording substituted in its place.
DICTIONARY = dict(
    violated='raped',
    semen='cum',
)
|
|
|
|
def replace_english_words(text: str) -> str:
    """Swap every word listed in ``DICTIONARY`` for its replacement.

    ``text`` is split on whitespace and re-joined with single spaces, so any
    run of whitespace in the input collapses to one space in the result.

    A token matches a dictionary key when the whole token equals the key, or
    when the token with leading/trailing punctuation removed equals the key
    ignoring case. Surrounding punctuation is kept, and an initial capital or
    all-caps spelling is carried over to the replacement.

    Args:
        text: The text to process.

    Returns:
        The text with matching words substituted.
    """

    def _substitute(token: str) -> str:
        # An exact whole-token hit always wins.
        if token in DICTIONARY:
            return DICTIONARY[token]

        # Peel off surrounding punctuation so that "violated." or
        # "(violated" still reach the dictionary lookup.
        lead, core, trail = re.fullmatch(r'(\W*)(.*?)(\W*)', token).groups()
        if not core:
            return token

        replacement = DICTIONARY.get(core)
        if replacement is None:
            replacement = DICTIONARY.get(core.lower())
            if replacement is None:
                return token
            # Mirror the capitalisation of the word being replaced.
            if core.isupper() and len(core) > 1:
                replacement = replacement.upper()
            elif core[:1].isupper():
                replacement = replacement[:1].upper() + replacement[1:]

        return lead + replacement + trail

    return ' '.join(_substitute(token) for token in text.split())
|
|
|
|
def remove_ass_statements(text: str) -> str:
    """Strip every ``{...}`` group from ``text``.

    Each opening brace is paired with the nearest closing brace that follows
    it (non-greedy match). The pattern's ``.`` does not cross a newline, so a
    brace pair split over two lines is left in place.

    Args:
        text: The text to clean.

    Returns:
        ``text`` with all matched brace groups deleted.
    """
    brace_group = re.compile(r'\{.*?\}')
    return brace_group.sub('', text)
|
|
|
|
def remove_new_line_statements(text: str) -> str:
    """Replace each literal backslash-``N`` marker in ``text`` with one space.

    Only the two-character sequence (a backslash followed by an uppercase
    ``N``) is affected; real newline characters are left untouched.

    Args:
        text: The text to process.

    Returns:
        ``text`` with every marker turned into a single space.
    """
    line_break_marker = "\\N"
    pieces = text.split(line_break_marker)
    return " ".join(pieces)
|
|
|
|
def split_sentence(sentence: str, n: int, translated_sentence: str) -> list:
    """Break ``translated_sentence`` into a list of chunks.

    If ``sentence`` contains at least one comma and ``translated_sentence``
    contains exactly the same number, the translation is split at its commas
    and each piece is stripped of surrounding whitespace; ``n`` is not used
    on this path.

    Otherwise the translation is split into whitespace-separated words and
    grouped into consecutive runs of ``ceil(len(words) / n)`` words, which
    yields at most ``n`` chunks (the last one may be shorter).

    Args:
        sentence: The original sentence, used only for its comma count.
        n: Desired number of chunks for the word-based fallback.
        translated_sentence: The text to split.

    Returns:
        The list of chunk strings. Empty when the fallback path is taken and
        ``translated_sentence`` contains no words.

    Raises:
        ValueError: If the fallback path is taken with words to distribute
            and ``n`` is less than 1.
    """
    comma_count = sentence.count(',')

    # Split by commas if commas align
    if comma_count and comma_count == translated_sentence.count(','):
        return [piece.strip() for piece in translated_sentence.split(',')]

    # Otherwise, split into n roughly equal chunks
    words = translated_sentence.split()
    if not words:
        # No words means a chunk size of 0, which range() rejects as a step.
        return []
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n!r}")

    chunk_size = math.ceil(len(words) / n)
    return [
        ' '.join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
|