ScriptIt/transcriptops.py at main · sameer-ahmd/ScriptIt · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import html
import json


def transcript_parser(transcript_path, uid, time=-10):
    """Render a transcript file as clickable word buttons.

    The JSON file at ``transcript_path`` must contain a ``word_alternatives``
    list whose entries have a ``start_time`` and an ``alternatives`` list of
    ``{'word': ...}`` dicts. The first alternative of every entry is treated
    as the best word.

    Side effect: the file is rewritten with a ``transcript`` key holding the
    best words joined by single spaces.

    Args:
        transcript_path: Path of the transcript JSON file (read and rewritten).
        uid: Unused here; kept so existing callers keep working.
        time: Start time of the word to highlight. The default of -10 is not
            expected to match any word, so nothing is highlighted.

    Returns:
        An HTML string with one ``<button>`` per word followed by an
        "Edit" button.
    """
    # Read with the same encoding the file is written with below; relying on
    # the platform default breaks non-ASCII transcripts on some systems.
    with open(transcript_path, 'r', encoding='utf-8') as content:
        result = json.load(content)

    transcript_text = []
    dom_parts = []

    for word_obj in result['word_alternatives']:
        alternatives = word_obj['alternatives']
        if not alternatives:
            # Nothing to show for this slot; skip instead of raising.
            continue

        word_time = word_obj['start_time']
        best_word = alternatives[0]['word'].replace('<', '')
        transcript_text.append(best_word)

        # Escape before interpolating so quotes or ampersands inside a word
        # cannot break out of the id attribute or the button text.
        safe_word = html.escape(best_word, quote=True)

        # Button ids repeat whenever a word repeats, so an id-based <style>
        # rule would highlight every occurrence. Style only this button.
        highlight = ''
        if word_time == time:
            highlight = ' style="background-color:#FFDD61;"'

        dom_parts.append(f'<button id="btn-{safe_word}"{highlight} '
                         f'onclick=seekVideo({word_time})>'
                         f'{safe_word}</button>\n')

    # Append entire transcript text based on word_alternatives
    result['transcript'] = ' '.join(transcript_text)
    with open(transcript_path, 'w', encoding='utf-8') as out:
        json.dump(result, out, ensure_ascii=False, indent=4)

    dom_parts.append('\n<button class="button1" '
                     'onclick="editTranscript()">Edit</button>')

    return ''.join(dom_parts)


def transcript_edit(transcript_path):
    """Render the editing form for a transcript file.

    Every entry of ``word_alternatives`` in the JSON file becomes a
    ``<div id=start_time>``:

    * more than one alternative -> a ``<select>`` listing each distinct word
      once (with ``<`` stripped), plus a trailing ``CUSTOM...`` option;
    * exactly one alternative -> a text ``<input>`` pre-filled with the word;
    * no alternatives -> an empty div.

    Both controls call ``editCustom(this.name, this.value)`` on change, with
    the word's start time as the control name.

    Args:
        transcript_path: Path of the transcript JSON file (read only).

    Returns:
        The HTML string of all word controls followed by an "Exit" button.
    """
    # Transcript files are written as UTF-8 by the other functions in this
    # module, so read them back the same way.
    with open(transcript_path, 'r', encoding='utf-8') as content:
        result = json.load(content)

    dom_parts = []

    for word_obj in result['word_alternatives']:
        word_time = word_obj['start_time']
        alt_words = word_obj['alternatives']
        num_alts = len(alt_words)
        dom_parts.append(f'\n<div id={word_time}>')

        if num_alts > 1:
            dom_parts.append(f'\n<select name={word_time} '
                             f'onchange="editCustom(this.name, this.value)">'
                             f'\n')

            # De-duplicate on the displayed text while keeping the original
            # order (dict keys preserve insertion order). Indexing the list
            # by the size of a set would instead drop trailing alternatives
            # and still show the duplicates.
            unique_words = dict.fromkeys(
                str(alt['word']).replace('<', '') for alt in alt_words)

            for alt_word in unique_words:
                safe_word = html.escape(alt_word, quote=True)
                dom_parts.append(f'<option value="{safe_word}">'
                                 f'{safe_word}</option>\n')

            dom_parts.append('<option value="CUSTOM412">CUSTOM...</option>\n')
            dom_parts.append('</select>\n')

        elif num_alts == 1:
            # Escape so a quote inside the word cannot end the value attribute.
            input_word = html.escape(str(alt_words[0]['word']), quote=True)
            dom_parts.append(f'\n<input type="text" name="{word_time}" '
                             f'value="{input_word}" '
                             f'onchange="editCustom(this.name, this.value)">'
                             f'\n')

        dom_parts.append('</div>\n')

    dom_parts.append('\n<button class="button1" '
                     'onclick="exitEditTranscript()">Exit</button>')

    return ''.join(dom_parts)


def transcript_modify(transcript_path, timestamp, new_word):
    """Make ``new_word`` the best alternative of the word at ``timestamp``.

    For every ``word_alternatives`` entry whose ``start_time`` equals
    ``timestamp``, ``new_word`` is placed first with confidence 1 and the
    previous best word is moved to the end with confidence 0.0. Each word is
    kept only once, so picking an existing alternative or resubmitting the
    same word does not pile up duplicate entries in the file.

    The file is rewritten even when no entry matches.

    Args:
        transcript_path: Path of the transcript JSON file (read and rewritten).
        timestamp: Start time identifying the word; compared with ``==``
            against the stored ``start_time`` values.
        new_word: Replacement word; stored as its string form.

    Returns:
        Always ``True``.
    """
    # Read with the same encoding the file is written with below.
    with open(transcript_path, 'r', encoding='utf-8') as content:
        result = json.load(content)

    new_text = f'{new_word}'

    for word_obj in result['word_alternatives']:
        if timestamp != word_obj['start_time']:
            continue

        alternatives = word_obj['alternatives']
        rebuilt = [{'word': new_text, 'confidence': 1}]
        seen = {new_text}

        # Keep the remaining alternatives in order, once each.
        for alt in alternatives[1:]:
            if alt['word'] not in seen:
                seen.add(alt['word'])
                rebuilt.append(alt)

        # Demote the previous best word to the end, unless the list was
        # empty or the word is already represented.
        if alternatives:
            old_text = f"{alternatives[0]['word']}"
            if old_text not in seen:
                rebuilt.append({'word': old_text, 'confidence': 0.0})

        word_obj['alternatives'] = rebuilt

    with open(transcript_path, 'w', encoding='utf-8') as out:
        json.dump(result, out, ensure_ascii=False, indent=4)

    print(f'Updated Transcript! New word: {new_word}')
    return True


def transcript_search(word, transcript_path, uid, search_results):
    """Collect the start times at which ``word`` is the best transcript word.

    An entry matches when the first item of its ``alternatives`` list has a
    ``word`` exactly equal to ``word``. The stored ``transcript`` text is
    deliberately not consulted: it is only refreshed by ``transcript_parser``,
    so it may be missing or out of date after an edit, which would hide
    valid matches.

    Args:
        word: Word to look for (exact, case-sensitive match).
        transcript_path: Path of the transcript JSON file (read only).
        uid: Identifier copied into every result row.
        search_results: List that receives one ``[word, start_time, uid]``
            row per match; it is modified in place.

    Returns:
        The same ``search_results`` list that was passed in.
    """
    # Transcript files are written as UTF-8 by the other functions in this
    # module, so read them back the same way.
    with open(transcript_path, 'r', encoding='utf-8') as content:
        result = json.load(content)

    for word_obj in result['word_alternatives']:
        alternatives = word_obj['alternatives']
        # Entries without any alternative cannot match; skip them rather
        # than raising on the [0] lookup.
        if alternatives and alternatives[0]['word'] == word:
            search_results.append([word, word_obj['start_time'], uid])

    return search_results