sya

split youtube audio tracks, with an optional pyqt gui
git clone git://src.gearsix.net/sya
Log | Files | Refs | Atom | README

commit d78c56c9406479fd020a424117b650352ee2eade
parent c3bb455d0a00ece5dd6b7dcb788a3ccef5d2e342
Author: gearsix <gearsix@tuta.io>
Date:   Tue,  4 May 2021 17:22:38 +0100

session 2: need to start making these commits more incrementally...

- finished get_audio arg handling
- added '-o', '--output' argument
- improved arg handling through functions
- added check to see if timestamps are missing
- added TODO for find_times where timestamps are missing
- start split_tracks

Diffstat:
M sya.py | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 72 insertions(+), 19 deletions(-)

diff --git a/sya.py b/sya.py
@@ -2,19 +2,25 @@
 
 import argparse
 import subprocess
+import re
+import os
+
+Timestamp = re.compile(':?\d\d')
 
 class TracklistItem:
     def __init__(self, timestamp, title):
         self.timestamp = timestamp
         self.title = title
 
-def parseargs():
+def parse_args():
     parser = argparse.ArgumentParser(
         description='download & split audio tracks long youtube videos')
     # arguments
     parser.add_argument('tracklist', metavar='TRACKLIST',
         help='tracklist to split audio by')
     # options
+    parser.add_argument('-o', '--output', metavar='PATH', type=str, nargs='?', default='out',
+        help='specify the directory to write output files to (default: ./out)')
     parser.add_argument('-f', '--format', type=str, nargs='?', default='mp3',
         help='specify the --audio-format argument to pass to youtube-dl (default: mp3)')
     parser.add_argument('-q', '--quality', type=str, nargs='?', default='320K',
@@ -25,11 +31,11 @@ def parseargs():
     parser.add_argument('--ffmpeg', metavar='PATH', type=str, nargs='?',
         default='ffmpeg', dest='ffmpeg',
         help='path of the "ffmpeg" binary to use')
-    parser.add_argument('-k', '--keep-full', action='store_true',
-        help='don\'t remove the full audio file after splitting')
+    parser.add_argument('-k', '--keep', action='store_true',
+        help='keep any files removed during processing (full video/audio file)')
     return parser.parse_args()
 
-def checkbin(*binaries):
+def check_bin(*binaries):
     for b in binaries:
         try:
             subprocess.call([b], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
@@ -37,24 +43,71 @@ def checkbin(*binaries):
             print(b, 'failed to execute, check it exists in your $PATH.\n'
                 'Otherwise you can point to the binary using the relevant optional argument.')
 
-def getaudio(youtubedl, url, format, quality):
-    subprocess.call([youtubedl, url, '-x', '--audio-format', format, '--audio-quality', quality, '-k'])
+def get_audio(youtubedl, url, format='mp3', quality='320K', keep=True, ffmpeg='ffmpeg'):
+    cmd = [youtubedl, url, '--extract-audio', '--audio-format', format,
+        '--audio-quality', quality, '-o', 'audio.%(ext)s', '--prefer-ffmpeg']
+    if keep == True:
+        cmd.append('-k')
+    if ffmpeg != 'ffmpeg':
+        cmd.append('--ffmpeg-location ', ffmpeg)
+    subprocess.call(cmd)
+    return './audio.mp3'
 
-def readtracks(tracklist_path):
-    tracklist_file = open(tracklist_path, mode = 'r')
-    tracklist_lines = tracklist.readlines()
+def load_tracklist(path):
+    tracklist = []
+    tracklist_file = open(path, mode = 'r')
+    for t in tracklist_file.readlines():
+        tracklist.append(t.strip('\n'))
     tracklist_file.close()
+    return tracklist
 
-    tracklist = []
-    for line_count, line in enumerate(tracklist_lines):
-        if line_count == 0:
-            getaudio(args.youtubedl, line, args.format, args.quality)
-            continue
+def parse_tracks(tracklist):
+    tracks = []
+    for lcount, line in enumerate(tracklist):
         sline = line.split(' ', maxsplit=1)
-        tracklist.append(TracklistItem(sline[0], sline[1]))
-    return tracklist
+        if Timestamp.match(sline[0]):
+            tracks.append(TracklistItem(sline[0], sline[1]))
+        else: # assume there's no timestamp, whole line = track title
+            tracks.append(TracklistItem(None, line))
+    return tracks
+
+def missing_times(tracks):
+    missing = []
+    for i, t in enumerate(tracks):
+        if t.timestamp == None:
+            missing.append(i)
+    return missing
+
+def find_times(tracks, missing):
+    j = 0
+    for i, t in enumerate(tracks):
+        if i == missing[j]:
+            # TODO
+            j += 1
+    return tracks
+
+def split_tracks(ffmpeg, audio_fpath, tracks, outpath):
+    ret = subprocess.run(['ffmpeg', '-v', 'quiet', '-stats', '-i', 'audio.mp3',
+        '-f', 'null', '-'], stdout=subprocess.PIPE)
+    print(ret)
+    for i, t in enumerate(tracks):
+        end = 'END'
+        if i < len(tracks)-1:
+            end = tracks[i+1].timestamp
+        subprocess.call(['ffmpeg', '-nostdin', '-y', '-loglevel', 'error',
+            '-i', audio_fpath, '-ss', t.timestamp, '-to', end, '-acodec', 'copy',
+            '{}/{} - {}.mp3'.format(outpath, str(i).zfill(2), t.title)])
+    return
 
 if __name__ == '__main__':
-    args = parseargs()
-    checkbin(args.youtubedl, args.ffmpeg)
-    tracklist = readtracks(args.tracklist)
+    args = parse_args()
+    check_bin(args.youtubedl, args.ffmpeg)
+    tracklist = load_tracklist(args.tracklist)
+    audio_fpath = get_audio(args.youtubedl, tracklist[0],
+        args.format, args.quality, args.keep, args.ffmpeg)
+    tracks = parse_tracks(tracklist[1:])
+    missing = missing_times(tracks)
+    if len(missing) > 0:
+        tracks = find_times(tracks, missing)
+    os.makedirs(args.output, exist_ok=True)
+    split_tracks(args.ffmpeg, audio_fpath, tracks, args.output)