commit d78c56c9406479fd020a424117b650352ee2eade
parent c3bb455d0a00ece5dd6b7dcb788a3ccef5d2e342
Author: gearsix <gearsix@tuta.io>
Date: Tue, 4 May 2021 17:22:38 +0100
session 2: need to start making these commits more incrementally...
- finished get_audio arg handling
- added '-o', '--output' argument
- improved arg handling through functions
- added check to see if timestamps are missing
- added TODO for find_times where timestamps are missing
- start split_tracks
Diffstat:
M | sya.py | | | 91 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------- |
1 file changed, 72 insertions(+), 19 deletions(-)
diff --git a/sya.py b/sya.py
@@ -2,19 +2,25 @@
import argparse
import subprocess
+import re
+import os
+
+# Matches a track timestamp token such as '4:05', '00:00' or '1:02:03',
+# optionally followed by fractional seconds. The pattern is anchored at the
+# end because callers test it with .match(), which only anchors the start.
+Timestamp = re.compile(r'(?:\d+:)?\d{1,2}:\d{2}(?:\.\d+)?$')
class TracklistItem:
+    """One tracklist entry: the timestamp the track starts at and its title."""
def __init__(self, timestamp, title):
+        # both values are stored as given; parse_tracks passes timestamp=None
+        # for lines that do not begin with a timestamp
self.timestamp = timestamp
self.title = title
-def parseargs():
+def parse_args():
parser = argparse.ArgumentParser(
description='download & split audio tracks long youtube videos')
# arguments
parser.add_argument('tracklist', metavar='TRACKLIST',
help='tracklist to split audio by')
# options
+ parser.add_argument('-o', '--output', metavar='PATH', type=str, nargs='?', default='out',
+ help='specify the directory to write output files to (default: ./out)')
parser.add_argument('-f', '--format', type=str, nargs='?', default='mp3',
help='specify the --audio-format argument to pass to youtube-dl (default: mp3)')
parser.add_argument('-q', '--quality', type=str, nargs='?', default='320K',
@@ -25,11 +31,11 @@ def parseargs():
parser.add_argument('--ffmpeg', metavar='PATH', type=str, nargs='?',
default='ffmpeg', dest='ffmpeg',
help='path of the "ffmpeg" binary to use')
- parser.add_argument('-k', '--keep-full', action='store_true',
- help='don\'t remove the full audio file after splitting')
+ parser.add_argument('-k', '--keep', action='store_true',
+ help='keep any files removed during processing (full video/audio file)')
return parser.parse_args()
-def checkbin(*binaries):
+def check_bin(*binaries):
for b in binaries:
try:
subprocess.call([b], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
@@ -37,24 +43,71 @@ def checkbin(*binaries):
print(b, 'failed to execute, check it exists in your $PATH.\n'
'Otherwise you can point to the binary using the relevant optional argument.')
-def getaudio(youtubedl, url, format, quality):
- subprocess.call([youtubedl, url, '-x', '--audio-format', format, '--audio-quality', quality, '-k'])
+def get_audio(youtubedl, url, format='mp3', quality='320K', keep=True, ffmpeg='ffmpeg'):
+    """Run youtube-dl on `url` to download it and extract the audio.
+
+    youtubedl -- name or path of the youtube-dl binary to run
+    url       -- URL handed to youtube-dl
+    format    -- value for youtube-dl's --audio-format
+    quality   -- value for youtube-dl's --audio-quality
+    keep      -- when true, '-k' is passed to youtube-dl
+    ffmpeg    -- ffmpeg binary; forwarded with --ffmpeg-location unless it is
+                 the default 'ffmpeg'
+
+    Returns the relative path the audio is expected at, './audio.<format>'.
+    The exit status of youtube-dl is not checked.
+    """
+    cmd = [youtubedl, url, '--extract-audio', '--audio-format', format,
+        '--audio-quality', quality, '-o', 'audio.%(ext)s', '--prefer-ffmpeg']
+    if keep:
+        cmd.append('-k')
+    if ffmpeg != 'ffmpeg':
+        # flag and value are separate argv entries; list.append() only takes one item
+        cmd.extend(['--ffmpeg-location', ffmpeg])
+    subprocess.call(cmd)
+    # The output template is 'audio.%(ext)s', so the file name follows the
+    # requested format instead of always being mp3.
+    # NOTE(review): assumes the extension equals the --audio-format value - confirm
+    # for formats such as 'best' or 'vorbis'.
+    return './audio.{}'.format(format)
-def readtracks(tracklist_path):
- tracklist_file = open(tracklist_path, mode = 'r')
- tracklist_lines = tracklist.readlines()
+def load_tracklist(path):
+    """Read the file at `path` and return its lines with newlines stripped.
+
+    The file is opened in a `with` block so the handle is closed even when
+    reading raises.
+    """
+    with open(path, mode='r') as tracklist_file:
+        return [line.strip('\n') for line in tracklist_file]
-    tracklist_file.close()
- tracklist = []
- for line_count, line in enumerate(tracklist_lines):
- if line_count == 0:
- getaudio(args.youtubedl, line, args.format, args.quality)
- continue
+def parse_tracks(tracklist):
+    """Turn tracklist lines into a list of TracklistItem objects.
+
+    A line is split at its first space; when the first field matches
+    Timestamp the rest of the line is the title. Otherwise the whole line is
+    kept as the title and the timestamp is None. Blank lines are skipped.
+    """
+    tracks = []
+    for line in tracklist:
+        if not line.strip():
+            continue # a blank line is not a track
         sline = line.split(' ', maxsplit=1)
-        tracklist.append(TracklistItem(sline[0], sline[1]))
-    return tracklist
+        if Timestamp.match(sline[0]):
+            # a bare timestamp has no second field; use an empty title rather
+            # than raising IndexError on sline[1]
+            title = sline[1] if len(sline) > 1 else ''
+            tracks.append(TracklistItem(sline[0], title))
+        else: # assume there's no timestamp, whole line = track title
+            tracks.append(TracklistItem(None, line))
+    return tracks
+
+def missing_times(tracks):
+    """Return the indexes (ascending) of the tracks whose timestamp is None."""
+    return [i for i, t in enumerate(tracks) if t.timestamp is None]
+
+def find_times(tracks, missing):
+    """Visit the tracks at the indexes in `missing` to fill in their timestamps.
+
+    `missing` is walked in step with `tracks`, so it must be in ascending
+    order (the order missing_times() builds it in). Working out the
+    timestamps is still a TODO, so `tracks` is returned unchanged.
+    """
+    j = 0
+    for i, t in enumerate(tracks):
+        if j >= len(missing):
+            break # every missing index was visited; missing[j] would raise IndexError
+        if i == missing[j]:
+            # TODO
+            j += 1
+    return tracks
+
+def split_tracks(ffmpeg, audio_fpath, tracks, outpath):
+    """Cut `audio_fpath` into one file per track inside `outpath`.
+
+    ffmpeg      -- ffmpeg binary to run for every track
+    audio_fpath -- path of the full audio file to split
+    tracks      -- objects with .timestamp and .title, in playing order
+    outpath     -- directory the '<NN> - <title><ext>' files are written to
+
+    A track runs from its own timestamp to the next track's timestamp; the
+    last track has no '-to' so it runs to the end of the input. Tracks with
+    no timestamp are skipped because there is no position to seek to.
+    Returns None; ffmpeg's exit status is not checked.
+    """
+    # '-acodec copy' cannot change the codec, so reuse the input's extension
+    ext = os.path.splitext(audio_fpath)[1] or '.mp3'
+    for i, t in enumerate(tracks):
+        if t.timestamp is None:
+            print('skipping "{}", it has no timestamp'.format(t.title))
+            continue
+        cmd = [ffmpeg, '-nostdin', '-y', '-loglevel', 'error',
+            '-i', audio_fpath, '-ss', t.timestamp]
+        # the end of a track is the start of the next one, when there is one
+        end = tracks[i+1].timestamp if i < len(tracks)-1 else None
+        if end is not None:
+            cmd += ['-to', end]
+        cmd += ['-acodec', 'copy',
+            '{}/{} - {}{}'.format(outpath, str(i).zfill(2), t.title, ext)]
+        subprocess.call(cmd)
if __name__ == '__main__':
-    args = parseargs()
-    checkbin(args.youtubedl, args.ffmpeg)
-    tracklist = readtracks(args.tracklist)
+    # Flow: parse options, check the binaries run, load the tracklist, download
+    # the audio named on its first line, then split it using the other lines.
+    args = parse_args()
+    check_bin(args.youtubedl, args.ffmpeg)
+    tracklist = load_tracklist(args.tracklist)
+    if not tracklist:
+        # the first line has to hold the URL; tracklist[0] would raise IndexError
+        raise SystemExit('{} is empty, expected a URL followed by the tracks'.format(args.tracklist))
+    audio_fpath = get_audio(args.youtubedl, tracklist[0],
+        args.format, args.quality, args.keep, args.ffmpeg)
+    tracks = parse_tracks(tracklist[1:])
+    missing = missing_times(tracks)
+    if missing:
+        tracks = find_times(tracks, missing)
+    os.makedirs(args.output, exist_ok=True)
+    split_tracks(args.ffmpeg, audio_fpath, tracks, args.output)