sya.py (7666B)
#!/usr/bin/env python3
"""Download the audio of a long video with yt-dlp and split it into tracks.

Each TRACKLIST file holds the video URL on its first line, followed by one
line per track made of a title and a timestamp (timestamp first or last).
"""

# std
import argparse
import subprocess
import re
import os
import sys

Version = 'v1.0.1'

# The subprocess calls below go through the shell only on Windows.
Shell = sys.platform == 'win32'

# Characters removed from track titles before they are used as filenames.
# Raw string so that the backslash itself is part of the character class.
UnsafeFilenameChars = re.compile(r'[/\\?%*:|"<>\x7F\x00-\x1F]')
# A leading track number ("01. ", "1) ", "02 - ", "- 03 "); digits that occur
# later in the title are part of the title and must be kept.
TrackNum = re.compile(r'^[\s-]*\d+[.):\s-]+')
# A timestamp such as "1:02:03", "[03:15]" or "(10.30)", optionally padded
# with whitespace and " - " separators.
Timestamp = re.compile(
    r'(?: - )?(?:[\t ]+)?(?:[\[\(]+)?((\d+[:.])+(\d+))(?:[\]\)])?(?:[\t ]+)?(?: - )?')


class TracklistItem:
    """One track: the timestamp it starts at and its filename-safe title."""

    def __init__(self, timestamp, title):
        self.timestamp = timestamp
        self.title = title


# utilities
def error_exit(msg):
    """Print a failure message and terminate with a non-zero exit status."""
    print('exit failure "{}"'.format(msg))
    sys.exit(1)


def check_bin(*binaries):
    """Check that every given binary can be executed.

    Each binary is started once without arguments and with its output
    discarded.  The program exits through error_exit() as soon as one of
    them cannot be started; otherwise True is returned.
    """
    for b in binaries:
        try:
            subprocess.call([b], stderr=subprocess.DEVNULL,
                            stdout=subprocess.DEVNULL, shell=Shell)
        except OSError:
            error_exit('failed to execute {}'.format(b))
    return True


def _run_and_echo(cmd):
    """Run cmd, print its combined stdout/stderr line by line as it arrives,
    wait for it to finish and return its exit status."""
    with subprocess.Popen(cmd, stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT, shell=Shell) as proc:
        for line in proc.stdout:
            print(' {}'.format(line.decode('utf-8', errors='ignore').strip()))
    return proc.returncode


# functions
def get_audio(youtubedl, url, outdir, format='mp3', quality='320K', keep=True, ffmpeg=''):
    """Download url as audio with yt-dlp and return the expected file path.

    The file is written to "<outdir>/<basename of outdir>.<format>".
    ffmpeg, when non-empty, is handed to yt-dlp as --ffmpeg-location; keep
    adds yt-dlp's -k flag.  The returned path is only the expected location:
    callers must check that the file really exists.
    """
    print('Downloading {} ({}, {})...'.format(url, format, quality))
    fname = '{}/{}'.format(outdir, os.path.basename(outdir))
    cmd = [youtubedl, '--newline', '--extract-audio', '--audio-format', format,
           '--audio-quality', quality, '-o', fname + '.%(ext)s']
    if ffmpeg != '':
        cmd.extend(['--ffmpeg-location', ffmpeg])
    if keep:
        cmd.append('-k')
    cmd.append(url)
    _run_and_echo(cmd)
    return '{}.{}'.format(fname, format)


def load_tracklist(path):
    """Read a tracklist file and return (url, track_lines).

    The first line of the file is the URL.  Every following non-blank line
    is returned, stripped of surrounding spaces, tabs and newlines, as one
    raw track entry.
    """
    url = ''
    tracklist = []
    with open(path, mode='r') as tracklist_file:
        for i, t in enumerate(tracklist_file):
            t = t.strip('\n\t ')
            if i == 0:
                url = t
            elif t:
                # Blank lines carry no track and would otherwise be
                # reported as tracks with a missing timestamp.
                tracklist.append(t)
    return url, tracklist


def parse_tracks(tracklist):
    """Turn raw tracklist lines into TracklistItem objects.

    Only the first and the last space-separated token of a line are
    considered as timestamps.  When both are timestamps, the first one is
    used unless right-hand timestamps have so far been more frequent than
    left-hand ones.  A line without a timestamp is reported on stdout and
    yields an item whose timestamp is None.  Matched timestamp tokens and a
    leading track number are removed from the title, as are characters that
    are unsafe in filenames.
    """
    tracks = []
    weightR = 0  # num. timestamps on right-side
    weightL = 0  # num. timestamps on left-side
    for lcount, line in enumerate(tracklist):
        tokens = line.split(' ')
        # Candidate positions: first token, plus the last one if distinct.
        candidates = [0] if len(tokens) == 1 else [0, len(tokens) - 1]

        timestamp = None
        matched = []
        for pos in candidates:
            token = tokens[pos]
            if not Timestamp.match(token):
                continue
            if timestamp is None or weightR > weightL:
                timestamp = token.strip(' \t[()]')
            if pos == 0:
                weightL += 1
            else:
                weightR += 1
            matched.append(pos)
        # Delete by position, back to front, so that the indexes stay valid
        # and an identical token elsewhere in the line is left alone.
        for pos in reversed(matched):
            del tokens[pos]
        if timestamp is None:
            print('line {}, missing timestamp: "{}"'.format(lcount, line))

        title = TrackNum.sub('', ' '.join(tokens))
        title = UnsafeFilenameChars.sub('', title)

        tracks.append(TracklistItem(timestamp, title))
    return tracks


def missing_times(tracks):
    """Return the indexes of the tracks whose timestamp is None."""
    return [i for i, t in enumerate(tracks) if t.timestamp is None]


def _parse_tracklen(output):
    """Return the last "time=H:MM:SS" value found in ffmpeg's progress
    output, without the fractional part, or None when there is none."""
    times = re.findall(r'time=(\d+:\d{2}:\d{2})', output)
    return times[-1] if times else None


def read_tracklen(ffmpeg, track_fpath):
    """Return the length of the audio file as reported by ffmpeg.

    ffmpeg decodes the file to a null output with -stats; the length is the
    last "time=" value of its progress output (printed to stderr).  The
    program exits through error_exit() when ffmpeg cannot be run or prints
    no such value.
    """
    cmd = [ffmpeg, '-v', 'quiet', '-stats', '-i', track_fpath, '-f', 'null', '-']
    length = None
    try:
        ret = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=Shell)
        length = _parse_tracklen(ret.decode('utf-8', errors='ignore'))
    except (subprocess.CalledProcessError, OSError):
        length = None
    if length is None:
        error_exit('Failed to find track length, aborting.')
    print('Track length: {}'.format(length))
    return length


def split_tracks(ffmpeg, audio_fpath, audio_len, tracks, format='mp3', outpath='out'):
    """Cut audio_fpath into one file per track using ffmpeg.

    Every track runs from its own timestamp to the timestamp of the next
    track; the last track runs to audio_len.  Files are written to outpath
    as "<NN> - <title>.<format>" and the audio stream is copied, not
    re-encoded.
    """
    print('Splitting...')
    for i, t in enumerate(tracks):
        outfile = '{}{}{} - {}.{}'.format(outpath, os.path.sep, str(i+1).zfill(2),
                                          t.title.strip(' - '), format)
        end = tracks[i+1].timestamp if i < len(tracks)-1 else audio_len
        print(' {} ({} - {})'.format(outfile, t.timestamp, end))
        # Use the binary handed in by the caller so that --ffmpeg is honoured.
        cmd = [ffmpeg, '-nostdin', '-y', '-loglevel', 'error',
               '-i', audio_fpath, '-ss', t.timestamp, '-to', end,
               '-acodec', 'copy', outfile]
        _run_and_echo(cmd)


# runtime
def parse_args():
    """Build the command line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(
        description='download & split audio tracks from long youtube videos')
    # arguments
    parser.add_argument('tracklist', metavar='TRACKLIST', nargs='*',
                        help='tracklist of title and timestamp information to split audio by')
    # options
    parser.add_argument('-o', '--output',
                        metavar='PATH', type=str, nargs='?', dest='output',
                        help='specify the directory to write output files to '
                             '(default: the tracklist path without its extension)')
    parser.add_argument('-f', '--format',
                        type=str, nargs='?', default='mp3', dest='format',
                        help='specify the --audio-format argument to pass to yt-dlp (default: mp3)')
    parser.add_argument('-q', '--quality',
                        type=str, nargs='?', default='320K', dest='quality',
                        help='specify the --audio-quality argument to pass to yt-dlp (default: 320K)')
    parser.add_argument('--yt-dlp',
                        metavar='PATH', type=str, nargs='?', dest='youtubedl',
                        help='path of the "yt-dlp" binary to use')
    parser.add_argument('--ffmpeg',
                        metavar='PATH', type=str, nargs='?', dest='ffmpeg',
                        help='path of the "ffmpeg" binary to use')
    parser.add_argument('-k', '--keep',
                        action='store_true', default=False, dest='keep',
                        help='keep any files removed during processing (full video/audio file)')
    return parser.parse_args()


def sya(args):
    """Download and split every tracklist named in args.

    Missing binaries, a missing tracklist file, a failed download or a track
    without a timestamp end the program through error_exit().
    """
    if args.youtubedl is None:
        args.youtubedl = 'yt-dlp.exe' if sys.platform == 'win32' else 'yt-dlp'
    if args.ffmpeg is None:
        args.ffmpeg = 'ffmpeg.exe' if sys.platform == 'win32' else 'ffmpeg'

    if not check_bin(args.youtubedl, args.ffmpeg):
        error_exit('required binaries are missing')
    if args.output and args.output.endswith(os.sep):
        args.output = args.output[:-1]

    if not args.tracklist:
        error_exit('no tracklist given')

    # yt-dlp only needs --ffmpeg-location for a non-default ffmpeg binary.
    ffmpeg_location = '' if args.ffmpeg in ('ffmpeg', 'ffmpeg.exe') else args.ffmpeg

    for t in args.tracklist:
        if not os.path.exists(t):
            error_exit('missing tracklist "{}"'.format(t))
        url, tracklist = load_tracklist(t)

        output = args.output if args.output is not None else os.path.splitext(t)[0]
        audio_fpath = get_audio(args.youtubedl, url, output, args.format,
                                args.quality, args.keep, ffmpeg_location)
        if not os.path.exists(audio_fpath):
            error_exit('download failed, aborting')

        tracks = parse_tracks(tracklist)

        missing = missing_times(tracks)
        if len(missing) > 0:
            error_exit('some tracks are missing timestamps')

        length = read_tracklen(args.ffmpeg, audio_fpath)
        os.makedirs(output, exist_ok=True)
        split_tracks(args.ffmpeg, audio_fpath, length, tracks, args.format, output)

        if args.keep is False:
            os.remove(audio_fpath)

        print('Success')


if __name__ == '__main__':
    sya(parse_args())