sya.py (8334B)
#!/usr/bin/env python3
'''
sya: download the audio of a (long) video with yt-dlp and split it into
individual tracks with ffmpeg, according to a plain-text tracklist.

Tracklist file format: the first line is the URL to download, every
following line is one track consisting of a title and a timestamp (the
timestamp may be the first or the last word of the line).
'''

# std
import argparse
import subprocess
import re
import os
import sys

Version = 'v1.3.0'

# On Windows the external binaries are launched through the shell.
Shell = sys.platform == 'win32'

# Characters that are not safe in file names. A raw string is required so
# that `\\` reaches the regex engine as an escaped backslash; with a plain
# string it collapsed into `\?` and backslashes were never stripped.
UnsafeFilenameChars = re.compile(r'[/\\?%*:|"<>\x7F\x00-\x1F]')
# A track number at the start of a title ("01. ", "1) ", "2 - ", "3 ").
# Group 1 captures any leading separator so it can be put back unchanged.
# The pattern is anchored so digits inside the title are left alone.
TrackNum = re.compile(r'^([-\t ]*)\d+(?:[.):][\t ]*|[\t ]*-[\t ]*|[\t ]+)')
Timestamp = re.compile(r'(?: - )?(?:[\t ]+)?(?:[\[\(]+)?((\d+[:.])+(\d+))(?:[\]\)])?(?:[\t ]+)?(?: - )?')
# The "time=HH:MM:SS" field of an ffmpeg `-stats` progress line
# (the fractional part is deliberately not captured).
ProgressTime = re.compile(r'time=(\d+(?::\d+)*)')


class TracklistItem:
    '''A single track: its start `timestamp` (str or None) and `title`.'''

    def __init__(self, timestamp, title):
        self.timestamp = timestamp
        self.title = title

    def __repr__(self):
        return 'TracklistItem({!r}, {!r})'.format(self.timestamp, self.title)


# utilities
def error_exit(msg):
    '''Print `msg` as a failure and terminate with a non-zero exit status.'''
    print('exit failure "{}"'.format(msg))
    sys.exit(1)


def check_bin(*binaries):
    '''
    Try to execute each of `binaries` (output discarded).
    Exits through `error_exit` if one cannot be launched,
    otherwise returns True.
    '''
    for b in binaries:
        try:
            subprocess.call([b], stderr=subprocess.DEVNULL,
                            stdout=subprocess.DEVNULL, shell=Shell)
        except (OSError, ValueError):
            error_exit('failed to execute {}'.format(b))
    return True


def _run_and_echo(cmd):
    '''Run `cmd`, echoing its combined stdout/stderr line by line.'''
    with subprocess.Popen(cmd, stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT, shell=Shell) as p:
        # iterate the pipe so that output is shown while the process runs
        for line in p.stdout:
            print('  {}'.format(line.decode('utf-8', errors='ignore').strip()))


# functions
def get_audio(youtubedl, url, outdir, format='mp3', quality='320K', keep=True, ffmpeg=''):
    '''
    Download the audio of `url` using the `youtubedl` binary.

    The file is written to `outdir`/<basename of outdir>.`format`.
    `quality` is passed as --audio-quality, `keep` adds -k and a
    non-empty `ffmpeg` is passed as --ffmpeg-location.
    Returns the path the audio file is expected at; the caller has to
    check that it really exists.
    '''
    print('Downloading {} ({}, {})...'.format(url, format, quality))
    fname = '{}/{}'.format(outdir, os.path.basename(outdir))
    cmd = [youtubedl, '--newline', '--extract-audio', '--audio-format', format,
           '--audio-quality', quality, '-o', fname + '.%(ext)s']
    if ffmpeg != '':
        cmd.append('--ffmpeg-location')
        cmd.append(ffmpeg)
    if keep:
        cmd.append('-k')
    cmd.append(url)
    _run_and_echo(cmd)
    return '{}.{}'.format(fname, format)


def load_tracklist(path):
    '''
    Read the tracklist file at `path`.
    Returns (url, tracklist): the first line and the list of all
    remaining lines, each stripped of surrounding whitespace.
    '''
    tracklist = []
    url = ''
    with open(path, mode='r', encoding='utf-8', errors='ignore') as tracklist_file:
        for i, t in enumerate(tracklist_file):
            t = t.strip('\n\t ')
            if i == 0:
                url = t
            else:
                tracklist.append(t)
    return url, tracklist


def parse_tracks(tracklist):
    '''
    This function parses each line of tracklist and
    matches it against the `Timestamp` regex.
    There is a heuristic check against timestamps on
    the right and left of lines (some track names may
    have timestamp-looking elements).

    Returns a list of `TracklistItem`; an item's timestamp is None
    when no timestamp was found on its line (a notice is printed).
    '''
    tracks = []
    weightR = 0  # num. timestamps on right-side
    weightL = 0  # num. timestamps on left-side
    for lcount, line in enumerate(tracklist):
        if not line.strip(' '):
            continue
        words = line.split(' ')

        timestamp = None
        matched = set()
        # Only the first and the last word can be the timestamp. Matches are
        # recorded by index and dropped afterwards, rather than removing
        # items from the list while it is being iterated.
        for i in sorted({0, len(words) - 1}):
            word = words[i]
            if not Timestamp.match(word):
                continue
            # prefer the right-hand match only if that side has been the
            # more common one so far
            if timestamp is None or weightR > weightL:
                timestamp = word.strip(' \t[()]')
            if i == 0:
                weightL += 1
            else:
                weightR += 1
            matched.add(i)
        words = [w for i, w in enumerate(words) if i not in matched]

        if timestamp is None:
            print('line {}, missing timestamp: "{}"'.format(lcount, line))
        elif timestamp[0] != '0' and timestamp[1:2] == ':':
            # pad single digit leading field, e.g. 1:23 -> 01:23
            timestamp = '0' + timestamp

        line = ' '.join(words)
        line = TrackNum.sub(r'\1', line)
        title = UnsafeFilenameChars.sub('', line)

        tracks.append(TracklistItem(timestamp, title))
    return tracks


def missing_times(tracks):
    '''Return the indexes of all `tracks` that have no timestamp.'''
    return [i for i, t in enumerate(tracks) if t.timestamp is None]


def _last_progress_time(output):
    '''Return the last "time=" value in ffmpeg stats `output`, or None.'''
    found = ProgressTime.findall(output)
    return found[-1] if found else None


def read_tracklen(ffmpeg, track_fpath):
    '''
    Find the length of the audio file at `track_fpath` by letting `ffmpeg`
    decode it to the null muxer and reading the final progress line.
    Returns the length as a string (fraction of a second dropped);
    exits through `error_exit` when it cannot be determined.
    '''
    cmd = [ffmpeg, '-v', 'quiet', '-stats', '-i', track_fpath, '-f', 'null', '-']
    length = None
    try:
        # the stats are printed to stderr, hence the redirect
        ret = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=Shell)
        length = _last_progress_time(ret.decode('utf-8', errors='ignore'))
    except (subprocess.CalledProcessError, OSError, ValueError):
        length = None
    if length is None:
        error_exit('Failed to find track length, aborting.')
    print('Track length: {}'.format(length))
    return length


def split_tracks(ffmpeg, audio_fpath, audio_len, tracks, format='mp3', outpath='out'):
    '''
    Cut `audio_fpath` into one file per item of `tracks` using `ffmpeg`.

    Each track runs from its own timestamp to the timestamp of the next
    track; the last one runs to `audio_len`. Files are written to
    `outpath` as "<NN> - <title>.<format>" (audio stream is copied).
    '''
    print('Splitting...')
    for i, t in enumerate(tracks):
        outfile = '{}{}{} - {}.{}'.format(outpath, os.path.sep, str(i+1).zfill(2), t.title.strip(' - '), format)
        end = audio_len
        if i < len(tracks)-1:
            end = tracks[i+1].timestamp
        print('  {} ({} - {})'.format(outfile, t.timestamp, end))
        # use the binary that was passed in, not whatever 'ffmpeg' is on PATH
        cmd = [ffmpeg, '-nostdin', '-y', '-loglevel', 'error',
               '-i', audio_fpath, '-ss', t.timestamp, '-to', end,
               '-acodec', 'copy', outfile]
        _run_and_echo(cmd)


# runtime
def parse_args():
    '''Define the command line interface and parse sys.argv with it.'''
    parser = argparse.ArgumentParser(
        description='download & split audio tracks long youtube videos')
    # arguments
    parser.add_argument('tracklist', metavar='TRACKLIST', nargs='*',
                        help='tracklist of title and timestamp information to split audio by')
    # options
    parser.add_argument('-v', '--version',
                        action='store_true', default=False, dest='vers',
                        help='Print version information')
    parser.add_argument('-o', '--output',
                        metavar='PATH', type=str, nargs='?', dest='output',
                        help='specify the directory to write output files to (default: ./out)')
    parser.add_argument('-f', '--format',
                        type=str, nargs='?', default='mp3', dest='format',
                        help='specify the --audio-format argument to pass to yt-dlp (default: mp3)')
    parser.add_argument('-q', '--quality',
                        type=str, nargs='?', default='320K', dest='quality',
                        help='specify the --audio-quality argument to pass to yt-dlp (default: 320K)')
    parser.add_argument('--yt-dlp',
                        metavar='PATH', type=str, nargs='?', dest='youtubedl',
                        help='path of the "yt-dlp" binary to use')
    parser.add_argument('--ffmpeg',
                        metavar='PATH', type=str, nargs='?', dest='ffmpeg',
                        help='path of the "ffmpeg" binary to use')
    parser.add_argument('-k', '--keep',
                        action='store_true', default=False, dest='keep',
                        help='keep any files removed during processing (full video/audio file)')
    return parser.parse_args()


def sya(args):
    '''
    Run the program for the parsed command line `args`: for every
    tracklist file download the audio, split it into tracks and
    (unless --keep is given) delete the downloaded file.
    Any failure terminates the process through `error_exit`.
    '''
    if args.vers:
        print(Version)
        return

    if args.youtubedl is None:
        args.youtubedl = 'yt-dlp.exe' if sys.platform == 'win32' else 'yt-dlp'
    if args.ffmpeg is None:
        args.ffmpeg = 'ffmpeg.exe' if sys.platform == 'win32' else 'ffmpeg'

    if not check_bin(args.youtubedl, args.ffmpeg):
        error_exit('required binaries are missing')
    if args.output is not None and args.output.endswith(os.sep):
        args.output = args.output[:-1]

    if not args.tracklist:
        error_exit('no tracklist specified')

    # yt-dlp only needs --ffmpeg-location for a non-default ffmpeg
    ffmpeg_location = '' if args.ffmpeg in ('ffmpeg', 'ffmpeg.exe') else args.ffmpeg

    for t in args.tracklist:
        if not os.path.exists(t):
            error_exit('missing tracklist "{}"'.format(t))
        url, tracklist = load_tracklist(t)

        # validate the tracklist first, so a bad one fails before
        # anything is downloaded
        tracks = parse_tracks(tracklist)
        if missing_times(tracks):
            error_exit('some tracks are missing timestamps')

        output = args.output if args.output is not None else os.path.splitext(t)[0]
        audio_fpath = get_audio(args.youtubedl, url, output, args.format,
                                args.quality, args.keep, ffmpeg_location)
        if not os.path.exists(audio_fpath):
            error_exit('download failed, aborting')

        length = read_tracklen(args.ffmpeg, audio_fpath)
        os.makedirs(output, exist_ok=True)
        split_tracks(args.ffmpeg, audio_fpath, length, tracks, args.format, output)

        if not args.keep:
            os.remove(audio_fpath)

    print('Success')


if __name__ == '__main__':
    sya(parse_args())