sya

split youtube audio tracks, with an optional pyqt gui
git clone git://src.gearsix.net/syasya.zip
Log | Files | Refs | Atom | README

sya.py (raw) (8334B)


   1 #!/usr/bin/env python3
   2 
   3 # std
   4 import argparse
   5 import subprocess
   6 import re
   7 import os
   8 import sys
   9 
  10 Version = 'v1.3.0'
  11 
  12 Shell = True if sys.platform == 'win32' else False
  13 
  14 UnsafeFilenameChars = re.compile('[/\\?%*:|\"<>\x7F\x00-\x1F]')
  15 TrackNum = re.compile(r'(?:\d+.? ?-? ?)')
  16 Timestamp = re.compile(r'(?: - )?(?:[\t ]+)?(?:[\[\(]+)?((\d+[:.])+(\d+))(?:[\]\)])?(?:[\t ]+)?(?: - )?')
  17 
  18 class TracklistItem:
  19     def __init__(self, timestamp, title):
  20         self.timestamp = timestamp
  21         self.title = title
  22 
  23 
  24 # utilities
  25 def error_exit(msg):
  26     print('exit failure "{}"'.format(msg))
  27     sys.exit()
  28 
  29 def check_bin(*binaries):
  30     for b in binaries:
  31         try:
  32             subprocess.call([b], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL, shell=Shell)
  33         except:
  34             error_exit('failed to execute {}'.format(b))
  35 
  36 # functions
  37 def get_audio(youtubedl, url, outdir, format='mp3', quality='320K', keep=True, ffmpeg=''):
  38     print('Downloading {} ({}, {})...'.format(url, format, quality))
  39     fname = '{}/{}'.format(outdir, os.path.basename(outdir), format)
  40     cmd = [youtubedl, '--newline', '--extract-audio', '--audio-format', format,
  41             '--audio-quality', quality, '-o', fname + '.%(ext)s']
  42     if ffmpeg != '':
  43         cmd.append('--ffmpeg-location')
  44         cmd.append(ffmpeg)
  45     if keep == True:
  46         cmd.append('-k')
  47     cmd.append(url)
  48     p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=Shell)
  49     for line in p.stdout.readlines():
  50         print('    {}'.format(line.decode('utf-8', errors='ignore').strip()))
  51     return '{}.{}'.format(fname, format)
  52 
  53 def load_tracklist(path):
  54     tracklist = []
  55     url = ''
  56     tracklist_file = open(path, mode = 'r', encoding='utf-8', errors='ignore')
  57     for i, t in enumerate(tracklist_file.readlines()):
  58         t = t.strip('\n\t ')
  59         if i == 0:
  60             url = t
  61         else:
  62             tracklist.append(t)
  63     tracklist_file.close()
  64     return url, tracklist
  65 
  66 def parse_tracks(tracklist):
  67     '''
  68     This function parses each line of tracklist and
  69     matches it against the `Timestamp` regex.
  70     There is a heuristic check against timestamps on
  71     the right and left of lines (some track names may
  72     have timestamp-looking elements).
  73     '''
  74     tracks = []
  75     weightR = 0 # num. timestamps on right-side
  76     weightL = 0 # num. timestamps on left-side
  77     for lcount, line in enumerate(tracklist):
  78         if len(line.strip(' ')) == 0:
  79             continue
  80         sline = line.split(' ')
  81         
  82         timestamp = None
  83         for i, l in enumerate(sline):
  84             if i != 0 and i != len(sline)-1:
  85                 continue
  86             elif Timestamp.match(l):
  87                 if timestamp == None or weightR > weightL:
  88                     timestamp = l.strip(' \t[()]')
  89                 if i == 0:
  90                     weightL += 1
  91                 else:
  92                     weightR += 1
  93                 sline.remove(l)
  94         if timestamp == None:
  95             print('line {}, missing timestamp: "{}"'.format(lcount, line))
  96         elif timestamp[0] != '0' and timestamp[1] == ':':
  97             timestamp = '0' + timestamp
  98         
  99         line = ' '.join(sline)
 100         line = re.sub(TrackNum, '', line)
 101         title = re.sub(UnsafeFilenameChars, '', line)
 102         
 103         tracks.append(TracklistItem(timestamp, title))
 104     return tracks
 105 
 106 def missing_times(tracks):
 107     missing = []
 108     for i, t in enumerate(tracks):
 109         if t.timestamp == None:
 110             missing.append(i)
 111     return missing
 112 
 113 def read_tracklen(ffmpeg, track_fpath):
 114     cmd = [ffmpeg, '-v', 'quiet', '-stats', '-i', track_fpath, '-f', 'null', '-']
 115     length = '00:00'
 116     try:
 117         ret = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=Shell)
 118         length = str(ret).split('\\r')
 119         # some nasty string manip. to extract length (printed to stderr)
 120         if sys.platform == 'win32':
 121             length = length[len(length)-2].split(' ')[1].split('=')[1][:-3]
 122         else:
 123             length = length[len(length)-1].split(' ')[1].split('=')[1][:-3]
 124         print('Track length: {}'.format(length))
 125     except:
 126         error_exit('Failed to find track length, aborting.')
 127     return length
 128 
 129 def split_tracks(ffmpeg, audio_fpath, audio_len, tracks, format='mp3', outpath='out'):    
 130     print('Splitting...')
 131     for i, t in enumerate(tracks):
 132         outfile = '{}{}{} - {}.{}'.format(outpath, os.path.sep, str(i+1).zfill(2), t.title.strip(' - '), format)
 133         end = audio_len
 134         if i < len(tracks)-1:
 135             end = tracks[i+1].timestamp
 136         print('     {} ({} - {})'.format(outfile, t.timestamp, end))
 137         cmd = ['ffmpeg', '-nostdin', '-y', '-loglevel', 'error', 
 138             '-i', audio_fpath, '-ss', t.timestamp, '-to', end,
 139             '-acodec', 'copy', outfile]
 140         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=Shell)
 141         for line in p.stdout.readlines():
 142             print('    {}'.format(line.decode('utf-8', errors='ignore').strip()))
 143     return
 144 
 145 # runtime
 146 def parse_args():
 147     parser = argparse.ArgumentParser(
 148         description='download & split audio tracks long youtube videos')
 149     # arguments
 150     parser.add_argument('tracklist', metavar='TRACKLIST', nargs='*',
 151         help='tracklist of title and timestamp information to split audio by')
 152     # options
 153     parser.add_argument('-v', '--version',
 154         action='store_true', default=False, dest='vers',
 155         help='Print version information')
 156     parser.add_argument('-o', '--output',
 157         metavar='PATH', type=str, nargs='?', dest='output',
 158         help='specify the directory to write output files to (default: ./out)')
 159     parser.add_argument('-f', '--format',
 160         type=str, nargs='?', default='mp3', dest='format',
 161         help='specify the --audio-format argument to pass to yt-dlp (default: mp3)')
 162     parser.add_argument('-q', '--quality',
 163         type=str, nargs='?', default='320K', dest='quality',
 164         help='specify the --audio-quality argument to pass to yt-dlp (default: 320K)')
 165     parser.add_argument('--yt-dlp',
 166         metavar='PATH', type=str, nargs='?', dest='youtubedl',
 167         help='path of the "yt-dlp" binary to use')
 168     parser.add_argument('--ffmpeg',
 169         metavar='PATH', type=str, nargs='?', dest='ffmpeg',
 170         help='path of the "ffmpeg" binary to use')
 171     parser.add_argument('-k', '--keep',
 172         action='store_true', default=False, dest='keep',
 173         help='keep any files removed during processing (full video/audio file)')
 174     return parser.parse_args()
 175 
 176 def sya(args):
 177     if args.vers == True:
 178         print(Version)
 179         return
 180         
 181     if args.youtubedl == None:
 182         args.youtubedl = 'yt-dlp.exe' if sys.platform == 'win32' else 'yt-dlp'
 183     if args.ffmpeg == None:
 184         args.ffmpeg = 'ffmpeg.exe' if sys.platform == 'win32' else 'ffmpeg'
 185 
 186     if check_bin(args.youtubedl, args.ffmpeg) == False:
 187         error_exit('required binaries are missing')
 188     if args.output != None and len(args.output) > 0 and args.output[len(args.output)-1] == os.sep:
 189         args.output = args.output[:-1]
 190 
 191     for t in args.tracklist:
 192         if args.tracklist == None or os.path.exists(t) == False:
 193             error_exit('missing tracklist "{}"'.format(t))
 194         url, tracklist = load_tracklist(t)
 195 
 196         output = args.output if args.output != None else os.path.splitext(t)[0]
 197         if args.ffmpeg == 'ffmpeg' or args.ffmpeg == 'ffmpeg.exe':
 198             audio_fpath = get_audio(args.youtubedl, url, output, args.format, args.quality, args.keep, '')
 199         else:
 200             audio_fpath = get_audio(args.youtubedl, url, output, args.format, args.quality, args.keep, args.ffmpeg)
 201         if os.path.exists(audio_fpath) == False:
 202             error_exit('download failed, aborting')
 203 
 204     
 205         tracks = parse_tracks(tracklist)
 206         
 207         missing = missing_times(tracks)
 208         if len(missing) > 0:
 209             error_exit('some tracks are missing timestamps')
 210 
 211         length = read_tracklen(args.ffmpeg, audio_fpath)
 212         os.makedirs(output, exist_ok=True)
 213         split_tracks(args.ffmpeg, audio_fpath, length, tracks, args.format, output)
 214 
 215         if args.keep is False:
 216             os.remove(audio_fpath)
 217 
 218         print('Success')
 219 
 220 if __name__ == '__main__':
 221     sya(parse_args())
 222