sya

split youtube audio tracks, with an optional pyqt gui
git clone git://src.gearsix.net/sya
Log | Files | Refs | Atom | README

sya.py (7666B)


      1 #!/usr/bin/env python3
      2 
      3 # std
      4 import argparse
      5 import subprocess
      6 import re
      7 import os
      8 import sys
      9 
     10 Version = 'v1.0.1'
     11 
     12 Shell = True if sys.platform == 'win32' else False
     13 
     14 UnsafeFilenameChars = re.compile('[/\\?%*:|\"<>\x7F\x00-\x1F]')
     15 TrackNum = re.compile('(?:\d+.? ?-? ?)')
     16 Timestamp = re.compile('(?: - )?(?:[\t ]+)?(?:[\[\(]+)?((\d+[:.])+(\d+))(?:[\]\)])?(?:[\t ]+)?(?: - )?')
     17 
     18 class TracklistItem:
     19     def __init__(self, timestamp, title):
     20         self.timestamp = timestamp
     21         self.title = title
     22 
     23 
     24 # utilities
     25 def error_exit(msg):
     26     print('exit failure "{}"'.format(msg))
     27     sys.exit()
     28 
     29 def check_bin(*binaries):
     30     for b in binaries:
     31         try:
     32             subprocess.call([b], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL, shell=Shell)
     33         except:
     34             error_exit('failed to execute {}'.format(b))
     35 
     36 # functions
     37 def get_audio(youtubedl, url, outdir, format='mp3', quality='320K', keep=True, ffmpeg=''):
     38     print('Downloading {} ({}, {})...'.format(url, format, quality))
     39     fname = '{}/{}'.format(outdir, os.path.basename(outdir), format)
     40     cmd = [youtubedl, '--newline', '--extract-audio', '--audio-format', format,
     41             '--audio-quality', quality, '-o', fname + '.%(ext)s']
     42     if ffmpeg != '':
     43         cmd.append('--ffmpeg-location')
     44         cmd.append(ffmpeg)
     45     if keep == True:
     46         cmd.append('-k')
     47     cmd.append(url)
     48     p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=Shell)
     49     for line in p.stdout.readlines():
     50         print('    {}'.format(line.decode('utf-8', errors='ignore').strip()))
     51     return '{}.{}'.format(fname, format)
     52 
     53 def load_tracklist(path):
     54     tracklist = []
     55     url = ''
     56     tracklist_file = open(path, mode = 'r')
     57     for i, t in enumerate(tracklist_file.readlines()):
     58         t = t.strip('\n\t ')
     59         if i == 0:
     60             url = t
     61         else:
     62             tracklist.append(t)
     63     tracklist_file.close()
     64     return url, tracklist
     65 
     66 def parse_tracks(tracklist):
     67     tracks = []
     68     weightR = 0 # num. timestamps on right-side
     69     weightL = 0 # num. timestamps on left-side
     70     for lcount, line in enumerate(tracklist):
     71         sline = line.split(' ')
     72         
     73         timestamp = None
     74         for i, l in enumerate(sline):
     75             if i != 0 and i != len(sline)-1:
     76                 continue
     77             elif Timestamp.match(l):
     78                 if timestamp == None or weightR > weightL:
     79                     timestamp = l.strip(' \t[()]')
     80                 if i == 0:
     81                     weightL += 1
     82                 else:
     83                     weightR += 1
     84                 sline.remove(l)
     85         if timestamp == None:
     86             print('line {}, missing timestamp: "{}"'.format(lcount, line))
     87         
     88         line = ' '.join(sline)
     89         line = re.sub(TrackNum, '', line)
     90         title = re.sub(UnsafeFilenameChars, '', line)
     91         
     92         tracks.append(TracklistItem(timestamp, title))
     93     return tracks
     94 
     95 def missing_times(tracks):
     96     missing = []
     97     for i, t in enumerate(tracks):
     98         if t.timestamp == None:
     99             missing.append(i)
    100     return missing
    101 
    102 def read_tracklen(ffmpeg, track_fpath):
    103     cmd = [ffmpeg, '-v', 'quiet', '-stats', '-i', track_fpath, '-f', 'null', '-']
    104     length = '00:00'
    105     try:
    106         ret = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=Shell)
    107         length = str(ret).split('\\r')
    108         # some nasty string manip. to extract length (printed to stderr)
    109         if sys.platform == 'win32':
    110             length = length[len(length)-2].split(' ')[1].split('=')[1][:-3]
    111         else:
    112             length = length[len(length)-1].split(' ')[1].split('=')[1][:-3]
    113         print('Track length: {}'.format(length))
    114     except:
    115         error_exit('Failed to find track length, aborting.')
    116     return length
    117 
    118 def split_tracks(ffmpeg, audio_fpath, audio_len, tracks, format='mp3', outpath='out'):    
    119     print('Splitting...')
    120     for i, t in enumerate(tracks):
    121         outfile = '{}{}{} - {}.{}'.format(outpath, os.path.sep, str(i+1).zfill(2), t.title.strip(' - '), format)
    122         end = audio_len
    123         if i < len(tracks)-1:
    124             end = tracks[i+1].timestamp
    125         print('     {} ({} - {})'.format(outfile, t.timestamp, end))
    126         cmd = ['ffmpeg', '-nostdin', '-y', '-loglevel', 'error', 
    127             '-i', audio_fpath, '-ss', t.timestamp, '-to', end,
    128             '-acodec', 'copy', outfile]
    129         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=Shell)
    130         for line in p.stdout.readlines():
    131             print('    {}'.format(line.decode('utf-8', errors='ignore').strip()))
    132     return
    133 
    134 # runtime
    135 def parse_args():
    136     parser = argparse.ArgumentParser(
    137         description='download & split audio tracks long youtube videos')
    138     # arguments
    139     parser.add_argument('tracklist', metavar='TRACKLIST', nargs='*',
    140         help='tracklist of title and timestamp information to split audio by')
    141     # options
    142     parser.add_argument('-o', '--output',
    143         metavar='PATH', type=str, nargs='?', dest='output',
    144         help='specify the directory to write output files to (default: ./out)')
    145     parser.add_argument('-f', '--format',
    146         type=str, nargs='?', default='mp3', dest='format',
    147         help='specify the --audio-format argument to pass to yt-dlp (default: mp3)')
    148     parser.add_argument('-q', '--quality',
    149         type=str, nargs='?', default='320K', dest='quality',
    150         help='specify the --audio-quality argument to pass to yt-dlp (default: 320K)')
    151     parser.add_argument('--yt-dlp',
    152         metavar='PATH', type=str, nargs='?', dest='youtubedl',
    153         help='path of the "yt-dlp" binary to use')
    154     parser.add_argument('--ffmpeg',
    155         metavar='PATH', type=str, nargs='?', dest='ffmpeg',
    156         help='path of the "ffmpeg" binary to use')
    157     parser.add_argument('-k', '--keep',
    158         action='store_true', default=False, dest='keep',
    159         help='keep any files removed during processing (full video/audio file)')
    160     return parser.parse_args()
    161 
    162 def sya(args):
    163     if args.youtubedl == None:
    164         args.youtubedl = 'yt-dlp.exe' if sys.platform == 'win32' else 'yt-dlp'
    165     if args.ffmpeg == None:
    166         args.ffmpeg = 'ffmpeg.exe' if sys.platform == 'win32' else 'ffmpeg'
    167 
    168     if check_bin(args.youtubedl, args.ffmpeg) == False:
    169         error_exit('required binaries are missing')
    170     if args.output != None and len(args.output) > 0 and args.output[len(args.output)-1] == os.sep:
    171         args.output = args.output[:-1]
    172 
    173     for t in args.tracklist:
    174         if args.tracklist == None or os.path.exists(t) == False:
    175             error_exit('missing tracklist "{}"'.format(t))
    176         url, tracklist = load_tracklist(t)
    177 
    178         output = args.output if args.output != None else os.path.splitext(t)[0]
    179         if args.ffmpeg == 'ffmpeg' or args.ffmpeg == 'ffmpeg.exe':
    180             audio_fpath = get_audio(args.youtubedl, url, output, args.format, args.quality, args.keep, '')
    181         else:
    182             audio_fpath = get_audio(args.youtubedl, url, output, args.format, args.quality, args.keep, args.ffmpeg)
    183         if os.path.exists(audio_fpath) == False:
    184             error_exit('download failed, aborting')
    185 
    186     
    187         tracks = parse_tracks(tracklist)
    188         
    189         missing = missing_times(tracks)
    190         if len(missing) > 0:
    191             error_exit('some tracks are missing timestamps')
    192 
    193         length = read_tracklen(args.ffmpeg, audio_fpath)
    194         os.makedirs(output, exist_ok=True)
    195         split_tracks(args.ffmpeg, audio_fpath, length, tracks, args.format, output)
    196 
    197         if args.keep is False:
    198             os.remove(audio_fpath)
    199 
    200         print('Success')
    201 
    202 if __name__ == '__main__':
    203     sya(parse_args())
    204