# Source code for ucsc_download_script

import argparse
import logging
import os
import subprocess
from datetime import datetime

import mysql.connector
from mysql.connector import errorcode

"""
This script downloads tables, specifically refGene and snp150Common, from the UCSC MySQL database.
Because access to this database is blocked by the UKA firewall, this script is not part of the main primertool and
has to be run manually from the command line to fetch the tables for the tool.
_Note_: Some tables are very large and may take a long time to download.
"""


def fetch_db_table(genome_assembly: str, table: str):
    """Dump one table from the public UCSC MySQL server into a local ``.sql`` file.

    Runs ``mysqldump`` against ``genome-euro-mysql.soe.ucsc.edu`` as user ``genome`` and
    writes the output to ``primertool/ucsc-local/<YYYYMMDD>_<table>.sql``. The call blocks
    until ``mysqldump`` has finished. Failures (missing executable, unwritable target,
    non-zero exit status) are logged as errors rather than raised.

    Args:
        genome_assembly: Name of the UCSC database to dump from (e.g. ``hg38``).
        table: Name of the table to dump (e.g. ``refGene``).
    """
    download_dir = 'primertool/ucsc-local'
    date_stamp = datetime.today().strftime('%Y%m%d')
    output_path = f'{download_dir}/{date_stamp}_{table}.sql'
    logging.info(f"Downloading table '{table}' from UCSC MySQL database '{genome_assembly}' to '{download_dir}'")

    # Passing an argument list (no shell) keeps the assembly/table names from being
    # interpreted by a shell, so odd characters cannot alter the command.
    command = [
        'mysqldump', '--single-transaction',
        '-u', 'genome',
        '-h', 'genome-euro-mysql.soe.ucsc.edu',
        genome_assembly, table,
    ]
    try:
        # The dump can only be written if the target directory exists.
        os.makedirs(download_dir, exist_ok=True)
        with open(output_path, 'wb') as dump_file:
            # run() waits for mysqldump, so the exit status can be inspected and the
            # script does not exit while a dump is still being written.
            completed = subprocess.run(command, stdout=dump_file)
    except OSError as err:
        # Raised e.g. when mysqldump is not installed or the file cannot be opened.
        logging.error(f"Could not download table '{table}' to '{output_path}': {err}")
        return

    if completed.returncode != 0:
        logging.error(f"mysqldump exited with status {completed.returncode} for table '{table}'; "
                      f"'{output_path}' may be empty or incomplete.")
def create_db(genome_assembly, table: str):
    """Create a local MySQL database and load a table dump into it.

    Not used by the main tool. Connects to the MySQL server on ``localhost`` as user
    ``genome``, creates the database ``genome_assembly`` if it does not exist yet, pipes
    ``<table>.sql`` (relative to the current working directory) through the ``mysql``
    command line client and finally looks the table up to confirm the import.

    Args:
        genome_assembly: Name of the local database to create and load into.
        table: Table name; the dump is read from ``<table>.sql``.

    Returns:
        The rows returned by ``SHOW TABLES LIKE <table>`` after the import (a non-empty
        list when the table is present), or ``None`` if an error occurred or the table
        could not be found.
    """
    # general ucsc sql database config; no default database is selected here because
    # connecting fails when the database does not exist yet
    ucsc_config = dict(user='genome', password='', host='localhost', raise_on_warnings=True)
    dump_path = f"{table}.sql"
    # Identifiers cannot be bound as query parameters, so quote the name explicitly.
    db_identifier = '`' + genome_assembly.replace('`', '``') + '`'

    query_result = None
    try:
        with mysql.connector.connect(**ucsc_config) as connection, connection.cursor() as cursor:
            try:
                cursor.execute(f"CREATE DATABASE {db_identifier}")
            except mysql.connector.Error as err:
                # An already existing database is fine; anything else is a real failure.
                if err.errno != errorcode.ER_DB_CREATE_EXISTS:
                    raise

            # SOURCE is a command of the mysql command line client, not an SQL statement,
            # so the dump is fed to that client instead of being sent through the cursor.
            with open(dump_path, 'rb') as dump_file:
                completed = subprocess.run(
                    ['mysql', '-u', 'genome', '-h', 'localhost', genome_assembly],
                    stdin=dump_file,
                )
            if completed.returncode != 0:
                logging.error(f"mysql exited with status {completed.returncode} while loading '{dump_path}'.")
            else:
                cursor.execute(f"USE {db_identifier}")
                cursor.execute("SHOW TABLES LIKE %s", (table,))
                query_result = cursor.fetchall()
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            logging.error("Access denied. Check your username and password.")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            logging.error(f"Database '{genome_assembly}' does not exist.")
        else:
            logging.error(err)
    except OSError as err:
        # Raised e.g. when the dump file or the mysql client executable is missing.
        logging.error(f"Could not load '{dump_path}': {err}")

    if not query_result:
        logging.debug('This database query did not return any results. Please check your input.')
        return None
    return query_result
if __name__ == '__main__':
    # Without a configured handler the INFO progress messages would never be shown.
    logging.basicConfig(level=logging.INFO)

    # Argument parser
    parser = argparse.ArgumentParser(
        description="Download tables from the UCSC MySQL database and optionally load them into a local database.")
    parser.add_argument('--fetch', '-f', action='store_true',
                        help='Download the selected tables from the UCSC database')
    parser.add_argument('--genome-assembly', '-g', choices=['hg19', 'hg38'], default='hg38', action='store',
                        help='Genome assembly to use (either hg19 or hg38; more are also available at UCSC)')
    parser.add_argument("--table", "-t", nargs='*', default=['refGene', 'snp150Common'],
                        help="Name of the database table to fetch from the UCSC database "
                             "(default: refGene and snp150Common)")
    parser.add_argument('--create', '-c', action='store_true',
                        help='Load the selected tables into a local MySQL database')
    args = parser.parse_args()

    # Fetch the tables
    if args.fetch:
        for table in args.table:
            fetch_db_table(args.genome_assembly, table)

    # Load the tables into the local database, honouring the same command line
    # selection as --fetch instead of a fixed assembly/table pair.
    if args.create:
        for table in args.table:
            create_db(args.genome_assembly, table)