#!/usr/local/bin/perl
#
# gene_names2orf2gene.pl
#
# Parse the gene_names file from Wormbase/sanger, and
# turn it into the old format of orf2gene.
#
# Input is read from the files named on the command line, or from
# standard input when none are given; output goes to standard output.
#
# The new gene_names file looks like: (tab separated fields)
#   abl-1  Caenorhabditis elegans  Gene  M79.1b,M79.1c,M79.1a  X:10596707..10614654 bp (-)
#
# Only the first field (locus name) and the fourth field (comma separated
# sequence names) are used.
#
# orf2gene looks like (one record per locus, followed by a blank line;
# sequences are written in the order they appear in the input):
#   Locus : "abl-1"
#   Genomic_Sequence "M79.1b"
#   Genomic_Sequence "M79.1c"
#   Genomic_Sequence "M79.1a"

use strict;
use warnings;

while ( my $line = <> ) {
    chomp $line;
    my @fields = split /\t/, $line;

    # Skip blank lines and lines without a locus name.
    my $locus = $fields[0];
    next unless defined $locus && length $locus;

    # Short lines may have no fourth column at all; treat that as "no
    # sequences" instead of operating on an undefined value.
    my $sequence_field = defined $fields[3] ? $fields[3] : '';

    # A single code path handles both one and many sequences.  Empty
    # entries produced by stray commas ("a,,b", ",a", "a,") are dropped so
    # they neither emit an empty Genomic_Sequence line nor cause the whole
    # locus to be skipped.
    my @sequences = grep { length } split /,/, $sequence_field;
    next unless @sequences;

    printf( "Locus : \"%s\"\n", $locus );
    for my $sequence (@sequences) {
        printf( "Genomic_Sequence\t \"%s\"\n", $sequence );
    }
    print "\n";
}