Retrosheet reader

OK, so you just got back from the Ohio LinuxFest 2009 (meh, not exactly a hoppin’ place this year; I was kind of disappointed), and you have converted your event files to CSV files using the BEVENT batch files you generated in last weekend’s post.

Now, you would like to read in that data and start looking through it for anything useful. Well, this weekend’s post will help you with the reading-in part, and you can take it from there if you would like.

Here is the C# console code. I have put all of the CSV files, ROS (roster) files, and the TEAM???? file into a folder on my C: drive called baseball_data; if your folder is different, just change the constant defined in the code below:

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
 
namespace RetrosheetReader
{
 
    /// <summary>
    /// One team record for a given year, built from a 4-field line of a team file.
    /// </summary>
    class Team
    {
        /// <summary>Year supplied by the caller when the record was read.</summary>
        public int Year { get; private set; }

        /// <summary>Fourth field of the record (index 2).</summary>
        public string City { get; private set; }

        /// <summary>Last field of the record (index 3).</summary>
        public string Nickname { get; private set; }

        /// <summary>First field of the record (index 0).</summary>
        public string Abbreviation { get; private set; }

        /// <summary>Second field of the record (index 1).</summary>
        public string League { get; private set; }

        /// <summary>
        /// Builds a team from an already-split record.
        /// </summary>
        /// <param name="y">Year the record belongs to.</param>
        /// <param name="a">
        /// Record fields in file order: abbreviation, league, city, nickname.
        /// Must contain at least four elements; the caller is expected to have
        /// checked the field count.
        /// </param>
        public Team(int y, string[] a)
        {
            Year = y;
            Abbreviation = a[0];
            League = a[1];
            City = a[2];
            Nickname = a[3];
        }
    }
 
    /// <summary>
    /// One roster entry for a given year, built from a 7-field line of a roster file.
    /// </summary>
    class Player
    {
        /// <summary>Year supplied by the caller when the record was read.</summary>
        public int Year { get; private set; }

        /// <summary>Team field of the record (index 5).</summary>
        public string Team { get; private set; }

        /// <summary>Player identifier, first field of the record (index 0).</summary>
        public string PlayerID { get; private set; }

        /// <summary>First name (index 2; the file lists the last name first).</summary>
        public string FirstName { get; private set; }

        /// <summary>Last name (index 1).</summary>
        public string LastName { get; private set; }

        /// <summary>Batting field of the record (index 3).</summary>
        public string Bats { get; private set; }

        /// <summary>Throwing field of the record (index 4).</summary>
        public string Throws { get; private set; }

        /// <summary>Position field of the record (index 6).</summary>
        public string Position { get; private set; }

        /// <summary>
        /// Builds a player from an already-split roster record.
        /// </summary>
        /// <param name="y">Year the roster belongs to.</param>
        /// <param name="a">
        /// Record fields in file order: player ID, last name, first name, bats,
        /// throws, team, position. Must contain at least seven elements; the
        /// caller is expected to have checked the field count.
        /// </param>
        public Player(int y, string[] a)
        {
            Year = y;
            PlayerID = a[0];
            LastName = a[1];
            FirstName = a[2];
            Bats = a[3];
            Throws = a[4];
            Team = a[5];
            Position = a[6];
        }
    }
 
    /// <summary>
    /// One play-by-play event built from a line of an event CSV file.
    /// Only the first seven fields of the record are captured so far; any
    /// remaining columns are ignored.
    /// </summary>
    class Event
    {
        /// <summary>Game identifier (index 0).</summary>
        public string GameID { get; private set; }

        /// <summary>Visiting team field (index 1).</summary>
        public string VisitingTeam { get; private set; }

        /// <summary>Inning number (index 2).</summary>
        public int Inning { get; private set; }

        /// <summary>Batting team field, kept as the raw text from the file (index 3).</summary>
        public string BattingTeam { get; private set; }

        /// <summary>Out count (index 4).</summary>
        public int Outs { get; private set; }

        /// <summary>Ball count (index 5).</summary>
        public int Balls { get; private set; }

        /// <summary>Strike count (index 6).</summary>
        public int Strikes { get; private set; }

        // The rest of the event columns are still to be mapped.

        /// <summary>
        /// Builds an event from an already-split record.
        /// </summary>
        /// <param name="a">
        /// Record fields in file order; at least seven elements are required.
        /// Fields 2, 4, 5 and 6 must be parseable as integers.
        /// </param>
        /// <exception cref="FormatException">
        /// A numeric field does not contain a valid integer.
        /// </exception>
        public Event(string[] a)
        {
            GameID = a[0];
            VisitingTeam = a[1];
            Inning = Convert.ToInt32(a[2]);
            BattingTeam = a[3];
            Outs = Convert.ToInt32(a[4]);
            Balls = Convert.ToInt32(a[5]);
            Strikes = Convert.ToInt32(a[6]);
        }
    }
 
    /// <summary>
    /// Console entry point: loads every team, roster and event file found in
    /// the <c>DATA</c> folder and prints how many records of each kind were read.
    /// </summary>
    class Program
    {
        // Folder holding the team, roster (*.ros) and event (*.csv) files;
        // change this to match your own setup.
        const string DATA = "c:\\baseball_data\\";

        static void Main(string[] args)
        {
            List<Team> teamList = new List<Team>();
            List<Player> playerList = new List<Player>();
            List<Event> eventList = new List<Event>();

            Console.WriteLine("Retrosheet Reader");
            Console.WriteLine();

            // Team files: the year is whatever follows the first four
            // characters of the file name (e.g. "TEAM2009" -> 2009).
            foreach (string tf in Directory.GetFiles(DATA, "team*"))
            {
                int y = Convert.ToInt32(Path.GetFileName(tf).Substring(4));
                foreach (string[] fields in ReadRecords(tf, 4))
                {
                    teamList.Add(new Team(y, fields));
                }
            }

            // Roster files: skip the first three characters of the file name,
            // then take everything up to the first dot as the year.
            foreach (string rf in Directory.GetFiles(DATA, "*.ros"))
            {
                int y = Convert.ToInt32(Path.GetFileName(rf).Substring(3).Split('.')[0]);
                foreach (string[] fields in ReadRecords(rf, 7))
                {
                    playerList.Add(new Player(y, fields));
                }
            }

            // Event files: only lines with exactly 97 columns are accepted.
            foreach (string ef in Directory.GetFiles(DATA, "*.csv"))
            {
                Console.WriteLine("Reading events in " + ef);
                foreach (string[] fields in ReadRecords(ef, 97))
                {
                    eventList.Add(new Event(fields));
                }
            }

            Console.WriteLine("Number of teams: " + teamList.Count);
            Console.WriteLine("Number of players: " + playerList.Count);
            Console.WriteLine("Number of events: " + eventList.Count);

            Console.WriteLine();
            Console.Write("Strike any key to end...");
            Console.ReadKey();
        }

        /// <summary>
        /// Reads a comma-separated text file line by line and yields the split
        /// fields of every line that has exactly <paramref name="fieldCount"/>
        /// fields. Lines with any other field count are silently skipped.
        /// </summary>
        /// <param name="path">Full path of the file to read.</param>
        /// <param name="fieldCount">Number of fields a line must have to be returned.</param>
        /// <returns>The split fields of each accepted line, in file order.</returns>
        private static IEnumerable<string[]> ReadRecords(string path, int fieldCount)
        {
            // The using block closes the file when enumeration finishes or is abandoned.
            using (StreamReader sr = new StreamReader(path))
            {
                string s;
                while ((s = sr.ReadLine()) != null)
                {
                    // NOTE(review): plain split on ',' - a quoted field containing a
                    // comma would change the field count and the line would be
                    // dropped; confirm the input never contains such fields.
                    string[] splitLine = s.Split(',');
                    if (splitLine.Length == fieldCount)
                    {
                        yield return splitLine;
                    }
                }
            }
        }
    }
}

I have an idea as to the first thing that I am going to look for in the Retrosheet data, so tune in next weekend and I will (hopefully) have some interesting insights.

Leave a Reply