Python - loop through same query with different variables, merge data frames

Question

I have a query in SAS where I use a macro variable to repeat a query to Teradata with a different variable. We have 5 databases, one for each state, where I run the same query but use the variable to update the state, then merge all the data sets. I'm looking for help with how I can do this in Python.

Loop through {state1, state2, state3, state4, state5}, save each query result as {stateX}_df, then merge them all.

import teradata as td
import pandas as pd
from teradata import tdodbc

udaExec = td.UdaExec(appConfigFile="udaexec.ini")

with udaExec.connect("${dataSourceName}", LoginTimeout=120) as session:     

query1 = """database my_db_{state1};"""

     query2 = """  
                select  distinct
                {state1}, item_a, item_b
                from table

              """  
    session.execute(query1)
    session.execute(query2)

    {stateX}_df = pd.read_sql(query2), session)

Answer 1

Not sure if you are using Python 2 or Python 3. If you can use Python 3.6 or later (needed for the f-string used below), maybe something like the following could work?

import teradata as td
import pandas as pd

# Create the UdaExec application context; the connection details are passed
# directly to connect() below.
udaExec = td.UdaExec(appName="test", version="1.0", logConsole=False)

# One entry per state. Each name is interpolated into both the selected column
# and the database name (my_db_<state>).
STATES = ["state1", "state2", "state3", "state4", "state5"]
state_dataframes = []

with udaExec.connect(
    method="odbc",
    system="host",
    username="username",
    password="password",
    driver="drivername"
    ) as conn:
    # The loop must be indented under the ``with`` header: a ``with`` statement
    # without an indented body is an IndentationError, and the queries need
    # ``conn`` while it is in scope.
    for state in STATES:
        sql = f"select distinct {state}, item_a, item_b from my_db_{state}.table;"
        # Run the query for this state and keep its result set.
        state_dataframes.append(pd.read_sql(sql, conn))

# Stack the per-state result sets into a single DataFrame.
combined_data = pd.concat(state_dataframes)

This isn't tested, but hopefully it gets you going in the right direction.

Answer 2

I was able to get this working on a single test query, which was really helpful so thank you @andrew madsen

What I have not solved yet is how to do this across multiple queries that I use. I have been reading about cursors and connections and I think that will get me there.

import teradata as td
import pandas as pd
from teradata import tdodbc

# Build the UdaExec application context from the local ini file.
udaExec = td.UdaExec(appConfigFile="udaexec.ini")

with udaExec.connect("${dataSourceName}") as session:

    # Collects one DataFrame per state.
    state_dataframes = []
    STATES = ["IL", "TX", "MT", "OK", "NM"]

    for state in STATES:

        # The state code is used twice: as a literal ``state`` column value
        # and as the suffix of the database name.
        sql = """      
        select top 10
        '{}' as state
        ,a.*
         from my_db_{}.table a
        """.format(state,state)

        # This append must be inside the loop. At the ``for`` level it would
        # run only once, after the loop, and read just the last state's query.
        state_dataframes.append(pd.read_sql(sql, session))

    # Stack the per-state result sets into a single DataFrame.
    all_states_df = pd.concat(state_dataframes)

Answer 3

Here is an improved version that uses volatile tables (see also: "Python SQL loop variables through multiple queries"):

# Build the UdaExec application context from the local ini file.
udaExec = td.UdaExec(appConfigFile="udaexec.ini")

with udaExec.connect("${dataSourceName}") as session:

    # Collects one DataFrame per state.
    state_dataframes = []
    STATES = ["state1", "state2", "state3", "state4", "state5"]

    # For each state: switch database, build two chained volatile tables,
    # read the final one into pandas, then drop both tables so the next
    # iteration can recreate them under the same names.
    for state in STATES:

        # Switches the session's default database to the current state's.
        query1 = """database my_db_{};"""

        # First volatile table: sample rows tagged with the state name.
        query2 = """   
        create set volatile table v_table
        ,no fallback, no before journal, no after journal as
        (  
        select top 10
        '{}' as state
        ,t.*
        from table t
        )   
        with data
        primary index (dw_key)  
        on commit preserve rows;
        """

        # Second volatile table, derived from the first.
        # NOTE(review): this one is indexed on dw_clm_key while v_table uses
        # dw_key; confirm both columns exist in the source table.
        query3 = """
        create set volatile table v_table_2
        ,no fallback, no before journal, no after journal as
        (  
        select t.*
        from v_table t
        )   
        with data
        primary index (dw_clm_key)  
        on commit preserve rows;

        """

        # Final select whose result is loaded into pandas.
        query4 = """

        select t.* 
        from v_table_2 t

        """

        session.execute(query1.format(state))
        session.execute(query2.format(state))
        session.execute(query3)
        # query4 is not executed separately: pd.read_sql runs it itself, so a
        # preceding session.execute(query4) would only run the select twice.
        state_dataframes.append(pd.read_sql(query4, session))
        session.execute("DROP TABLE v_table")
        session.execute("DROP TABLE v_table_2")

# Stack the per-state result sets into a single DataFrame.
all_states_df = pd.concat(state_dataframes)

Python - loop through same query with different variables, merge data frames

Question

3 answers

solution1
1 2020-02-19 05:18:30

solution2
0 2020-02-20 13:27:16

solution3
0 ACCEPTED 2020-02-23 01:12:00

Python - loop through same query with different variables, merge data frames

Question

3 answers

solution1 1 2020-02-19 05:18:30

solution2 0 2020-02-20 13:27:16

solution3 0 ACCEPTED 2020-02-23 01:12:00

solution1
1 2020-02-19 05:18:30

solution2
0 2020-02-20 13:27:16

solution3
0 ACCEPTED 2020-02-23 01:12:00