...
pytd.pandas_td.create_engine
(url, con=None, header=True, show_progress=5.0, clear_progress=True)[source]
...
pytd.query_engine.QueryEngine
Examples
Code Block | ||
---|---|---|
| ||
>>> import pytd.pandas_td as td
>>> con = td.connect(apikey=apikey, endpoint="https://api.treasuredata.com")
>>> engine = td.create_engine("presto:sample_datasets")
|
pytd.pandas_td.read_td_query
pytd.pandas_td.read_td_query
(query, engine, index_col=None, parse_dates=None, distributed_join=False, params=None)[source]
...
Return type
pandas.DataFrame
pytd.pandas_td.read_td_job
pytd.pandas_td.read_td_job
(job_id, engine, index_col=None, parse_dates=None)[source]
...
Return type
pandas.DataFrame
pytd.pandas_td.read_td_table
pytd.pandas_td.read_td_table
(table_name, engine, index_col=None, parse_dates=None, columns=None, time_range=None, limit=10000)[source]
...
Return type
pandas.DataFrame
pytd.pandas_td.read_td
pytd.pandas_td.read_td
(query, engine, index_col=None, parse_dates=None, distributed_join=False, params=None)[source]
...
Return type
pandas.DataFrame
pytd.pandas_td.to_td
pytd.pandas_td.to_td
(frame, name, con, if_exists='fail', time_col=None, time_index=None, index=True, index_label=None, chunksize=10000, date_format=None, writer='bulk_import', **kwargs)[source]
...
|
---|
pytd.pandas_td.ipython.MagicContext
class pytd.pandas_td.ipython.MagicContext
[source]
__init__
()[source]
Initialize self. See help(type(self)) for accurate signature.
connect
()[source]
pytd.pandas_td.ipython.MagicTable
class pytd.pandas_td.ipython.MagicTable
(table)[source]
__init__
(table)[source]
Initialize self. See help(type(self)) for accurate signature.
pytd.pandas_td.ipython.get_td_magic_context()
pytd.pandas_td.ipython.TDMagics
class pytd.pandas_td.ipython.TDMagics
(shell)[source]
__init__
(shell)[source]
Create a configurable given a config.
Parameters
config (Config) – If this is empty, default values are used. If config is a Config instance, it will be used to configure the instance.
parent (Configurable instance, optional) – The parent Configurable instance of this object.
Notes
Subclasses of Configurable must call the __init__() method of Configurable before doing anything else, using super():
Code Block | ||
---|---|---|
| ||
class MyConfigurable(Configurable):
    def __init__(self, config=None):
        super(MyConfigurable, self).__init__(config=config)
        # Then any other code you need to finish initialization.
|
This ensures that instances will be configured properly.
pytd.pandas_td.ipython.DatabasesMagics
class pytd.pandas_td.ipython.DatabasesMagics
(shell)[source]
td_databases
(pattern)
List databases in the form of pandas.DataFrame.
Code Block | ||
---|---|---|
| ||
%td_databases [<database_name_pattern>]
|
Parameters
<database_name_pattern> (string, optional) – List databases matched to a given pattern. If not given, all existing databases will be listed.
Returns
Return type
pandas.DataFrame
Examples
Code Block | ||
---|---|---|
| ||
In [1]: %load_ext pytd.pandas_td.ipython
In [2]: %td_databases
Out[2]:
name count permission created_at updated_at
0 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 348124 administrator 2019-01-23 05:48:11+00:00 2019-01-23 05:48:11+00:00
1 yyyyyyyyy 0 administrator 2017-12-14 07:52:34+00:00 2017-12-14 07:52:34+00:00
2 zzzzzzzzzzzzz 0 administrator 2016-05-25 23:12:06+00:00 2016-05-25 23:12:06+00:00
...
In [3]: %td_databases sample
Out[3]:
name count permission created_at updated_at
0 sampledb 2 administrator 2014-04-11 22:29:38+00:00 2014-04-11 22:29:38+00:00
1 sample_xxxxxxxx 2 administrator 2017-06-02 23:37:41+00:00 2017-06-02 23:37:41+00:00
2 sample_datasets 8812278 query_only 2014-10-04 01:13:11+00:00 2018-03-16 04:59:06+00:00
...
|
magics
= {'cell': {}, 'line': {'td_databases': 'td_databases'}}
registered
= True
pytd.pandas_td.ipython.TablesMagics
class pytd.pandas_td.ipython.TablesMagics
(shell)[source]
td_tables
(pattern)
List tables in databases.
Code Block | ||
---|---|---|
| ||
%td_tables [<table_identifier_pattern>]
|
Parameters
<table_identifier_pattern> (string, optional) – List tables matched to a given pattern. Table identifier is represented as database_name.table_name. If not given, all existing tables will be listed.
Returns
Return type
pandas.DataFrame
Examples
Code Block | ||
---|---|---|
| ||
In [1]: %load_ext pytd.pandas_td.ipython
In [2]: %td_tables
Out[2]:
db_name name count estimated_storage_size last_log_timestamp created_at
0 xxxxx_demo_aa customer_test 70 1047 2018-02-05 06:20:32+00:00 2018-02-05 06:20:24+00:00
1 xxxxx_demo_aa email_log 0 0 1970-01-01 00:00:00+00:00 2018-02-05 07:19:57+00:00
2 yy_wf topk_similar_items 10598 134208 2018-04-16 09:23:57+00:00 2018-04-16 09:59:48+00:00
...
In [3]: %td_tables sample
Out[3]:
db_name name count estimated_storage_size last_log_timestamp created_at
0 xx_test aaaaaaaa_sample 0 0 1970-01-01 00:00:00+00:00 2015-10-20 17:37:40+00:00
1 sampledb sampletbl 2 843 1970-01-01 00:00:00+00:00 2014-04-11 22:30:08+00:00
2 zzzz_test_db sample_output_tab 4 889 2018-06-06 08:26:20+00:00 2018-06-06 08:27:12+00:00
...
|
magics
= {'cell': {}, 'line': {'td_tables': 'td_tables'}}
registered
= True
pytd.pandas_td.ipython.JobsMagics
class pytd.pandas_td.ipython.JobsMagics
(shell)[source]
td_jobs
(line)
List job activities in an account.
Code Block | ||
---|---|---|
| ||
%td_jobs
|
Returns
Return type
pandas.DataFrame
Examples
Code Block | ||
---|---|---|
| ||
In [1]: %load_ext pytd.pandas_td.ipython
In [2]: %td_jobs
Out[2]:
status job_id type start_at query
0 error 448650806 hive 2019-04-12 05:33:36+00:00 with null_samples as (\n select\n id,\n ...
1 success 448646994 presto 2019-04-12 05:23:29+00:00 -- read_td_query\n-- set session distributed_j...
2 success 448646986 presto 2019-04-12 05:23:27+00:00 -- read_td_query\n-- set session distributed_j...
...
|
magics
= {'cell': {}, 'line': {'td_jobs': 'td_jobs'}}
registered
= True
pytd.pandas_td.ipython.UseMagics
class pytd.pandas_td.ipython.UseMagics
(shell)[source]
td_use
(line)
Use a specific database.
This magic pushes all table names in a specified database into the current namespace.
Code Block | ||
---|---|---|
| ||
%td_use [<database_name>] |
Parameters
<database_name> (string) – Database name.
Examples
Code Block | ||
---|---|---|
| ||
In [1]: %load_ext pytd.pandas_td.ipython
In [2]: %td_use sample_datasets
INFO: import nasdaq
INFO: import www_access
In [3]: nasdaq # describe table columns in the form of DataFrame
Out[3]: <pytd.pandas_td.ipython.MagicTable at 0x117651908> |
magics
= {'cell': {}, 'line': {'td_use': 'td_use'}}
registered
= True
pytd.pandas_td.ipython.QueryMagics
class pytd.pandas_td.ipython.QueryMagics
(shell)[source]
create_job_parser
()[source]
parse_job_args
(line)[source]
create_query_parser
(engine_type)[source]
parse_query_args
(engine_type, line)[source]
push_code
(code, end='\n')[source]
display_code_block
()[source]
build_query
(cell)[source]
build_engine
(engine_type, database, args)[source]
convert_time
(d)[source]
set_index
(d, index, args)[source]
pivot
(d, args)[source]
post_process
(d, args)[source]
run_job
(line)[source]
run_query
(engine_type, line, cell)[source]
td_job
(line)
Get job result.
Code Block | ||
---|---|---|
| ||
%td_job [--pivot] [--plot] [--dry-run] [--verbose]
[--connection <connection>] [--dropna] [--out <out>]
[--out-file <out_file>] [--quiet] [--timezone <timezone>]
job_id |
Parameters
<job_id> (integer) – Job ID.
--pivot (optional) – Run pivot_table against dimensions.
--plot (optional) – Plot the query result.
--dry-run, -n (optional) – Output translated code without running query.
--verbose, -v (optional) – Verbose output.
--connection <connection>, -c <connection> (optional) – Use specified connection.
--dropna, -d (optional) – Drop columns if all values are NA.
--out <out>, -o <out> (optional) – Store the result to variable.
--out-file <out_file>, -O <out_file> (optional) – Store the result to file.
--quiet, -q (optional) – Disable progress output.
--timezone <timezone>, -T <timezone> (optional) – Set timezone to time index.
Returns
Return type
pandas.DataFrame
Examples
Code Block | ||
---|---|---|
| ||
In [1]: %load_ext pytd.pandas_td.ipython
In [2]: %td_job 451709460 # select * from sample_datasets.nasdaq limit 5
Out[2]:
symbol open volume high low close
time
1992-08-25 16:00:00 ATRO 0.0 3900 0.7076 0.7076 0.7076
1992-08-25 16:00:00 ALOG 0.0 11200 11.0000 10.6250 11.0000
1992-08-25 16:00:00 ATAX 0.0 11400 11.3750 11.0000 11.0000
1992-08-25 16:00:00 ATRI 0.0 5400 14.3405 14.0070 14.2571
1992-08-25 16:00:00 ABMD 0.0 38800 5.7500 5.2500 5.6875 |
td_hive
(line, cell)
Run a Hive query.
Code Block | ||
---|---|---|
| ||
%%td_hive [<database>] [--pivot] [--plot] [--dry-run] [--verbose]
[--connection <connection>] [--dropna] [--out <out>]
[--out-file <out_file>] [--quiet] [--timezone <timezone>]
<query> |
Parameters
<query> (string) – Hive query.
<database> (string, optional) – Database name.
--pivot (optional) – Run pivot_table against dimensions.
--plot (optional) – Plot the query result.
--dry-run, -n (optional) – Output translated code without running query.
--verbose, -v (optional) – Verbose output.
--connection <connection>, -c <connection> (optional) – Use specified connection.
--dropna, -d (optional) – Drop columns if all values are NA.
--out <out>, -o <out> (optional) – Store the result to variable.
--out-file <out_file>, -O <out_file> (optional) – Store the result to file.
--quiet, -q (optional) – Disable progress output.
--timezone <timezone>, -T <timezone> (optional) – Set timezone to time index.
Returns
Return type
pandas.DataFrame
Examples
Code Block | ||
---|---|---|
| ||
In [1]: %load_ext pytd.pandas_td.ipython
In [2]: %%td_hive
...: select hivemall_version()
...:
Out[2]:
_c0
0 0.6.0-SNAPSHOT-201901-r01 |
td_presto
(line, cell)
Run a Presto query.
Code Block | ||
---|---|---|
| ||
%%td_presto [<database>] [--pivot] [--plot] [--dry-run] [--verbose]
[--connection <connection>] [--dropna] [--out <out>]
[--out-file <out_file>] [--quiet] [--timezone <timezone>]
<query> |
Parameters
<query> (string) – Presto query.
<database> (string, optional) – Database name.
--pivot (optional) – Run pivot_table against dimensions.
--plot (optional) – Plot the query result.
--dry-run, -n (optional) – Output translated code without running query.
--verbose, -v (optional) – Verbose output.
--connection <connection>, -c <connection> (optional) – Use specified connection.
--dropna, -d (optional) – Drop columns if all values are NA.
--out <out>, -o <out> (optional) – Store the result to variable.
--out-file <out_file>, -O <out_file> (optional) – Store the result to file.
--quiet, -q (optional) – Disable progress output.
--timezone <timezone>, -T <timezone> (optional) – Set timezone to time index.
Returns
Return type
pandas.DataFrame
Examples
Code Block | ||
---|---|---|
| ||
In [1]: %load_ext pytd.pandas_td.ipython
In [2]: %%td_presto
...: select * from sample_datasets.nasdaq limit 5
...:
Out[2]:
symbol open volume high low close
time
1989-01-26 16:00:00 SMTC 0.0 8000 0.4532 0.4532 0.4532
1989-01-26 16:00:00 SEIC 0.0 163200 0.7077 0.6921 0.7025
1989-01-26 16:00:00 SIGI 0.0 2800 3.9610 3.8750 3.9610
1989-01-26 16:00:00 NAVG 0.0 1800 14.6740 14.1738 14.6740
1989-01-26 16:00:00 MOCO 0.0 71101 3.6722 3.5609 3.5980
|
magics
= {'cell': {'td_hive': 'td_hive', 'td_presto': 'td_presto'}, 'line': {'td_job': 'td_job'}}
registered
= True
pytd.pandas_td.ipython.load_ipython_extension
pytd.pandas_td.ipython.load_ipython_extension
(ipython)[source]