@@ -15,14 +15,17 @@ def connect_to_table(
1515 key_column : str = "id" ,
1616 thread_count : Optional [int ] = 1 ,
1717 ** kwargs ,
18- ):
18+ ) -> TableSegment :
1919 """Connects to the given database, and creates a TableSegment instance
2020
2121 Parameters:
2222 db_info: Either a URI string, or a dict of connection options.
2323 table_name: Name of the table as a string, or a tuple that signifies the path.
2424 key_column: Name of the key column
25- thread_count: Number of threads for this connection (only if using a threadpooled implementation)
25+ thread_count: Number of threads for this connection (only if using a threadpooled db implementation)
26+
27+ See Also:
28+ :meth:`connect`
2629 """
2730
2831 db = connect (db_info , thread_count = thread_count )
@@ -61,13 +64,39 @@ def diff_tables(
6164 # There may be many pools, so number of actual threads can be a lot higher.
6265 max_threadpool_size : Optional [int ] = 1 ,
6366) -> Iterator :
64- """Efficiently finds the diff between table1 and table2.
67+ """Finds the diff between table1 and table2.
68+
69+ Parameters:
70+ key_column (str): Name of the key column, which uniquely identifies each row (usually id)
71+ update_column (str, optional): Name of the update column, which signals that rows have changed (usually updated_at or last_update).
72+ Used by `min_update` and `max_update`.
73+ extra_columns (Tuple[str, ...], optional): Extra columns to compare
74+ min_key (:data:`DbKey`, optional): Lowest key_column value, used to restrict the segment
75+ max_key (:data:`DbKey`, optional): Highest key_column value, used to restrict the segment
76+ min_update (:data:`DbTime`, optional): Lowest update_column value, used to restrict the segment
77+ max_update (:data:`DbTime`, optional): Highest update_column value, used to restrict the segment
78+ algorithm (:class:`Algorithm`): Which diffing algorithm to use (`HASHDIFF` or `JOINDIFF`)
79+ bisection_factor (int): Into how many segments to bisect per iteration (when algorithm is `HASHDIFF`).
80+ bisection_threshold (Number): Threshold, in row count, at which to stop bisecting and compare locally (when algorithm is `HASHDIFF`).
81+ threaded (bool): Enable/disable threaded diffing. Needed to take advantage of database threads.
82+ max_threadpool_size (int): Maximum size of each threadpool. ``None`` means auto. Only relevant when `threaded` is ``True``.
83+ There may be many pools, so number of actual threads can be a lot higher.
84+
85+ Note:
86+ The following parameters are used to override the corresponding attributes of the given :class:`TableSegment` instances:
87+ `key_column`, `update_column`, `extra_columns`, `min_key`, `max_key`. If different values are needed per table, it's
88+ possible to omit them here, and instead set them directly when creating each :class:`TableSegment`.
6589
6690 Example:
6791 >>> table1 = connect_to_table('postgresql:///', 'Rating', 'id')
6892 >>> list(diff_tables(table1, table1))
6993 []
7094
95+ See Also:
96+ :class:`TableSegment`
97+ :class:`HashDiffer`
98+ :class:`JoinDiffer`
99+
71100 """
72101 tables = [table1 , table2 ]
73102 override_attrs = {
0 commit comments