# -*- coding: utf-8 -*- from __future__ import print_function import pytest from datetime import datetime import re from pandas.compat import (zip, range, lrange, StringIO) from pandas import (DataFrame, Series, Index, date_range, compat, Timestamp) import pandas as pd from numpy import nan import numpy as np from pandas.util.testing import (assert_series_equal, assert_frame_equal) import pandas.util.testing as tm from pandas.tests.frame.common import TestData class TestDataFrameReplace(TestData): def test_replace_inplace(self): self.tsframe['A'][:5] = nan self.tsframe['A'][-5:] = nan tsframe = self.tsframe.copy() tsframe.replace(nan, 0, inplace=True) assert_frame_equal(tsframe, self.tsframe.fillna(0)) # mixed type mf = self.mixed_frame mf.iloc[5:20, mf.columns.get_loc('foo')] = nan mf.iloc[-10:, mf.columns.get_loc('A')] = nan result = self.mixed_frame.replace(np.nan, 0) expected = self.mixed_frame.fillna(value=0) assert_frame_equal(result, expected) tsframe = self.tsframe.copy() tsframe.replace([nan], [0], inplace=True) assert_frame_equal(tsframe, self.tsframe.fillna(0)) def test_regex_replace_scalar(self): obj = {'a': list('ab..'), 'b': list('efgh')} dfobj = DataFrame(obj) mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) # simplest cases # regex -> value # obj frame res = dfobj.replace(r'\s*\.\s*', nan, regex=True) assert_frame_equal(dfobj, res.fillna('.')) # mixed res = dfmix.replace(r'\s*\.\s*', nan, regex=True) assert_frame_equal(dfmix, res.fillna('.')) # regex -> regex # obj frame res = dfobj.replace(r'\s*(\.)\s*', r'\1\1\1', regex=True) objc = obj.copy() objc['a'] = ['a', 'b', '...', '...'] expec = DataFrame(objc) assert_frame_equal(res, expec) # with mixed res = dfmix.replace(r'\s*(\.)\s*', r'\1\1\1', regex=True) mixc = mix.copy() mixc['b'] = ['a', 'b', '...', '...'] expec = DataFrame(mixc) assert_frame_equal(res, expec) # everything with compiled regexs as well res = dfobj.replace(re.compile(r'\s*\.\s*'), nan, regex=True) assert_frame_equal(dfobj, res.fillna('.')) # mixed res = dfmix.replace(re.compile(r'\s*\.\s*'), nan, regex=True) assert_frame_equal(dfmix, res.fillna('.')) # regex -> regex # obj frame res = dfobj.replace(re.compile(r'\s*(\.)\s*'), r'\1\1\1') objc = obj.copy() objc['a'] = ['a', 'b', '...', '...'] expec = DataFrame(objc) assert_frame_equal(res, expec) # with mixed res = dfmix.replace(re.compile(r'\s*(\.)\s*'), r'\1\1\1') mixc = mix.copy() mixc['b'] = ['a', 'b', '...', '...'] expec = DataFrame(mixc) assert_frame_equal(res, expec) res = dfmix.replace(regex=re.compile(r'\s*(\.)\s*'), value=r'\1\1\1') mixc = mix.copy() mixc['b'] = ['a', 'b', '...', '...'] expec = DataFrame(mixc) assert_frame_equal(res, expec) res = dfmix.replace(regex=r'\s*(\.)\s*', value=r'\1\1\1') mixc = mix.copy() mixc['b'] = ['a', 'b', '...', '...'] expec = DataFrame(mixc) assert_frame_equal(res, expec) def test_regex_replace_scalar_inplace(self): obj = {'a': list('ab..'), 'b': list('efgh')} dfobj = DataFrame(obj) mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) # simplest cases # regex -> value # obj frame res = dfobj.copy() res.replace(r'\s*\.\s*', nan, regex=True, inplace=True) assert_frame_equal(dfobj, res.fillna('.')) # mixed res = dfmix.copy() res.replace(r'\s*\.\s*', nan, regex=True, inplace=True) assert_frame_equal(dfmix, res.fillna('.')) # regex -> regex # obj frame res = dfobj.copy() res.replace(r'\s*(\.)\s*', r'\1\1\1', regex=True, inplace=True) objc = obj.copy() objc['a'] = ['a', 'b', '...', '...'] expec = DataFrame(objc) assert_frame_equal(res, expec) # with mixed res = dfmix.copy() res.replace(r'\s*(\.)\s*', r'\1\1\1', regex=True, inplace=True) mixc = mix.copy() mixc['b'] = ['a', 'b', '...', '...'] expec = DataFrame(mixc) assert_frame_equal(res, expec) # everything with compiled regexs as well res = dfobj.copy() res.replace(re.compile(r'\s*\.\s*'), nan, regex=True, inplace=True) assert_frame_equal(dfobj, res.fillna('.')) # mixed res = dfmix.copy() res.replace(re.compile(r'\s*\.\s*'), nan, regex=True, inplace=True) assert_frame_equal(dfmix, res.fillna('.')) # regex -> regex # obj frame res = dfobj.copy() res.replace(re.compile(r'\s*(\.)\s*'), r'\1\1\1', regex=True, inplace=True) objc = obj.copy() objc['a'] = ['a', 'b', '...', '...'] expec = DataFrame(objc) assert_frame_equal(res, expec) # with mixed res = dfmix.copy() res.replace(re.compile(r'\s*(\.)\s*'), r'\1\1\1', regex=True, inplace=True) mixc = mix.copy() mixc['b'] = ['a', 'b', '...', '...'] expec = DataFrame(mixc) assert_frame_equal(res, expec) res = dfobj.copy() res.replace(regex=r'\s*\.\s*', value=nan, inplace=True) assert_frame_equal(dfobj, res.fillna('.')) # mixed res = dfmix.copy() res.replace(regex=r'\s*\.\s*', value=nan, inplace=True) assert_frame_equal(dfmix, res.fillna('.')) # regex -> regex # obj frame res = dfobj.copy() res.replace(regex=r'\s*(\.)\s*', value=r'\1\1\1', inplace=True) objc = obj.copy() objc['a'] = ['a', 'b', '...', '...'] expec = DataFrame(objc) assert_frame_equal(res, expec) # with mixed res = dfmix.copy() res.replace(regex=r'\s*(\.)\s*', value=r'\1\1\1', inplace=True) mixc = mix.copy() mixc['b'] = ['a', 'b', '...', '...'] expec = DataFrame(mixc) assert_frame_equal(res, expec) # everything with compiled regexs as well res = dfobj.copy() res.replace(regex=re.compile(r'\s*\.\s*'), value=nan, inplace=True) assert_frame_equal(dfobj, res.fillna('.')) # mixed res = dfmix.copy() res.replace(regex=re.compile(r'\s*\.\s*'), value=nan, inplace=True) assert_frame_equal(dfmix, res.fillna('.')) # regex -> regex # obj frame res = dfobj.copy() res.replace(regex=re.compile(r'\s*(\.)\s*'), value=r'\1\1\1', inplace=True) objc = obj.copy() objc['a'] = ['a', 'b', '...', '...'] expec = DataFrame(objc) assert_frame_equal(res, expec) # with mixed res = dfmix.copy() res.replace(regex=re.compile(r'\s*(\.)\s*'), value=r'\1\1\1', inplace=True) mixc = mix.copy() mixc['b'] = ['a', 'b', '...', '...'] expec = DataFrame(mixc) assert_frame_equal(res, expec) def test_regex_replace_list_obj(self): obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')} dfobj = DataFrame(obj) # lists of regexes and values # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] to_replace_res = [r'\s*\.\s*', r'e|f|g'] values = [nan, 'crap'] res = dfobj.replace(to_replace_res, values, regex=True) expec = DataFrame({'a': ['a', 'b', nan, nan], 'b': ['crap'] * 3 + ['h'], 'c': ['h', 'crap', 'l', 'o']}) assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] to_replace_res = [r'\s*(\.)\s*', r'(e|f|g)'] values = [r'\1\1', r'\1_crap'] res = dfobj.replace(to_replace_res, values, regex=True) expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['e_crap', 'f_crap', 'g_crap', 'h'], 'c': ['h', 'e_crap', 'l', 'o']}) assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN # or vN)] to_replace_res = [r'\s*(\.)\s*', r'e'] values = [r'\1\1', r'crap'] res = dfobj.replace(to_replace_res, values, regex=True) expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['crap', 'f', 'g', 'h'], 'c': ['h', 'crap', 'l', 'o']}) assert_frame_equal(res, expec) to_replace_res = [r'\s*(\.)\s*', r'e'] values = [r'\1\1', r'crap'] res = dfobj.replace(value=values, regex=to_replace_res) expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['crap', 'f', 'g', 'h'], 'c': ['h', 'crap', 'l', 'o']}) assert_frame_equal(res, expec) def test_regex_replace_list_obj_inplace(self): # same as above with inplace=True # lists of regexes and values obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')} dfobj = DataFrame(obj) # lists of regexes and values # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] to_replace_res = [r'\s*\.\s*', r'e|f|g'] values = [nan, 'crap'] res = dfobj.copy() res.replace(to_replace_res, values, inplace=True, regex=True) expec = DataFrame({'a': ['a', 'b', nan, nan], 'b': ['crap'] * 3 + ['h'], 'c': ['h', 'crap', 'l', 'o']}) assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] to_replace_res = [r'\s*(\.)\s*', r'(e|f|g)'] values = [r'\1\1', r'\1_crap'] res = dfobj.copy() res.replace(to_replace_res, values, inplace=True, regex=True) expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['e_crap', 'f_crap', 'g_crap', 'h'], 'c': ['h', 'e_crap', 'l', 'o']}) assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN # or vN)] to_replace_res = [r'\s*(\.)\s*', r'e'] values = [r'\1\1', r'crap'] res = dfobj.copy() res.replace(to_replace_res, values, inplace=True, regex=True) expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['crap', 'f', 'g', 'h'], 'c': ['h', 'crap', 'l', 'o']}) assert_frame_equal(res, expec) to_replace_res = [r'\s*(\.)\s*', r'e'] values = [r'\1\1', r'crap'] res = dfobj.copy() res.replace(value=values, regex=to_replace_res, inplace=True) expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['crap', 'f', 'g', 'h'], 'c': ['h', 'crap', 'l', 'o']}) assert_frame_equal(res, expec) def test_regex_replace_list_mixed(self): # mixed frame to make sure this doesn't break things mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) # lists of regexes and values # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] to_replace_res = [r'\s*\.\s*', r'a'] values = [nan, 'crap'] mix2 = {'a': lrange(4), 'b': list('ab..'), 'c': list('halo')} dfmix2 = DataFrame(mix2) res = dfmix2.replace(to_replace_res, values, regex=True) expec = DataFrame({'a': mix2['a'], 'b': ['crap', 'b', nan, nan], 'c': ['h', 'crap', 'l', 'o']}) assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] to_replace_res = [r'\s*(\.)\s*', r'(a|b)'] values = [r'\1\1', r'\1_crap'] res = dfmix.replace(to_replace_res, values, regex=True) expec = DataFrame({'a': mix['a'], 'b': ['a_crap', 'b_crap', '..', '..']}) assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN # or vN)] to_replace_res = [r'\s*(\.)\s*', r'a', r'(b)'] values = [r'\1\1', r'crap', r'\1_crap'] res = dfmix.replace(to_replace_res, values, regex=True) expec = DataFrame({'a': mix['a'], 'b': ['crap', 'b_crap', '..', '..']}) assert_frame_equal(res, expec) to_replace_res = [r'\s*(\.)\s*', r'a', r'(b)'] values = [r'\1\1', r'crap', r'\1_crap'] res = dfmix.replace(regex=to_replace_res, value=values) expec = DataFrame({'a': mix['a'], 'b': ['crap', 'b_crap', '..', '..']}) assert_frame_equal(res, expec) def test_regex_replace_list_mixed_inplace(self): mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) # the same inplace # lists of regexes and values # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] to_replace_res = [r'\s*\.\s*', r'a'] values = [nan, 'crap'] res = dfmix.copy() res.replace(to_replace_res, values, inplace=True, regex=True) expec = DataFrame({'a': mix['a'], 'b': ['crap', 'b', nan, nan]}) assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] to_replace_res = [r'\s*(\.)\s*', r'(a|b)'] values = [r'\1\1', r'\1_crap'] res = dfmix.copy() res.replace(to_replace_res, values, inplace=True, regex=True) expec = DataFrame({'a': mix['a'], 'b': ['a_crap', 'b_crap', '..', '..']}) assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN # or vN)] to_replace_res = [r'\s*(\.)\s*', r'a', r'(b)'] values = [r'\1\1', r'crap', r'\1_crap'] res = dfmix.copy() res.replace(to_replace_res, values, inplace=True, regex=True) expec = DataFrame({'a': mix['a'], 'b': ['crap', 'b_crap', '..', '..']}) assert_frame_equal(res, expec) to_replace_res = [r'\s*(\.)\s*', r'a', r'(b)'] values = [r'\1\1', r'crap', r'\1_crap'] res = dfmix.copy() res.replace(regex=to_replace_res, value=values, inplace=True) expec = DataFrame({'a': mix['a'], 'b': ['crap', 'b_crap', '..', '..']}) assert_frame_equal(res, expec) def test_regex_replace_dict_mixed(self): mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} dfmix = DataFrame(mix) # dicts # single dict {re1: v1}, search the whole frame # need test for this... # list of dicts {re1: v1, re2: v2, ..., re3: v3}, search the whole # frame res = dfmix.replace({'b': r'\s*\.\s*'}, {'b': nan}, regex=True) res2 = dfmix.copy() res2.replace({'b': r'\s*\.\s*'}, {'b': nan}, inplace=True, regex=True) expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', nan, nan], 'c': mix['c']}) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) # list of dicts {re1: re11, re2: re12, ..., reN: re1N}, search the # whole frame res = dfmix.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, regex=True) res2 = dfmix.copy() res2.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, inplace=True, regex=True) expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', '.ty', '.ty'], 'c': mix['c']}) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) res = dfmix.replace(regex={'b': r'\s*(\.)\s*'}, value={'b': r'\1ty'}) res2 = dfmix.copy() res2.replace(regex={'b': r'\s*(\.)\s*'}, value={'b': r'\1ty'}, inplace=True) expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', '.ty', '.ty'], 'c': mix['c']}) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) # scalar -> dict # to_replace regex, {value: value} expec = DataFrame({'a': mix['a'], 'b': [nan, 'b', '.', '.'], 'c': mix['c']}) res = dfmix.replace('a', {'b': nan}, regex=True) res2 = dfmix.copy() res2.replace('a', {'b': nan}, regex=True, inplace=True) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) res = dfmix.replace('a', {'b': nan}, regex=True) res2 = dfmix.copy() res2.replace(regex='a', value={'b': nan}, inplace=True) expec = DataFrame({'a': mix['a'], 'b': [nan, 'b', '.', '.'], 'c': mix['c']}) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) def test_regex_replace_dict_nested(self): # nested dicts will not work until this is implemented for Series mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} dfmix = DataFrame(mix) res = dfmix.replace({'b': {r'\s*\.\s*': nan}}, regex=True) res2 = dfmix.copy() res4 = dfmix.copy() res2.replace({'b': {r'\s*\.\s*': nan}}, inplace=True, regex=True) res3 = dfmix.replace(regex={'b': {r'\s*\.\s*': nan}}) res4.replace(regex={'b': {r'\s*\.\s*': nan}}, inplace=True) expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', nan, nan], 'c': mix['c']}) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) assert_frame_equal(res3, expec) assert_frame_equal(res4, expec) def test_regex_replace_dict_nested_gh4115(self): df = pd.DataFrame({'Type': ['Q', 'T', 'Q', 'Q', 'T'], 'tmp': 2}) expected = DataFrame({'Type': [0, 1, 0, 0, 1], 'tmp': 2}) result = df.replace({'Type': {'Q': 0, 'T': 1}}) assert_frame_equal(result, expected) def test_regex_replace_list_to_scalar(self): mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4), 'c': [nan, nan, nan, 'd']}) res = df.replace([r'\s*\.\s*', 'a|b'], nan, regex=True) res2 = df.copy() res3 = df.copy() res2.replace([r'\s*\.\s*', 'a|b'], nan, regex=True, inplace=True) res3.replace(regex=[r'\s*\.\s*', 'a|b'], value=nan, inplace=True) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) assert_frame_equal(res3, expec) def test_regex_replace_str_to_numeric(self): # what happens when you try to replace a numeric value with a regex? mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace(r'\s*\.\s*', 0, regex=True) res2 = df.copy() res2.replace(r'\s*\.\s*', 0, inplace=True, regex=True) res3 = df.copy() res3.replace(regex=r'\s*\.\s*', value=0, inplace=True) expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', 0, 0], 'c': mix['c']}) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) assert_frame_equal(res3, expec) def test_regex_replace_regex_list_to_numeric(self): mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace([r'\s*\.\s*', 'b'], 0, regex=True) res2 = df.copy() res2.replace([r'\s*\.\s*', 'b'], 0, regex=True, inplace=True) res3 = df.copy() res3.replace(regex=[r'\s*\.\s*', 'b'], value=0, inplace=True) expec = DataFrame({'a': mix['a'], 'b': ['a', 0, 0, 0], 'c': ['a', 0, nan, 'd']}) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) assert_frame_equal(res3, expec) def test_regex_replace_series_of_regexes(self): mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) s1 = Series({'b': r'\s*\.\s*'}) s2 = Series({'b': nan}) res = df.replace(s1, s2, regex=True) res2 = df.copy() res2.replace(s1, s2, inplace=True, regex=True) res3 = df.copy() res3.replace(regex=s1, value=s2, inplace=True) expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', nan, nan], 'c': mix['c']}) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) assert_frame_equal(res3, expec) def test_regex_replace_numeric_to_object_conversion(self): mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) expec = DataFrame({'a': ['a', 1, 2, 3], 'b': mix['b'], 'c': mix['c']}) res = df.replace(0, 'a') assert_frame_equal(res, expec) assert res.a.dtype == np.object_ def test_replace_regex_metachar(self): metachars = '[]', '()', r'\d', r'\w', r'\s' for metachar in metachars: df = DataFrame({'a': [metachar, 'else']}) result = df.replace({'a': {metachar: 'paren'}}) expected = DataFrame({'a': ['paren', 'else']}) assert_frame_equal(result, expected) def test_replace(self): self.tsframe['A'][:5] = nan self.tsframe['A'][-5:] = nan zero_filled = self.tsframe.replace(nan, -1e8) assert_frame_equal(zero_filled, self.tsframe.fillna(-1e8)) assert_frame_equal(zero_filled.replace(-1e8, nan), self.tsframe) self.tsframe['A'][:5] = nan self.tsframe['A'][-5:] = nan self.tsframe['B'][:5] = -1e8 # empty df = DataFrame(index=['a', 'b']) assert_frame_equal(df, df.replace(5, 7)) # GH 11698 # test for mixed data types. df = pd.DataFrame([('-', pd.to_datetime('20150101')), ('a', pd.to_datetime('20150102'))]) df1 = df.replace('-', np.nan) expected_df = pd.DataFrame([(np.nan, pd.to_datetime('20150101')), ('a', pd.to_datetime('20150102'))]) assert_frame_equal(df1, expected_df) def test_replace_list(self): obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')} dfobj = DataFrame(obj) # lists of regexes and values # list of [v1, v2, ..., vN] -> [v1, v2, ..., vN] to_replace_res = [r'.', r'e'] values = [nan, 'crap'] res = dfobj.replace(to_replace_res, values) expec = DataFrame({'a': ['a', 'b', nan, nan], 'b': ['crap', 'f', 'g', 'h'], 'c': ['h', 'crap', 'l', 'o']}) assert_frame_equal(res, expec) # list of [v1, v2, ..., vN] -> [v1, v2, .., vN] to_replace_res = [r'.', r'f'] values = [r'..', r'crap'] res = dfobj.replace(to_replace_res, values) expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['e', 'crap', 'g', 'h'], 'c': ['h', 'e', 'l', 'o']}) assert_frame_equal(res, expec) def test_replace_series_dict(self): # from GH 3064 df = DataFrame({'zero': {'a': 0.0, 'b': 1}, 'one': {'a': 2.0, 'b': 0}}) result = df.replace(0, {'zero': 0.5, 'one': 1.0}) expected = DataFrame( {'zero': {'a': 0.5, 'b': 1}, 'one': {'a': 2.0, 'b': 1.0}}) assert_frame_equal(result, expected) result = df.replace(0, df.mean()) assert_frame_equal(result, expected) # series to series/dict df = DataFrame({'zero': {'a': 0.0, 'b': 1}, 'one': {'a': 2.0, 'b': 0}}) s = Series({'zero': 0.0, 'one': 2.0}) result = df.replace(s, {'zero': 0.5, 'one': 1.0}) expected = DataFrame( {'zero': {'a': 0.5, 'b': 1}, 'one': {'a': 1.0, 'b': 0.0}}) assert_frame_equal(result, expected) result = df.replace(s, df.mean()) assert_frame_equal(result, expected) def test_replace_convert(self): # gh 3907 df = DataFrame([['foo', 'bar', 'bah'], ['bar', 'foo', 'bah']]) m = {'foo': 1, 'bar': 2, 'bah': 3} rep = df.replace(m) expec = Series([np.int64] * 3) res = rep.dtypes assert_series_equal(expec, res) def test_replace_mixed(self): mf = self.mixed_frame mf.iloc[5:20, mf.columns.get_loc('foo')] = nan mf.iloc[-10:, mf.columns.get_loc('A')] = nan result = self.mixed_frame.replace(np.nan, -18) expected = self.mixed_frame.fillna(value=-18) assert_frame_equal(result, expected) assert_frame_equal(result.replace(-18, nan), self.mixed_frame) result = self.mixed_frame.replace(np.nan, -1e8) expected = self.mixed_frame.fillna(value=-1e8) assert_frame_equal(result, expected) assert_frame_equal(result.replace(-1e8, nan), self.mixed_frame) # int block upcasting df = DataFrame({'A': Series([1.0, 2.0], dtype='float64'), 'B': Series([0, 1], dtype='int64')}) expected = DataFrame({'A': Series([1.0, 2.0], dtype='float64'), 'B': Series([0.5, 1], dtype='float64')}) result = df.replace(0, 0.5) assert_frame_equal(result, expected) df.replace(0, 0.5, inplace=True) assert_frame_equal(df, expected) # int block splitting df = DataFrame({'A': Series([1.0, 2.0], dtype='float64'), 'B': Series([0, 1], dtype='int64'), 'C': Series([1, 2], dtype='int64')}) expected = DataFrame({'A': Series([1.0, 2.0], dtype='float64'), 'B': Series([0.5, 1], dtype='float64'), 'C': Series([1, 2], dtype='int64')}) result = df.replace(0, 0.5) assert_frame_equal(result, expected) # to object block upcasting df = DataFrame({'A': Series([1.0, 2.0], dtype='float64'), 'B': Series([0, 1], dtype='int64')}) expected = DataFrame({'A': Series([1, 'foo'], dtype='object'), 'B': Series([0, 1], dtype='int64')}) result = df.replace(2, 'foo') assert_frame_equal(result, expected) expected = DataFrame({'A': Series(['foo', 'bar'], dtype='object'), 'B': Series([0, 'foo'], dtype='object')}) result = df.replace([1, 2], ['foo', 'bar']) assert_frame_equal(result, expected) # test case from df = DataFrame({'A': Series([3, 0], dtype='int64'), 'B': Series([0, 3], dtype='int64')}) result = df.replace(3, df.mean().to_dict()) expected = df.copy().astype('float64') m = df.mean() expected.iloc[0, 0] = m[0] expected.iloc[1, 1] = m[1] assert_frame_equal(result, expected) def test_replace_simple_nested_dict(self): df = DataFrame({'col': range(1, 5)}) expected = DataFrame({'col': ['a', 2, 3, 'b']}) result = df.replace({'col': {1: 'a', 4: 'b'}}) assert_frame_equal(expected, result) # in this case, should be the same as the not nested version result = df.replace({1: 'a', 4: 'b'}) assert_frame_equal(expected, result) def test_replace_simple_nested_dict_with_nonexistent_value(self): df = DataFrame({'col': range(1, 5)}) expected = DataFrame({'col': ['a', 2, 3, 'b']}) result = df.replace({-1: '-', 1: 'a', 4: 'b'}) assert_frame_equal(expected, result) result = df.replace({'col': {-1: '-', 1: 'a', 4: 'b'}}) assert_frame_equal(expected, result) def test_replace_value_is_none(self): orig_value = self.tsframe.iloc[0, 0] orig2 = self.tsframe.iloc[1, 0] self.tsframe.iloc[0, 0] = nan self.tsframe.iloc[1, 0] = 1 result = self.tsframe.replace(to_replace={nan: 0}) expected = self.tsframe.T.replace(to_replace={nan: 0}).T assert_frame_equal(result, expected) result = self.tsframe.replace(to_replace={nan: 0, 1: -1e8}) tsframe = self.tsframe.copy() tsframe.iloc[0, 0] = 0 tsframe.iloc[1, 0] = -1e8 expected = tsframe assert_frame_equal(expected, result) self.tsframe.iloc[0, 0] = orig_value self.tsframe.iloc[1, 0] = orig2 def test_replace_for_new_dtypes(self): # dtypes tsframe = self.tsframe.copy().astype(np.float32) tsframe['A'][:5] = nan tsframe['A'][-5:] = nan zero_filled = tsframe.replace(nan, -1e8) assert_frame_equal(zero_filled, tsframe.fillna(-1e8)) assert_frame_equal(zero_filled.replace(-1e8, nan), tsframe) tsframe['A'][:5] = nan tsframe['A'][-5:] = nan tsframe['B'][:5] = -1e8 b = tsframe['B'] b[b == -1e8] = nan tsframe['B'] = b result = tsframe.fillna(method='bfill') assert_frame_equal(result, tsframe.fillna(method='bfill')) def test_replace_dtypes(self): # int df = DataFrame({'ints': [1, 2, 3]}) result = df.replace(1, 0) expected = DataFrame({'ints': [0, 2, 3]}) assert_frame_equal(result, expected) df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int32) result = df.replace(1, 0) expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int32) assert_frame_equal(result, expected) df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int16) result = df.replace(1, 0) expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int16) assert_frame_equal(result, expected) # bools df = DataFrame({'bools': [True, False, True]}) result = df.replace(False, True) assert result.values.all() # complex blocks df = DataFrame({'complex': [1j, 2j, 3j]}) result = df.replace(1j, 0j) expected = DataFrame({'complex': [0j, 2j, 3j]}) assert_frame_equal(result, expected) # datetime blocks prev = datetime.today() now = datetime.today() df = DataFrame({'datetime64': Index([prev, now, prev])}) result = df.replace(prev, now) expected = DataFrame({'datetime64': Index([now] * 3)}) assert_frame_equal(result, expected) def test_replace_input_formats_listlike(self): # both dicts to_rep = {'A': np.nan, 'B': 0, 'C': ''} values = {'A': 0, 'B': -1, 'C': 'missing'} df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5], 'C': ['', 'asdf', 'fd']}) filled = df.replace(to_rep, values) expected = {} for k, v in compat.iteritems(df): expected[k] = v.replace(to_rep[k], values[k]) assert_frame_equal(filled, DataFrame(expected)) result = df.replace([0, 2, 5], [5, 2, 0]) expected = DataFrame({'A': [np.nan, 5, np.inf], 'B': [5, 2, 0], 'C': ['', 'asdf', 'fd']}) assert_frame_equal(result, expected) # scalar to dict values = {'A': 0, 'B': -1, 'C': 'missing'} df = DataFrame({'A': [np.nan, 0, np.nan], 'B': [0, 2, 5], 'C': ['', 'asdf', 'fd']}) filled = df.replace(np.nan, values) expected = {} for k, v in compat.iteritems(df): expected[k] = v.replace(np.nan, values[k]) assert_frame_equal(filled, DataFrame(expected)) # list to list to_rep = [np.nan, 0, ''] values = [-2, -1, 'missing'] result = df.replace(to_rep, values) expected = df.copy() for i in range(len(to_rep)): expected.replace(to_rep[i], values[i], inplace=True) assert_frame_equal(result, expected) pytest.raises(ValueError, df.replace, to_rep, values[1:]) def test_replace_input_formats_scalar(self): df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5], 'C': ['', 'asdf', 'fd']}) # dict to scalar to_rep = {'A': np.nan, 'B': 0, 'C': ''} filled = df.replace(to_rep, 0) expected = {} for k, v in compat.iteritems(df): expected[k] = v.replace(to_rep[k], 0) assert_frame_equal(filled, DataFrame(expected)) pytest.raises(TypeError, df.replace, to_rep, [np.nan, 0, '']) # list to scalar to_rep = [np.nan, 0, ''] result = df.replace(to_rep, -1) expected = df.copy() for i in range(len(to_rep)): expected.replace(to_rep[i], -1, inplace=True) assert_frame_equal(result, expected) def test_replace_limit(self): pass def test_replace_dict_no_regex(self): answer = Series({0: 'Strongly Agree', 1: 'Agree', 2: 'Neutral', 3: 'Disagree', 4: 'Strongly Disagree'}) weights = {'Agree': 4, 'Disagree': 2, 'Neutral': 3, 'Strongly Agree': 5, 'Strongly Disagree': 1} expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1}) result = answer.replace(weights) assert_series_equal(result, expected) def test_replace_series_no_regex(self): answer = Series({0: 'Strongly Agree', 1: 'Agree', 2: 'Neutral', 3: 'Disagree', 4: 'Strongly Disagree'}) weights = Series({'Agree': 4, 'Disagree': 2, 'Neutral': 3, 'Strongly Agree': 5, 'Strongly Disagree': 1}) expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1}) result = answer.replace(weights) assert_series_equal(result, expected) def test_replace_dict_tuple_list_ordering_remains_the_same(self): df = DataFrame(dict(A=[nan, 1])) res1 = df.replace(to_replace={nan: 0, 1: -1e8}) res2 = df.replace(to_replace=(1, nan), value=[-1e8, 0]) res3 = df.replace(to_replace=[1, nan], value=[-1e8, 0]) expected = DataFrame({'A': [0, -1e8]}) assert_frame_equal(res1, res2) assert_frame_equal(res2, res3) assert_frame_equal(res3, expected) def test_replace_doesnt_replace_without_regex(self): raw = """fol T_opp T_Dir T_Enh 0 1 0 0 vo 1 2 vr 0 0 2 2 0 0 0 3 3 0 bt 0""" df = pd.read_csv(StringIO(raw), sep=r'\s+') res = df.replace({r'\D': 1}) assert_frame_equal(df, res) def test_replace_bool_with_string(self): df = DataFrame({'a': [True, False], 'b': list('ab')}) result = df.replace(True, 'a') expected = DataFrame({'a': ['a', False], 'b': df.b}) assert_frame_equal(result, expected) def test_replace_pure_bool_with_string_no_op(self): df = DataFrame(np.random.rand(2, 2) > 0.5) result = df.replace('asdf', 'fdsa') assert_frame_equal(df, result) def test_replace_bool_with_bool(self): df = DataFrame(np.random.rand(2, 2) > 0.5) result = df.replace(False, True) expected = DataFrame(np.ones((2, 2), dtype=bool)) assert_frame_equal(result, expected) def test_replace_with_dict_with_bool_keys(self): df = DataFrame({0: [True, False], 1: [False, True]}) with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): df.replace({'asdf': 'asdb', True: 'yes'}) def test_replace_truthy(self): df = DataFrame({'a': [True, True]}) r = df.replace([np.inf, -np.inf], np.nan) e = df assert_frame_equal(r, e) def test_replace_int_to_int_chain(self): df = DataFrame({'a': lrange(1, 5)}) with tm.assert_raises_regex(ValueError, "Replacement not allowed .+"): df.replace({'a': dict(zip(range(1, 5), range(2, 6)))}) def test_replace_str_to_str_chain(self): a = np.arange(1, 5) astr = a.astype(str) bstr = np.arange(2, 6).astype(str) df = DataFrame({'a': astr}) with tm.assert_raises_regex(ValueError, "Replacement not allowed .+"): df.replace({'a': dict(zip(astr, bstr))}) def test_replace_swapping_bug(self): df = pd.DataFrame({'a': [True, False, True]}) res = df.replace({'a': {True: 'Y', False: 'N'}}) expect = pd.DataFrame({'a': ['Y', 'N', 'Y']}) assert_frame_equal(res, expect) df = pd.DataFrame({'a': [0, 1, 0]}) res = df.replace({'a': {0: 'Y', 1: 'N'}}) expect = pd.DataFrame({'a': ['Y', 'N', 'Y']}) assert_frame_equal(res, expect) def test_replace_period(self): d = { 'fname': { 'out_augmented_AUG_2011.json': pd.Period(year=2011, month=8, freq='M'), 'out_augmented_JAN_2011.json': pd.Period(year=2011, month=1, freq='M'), 'out_augmented_MAY_2012.json': pd.Period(year=2012, month=5, freq='M'), 'out_augmented_SUBSIDY_WEEK.json': pd.Period(year=2011, month=4, freq='M'), 'out_augmented_AUG_2012.json': pd.Period(year=2012, month=8, freq='M'), 'out_augmented_MAY_2011.json': pd.Period(year=2011, month=5, freq='M'), 'out_augmented_SEP_2013.json': pd.Period(year=2013, month=9, freq='M')}} df = pd.DataFrame(['out_augmented_AUG_2012.json', 'out_augmented_SEP_2013.json', 'out_augmented_SUBSIDY_WEEK.json', 'out_augmented_MAY_2012.json', 'out_augmented_MAY_2011.json', 'out_augmented_AUG_2011.json', 'out_augmented_JAN_2011.json'], columns=['fname']) assert set(df.fname.values) == set(d['fname'].keys()) expected = DataFrame({'fname': [d['fname'][k] for k in df.fname.values]}) result = df.replace(d) assert_frame_equal(result, expected) def test_replace_datetime(self): d = {'fname': {'out_augmented_AUG_2011.json': pd.Timestamp('2011-08'), 'out_augmented_JAN_2011.json': pd.Timestamp('2011-01'), 'out_augmented_MAY_2012.json': pd.Timestamp('2012-05'), 'out_augmented_SUBSIDY_WEEK.json': pd.Timestamp('2011-04'), 'out_augmented_AUG_2012.json': pd.Timestamp('2012-08'), 'out_augmented_MAY_2011.json': pd.Timestamp('2011-05'), 'out_augmented_SEP_2013.json': pd.Timestamp('2013-09')}} df = pd.DataFrame(['out_augmented_AUG_2012.json', 'out_augmented_SEP_2013.json', 'out_augmented_SUBSIDY_WEEK.json', 'out_augmented_MAY_2012.json', 'out_augmented_MAY_2011.json', 'out_augmented_AUG_2011.json', 'out_augmented_JAN_2011.json'], columns=['fname']) assert set(df.fname.values) == set(d['fname'].keys()) expected = DataFrame({'fname': [d['fname'][k] for k in df.fname.values]}) result = df.replace(d) assert_frame_equal(result, expected) def test_replace_datetimetz(self): # GH 11326 # behaving poorly when presented with a datetime64[ns, tz] df = DataFrame({'A': date_range('20130101', periods=3, tz='US/Eastern'), 'B': [0, np.nan, 2]}) result = df.replace(np.nan, 1) expected = DataFrame({'A': date_range('20130101', periods=3, tz='US/Eastern'), 'B': Series([0, 1, 2], dtype='float64')}) assert_frame_equal(result, expected) result = df.fillna(1) assert_frame_equal(result, expected) result = df.replace(0, np.nan) expected = DataFrame({'A': date_range('20130101', periods=3, tz='US/Eastern'), 'B': [np.nan, np.nan, 2]}) assert_frame_equal(result, expected) result = df.replace(Timestamp('20130102', tz='US/Eastern'), Timestamp('20130104', tz='US/Eastern')) expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'), Timestamp('20130104', tz='US/Eastern'), Timestamp('20130103', tz='US/Eastern')], 'B': [0, np.nan, 2]}) assert_frame_equal(result, expected) result = df.copy() result.iloc[1, 0] = np.nan result = result.replace( {'A': pd.NaT}, Timestamp('20130104', tz='US/Eastern')) assert_frame_equal(result, expected) # coerce to object result = df.copy() result.iloc[1, 0] = np.nan result = result.replace( {'A': pd.NaT}, Timestamp('20130104', tz='US/Pacific')) expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'), Timestamp('20130104', tz='US/Pacific'), Timestamp('20130103', tz='US/Eastern')], 'B': [0, np.nan, 2]}) assert_frame_equal(result, expected) result = df.copy() result.iloc[1, 0] = np.nan result = result.replace({'A': np.nan}, Timestamp('20130104')) expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'), Timestamp('20130104'), Timestamp('20130103', tz='US/Eastern')], 'B': [0, np.nan, 2]}) assert_frame_equal(result, expected) def test_replace_with_empty_dictlike(self): # GH 15289 mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) assert_frame_equal(df, df.replace({})) assert_frame_equal(df, df.replace(Series([]))) assert_frame_equal(df, df.replace({'b': {}})) assert_frame_equal(df, df.replace(Series({'b': {}}))) @pytest.mark.parametrize("to_replace, method, expected", [ (0, 'bfill', {'A': [1, 1, 2], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}), (nan, 'bfill', {'A': [0, 1, 2], 'B': [5.0, 7.0, 7.0], 'C': ['a', 'b', 'c']}), ('d', 'ffill', {'A': [0, 1, 2], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}), ([0, 2], 'bfill', {'A': [1, 1, 2], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}), ([1, 2], 'pad', {'A': [0, 0, 0], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}), ((1, 2), 'bfill', {'A': [0, 2, 2], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}), (['b', 'c'], 'ffill', {'A': [0, 1, 2], 'B': [5, nan, 7], 'C': ['a', 'a', 'a']}), ]) def test_replace_method(self, to_replace, method, expected): # GH 19632 df = DataFrame({'A': [0, 1, 2], 'B': [5, nan, 7], 'C': ['a', 'b', 'c']}) result = df.replace(to_replace=to_replace, value=None, method=method) expected = DataFrame(expected) assert_frame_equal(result, expected)