|
1003 | 1003 | "metadata": {}, |
1004 | 1004 | "outputs": [], |
1005 | 1005 | "source": [ |
1006 | | - "!wc -l data/earthquakes.csv" |
| 1006 | + "!wc -l data/01/earthquakes.csv" |
1007 | 1007 | ] |
1008 | 1008 | }, |
1009 | 1009 | { |
|
1014 | 1014 | "**Windows users**: if the above doesn't work for you (depends on your setup), then use this instead:\n", |
1015 | 1015 | "\n", |
1016 | 1016 | "```python\n", |
1017 | | - "!find /c /v \"\" data\\earthquakes.csv\n", |
| 1017 | + "!find /c /v \"\" data\\01\\earthquakes.csv\n", |
1018 | 1018 | "```\n", |
1019 | 1019 | "\n", |
1020 | 1020 | "\n", |
|
1030 | 1030 | "metadata": {}, |
1031 | 1031 | "outputs": [], |
1032 | 1032 | "source": [ |
1033 | | - "!ls -lh data | grep earthquakes.csv" |
| 1033 | + "!ls -lh data/01 | grep earthquakes.csv" |
1034 | 1034 | ] |
1035 | 1035 | }, |
1036 | 1036 | { |
|
1041 | 1041 | "**Windows users**: if the above doesn't work for you (depends on your setup), then use this instead:\n", |
1042 | 1042 | "\n", |
1043 | 1043 | "```python\n", |
1044 | | - "!dir data | findstr \"earthquakes.csv\"\n", |
| 1044 | + "!dir data\\01 | findstr \"earthquakes.csv\"\n", |
1045 | 1045 | "```\n", |
1046 | 1046 | "\n", |
1047 | 1047 | "We can even capture the result of a command and use it in our Python code:" |
|
1054 | 1054 | "metadata": {}, |
1055 | 1055 | "outputs": [], |
1056 | 1056 | "source": [ |
1057 | | - "files = !ls -lh data\n", |
| 1057 | + "files = !ls -lh data/01\n", |
1058 | 1058 | "[file for file in files if 'earthquake' in file]" |
1059 | 1059 | ] |
1060 | 1060 | }, |
|
1066 | 1066 | "**Windows users**: if the above doesn't work for you (depends on your setup), then use this instead:\n", |
1067 | 1067 | "\n", |
1068 | 1068 | "```python\n", |
1069 | | - "files = !dir data\n", |
| 1069 | + "files = !dir data\\01\n", |
1070 | 1070 | "[file for file in files if 'earthquake' in file]\n", |
1071 | 1071 | "```" |
1072 | 1072 | ] |
|
1088 | 1088 | "metadata": {}, |
1089 | 1089 | "outputs": [], |
1090 | 1090 | "source": [ |
1091 | | - "!head -n 2 data/earthquakes.csv" |
| 1091 | + "!head -n 2 data/01/earthquakes.csv" |
1092 | 1092 | ] |
1093 | 1093 | }, |
1094 | 1094 | { |
|
1100 | 1100 | "\n", |
1101 | 1101 | "```python\n", |
1102 | 1102 | "n = 2\n", |
1103 | | - "with open('data/earthquakes.csv', 'r') as file:\n", |
| 1103 | + "with open('data/01/earthquakes.csv', 'r') as file:\n", |
1104 | 1104 | " for _ in range(n):\n", |
1105 | 1105 | " print(file.readline(), end='\\r')\n", |
1106 | 1106 | "```\n", |
|
1116 | 1116 | "metadata": {}, |
1117 | 1117 | "outputs": [], |
1118 | 1118 | "source": [ |
1119 | | - "!tail -n 1 data/earthquakes.csv" |
| 1119 | + "!tail -n 1 data/01/earthquakes.csv" |
1120 | 1120 | ] |
1121 | 1121 | }, |
1122 | 1122 | { |
|
1129 | 1129 | "```python\n", |
1130 | 1130 | "import os\n", |
1131 | 1131 | "\n", |
1132 | | - "with open('data/earthquakes.csv', 'rb') as file:\n", |
| 1132 | + "with open('data/01/earthquakes.csv', 'rb') as file:\n", |
1133 | 1133 | " file.seek(0, os.SEEK_END)\n", |
1134 | 1134 | " while file.read(1) != b'\\n':\n", |
1135 | 1135 | " file.seek(-2, os.SEEK_CUR)\n", |
|
1140 | 1140 | "\n", |
1141 | 1141 | "```python\n", |
1142 | 1142 | "n = 2\n", |
1143 | | - "with open('data/earthquakes.csv', 'r') as file:\n", |
| 1143 | + "with open('data/01/earthquakes.csv', 'r') as file:\n", |
1144 | 1144 | " print('\\r'.join(file.readlines()[-n:]))\n", |
1145 | 1145 | "```\n", |
1146 | 1146 | "\n" |
|
1164 | 1164 | "metadata": {}, |
1165 | 1165 | "outputs": [], |
1166 | 1166 | "source": [ |
1167 | | - "!awk -F',' '{print NF; exit}' data/earthquakes.csv" |
| 1167 | + "!awk -F',' '{print NF; exit}' data/01/earthquakes.csv" |
1168 | 1168 | ] |
1169 | 1169 | }, |
1170 | 1170 | { |
|
1175 | 1175 | "**Windows users**: if the above or below don't work for you (depends on your setup), then use this instead:\n", |
1176 | 1176 | "\n", |
1177 | 1177 | "```python\n", |
1178 | | - "with open('data/earthquakes.csv', 'r') as file:\n", |
| 1178 | + "with open('data/01/earthquakes.csv', 'r') as file:\n", |
1179 | 1179 | " print(len(file.readline().split(',')))\n", |
1180 | 1180 | "```\n", |
1181 | 1181 | "\n", |
|
1190 | 1190 | "metadata": {}, |
1191 | 1191 | "outputs": [], |
1192 | 1192 | "source": [ |
1193 | | - "headers = !head -n 1 data/earthquakes.csv\n", |
| 1193 | + "headers = !head -n 1 data/01/earthquakes.csv\n", |
1194 | 1194 | "len(headers[0].split(','))" |
1195 | 1195 | ] |
1196 | 1196 | }, |
|
1220 | 1220 | "metadata": {}, |
1221 | 1221 | "outputs": [], |
1222 | 1222 | "source": [ |
1223 | | - "df = pd.read_csv('data/earthquakes.csv')" |
| 1223 | + "df = pd.read_csv('data/01/earthquakes.csv')" |
1224 | 1224 | ] |
1225 | 1225 | }, |
1226 | 1226 | { |
|
2155 | 2155 | "pd.concat([tsunami, no_tsunami]).shape" |
2156 | 2156 | ] |
2157 | 2157 | }, |
2158 | | - { |
2159 | | - "cell_type": "markdown", |
2160 | | - "id": "d38495fa-fe5e-4937-9774-b90c0d26e6d9", |
2161 | | - "metadata": {}, |
2162 | | - "source": [ |
2163 | | - "Note that the previous result is equivalent to running the `append()` method of the dataframe:" |
2164 | | - ] |
2165 | | - }, |
2166 | | - { |
2167 | | - "cell_type": "code", |
2168 | | - "execution_count": null, |
2169 | | - "id": "8c6be158-f310-42b7-a05e-cd4b6a6e07b2", |
2170 | | - "metadata": {}, |
2171 | | - "outputs": [], |
2172 | | - "source": [ |
2173 | | - "tsunami.append(no_tsunami).shape" |
2174 | | - ] |
2175 | | - }, |
2176 | 2158 | { |
2177 | 2159 | "cell_type": "markdown", |
2178 | 2160 | "id": "ba559768-6848-4eca-9b84-04b8b6e78417", |
|
2189 | 2171 | "outputs": [], |
2190 | 2172 | "source": [ |
2191 | 2173 | "additional_columns = pd.read_csv(\n", |
2192 | | - " 'data/earthquakes.csv', usecols=['tz', 'felt', 'ids']\n", |
| 2174 | + " 'data/01/earthquakes.csv', usecols=['tz', 'felt', 'ids']\n", |
2193 | 2175 | ")\n", |
2194 | 2176 | "pd.concat([df.head(2), additional_columns.head(2)], axis=1)" |
2195 | 2177 | ] |
|
2210 | 2192 | "outputs": [], |
2211 | 2193 | "source": [ |
2212 | 2194 | "additional_columns = pd.read_csv(\n", |
2213 | | - " 'data/earthquakes.csv', usecols=['tz', 'felt', 'ids', 'time'], index_col='time'\n", |
| 2195 | + " 'data/01/earthquakes.csv', usecols=['tz', 'felt', 'ids', 'time'], index_col='time'\n", |
2214 | 2196 | ")\n", |
2215 | 2197 | "pd.concat([df.head(2), additional_columns.head(2)], axis=1)" |
2216 | 2198 | ] |
|
3618 | 3600 | "metadata": {}, |
3619 | 3601 | "outputs": [], |
3620 | 3602 | "source": [ |
3621 | | - "extra_data = long_df.append([{\n", |
| 3603 | + "extra_data = pd.DataFrame([{\n", |
3622 | 3604 | " 'datatype': 'TAVG', \n", |
3623 | 3605 | " 'date': '2018-10-01', \n", |
3624 | 3606 | " 'temp_C': 10, \n", |
3625 | 3607 | " 'temp_F': 50\n", |
3626 | 3608 | "}]).set_index(['date', 'datatype']).sort_index()\n", |
3627 | 3609 | "\n", |
3628 | | - "extra_data['2018-10-01':'2018-10-02']" |
| 3610 | + "extra_data = pd.concat([long_df, extra_data])\n", |
| 3611 | + "\n", |
| 3612 | + "extra_data.head()" |
3629 | 3613 | ] |
3630 | 3614 | }, |
3631 | 3615 | { |
|
0 commit comments