@@ -1724,68 +1724,18 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset
17241724 jset -> u64s = cpu_to_le32 ((u64 * ) prev - jset -> _data );
17251725}
17261726
1727- void bch2_journal_write (struct closure * cl )
1727+ static int bch2_journal_write_prep (struct journal * j , struct journal_buf * w )
17281728{
1729- struct journal * j = container_of (cl , struct journal , io );
17301729 struct bch_fs * c = container_of (j , struct bch_fs , journal );
1731- struct bch_dev * ca ;
1732- struct journal_buf * w = journal_last_unwritten_buf (j );
1733- struct bch_replicas_padded replicas ;
17341730 struct jset_entry * start , * end ;
17351731 struct jset * jset ;
1736- struct bio * bio ;
1737- struct printbuf journal_debug_buf = PRINTBUF ;
1732+ unsigned sectors , bytes , u64s ;
17381733 bool validate_before_checksum = false;
1739- unsigned i , sectors , bytes , u64s , nr_rw_members = 0 ;
17401734 int ret ;
17411735
1742- BUG_ON (BCH_SB_CLEAN (c -> disk_sb .sb ));
1743-
17441736 journal_buf_realloc (j , w );
17451737 jset = w -> data ;
17461738
1747- j -> write_start_time = local_clock ();
1748-
1749- spin_lock (& j -> lock );
1750-
1751- /*
1752- * If the journal is in an error state - we did an emergency shutdown -
1753- * we prefer to continue doing journal writes. We just mark them as
1754- * noflush so they'll never be used, but they'll still be visible by the
1755- * list_journal tool - this helps in debugging.
1756- *
1757- * There's a caveat: the first journal write after marking the
1758- * superblock dirty must always be a flush write, because on startup
1759- * from a clean shutdown we didn't necessarily read the journal and the
1760- * new journal write might overwrite whatever was in the journal
1761- * previously - we can't leave the journal without any flush writes in
1762- * it.
1763- *
1764- * So if we're in an error state, and we're still starting up, we don't
1765- * write anything at all.
1766- */
1767- if (!test_bit (JOURNAL_NEED_FLUSH_WRITE , & j -> flags ) &&
1768- (bch2_journal_error (j ) ||
1769- w -> noflush ||
1770- (!w -> must_flush &&
1771- (jiffies - j -> last_flush_write ) < msecs_to_jiffies (c -> opts .journal_flush_delay ) &&
1772- test_bit (JOURNAL_MAY_SKIP_FLUSH , & j -> flags )))) {
1773- w -> noflush = true;
1774- SET_JSET_NO_FLUSH (jset , true);
1775- jset -> last_seq = 0 ;
1776- w -> last_seq = 0 ;
1777-
1778- j -> nr_noflush_writes ++ ;
1779- } else if (!bch2_journal_error (j )) {
1780- j -> last_flush_write = jiffies ;
1781- j -> nr_flush_writes ++ ;
1782- clear_bit (JOURNAL_NEED_FLUSH_WRITE , & j -> flags );
1783- } else {
1784- spin_unlock (& j -> lock );
1785- goto err ;
1786- }
1787- spin_unlock (& j -> lock );
1788-
17891739 /*
17901740 * New btree roots are set by journalling them; when the journal entry
17911741 * gets written we have to propagate them to c->btree_roots
@@ -1816,7 +1766,7 @@ void bch2_journal_write(struct closure *cl)
18161766 bch2_fs_fatal_error (c , "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)" ,
18171767 vstruct_bytes (jset ), w -> sectors << 9 ,
18181768 u64s , w -> u64s_reserved , j -> entry_u64s_reserved );
1819- goto err ;
1769+ return - EINVAL ;
18201770 }
18211771
18221772 jset -> magic = cpu_to_le64 (jset_magic (c ));
@@ -1835,37 +1785,115 @@ void bch2_journal_write(struct closure *cl)
18351785 validate_before_checksum = true;
18361786
18371787 if (validate_before_checksum &&
1838- jset_validate (c , NULL , jset , 0 , WRITE ))
1839- goto err ;
1788+ ( ret = jset_validate (c , NULL , jset , 0 , WRITE ) ))
1789+ return ret ;
18401790
18411791 ret = bch2_encrypt (c , JSET_CSUM_TYPE (jset ), journal_nonce (jset ),
18421792 jset -> encrypted_start ,
18431793 vstruct_end (jset ) - (void * ) jset -> encrypted_start );
18441794 if (bch2_fs_fatal_err_on (ret , c ,
18451795 "error decrypting journal entry: %i" , ret ))
1846- goto err ;
1796+ return ret ;
18471797
18481798 jset -> csum = csum_vstruct (c , JSET_CSUM_TYPE (jset ),
18491799 journal_nonce (jset ), jset );
18501800
18511801 if (!validate_before_checksum &&
1852- jset_validate (c , NULL , jset , 0 , WRITE ))
1853- goto err ;
1802+ ( ret = jset_validate (c , NULL , jset , 0 , WRITE ) ))
1803+ return ret ;
18541804
18551805 memset ((void * ) jset + bytes , 0 , (sectors << 9 ) - bytes );
1806+ return 0 ;
1807+ }
1808+
1809+ static int bch2_journal_write_pick_flush (struct journal * j , struct journal_buf * w )
1810+ {
1811+ struct bch_fs * c = container_of (j , struct bch_fs , journal );
1812+ int error = bch2_journal_error (j );
1813+
1814+ /*
1815+ * If the journal is in an error state - we did an emergency shutdown -
1816+ * we prefer to continue doing journal writes. We just mark them as
1817+ * noflush so they'll never be used, but they'll still be visible by the
1818+ * list_journal tool - this helps in debugging.
1819+ *
1820+ * There's a caveat: the first journal write after marking the
1821+ * superblock dirty must always be a flush write, because on startup
1822+ * from a clean shutdown we didn't necessarily read the journal and the
1823+ * new journal write might overwrite whatever was in the journal
1824+ * previously - we can't leave the journal without any flush writes in
1825+ * it.
1826+ *
1827+ * So if we're in an error state, and we're still starting up, we don't
1828+ * write anything at all.
1829+ */
1830+ if (error && test_bit (JOURNAL_NEED_FLUSH_WRITE , & j -> flags ))
1831+ return - EIO ;
1832+
1833+ if (error ||
1834+ w -> noflush ||
1835+ (!w -> must_flush &&
1836+ (jiffies - j -> last_flush_write ) < msecs_to_jiffies (c -> opts .journal_flush_delay ) &&
1837+ test_bit (JOURNAL_MAY_SKIP_FLUSH , & j -> flags ))) {
1838+ w -> noflush = true;
1839+ SET_JSET_NO_FLUSH (w -> data , true);
1840+ w -> data -> last_seq = 0 ;
1841+ w -> last_seq = 0 ;
1842+
1843+ j -> nr_noflush_writes ++ ;
1844+ } else {
1845+ j -> last_flush_write = jiffies ;
1846+ j -> nr_flush_writes ++ ;
1847+ clear_bit (JOURNAL_NEED_FLUSH_WRITE , & j -> flags );
1848+ }
1849+
1850+ return 0 ;
1851+ }
1852+
1853+ void bch2_journal_write (struct closure * cl )
1854+ {
1855+ struct journal * j = container_of (cl , struct journal , io );
1856+ struct bch_fs * c = container_of (j , struct bch_fs , journal );
1857+ struct bch_dev * ca ;
1858+ struct journal_buf * w = journal_last_unwritten_buf (j );
1859+ struct bch_replicas_padded replicas ;
1860+ struct bio * bio ;
1861+ struct printbuf journal_debug_buf = PRINTBUF ;
1862+ unsigned i , nr_rw_members = 0 ;
1863+ int ret ;
1864+
1865+ BUG_ON (BCH_SB_CLEAN (c -> disk_sb .sb ));
1866+
1867+ j -> write_start_time = local_clock ();
18561868
1857- retry_alloc :
18581869 spin_lock (& j -> lock );
1859- ret = journal_write_alloc (j , w );
1870+ ret = bch2_journal_write_pick_flush (j , w );
1871+ spin_unlock (& j -> lock );
1872+ if (ret )
1873+ goto err ;
1874+
1875+ ret = bch2_journal_write_prep (j , w );
1876+ if (ret )
1877+ goto err ;
1878+
1879+ while (1 ) {
1880+ spin_lock (& j -> lock );
1881+ ret = journal_write_alloc (j , w );
1882+ if (!ret || !j -> can_discard )
1883+ break ;
18601884
1861- if (ret && j -> can_discard ) {
18621885 spin_unlock (& j -> lock );
18631886 bch2_journal_do_discards (j );
1864- goto retry_alloc ;
18651887 }
18661888
1867- if (ret )
1889+ if (ret ) {
18681890 __bch2_journal_debug_to_text (& journal_debug_buf , j );
1891+ spin_unlock (& j -> lock );
1892+ bch_err (c , "Unable to allocate journal write:\n%s" ,
1893+ journal_debug_buf .buf );
1894+ printbuf_exit (& journal_debug_buf );
1895+ goto err ;
1896+ }
18691897
18701898 /*
18711899 * write is allocated, no longer need to account for it in
@@ -1880,13 +1908,6 @@ void bch2_journal_write(struct closure *cl)
18801908 bch2_journal_space_available (j );
18811909 spin_unlock (& j -> lock );
18821910
1883- if (ret ) {
1884- bch_err (c , "Unable to allocate journal write:\n%s" ,
1885- journal_debug_buf .buf );
1886- printbuf_exit (& journal_debug_buf );
1887- goto err ;
1888- }
1889-
18901911 w -> devs_written = bch2_bkey_devs (bkey_i_to_s_c (& w -> key ));
18911912
18921913 if (c -> opts .nochanges )
@@ -1908,7 +1929,7 @@ void bch2_journal_write(struct closure *cl)
19081929 if (ret )
19091930 goto err ;
19101931
1911- if (!JSET_NO_FLUSH (jset ) && w -> separate_flush ) {
1932+ if (!JSET_NO_FLUSH (w -> data ) && w -> separate_flush ) {
19121933 for_each_rw_member (ca , c , i ) {
19131934 percpu_ref_get (& ca -> io_ref );
19141935
0 commit comments